1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV32,RV32I
3; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV64,RV64I
4; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV32
5; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV64
6
; cttz (i1 false => zero input is defined) on <vscale x 1 x i8>.
; Zve64x: bitwise popcount of (~x) & (x - 1).
; +v,+d: isolate the lowest set bit (x & -x), zero-extend to e32, convert
; to f32 and take the biased exponent (vnsrl by 23, subtract 127); lanes
; with x == 0 are patched to 8 (the element width) via vmseq/vmerge.
define <vscale x 1 x i8> @cttz_nxv1i8(<vscale x 1 x i8> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv1i8:
; CHECK-ZVE64X:       # %bb.0:
; CHECK-ZVE64X-NEXT:    li a0, 1
; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    li a0, 85
; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    li a0, 51
; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT:    ret
;
; CHECK-D-LABEL: cttz_nxv1i8:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
; CHECK-D-NEXT:    vand.vv v9, v8, v9
; CHECK-D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-D-NEXT:    vzext.vf4 v10, v9
; CHECK-D-NEXT:    vfcvt.f.xu.v v9, v10
; CHECK-D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-D-NEXT:    vnsrl.wi v9, v9, 23
; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v9, v9
; CHECK-D-NEXT:    li a0, 127
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    vsub.vx v8, v9, a0
; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 false)
  ret <vscale x 1 x i8> %a
}
declare <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8>, i1)
50
; Same lowerings as cttz_nxv1i8, for <vscale x 2 x i8> (e8 at mf4;
; the float-exponent path widens through e32 at m1).
define <vscale x 2 x i8> @cttz_nxv2i8(<vscale x 2 x i8> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv2i8:
; CHECK-ZVE64X:       # %bb.0:
; CHECK-ZVE64X-NEXT:    li a0, 1
; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    li a0, 85
; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    li a0, 51
; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT:    ret
;
; CHECK-D-LABEL: cttz_nxv2i8:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
; CHECK-D-NEXT:    vand.vv v9, v8, v9
; CHECK-D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-D-NEXT:    vzext.vf4 v10, v9
; CHECK-D-NEXT:    vfcvt.f.xu.v v9, v10
; CHECK-D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-D-NEXT:    vnsrl.wi v9, v9, 23
; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v9, v9
; CHECK-D-NEXT:    li a0, 127
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    vsub.vx v8, v9, a0
; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 false)
  ret <vscale x 2 x i8> %a
}
declare <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8>, i1)
94
; Same lowerings as cttz_nxv1i8, for <vscale x 4 x i8> (e8 at mf2;
; the float-exponent path widens through e32 at m2).
define <vscale x 4 x i8> @cttz_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv4i8:
; CHECK-ZVE64X:       # %bb.0:
; CHECK-ZVE64X-NEXT:    li a0, 1
; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    li a0, 85
; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    li a0, 51
; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT:    ret
;
; CHECK-D-LABEL: cttz_nxv4i8:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
; CHECK-D-NEXT:    vand.vv v9, v8, v9
; CHECK-D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-D-NEXT:    vzext.vf4 v10, v9
; CHECK-D-NEXT:    vfcvt.f.xu.v v10, v10
; CHECK-D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; CHECK-D-NEXT:    vnsrl.wi v9, v10, 23
; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v9, v9
; CHECK-D-NEXT:    li a0, 127
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    vsub.vx v8, v9, a0
; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 4 x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 false)
  ret <vscale x 4 x i8> %a
}
declare <vscale x 4 x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8>, i1)
138
; Same lowerings as cttz_nxv1i8, for <vscale x 8 x i8> (e8 at m1;
; the float-exponent path widens through e32 at m4).
define <vscale x 8 x i8> @cttz_nxv8i8(<vscale x 8 x i8> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv8i8:
; CHECK-ZVE64X:       # %bb.0:
; CHECK-ZVE64X-NEXT:    li a0, 1
; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    li a0, 85
; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    li a0, 51
; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT:    ret
;
; CHECK-D-LABEL: cttz_nxv8i8:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
; CHECK-D-NEXT:    vand.vv v9, v8, v9
; CHECK-D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-D-NEXT:    vzext.vf4 v12, v9
; CHECK-D-NEXT:    vfcvt.f.xu.v v12, v12
; CHECK-D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; CHECK-D-NEXT:    vnsrl.wi v10, v12, 23
; CHECK-D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v9, v10
; CHECK-D-NEXT:    li a0, 127
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    vsub.vx v8, v9, a0
; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 false)
  ret <vscale x 8 x i8> %a
}
declare <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8>, i1)
182
; Same lowerings as cttz_nxv1i8, for <vscale x 16 x i8> (e8 at m2;
; the float-exponent path widens through e32 at m8).
define <vscale x 16 x i8> @cttz_nxv16i8(<vscale x 16 x i8> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv16i8:
; CHECK-ZVE64X:       # %bb.0:
; CHECK-ZVE64X-NEXT:    li a0, 1
; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
; CHECK-ZVE64X-NEXT:    vsub.vx v10, v8, a0
; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v10
; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
; CHECK-ZVE64X-NEXT:    li a0, 85
; CHECK-ZVE64X-NEXT:    vand.vx v10, v10, a0
; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v10
; CHECK-ZVE64X-NEXT:    li a0, 51
; CHECK-ZVE64X-NEXT:    vand.vx v10, v8, a0
; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v10, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v10
; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT:    ret
;
; CHECK-D-LABEL: cttz_nxv16i8:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
; CHECK-D-NEXT:    vrsub.vi v10, v8, 0
; CHECK-D-NEXT:    vand.vv v10, v8, v10
; CHECK-D-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
; CHECK-D-NEXT:    vzext.vf4 v16, v10
; CHECK-D-NEXT:    vfcvt.f.xu.v v16, v16
; CHECK-D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
; CHECK-D-NEXT:    vnsrl.wi v12, v16, 23
; CHECK-D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v10, v12
; CHECK-D-NEXT:    li a0, 127
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    vsub.vx v8, v10, a0
; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 false)
  ret <vscale x 16 x i8> %a
}
declare <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8>, i1)
226
; <vscale x 32 x i8> at m4: all RUN lines share the popcount sequence
; (single CHECK prefix) — the float-exponent path is not used here;
; presumably because the e32 widening would need LMUL > 8 (verify against
; the lowering code).
define <vscale x 32 x i8> @cttz_nxv32i8(<vscale x 32 x i8> %va) {
; CHECK-LABEL: cttz_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, mu
; CHECK-NEXT:    vsub.vx v12, v8, a0
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vand.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %a = call <vscale x 32 x i8> @llvm.cttz.nxv32i8(<vscale x 32 x i8> %va, i1 false)
  ret <vscale x 32 x i8> %a
}
declare <vscale x 32 x i8> @llvm.cttz.nxv32i8(<vscale x 32 x i8>, i1)
252
; <vscale x 64 x i8> at m8: all RUN lines share the popcount sequence
; (single CHECK prefix); no float-exponent variant is expected here.
define <vscale x 64 x i8> @cttz_nxv64i8(<vscale x 64 x i8> %va) {
; CHECK-LABEL: cttz_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e8, m8, ta, mu
; CHECK-NEXT:    vsub.vx v16, v8, a0
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vand.vv v8, v8, v16
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vsub.vv v8, v8, v16
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v16, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v16
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %a = call <vscale x 64 x i8> @llvm.cttz.nxv64i8(<vscale x 64 x i8> %va, i1 false)
  ret <vscale x 64 x i8> %a
}
declare <vscale x 64 x i8> @llvm.cttz.nxv64i8(<vscale x 64 x i8>, i1)
278
; cttz on <vscale x 1 x i16>. Zve64x uses the 16-bit popcount constants
; (0x5555 / 0x3333 / 0x0F0F masks, multiply by 0x0101, shift by 8);
; RV32 materializes them with addi, RV64 with addiw. With +v,+d the low
; set bit is widened to f32 via vfwcvt and the biased exponent
; (>>23, -127) gives the index; x == 0 lanes merge to 16.
define <vscale x 1 x i16> @cttz_nxv1i16(<vscale x 1 x i16> %va) {
; RV32I-LABEL: cttz_nxv1i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a0, 1
; RV32I-NEXT:    vsetvli a1, zero, e16, mf4, ta, mu
; RV32I-NEXT:    vsub.vx v9, v8, a0
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_nxv1i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a0, 1
; RV64I-NEXT:    vsetvli a1, zero, e16, mf4, ta, mu
; RV64I-NEXT:    vsub.vx v9, v8, a0
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: cttz_nxv1i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
; CHECK-D-NEXT:    vand.vv v9, v8, v9
; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v9
; CHECK-D-NEXT:    vnsrl.wi v9, v10, 23
; CHECK-D-NEXT:    li a0, 127
; CHECK-D-NEXT:    vsub.vx v9, v9, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 16
; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 false)
  ret <vscale x 1 x i16> %a
}
declare <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16>, i1)
353
; Same lowerings as cttz_nxv1i16, for <vscale x 2 x i16> (e16 at mf2).
define <vscale x 2 x i16> @cttz_nxv2i16(<vscale x 2 x i16> %va) {
; RV32I-LABEL: cttz_nxv2i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a0, 1
; RV32I-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
; RV32I-NEXT:    vsub.vx v9, v8, a0
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_nxv2i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a0, 1
; RV64I-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
; RV64I-NEXT:    vsub.vx v9, v8, a0
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: cttz_nxv2i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
; CHECK-D-NEXT:    vand.vv v9, v8, v9
; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v9
; CHECK-D-NEXT:    vnsrl.wi v9, v10, 23
; CHECK-D-NEXT:    li a0, 127
; CHECK-D-NEXT:    vsub.vx v9, v9, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 16
; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 false)
  ret <vscale x 2 x i16> %a
}
declare <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16>, i1)
428
; Same lowerings as cttz_nxv1i16, for <vscale x 4 x i16> (e16 at m1).
define <vscale x 4 x i16> @cttz_nxv4i16(<vscale x 4 x i16> %va) {
; RV32I-LABEL: cttz_nxv4i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a0, 1
; RV32I-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
; RV32I-NEXT:    vsub.vx v9, v8, a0
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_nxv4i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a0, 1
; RV64I-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
; RV64I-NEXT:    vsub.vx v9, v8, a0
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: cttz_nxv4i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
; CHECK-D-NEXT:    vand.vv v9, v8, v9
; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v9
; CHECK-D-NEXT:    vnsrl.wi v9, v10, 23
; CHECK-D-NEXT:    li a0, 127
; CHECK-D-NEXT:    vsub.vx v9, v9, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 16
; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 false)
  ret <vscale x 4 x i16> %a
}
declare <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16>, i1)
503
; Same lowerings as cttz_nxv1i16, for <vscale x 8 x i16> (e16 at m2).
define <vscale x 8 x i16> @cttz_nxv8i16(<vscale x 8 x i16> %va) {
; RV32I-LABEL: cttz_nxv8i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a0, 1
; RV32I-NEXT:    vsetvli a1, zero, e16, m2, ta, mu
; RV32I-NEXT:    vsub.vx v10, v8, a0
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v10, v10, a0
; RV32I-NEXT:    vsub.vv v8, v8, v10
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v10, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v10, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v10
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_nxv8i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a0, 1
; RV64I-NEXT:    vsetvli a1, zero, e16, m2, ta, mu
; RV64I-NEXT:    vsub.vx v10, v8, a0
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v10, v10, a0
; RV64I-NEXT:    vsub.vv v8, v8, v10
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v10, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v10, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v10
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: cttz_nxv8i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; CHECK-D-NEXT:    vrsub.vi v10, v8, 0
; CHECK-D-NEXT:    vand.vv v10, v8, v10
; CHECK-D-NEXT:    vfwcvt.f.xu.v v12, v10
; CHECK-D-NEXT:    vnsrl.wi v10, v12, 23
; CHECK-D-NEXT:    li a0, 127
; CHECK-D-NEXT:    vsub.vx v10, v10, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 16
; CHECK-D-NEXT:    vmerge.vxm v8, v10, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 false)
  ret <vscale x 8 x i16> %a
}
declare <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16>, i1)
578
; Same lowerings as cttz_nxv1i16, for <vscale x 16 x i16> (e16 at m4;
; the float path widens to f32 at m8).
define <vscale x 16 x i16> @cttz_nxv16i16(<vscale x 16 x i16> %va) {
; RV32I-LABEL: cttz_nxv16i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a0, 1
; RV32I-NEXT:    vsetvli a1, zero, e16, m4, ta, mu
; RV32I-NEXT:    vsub.vx v12, v8, a0
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v12, v12, a0
; RV32I-NEXT:    vsub.vv v8, v8, v12
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v12, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v12, v8
; RV32I-NEXT:    vsrl.vi v12, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v12
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_nxv16i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a0, 1
; RV64I-NEXT:    vsetvli a1, zero, e16, m4, ta, mu
; RV64I-NEXT:    vsub.vx v12, v8, a0
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v12
; RV64I-NEXT:    vsrl.vi v12, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v12, v12, a0
; RV64I-NEXT:    vsub.vv v8, v8, v12
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v12, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v12, v8
; RV64I-NEXT:    vsrl.vi v12, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v12
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: cttz_nxv16i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; CHECK-D-NEXT:    vrsub.vi v12, v8, 0
; CHECK-D-NEXT:    vand.vv v12, v8, v12
; CHECK-D-NEXT:    vfwcvt.f.xu.v v16, v12
; CHECK-D-NEXT:    vnsrl.wi v12, v16, 23
; CHECK-D-NEXT:    li a0, 127
; CHECK-D-NEXT:    vsub.vx v12, v12, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 16
; CHECK-D-NEXT:    vmerge.vxm v8, v12, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 false)
  ret <vscale x 16 x i16> %a
}
declare <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16>, i1)
653
; <vscale x 32 x i16> at m8: all configurations use the popcount
; sequence (RV32/RV64 prefixes cover both zve64x and +v,+d runs);
; no widened float path is expected here. The only RV32/RV64 split is
; addi vs. addiw for materializing the mask constants.
define <vscale x 32 x i16> @cttz_nxv32i16(<vscale x 32 x i16> %va) {
; RV32-LABEL: cttz_nxv32i16:
; RV32:       # %bb.0:
; RV32-NEXT:    li a0, 1
; RV32-NEXT:    vsetvli a1, zero, e16, m8, ta, mu
; RV32-NEXT:    vsub.vx v16, v8, a0
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vsub.vv v8, v8, v16
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v16, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v16
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    li a0, 257
; RV32-NEXT:    vmul.vx v8, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 8
; RV32-NEXT:    ret
;
; RV64-LABEL: cttz_nxv32i16:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 1
; RV64-NEXT:    vsetvli a1, zero, e16, m8, ta, mu
; RV64-NEXT:    vsub.vx v16, v8, a0
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vand.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v16, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    li a0, 257
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 8
; RV64-NEXT:    ret
  %a = call <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 false)
  ret <vscale x 32 x i16> %a
}
declare <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16>, i1)
714
; cttz on <vscale x 1 x i32>. Zve64x uses the 32-bit popcount constants
; (0x55555555 / 0x33333333 / 0x0F0F0F0F, multiply by 0x01010101, shift
; by 24). With +v,+d the low set bit is widened to f64 via vfwcvt and
; the biased exponent is extracted (shift by 52, subtract 1023);
; x == 0 lanes merge to 32.
define <vscale x 1 x i32> @cttz_nxv1i32(<vscale x 1 x i32> %va) {
; RV32I-LABEL: cttz_nxv1i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a0, 1
; RV32I-NEXT:    vsetvli a1, zero, e32, mf2, ta, mu
; RV32I-NEXT:    vsub.vx v9, v8, a0
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 349525
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 209715
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 61681
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    lui a0, 4112
; RV32I-NEXT:    addi a0, a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 24
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_nxv1i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a0, 1
; RV64I-NEXT:    vsetvli a1, zero, e32, mf2, ta, mu
; RV64I-NEXT:    vsub.vx v9, v8, a0
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 349525
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 209715
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 61681
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    lui a0, 4112
; RV64I-NEXT:    addiw a0, a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 24
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: cttz_nxv1i32:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
; CHECK-D-NEXT:    vand.vv v9, v8, v9
; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v9
; CHECK-D-NEXT:    li a0, 52
; CHECK-D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-D-NEXT:    vsrl.vx v9, v10, a0
; CHECK-D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v9, v9
; CHECK-D-NEXT:    li a0, 1023
; CHECK-D-NEXT:    vsub.vx v9, v9, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 32
; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 1 x i32> @llvm.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 false)
  ret <vscale x 1 x i32> %a
}
declare <vscale x 1 x i32> @llvm.cttz.nxv1i32(<vscale x 1 x i32>, i1)
795
; Same lowerings as cttz_nxv1i32, for <vscale x 2 x i32> (e32 at m1;
; the float path widens to f64 at m2).
define <vscale x 2 x i32> @cttz_nxv2i32(<vscale x 2 x i32> %va) {
; RV32I-LABEL: cttz_nxv2i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a0, 1
; RV32I-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
; RV32I-NEXT:    vsub.vx v9, v8, a0
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 349525
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 209715
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 61681
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    lui a0, 4112
; RV32I-NEXT:    addi a0, a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 24
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_nxv2i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a0, 1
; RV64I-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
; RV64I-NEXT:    vsub.vx v9, v8, a0
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 349525
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 209715
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 61681
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    lui a0, 4112
; RV64I-NEXT:    addiw a0, a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 24
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: cttz_nxv2i32:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
; CHECK-D-NEXT:    vand.vv v9, v8, v9
; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v9
; CHECK-D-NEXT:    li a0, 52
; CHECK-D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; CHECK-D-NEXT:    vsrl.vx v10, v10, a0
; CHECK-D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v9, v10
; CHECK-D-NEXT:    li a0, 1023
; CHECK-D-NEXT:    vsub.vx v9, v9, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 32
; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 false)
  ret <vscale x 2 x i32> %a
}
declare <vscale x 2 x i32> @llvm.cttz.nxv2i32(<vscale x 2 x i32>, i1)
876
877define <vscale x 4 x i32> @cttz_nxv4i32(<vscale x 4 x i32> %va) {
878; RV32I-LABEL: cttz_nxv4i32:
879; RV32I:       # %bb.0:
880; RV32I-NEXT:    li a0, 1
881; RV32I-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
882; RV32I-NEXT:    vsub.vx v10, v8, a0
883; RV32I-NEXT:    vnot.v v8, v8
884; RV32I-NEXT:    vand.vv v8, v8, v10
885; RV32I-NEXT:    vsrl.vi v10, v8, 1
886; RV32I-NEXT:    lui a0, 349525
887; RV32I-NEXT:    addi a0, a0, 1365
888; RV32I-NEXT:    vand.vx v10, v10, a0
889; RV32I-NEXT:    vsub.vv v8, v8, v10
890; RV32I-NEXT:    lui a0, 209715
891; RV32I-NEXT:    addi a0, a0, 819
892; RV32I-NEXT:    vand.vx v10, v8, a0
893; RV32I-NEXT:    vsrl.vi v8, v8, 2
894; RV32I-NEXT:    vand.vx v8, v8, a0
895; RV32I-NEXT:    vadd.vv v8, v10, v8
896; RV32I-NEXT:    vsrl.vi v10, v8, 4
897; RV32I-NEXT:    vadd.vv v8, v8, v10
898; RV32I-NEXT:    lui a0, 61681
899; RV32I-NEXT:    addi a0, a0, -241
900; RV32I-NEXT:    vand.vx v8, v8, a0
901; RV32I-NEXT:    lui a0, 4112
902; RV32I-NEXT:    addi a0, a0, 257
903; RV32I-NEXT:    vmul.vx v8, v8, a0
904; RV32I-NEXT:    vsrl.vi v8, v8, 24
905; RV32I-NEXT:    ret
906;
907; RV64I-LABEL: cttz_nxv4i32:
908; RV64I:       # %bb.0:
909; RV64I-NEXT:    li a0, 1
910; RV64I-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
911; RV64I-NEXT:    vsub.vx v10, v8, a0
912; RV64I-NEXT:    vnot.v v8, v8
913; RV64I-NEXT:    vand.vv v8, v8, v10
914; RV64I-NEXT:    vsrl.vi v10, v8, 1
915; RV64I-NEXT:    lui a0, 349525
916; RV64I-NEXT:    addiw a0, a0, 1365
917; RV64I-NEXT:    vand.vx v10, v10, a0
918; RV64I-NEXT:    vsub.vv v8, v8, v10
919; RV64I-NEXT:    lui a0, 209715
920; RV64I-NEXT:    addiw a0, a0, 819
921; RV64I-NEXT:    vand.vx v10, v8, a0
922; RV64I-NEXT:    vsrl.vi v8, v8, 2
923; RV64I-NEXT:    vand.vx v8, v8, a0
924; RV64I-NEXT:    vadd.vv v8, v10, v8
925; RV64I-NEXT:    vsrl.vi v10, v8, 4
926; RV64I-NEXT:    vadd.vv v8, v8, v10
927; RV64I-NEXT:    lui a0, 61681
928; RV64I-NEXT:    addiw a0, a0, -241
929; RV64I-NEXT:    vand.vx v8, v8, a0
930; RV64I-NEXT:    lui a0, 4112
931; RV64I-NEXT:    addiw a0, a0, 257
932; RV64I-NEXT:    vmul.vx v8, v8, a0
933; RV64I-NEXT:    vsrl.vi v8, v8, 24
934; RV64I-NEXT:    ret
935;
936; CHECK-D-LABEL: cttz_nxv4i32:
937; CHECK-D:       # %bb.0:
938; CHECK-D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
939; CHECK-D-NEXT:    vrsub.vi v10, v8, 0
940; CHECK-D-NEXT:    vand.vv v10, v8, v10
941; CHECK-D-NEXT:    vfwcvt.f.xu.v v12, v10
942; CHECK-D-NEXT:    li a0, 52
943; CHECK-D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
944; CHECK-D-NEXT:    vsrl.vx v12, v12, a0
945; CHECK-D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
946; CHECK-D-NEXT:    vncvt.x.x.w v10, v12
947; CHECK-D-NEXT:    li a0, 1023
948; CHECK-D-NEXT:    vsub.vx v10, v10, a0
949; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
950; CHECK-D-NEXT:    li a0, 32
951; CHECK-D-NEXT:    vmerge.vxm v8, v10, a0, v0
952; CHECK-D-NEXT:    ret
  ; cttz(x, false): result must be defined (32) for zero elements.
  ; RV32I/RV64I lower via the SWAR popcount of (~x & (x - 1)); CHECK-D
  ; isolates the lowest set bit, widens to f64 (vfwcvt.f.xu.v, m2 -> m4),
  ; extracts the biased exponent (srl 52, sub 1023), and vmerge's in 32
  ; where the input element was zero.
953  %a = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 false)
954  ret <vscale x 4 x i32> %a
955}
956declare <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32>, i1)
957
958define <vscale x 8 x i32> @cttz_nxv8i32(<vscale x 8 x i32> %va) {
959; RV32I-LABEL: cttz_nxv8i32:
960; RV32I:       # %bb.0:
961; RV32I-NEXT:    li a0, 1
962; RV32I-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
963; RV32I-NEXT:    vsub.vx v12, v8, a0
964; RV32I-NEXT:    vnot.v v8, v8
965; RV32I-NEXT:    vand.vv v8, v8, v12
966; RV32I-NEXT:    vsrl.vi v12, v8, 1
967; RV32I-NEXT:    lui a0, 349525
968; RV32I-NEXT:    addi a0, a0, 1365
969; RV32I-NEXT:    vand.vx v12, v12, a0
970; RV32I-NEXT:    vsub.vv v8, v8, v12
971; RV32I-NEXT:    lui a0, 209715
972; RV32I-NEXT:    addi a0, a0, 819
973; RV32I-NEXT:    vand.vx v12, v8, a0
974; RV32I-NEXT:    vsrl.vi v8, v8, 2
975; RV32I-NEXT:    vand.vx v8, v8, a0
976; RV32I-NEXT:    vadd.vv v8, v12, v8
977; RV32I-NEXT:    vsrl.vi v12, v8, 4
978; RV32I-NEXT:    vadd.vv v8, v8, v12
979; RV32I-NEXT:    lui a0, 61681
980; RV32I-NEXT:    addi a0, a0, -241
981; RV32I-NEXT:    vand.vx v8, v8, a0
982; RV32I-NEXT:    lui a0, 4112
983; RV32I-NEXT:    addi a0, a0, 257
984; RV32I-NEXT:    vmul.vx v8, v8, a0
985; RV32I-NEXT:    vsrl.vi v8, v8, 24
986; RV32I-NEXT:    ret
987;
988; RV64I-LABEL: cttz_nxv8i32:
989; RV64I:       # %bb.0:
990; RV64I-NEXT:    li a0, 1
991; RV64I-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
992; RV64I-NEXT:    vsub.vx v12, v8, a0
993; RV64I-NEXT:    vnot.v v8, v8
994; RV64I-NEXT:    vand.vv v8, v8, v12
995; RV64I-NEXT:    vsrl.vi v12, v8, 1
996; RV64I-NEXT:    lui a0, 349525
997; RV64I-NEXT:    addiw a0, a0, 1365
998; RV64I-NEXT:    vand.vx v12, v12, a0
999; RV64I-NEXT:    vsub.vv v8, v8, v12
1000; RV64I-NEXT:    lui a0, 209715
1001; RV64I-NEXT:    addiw a0, a0, 819
1002; RV64I-NEXT:    vand.vx v12, v8, a0
1003; RV64I-NEXT:    vsrl.vi v8, v8, 2
1004; RV64I-NEXT:    vand.vx v8, v8, a0
1005; RV64I-NEXT:    vadd.vv v8, v12, v8
1006; RV64I-NEXT:    vsrl.vi v12, v8, 4
1007; RV64I-NEXT:    vadd.vv v8, v8, v12
1008; RV64I-NEXT:    lui a0, 61681
1009; RV64I-NEXT:    addiw a0, a0, -241
1010; RV64I-NEXT:    vand.vx v8, v8, a0
1011; RV64I-NEXT:    lui a0, 4112
1012; RV64I-NEXT:    addiw a0, a0, 257
1013; RV64I-NEXT:    vmul.vx v8, v8, a0
1014; RV64I-NEXT:    vsrl.vi v8, v8, 24
1015; RV64I-NEXT:    ret
1016;
1017; CHECK-D-LABEL: cttz_nxv8i32:
1018; CHECK-D:       # %bb.0:
1019; CHECK-D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
1020; CHECK-D-NEXT:    vrsub.vi v12, v8, 0
1021; CHECK-D-NEXT:    vand.vv v12, v8, v12
1022; CHECK-D-NEXT:    vfwcvt.f.xu.v v16, v12
1023; CHECK-D-NEXT:    li a0, 52
1024; CHECK-D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
1025; CHECK-D-NEXT:    vsrl.vx v16, v16, a0
1026; CHECK-D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
1027; CHECK-D-NEXT:    vncvt.x.x.w v12, v16
1028; CHECK-D-NEXT:    li a0, 1023
1029; CHECK-D-NEXT:    vsub.vx v12, v12, a0
1030; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
1031; CHECK-D-NEXT:    li a0, 32
1032; CHECK-D-NEXT:    vmerge.vxm v8, v12, a0, v0
1033; CHECK-D-NEXT:    ret
  ; cttz(x, false): result must be defined (32) for zero elements.
  ; RV32I/RV64I lower via the SWAR popcount of (~x & (x - 1)); CHECK-D
  ; isolates the lowest set bit, widens to f64 (m4 -> m8), extracts the
  ; biased exponent (srl 52, sub 1023), and vmerge's in 32 for zero inputs.
1034  %a = call <vscale x 8 x i32> @llvm.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 false)
1035  ret <vscale x 8 x i32> %a
1036}
1037declare <vscale x 8 x i32> @llvm.cttz.nxv8i32(<vscale x 8 x i32>, i1)
1038
1039define <vscale x 16 x i32> @cttz_nxv16i32(<vscale x 16 x i32> %va) {
1040; RV32-LABEL: cttz_nxv16i32:
1041; RV32:       # %bb.0:
1042; RV32-NEXT:    li a0, 1
1043; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, mu
1044; RV32-NEXT:    vsub.vx v16, v8, a0
1045; RV32-NEXT:    vnot.v v8, v8
1046; RV32-NEXT:    vand.vv v8, v8, v16
1047; RV32-NEXT:    vsrl.vi v16, v8, 1
1048; RV32-NEXT:    lui a0, 349525
1049; RV32-NEXT:    addi a0, a0, 1365
1050; RV32-NEXT:    vand.vx v16, v16, a0
1051; RV32-NEXT:    vsub.vv v8, v8, v16
1052; RV32-NEXT:    lui a0, 209715
1053; RV32-NEXT:    addi a0, a0, 819
1054; RV32-NEXT:    vand.vx v16, v8, a0
1055; RV32-NEXT:    vsrl.vi v8, v8, 2
1056; RV32-NEXT:    vand.vx v8, v8, a0
1057; RV32-NEXT:    vadd.vv v8, v16, v8
1058; RV32-NEXT:    vsrl.vi v16, v8, 4
1059; RV32-NEXT:    vadd.vv v8, v8, v16
1060; RV32-NEXT:    lui a0, 61681
1061; RV32-NEXT:    addi a0, a0, -241
1062; RV32-NEXT:    vand.vx v8, v8, a0
1063; RV32-NEXT:    lui a0, 4112
1064; RV32-NEXT:    addi a0, a0, 257
1065; RV32-NEXT:    vmul.vx v8, v8, a0
1066; RV32-NEXT:    vsrl.vi v8, v8, 24
1067; RV32-NEXT:    ret
1068;
1069; RV64-LABEL: cttz_nxv16i32:
1070; RV64:       # %bb.0:
1071; RV64-NEXT:    li a0, 1
1072; RV64-NEXT:    vsetvli a1, zero, e32, m8, ta, mu
1073; RV64-NEXT:    vsub.vx v16, v8, a0
1074; RV64-NEXT:    vnot.v v8, v8
1075; RV64-NEXT:    vand.vv v8, v8, v16
1076; RV64-NEXT:    vsrl.vi v16, v8, 1
1077; RV64-NEXT:    lui a0, 349525
1078; RV64-NEXT:    addiw a0, a0, 1365
1079; RV64-NEXT:    vand.vx v16, v16, a0
1080; RV64-NEXT:    vsub.vv v8, v8, v16
1081; RV64-NEXT:    lui a0, 209715
1082; RV64-NEXT:    addiw a0, a0, 819
1083; RV64-NEXT:    vand.vx v16, v8, a0
1084; RV64-NEXT:    vsrl.vi v8, v8, 2
1085; RV64-NEXT:    vand.vx v8, v8, a0
1086; RV64-NEXT:    vadd.vv v8, v16, v8
1087; RV64-NEXT:    vsrl.vi v16, v8, 4
1088; RV64-NEXT:    vadd.vv v8, v8, v16
1089; RV64-NEXT:    lui a0, 61681
1090; RV64-NEXT:    addiw a0, a0, -241
1091; RV64-NEXT:    vand.vx v8, v8, a0
1092; RV64-NEXT:    lui a0, 4112
1093; RV64-NEXT:    addiw a0, a0, 257
1094; RV64-NEXT:    vmul.vx v8, v8, a0
1095; RV64-NEXT:    vsrl.vi v8, v8, 24
1096; RV64-NEXT:    ret
  ; cttz(x, false) at LMUL=8: only RV32/RV64 prefixes are checked here
  ; (no CHECK-D float-based variant is emitted for this type); both
  ; configs use the SWAR popcount of (~x & (x - 1)).
1097  %a = call <vscale x 16 x i32> @llvm.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 false)
1098  ret <vscale x 16 x i32> %a
1099}
1100declare <vscale x 16 x i32> @llvm.cttz.nxv16i32(<vscale x 16 x i32>, i1)
1101
1102define <vscale x 1 x i64> @cttz_nxv1i64(<vscale x 1 x i64> %va) {
1103; RV32-LABEL: cttz_nxv1i64:
1104; RV32:       # %bb.0:
1105; RV32-NEXT:    addi sp, sp, -16
1106; RV32-NEXT:    .cfi_def_cfa_offset 16
1107; RV32-NEXT:    lui a0, 349525
1108; RV32-NEXT:    addi a0, a0, 1365
1109; RV32-NEXT:    sw a0, 12(sp)
1110; RV32-NEXT:    sw a0, 8(sp)
1111; RV32-NEXT:    lui a0, 209715
1112; RV32-NEXT:    addi a0, a0, 819
1113; RV32-NEXT:    sw a0, 12(sp)
1114; RV32-NEXT:    sw a0, 8(sp)
1115; RV32-NEXT:    lui a0, 61681
1116; RV32-NEXT:    addi a0, a0, -241
1117; RV32-NEXT:    sw a0, 12(sp)
1118; RV32-NEXT:    sw a0, 8(sp)
1119; RV32-NEXT:    lui a0, 4112
1120; RV32-NEXT:    addi a0, a0, 257
1121; RV32-NEXT:    sw a0, 12(sp)
1122; RV32-NEXT:    sw a0, 8(sp)
1123; RV32-NEXT:    li a0, 1
1124; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
1125; RV32-NEXT:    vsub.vx v9, v8, a0
1126; RV32-NEXT:    vnot.v v8, v8
1127; RV32-NEXT:    addi a0, sp, 8
1128; RV32-NEXT:    vlse64.v v10, (a0), zero
1129; RV32-NEXT:    vand.vv v8, v8, v9
1130; RV32-NEXT:    vlse64.v v9, (a0), zero
1131; RV32-NEXT:    vsrl.vi v11, v8, 1
1132; RV32-NEXT:    vand.vv v10, v11, v10
1133; RV32-NEXT:    vsub.vv v8, v8, v10
1134; RV32-NEXT:    vand.vv v10, v8, v9
1135; RV32-NEXT:    vsrl.vi v8, v8, 2
1136; RV32-NEXT:    vand.vv v8, v8, v9
1137; RV32-NEXT:    vadd.vv v8, v10, v8
1138; RV32-NEXT:    vlse64.v v9, (a0), zero
1139; RV32-NEXT:    vlse64.v v10, (a0), zero
1140; RV32-NEXT:    vsrl.vi v11, v8, 4
1141; RV32-NEXT:    vadd.vv v8, v8, v11
1142; RV32-NEXT:    vand.vv v8, v8, v9
1143; RV32-NEXT:    vmul.vv v8, v8, v10
1144; RV32-NEXT:    li a0, 56
1145; RV32-NEXT:    vsrl.vx v8, v8, a0
1146; RV32-NEXT:    addi sp, sp, 16
1147; RV32-NEXT:    ret
1148;
1149; RV64-LABEL: cttz_nxv1i64:
1150; RV64:       # %bb.0:
1151; RV64-NEXT:    li a0, 1
1152; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
1153; RV64-NEXT:    vsub.vx v9, v8, a0
1154; RV64-NEXT:    vnot.v v8, v8
1155; RV64-NEXT:    vand.vv v8, v8, v9
1156; RV64-NEXT:    lui a0, %hi(.LCPI18_0)
1157; RV64-NEXT:    ld a0, %lo(.LCPI18_0)(a0)
1158; RV64-NEXT:    lui a1, %hi(.LCPI18_1)
1159; RV64-NEXT:    ld a1, %lo(.LCPI18_1)(a1)
1160; RV64-NEXT:    vsrl.vi v9, v8, 1
1161; RV64-NEXT:    vand.vx v9, v9, a0
1162; RV64-NEXT:    vsub.vv v8, v8, v9
1163; RV64-NEXT:    vand.vx v9, v8, a1
1164; RV64-NEXT:    vsrl.vi v8, v8, 2
1165; RV64-NEXT:    vand.vx v8, v8, a1
1166; RV64-NEXT:    vadd.vv v8, v9, v8
1167; RV64-NEXT:    lui a0, %hi(.LCPI18_2)
1168; RV64-NEXT:    ld a0, %lo(.LCPI18_2)(a0)
1169; RV64-NEXT:    lui a1, %hi(.LCPI18_3)
1170; RV64-NEXT:    ld a1, %lo(.LCPI18_3)(a1)
1171; RV64-NEXT:    vsrl.vi v9, v8, 4
1172; RV64-NEXT:    vadd.vv v8, v8, v9
1173; RV64-NEXT:    vand.vx v8, v8, a0
1174; RV64-NEXT:    vmul.vx v8, v8, a1
1175; RV64-NEXT:    li a0, 56
1176; RV64-NEXT:    vsrl.vx v8, v8, a0
1177; RV64-NEXT:    ret
  ; cttz(x, false) on i64 elements: both configs use the SWAR popcount of
  ; (~x & (x - 1)). RV64 pulls the four 64-bit magic constants from the
  ; constant pool (.LCPI18_*); RV32 splats them by storing 32-bit halves
  ; to the stack and reloading with zero-stride vlse64.
  ; NOTE(review): in the RV32 checks all four constants are stored to the
  ; same 8(sp)/12(sp) slots before any vlse64 executes, so every splat
  ; load would observe only the last value stored -- verify these
  ; autogenerated assertions reflect intended codegen.
1178  %a = call <vscale x 1 x i64> @llvm.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 false)
1179  ret <vscale x 1 x i64> %a
1180}
1181declare <vscale x 1 x i64> @llvm.cttz.nxv1i64(<vscale x 1 x i64>, i1)
1182
1183define <vscale x 2 x i64> @cttz_nxv2i64(<vscale x 2 x i64> %va) {
1184; RV32-LABEL: cttz_nxv2i64:
1185; RV32:       # %bb.0:
1186; RV32-NEXT:    addi sp, sp, -16
1187; RV32-NEXT:    .cfi_def_cfa_offset 16
1188; RV32-NEXT:    lui a0, 349525
1189; RV32-NEXT:    addi a0, a0, 1365
1190; RV32-NEXT:    sw a0, 12(sp)
1191; RV32-NEXT:    sw a0, 8(sp)
1192; RV32-NEXT:    lui a0, 209715
1193; RV32-NEXT:    addi a0, a0, 819
1194; RV32-NEXT:    sw a0, 12(sp)
1195; RV32-NEXT:    sw a0, 8(sp)
1196; RV32-NEXT:    lui a0, 61681
1197; RV32-NEXT:    addi a0, a0, -241
1198; RV32-NEXT:    sw a0, 12(sp)
1199; RV32-NEXT:    sw a0, 8(sp)
1200; RV32-NEXT:    lui a0, 4112
1201; RV32-NEXT:    addi a0, a0, 257
1202; RV32-NEXT:    sw a0, 12(sp)
1203; RV32-NEXT:    sw a0, 8(sp)
1204; RV32-NEXT:    li a0, 1
1205; RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
1206; RV32-NEXT:    vsub.vx v10, v8, a0
1207; RV32-NEXT:    vnot.v v8, v8
1208; RV32-NEXT:    addi a0, sp, 8
1209; RV32-NEXT:    vlse64.v v12, (a0), zero
1210; RV32-NEXT:    vand.vv v8, v8, v10
1211; RV32-NEXT:    vlse64.v v10, (a0), zero
1212; RV32-NEXT:    vsrl.vi v14, v8, 1
1213; RV32-NEXT:    vand.vv v12, v14, v12
1214; RV32-NEXT:    vsub.vv v8, v8, v12
1215; RV32-NEXT:    vand.vv v12, v8, v10
1216; RV32-NEXT:    vsrl.vi v8, v8, 2
1217; RV32-NEXT:    vand.vv v8, v8, v10
1218; RV32-NEXT:    vadd.vv v8, v12, v8
1219; RV32-NEXT:    vlse64.v v10, (a0), zero
1220; RV32-NEXT:    vlse64.v v12, (a0), zero
1221; RV32-NEXT:    vsrl.vi v14, v8, 4
1222; RV32-NEXT:    vadd.vv v8, v8, v14
1223; RV32-NEXT:    vand.vv v8, v8, v10
1224; RV32-NEXT:    vmul.vv v8, v8, v12
1225; RV32-NEXT:    li a0, 56
1226; RV32-NEXT:    vsrl.vx v8, v8, a0
1227; RV32-NEXT:    addi sp, sp, 16
1228; RV32-NEXT:    ret
1229;
1230; RV64-LABEL: cttz_nxv2i64:
1231; RV64:       # %bb.0:
1232; RV64-NEXT:    li a0, 1
1233; RV64-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
1234; RV64-NEXT:    vsub.vx v10, v8, a0
1235; RV64-NEXT:    vnot.v v8, v8
1236; RV64-NEXT:    vand.vv v8, v8, v10
1237; RV64-NEXT:    lui a0, %hi(.LCPI19_0)
1238; RV64-NEXT:    ld a0, %lo(.LCPI19_0)(a0)
1239; RV64-NEXT:    lui a1, %hi(.LCPI19_1)
1240; RV64-NEXT:    ld a1, %lo(.LCPI19_1)(a1)
1241; RV64-NEXT:    vsrl.vi v10, v8, 1
1242; RV64-NEXT:    vand.vx v10, v10, a0
1243; RV64-NEXT:    vsub.vv v8, v8, v10
1244; RV64-NEXT:    vand.vx v10, v8, a1
1245; RV64-NEXT:    vsrl.vi v8, v8, 2
1246; RV64-NEXT:    vand.vx v8, v8, a1
1247; RV64-NEXT:    vadd.vv v8, v10, v8
1248; RV64-NEXT:    lui a0, %hi(.LCPI19_2)
1249; RV64-NEXT:    ld a0, %lo(.LCPI19_2)(a0)
1250; RV64-NEXT:    lui a1, %hi(.LCPI19_3)
1251; RV64-NEXT:    ld a1, %lo(.LCPI19_3)(a1)
1252; RV64-NEXT:    vsrl.vi v10, v8, 4
1253; RV64-NEXT:    vadd.vv v8, v8, v10
1254; RV64-NEXT:    vand.vx v8, v8, a0
1255; RV64-NEXT:    vmul.vx v8, v8, a1
1256; RV64-NEXT:    li a0, 56
1257; RV64-NEXT:    vsrl.vx v8, v8, a0
1258; RV64-NEXT:    ret
  ; cttz(x, false) on i64 elements at LMUL=2: SWAR popcount of
  ; (~x & (x - 1)); RV64 loads constants from the pool (.LCPI19_*),
  ; RV32 splats them via stack stores + zero-stride vlse64.
  ; NOTE(review): in the RV32 checks all four constants are stored to the
  ; same 8(sp)/12(sp) slots before any vlse64 executes -- verify these
  ; autogenerated assertions reflect intended codegen.
1259  %a = call <vscale x 2 x i64> @llvm.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 false)
1260  ret <vscale x 2 x i64> %a
1261}
1262declare <vscale x 2 x i64> @llvm.cttz.nxv2i64(<vscale x 2 x i64>, i1)
1263
1264define <vscale x 4 x i64> @cttz_nxv4i64(<vscale x 4 x i64> %va) {
1265; RV32-LABEL: cttz_nxv4i64:
1266; RV32:       # %bb.0:
1267; RV32-NEXT:    addi sp, sp, -16
1268; RV32-NEXT:    .cfi_def_cfa_offset 16
1269; RV32-NEXT:    lui a0, 349525
1270; RV32-NEXT:    addi a0, a0, 1365
1271; RV32-NEXT:    sw a0, 12(sp)
1272; RV32-NEXT:    sw a0, 8(sp)
1273; RV32-NEXT:    lui a0, 209715
1274; RV32-NEXT:    addi a0, a0, 819
1275; RV32-NEXT:    sw a0, 12(sp)
1276; RV32-NEXT:    sw a0, 8(sp)
1277; RV32-NEXT:    lui a0, 61681
1278; RV32-NEXT:    addi a0, a0, -241
1279; RV32-NEXT:    sw a0, 12(sp)
1280; RV32-NEXT:    sw a0, 8(sp)
1281; RV32-NEXT:    lui a0, 4112
1282; RV32-NEXT:    addi a0, a0, 257
1283; RV32-NEXT:    sw a0, 12(sp)
1284; RV32-NEXT:    sw a0, 8(sp)
1285; RV32-NEXT:    li a0, 1
1286; RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
1287; RV32-NEXT:    vsub.vx v12, v8, a0
1288; RV32-NEXT:    vnot.v v8, v8
1289; RV32-NEXT:    addi a0, sp, 8
1290; RV32-NEXT:    vlse64.v v16, (a0), zero
1291; RV32-NEXT:    vand.vv v8, v8, v12
1292; RV32-NEXT:    vlse64.v v12, (a0), zero
1293; RV32-NEXT:    vsrl.vi v20, v8, 1
1294; RV32-NEXT:    vand.vv v16, v20, v16
1295; RV32-NEXT:    vsub.vv v8, v8, v16
1296; RV32-NEXT:    vand.vv v16, v8, v12
1297; RV32-NEXT:    vsrl.vi v8, v8, 2
1298; RV32-NEXT:    vand.vv v8, v8, v12
1299; RV32-NEXT:    vadd.vv v8, v16, v8
1300; RV32-NEXT:    vlse64.v v12, (a0), zero
1301; RV32-NEXT:    vlse64.v v16, (a0), zero
1302; RV32-NEXT:    vsrl.vi v20, v8, 4
1303; RV32-NEXT:    vadd.vv v8, v8, v20
1304; RV32-NEXT:    vand.vv v8, v8, v12
1305; RV32-NEXT:    vmul.vv v8, v8, v16
1306; RV32-NEXT:    li a0, 56
1307; RV32-NEXT:    vsrl.vx v8, v8, a0
1308; RV32-NEXT:    addi sp, sp, 16
1309; RV32-NEXT:    ret
1310;
1311; RV64-LABEL: cttz_nxv4i64:
1312; RV64:       # %bb.0:
1313; RV64-NEXT:    li a0, 1
1314; RV64-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
1315; RV64-NEXT:    vsub.vx v12, v8, a0
1316; RV64-NEXT:    vnot.v v8, v8
1317; RV64-NEXT:    vand.vv v8, v8, v12
1318; RV64-NEXT:    lui a0, %hi(.LCPI20_0)
1319; RV64-NEXT:    ld a0, %lo(.LCPI20_0)(a0)
1320; RV64-NEXT:    lui a1, %hi(.LCPI20_1)
1321; RV64-NEXT:    ld a1, %lo(.LCPI20_1)(a1)
1322; RV64-NEXT:    vsrl.vi v12, v8, 1
1323; RV64-NEXT:    vand.vx v12, v12, a0
1324; RV64-NEXT:    vsub.vv v8, v8, v12
1325; RV64-NEXT:    vand.vx v12, v8, a1
1326; RV64-NEXT:    vsrl.vi v8, v8, 2
1327; RV64-NEXT:    vand.vx v8, v8, a1
1328; RV64-NEXT:    vadd.vv v8, v12, v8
1329; RV64-NEXT:    lui a0, %hi(.LCPI20_2)
1330; RV64-NEXT:    ld a0, %lo(.LCPI20_2)(a0)
1331; RV64-NEXT:    lui a1, %hi(.LCPI20_3)
1332; RV64-NEXT:    ld a1, %lo(.LCPI20_3)(a1)
1333; RV64-NEXT:    vsrl.vi v12, v8, 4
1334; RV64-NEXT:    vadd.vv v8, v8, v12
1335; RV64-NEXT:    vand.vx v8, v8, a0
1336; RV64-NEXT:    vmul.vx v8, v8, a1
1337; RV64-NEXT:    li a0, 56
1338; RV64-NEXT:    vsrl.vx v8, v8, a0
1339; RV64-NEXT:    ret
  ; cttz(x, false) on i64 elements at LMUL=4: SWAR popcount of
  ; (~x & (x - 1)); RV64 loads constants from the pool (.LCPI20_*),
  ; RV32 splats them via stack stores + zero-stride vlse64.
  ; NOTE(review): in the RV32 checks all four constants are stored to the
  ; same 8(sp)/12(sp) slots before any vlse64 executes -- verify these
  ; autogenerated assertions reflect intended codegen.
1340  %a = call <vscale x 4 x i64> @llvm.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 false)
1341  ret <vscale x 4 x i64> %a
1342}
1343declare <vscale x 4 x i64> @llvm.cttz.nxv4i64(<vscale x 4 x i64>, i1)
1344
1345define <vscale x 8 x i64> @cttz_nxv8i64(<vscale x 8 x i64> %va) {
1346; RV32-LABEL: cttz_nxv8i64:
1347; RV32:       # %bb.0:
1348; RV32-NEXT:    addi sp, sp, -16
1349; RV32-NEXT:    .cfi_def_cfa_offset 16
1350; RV32-NEXT:    lui a0, 349525
1351; RV32-NEXT:    addi a0, a0, 1365
1352; RV32-NEXT:    sw a0, 12(sp)
1353; RV32-NEXT:    sw a0, 8(sp)
1354; RV32-NEXT:    lui a0, 209715
1355; RV32-NEXT:    addi a0, a0, 819
1356; RV32-NEXT:    sw a0, 12(sp)
1357; RV32-NEXT:    sw a0, 8(sp)
1358; RV32-NEXT:    lui a0, 61681
1359; RV32-NEXT:    addi a0, a0, -241
1360; RV32-NEXT:    sw a0, 12(sp)
1361; RV32-NEXT:    sw a0, 8(sp)
1362; RV32-NEXT:    lui a0, 4112
1363; RV32-NEXT:    addi a0, a0, 257
1364; RV32-NEXT:    sw a0, 12(sp)
1365; RV32-NEXT:    sw a0, 8(sp)
1366; RV32-NEXT:    li a0, 1
1367; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
1368; RV32-NEXT:    vsub.vx v16, v8, a0
1369; RV32-NEXT:    vnot.v v8, v8
1370; RV32-NEXT:    addi a0, sp, 8
1371; RV32-NEXT:    vlse64.v v24, (a0), zero
1372; RV32-NEXT:    vand.vv v8, v8, v16
1373; RV32-NEXT:    vlse64.v v16, (a0), zero
1374; RV32-NEXT:    vsrl.vi v0, v8, 1
1375; RV32-NEXT:    vand.vv v24, v0, v24
1376; RV32-NEXT:    vsub.vv v8, v8, v24
1377; RV32-NEXT:    vand.vv v24, v8, v16
1378; RV32-NEXT:    vsrl.vi v8, v8, 2
1379; RV32-NEXT:    vand.vv v8, v8, v16
1380; RV32-NEXT:    vadd.vv v8, v24, v8
1381; RV32-NEXT:    vlse64.v v16, (a0), zero
1382; RV32-NEXT:    vlse64.v v24, (a0), zero
1383; RV32-NEXT:    vsrl.vi v0, v8, 4
1384; RV32-NEXT:    vadd.vv v8, v8, v0
1385; RV32-NEXT:    vand.vv v8, v8, v16
1386; RV32-NEXT:    vmul.vv v8, v8, v24
1387; RV32-NEXT:    li a0, 56
1388; RV32-NEXT:    vsrl.vx v8, v8, a0
1389; RV32-NEXT:    addi sp, sp, 16
1390; RV32-NEXT:    ret
1391;
1392; RV64-LABEL: cttz_nxv8i64:
1393; RV64:       # %bb.0:
1394; RV64-NEXT:    li a0, 1
1395; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
1396; RV64-NEXT:    vsub.vx v16, v8, a0
1397; RV64-NEXT:    vnot.v v8, v8
1398; RV64-NEXT:    vand.vv v8, v8, v16
1399; RV64-NEXT:    lui a0, %hi(.LCPI21_0)
1400; RV64-NEXT:    ld a0, %lo(.LCPI21_0)(a0)
1401; RV64-NEXT:    lui a1, %hi(.LCPI21_1)
1402; RV64-NEXT:    ld a1, %lo(.LCPI21_1)(a1)
1403; RV64-NEXT:    vsrl.vi v16, v8, 1
1404; RV64-NEXT:    vand.vx v16, v16, a0
1405; RV64-NEXT:    vsub.vv v8, v8, v16
1406; RV64-NEXT:    vand.vx v16, v8, a1
1407; RV64-NEXT:    vsrl.vi v8, v8, 2
1408; RV64-NEXT:    vand.vx v8, v8, a1
1409; RV64-NEXT:    vadd.vv v8, v16, v8
1410; RV64-NEXT:    lui a0, %hi(.LCPI21_2)
1411; RV64-NEXT:    ld a0, %lo(.LCPI21_2)(a0)
1412; RV64-NEXT:    lui a1, %hi(.LCPI21_3)
1413; RV64-NEXT:    ld a1, %lo(.LCPI21_3)(a1)
1414; RV64-NEXT:    vsrl.vi v16, v8, 4
1415; RV64-NEXT:    vadd.vv v8, v8, v16
1416; RV64-NEXT:    vand.vx v8, v8, a0
1417; RV64-NEXT:    vmul.vx v8, v8, a1
1418; RV64-NEXT:    li a0, 56
1419; RV64-NEXT:    vsrl.vx v8, v8, a0
1420; RV64-NEXT:    ret
  ; cttz(x, false) on i64 elements at LMUL=8: SWAR popcount of
  ; (~x & (x - 1)); RV64 loads constants from the pool (.LCPI21_*),
  ; RV32 splats them via stack stores + zero-stride vlse64 (note v0/v24
  ; are pressed into service as data registers at this LMUL).
  ; NOTE(review): in the RV32 checks all four constants are stored to the
  ; same 8(sp)/12(sp) slots before any vlse64 executes -- verify these
  ; autogenerated assertions reflect intended codegen.
1421  %a = call <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 false)
1422  ret <vscale x 8 x i64> %a
1423}
1424declare <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64>, i1)
1425
1426define <vscale x 1 x i8> @cttz_zero_undef_nxv1i8(<vscale x 1 x i8> %va) {
1427; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv1i8:
1428; CHECK-ZVE64X:       # %bb.0:
1429; CHECK-ZVE64X-NEXT:    li a0, 1
1430; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
1431; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
1432; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
1433; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
1434; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
1435; CHECK-ZVE64X-NEXT:    li a0, 85
1436; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
1437; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
1438; CHECK-ZVE64X-NEXT:    li a0, 51
1439; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
1440; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
1441; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
1442; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
1443; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
1444; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
1445; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
1446; CHECK-ZVE64X-NEXT:    ret
1447;
1448; CHECK-D-LABEL: cttz_zero_undef_nxv1i8:
1449; CHECK-D:       # %bb.0:
1450; CHECK-D-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
1451; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
1452; CHECK-D-NEXT:    vand.vv v8, v8, v9
1453; CHECK-D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
1454; CHECK-D-NEXT:    vzext.vf4 v9, v8
1455; CHECK-D-NEXT:    vfcvt.f.xu.v v8, v9
1456; CHECK-D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
1457; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
1458; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
1459; CHECK-D-NEXT:    vncvt.x.x.w v8, v8
1460; CHECK-D-NEXT:    li a0, 127
1461; CHECK-D-NEXT:    vsub.vx v8, v8, a0
1462; CHECK-D-NEXT:    ret
  ; cttz(x, true): zero input is poison, so no zero-merge is emitted.
  ; CHECK-ZVE64X uses the SWAR popcount of (~x & (x - 1)); CHECK-D
  ; isolates the lowest set bit, zero-extends i8 -> i32, converts to f32,
  ; and recovers the bit index from the exponent (nsrl 23, sub 127).
1463  %a = call <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 true)
1464  ret <vscale x 1 x i8> %a
1465}
1466
1467define <vscale x 2 x i8> @cttz_zero_undef_nxv2i8(<vscale x 2 x i8> %va) {
1468; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv2i8:
1469; CHECK-ZVE64X:       # %bb.0:
1470; CHECK-ZVE64X-NEXT:    li a0, 1
1471; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
1472; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
1473; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
1474; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
1475; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
1476; CHECK-ZVE64X-NEXT:    li a0, 85
1477; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
1478; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
1479; CHECK-ZVE64X-NEXT:    li a0, 51
1480; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
1481; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
1482; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
1483; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
1484; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
1485; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
1486; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
1487; CHECK-ZVE64X-NEXT:    ret
1488;
1489; CHECK-D-LABEL: cttz_zero_undef_nxv2i8:
1490; CHECK-D:       # %bb.0:
1491; CHECK-D-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
1492; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
1493; CHECK-D-NEXT:    vand.vv v8, v8, v9
1494; CHECK-D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
1495; CHECK-D-NEXT:    vzext.vf4 v9, v8
1496; CHECK-D-NEXT:    vfcvt.f.xu.v v8, v9
1497; CHECK-D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
1498; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
1499; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
1500; CHECK-D-NEXT:    vncvt.x.x.w v8, v8
1501; CHECK-D-NEXT:    li a0, 127
1502; CHECK-D-NEXT:    vsub.vx v8, v8, a0
1503; CHECK-D-NEXT:    ret
  ; cttz(x, true): zero input is poison, so no zero-merge is emitted.
  ; CHECK-ZVE64X uses the SWAR popcount of (~x & (x - 1)); CHECK-D
  ; isolates the lowest set bit, zero-extends i8 -> i32, converts to f32,
  ; and recovers the bit index from the exponent (nsrl 23, sub 127).
1504  %a = call <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 true)
1505  ret <vscale x 2 x i8> %a
1506}
1507
1508define <vscale x 4 x i8> @cttz_zero_undef_nxv4i8(<vscale x 4 x i8> %va) {
1509; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv4i8:
1510; CHECK-ZVE64X:       # %bb.0:
1511; CHECK-ZVE64X-NEXT:    li a0, 1
1512; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
1513; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
1514; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
1515; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
1516; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
1517; CHECK-ZVE64X-NEXT:    li a0, 85
1518; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
1519; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
1520; CHECK-ZVE64X-NEXT:    li a0, 51
1521; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
1522; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
1523; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
1524; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
1525; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
1526; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
1527; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
1528; CHECK-ZVE64X-NEXT:    ret
1529;
1530; CHECK-D-LABEL: cttz_zero_undef_nxv4i8:
1531; CHECK-D:       # %bb.0:
1532; CHECK-D-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
1533; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
1534; CHECK-D-NEXT:    vand.vv v8, v8, v9
1535; CHECK-D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
1536; CHECK-D-NEXT:    vzext.vf4 v10, v8
1537; CHECK-D-NEXT:    vfcvt.f.xu.v v8, v10
1538; CHECK-D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
1539; CHECK-D-NEXT:    vnsrl.wi v10, v8, 23
1540; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
1541; CHECK-D-NEXT:    vncvt.x.x.w v8, v10
1542; CHECK-D-NEXT:    li a0, 127
1543; CHECK-D-NEXT:    vsub.vx v8, v8, a0
1544; CHECK-D-NEXT:    ret
  ; cttz(x, true): zero input is poison, so no zero-merge is emitted.
  ; CHECK-ZVE64X uses the SWAR popcount of (~x & (x - 1)); CHECK-D
  ; isolates the lowest set bit, zero-extends i8 -> i32, converts to f32,
  ; and recovers the bit index from the exponent (nsrl 23, sub 127).
1545  %a = call <vscale x 4 x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 true)
1546  ret <vscale x 4 x i8> %a
1547}
1548
1549define <vscale x 8 x i8> @cttz_zero_undef_nxv8i8(<vscale x 8 x i8> %va) {
1550; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv8i8:
1551; CHECK-ZVE64X:       # %bb.0:
1552; CHECK-ZVE64X-NEXT:    li a0, 1
1553; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
1554; CHECK-ZVE64X-NEXT:    vsub.vx v9, v8, a0
1555; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
1556; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v9
1557; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
1558; CHECK-ZVE64X-NEXT:    li a0, 85
1559; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
1560; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
1561; CHECK-ZVE64X-NEXT:    li a0, 51
1562; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
1563; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
1564; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
1565; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
1566; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
1567; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
1568; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
1569; CHECK-ZVE64X-NEXT:    ret
1570;
1571; CHECK-D-LABEL: cttz_zero_undef_nxv8i8:
1572; CHECK-D:       # %bb.0:
1573; CHECK-D-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
1574; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
1575; CHECK-D-NEXT:    vand.vv v8, v8, v9
1576; CHECK-D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
1577; CHECK-D-NEXT:    vzext.vf4 v12, v8
1578; CHECK-D-NEXT:    vfcvt.f.xu.v v8, v12
1579; CHECK-D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
1580; CHECK-D-NEXT:    vnsrl.wi v12, v8, 23
1581; CHECK-D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
1582; CHECK-D-NEXT:    vncvt.x.x.w v8, v12
1583; CHECK-D-NEXT:    li a0, 127
1584; CHECK-D-NEXT:    vsub.vx v8, v8, a0
1585; CHECK-D-NEXT:    ret
  ; cttz(x, true): zero input is poison, so no zero-merge is emitted.
  ; CHECK-ZVE64X uses the SWAR popcount of (~x & (x - 1)); CHECK-D
  ; isolates the lowest set bit, zero-extends i8 -> i32 (m1 -> m4),
  ; converts to f32, and recovers the bit index (nsrl 23, sub 127).
1586  %a = call <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 true)
1587  ret <vscale x 8 x i8> %a
1588}
1589
1590define <vscale x 16 x i8> @cttz_zero_undef_nxv16i8(<vscale x 16 x i8> %va) {
1591; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv16i8:
1592; CHECK-ZVE64X:       # %bb.0:
1593; CHECK-ZVE64X-NEXT:    li a0, 1
1594; CHECK-ZVE64X-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
1595; CHECK-ZVE64X-NEXT:    vsub.vx v10, v8, a0
1596; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
1597; CHECK-ZVE64X-NEXT:    vand.vv v8, v8, v10
1598; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
1599; CHECK-ZVE64X-NEXT:    li a0, 85
1600; CHECK-ZVE64X-NEXT:    vand.vx v10, v10, a0
1601; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v10
1602; CHECK-ZVE64X-NEXT:    li a0, 51
1603; CHECK-ZVE64X-NEXT:    vand.vx v10, v8, a0
1604; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
1605; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
1606; CHECK-ZVE64X-NEXT:    vadd.vv v8, v10, v8
1607; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
1608; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v10
1609; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
1610; CHECK-ZVE64X-NEXT:    ret
1611;
1612; CHECK-D-LABEL: cttz_zero_undef_nxv16i8:
1613; CHECK-D:       # %bb.0:
1614; CHECK-D-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
1615; CHECK-D-NEXT:    vrsub.vi v10, v8, 0
1616; CHECK-D-NEXT:    vand.vv v8, v8, v10
1617; CHECK-D-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
1618; CHECK-D-NEXT:    vzext.vf4 v16, v8
1619; CHECK-D-NEXT:    vfcvt.f.xu.v v8, v16
1620; CHECK-D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
1621; CHECK-D-NEXT:    vnsrl.wi v16, v8, 23
1622; CHECK-D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
1623; CHECK-D-NEXT:    vncvt.x.x.w v8, v16
1624; CHECK-D-NEXT:    li a0, 127
1625; CHECK-D-NEXT:    vsub.vx v8, v8, a0
1626; CHECK-D-NEXT:    ret
  ; cttz(x, true): zero input is poison, so no zero-merge is emitted.
  ; CHECK-ZVE64X uses the SWAR popcount of (~x & (x - 1)); CHECK-D
  ; isolates the lowest set bit, zero-extends i8 -> i32 (m2 -> m8),
  ; converts to f32, and recovers the bit index (nsrl 23, sub 127).
1627  %a = call <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 true)
1628  ret <vscale x 16 x i8> %a
1629}
1630
1631define <vscale x 32 x i8> @cttz_zero_undef_nxv32i8(<vscale x 32 x i8> %va) {
1632; CHECK-LABEL: cttz_zero_undef_nxv32i8:
1633; CHECK:       # %bb.0:
1634; CHECK-NEXT:    li a0, 1
1635; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, mu
1636; CHECK-NEXT:    vsub.vx v12, v8, a0
1637; CHECK-NEXT:    vnot.v v8, v8
1638; CHECK-NEXT:    vand.vv v8, v8, v12
1639; CHECK-NEXT:    vsrl.vi v12, v8, 1
1640; CHECK-NEXT:    li a0, 85
1641; CHECK-NEXT:    vand.vx v12, v12, a0
1642; CHECK-NEXT:    vsub.vv v8, v8, v12
1643; CHECK-NEXT:    li a0, 51
1644; CHECK-NEXT:    vand.vx v12, v8, a0
1645; CHECK-NEXT:    vsrl.vi v8, v8, 2
1646; CHECK-NEXT:    vand.vx v8, v8, a0
1647; CHECK-NEXT:    vadd.vv v8, v12, v8
1648; CHECK-NEXT:    vsrl.vi v12, v8, 4
1649; CHECK-NEXT:    vadd.vv v8, v8, v12
1650; CHECK-NEXT:    vand.vi v8, v8, 15
1651; CHECK-NEXT:    ret
  ; cttz(x, true) at LMUL=4: a single CHECK prefix covers all RUN lines;
  ; every config uses the SWAR popcount of (~x & (x - 1)) -- no
  ; float-based variant is checked for this type.
1652  %a = call <vscale x 32 x i8> @llvm.cttz.nxv32i8(<vscale x 32 x i8> %va, i1 true)
1653  ret <vscale x 32 x i8> %a
1654}
1655
define <vscale x 64 x i8> @cttz_zero_undef_nxv64i8(<vscale x 64 x i8> %va) {
; CHECK-LABEL: cttz_zero_undef_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e8, m8, ta, mu
; CHECK-NEXT:    vsub.vx v16, v8, a0
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vand.vv v8, v8, v16
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vsub.vv v8, v8, v16
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v16, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v16
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  ; Largest i8 type (LMUL=8): all run configurations use the shared shift/mask
  ; popcount of ~x & (x - 1) with 8-bit constants 0x55, 0x33, 0xF.
  %a = call <vscale x 64 x i8> @llvm.cttz.nxv64i8(<vscale x 64 x i8> %va, i1 true)
  ret <vscale x 64 x i8> %a
}
1680
define <vscale x 1 x i16> @cttz_zero_undef_nxv1i16(<vscale x 1 x i16> %va) {
; RV32I-LABEL: cttz_zero_undef_nxv1i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a0, 1
; RV32I-NEXT:    vsetvli a1, zero, e16, mf4, ta, mu
; RV32I-NEXT:    vsub.vx v9, v8, a0
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_zero_undef_nxv1i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a0, 1
; RV64I-NEXT:    vsetvli a1, zero, e16, mf4, ta, mu
; RV64I-NEXT:    vsub.vx v9, v8, a0
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: cttz_zero_undef_nxv1i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
; CHECK-D-NEXT:    vand.vv v8, v8, v9
; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
; CHECK-D-NEXT:    vnsrl.wi v8, v9, 23
; CHECK-D-NEXT:    li a0, 127
; CHECK-D-NEXT:    vsub.vx v8, v8, a0
; CHECK-D-NEXT:    ret
  ; Without the D extension: shift/mask popcount of ~x & (x - 1) with 16-bit
  ; constants (0x5555, 0x3333, 0x0F0F, multiply by 0x0101, shift by 8).
  ; With +d: the isolated low bit x & -x is widened to f32 and the biased
  ; exponent (narrowing shift by 23, then subtract 127) is the result.
  %a = call <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 true)
  ret <vscale x 1 x i16> %a
}
1751
define <vscale x 2 x i16> @cttz_zero_undef_nxv2i16(<vscale x 2 x i16> %va) {
; RV32I-LABEL: cttz_zero_undef_nxv2i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a0, 1
; RV32I-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
; RV32I-NEXT:    vsub.vx v9, v8, a0
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_zero_undef_nxv2i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a0, 1
; RV64I-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
; RV64I-NEXT:    vsub.vx v9, v8, a0
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: cttz_zero_undef_nxv2i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
; CHECK-D-NEXT:    vand.vv v8, v8, v9
; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
; CHECK-D-NEXT:    vnsrl.wi v8, v9, 23
; CHECK-D-NEXT:    li a0, 127
; CHECK-D-NEXT:    vsub.vx v8, v8, a0
; CHECK-D-NEXT:    ret
  ; Same pattern as nxv1i16 at LMUL=1/2: shift/mask popcount without D,
  ; f32 exponent extraction (shift 23, bias 127) with +d.
  %a = call <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 true)
  ret <vscale x 2 x i16> %a
}
1822
define <vscale x 4 x i16> @cttz_zero_undef_nxv4i16(<vscale x 4 x i16> %va) {
; RV32I-LABEL: cttz_zero_undef_nxv4i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a0, 1
; RV32I-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
; RV32I-NEXT:    vsub.vx v9, v8, a0
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_zero_undef_nxv4i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a0, 1
; RV64I-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
; RV64I-NEXT:    vsub.vx v9, v8, a0
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: cttz_zero_undef_nxv4i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
; CHECK-D-NEXT:    vand.vv v8, v8, v9
; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v8
; CHECK-D-NEXT:    vnsrl.wi v8, v10, 23
; CHECK-D-NEXT:    li a0, 127
; CHECK-D-NEXT:    vsub.vx v8, v8, a0
; CHECK-D-NEXT:    ret
  ; Same pattern as nxv1i16 at LMUL=1; the +d path widens into a register
  ; group (v10) for the f32 conversion before the exponent extraction.
  %a = call <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 true)
  ret <vscale x 4 x i16> %a
}
1893
define <vscale x 8 x i16> @cttz_zero_undef_nxv8i16(<vscale x 8 x i16> %va) {
; RV32I-LABEL: cttz_zero_undef_nxv8i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a0, 1
; RV32I-NEXT:    vsetvli a1, zero, e16, m2, ta, mu
; RV32I-NEXT:    vsub.vx v10, v8, a0
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v10, v10, a0
; RV32I-NEXT:    vsub.vv v8, v8, v10
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v10, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v10, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v10
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_zero_undef_nxv8i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a0, 1
; RV64I-NEXT:    vsetvli a1, zero, e16, m2, ta, mu
; RV64I-NEXT:    vsub.vx v10, v8, a0
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v10, v10, a0
; RV64I-NEXT:    vsub.vv v8, v8, v10
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v10, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v10, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v10
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: cttz_zero_undef_nxv8i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; CHECK-D-NEXT:    vrsub.vi v10, v8, 0
; CHECK-D-NEXT:    vand.vv v8, v8, v10
; CHECK-D-NEXT:    vfwcvt.f.xu.v v12, v8
; CHECK-D-NEXT:    vnsrl.wi v8, v12, 23
; CHECK-D-NEXT:    li a0, 127
; CHECK-D-NEXT:    vsub.vx v8, v8, a0
; CHECK-D-NEXT:    ret
  ; Same pattern as nxv1i16 at LMUL=2 (widened f32 values occupy v12-v15
  ; on the +d path).
  %a = call <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 true)
  ret <vscale x 8 x i16> %a
}
1964
define <vscale x 16 x i16> @cttz_zero_undef_nxv16i16(<vscale x 16 x i16> %va) {
; RV32I-LABEL: cttz_zero_undef_nxv16i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a0, 1
; RV32I-NEXT:    vsetvli a1, zero, e16, m4, ta, mu
; RV32I-NEXT:    vsub.vx v12, v8, a0
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v12, v12, a0
; RV32I-NEXT:    vsub.vv v8, v8, v12
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v12, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v12, v8
; RV32I-NEXT:    vsrl.vi v12, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v12
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_zero_undef_nxv16i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a0, 1
; RV64I-NEXT:    vsetvli a1, zero, e16, m4, ta, mu
; RV64I-NEXT:    vsub.vx v12, v8, a0
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v12
; RV64I-NEXT:    vsrl.vi v12, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v12, v12, a0
; RV64I-NEXT:    vsub.vv v8, v8, v12
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v12, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v12, v8
; RV64I-NEXT:    vsrl.vi v12, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v12
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: cttz_zero_undef_nxv16i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; CHECK-D-NEXT:    vrsub.vi v12, v8, 0
; CHECK-D-NEXT:    vand.vv v8, v8, v12
; CHECK-D-NEXT:    vfwcvt.f.xu.v v16, v8
; CHECK-D-NEXT:    vnsrl.wi v8, v16, 23
; CHECK-D-NEXT:    li a0, 127
; CHECK-D-NEXT:    vsub.vx v8, v8, a0
; CHECK-D-NEXT:    ret
  ; Same pattern as nxv1i16 at LMUL=4 (widened f32 values occupy v16-v23
  ; on the +d path).
  %a = call <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 true)
  ret <vscale x 16 x i16> %a
}
2035
define <vscale x 32 x i16> @cttz_zero_undef_nxv32i16(<vscale x 32 x i16> %va) {
; RV32-LABEL: cttz_zero_undef_nxv32i16:
; RV32:       # %bb.0:
; RV32-NEXT:    li a0, 1
; RV32-NEXT:    vsetvli a1, zero, e16, m8, ta, mu
; RV32-NEXT:    vsub.vx v16, v8, a0
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vsub.vv v8, v8, v16
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v16, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v16
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    li a0, 257
; RV32-NEXT:    vmul.vx v8, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 8
; RV32-NEXT:    ret
;
; RV64-LABEL: cttz_zero_undef_nxv32i16:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 1
; RV64-NEXT:    vsetvli a1, zero, e16, m8, ta, mu
; RV64-NEXT:    vsub.vx v16, v8, a0
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vand.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v16, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    li a0, 257
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 8
; RV64-NEXT:    ret
  ; No separate +d expectation for this type: all configurations use the
  ; shift/mask popcount of ~x & (x - 1); RV32 and RV64 differ only in
  ; addi vs. addiw constant materialization.
  %a = call <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 true)
  ret <vscale x 32 x i16> %a
}
2095
define <vscale x 1 x i32> @cttz_zero_undef_nxv1i32(<vscale x 1 x i32> %va) {
; RV32I-LABEL: cttz_zero_undef_nxv1i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a0, 1
; RV32I-NEXT:    vsetvli a1, zero, e32, mf2, ta, mu
; RV32I-NEXT:    vsub.vx v9, v8, a0
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 349525
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 209715
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 61681
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    lui a0, 4112
; RV32I-NEXT:    addi a0, a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 24
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_zero_undef_nxv1i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a0, 1
; RV64I-NEXT:    vsetvli a1, zero, e32, mf2, ta, mu
; RV64I-NEXT:    vsub.vx v9, v8, a0
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 349525
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 209715
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 61681
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    lui a0, 4112
; RV64I-NEXT:    addiw a0, a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 24
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: cttz_zero_undef_nxv1i32:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
; CHECK-D-NEXT:    vand.vv v8, v8, v9
; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
; CHECK-D-NEXT:    li a0, 52
; CHECK-D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-D-NEXT:    vsrl.vx v8, v9, a0
; CHECK-D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v8, v8
; CHECK-D-NEXT:    li a0, 1023
; CHECK-D-NEXT:    vsub.vx v8, v8, a0
; CHECK-D-NEXT:    ret
  ; Without D: shift/mask popcount of ~x & (x - 1) with 32-bit constants
  ; (0x55555555, 0x33333333, 0x0F0F0F0F, multiply by 0x01010101, shift 24).
  ; With +d: x & -x is widened to f64 and the biased exponent (shift right
  ; by 52, narrow, subtract 1023) is the trailing-zero count.
  %a = call <vscale x 1 x i32> @llvm.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 true)
  ret <vscale x 1 x i32> %a
}
2172
define <vscale x 2 x i32> @cttz_zero_undef_nxv2i32(<vscale x 2 x i32> %va) {
; RV32I-LABEL: cttz_zero_undef_nxv2i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a0, 1
; RV32I-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
; RV32I-NEXT:    vsub.vx v9, v8, a0
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 349525
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 209715
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 61681
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    lui a0, 4112
; RV32I-NEXT:    addi a0, a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 24
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_zero_undef_nxv2i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a0, 1
; RV64I-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
; RV64I-NEXT:    vsub.vx v9, v8, a0
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 349525
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 209715
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 61681
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    lui a0, 4112
; RV64I-NEXT:    addiw a0, a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 24
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: cttz_zero_undef_nxv2i32:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-D-NEXT:    vrsub.vi v9, v8, 0
; CHECK-D-NEXT:    vand.vv v8, v8, v9
; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v8
; CHECK-D-NEXT:    li a0, 52
; CHECK-D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; CHECK-D-NEXT:    vsrl.vx v8, v10, a0
; CHECK-D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v10, v8
; CHECK-D-NEXT:    li a0, 1023
; CHECK-D-NEXT:    vsub.vx v8, v10, a0
; CHECK-D-NEXT:    ret
  ; Same pattern as nxv1i32 at LMUL=1: shift/mask popcount without D,
  ; f64 exponent extraction (shift 52, bias 1023) with +d.
  %a = call <vscale x 2 x i32> @llvm.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 true)
  ret <vscale x 2 x i32> %a
}
2249
define <vscale x 4 x i32> @cttz_zero_undef_nxv4i32(<vscale x 4 x i32> %va) {
; RV32I-LABEL: cttz_zero_undef_nxv4i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a0, 1
; RV32I-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
; RV32I-NEXT:    vsub.vx v10, v8, a0
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    lui a0, 349525
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v10, v10, a0
; RV32I-NEXT:    vsub.vv v8, v8, v10
; RV32I-NEXT:    lui a0, 209715
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v10, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v10, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v10
; RV32I-NEXT:    lui a0, 61681
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    lui a0, 4112
; RV32I-NEXT:    addi a0, a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 24
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_zero_undef_nxv4i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a0, 1
; RV64I-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
; RV64I-NEXT:    vsub.vx v10, v8, a0
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    lui a0, 349525
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v10, v10, a0
; RV64I-NEXT:    vsub.vv v8, v8, v10
; RV64I-NEXT:    lui a0, 209715
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v10, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v10, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v10
; RV64I-NEXT:    lui a0, 61681
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    lui a0, 4112
; RV64I-NEXT:    addiw a0, a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 24
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: cttz_zero_undef_nxv4i32:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; CHECK-D-NEXT:    vrsub.vi v10, v8, 0
; CHECK-D-NEXT:    vand.vv v8, v8, v10
; CHECK-D-NEXT:    vfwcvt.f.xu.v v12, v8
; CHECK-D-NEXT:    li a0, 52
; CHECK-D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-D-NEXT:    vsrl.vx v8, v12, a0
; CHECK-D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v12, v8
; CHECK-D-NEXT:    li a0, 1023
; CHECK-D-NEXT:    vsub.vx v8, v12, a0
; CHECK-D-NEXT:    ret
  ; Same pattern as nxv1i32 at LMUL=2 (f64 widening uses v12-v15 on the
  ; +d path).
  %a = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 true)
  ret <vscale x 4 x i32> %a
}
2326
define <vscale x 8 x i32> @cttz_zero_undef_nxv8i32(<vscale x 8 x i32> %va) {
; RV32I-LABEL: cttz_zero_undef_nxv8i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a0, 1
; RV32I-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32I-NEXT:    vsub.vx v12, v8, a0
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vand.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    lui a0, 349525
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v12, v12, a0
; RV32I-NEXT:    vsub.vv v8, v8, v12
; RV32I-NEXT:    lui a0, 209715
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v12, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v12, v8
; RV32I-NEXT:    vsrl.vi v12, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v12
; RV32I-NEXT:    lui a0, 61681
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    lui a0, 4112
; RV32I-NEXT:    addi a0, a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 24
; RV32I-NEXT:    ret
;
; RV64I-LABEL: cttz_zero_undef_nxv8i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a0, 1
; RV64I-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV64I-NEXT:    vsub.vx v12, v8, a0
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vand.vv v8, v8, v12
; RV64I-NEXT:    vsrl.vi v12, v8, 1
; RV64I-NEXT:    lui a0, 349525
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v12, v12, a0
; RV64I-NEXT:    vsub.vv v8, v8, v12
; RV64I-NEXT:    lui a0, 209715
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v12, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v12, v8
; RV64I-NEXT:    vsrl.vi v12, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v12
; RV64I-NEXT:    lui a0, 61681
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    lui a0, 4112
; RV64I-NEXT:    addiw a0, a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 24
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: cttz_zero_undef_nxv8i32:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; CHECK-D-NEXT:    vrsub.vi v12, v8, 0
; CHECK-D-NEXT:    vand.vv v8, v8, v12
; CHECK-D-NEXT:    vfwcvt.f.xu.v v16, v8
; CHECK-D-NEXT:    li a0, 52
; CHECK-D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-D-NEXT:    vsrl.vx v8, v16, a0
; CHECK-D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v16, v8
; CHECK-D-NEXT:    li a0, 1023
; CHECK-D-NEXT:    vsub.vx v8, v16, a0
; CHECK-D-NEXT:    ret
  ; Same pattern as nxv1i32 at LMUL=4 (f64 widening uses v16-v23 on the
  ; +d path).
  %a = call <vscale x 8 x i32> @llvm.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 true)
  ret <vscale x 8 x i32> %a
}
2403
define <vscale x 16 x i32> @cttz_zero_undef_nxv16i32(<vscale x 16 x i32> %va) {
; RV32-LABEL: cttz_zero_undef_nxv16i32:
; RV32:       # %bb.0:
; RV32-NEXT:    li a0, 1
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, mu
; RV32-NEXT:    vsub.vx v16, v8, a0
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vsub.vv v8, v8, v16
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v16, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v16
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    vmul.vx v8, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 24
; RV32-NEXT:    ret
;
; RV64-LABEL: cttz_zero_undef_nxv16i32:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 1
; RV64-NEXT:    vsetvli a1, zero, e32, m8, ta, mu
; RV64-NEXT:    vsub.vx v16, v8, a0
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vand.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v16, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, 4112
; RV64-NEXT:    addiw a0, a0, 257
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 24
; RV64-NEXT:    ret
  ; No separate +d expectation for this type: all configurations use the
  ; shift/mask popcount of ~x & (x - 1); RV32 and RV64 differ only in
  ; addi vs. addiw constant materialization.
  %a = call <vscale x 16 x i32> @llvm.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 true)
  ret <vscale x 16 x i32> %a
}
2465
; Checks lowering of llvm.cttz.nxv1i64 with the zero-is-poison flag set
; (second operand i1 true).  Both targets isolate the lowest set bit with
; (~x) & (x - 1) and then count it using the shift/mask/multiply popcount
; sequence, finishing with a logical shift right by 56.
; On RV32 each 64-bit splat constant is materialized by storing two
; identical 32-bit words at 8(sp)/12(sp) and splatting with a stride-zero
; vlse64; on RV64 the constants come from constant-pool loads (.LCPI40_*).
; NOTE(review): in the recorded RV32 sequence all four constants are
; written to the same two stack slots before the first vlse64 executes, so
; every splat load would observe only the last constant stored -- regenerate
; with update_llc_test_checks.py against current llc to confirm.
define <vscale x 1 x i64> @cttz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
; RV32-LABEL: cttz_zero_undef_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    li a0, 1
; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
; RV32-NEXT:    vsub.vx v9, v8, a0
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vand.vv v8, v8, v9
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vsrl.vi v11, v8, 1
; RV32-NEXT:    vand.vv v10, v11, v10
; RV32-NEXT:    vsub.vv v8, v8, v10
; RV32-NEXT:    vand.vv v10, v8, v9
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v9
; RV32-NEXT:    vadd.vv v8, v10, v8
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vsrl.vi v11, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v11
; RV32-NEXT:    vand.vv v8, v8, v9
; RV32-NEXT:    vmul.vv v8, v8, v10
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: cttz_zero_undef_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 1
; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
; RV64-NEXT:    vsub.vx v9, v8, a0
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vand.vv v8, v8, v9
; RV64-NEXT:    lui a0, %hi(.LCPI40_0)
; RV64-NEXT:    ld a0, %lo(.LCPI40_0)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI40_1)
; RV64-NEXT:    ld a1, %lo(.LCPI40_1)(a1)
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsub.vv v8, v8, v9
; RV64-NEXT:    vand.vx v9, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v9, v8
; RV64-NEXT:    lui a0, %hi(.LCPI40_2)
; RV64-NEXT:    ld a0, %lo(.LCPI40_2)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI40_3)
; RV64-NEXT:    ld a1, %lo(.LCPI40_3)(a1)
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vmul.vx v8, v8, a1
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %a = call <vscale x 1 x i64> @llvm.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 true)
  ret <vscale x 1 x i64> %a
}
2545
; Checks llvm.cttz.nxv2i64 with the zero-is-poison flag (i1 true).  Same
; (~x) & (x - 1) plus popcount expansion as the nxv1i64 case, but at LMUL=2
; (e64, m2), so vector operands occupy two-register groups (v8, v10, ...).
; On RV32 the 64-bit splat constants are built from paired 32-bit stack
; stores splatted via stride-zero vlse64; RV64 uses constant-pool loads
; (.LCPI41_*).
; NOTE(review): the recorded RV32 code stores all four constants to the
; same slots 8(sp)/12(sp) before any vlse64 runs, so each splat would read
; only the last value stored -- re-verify against current llc output.
define <vscale x 2 x i64> @cttz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
; RV32-LABEL: cttz_zero_undef_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    li a0, 1
; RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
; RV32-NEXT:    vsub.vx v10, v8, a0
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vsrl.vi v14, v8, 1
; RV32-NEXT:    vand.vv v12, v14, v12
; RV32-NEXT:    vsub.vv v8, v8, v12
; RV32-NEXT:    vand.vv v12, v8, v10
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vadd.vv v8, v12, v8
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vsrl.vi v14, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v14
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vmul.vv v8, v8, v12
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: cttz_zero_undef_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 1
; RV64-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
; RV64-NEXT:    vsub.vx v10, v8, a0
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vand.vv v8, v8, v10
; RV64-NEXT:    lui a0, %hi(.LCPI41_0)
; RV64-NEXT:    ld a0, %lo(.LCPI41_0)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI41_1)
; RV64-NEXT:    ld a1, %lo(.LCPI41_1)(a1)
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsub.vv v8, v8, v10
; RV64-NEXT:    vand.vx v10, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v10, v8
; RV64-NEXT:    lui a0, %hi(.LCPI41_2)
; RV64-NEXT:    ld a0, %lo(.LCPI41_2)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI41_3)
; RV64-NEXT:    ld a1, %lo(.LCPI41_3)(a1)
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v10
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vmul.vx v8, v8, a1
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %a = call <vscale x 2 x i64> @llvm.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 true)
  ret <vscale x 2 x i64> %a
}
2625
; Checks llvm.cttz.nxv4i64 with the zero-is-poison flag (i1 true).  Same
; (~x) & (x - 1) plus popcount expansion, at LMUL=4 (e64, m4), so operands
; occupy four-register groups (v8, v12, v16, v20).
; On RV32 the 64-bit splat constants are built from paired 32-bit stack
; stores splatted via stride-zero vlse64; RV64 uses constant-pool loads
; (.LCPI42_*).
; NOTE(review): the recorded RV32 code stores all four constants to the
; same slots 8(sp)/12(sp) before any vlse64 runs, so each splat would read
; only the last value stored -- re-verify against current llc output.
define <vscale x 4 x i64> @cttz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) {
; RV32-LABEL: cttz_zero_undef_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    li a0, 1
; RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
; RV32-NEXT:    vsub.vx v12, v8, a0
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vsrl.vi v20, v8, 1
; RV32-NEXT:    vand.vv v16, v20, v16
; RV32-NEXT:    vsub.vv v8, v8, v16
; RV32-NEXT:    vand.vv v16, v8, v12
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vadd.vv v8, v16, v8
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsrl.vi v20, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v20
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vmul.vv v8, v8, v16
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: cttz_zero_undef_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 1
; RV64-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
; RV64-NEXT:    vsub.vx v12, v8, a0
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vand.vv v8, v8, v12
; RV64-NEXT:    lui a0, %hi(.LCPI42_0)
; RV64-NEXT:    ld a0, %lo(.LCPI42_0)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI42_1)
; RV64-NEXT:    ld a1, %lo(.LCPI42_1)(a1)
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsub.vv v8, v8, v12
; RV64-NEXT:    vand.vx v12, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v12, v8
; RV64-NEXT:    lui a0, %hi(.LCPI42_2)
; RV64-NEXT:    ld a0, %lo(.LCPI42_2)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI42_3)
; RV64-NEXT:    ld a1, %lo(.LCPI42_3)(a1)
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v12
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vmul.vx v8, v8, a1
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %a = call <vscale x 4 x i64> @llvm.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 true)
  ret <vscale x 4 x i64> %a
}
2705
; Checks llvm.cttz.nxv8i64 with the zero-is-poison flag (i1 true).  Same
; (~x) & (x - 1) plus popcount expansion, at LMUL=8 (e64, m8): with only
; v8/v16/v24 eight-register groups available, the shifted temporary is
; carried in v0 in the RV32 sequence.
; On RV32 the 64-bit splat constants are built from paired 32-bit stack
; stores splatted via stride-zero vlse64; RV64 uses constant-pool loads
; (.LCPI43_*).
; NOTE(review): the recorded RV32 code stores all four constants to the
; same slots 8(sp)/12(sp) before any vlse64 runs, so each splat would read
; only the last value stored -- re-verify against current llc output.
define <vscale x 8 x i64> @cttz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
; RV32-LABEL: cttz_zero_undef_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    li a0, 1
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsub.vx v16, v8, a0
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v24, (a0), zero
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsrl.vi v0, v8, 1
; RV32-NEXT:    vand.vv v24, v0, v24
; RV32-NEXT:    vsub.vv v8, v8, v24
; RV32-NEXT:    vand.vv v24, v8, v16
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vadd.vv v8, v24, v8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vlse64.v v24, (a0), zero
; RV32-NEXT:    vsrl.vi v0, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v0
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: cttz_zero_undef_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 1
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsub.vx v16, v8, a0
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vand.vv v8, v8, v16
; RV64-NEXT:    lui a0, %hi(.LCPI43_0)
; RV64-NEXT:    ld a0, %lo(.LCPI43_0)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI43_1)
; RV64-NEXT:    ld a1, %lo(.LCPI43_1)(a1)
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    lui a0, %hi(.LCPI43_2)
; RV64-NEXT:    ld a0, %lo(.LCPI43_2)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI43_3)
; RV64-NEXT:    ld a1, %lo(.LCPI43_3)(a1)
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vmul.vx v8, v8, a1
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 true)
  ret <vscale x 8 x i64> %a
}
2785