1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s
3
4declare { <vscale x 2 x i8>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
5
; nxv2i8: i8 elements live in 64-bit lanes, so both inputs are masked with
; #0xff and multiplied at .d width. Overflow is the OR (sel) of a non-zero
; umulh result and any product bits above bit 7 (lsr #8); overflowing lanes
; are zeroed per the IR select below.
6define <vscale x 2 x i8> @umulo_nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %y) {
7; CHECK-LABEL: umulo_nxv2i8:
8; CHECK:       // %bb.0:
9; CHECK-NEXT:    ptrue p0.d
10; CHECK-NEXT:    and z1.d, z1.d, #0xff
11; CHECK-NEXT:    and z0.d, z0.d, #0xff
12; CHECK-NEXT:    movprfx z2, z0
13; CHECK-NEXT:    mul z2.d, p0/m, z2.d, z1.d
14; CHECK-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
15; CHECK-NEXT:    lsr z1.d, z2.d, #8
16; CHECK-NEXT:    cmpne p1.d, p0/z, z0.d, #0
17; CHECK-NEXT:    cmpne p0.d, p0/z, z1.d, #0
18; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
19; CHECK-NEXT:    mov z2.d, p0/m, #0 // =0x0
20; CHECK-NEXT:    mov z0.d, z2.d
21; CHECK-NEXT:    ret
22  %a = call { <vscale x 2 x i8>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %y)
23  %b = extractvalue { <vscale x 2 x i8>, <vscale x 2 x i1> } %a, 0
24  %c = extractvalue { <vscale x 2 x i8>, <vscale x 2 x i1> } %a, 1
25  %d = select <vscale x 2 x i1> %c, <vscale x 2 x i8> zeroinitializer, <vscale x 2 x i8> %b
26  ret <vscale x 2 x i8> %d
27}
28
29declare { <vscale x 4 x i8>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
30
; nxv4i8: same promoted pattern as nxv2i8 but at 32-bit (.s) lane width —
; mask with #0xff, mul + umulh, and check product bits above bit 7 (lsr #8).
31define <vscale x 4 x i8> @umulo_nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %y) {
32; CHECK-LABEL: umulo_nxv4i8:
33; CHECK:       // %bb.0:
34; CHECK-NEXT:    ptrue p0.s
35; CHECK-NEXT:    and z1.s, z1.s, #0xff
36; CHECK-NEXT:    and z0.s, z0.s, #0xff
37; CHECK-NEXT:    movprfx z2, z0
38; CHECK-NEXT:    mul z2.s, p0/m, z2.s, z1.s
39; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
40; CHECK-NEXT:    lsr z1.s, z2.s, #8
41; CHECK-NEXT:    cmpne p1.s, p0/z, z0.s, #0
42; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0
43; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
44; CHECK-NEXT:    mov z2.s, p0/m, #0 // =0x0
45; CHECK-NEXT:    mov z0.d, z2.d
46; CHECK-NEXT:    ret
47  %a = call { <vscale x 4 x i8>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %y)
48  %b = extractvalue { <vscale x 4 x i8>, <vscale x 4 x i1> } %a, 0
49  %c = extractvalue { <vscale x 4 x i8>, <vscale x 4 x i1> } %a, 1
50  %d = select <vscale x 4 x i1> %c, <vscale x 4 x i8> zeroinitializer, <vscale x 4 x i8> %b
51  ret <vscale x 4 x i8> %d
52}
53
54declare { <vscale x 8 x i8>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
55
; nxv8i8: promoted to 16-bit (.h) lanes — mask with #0xff, mul + umulh,
; overflow from umulh or product bits above bit 7 (lsr #8), combined via sel.
56define <vscale x 8 x i8> @umulo_nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) {
57; CHECK-LABEL: umulo_nxv8i8:
58; CHECK:       // %bb.0:
59; CHECK-NEXT:    ptrue p0.h
60; CHECK-NEXT:    and z1.h, z1.h, #0xff
61; CHECK-NEXT:    and z0.h, z0.h, #0xff
62; CHECK-NEXT:    movprfx z2, z0
63; CHECK-NEXT:    mul z2.h, p0/m, z2.h, z1.h
64; CHECK-NEXT:    umulh z0.h, p0/m, z0.h, z1.h
65; CHECK-NEXT:    lsr z1.h, z2.h, #8
66; CHECK-NEXT:    cmpne p1.h, p0/z, z0.h, #0
67; CHECK-NEXT:    cmpne p0.h, p0/z, z1.h, #0
68; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
69; CHECK-NEXT:    mov z2.h, p0/m, #0 // =0x0
70; CHECK-NEXT:    mov z0.d, z2.d
71; CHECK-NEXT:    ret
72  %a = call { <vscale x 8 x i8>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y)
73  %b = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i1> } %a, 0
74  %c = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i1> } %a, 1
75  %d = select <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer, <vscale x 8 x i8> %b
76  ret <vscale x 8 x i8> %d
77}
78
79declare { <vscale x 16 x i8>, <vscale x 16 x i1> } @llvm.umul.with.overflow.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
80
; nxv16i8: native .b element width — no masking/shift needed; overflow is
; simply a non-zero umulh result, and flagged lanes of the low product are
; zeroed (the IR select with zeroinitializer).
81define <vscale x 16 x i8> @umulo_nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
82; CHECK-LABEL: umulo_nxv16i8:
83; CHECK:       // %bb.0:
84; CHECK-NEXT:    ptrue p0.b
85; CHECK-NEXT:    movprfx z2, z0
86; CHECK-NEXT:    umulh z2.b, p0/m, z2.b, z1.b
87; CHECK-NEXT:    mul z0.b, p0/m, z0.b, z1.b
88; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
89; CHECK-NEXT:    mov z0.b, p0/m, #0 // =0x0
90; CHECK-NEXT:    ret
91  %a = call { <vscale x 16 x i8>, <vscale x 16 x i1> } @llvm.umul.with.overflow.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y)
92  %b = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i1> } %a, 0
93  %c = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i1> } %a, 1
94  %d = select <vscale x 16 x i1> %c, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %b
95  ret <vscale x 16 x i8> %d
96}
97
98declare { <vscale x 32 x i8>, <vscale x 32 x i1> } @llvm.umul.with.overflow.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>)
99
; nxv32i8: type is split across two Z registers (z0/z1 x z2/z3); each half
; gets the native mul + umulh + cmpne pattern with its own predicate.
100define <vscale x 32 x i8> @umulo_nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %y) {
101; CHECK-LABEL: umulo_nxv32i8:
102; CHECK:       // %bb.0:
103; CHECK-NEXT:    ptrue p0.b
104; CHECK-NEXT:    movprfx z4, z1
105; CHECK-NEXT:    mul z4.b, p0/m, z4.b, z3.b
106; CHECK-NEXT:    umulh z1.b, p0/m, z1.b, z3.b
107; CHECK-NEXT:    movprfx z3, z0
108; CHECK-NEXT:    umulh z3.b, p0/m, z3.b, z2.b
109; CHECK-NEXT:    cmpne p1.b, p0/z, z1.b, #0
110; CHECK-NEXT:    mul z0.b, p0/m, z0.b, z2.b
111; CHECK-NEXT:    cmpne p0.b, p0/z, z3.b, #0
112; CHECK-NEXT:    mov z4.b, p1/m, #0 // =0x0
113; CHECK-NEXT:    mov z0.b, p0/m, #0 // =0x0
114; CHECK-NEXT:    mov z1.d, z4.d
115; CHECK-NEXT:    ret
116  %a = call { <vscale x 32 x i8>, <vscale x 32 x i1> } @llvm.umul.with.overflow.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %y)
117  %b = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i1> } %a, 0
118  %c = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i1> } %a, 1
119  %d = select <vscale x 32 x i1> %c, <vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> %b
120  ret <vscale x 32 x i8> %d
121}
122
123declare { <vscale x 64 x i8>, <vscale x 64 x i1> } @llvm.umul.with.overflow.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>)
124
; nxv64i8: type is split across four Z registers (z0-z3 x z4-z7); each quarter
; repeats the mul + umulh + cmpne pattern, using p1-p3 and the temporary z24
; to juggle the four partial results before the final register shuffle.
125define <vscale x 64 x i8> @umulo_nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %y) {
126; CHECK-LABEL: umulo_nxv64i8:
127; CHECK:       // %bb.0:
128; CHECK-NEXT:    ptrue p0.b
129; CHECK-NEXT:    movprfx z24, z3
130; CHECK-NEXT:    mul z24.b, p0/m, z24.b, z7.b
131; CHECK-NEXT:    umulh z3.b, p0/m, z3.b, z7.b
132; CHECK-NEXT:    cmpne p1.b, p0/z, z3.b, #0
133; CHECK-NEXT:    movprfx z3, z2
134; CHECK-NEXT:    umulh z3.b, p0/m, z3.b, z6.b
135; CHECK-NEXT:    cmpne p2.b, p0/z, z3.b, #0
136; CHECK-NEXT:    movprfx z3, z1
137; CHECK-NEXT:    mul z3.b, p0/m, z3.b, z5.b
138; CHECK-NEXT:    umulh z1.b, p0/m, z1.b, z5.b
139; CHECK-NEXT:    mul z2.b, p0/m, z2.b, z6.b
140; CHECK-NEXT:    cmpne p3.b, p0/z, z1.b, #0
141; CHECK-NEXT:    movprfx z1, z0
142; CHECK-NEXT:    umulh z1.b, p0/m, z1.b, z4.b
143; CHECK-NEXT:    mul z0.b, p0/m, z0.b, z4.b
144; CHECK-NEXT:    cmpne p0.b, p0/z, z1.b, #0
145; CHECK-NEXT:    mov z3.b, p3/m, #0 // =0x0
146; CHECK-NEXT:    mov z24.b, p1/m, #0 // =0x0
147; CHECK-NEXT:    mov z0.b, p0/m, #0 // =0x0
148; CHECK-NEXT:    mov z2.b, p2/m, #0 // =0x0
149; CHECK-NEXT:    mov z1.d, z3.d
150; CHECK-NEXT:    mov z3.d, z24.d
151; CHECK-NEXT:    ret
152  %a = call { <vscale x 64 x i8>, <vscale x 64 x i1> } @llvm.umul.with.overflow.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %y)
153  %b = extractvalue { <vscale x 64 x i8>, <vscale x 64 x i1> } %a, 0
154  %c = extractvalue { <vscale x 64 x i8>, <vscale x 64 x i1> } %a, 1
155  %d = select <vscale x 64 x i1> %c, <vscale x 64 x i8> zeroinitializer, <vscale x 64 x i8> %b
156  ret <vscale x 64 x i8> %d
157}
158
159declare { <vscale x 2 x i16>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
160
; nxv2i16: i16 elements in 64-bit lanes — mask with #0xffff, multiply at .d
; width; overflow if umulh is non-zero or the product has bits above bit 15
; (lsr #16). The two conditions are merged with sel and gate the zeroing mov.
161define <vscale x 2 x i16> @umulo_nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %y) {
162; CHECK-LABEL: umulo_nxv2i16:
163; CHECK:       // %bb.0:
164; CHECK-NEXT:    ptrue p0.d
165; CHECK-NEXT:    and z1.d, z1.d, #0xffff
166; CHECK-NEXT:    and z0.d, z0.d, #0xffff
167; CHECK-NEXT:    movprfx z2, z0
168; CHECK-NEXT:    mul z2.d, p0/m, z2.d, z1.d
169; CHECK-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
170; CHECK-NEXT:    lsr z1.d, z2.d, #16
171; CHECK-NEXT:    cmpne p1.d, p0/z, z0.d, #0
172; CHECK-NEXT:    cmpne p0.d, p0/z, z1.d, #0
173; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
174; CHECK-NEXT:    mov z2.d, p0/m, #0 // =0x0
175; CHECK-NEXT:    mov z0.d, z2.d
176; CHECK-NEXT:    ret
177  %a = call { <vscale x 2 x i16>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %y)
178  %b = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i1> } %a, 0
179  %c = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i1> } %a, 1
180  %d = select <vscale x 2 x i1> %c, <vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> %b
181  ret <vscale x 2 x i16> %d
182}
183
184declare { <vscale x 4 x i16>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
185
; nxv4i16: same promoted pattern at 32-bit (.s) lane width — mask #0xffff,
; mul + umulh, and check product bits above bit 15 (lsr #16).
186define <vscale x 4 x i16> @umulo_nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %y) {
187; CHECK-LABEL: umulo_nxv4i16:
188; CHECK:       // %bb.0:
189; CHECK-NEXT:    ptrue p0.s
190; CHECK-NEXT:    and z1.s, z1.s, #0xffff
191; CHECK-NEXT:    and z0.s, z0.s, #0xffff
192; CHECK-NEXT:    movprfx z2, z0
193; CHECK-NEXT:    mul z2.s, p0/m, z2.s, z1.s
194; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
195; CHECK-NEXT:    lsr z1.s, z2.s, #16
196; CHECK-NEXT:    cmpne p1.s, p0/z, z0.s, #0
197; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0
198; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
199; CHECK-NEXT:    mov z2.s, p0/m, #0 // =0x0
200; CHECK-NEXT:    mov z0.d, z2.d
201; CHECK-NEXT:    ret
202  %a = call { <vscale x 4 x i16>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %y)
203  %b = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i1> } %a, 0
204  %c = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i1> } %a, 1
205  %d = select <vscale x 4 x i1> %c, <vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> %b
206  ret <vscale x 4 x i16> %d
207}
208
209declare { <vscale x 8 x i16>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
210
; nxv8i16: native .h element width — overflow is a non-zero umulh result,
; which predicates the zeroing of the low product.
211define <vscale x 8 x i16> @umulo_nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
212; CHECK-LABEL: umulo_nxv8i16:
213; CHECK:       // %bb.0:
214; CHECK-NEXT:    ptrue p0.h
215; CHECK-NEXT:    movprfx z2, z0
216; CHECK-NEXT:    umulh z2.h, p0/m, z2.h, z1.h
217; CHECK-NEXT:    mul z0.h, p0/m, z0.h, z1.h
218; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
219; CHECK-NEXT:    mov z0.h, p0/m, #0 // =0x0
220; CHECK-NEXT:    ret
221  %a = call { <vscale x 8 x i16>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y)
222  %b = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i1> } %a, 0
223  %c = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i1> } %a, 1
224  %d = select <vscale x 8 x i1> %c, <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> %b
225  ret <vscale x 8 x i16> %d
226}
227
228declare { <vscale x 16 x i16>, <vscale x 16 x i1> } @llvm.umul.with.overflow.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
229
; nxv16i16: split across two Z registers; each half uses the native
; mul + umulh + cmpne pattern with its own predicate (p0/p1).
230define <vscale x 16 x i16> @umulo_nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %y) {
231; CHECK-LABEL: umulo_nxv16i16:
232; CHECK:       // %bb.0:
233; CHECK-NEXT:    ptrue p0.h
234; CHECK-NEXT:    movprfx z4, z1
235; CHECK-NEXT:    mul z4.h, p0/m, z4.h, z3.h
236; CHECK-NEXT:    umulh z1.h, p0/m, z1.h, z3.h
237; CHECK-NEXT:    movprfx z3, z0
238; CHECK-NEXT:    umulh z3.h, p0/m, z3.h, z2.h
239; CHECK-NEXT:    cmpne p1.h, p0/z, z1.h, #0
240; CHECK-NEXT:    mul z0.h, p0/m, z0.h, z2.h
241; CHECK-NEXT:    cmpne p0.h, p0/z, z3.h, #0
242; CHECK-NEXT:    mov z4.h, p1/m, #0 // =0x0
243; CHECK-NEXT:    mov z0.h, p0/m, #0 // =0x0
244; CHECK-NEXT:    mov z1.d, z4.d
245; CHECK-NEXT:    ret
246  %a = call { <vscale x 16 x i16>, <vscale x 16 x i1> } @llvm.umul.with.overflow.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %y)
247  %b = extractvalue { <vscale x 16 x i16>, <vscale x 16 x i1> } %a, 0
248  %c = extractvalue { <vscale x 16 x i16>, <vscale x 16 x i1> } %a, 1
249  %d = select <vscale x 16 x i1> %c, <vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> %b
250  ret <vscale x 16 x i16> %d
251}
252
253declare { <vscale x 32 x i16>, <vscale x 32 x i1> } @llvm.umul.with.overflow.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>)
254
; nxv32i16: split across four Z registers (z0-z3 x z4-z7); four copies of the
; mul + umulh + cmpne pattern with predicates p0-p3 and temporary z24.
255define <vscale x 32 x i16> @umulo_nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %y) {
256; CHECK-LABEL: umulo_nxv32i16:
257; CHECK:       // %bb.0:
258; CHECK-NEXT:    ptrue p0.h
259; CHECK-NEXT:    movprfx z24, z3
260; CHECK-NEXT:    mul z24.h, p0/m, z24.h, z7.h
261; CHECK-NEXT:    umulh z3.h, p0/m, z3.h, z7.h
262; CHECK-NEXT:    cmpne p1.h, p0/z, z3.h, #0
263; CHECK-NEXT:    movprfx z3, z2
264; CHECK-NEXT:    umulh z3.h, p0/m, z3.h, z6.h
265; CHECK-NEXT:    cmpne p2.h, p0/z, z3.h, #0
266; CHECK-NEXT:    movprfx z3, z1
267; CHECK-NEXT:    mul z3.h, p0/m, z3.h, z5.h
268; CHECK-NEXT:    umulh z1.h, p0/m, z1.h, z5.h
269; CHECK-NEXT:    mul z2.h, p0/m, z2.h, z6.h
270; CHECK-NEXT:    cmpne p3.h, p0/z, z1.h, #0
271; CHECK-NEXT:    movprfx z1, z0
272; CHECK-NEXT:    umulh z1.h, p0/m, z1.h, z4.h
273; CHECK-NEXT:    mul z0.h, p0/m, z0.h, z4.h
274; CHECK-NEXT:    cmpne p0.h, p0/z, z1.h, #0
275; CHECK-NEXT:    mov z3.h, p3/m, #0 // =0x0
276; CHECK-NEXT:    mov z24.h, p1/m, #0 // =0x0
277; CHECK-NEXT:    mov z0.h, p0/m, #0 // =0x0
278; CHECK-NEXT:    mov z2.h, p2/m, #0 // =0x0
279; CHECK-NEXT:    mov z1.d, z3.d
280; CHECK-NEXT:    mov z3.d, z24.d
281; CHECK-NEXT:    ret
282  %a = call { <vscale x 32 x i16>, <vscale x 32 x i1> } @llvm.umul.with.overflow.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %y)
283  %b = extractvalue { <vscale x 32 x i16>, <vscale x 32 x i1> } %a, 0
284  %c = extractvalue { <vscale x 32 x i16>, <vscale x 32 x i1> } %a, 1
285  %d = select <vscale x 32 x i1> %c, <vscale x 32 x i16> zeroinitializer, <vscale x 32 x i16> %b
286  ret <vscale x 32 x i16> %d
287}
288
289declare { <vscale x 2 x i32>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
290
; nxv2i32: i32 elements in 64-bit lanes — mask with #0xffffffff, multiply at
; .d width; overflow if umulh is non-zero or the product has bits above
; bit 31 (lsr #32), merged with sel before the zeroing mov.
291define <vscale x 2 x i32> @umulo_nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y) {
292; CHECK-LABEL: umulo_nxv2i32:
293; CHECK:       // %bb.0:
294; CHECK-NEXT:    ptrue p0.d
295; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
296; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
297; CHECK-NEXT:    movprfx z2, z0
298; CHECK-NEXT:    mul z2.d, p0/m, z2.d, z1.d
299; CHECK-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
300; CHECK-NEXT:    lsr z1.d, z2.d, #32
301; CHECK-NEXT:    cmpne p1.d, p0/z, z0.d, #0
302; CHECK-NEXT:    cmpne p0.d, p0/z, z1.d, #0
303; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
304; CHECK-NEXT:    mov z2.d, p0/m, #0 // =0x0
305; CHECK-NEXT:    mov z0.d, z2.d
306; CHECK-NEXT:    ret
307  %a = call { <vscale x 2 x i32>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y)
308  %b = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i1> } %a, 0
309  %c = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i1> } %a, 1
310  %d = select <vscale x 2 x i1> %c, <vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> %b
311  ret <vscale x 2 x i32> %d
312}
313
314declare { <vscale x 4 x i32>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
315
; nxv4i32: native .s element width — overflow is a non-zero umulh result,
; which predicates the zeroing of the low product.
316define <vscale x 4 x i32> @umulo_nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
317; CHECK-LABEL: umulo_nxv4i32:
318; CHECK:       // %bb.0:
319; CHECK-NEXT:    ptrue p0.s
320; CHECK-NEXT:    movprfx z2, z0
321; CHECK-NEXT:    umulh z2.s, p0/m, z2.s, z1.s
322; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
323; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
324; CHECK-NEXT:    mov z0.s, p0/m, #0 // =0x0
325; CHECK-NEXT:    ret
326  %a = call { <vscale x 4 x i32>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y)
327  %b = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i1> } %a, 0
328  %c = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i1> } %a, 1
329  %d = select <vscale x 4 x i1> %c, <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> %b
330  ret <vscale x 4 x i32> %d
331}
332
333declare { <vscale x 8 x i32>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
334
; nxv8i32: split across two Z registers; each half uses the native
; mul + umulh + cmpne pattern with its own predicate (p0/p1).
335define <vscale x 8 x i32> @umulo_nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) {
336; CHECK-LABEL: umulo_nxv8i32:
337; CHECK:       // %bb.0:
338; CHECK-NEXT:    ptrue p0.s
339; CHECK-NEXT:    movprfx z4, z1
340; CHECK-NEXT:    mul z4.s, p0/m, z4.s, z3.s
341; CHECK-NEXT:    umulh z1.s, p0/m, z1.s, z3.s
342; CHECK-NEXT:    movprfx z3, z0
343; CHECK-NEXT:    umulh z3.s, p0/m, z3.s, z2.s
344; CHECK-NEXT:    cmpne p1.s, p0/z, z1.s, #0
345; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z2.s
346; CHECK-NEXT:    cmpne p0.s, p0/z, z3.s, #0
347; CHECK-NEXT:    mov z4.s, p1/m, #0 // =0x0
348; CHECK-NEXT:    mov z0.s, p0/m, #0 // =0x0
349; CHECK-NEXT:    mov z1.d, z4.d
350; CHECK-NEXT:    ret
351  %a = call { <vscale x 8 x i32>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y)
352  %b = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i1> } %a, 0
353  %c = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i1> } %a, 1
354  %d = select <vscale x 8 x i1> %c, <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> %b
355  ret <vscale x 8 x i32> %d
356}
357
358declare { <vscale x 16 x i32>, <vscale x 16 x i1> } @llvm.umul.with.overflow.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
359
; nxv16i32: split across four Z registers (z0-z3 x z4-z7); four copies of the
; mul + umulh + cmpne pattern with predicates p0-p3 and temporary z24.
360define <vscale x 16 x i32> @umulo_nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %y) {
361; CHECK-LABEL: umulo_nxv16i32:
362; CHECK:       // %bb.0:
363; CHECK-NEXT:    ptrue p0.s
364; CHECK-NEXT:    movprfx z24, z3
365; CHECK-NEXT:    mul z24.s, p0/m, z24.s, z7.s
366; CHECK-NEXT:    umulh z3.s, p0/m, z3.s, z7.s
367; CHECK-NEXT:    cmpne p1.s, p0/z, z3.s, #0
368; CHECK-NEXT:    movprfx z3, z2
369; CHECK-NEXT:    umulh z3.s, p0/m, z3.s, z6.s
370; CHECK-NEXT:    cmpne p2.s, p0/z, z3.s, #0
371; CHECK-NEXT:    movprfx z3, z1
372; CHECK-NEXT:    mul z3.s, p0/m, z3.s, z5.s
373; CHECK-NEXT:    umulh z1.s, p0/m, z1.s, z5.s
374; CHECK-NEXT:    mul z2.s, p0/m, z2.s, z6.s
375; CHECK-NEXT:    cmpne p3.s, p0/z, z1.s, #0
376; CHECK-NEXT:    movprfx z1, z0
377; CHECK-NEXT:    umulh z1.s, p0/m, z1.s, z4.s
378; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z4.s
379; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0
380; CHECK-NEXT:    mov z3.s, p3/m, #0 // =0x0
381; CHECK-NEXT:    mov z24.s, p1/m, #0 // =0x0
382; CHECK-NEXT:    mov z0.s, p0/m, #0 // =0x0
383; CHECK-NEXT:    mov z2.s, p2/m, #0 // =0x0
384; CHECK-NEXT:    mov z1.d, z3.d
385; CHECK-NEXT:    mov z3.d, z24.d
386; CHECK-NEXT:    ret
387  %a = call { <vscale x 16 x i32>, <vscale x 16 x i1> } @llvm.umul.with.overflow.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %y)
388  %b = extractvalue { <vscale x 16 x i32>, <vscale x 16 x i1> } %a, 0
389  %c = extractvalue { <vscale x 16 x i32>, <vscale x 16 x i1> } %a, 1
390  %d = select <vscale x 16 x i1> %c, <vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32> %b
391  ret <vscale x 16 x i32> %d
392}
393
394declare { <vscale x 2 x i64>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
395
; nxv2i64: native .d element width — overflow is a non-zero umulh result,
; which predicates the zeroing of the low product.
396define <vscale x 2 x i64> @umulo_nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
397; CHECK-LABEL: umulo_nxv2i64:
398; CHECK:       // %bb.0:
399; CHECK-NEXT:    ptrue p0.d
400; CHECK-NEXT:    movprfx z2, z0
401; CHECK-NEXT:    umulh z2.d, p0/m, z2.d, z1.d
402; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
403; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
404; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
405; CHECK-NEXT:    ret
406  %a = call { <vscale x 2 x i64>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y)
407  %b = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i1> } %a, 0
408  %c = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i1> } %a, 1
409  %d = select <vscale x 2 x i1> %c, <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> %b
410  ret <vscale x 2 x i64> %d
411}
412
413declare { <vscale x 4 x i64>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
414
; nxv4i64: split across two Z registers; each half uses the native
; mul + umulh + cmpne pattern with its own predicate (p0/p1).
415define <vscale x 4 x i64> @umulo_nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %y) {
416; CHECK-LABEL: umulo_nxv4i64:
417; CHECK:       // %bb.0:
418; CHECK-NEXT:    ptrue p0.d
419; CHECK-NEXT:    movprfx z4, z1
420; CHECK-NEXT:    mul z4.d, p0/m, z4.d, z3.d
421; CHECK-NEXT:    umulh z1.d, p0/m, z1.d, z3.d
422; CHECK-NEXT:    movprfx z3, z0
423; CHECK-NEXT:    umulh z3.d, p0/m, z3.d, z2.d
424; CHECK-NEXT:    cmpne p1.d, p0/z, z1.d, #0
425; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z2.d
426; CHECK-NEXT:    cmpne p0.d, p0/z, z3.d, #0
427; CHECK-NEXT:    mov z4.d, p1/m, #0 // =0x0
428; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
429; CHECK-NEXT:    mov z1.d, z4.d
430; CHECK-NEXT:    ret
431  %a = call { <vscale x 4 x i64>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %y)
432  %b = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i1> } %a, 0
433  %c = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i1> } %a, 1
434  %d = select <vscale x 4 x i1> %c, <vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> %b
435  ret <vscale x 4 x i64> %d
436}
437
438declare { <vscale x 8 x i64>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)
439
; nxv8i64: split across four Z registers (z0-z3 x z4-z7); four copies of the
; mul + umulh + cmpne pattern with predicates p0-p3 and temporary z24.
440define <vscale x 8 x i64> @umulo_nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y) {
441; CHECK-LABEL: umulo_nxv8i64:
442; CHECK:       // %bb.0:
443; CHECK-NEXT:    ptrue p0.d
444; CHECK-NEXT:    movprfx z24, z3
445; CHECK-NEXT:    mul z24.d, p0/m, z24.d, z7.d
446; CHECK-NEXT:    umulh z3.d, p0/m, z3.d, z7.d
447; CHECK-NEXT:    cmpne p1.d, p0/z, z3.d, #0
448; CHECK-NEXT:    movprfx z3, z2
449; CHECK-NEXT:    umulh z3.d, p0/m, z3.d, z6.d
450; CHECK-NEXT:    cmpne p2.d, p0/z, z3.d, #0
451; CHECK-NEXT:    movprfx z3, z1
452; CHECK-NEXT:    mul z3.d, p0/m, z3.d, z5.d
453; CHECK-NEXT:    umulh z1.d, p0/m, z1.d, z5.d
454; CHECK-NEXT:    mul z2.d, p0/m, z2.d, z6.d
455; CHECK-NEXT:    cmpne p3.d, p0/z, z1.d, #0
456; CHECK-NEXT:    movprfx z1, z0
457; CHECK-NEXT:    umulh z1.d, p0/m, z1.d, z4.d
458; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z4.d
459; CHECK-NEXT:    cmpne p0.d, p0/z, z1.d, #0
460; CHECK-NEXT:    mov z3.d, p3/m, #0 // =0x0
461; CHECK-NEXT:    mov z24.d, p1/m, #0 // =0x0
462; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
463; CHECK-NEXT:    mov z2.d, p2/m, #0 // =0x0
464; CHECK-NEXT:    mov z1.d, z3.d
465; CHECK-NEXT:    mov z3.d, z24.d
466; CHECK-NEXT:    ret
467  %a = call { <vscale x 8 x i64>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y)
468  %b = extractvalue { <vscale x 8 x i64>, <vscale x 8 x i1> } %a, 0
469  %c = extractvalue { <vscale x 8 x i64>, <vscale x 8 x i1> } %a, 1
470  %d = select <vscale x 8 x i1> %c, <vscale x 8 x i64> zeroinitializer, <vscale x 8 x i64> %b
471  ret <vscale x 8 x i64> %d
472}
473