; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s -check-prefix=CHECK --check-prefix=CHECK-LE
; RUN: llc -mtriple=armv7-eabi %s -o - | FileCheck %s --check-prefix=CHECK-V7-LE
; RUN: llc -mtriple=armeb-eabi %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
; RUN: llc -mtriple=armebv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7-BE
; RUN: llc -mtriple=thumbv6-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V6-THUMB
; RUN: llc -mtriple=thumbv6t2-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V6-THUMB2
; RUN: llc -mtriple=thumbv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7-THUMB
; RUN: llc -mtriple=thumbebv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7-THUMB-BE
; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V6M-THUMB
; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7M-THUMB
; RUN: llc -mtriple=thumbv7em-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7EM-THUMB
; Check that signed and unsigned long multiply-accumulate instructions are
; generated where expected, and are not generated where they must not be.
13
; MACLongTest1: unsigned 32x32->64 multiply added to a 64-bit accumulator.
; Both i32 arguments are zero-extended, multiplied as i64 and added to %c.
; The runs that expect a umlal also expect its result pair to be moved into
; r0/r1: low word in r0 for the little-endian runs, high word in r0 for the
; big-endian runs. The thumbv6 run must not contain umlal.
define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) {
;CHECK-LABEL: MACLongTest1:
;CHECK-V6-THUMB-NOT: umlal
;CHECK-LE: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-LE: mov r0, [[RDLO]]
;CHECK-LE: mov r1, [[RDHI]]
;CHECK-BE: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-BE: mov r0, [[RDHI]]
;CHECK-BE: mov r1, [[RDLO]]
;CHECK-V6-THUMB2: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V6-THUMB2: mov r0, [[RDLO]]
;CHECK-V6-THUMB2: mov r1, [[RDHI]]
;CHECK-V7-THUMB: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-THUMB: mov r0, [[RDLO]]
;CHECK-V7-THUMB: mov r1, [[RDHI]]
;CHECK-V7-THUMB-BE: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
  %conv = zext i32 %a to i64
  %conv1 = zext i32 %b to i64
  %mul = mul i64 %conv1, %conv
  %add = add i64 %mul, %c
  ret i64 %add
}
38
; MACLongTest2: signed counterpart of MACLongTest1.
; Both i32 arguments are sign-extended, multiplied as i64 and added to %c.
; The runs that expect an smlal also expect its result pair to be moved into
; r0/r1: low word in r0 for the little-endian runs, high word in r0 for the
; big-endian runs. The thumbv6 run must not contain smlal, mirroring the
; umlal guard in the unsigned test.
define i64 @MACLongTest2(i32 %a, i32 %b, i64 %c)  {
;CHECK-LABEL: MACLongTest2:
;CHECK-V6-THUMB-NOT: smlal
;CHECK-LE: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-LE: mov r0, [[RDLO]]
;CHECK-LE: mov r1, [[RDHI]]
;CHECK-BE: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-BE: mov r0, [[RDHI]]
;CHECK-BE: mov r1, [[RDLO]]
;CHECK-V6-THUMB2: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V6-THUMB2: mov r0, [[RDLO]]
;CHECK-V6-THUMB2: mov r1, [[RDHI]]
;CHECK-V7-THUMB: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-THUMB: mov r0, [[RDLO]]
;CHECK-V7-THUMB: mov r1, [[RDHI]]
;CHECK-V7-THUMB-BE: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
  %conv = sext i32 %a to i64
  %conv1 = sext i32 %b to i64
  %mul = mul nsw i64 %conv1, %conv
  %add = add nsw i64 %mul, %c
  ret i64 %add
}
62
; MACLongTest3: unsigned multiply-accumulate whose accumulator is a
; zero-extended i32, so the high accumulator word has to be materialized as 0.
;
; Two things to check here: the @earlyclobber constraint (on <= v5) and the "$Rd = $R" ones.
;    + Without @earlyclobber the v7 code is natural. With it, the first two
;      registers must be distinct from the third.
;    + Without "$Rd = $R", this can be satisfied without a mov before the umlal
;      by trying to use 6 different registers in the MachineInstr. The natural
;      evolution of this attempt currently leaves only two movs in the final
;      function, both after the umlal. With it, *some* move has to happen
;      before the umlal.
define i64 @MACLongTest3(i32 %a, i32 %b, i32 %c) {
;CHECK-LABEL: MACLongTest3:
;CHECK-LE: mov [[RDHI:r[0-9]+]], #0
;CHECK-LE: umlal [[RDLO:r[0-9]+]], [[RDHI]], r1, r0
;CHECK-LE: mov r0, [[RDLO]]
;CHECK-LE: mov r1, [[RDHI]]
;CHECK-BE: mov [[RDHI:r[0-9]+]], #0
;CHECK-BE: umlal [[RDLO:r[0-9]+]], [[RDHI]], r1, r0
;CHECK-BE: mov r0, [[RDHI]]
;CHECK-BE: mov r1, [[RDLO]]
;CHECK-V6-THUMB2: umlal
;CHECK-V7-THUMB: umlal
;CHECK-V6-THUMB-NOT: umlal
  %conv = zext i32 %b to i64
  %conv1 = zext i32 %a to i64
  %mul = mul i64 %conv, %conv1
  %conv2 = zext i32 %c to i64
  %add = add i64 %mul, %conv2
  ret i64 %add
}
91
; MACLongTest4: signed multiply-accumulate whose accumulator is a
; sign-extended i32.
; The ARM-mode runs expect the high accumulator word to be produced by an
; arithmetic shift right by 31 of the low word, followed by an smlal on that
; register pair and the moves into r0/r1 (order depends on endianness).
; The Thumb-2 runs only require that an smlal is present; the thumbv6 run
; must not contain one.
define i64 @MACLongTest4(i32 %a, i32 %b, i32 %c) {
;CHECK-LABEL: MACLongTest4:
;CHECK-V6-THUMB-NOT: smlal
;CHECK-V6-THUMB2: smlal
;CHECK-V7-THUMB: smlal
;CHECK-LE: asr [[RDHI:r[0-9]+]], [[RDLO:r[0-9]+]], #31
;CHECK-LE: smlal [[RDLO]], [[RDHI]], r1, r0
;CHECK-LE: mov r0, [[RDLO]]
;CHECK-LE: mov r1, [[RDHI]]
;CHECK-BE: asr [[RDHI:r[0-9]+]], [[RDLO:r[0-9]+]], #31
;CHECK-BE: smlal [[RDLO]], [[RDHI]], r1, r0
;CHECK-BE: mov r0, [[RDHI]]
;CHECK-BE: mov r1, [[RDLO]]
  %conv = sext i32 %b to i64
  %conv1 = sext i32 %a to i64
  %mul = mul nsw i64 %conv, %conv1
  %conv2 = sext i32 %c to i64
  %add = add nsw i64 %mul, %conv2
  ret i64 %add
}
112
; MACLongTest6: sum of two signed 32x32->64 products (%b*%a + %d*%c).
; The runs that check this function expect an smull for the first product
; followed by an smlal that accumulates the second product into the smull's
; result registers. The thumbv6 run must contain neither instruction.
;
; The ARMv7 expectations are spelled with the V7-LE and V7-BE prefixes because
; those are the prefixes the armv7 and armebv7 llc invocations at the top of
; this file hand to FileCheck. A bare V7 prefix is enabled by no invocation,
; so directives written with it would silently never be evaluated.
; NOTE(review): the V7-LE/V7-BE patterns are copied from the thumbv7 ones;
; confirm against actual llc output for armv7-eabi and armebv7-eabi.
define i64 @MACLongTest6(i32 %a, i32 %b, i32 %c, i32 %d) {
;CHECK-LABEL: MACLongTest6:
;CHECK-V6-THUMB-NOT: smull
;CHECK-V6-THUMB-NOT: smlal
;CHECK: smull   r12, lr, r1, r0
;CHECK: smlal   r12, lr, r3, r2
;CHECK-V7-LE: smull   [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0
;CHECK-V7-LE: smlal   [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]
;CHECK-V7-BE: smull   [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0
;CHECK-V7-BE: smlal   [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]
;CHECK-V7-THUMB: smull   [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0
;CHECK-V7-THUMB: smlal   [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]
;CHECK-V6-THUMB2: smull   [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0
;CHECK-V6-THUMB2: smlal   [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]
  %conv = sext i32 %a to i64
  %conv1 = sext i32 %b to i64
  %mul = mul nsw i64 %conv1, %conv
  %conv2 = sext i32 %c to i64
  %conv3 = sext i32 %d to i64
  %mul4 = mul nsw i64 %conv3, %conv2
  %add = add nsw i64 %mul4, %mul
  ret i64 %add
}
134
; MACLongTest7: negative test for signed multiply-accumulate formation.
; The signed 64-bit product has its two 32-bit halves swapped (shl 32 | lshr 32)
; before being added to %acc, so the words being accumulated do not line up
; with the low/high results of the multiply. No smlal may be emitted for any
; of the runs checked here.
define i64 @MACLongTest7(i64 %acc, i32 %lhs, i32 %rhs) {
;CHECK-LABEL: MACLongTest7:
;CHECK-NOT: smlal
;CHECK-V6-THUMB2-NOT: smlal
;CHECK-V7-THUMB-NOT: smlal
;CHECK-V6-THUMB-NOT: smlal
  %conv = sext i32 %lhs to i64
  %conv1 = sext i32 %rhs to i64
  %mul = mul nsw i64 %conv1, %conv
  %shl = shl i64 %mul, 32
  %shr = lshr i64 %mul, 32
  %or = or i64 %shl, %shr
  %add = add i64 %or, %acc
  ret i64 %add
}
150
; MACLongTest8: negative test for unsigned multiply-accumulate formation.
; The unsigned 64-bit product is reduced to its low 32 bits, and that low word
; is placed in both halves of the value added to %acc (and 0xffffffff for the
; low half, shl 32 for the high half). The high half therefore is not the high
; result of the multiply, so no long multiply-accumulate may be formed.
; Because the multiply here is unsigned, umlal is the instruction a wrong
; combine would produce; it is forbidden alongside the original smlal checks.
define i64 @MACLongTest8(i64 %acc, i32 %lhs, i32 %rhs) {
;CHECK-LABEL: MACLongTest8:
;CHECK-NOT: smlal
;CHECK-NOT: umlal
;CHECK-V6-THUMB2-NOT: smlal
;CHECK-V6-THUMB2-NOT: umlal
;CHECK-V7-THUMB-NOT: smlal
;CHECK-V7-THUMB-NOT: umlal
;CHECK-V6-THUMB-NOT: smlal
;CHECK-V6-THUMB-NOT: umlal
  %conv = zext i32 %lhs to i64
  %conv1 = zext i32 %rhs to i64
  %mul = mul nuw i64 %conv1, %conv
  %and = and i64 %mul, 4294967295
  %shl = shl i64 %mul, 32
  %or = or i64 %and, %shl
  %add = add i64 %or, %acc
  ret i64 %add
}
166
; MACLongTest9: unsigned product plus two zero-extended 32-bit addends,
; added one after the other: (%rhs * %lhs + %lo) + %hi.
; The armv7, armebv7, thumbv6t2, thumbv7, thumbebv7 and thumbv7em runs expect
; a umaal whose result pair is then moved into r0/r1 (low word in r0 for
; little-endian, high word in r0 for big-endian). The arm-eabi/armeb-eabi,
; thumbv6, thumbv6m and thumbv7m runs must not contain umaal.
define i64 @MACLongTest9(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {
;CHECK-LABEL: MACLongTest9:
;CHECK-V7-LE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-LE: mov r0, [[RDLO]]
;CHECK-V7-LE: mov r1, [[RDHI]]
;CHECK-V7-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-BE: mov r0, [[RDHI]]
;CHECK-V7-BE: mov r1, [[RDLO]]
;CHECK-V6-THUMB2: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V6-THUMB2: mov r0, [[RDLO]]
;CHECK-V6-THUMB2: mov r1, [[RDHI]]
;CHECK-V7-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-THUMB: mov r0, [[RDLO]]
;CHECK-V7-THUMB: mov r1, [[RDHI]]
;CHECK-V7-THUMB-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
;CHECK-V7EM-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7EM-THUMB: mov r0, [[RDLO]]
;CHECK-V7EM-THUMB: mov r1, [[RDHI]]
;CHECK-NOT:umaal
;CHECK-V6-THUMB-NOT: umaal
;CHECK-V6M-THUMB-NOT: umaal
;CHECK-V7M-THUMB-NOT: umaal
  %conv = zext i32 %lhs to i64
  %conv1 = zext i32 %rhs to i64
  %mul = mul nuw i64 %conv1, %conv
  %conv2 = zext i32 %lo to i64
  %add = add i64 %mul, %conv2
  %conv3 = zext i32 %hi to i64
  %add2 = add i64 %add, %conv3
  ret i64 %add2
}
200
; MACLongTest10: same computation as MACLongTest9 with the additions
; re-associated: the two zero-extended addends are summed first and the
; product is added last, (%lo + %hi) + (%rhs * %lhs).
; The expectations are the same as for MACLongTest9: umaal plus the moves into
; r0/r1 on the armv7, armebv7, thumbv6t2, thumbv7, thumbebv7 and thumbv7em
; runs, and no umaal on the arm-eabi/armeb-eabi, thumbv6, thumbv6m and
; thumbv7m runs.
define i64 @MACLongTest10(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {
;CHECK-LABEL: MACLongTest10:
;CHECK-V7-LE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-LE: mov r0, [[RDLO]]
;CHECK-V7-LE: mov r1, [[RDHI]]
;CHECK-V7-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-BE: mov r0, [[RDHI]]
;CHECK-V7-BE: mov r1, [[RDLO]]
;CHECK-V6-THUMB2: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V6-THUMB2: mov r0, [[RDLO]]
;CHECK-V6-THUMB2: mov r1, [[RDHI]]
;CHECK-V7-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-THUMB: mov r0, [[RDLO]]
;CHECK-V7-THUMB: mov r1, [[RDHI]]
;CHECK-V7-THUMB-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
;CHECK-V7EM-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7EM-THUMB: mov r0, [[RDLO]]
;CHECK-V7EM-THUMB: mov r1, [[RDHI]]
;CHECK-NOT:umaal
;CHECK-V6-THUMB-NOT:umaal
;CHECK-V6M-THUMB-NOT: umaal
;CHECK-V7M-THUMB-NOT: umaal
  %conv = zext i32 %lhs to i64
  %conv1 = zext i32 %rhs to i64
  %mul = mul nuw i64 %conv1, %conv
  %conv2 = zext i32 %lo to i64
  %conv3 = zext i32 %hi to i64
  %add = add i64 %conv2, %conv3
  %add2 = add i64 %add, %mul
  ret i64 %add2
}
234