; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s -check-prefix=CHECK --check-prefix=CHECK-LE
; RUN: llc -mtriple=armv7-eabi %s -o - | FileCheck %s --check-prefix=CHECK-V7-LE
; RUN: llc -mtriple=armeb-eabi %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
; RUN: llc -mtriple=armebv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7-BE
; RUN: llc -mtriple=thumbv6-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V6-THUMB
; RUN: llc -mtriple=thumbv6t2-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V6-THUMB2
; RUN: llc -mtriple=thumbv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7-THUMB
; RUN: llc -mtriple=thumbebv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7-THUMB-BE
; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V6M-THUMB
; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7M-THUMB
; RUN: llc -mtriple=thumbv7em-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7EM-THUMB
; Check generated signed and unsigned multiply accumulate long.

; Unsigned case with a 64-bit accumulator: zext(%a) * zext(%b) + %c.
; The directives below expect a single umlal on every checked configuration
; except thumbv6, and then verify which result half lands in r0/r1 for the
; little- and big-endian variants.
define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) {
;CHECK-LABEL: MACLongTest1:
;CHECK-V6-THUMB-NOT: umlal
;CHECK-LE: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-LE: mov r0, [[RDLO]]
;CHECK-LE: mov r1, [[RDHI]]
;CHECK-BE: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-BE: mov r0, [[RDHI]]
;CHECK-BE: mov r1, [[RDLO]]
;CHECK-V6-THUMB2: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V6-THUMB2: mov r0, [[RDLO]]
;CHECK-V6-THUMB2: mov r1, [[RDHI]]
;CHECK-V7-THUMB: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-THUMB: mov r0, [[RDLO]]
;CHECK-V7-THUMB: mov r1, [[RDHI]]
;CHECK-V7-THUMB-BE: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
  %conv = zext i32 %a to i64
  %conv1 = zext i32 %b to i64
  %mul = mul i64 %conv1, %conv
  %add = add i64 %mul, %c
  ret i64 %add
}

; Signed counterpart of MACLongTest1: sext(%a) * sext(%b) + %c, expected to
; become smlal on the configurations that carry a positive directive.
define i64 @MACLongTest2(i32 %a, i32 %b, i64 %c) {
;CHECK-LABEL: MACLongTest2:
;CHECK-LE: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-LE: mov r0, [[RDLO]]
;CHECK-LE: mov r1, [[RDHI]]
;CHECK-BE: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-BE: mov r0, [[RDHI]]
;CHECK-BE: mov r1, [[RDLO]]
;CHECK-V6-THUMB2: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V6-THUMB2: mov r0, [[RDLO]]
;CHECK-V6-THUMB2: mov r1, [[RDHI]]
;CHECK-V7-THUMB: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-THUMB: mov r0, [[RDLO]]
;CHECK-V7-THUMB: mov r1, [[RDHI]]
;CHECK-V7-THUMB-BE: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
  %conv = sext i32 %a to i64
  %conv1 = sext i32 %b to i64
  %mul = mul nsw i64 %conv1, %conv
  %add = add nsw i64 %mul, %c
  ret i64 %add
}

; Two things to check here: the @earlyclobber constraint (on <= v5) and the
; "$Rd = $R" ones.
;    + Without @earlyclobber the v7 code is natural. With it, the first two
;      registers must be distinct from the third.
;    + Without "$Rd = $R", this can be satisfied without a mov before the
;      umlal by trying to use 6 different registers in the MachineInstr. The
;      natural evolution of this attempt currently leaves only two movs in the
;      final function, both after the umlal. With it, *some* move has to
;      happen before the umlal.
; The accumulator here is a zero-extended i32 (%c), so the high half of the
; accumulator is expected to be materialised as #0 before the umlal.
define i64 @MACLongTest3(i32 %a, i32 %b, i32 %c) {
;CHECK-LABEL: MACLongTest3:
;CHECK-LE: mov [[RDHI:r[0-9]+]], #0
;CHECK-LE: umlal [[RDLO:r[0-9]+]], [[RDHI]], r1, r0
;CHECK-LE: mov r0, [[RDLO]]
;CHECK-LE: mov r1, [[RDHI]]
;CHECK-BE: mov [[RDHI:r[0-9]+]], #0
;CHECK-BE: umlal [[RDLO:r[0-9]+]], [[RDHI]], r1, r0
;CHECK-BE: mov r0, [[RDHI]]
;CHECK-BE: mov r1, [[RDLO]]
;CHECK-V6-THUMB2: umlal
;CHECK-V7-THUMB: umlal
;CHECK-V6-THUMB-NOT: umlal
  %conv = zext i32 %b to i64
  %conv1 = zext i32 %a to i64
  %mul = mul i64 %conv, %conv1
  %conv2 = zext i32 %c to i64
  %add = add i64 %mul, %conv2
  ret i64 %add
}

; Signed counterpart of MACLongTest3: the accumulator is a sign-extended i32,
; so the high half is expected to come from an asr #31 of the low half before
; the smlal.
define i64 @MACLongTest4(i32 %a, i32 %b, i32 %c) {
;CHECK-LABEL: MACLongTest4:
;CHECK-V6-THUMB-NOT: smlal
;CHECK-V6-THUMB2: smlal
;CHECK-V7-THUMB: smlal
;CHECK-LE: asr [[RDHI:r[0-9]+]], [[RDLO:r[0-9]+]], #31
;CHECK-LE: smlal [[RDLO]], [[RDHI]], r1, r0
;CHECK-LE: mov r0, [[RDLO]]
;CHECK-LE: mov r1, [[RDHI]]
;CHECK-BE: asr [[RDHI:r[0-9]+]], [[RDLO:r[0-9]+]], #31
;CHECK-BE: smlal [[RDLO]], [[RDHI]], r1, r0
;CHECK-BE: mov r0, [[RDHI]]
;CHECK-BE: mov r1, [[RDLO]]
  %conv = sext i32 %b to i64
  %conv1 = sext i32 %a to i64
  %mul = mul nsw i64 %conv, %conv1
  %conv2 = sext i32 %c to i64
  %add = add nsw i64 %mul, %conv2
  ret i64 %add
}

; Sum of two signed 32x32->64 products: the first product is expected as a
; smull and the second is expected to be folded into it with a smlal.
define i64 @MACLongTest6(i32 %a, i32 %b, i32 %c, i32 %d) {
;CHECK-LABEL: MACLongTest6:
;CHECK-V6-THUMB-NOT: smull
;CHECK-V6-THUMB-NOT: smlal
;CHECK: smull r12, lr, r1, r0
;CHECK: smlal r12, lr, r3, r2
; NOTE(review): the next two directives use the bare CHECK-V7 prefix, which no
; invocation in the visible file header enables (only the -LE and -BE suffixed
; variants are enabled), so they are presumably never evaluated. Confirm the
; intended prefix before relying on them.
;CHECK-V7: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0
;CHECK-V7: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]
;CHECK-V7-THUMB: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0
;CHECK-V7-THUMB: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]
;CHECK-V6-THUMB2: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0
;CHECK-V6-THUMB2: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]
  %conv = sext i32 %a to i64
  %conv1 = sext i32 %b to i64
  %mul = mul nsw i64 %conv1, %conv
  %conv2 = sext i32 %c to i64
  %conv3 = sext i32 %d to i64
  %mul4 = mul nsw i64 %conv3, %conv2
  %add = add nsw i64 %mul4, %mul
  ret i64 %add
}

; Negative test: the two 32-bit halves of the signed product are swapped
; (shl 32 | lshr 32) before being added to %acc, so the value being accumulated
; is not the product itself and no smlal may be formed.
define i64 @MACLongTest7(i64 %acc, i32 %lhs, i32 %rhs) {
;CHECK-LABEL: MACLongTest7:
;CHECK-NOT: smlal
;CHECK-V6-THUMB2-NOT: smlal
;CHECK-V7-THUMB-NOT: smlal
;CHECK-V6-THUMB-NOT: smlal
  %conv = sext i32 %lhs to i64
  %conv1 = sext i32 %rhs to i64
  %mul = mul nsw i64 %conv1, %conv
  %shl = shl i64 %mul, 32
  %shr = lshr i64 %mul, 32
  %or = or i64 %shl, %shr
  %add = add i64 %or, %acc
  ret i64 %add
}

; Negative test: the low 32 bits of the unsigned product are placed in both
; halves of the value added to %acc, so again the accumulated value is not the
; product. The operands are zero-extended, so the instruction that must not be
; formed is the unsigned one; the umlal directives cover that, and the original
; smlal directives are kept as they were.
define i64 @MACLongTest8(i64 %acc, i32 %lhs, i32 %rhs) {
;CHECK-LABEL: MACLongTest8:
;CHECK-NOT: smlal
;CHECK-V6-THUMB2-NOT: smlal
;CHECK-V7-THUMB-NOT: smlal
;CHECK-V6-THUMB-NOT: smlal
;CHECK-NOT: umlal
;CHECK-V6-THUMB2-NOT: umlal
;CHECK-V7-THUMB-NOT: umlal
;CHECK-V6-THUMB-NOT: umlal
  %conv = zext i32 %lhs to i64
  %conv1 = zext i32 %rhs to i64
  %mul = mul nuw i64 %conv1, %conv
  %and = and i64 %mul, 4294967295
  %shl = shl i64 %mul, 32
  %or = or i64 %and, %shl
  %add = add i64 %or, %acc
  ret i64 %add
}

; zext(%lhs) * zext(%rhs) + zext(%lo) + zext(%hi), with the product added to
; %lo first. The directives expect umaal on the v7 ARM, thumbv6t2, thumbv7 and
; thumbv7em configurations and forbid it on the remaining ones.
define i64 @MACLongTest9(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {
;CHECK-LABEL: MACLongTest9:
;CHECK-V7-LE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-LE: mov r0, [[RDLO]]
;CHECK-V7-LE: mov r1, [[RDHI]]
;CHECK-V7-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-BE: mov r0, [[RDHI]]
;CHECK-V7-BE: mov r1, [[RDLO]]
;CHECK-V6-THUMB2: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V6-THUMB2: mov r0, [[RDLO]]
;CHECK-V6-THUMB2: mov r1, [[RDHI]]
;CHECK-V7-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-THUMB: mov r0, [[RDLO]]
;CHECK-V7-THUMB: mov r1, [[RDHI]]
;CHECK-V7-THUMB-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
;CHECK-V7EM-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7EM-THUMB: mov r0, [[RDLO]]
;CHECK-V7EM-THUMB: mov r1, [[RDHI]]
;CHECK-NOT:umaal
;CHECK-V6-THUMB-NOT: umaal
;CHECK-V6M-THUMB-NOT: umaal
;CHECK-V7M-THUMB-NOT: umaal
  %conv = zext i32 %lhs to i64
  %conv1 = zext i32 %rhs to i64
  %mul = mul nuw i64 %conv1, %conv
  %conv2 = zext i32 %lo to i64
  %add = add i64 %mul, %conv2
  %conv3 = zext i32 %hi to i64
  %add2 = add i64 %add, %conv3
  ret i64 %add2
}

; Same computation as MACLongTest9 with the additions reassociated: %lo and
; %hi are summed first and the product is added last. The expectations are
; the same as for MACLongTest9.
define i64 @MACLongTest10(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {
;CHECK-LABEL: MACLongTest10:
;CHECK-V7-LE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-LE: mov r0, [[RDLO]]
;CHECK-V7-LE: mov r1, [[RDHI]]
;CHECK-V7-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-BE: mov r0, [[RDHI]]
;CHECK-V7-BE: mov r1, [[RDLO]]
;CHECK-V6-THUMB2: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V6-THUMB2: mov r0, [[RDLO]]
;CHECK-V6-THUMB2: mov r1, [[RDHI]]
;CHECK-V7-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-THUMB: mov r0, [[RDLO]]
;CHECK-V7-THUMB: mov r1, [[RDHI]]
;CHECK-V7-THUMB-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
;CHECK-V7EM-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
;CHECK-V7EM-THUMB: mov r0, [[RDLO]]
;CHECK-V7EM-THUMB: mov r1, [[RDHI]]
;CHECK-NOT:umaal
;CHECK-V6-THUMB-NOT:umaal
;CHECK-V6M-THUMB-NOT: umaal
;CHECK-V7M-THUMB-NOT: umaal
  %conv = zext i32 %lhs to i64
  %conv1 = zext i32 %rhs to i64
  %mul = mul nuw i64 %conv1, %conv
  %conv2 = zext i32 %lo to i64
  %conv3 = zext i32 %hi to i64
  %add = add i64 %conv2, %conv3
  %add2 = add i64 %add, %mul
  ret i64 %add2
}