; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown < %s | FileCheck %s --check-prefixes=CHECK,LINUX,SDAG
; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefixes=CHECK,LINUX,FAST
; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown -mcpu=knl < %s | FileCheck %s --check-prefixes=CHECK,LINUX,SDAG
; RUN: llc -disable-peephole -mtriple=x86_64-pc-win32 < %s | FileCheck %s --check-prefixes=CHECK,WIN64
; RUN: llc -disable-peephole -mtriple=i386-pc-win32 < %s | FileCheck %s --check-prefix=WIN32

; Codegen tests for the llvm.smul.with.overflow.* and llvm.umul.with.overflow.*
; intrinsics on x86. Four groups follow: constant operands (t1-t3), product
; stored plus overflow bit returned (smulo*/umulo*), overflow bit feeding a
; select (*oselect*), and overflow bit feeding a conditional branch (*obri*).
; The assertion blocks are generated; regenerate them with the script named in
; the NOTE line rather than editing them by hand.

; Constant operands 9 * 8: the expected code materializes 72 and a zero
; overflow bit directly, with no multiply instruction.
define {i64, i1} @t1() nounwind {
; CHECK-LABEL: t1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl $72, %eax
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    retq
;
; WIN32-LABEL: t1:
; WIN32:       # %bb.0:
; WIN32-NEXT:    movl $72, %eax
; WIN32-NEXT:    xorl %edx, %edx
; WIN32-NEXT:    xorl %ecx, %ecx
; WIN32-NEXT:    retl
  %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 8)
  ret {i64, i1} %1
}

; Constant operands 9 * 0: expected result is zero with a zero overflow bit.
define {i64, i1} @t2() nounwind {
; CHECK-LABEL: t2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    retq
;
; WIN32-LABEL: t2:
; WIN32:       # %bb.0:
; WIN32-NEXT:    xorl %eax, %eax
; WIN32-NEXT:    xorl %edx, %edx
; WIN32-NEXT:    xorl %ecx, %ecx
; WIN32-NEXT:    retl
  %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 0)
  ret {i64, i1} %1
}

; Constant operands 9 * -1 (all-ones when read as unsigned): the unsigned
; product overflows, so the expected code returns the wrapped value -9 and
; sets the overflow bit to 1.
define {i64, i1} @t3() nounwind {
; CHECK-LABEL: t3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq $-9, %rax
; CHECK-NEXT:    movb $1, %dl
; CHECK-NEXT:    retq
;
; WIN32-LABEL: t3:
; WIN32:       # %bb.0:
; WIN32-NEXT:    movl $-9, %eax
; WIN32-NEXT:    movl $-1, %edx
; WIN32-NEXT:    movb $1, %cl
; WIN32-NEXT:    retl
  %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 -1)
  ret {i64, i1} %1
}

; SMULO
; Signed i8 multiply: stores the product through %res and returns the
; overflow bit.
define zeroext i1 @smuloi8(i8 %v1, i8 %v2, ptr %res) {
; SDAG-LABEL: smuloi8:
; SDAG:       # %bb.0:
; SDAG-NEXT:    movl %edi, %eax
; SDAG-NEXT:    # kill: def $al killed $al killed $eax
; SDAG-NEXT:    imulb %sil
; SDAG-NEXT:    seto %cl
; SDAG-NEXT:    movb %al, (%rdx)
; SDAG-NEXT:    movl %ecx, %eax
; SDAG-NEXT:    retq
;
; FAST-LABEL: smuloi8:
; FAST:       # %bb.0:
; FAST-NEXT:    movl %edi, %eax
; FAST-NEXT:    # kill: def $al killed $al killed $eax
; FAST-NEXT:    imulb %sil
; FAST-NEXT:    seto %cl
; FAST-NEXT:    movb %al, (%rdx)
; FAST-NEXT:    andb $1, %cl
; FAST-NEXT:    movzbl %cl, %eax
; FAST-NEXT:    retq
;
; WIN64-LABEL: smuloi8:
; WIN64:       # %bb.0:
; WIN64-NEXT:    movl %ecx, %eax
; WIN64-NEXT:    imulb %dl
; WIN64-NEXT:    seto %cl
; WIN64-NEXT:    movb %al, (%r8)
; WIN64-NEXT:    movl %ecx, %eax
; WIN64-NEXT:    retq
;
; WIN32-LABEL: smuloi8:
; WIN32:       # %bb.0:
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    imulb {{[0-9]+}}(%esp)
; WIN32-NEXT:    seto %cl
; WIN32-NEXT:    movb %al, (%edx)
; WIN32-NEXT:    movl %ecx, %eax
; WIN32-NEXT:    retl
  %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
  %val = extractvalue {i8, i1} %t, 0
  %obit = extractvalue {i8, i1} %t, 1
  store i8 %val, ptr %res
  ret i1 %obit
}

; Signed i16 multiply: stores the product through %res and returns the
; overflow bit.
define zeroext i1 @smuloi16(i16 %v1, i16 %v2, ptr %res) {
; SDAG-LABEL: smuloi16:
; SDAG:       # %bb.0:
; SDAG-NEXT:    imulw %si, %di
; SDAG-NEXT:    seto %al
; SDAG-NEXT:    movw %di, (%rdx)
; SDAG-NEXT:    retq
;
; FAST-LABEL: smuloi16:
; FAST:       # %bb.0:
; FAST-NEXT:    imulw %si, %di
; FAST-NEXT:    seto %al
; FAST-NEXT:    movw %di, (%rdx)
; FAST-NEXT:    andb $1, %al
; FAST-NEXT:    movzbl %al, %eax
; FAST-NEXT:    retq
;
; WIN64-LABEL: smuloi16:
; WIN64:       # %bb.0:
; WIN64-NEXT:    imulw %dx, %cx
; WIN64-NEXT:    seto %al
; WIN64-NEXT:    movw %cx, (%r8)
; WIN64-NEXT:    retq
;
; WIN32-LABEL: smuloi16:
; WIN32:       # %bb.0:
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT:    imulw {{[0-9]+}}(%esp), %dx
; WIN32-NEXT:    seto %al
; WIN32-NEXT:    movw %dx, (%ecx)
; WIN32-NEXT:    retl
  %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
  %val = extractvalue {i16, i1} %t, 0
  %obit = extractvalue {i16, i1} %t, 1
  store i16 %val, ptr %res
  ret i1 %obit
}

; Signed i32 multiply: stores the product through %res and returns the
; overflow bit.
define zeroext i1 @smuloi32(i32 %v1, i32 %v2, ptr %res) {
; SDAG-LABEL: smuloi32:
; SDAG:       # %bb.0:
; SDAG-NEXT:    imull %esi, %edi
; SDAG-NEXT:    seto %al
; SDAG-NEXT:    movl %edi, (%rdx)
; SDAG-NEXT:    retq
;
; FAST-LABEL: smuloi32:
; FAST:       # %bb.0:
; FAST-NEXT:    imull %esi, %edi
; FAST-NEXT:    seto %al
; FAST-NEXT:    movl %edi, (%rdx)
; FAST-NEXT:    andb $1, %al
; FAST-NEXT:    movzbl %al, %eax
; FAST-NEXT:    retq
;
; WIN64-LABEL: smuloi32:
; WIN64:       # %bb.0:
; WIN64-NEXT:    imull %edx, %ecx
; WIN64-NEXT:    seto %al
; WIN64-NEXT:    movl %ecx, (%r8)
; WIN64-NEXT:    retq
;
; WIN32-LABEL: smuloi32:
; WIN32:       # %bb.0:
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT:    imull {{[0-9]+}}(%esp), %edx
; WIN32-NEXT:    seto %al
; WIN32-NEXT:    movl %edx, (%ecx)
; WIN32-NEXT:    retl
  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, ptr %res
  ret i1 %obit
}

; Signed i64 multiply: stores the product through %res and returns the
; overflow bit. The 64-bit targets expect a single imulq; the i386 target
; expects an expanded sequence built from 32-bit multiplies.
define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) {
; SDAG-LABEL: smuloi64:
; SDAG:       # %bb.0:
; SDAG-NEXT:    imulq %rsi, %rdi
; SDAG-NEXT:    seto %al
; SDAG-NEXT:    movq %rdi, (%rdx)
; SDAG-NEXT:    retq
;
; FAST-LABEL: smuloi64:
; FAST:       # %bb.0:
; FAST-NEXT:    imulq %rsi, %rdi
; FAST-NEXT:    seto %al
; FAST-NEXT:    movq %rdi, (%rdx)
; FAST-NEXT:    andb $1, %al
; FAST-NEXT:    movzbl %al, %eax
; FAST-NEXT:    retq
;
; WIN64-LABEL: smuloi64:
; WIN64:       # %bb.0:
; WIN64-NEXT:    imulq %rdx, %rcx
; WIN64-NEXT:    seto %al
; WIN64-NEXT:    movq %rcx, (%r8)
; WIN64-NEXT:    retq
;
; WIN32-LABEL: smuloi64:
; WIN32:       # %bb.0:
; WIN32-NEXT:    pushl %ebp
; WIN32-NEXT:    pushl %ebx
; WIN32-NEXT:    pushl %edi
; WIN32-NEXT:    pushl %esi
; WIN32-NEXT:    subl $8, %esp
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT:    movl %ecx, %edi
; WIN32-NEXT:    sarl $31, %edi
; WIN32-NEXT:    movl %eax, %esi
; WIN32-NEXT:    imull %edi, %esi
; WIN32-NEXT:    mull %edi
; WIN32-NEXT:    movl %eax, %ebx
; WIN32-NEXT:    addl %esi, %edx
; WIN32-NEXT:    movl %ebp, %esi
; WIN32-NEXT:    imull %ebp, %edi
; WIN32-NEXT:    addl %edx, %edi
; WIN32-NEXT:    sarl $31, %esi
; WIN32-NEXT:    movl %esi, %ebp
; WIN32-NEXT:    imull %ecx, %ebp
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT:    movl %esi, %eax
; WIN32-NEXT:    mull %ecx
; WIN32-NEXT:    addl %ebp, %edx
; WIN32-NEXT:    imull %ecx, %esi
; WIN32-NEXT:    addl %edx, %esi
; WIN32-NEXT:    addl %ebx, %eax
; WIN32-NEXT:    movl %eax, (%esp) # 4-byte Spill
; WIN32-NEXT:    adcl %edi, %esi
; WIN32-NEXT:    movl %ecx, %eax
; WIN32-NEXT:    movl %ecx, %edi
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT:    mull %ecx
; WIN32-NEXT:    movl %edx, %ebp
; WIN32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    mull %ecx
; WIN32-NEXT:    movl %edx, %ebx
; WIN32-NEXT:    movl %eax, %ecx
; WIN32-NEXT:    addl %ebp, %ecx
; WIN32-NEXT:    adcl $0, %ebx
; WIN32-NEXT:    movl %edi, %eax
; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
; WIN32-NEXT:    movl %edx, %edi
; WIN32-NEXT:    movl %eax, %ebp
; WIN32-NEXT:    addl %ecx, %ebp
; WIN32-NEXT:    adcl %ebx, %edi
; WIN32-NEXT:    setb %cl
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
; WIN32-NEXT:    addl %edi, %eax
; WIN32-NEXT:    movzbl %cl, %ecx
; WIN32-NEXT:    adcl %ecx, %edx
; WIN32-NEXT:    addl (%esp), %eax # 4-byte Folded Reload
; WIN32-NEXT:    adcl %esi, %edx
; WIN32-NEXT:    movl %ebp, %ecx
; WIN32-NEXT:    sarl $31, %ecx
; WIN32-NEXT:    xorl %ecx, %edx
; WIN32-NEXT:    xorl %eax, %ecx
; WIN32-NEXT:    orl %edx, %ecx
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    movl %ebp, 4(%eax)
; WIN32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; WIN32-NEXT:    movl %ecx, (%eax)
; WIN32-NEXT:    setne %al
; WIN32-NEXT:    addl $8, %esp
; WIN32-NEXT:    popl %esi
; WIN32-NEXT:    popl %edi
; WIN32-NEXT:    popl %ebx
; WIN32-NEXT:    popl %ebp
; WIN32-NEXT:    retl
  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, ptr %res
  ret i1 %obit
}

; UMULO
; Unsigned i8 multiply: stores the product through %res and returns the
; overflow bit.
define zeroext i1 @umuloi8(i8 %v1, i8 %v2, ptr %res) {
; SDAG-LABEL: umuloi8:
; SDAG:       # %bb.0:
; SDAG-NEXT:    movl %edi, %eax
; SDAG-NEXT:    # kill: def $al killed $al killed $eax
; SDAG-NEXT:    mulb %sil
; SDAG-NEXT:    seto %cl
; SDAG-NEXT:    movb %al, (%rdx)
; SDAG-NEXT:    movl %ecx, %eax
; SDAG-NEXT:    retq
;
; FAST-LABEL: umuloi8:
; FAST:       # %bb.0:
; FAST-NEXT:    movl %edi, %eax
; FAST-NEXT:    # kill: def $al killed $al killed $eax
; FAST-NEXT:    mulb %sil
; FAST-NEXT:    seto %cl
; FAST-NEXT:    movb %al, (%rdx)
; FAST-NEXT:    andb $1, %cl
; FAST-NEXT:    movzbl %cl, %eax
; FAST-NEXT:    retq
;
; WIN64-LABEL: umuloi8:
; WIN64:       # %bb.0:
; WIN64-NEXT:    movl %ecx, %eax
; WIN64-NEXT:    mulb %dl
; WIN64-NEXT:    seto %cl
; WIN64-NEXT:    movb %al, (%r8)
; WIN64-NEXT:    movl %ecx, %eax
; WIN64-NEXT:    retq
;
; WIN32-LABEL: umuloi8:
; WIN32:       # %bb.0:
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    mulb {{[0-9]+}}(%esp)
; WIN32-NEXT:    seto %cl
; WIN32-NEXT:    movb %al, (%edx)
; WIN32-NEXT:    movl %ecx, %eax
; WIN32-NEXT:    retl
  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
  %val = extractvalue {i8, i1} %t, 0
  %obit = extractvalue {i8, i1} %t, 1
  store i8 %val, ptr %res
  ret i1 %obit
}

; Unsigned i16 multiply: stores the product through %res and returns the
; overflow bit.
define zeroext i1 @umuloi16(i16 %v1, i16 %v2, ptr %res) {
; SDAG-LABEL: umuloi16:
; SDAG:       # %bb.0:
; SDAG-NEXT:    movq %rdx, %rcx
; SDAG-NEXT:    movl %edi, %eax
; SDAG-NEXT:    # kill: def $ax killed $ax killed $eax
; SDAG-NEXT:    mulw %si
; SDAG-NEXT:    seto %dl
; SDAG-NEXT:    movw %ax, (%rcx)
; SDAG-NEXT:    movl %edx, %eax
; SDAG-NEXT:    retq
;
; FAST-LABEL: umuloi16:
; FAST:       # %bb.0:
; FAST-NEXT:    movq %rdx, %rcx
; FAST-NEXT:    movl %edi, %eax
; FAST-NEXT:    # kill: def $ax killed $ax killed $eax
; FAST-NEXT:    mulw %si
; FAST-NEXT:    seto %dl
; FAST-NEXT:    movw %ax, (%rcx)
; FAST-NEXT:    andb $1, %dl
; FAST-NEXT:    movzbl %dl, %eax
; FAST-NEXT:    retq
;
; WIN64-LABEL: umuloi16:
; WIN64:       # %bb.0:
; WIN64-NEXT:    movl %ecx, %eax
; WIN64-NEXT:    mulw %dx
; WIN64-NEXT:    seto %cl
; WIN64-NEXT:    movw %ax, (%r8)
; WIN64-NEXT:    movl %ecx, %eax
; WIN64-NEXT:    retq
;
; WIN32-LABEL: umuloi16:
; WIN32:       # %bb.0:
; WIN32-NEXT:    pushl %esi
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    mulw {{[0-9]+}}(%esp)
; WIN32-NEXT:    seto %cl
; WIN32-NEXT:    movw %ax, (%esi)
; WIN32-NEXT:    movl %ecx, %eax
; WIN32-NEXT:    popl %esi
; WIN32-NEXT:    retl
  %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
  %val = extractvalue {i16, i1} %t, 0
  %obit = extractvalue {i16, i1} %t, 1
  store i16 %val, ptr %res
  ret i1 %obit
}

; Unsigned i32 multiply: stores the product through %res and returns the
; overflow bit.
define zeroext i1 @umuloi32(i32 %v1, i32 %v2, ptr %res) {
; SDAG-LABEL: umuloi32:
; SDAG:       # %bb.0:
; SDAG-NEXT:    movq %rdx, %rcx
; SDAG-NEXT:    movl %edi, %eax
; SDAG-NEXT:    mull %esi
; SDAG-NEXT:    seto %dl
; SDAG-NEXT:    movl %eax, (%rcx)
; SDAG-NEXT:    movl %edx, %eax
; SDAG-NEXT:    retq
;
; FAST-LABEL: umuloi32:
; FAST:       # %bb.0:
; FAST-NEXT:    movq %rdx, %rcx
; FAST-NEXT:    movl %edi, %eax
; FAST-NEXT:    mull %esi
; FAST-NEXT:    seto %dl
; FAST-NEXT:    movl %eax, (%rcx)
; FAST-NEXT:    andb $1, %dl
; FAST-NEXT:    movzbl %dl, %eax
; FAST-NEXT:    retq
;
; WIN64-LABEL: umuloi32:
; WIN64:       # %bb.0:
; WIN64-NEXT:    movl %ecx, %eax
; WIN64-NEXT:    mull %edx
; WIN64-NEXT:    seto %cl
; WIN64-NEXT:    movl %eax, (%r8)
; WIN64-NEXT:    movl %ecx, %eax
; WIN64-NEXT:    retq
;
; WIN32-LABEL: umuloi32:
; WIN32:       # %bb.0:
; WIN32-NEXT:    pushl %esi
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
; WIN32-NEXT:    seto %cl
; WIN32-NEXT:    movl %eax, (%esi)
; WIN32-NEXT:    movl %ecx, %eax
; WIN32-NEXT:    popl %esi
; WIN32-NEXT:    retl
  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, ptr %res
  ret i1 %obit
}

; Unsigned i64 multiply: stores the product through %res and returns the
; overflow bit. The 64-bit targets expect a single mulq; the i386 target
; expects an expanded sequence built from 32-bit multiplies.
define zeroext i1 @umuloi64(i64 %v1, i64 %v2, ptr %res) {
; SDAG-LABEL: umuloi64:
; SDAG:       # %bb.0:
; SDAG-NEXT:    movq %rdx, %rcx
; SDAG-NEXT:    movq %rdi, %rax
; SDAG-NEXT:    mulq %rsi
; SDAG-NEXT:    seto %dl
; SDAG-NEXT:    movq %rax, (%rcx)
; SDAG-NEXT:    movl %edx, %eax
; SDAG-NEXT:    retq
;
; FAST-LABEL: umuloi64:
; FAST:       # %bb.0:
; FAST-NEXT:    movq %rdx, %rcx
; FAST-NEXT:    movq %rdi, %rax
; FAST-NEXT:    mulq %rsi
; FAST-NEXT:    seto %dl
; FAST-NEXT:    movq %rax, (%rcx)
; FAST-NEXT:    andb $1, %dl
; FAST-NEXT:    movzbl %dl, %eax
; FAST-NEXT:    retq
;
; WIN64-LABEL: umuloi64:
; WIN64:       # %bb.0:
; WIN64-NEXT:    movq %rcx, %rax
; WIN64-NEXT:    mulq %rdx
; WIN64-NEXT:    seto %cl
; WIN64-NEXT:    movq %rax, (%r8)
; WIN64-NEXT:    movl %ecx, %eax
; WIN64-NEXT:    retq
;
; WIN32-LABEL: umuloi64:
; WIN32:       # %bb.0:
; WIN32-NEXT:    pushl %ebp
; WIN32-NEXT:    pushl %ebx
; WIN32-NEXT:    pushl %edi
; WIN32-NEXT:    pushl %esi
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT:    testl %esi, %esi
; WIN32-NEXT:    setne %dl
; WIN32-NEXT:    testl %eax, %eax
; WIN32-NEXT:    setne %bl
; WIN32-NEXT:    andb %dl, %bl
; WIN32-NEXT:    mull %ebp
; WIN32-NEXT:    movl %eax, %edi
; WIN32-NEXT:    seto %bh
; WIN32-NEXT:    movl %esi, %eax
; WIN32-NEXT:    mull %ecx
; WIN32-NEXT:    movl %ecx, %edx
; WIN32-NEXT:    seto %ch
; WIN32-NEXT:    orb %bh, %ch
; WIN32-NEXT:    leal (%edi,%eax), %esi
; WIN32-NEXT:    movl %edx, %eax
; WIN32-NEXT:    mull %ebp
; WIN32-NEXT:    addl %esi, %edx
; WIN32-NEXT:    setb %cl
; WIN32-NEXT:    orb %ch, %cl
; WIN32-NEXT:    orb %bl, %cl
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT:    movl %eax, (%esi)
; WIN32-NEXT:    movl %edx, 4(%esi)
; WIN32-NEXT:    movl %ecx, %eax
; WIN32-NEXT:    popl %esi
; WIN32-NEXT:    popl %edi
; WIN32-NEXT:    popl %ebx
; WIN32-NEXT:    popl %ebp
; WIN32-NEXT:    retl
  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, ptr %res
  ret i1 %obit
}

;
; Check the use of the overflow bit in combination with a select instruction.
;
; Signed i32: returns %v1 when the multiply overflows, otherwise %v2. The
; 64-bit targets expect a cmovo; the i386 target expects a jo around a move.
define i32 @smuloselecti32(i32 %v1, i32 %v2) {
; LINUX-LABEL: smuloselecti32:
; LINUX:       # %bb.0:
; LINUX-NEXT:    movl %esi, %eax
; LINUX-NEXT:    movl %edi, %ecx
; LINUX-NEXT:    imull %esi, %ecx
; LINUX-NEXT:    cmovol %edi, %eax
; LINUX-NEXT:    retq
;
; WIN64-LABEL: smuloselecti32:
; WIN64:       # %bb.0:
; WIN64-NEXT:    movl %edx, %eax
; WIN64-NEXT:    movl %ecx, %edx
; WIN64-NEXT:    imull %eax, %edx
; WIN64-NEXT:    cmovol %ecx, %eax
; WIN64-NEXT:    retq
;
; WIN32-LABEL: smuloselecti32:
; WIN32:       # %bb.0:
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    movl %eax, %edx
; WIN32-NEXT:    imull %ecx, %edx
; WIN32-NEXT:    jo LBB11_2
; WIN32-NEXT:  # %bb.1:
; WIN32-NEXT:    movl %ecx, %eax
; WIN32-NEXT:  LBB11_2:
; WIN32-NEXT:    retl
  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
  %obit = extractvalue {i32, i1} %t, 1
  %ret = select i1 %obit, i32 %v1, i32 %v2
  ret i32 %ret
}

; Signed i64: returns %v1 when the multiply overflows, otherwise %v2.
define i64 @smuloselecti64(i64 %v1, i64 %v2) {
; LINUX-LABEL: smuloselecti64:
; LINUX:       # %bb.0:
; LINUX-NEXT:    movq %rsi, %rax
; LINUX-NEXT:    movq %rdi, %rcx
; LINUX-NEXT:    imulq %rsi, %rcx
; LINUX-NEXT:    cmovoq %rdi, %rax
; LINUX-NEXT:    retq
;
; WIN64-LABEL: smuloselecti64:
; WIN64:       # %bb.0:
; WIN64-NEXT:    movq %rdx, %rax
; WIN64-NEXT:    movq %rcx, %rdx
; WIN64-NEXT:    imulq %rax, %rdx
; WIN64-NEXT:    cmovoq %rcx, %rax
; WIN64-NEXT:    retq
;
; WIN32-LABEL: smuloselecti64:
; WIN32:       # %bb.0:
; WIN32-NEXT:    pushl %ebp
; WIN32-NEXT:    pushl %ebx
; WIN32-NEXT:    pushl %edi
; WIN32-NEXT:    pushl %esi
; WIN32-NEXT:    pushl %eax
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    movl %eax, %ecx
; WIN32-NEXT:    movl %eax, %esi
; WIN32-NEXT:    sarl $31, %ecx
; WIN32-NEXT:    movl %ebp, %edi
; WIN32-NEXT:    imull %ecx, %edi
; WIN32-NEXT:    movl %ebp, %eax
; WIN32-NEXT:    mull %ecx
; WIN32-NEXT:    movl %eax, (%esp) # 4-byte Spill
; WIN32-NEXT:    addl %edi, %edx
; WIN32-NEXT:    imull %ebx, %ecx
; WIN32-NEXT:    addl %edx, %ecx
; WIN32-NEXT:    sarl $31, %ebx
; WIN32-NEXT:    movl %ebx, %edi
; WIN32-NEXT:    imull %esi, %edi
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT:    movl %ebx, %eax
; WIN32-NEXT:    mull %esi
; WIN32-NEXT:    addl %edi, %edx
; WIN32-NEXT:    movl %esi, %edi
; WIN32-NEXT:    imull %esi, %ebx
; WIN32-NEXT:    addl %edx, %ebx
; WIN32-NEXT:    addl (%esp), %eax # 4-byte Folded Reload
; WIN32-NEXT:    movl %eax, (%esp) # 4-byte Spill
; WIN32-NEXT:    adcl %ecx, %ebx
; WIN32-NEXT:    movl %edi, %eax
; WIN32-NEXT:    mull %ebp
; WIN32-NEXT:    movl %edx, %esi
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    mull %ebp
; WIN32-NEXT:    movl %edx, %ecx
; WIN32-NEXT:    movl %eax, %ebp
; WIN32-NEXT:    addl %esi, %ebp
; WIN32-NEXT:    adcl $0, %ecx
; WIN32-NEXT:    movl %edi, %eax
; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
; WIN32-NEXT:    movl %edx, %edi
; WIN32-NEXT:    movl %eax, %esi
; WIN32-NEXT:    addl %ebp, %esi
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; WIN32-NEXT:    adcl %ecx, %edi
; WIN32-NEXT:    setb %cl
; WIN32-NEXT:    movl %ebp, %eax
; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
; WIN32-NEXT:    addl %edi, %eax
; WIN32-NEXT:    movzbl %cl, %ecx
; WIN32-NEXT:    adcl %ecx, %edx
; WIN32-NEXT:    addl (%esp), %eax # 4-byte Folded Reload
; WIN32-NEXT:    adcl %ebx, %edx
; WIN32-NEXT:    sarl $31, %esi
; WIN32-NEXT:    xorl %esi, %edx
; WIN32-NEXT:    xorl %eax, %esi
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    orl %edx, %esi
; WIN32-NEXT:    jne LBB12_2
; WIN32-NEXT:  # %bb.1:
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; WIN32-NEXT:  LBB12_2:
; WIN32-NEXT:    movl %ebp, %edx
; WIN32-NEXT:    addl $4, %esp
; WIN32-NEXT:    popl %esi
; WIN32-NEXT:    popl %edi
; WIN32-NEXT:    popl %ebx
; WIN32-NEXT:    popl %ebp
; WIN32-NEXT:    retl
  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
  %obit = extractvalue {i64, i1} %t, 1
  %ret = select i1 %obit, i64 %v1, i64 %v2
  ret i64 %ret
}

; Unsigned i32: returns %v1 when the multiply overflows, otherwise %v2.
define i32 @umuloselecti32(i32 %v1, i32 %v2) {
; LINUX-LABEL: umuloselecti32:
; LINUX:       # %bb.0:
; LINUX-NEXT:    movl %edi, %eax
; LINUX-NEXT:    mull %esi
; LINUX-NEXT:    cmovol %edi, %esi
; LINUX-NEXT:    movl %esi, %eax
; LINUX-NEXT:    retq
;
; WIN64-LABEL: umuloselecti32:
; WIN64:       # %bb.0:
; WIN64-NEXT:    movl %edx, %r8d
; WIN64-NEXT:    movl %ecx, %eax
; WIN64-NEXT:    mull %edx
; WIN64-NEXT:    cmovol %ecx, %r8d
; WIN64-NEXT:    movl %r8d, %eax
; WIN64-NEXT:    retq
;
; WIN32-LABEL: umuloselecti32:
; WIN32:       # %bb.0:
; WIN32-NEXT:    pushl %esi
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT:    movl %ecx, %eax
; WIN32-NEXT:    mull %esi
; WIN32-NEXT:    jo LBB13_2
; WIN32-NEXT:  # %bb.1:
; WIN32-NEXT:    movl %esi, %ecx
; WIN32-NEXT:  LBB13_2:
; WIN32-NEXT:    movl %ecx, %eax
; WIN32-NEXT:    popl %esi
; WIN32-NEXT:    retl
  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
  %obit = extractvalue {i32, i1} %t, 1
  %ret = select i1 %obit, i32 %v1, i32 %v2
  ret i32 %ret
}

; Unsigned i64: returns %v1 when the multiply overflows, otherwise %v2.
define i64 @umuloselecti64(i64 %v1, i64 %v2) {
; LINUX-LABEL: umuloselecti64:
; LINUX:       # %bb.0:
; LINUX-NEXT:    movq %rdi, %rax
; LINUX-NEXT:    mulq %rsi
; LINUX-NEXT:    cmovoq %rdi, %rsi
; LINUX-NEXT:    movq %rsi, %rax
; LINUX-NEXT:    retq
;
; WIN64-LABEL: umuloselecti64:
; WIN64:       # %bb.0:
; WIN64-NEXT:    movq %rdx, %r8
; WIN64-NEXT:    movq %rcx, %rax
; WIN64-NEXT:    mulq %rdx
; WIN64-NEXT:    cmovoq %rcx, %r8
; WIN64-NEXT:    movq %r8, %rax
; WIN64-NEXT:    retq
;
; WIN32-LABEL: umuloselecti64:
; WIN32:       # %bb.0:
; WIN32-NEXT:    pushl %ebp
; WIN32-NEXT:    pushl %ebx
; WIN32-NEXT:    pushl %edi
; WIN32-NEXT:    pushl %esi
; WIN32-NEXT:    pushl %eax
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; WIN32-NEXT:    testl %ebp, %ebp
; WIN32-NEXT:    setne %al
; WIN32-NEXT:    testl %esi, %esi
; WIN32-NEXT:    setne %bl
; WIN32-NEXT:    andb %al, %bl
; WIN32-NEXT:    movl %esi, %eax
; WIN32-NEXT:    mull %edi
; WIN32-NEXT:    movl %edi, %edx
; WIN32-NEXT:    movl %eax, %edi
; WIN32-NEXT:    seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; WIN32-NEXT:    movl %ebp, %eax
; WIN32-NEXT:    movl %edx, %ebp
; WIN32-NEXT:    mull %ecx
; WIN32-NEXT:    seto %bh
; WIN32-NEXT:    orb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Folded Reload
; WIN32-NEXT:    addl %eax, %edi
; WIN32-NEXT:    movl %ecx, %eax
; WIN32-NEXT:    mull %ebp
; WIN32-NEXT:    addl %edi, %edx
; WIN32-NEXT:    setb %al
; WIN32-NEXT:    orb %bh, %al
; WIN32-NEXT:    orb %bl, %al
; WIN32-NEXT:    testb %al, %al
; WIN32-NEXT:    jne LBB14_2
; WIN32-NEXT:  # %bb.1:
; WIN32-NEXT:    movl %ebp, %ecx
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT:  LBB14_2:
; WIN32-NEXT:    movl %ecx, %eax
; WIN32-NEXT:    movl %esi, %edx
; WIN32-NEXT:    addl $4, %esp
; WIN32-NEXT:    popl %esi
; WIN32-NEXT:    popl %edi
; WIN32-NEXT:    popl %ebx
; WIN32-NEXT:    popl %ebp
; WIN32-NEXT:    retl
  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
  %obit = extractvalue {i64, i1} %t, 1
  %ret = select i1 %obit, i64 %v1, i64 %v2
  ret i64 %ret
}

;
; Check the use of the overflow bit in combination with a branch instruction.
;
; Each function below branches on the overflow bit and returns false on the
; overflow path, true on the continue path. The branches carry !prof !0
; metadata; its definition is not in this part of the file.

; Signed i8 multiply feeding a branch.
define zeroext i1 @smulobri8(i8 %v1, i8 %v2) {
; SDAG-LABEL: smulobri8:
; SDAG:       # %bb.0:
; SDAG-NEXT:    movl %edi, %eax
; SDAG-NEXT:    # kill: def $al killed $al killed $eax
; SDAG-NEXT:    imulb %sil
; SDAG-NEXT:    jo .LBB15_1
; SDAG-NEXT:  # %bb.2: # %continue
; SDAG-NEXT:    movb $1, %al
; SDAG-NEXT:    retq
; SDAG-NEXT:  .LBB15_1: # %overflow
; SDAG-NEXT:    xorl %eax, %eax
; SDAG-NEXT:    retq
;
; FAST-LABEL: smulobri8:
; FAST:       # %bb.0:
; FAST-NEXT:    movl %edi, %eax
; FAST-NEXT:    # kill: def $al killed $al killed $eax
; FAST-NEXT:    imulb %sil
; FAST-NEXT:    seto %al
; FAST-NEXT:    testb $1, %al
; FAST-NEXT:    jne .LBB15_1
; FAST-NEXT:  # %bb.2: # %continue
; FAST-NEXT:    movb $1, %al
; FAST-NEXT:    andb $1, %al
; FAST-NEXT:    movzbl %al, %eax
; FAST-NEXT:    retq
; FAST-NEXT:  .LBB15_1: # %overflow
; FAST-NEXT:    xorl %eax, %eax
; FAST-NEXT:    andb $1, %al
; FAST-NEXT:    movzbl %al, %eax
; FAST-NEXT:    retq
;
; WIN64-LABEL: smulobri8:
; WIN64:       # %bb.0:
; WIN64-NEXT:    movl %ecx, %eax
; WIN64-NEXT:    imulb %dl
; WIN64-NEXT:    jo .LBB15_1
; WIN64-NEXT:  # %bb.2: # %continue
; WIN64-NEXT:    movb $1, %al
; WIN64-NEXT:    retq
; WIN64-NEXT:  .LBB15_1: # %overflow
; WIN64-NEXT:    xorl %eax, %eax
; WIN64-NEXT:    retq
;
; WIN32-LABEL: smulobri8:
; WIN32:       # %bb.0:
; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    imulb {{[0-9]+}}(%esp)
; WIN32-NEXT:    jo LBB15_1
; WIN32-NEXT:  # %bb.2: # %continue
; WIN32-NEXT:    movb $1, %al
; WIN32-NEXT:    retl
; WIN32-NEXT:  LBB15_1: # %overflow
; WIN32-NEXT:    xorl %eax, %eax
; WIN32-NEXT:    retl
  %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
  %val = extractvalue {i8, i1} %t, 0
  %obit = extractvalue {i8, i1} %t, 1
  br i1 %obit, label %overflow, label %continue, !prof !0

overflow:
  ret i1 false

continue:
  ret i1 true
}

; Signed i16 multiply feeding a branch.
define zeroext i1 @smulobri16(i16 %v1, i16 %v2) {
; SDAG-LABEL: smulobri16:
; SDAG:       # %bb.0:
; SDAG-NEXT:    imulw %si, %di
; SDAG-NEXT:    jo .LBB16_1
; SDAG-NEXT:  # %bb.2: # %continue
; SDAG-NEXT:    movb $1, %al
; SDAG-NEXT:    retq
; SDAG-NEXT:  .LBB16_1: # %overflow
; SDAG-NEXT:    xorl %eax, %eax
; SDAG-NEXT:    retq
;
; FAST-LABEL: smulobri16:
; FAST:       # %bb.0:
; FAST-NEXT:    imulw %si, %di
; FAST-NEXT:    seto %al
; FAST-NEXT:    testb $1, %al
; FAST-NEXT:    jne .LBB16_1
; FAST-NEXT:  # %bb.2: # %continue
; FAST-NEXT:    movb $1, %al
; FAST-NEXT:    andb $1, %al
; FAST-NEXT:    movzbl %al, %eax
; FAST-NEXT:    retq
; FAST-NEXT:  .LBB16_1: # %overflow
; FAST-NEXT:    xorl %eax, %eax
; FAST-NEXT:    andb $1, %al
; FAST-NEXT:    movzbl %al, %eax
; FAST-NEXT:    retq
;
; WIN64-LABEL: smulobri16:
; WIN64:       # %bb.0:
; WIN64-NEXT:    imulw %dx, %cx
; WIN64-NEXT:    jo .LBB16_1
; WIN64-NEXT:  # %bb.2: # %continue
; WIN64-NEXT:    movb $1, %al
; WIN64-NEXT:    retq
; WIN64-NEXT:  .LBB16_1: # %overflow
; WIN64-NEXT:    xorl %eax, %eax
; WIN64-NEXT:    retq
;
; WIN32-LABEL: smulobri16:
; WIN32:       # %bb.0:
; WIN32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    imulw {{[0-9]+}}(%esp), %ax
; WIN32-NEXT:    jo LBB16_1
; WIN32-NEXT:  # %bb.2: # %continue
; WIN32-NEXT:    movb $1, %al
; WIN32-NEXT:    retl
; WIN32-NEXT:  LBB16_1: # %overflow
; WIN32-NEXT:    xorl %eax, %eax
; WIN32-NEXT:    retl
  %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
  %val = extractvalue {i16, i1} %t, 0
  %obit = extractvalue {i16, i1} %t, 1
  br i1 %obit, label %overflow, label %continue, !prof !0

overflow:
  ret i1 false

continue:
  ret i1 true
}

; Signed i32 multiply feeding a branch.
define zeroext i1 @smulobri32(i32 %v1, i32 %v2) {
; SDAG-LABEL: smulobri32:
; SDAG:       # %bb.0:
; SDAG-NEXT:    imull %esi, %edi
; SDAG-NEXT:    jo .LBB17_1
; SDAG-NEXT:  # %bb.2: # %continue
; SDAG-NEXT:    movb $1, %al
; SDAG-NEXT:    retq
; SDAG-NEXT:  .LBB17_1: # %overflow
; SDAG-NEXT:    xorl %eax, %eax
; SDAG-NEXT:    retq
;
; FAST-LABEL: smulobri32:
; FAST:       # %bb.0:
; FAST-NEXT:    imull %esi, %edi
; FAST-NEXT:    jo .LBB17_1
; FAST-NEXT:  # %bb.2: # %continue
; FAST-NEXT:    movb $1, %al
; FAST-NEXT:    andb $1, %al
; FAST-NEXT:    movzbl %al, %eax
; FAST-NEXT:    retq
; FAST-NEXT:  .LBB17_1: # %overflow
; FAST-NEXT:    xorl %eax, %eax
; FAST-NEXT:    andb $1, %al
; FAST-NEXT:    movzbl %al, %eax
; FAST-NEXT:    retq
;
; WIN64-LABEL: smulobri32:
; WIN64:       # %bb.0:
; WIN64-NEXT:    imull %edx, %ecx
; WIN64-NEXT:    jo .LBB17_1
; WIN64-NEXT:  # %bb.2: # %continue
; WIN64-NEXT:    movb $1, %al
; WIN64-NEXT:    retq
; WIN64-NEXT:  .LBB17_1: # %overflow
; WIN64-NEXT:    xorl %eax, %eax
; WIN64-NEXT:    retq
;
; WIN32-LABEL: smulobri32:
; WIN32:       # %bb.0:
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    imull {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    jo LBB17_1
; WIN32-NEXT:  # %bb.2: # %continue
; WIN32-NEXT:    movb $1, %al
; WIN32-NEXT:    retl
; WIN32-NEXT:  LBB17_1: # %overflow
; WIN32-NEXT:    xorl %eax, %eax
; WIN32-NEXT:    retl
  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  br i1 %obit, label %overflow, label %continue, !prof !0

overflow:
  ret i1 false

continue:
  ret i1 true
}

; Signed i64 multiply feeding a branch.
define zeroext i1 @smulobri64(i64 %v1, i64 %v2) {
; SDAG-LABEL: smulobri64:
; SDAG:       # %bb.0:
; SDAG-NEXT:    imulq %rsi, %rdi
; SDAG-NEXT:    jo .LBB18_1
; SDAG-NEXT:  # %bb.2: # %continue
; SDAG-NEXT:    movb $1, %al
; SDAG-NEXT:    retq
; SDAG-NEXT:  .LBB18_1: # %overflow
; SDAG-NEXT:    xorl %eax, %eax
; SDAG-NEXT:    retq
;
; FAST-LABEL: smulobri64:
; FAST:       # %bb.0:
; FAST-NEXT:    imulq %rsi, %rdi
; FAST-NEXT:    jo .LBB18_1
; FAST-NEXT:  # %bb.2: # %continue
; FAST-NEXT:    movb $1, %al
; FAST-NEXT:    andb $1, %al
; FAST-NEXT:    movzbl %al, %eax
; FAST-NEXT:    retq
; FAST-NEXT:  .LBB18_1: # %overflow
; FAST-NEXT:    xorl %eax, %eax
; FAST-NEXT:    andb $1, %al
; FAST-NEXT:    movzbl %al, %eax
; FAST-NEXT:    retq
;
; WIN64-LABEL: smulobri64:
; WIN64:       # %bb.0:
; WIN64-NEXT:    imulq %rdx, %rcx
; WIN64-NEXT:    jo .LBB18_1
; WIN64-NEXT:  # %bb.2: # %continue
; WIN64-NEXT:    movb $1, %al
; WIN64-NEXT:    retq
; WIN64-NEXT:  .LBB18_1: # %overflow
; WIN64-NEXT:    xorl %eax, %eax
; WIN64-NEXT:    retq
;
; WIN32-LABEL: smulobri64:
; WIN32:       # %bb.0:
; WIN32-NEXT:    pushl %ebp
; WIN32-NEXT:    pushl %ebx
; WIN32-NEXT:    pushl %edi
; WIN32-NEXT:    pushl %esi
; WIN32-NEXT:    pushl %eax
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT:    movl %ecx, %edi
; WIN32-NEXT:    sarl $31, %edi
; WIN32-NEXT:    movl %eax, %esi
; WIN32-NEXT:    imull %edi, %esi
; WIN32-NEXT:    mull %edi
; WIN32-NEXT:    movl %eax, %ebp
; WIN32-NEXT:    addl %esi, %edx
; WIN32-NEXT:    movl %ebx, %esi
; WIN32-NEXT:    imull %ebx, %edi
; WIN32-NEXT:    addl %edx, %edi
; WIN32-NEXT:    sarl $31, %esi
; WIN32-NEXT:    movl %esi, %ebx
; WIN32-NEXT:    imull %ecx, %ebx
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT:    movl %esi, %eax
; WIN32-NEXT:    mull %ecx
; WIN32-NEXT:    addl %ebx, %edx
; WIN32-NEXT:    imull %ecx, %esi
; WIN32-NEXT:    addl %edx, %esi
; WIN32-NEXT:    addl %ebp, %eax
; WIN32-NEXT:    movl %eax, (%esp) # 4-byte Spill
; WIN32-NEXT:    adcl %edi, %esi
; WIN32-NEXT:    movl %ecx, %eax
; WIN32-NEXT:    movl %ecx, %edi
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT:    mull %ecx
; WIN32-NEXT:    movl %edx, %ebx
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    mull %ecx
; WIN32-NEXT:    movl %edx, %ebp
; WIN32-NEXT:    movl %eax, %ecx
; WIN32-NEXT:    addl %ebx, %ecx
; WIN32-NEXT:    adcl $0, %ebp
; WIN32-NEXT:    movl %edi, %eax
; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
; WIN32-NEXT:    movl %edx, %edi
; WIN32-NEXT:    movl %eax, %ebx
; WIN32-NEXT:    addl %ecx, %ebx
; WIN32-NEXT:    adcl %ebp, %edi
; WIN32-NEXT:    setb %cl
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
; WIN32-NEXT:    addl %edi, %eax
; WIN32-NEXT:    movzbl %cl, %ecx
; WIN32-NEXT:    adcl %ecx, %edx
; WIN32-NEXT:    addl (%esp), %eax # 4-byte Folded Reload
; WIN32-NEXT:    adcl %esi, %edx
; WIN32-NEXT:    sarl $31, %ebx
; WIN32-NEXT:    xorl %ebx, %edx
; WIN32-NEXT:    xorl %eax, %ebx
; WIN32-NEXT:    orl %edx, %ebx
; WIN32-NEXT:    jne LBB18_1
; WIN32-NEXT:  # %bb.3: # %continue
; WIN32-NEXT:    movb $1, %al
; WIN32-NEXT:  LBB18_2: # %overflow
; WIN32-NEXT:    addl $4, %esp
; WIN32-NEXT:    popl %esi
; WIN32-NEXT:    popl %edi
; WIN32-NEXT:    popl %ebx
; WIN32-NEXT:    popl %ebp
; WIN32-NEXT:    retl
; WIN32-NEXT:  LBB18_1: # %overflow
; WIN32-NEXT:    xorl %eax, %eax
; WIN32-NEXT:    jmp LBB18_2
  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  br i1 %obit, label %overflow, label %continue, !prof !0

overflow:
  ret i1 false

continue:
  ret i1 true
}

; Unsigned i8 multiply feeding a branch.
define zeroext i1 @umulobri8(i8 %v1, i8 %v2) {
; SDAG-LABEL: umulobri8:
; SDAG:       # %bb.0:
; SDAG-NEXT:    movl %edi, %eax
; SDAG-NEXT:    # kill: def $al killed $al killed $eax
; SDAG-NEXT:    mulb %sil
; SDAG-NEXT:    jo .LBB19_1
; SDAG-NEXT:  # %bb.2: # %continue
; SDAG-NEXT:    movb $1, %al
; SDAG-NEXT:    retq
; SDAG-NEXT:  .LBB19_1: # %overflow
; SDAG-NEXT:    xorl %eax, %eax
; SDAG-NEXT:    retq
;
; FAST-LABEL: umulobri8:
; FAST:       # %bb.0:
; FAST-NEXT:    movl %edi, %eax
; FAST-NEXT:    # kill: def $al killed $al killed $eax
; FAST-NEXT:    mulb %sil
; FAST-NEXT:    seto %al
; FAST-NEXT:    testb $1, %al
; FAST-NEXT:    jne .LBB19_1
; FAST-NEXT:  # %bb.2: # %continue
; FAST-NEXT:    movb $1, %al
; FAST-NEXT:    andb $1, %al
; FAST-NEXT:    movzbl %al, %eax
; FAST-NEXT:    retq
; FAST-NEXT:  .LBB19_1: # %overflow
; FAST-NEXT:    xorl %eax, %eax
; FAST-NEXT:    andb $1, %al
; FAST-NEXT:    movzbl %al, %eax
; FAST-NEXT:    retq
;
; WIN64-LABEL: umulobri8:
; WIN64:       # %bb.0:
; WIN64-NEXT:    movl %ecx, %eax
; WIN64-NEXT:    mulb %dl
; WIN64-NEXT:    jo .LBB19_1
; WIN64-NEXT:  # %bb.2: # %continue
; WIN64-NEXT:    movb $1, %al
; WIN64-NEXT:    retq
; WIN64-NEXT:  .LBB19_1: # %overflow
; WIN64-NEXT:    xorl %eax, %eax
; WIN64-NEXT:    retq
;
; WIN32-LABEL: umulobri8:
; WIN32:       # %bb.0:
; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    mulb {{[0-9]+}}(%esp)
; WIN32-NEXT:    jo LBB19_1
; WIN32-NEXT:  # %bb.2: # %continue
; WIN32-NEXT:    movb $1, %al
; WIN32-NEXT:    retl
; WIN32-NEXT:  LBB19_1: # %overflow
; WIN32-NEXT:    xorl %eax, %eax
; WIN32-NEXT:    retl
  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
  %val = extractvalue {i8, i1} %t, 0
  %obit = extractvalue {i8, i1} %t, 1
  br i1 %obit, label %overflow, label %continue, !prof !0

overflow:
  ret i1 false

continue:
  ret i1 true
}

; Unsigned i16 multiply feeding a branch.
define zeroext i1 @umulobri16(i16 %v1, i16 %v2) {
; SDAG-LABEL: umulobri16:
; SDAG:       # %bb.0:
; SDAG-NEXT:    movl %edi, %eax
; SDAG-NEXT:    # kill: def $ax killed $ax killed $eax
; SDAG-NEXT:    mulw %si
; SDAG-NEXT:    jo .LBB20_1
; SDAG-NEXT:  # %bb.2: # %continue
; SDAG-NEXT:    movb $1, %al
; SDAG-NEXT:    retq
; SDAG-NEXT:  .LBB20_1: # %overflow
; SDAG-NEXT:    xorl %eax, %eax
; SDAG-NEXT:    retq
;
; FAST-LABEL: umulobri16:
; FAST:       # %bb.0:
; FAST-NEXT:    movl %edi, %eax
; FAST-NEXT:    # kill: def $ax killed $ax killed $eax
; FAST-NEXT:    mulw %si
; FAST-NEXT:    seto %al
; FAST-NEXT:    testb $1, %al
; FAST-NEXT:    jne .LBB20_1
; FAST-NEXT:  # %bb.2: # %continue
; FAST-NEXT:    movb $1, %al
; FAST-NEXT:    andb $1, %al
; FAST-NEXT:    movzbl %al, %eax
; FAST-NEXT:    retq
; FAST-NEXT:  .LBB20_1: # %overflow
; FAST-NEXT:    xorl %eax, %eax
; FAST-NEXT:    andb $1, %al
; FAST-NEXT:    movzbl %al, %eax
; FAST-NEXT:    retq
;
; WIN64-LABEL: umulobri16:
; WIN64:       # %bb.0:
; WIN64-NEXT:    movl %ecx, %eax
; WIN64-NEXT:    mulw %dx
; WIN64-NEXT:    jo .LBB20_1
; WIN64-NEXT:  # %bb.2: # %continue
; WIN64-NEXT:    movb $1, %al
; WIN64-NEXT:    retq
; WIN64-NEXT:  .LBB20_1: # %overflow
; WIN64-NEXT:    xorl %eax, %eax
; WIN64-NEXT:    retq
;
; WIN32-LABEL: umulobri16:
; WIN32:       # %bb.0:
; WIN32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    mulw {{[0-9]+}}(%esp)
; WIN32-NEXT:    jo LBB20_1
; WIN32-NEXT:  # %bb.2: # %continue
; WIN32-NEXT:    movb $1, %al
; WIN32-NEXT:    retl
; WIN32-NEXT:  LBB20_1: # %overflow
; WIN32-NEXT:    xorl %eax, %eax
; WIN32-NEXT:    retl
  %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
  %val = extractvalue {i16, i1} %t, 0
  %obit = extractvalue {i16, i1} %t, 1
  br i1 %obit, label %overflow, label %continue, !prof !0

overflow:
  ret i1 false

continue:
  ret i1 true
}

define zeroext i1 @umulobri32(i32 %v1, i32 %v2) {
; SDAG-LABEL: umulobri32:
; SDAG:       # %bb.0:
; SDAG-NEXT:    movl %edi, %eax
; SDAG-NEXT:    mull %esi
; SDAG-NEXT:    jo .LBB21_1
; SDAG-NEXT:  # %bb.2: # %continue
; SDAG-NEXT:    movb $1, %al
; SDAG-NEXT:    retq
; SDAG-NEXT:  .LBB21_1: # %overflow
; SDAG-NEXT:    xorl %eax, %eax
; SDAG-NEXT:    retq
;
; FAST-LABEL: umulobri32:
; FAST:       # %bb.0:
; FAST-NEXT:    movl %edi, %eax
; FAST-NEXT:    mull %esi
; FAST-NEXT:    jo .LBB21_1
; FAST-NEXT:  # %bb.2: # %continue
; FAST-NEXT:    movb $1, %al
; FAST-NEXT:    andb $1, %al
; FAST-NEXT:    movzbl %al,
%eax 1236; FAST-NEXT: retq 1237; FAST-NEXT: .LBB21_1: # %overflow 1238; FAST-NEXT: xorl %eax, %eax 1239; FAST-NEXT: andb $1, %al 1240; FAST-NEXT: movzbl %al, %eax 1241; FAST-NEXT: retq 1242; 1243; WIN64-LABEL: umulobri32: 1244; WIN64: # %bb.0: 1245; WIN64-NEXT: movl %ecx, %eax 1246; WIN64-NEXT: mull %edx 1247; WIN64-NEXT: jo .LBB21_1 1248; WIN64-NEXT: # %bb.2: # %continue 1249; WIN64-NEXT: movb $1, %al 1250; WIN64-NEXT: retq 1251; WIN64-NEXT: .LBB21_1: # %overflow 1252; WIN64-NEXT: xorl %eax, %eax 1253; WIN64-NEXT: retq 1254; 1255; WIN32-LABEL: umulobri32: 1256; WIN32: # %bb.0: 1257; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1258; WIN32-NEXT: mull {{[0-9]+}}(%esp) 1259; WIN32-NEXT: jo LBB21_1 1260; WIN32-NEXT: # %bb.2: # %continue 1261; WIN32-NEXT: movb $1, %al 1262; WIN32-NEXT: retl 1263; WIN32-NEXT: LBB21_1: # %overflow 1264; WIN32-NEXT: xorl %eax, %eax 1265; WIN32-NEXT: retl 1266 %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) 1267 %val = extractvalue {i32, i1} %t, 0 1268 %obit = extractvalue {i32, i1} %t, 1 1269 br i1 %obit, label %overflow, label %continue, !prof !0 1270 1271overflow: 1272 ret i1 false 1273 1274continue: 1275 ret i1 true 1276} 1277 1278define zeroext i1 @umulobri64(i64 %v1, i64 %v2) { 1279; SDAG-LABEL: umulobri64: 1280; SDAG: # %bb.0: 1281; SDAG-NEXT: movq %rdi, %rax 1282; SDAG-NEXT: mulq %rsi 1283; SDAG-NEXT: jo .LBB22_1 1284; SDAG-NEXT: # %bb.2: # %continue 1285; SDAG-NEXT: movb $1, %al 1286; SDAG-NEXT: retq 1287; SDAG-NEXT: .LBB22_1: # %overflow 1288; SDAG-NEXT: xorl %eax, %eax 1289; SDAG-NEXT: retq 1290; 1291; FAST-LABEL: umulobri64: 1292; FAST: # %bb.0: 1293; FAST-NEXT: movq %rdi, %rax 1294; FAST-NEXT: mulq %rsi 1295; FAST-NEXT: jo .LBB22_1 1296; FAST-NEXT: # %bb.2: # %continue 1297; FAST-NEXT: movb $1, %al 1298; FAST-NEXT: andb $1, %al 1299; FAST-NEXT: movzbl %al, %eax 1300; FAST-NEXT: retq 1301; FAST-NEXT: .LBB22_1: # %overflow 1302; FAST-NEXT: xorl %eax, %eax 1303; FAST-NEXT: andb $1, %al 1304; FAST-NEXT: movzbl %al, 
%eax 1305; FAST-NEXT: retq 1306; 1307; WIN64-LABEL: umulobri64: 1308; WIN64: # %bb.0: 1309; WIN64-NEXT: movq %rcx, %rax 1310; WIN64-NEXT: mulq %rdx 1311; WIN64-NEXT: jo .LBB22_1 1312; WIN64-NEXT: # %bb.2: # %continue 1313; WIN64-NEXT: movb $1, %al 1314; WIN64-NEXT: retq 1315; WIN64-NEXT: .LBB22_1: # %overflow 1316; WIN64-NEXT: xorl %eax, %eax 1317; WIN64-NEXT: retq 1318; 1319; WIN32-LABEL: umulobri64: 1320; WIN32: # %bb.0: 1321; WIN32-NEXT: pushl %ebp 1322; WIN32-NEXT: pushl %ebx 1323; WIN32-NEXT: pushl %edi 1324; WIN32-NEXT: pushl %esi 1325; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1326; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1327; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp 1328; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 1329; WIN32-NEXT: testl %esi, %esi 1330; WIN32-NEXT: setne %dl 1331; WIN32-NEXT: testl %eax, %eax 1332; WIN32-NEXT: setne %bl 1333; WIN32-NEXT: andb %dl, %bl 1334; WIN32-NEXT: mull %ebp 1335; WIN32-NEXT: movl %eax, %edi 1336; WIN32-NEXT: seto %bh 1337; WIN32-NEXT: movl %esi, %eax 1338; WIN32-NEXT: mull %ecx 1339; WIN32-NEXT: movl %ecx, %edx 1340; WIN32-NEXT: seto %cl 1341; WIN32-NEXT: orb %bh, %cl 1342; WIN32-NEXT: leal (%edi,%eax), %esi 1343; WIN32-NEXT: movl %edx, %eax 1344; WIN32-NEXT: mull %ebp 1345; WIN32-NEXT: addl %esi, %edx 1346; WIN32-NEXT: setb %al 1347; WIN32-NEXT: orb %cl, %al 1348; WIN32-NEXT: orb %bl, %al 1349; WIN32-NEXT: subb $1, %al 1350; WIN32-NEXT: je LBB22_1 1351; WIN32-NEXT: # %bb.3: # %continue 1352; WIN32-NEXT: movb $1, %al 1353; WIN32-NEXT: LBB22_2: # %overflow 1354; WIN32-NEXT: popl %esi 1355; WIN32-NEXT: popl %edi 1356; WIN32-NEXT: popl %ebx 1357; WIN32-NEXT: popl %ebp 1358; WIN32-NEXT: retl 1359; WIN32-NEXT: LBB22_1: # %overflow 1360; WIN32-NEXT: xorl %eax, %eax 1361; WIN32-NEXT: jmp LBB22_2 1362 %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2) 1363 %val = extractvalue {i64, i1} %t, 0 1364 %obit = extractvalue {i64, i1} %t, 1 1365 br i1 %obit, label %overflow, label %continue, !prof !0 1366 1367overflow: 
1368 ret i1 false 1369 1370continue: 1371 ret i1 true 1372} 1373 1374define i1 @bug27873(i64 %c1, i1 %c2) { 1375; LINUX-LABEL: bug27873: 1376; LINUX: # %bb.0: 1377; LINUX-NEXT: movq %rdi, %rax 1378; LINUX-NEXT: movl $160, %ecx 1379; LINUX-NEXT: mulq %rcx 1380; LINUX-NEXT: seto %al 1381; LINUX-NEXT: orb %sil, %al 1382; LINUX-NEXT: retq 1383; 1384; WIN64-LABEL: bug27873: 1385; WIN64: # %bb.0: 1386; WIN64-NEXT: movl %edx, %r8d 1387; WIN64-NEXT: movq %rcx, %rax 1388; WIN64-NEXT: movl $160, %ecx 1389; WIN64-NEXT: mulq %rcx 1390; WIN64-NEXT: seto %al 1391; WIN64-NEXT: orb %r8b, %al 1392; WIN64-NEXT: retq 1393; 1394; WIN32-LABEL: bug27873: 1395; WIN32: # %bb.0: 1396; WIN32-NEXT: pushl %ebx 1397; WIN32-NEXT: movl $160, %eax 1398; WIN32-NEXT: mull {{[0-9]+}}(%esp) 1399; WIN32-NEXT: movl %eax, %ecx 1400; WIN32-NEXT: seto %bl 1401; WIN32-NEXT: movl $160, %eax 1402; WIN32-NEXT: mull {{[0-9]+}}(%esp) 1403; WIN32-NEXT: addl %ecx, %edx 1404; WIN32-NEXT: setb %al 1405; WIN32-NEXT: orb %bl, %al 1406; WIN32-NEXT: orb {{[0-9]+}}(%esp), %al 1407; WIN32-NEXT: popl %ebx 1408; WIN32-NEXT: retl 1409 %mul = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %c1, i64 160) 1410 %mul.overflow = extractvalue { i64, i1 } %mul, 1 1411 %x1 = or i1 %c2, %mul.overflow 1412 ret i1 %x1 1413} 1414 1415define zeroext i1 @smuloi8_load(ptr %ptr1, i8 %v2, ptr %res) { 1416; SDAG-LABEL: smuloi8_load: 1417; SDAG: # %bb.0: 1418; SDAG-NEXT: movl %esi, %eax 1419; SDAG-NEXT: # kill: def $al killed $al killed $eax 1420; SDAG-NEXT: imulb (%rdi) 1421; SDAG-NEXT: seto %cl 1422; SDAG-NEXT: movb %al, (%rdx) 1423; SDAG-NEXT: movl %ecx, %eax 1424; SDAG-NEXT: retq 1425; 1426; FAST-LABEL: smuloi8_load: 1427; FAST: # %bb.0: 1428; FAST-NEXT: movzbl (%rdi), %eax 1429; FAST-NEXT: imulb %sil 1430; FAST-NEXT: seto %cl 1431; FAST-NEXT: movb %al, (%rdx) 1432; FAST-NEXT: andb $1, %cl 1433; FAST-NEXT: movzbl %cl, %eax 1434; FAST-NEXT: retq 1435; 1436; WIN64-LABEL: smuloi8_load: 1437; WIN64: # %bb.0: 1438; WIN64-NEXT: movl %edx, %eax 
1439; WIN64-NEXT: imulb (%rcx) 1440; WIN64-NEXT: seto %cl 1441; WIN64-NEXT: movb %al, (%r8) 1442; WIN64-NEXT: movl %ecx, %eax 1443; WIN64-NEXT: retq 1444; 1445; WIN32-LABEL: smuloi8_load: 1446; WIN32: # %bb.0: 1447; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 1448; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1449; WIN32-NEXT: movzbl (%eax), %eax 1450; WIN32-NEXT: imulb {{[0-9]+}}(%esp) 1451; WIN32-NEXT: seto %cl 1452; WIN32-NEXT: movb %al, (%edx) 1453; WIN32-NEXT: movl %ecx, %eax 1454; WIN32-NEXT: retl 1455 %v1 = load i8, ptr %ptr1 1456 %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2) 1457 %val = extractvalue {i8, i1} %t, 0 1458 %obit = extractvalue {i8, i1} %t, 1 1459 store i8 %val, ptr %res 1460 ret i1 %obit 1461} 1462 1463define zeroext i1 @smuloi8_load2(i8 %v1, ptr %ptr2, ptr %res) { 1464; SDAG-LABEL: smuloi8_load2: 1465; SDAG: # %bb.0: 1466; SDAG-NEXT: movl %edi, %eax 1467; SDAG-NEXT: # kill: def $al killed $al killed $eax 1468; SDAG-NEXT: imulb (%rsi) 1469; SDAG-NEXT: seto %cl 1470; SDAG-NEXT: movb %al, (%rdx) 1471; SDAG-NEXT: movl %ecx, %eax 1472; SDAG-NEXT: retq 1473; 1474; FAST-LABEL: smuloi8_load2: 1475; FAST: # %bb.0: 1476; FAST-NEXT: movl %edi, %eax 1477; FAST-NEXT: # kill: def $al killed $al killed $eax 1478; FAST-NEXT: imulb (%rsi) 1479; FAST-NEXT: seto %cl 1480; FAST-NEXT: movb %al, (%rdx) 1481; FAST-NEXT: andb $1, %cl 1482; FAST-NEXT: movzbl %cl, %eax 1483; FAST-NEXT: retq 1484; 1485; WIN64-LABEL: smuloi8_load2: 1486; WIN64: # %bb.0: 1487; WIN64-NEXT: movl %ecx, %eax 1488; WIN64-NEXT: imulb (%rdx) 1489; WIN64-NEXT: seto %cl 1490; WIN64-NEXT: movb %al, (%r8) 1491; WIN64-NEXT: movl %ecx, %eax 1492; WIN64-NEXT: retq 1493; 1494; WIN32-LABEL: smuloi8_load2: 1495; WIN32: # %bb.0: 1496; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 1497; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1498; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1499; WIN32-NEXT: imulb (%ecx) 1500; WIN32-NEXT: seto %cl 1501; WIN32-NEXT: movb %al, (%edx) 1502; WIN32-NEXT: movl %ecx, %eax 1503; 
WIN32-NEXT: retl 1504 %v2 = load i8, ptr %ptr2 1505 %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2) 1506 %val = extractvalue {i8, i1} %t, 0 1507 %obit = extractvalue {i8, i1} %t, 1 1508 store i8 %val, ptr %res 1509 ret i1 %obit 1510} 1511 1512define zeroext i1 @smuloi16_load(ptr %ptr1, i16 %v2, ptr %res) { 1513; SDAG-LABEL: smuloi16_load: 1514; SDAG: # %bb.0: 1515; SDAG-NEXT: imulw (%rdi), %si 1516; SDAG-NEXT: seto %al 1517; SDAG-NEXT: movw %si, (%rdx) 1518; SDAG-NEXT: retq 1519; 1520; FAST-LABEL: smuloi16_load: 1521; FAST: # %bb.0: 1522; FAST-NEXT: imulw (%rdi), %si 1523; FAST-NEXT: seto %al 1524; FAST-NEXT: movw %si, (%rdx) 1525; FAST-NEXT: andb $1, %al 1526; FAST-NEXT: movzbl %al, %eax 1527; FAST-NEXT: retq 1528; 1529; WIN64-LABEL: smuloi16_load: 1530; WIN64: # %bb.0: 1531; WIN64-NEXT: imulw (%rcx), %dx 1532; WIN64-NEXT: seto %al 1533; WIN64-NEXT: movw %dx, (%r8) 1534; WIN64-NEXT: retq 1535; 1536; WIN32-LABEL: smuloi16_load: 1537; WIN32: # %bb.0: 1538; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1539; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1540; WIN32-NEXT: movzwl (%eax), %edx 1541; WIN32-NEXT: imulw {{[0-9]+}}(%esp), %dx 1542; WIN32-NEXT: seto %al 1543; WIN32-NEXT: movw %dx, (%ecx) 1544; WIN32-NEXT: retl 1545 %v1 = load i16, ptr %ptr1 1546 %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2) 1547 %val = extractvalue {i16, i1} %t, 0 1548 %obit = extractvalue {i16, i1} %t, 1 1549 store i16 %val, ptr %res 1550 ret i1 %obit 1551} 1552 1553define zeroext i1 @smuloi16_load2(i16 %v1, ptr %ptr2, ptr %res) { 1554; SDAG-LABEL: smuloi16_load2: 1555; SDAG: # %bb.0: 1556; SDAG-NEXT: imulw (%rsi), %di 1557; SDAG-NEXT: seto %al 1558; SDAG-NEXT: movw %di, (%rdx) 1559; SDAG-NEXT: retq 1560; 1561; FAST-LABEL: smuloi16_load2: 1562; FAST: # %bb.0: 1563; FAST-NEXT: imulw (%rsi), %di 1564; FAST-NEXT: seto %al 1565; FAST-NEXT: movw %di, (%rdx) 1566; FAST-NEXT: andb $1, %al 1567; FAST-NEXT: movzbl %al, %eax 1568; FAST-NEXT: retq 1569; 1570; WIN64-LABEL: 
smuloi16_load2: 1571; WIN64: # %bb.0: 1572; WIN64-NEXT: imulw (%rdx), %cx 1573; WIN64-NEXT: seto %al 1574; WIN64-NEXT: movw %cx, (%r8) 1575; WIN64-NEXT: retq 1576; 1577; WIN32-LABEL: smuloi16_load2: 1578; WIN32: # %bb.0: 1579; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1580; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1581; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %edx 1582; WIN32-NEXT: imulw (%eax), %dx 1583; WIN32-NEXT: seto %al 1584; WIN32-NEXT: movw %dx, (%ecx) 1585; WIN32-NEXT: retl 1586 %v2 = load i16, ptr %ptr2 1587 %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2) 1588 %val = extractvalue {i16, i1} %t, 0 1589 %obit = extractvalue {i16, i1} %t, 1 1590 store i16 %val, ptr %res 1591 ret i1 %obit 1592} 1593 1594define zeroext i1 @smuloi32_load(ptr %ptr1, i32 %v2, ptr %res) { 1595; SDAG-LABEL: smuloi32_load: 1596; SDAG: # %bb.0: 1597; SDAG-NEXT: imull (%rdi), %esi 1598; SDAG-NEXT: seto %al 1599; SDAG-NEXT: movl %esi, (%rdx) 1600; SDAG-NEXT: retq 1601; 1602; FAST-LABEL: smuloi32_load: 1603; FAST: # %bb.0: 1604; FAST-NEXT: imull (%rdi), %esi 1605; FAST-NEXT: seto %al 1606; FAST-NEXT: movl %esi, (%rdx) 1607; FAST-NEXT: andb $1, %al 1608; FAST-NEXT: movzbl %al, %eax 1609; FAST-NEXT: retq 1610; 1611; WIN64-LABEL: smuloi32_load: 1612; WIN64: # %bb.0: 1613; WIN64-NEXT: imull (%rcx), %edx 1614; WIN64-NEXT: seto %al 1615; WIN64-NEXT: movl %edx, (%r8) 1616; WIN64-NEXT: retq 1617; 1618; WIN32-LABEL: smuloi32_load: 1619; WIN32: # %bb.0: 1620; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1621; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1622; WIN32-NEXT: movl (%eax), %edx 1623; WIN32-NEXT: imull {{[0-9]+}}(%esp), %edx 1624; WIN32-NEXT: seto %al 1625; WIN32-NEXT: movl %edx, (%ecx) 1626; WIN32-NEXT: retl 1627 %v1 = load i32, ptr %ptr1 1628 %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) 1629 %val = extractvalue {i32, i1} %t, 0 1630 %obit = extractvalue {i32, i1} %t, 1 1631 store i32 %val, ptr %res 1632 ret i1 %obit 1633} 1634 1635define zeroext i1 
@smuloi32_load2(i32 %v1, ptr %ptr2, ptr %res) { 1636; SDAG-LABEL: smuloi32_load2: 1637; SDAG: # %bb.0: 1638; SDAG-NEXT: imull (%rsi), %edi 1639; SDAG-NEXT: seto %al 1640; SDAG-NEXT: movl %edi, (%rdx) 1641; SDAG-NEXT: retq 1642; 1643; FAST-LABEL: smuloi32_load2: 1644; FAST: # %bb.0: 1645; FAST-NEXT: imull (%rsi), %edi 1646; FAST-NEXT: seto %al 1647; FAST-NEXT: movl %edi, (%rdx) 1648; FAST-NEXT: andb $1, %al 1649; FAST-NEXT: movzbl %al, %eax 1650; FAST-NEXT: retq 1651; 1652; WIN64-LABEL: smuloi32_load2: 1653; WIN64: # %bb.0: 1654; WIN64-NEXT: imull (%rdx), %ecx 1655; WIN64-NEXT: seto %al 1656; WIN64-NEXT: movl %ecx, (%r8) 1657; WIN64-NEXT: retq 1658; 1659; WIN32-LABEL: smuloi32_load2: 1660; WIN32: # %bb.0: 1661; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1662; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1663; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 1664; WIN32-NEXT: imull (%eax), %edx 1665; WIN32-NEXT: seto %al 1666; WIN32-NEXT: movl %edx, (%ecx) 1667; WIN32-NEXT: retl 1668 %v2 = load i32, ptr %ptr2 1669 %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) 1670 %val = extractvalue {i32, i1} %t, 0 1671 %obit = extractvalue {i32, i1} %t, 1 1672 store i32 %val, ptr %res 1673 ret i1 %obit 1674} 1675 1676define zeroext i1 @smuloi64_load(ptr %ptr1, i64 %v2, ptr %res) { 1677; SDAG-LABEL: smuloi64_load: 1678; SDAG: # %bb.0: 1679; SDAG-NEXT: imulq (%rdi), %rsi 1680; SDAG-NEXT: seto %al 1681; SDAG-NEXT: movq %rsi, (%rdx) 1682; SDAG-NEXT: retq 1683; 1684; FAST-LABEL: smuloi64_load: 1685; FAST: # %bb.0: 1686; FAST-NEXT: imulq (%rdi), %rsi 1687; FAST-NEXT: seto %al 1688; FAST-NEXT: movq %rsi, (%rdx) 1689; FAST-NEXT: andb $1, %al 1690; FAST-NEXT: movzbl %al, %eax 1691; FAST-NEXT: retq 1692; 1693; WIN64-LABEL: smuloi64_load: 1694; WIN64: # %bb.0: 1695; WIN64-NEXT: imulq (%rcx), %rdx 1696; WIN64-NEXT: seto %al 1697; WIN64-NEXT: movq %rdx, (%r8) 1698; WIN64-NEXT: retq 1699; 1700; WIN32-LABEL: smuloi64_load: 1701; WIN32: # %bb.0: 1702; WIN32-NEXT: pushl %ebp 1703; WIN32-NEXT: 
pushl %ebx 1704; WIN32-NEXT: pushl %edi 1705; WIN32-NEXT: pushl %esi 1706; WIN32-NEXT: subl $16, %esp 1707; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx 1708; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1709; WIN32-NEXT: movl (%eax), %esi 1710; WIN32-NEXT: movl 4(%eax), %ebp 1711; WIN32-NEXT: sarl $31, %ebx 1712; WIN32-NEXT: movl %ebx, %ecx 1713; WIN32-NEXT: imull %ebp, %ecx 1714; WIN32-NEXT: movl %ebx, %eax 1715; WIN32-NEXT: mull %esi 1716; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1717; WIN32-NEXT: addl %ecx, %edx 1718; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1719; WIN32-NEXT: imull %esi, %ebx 1720; WIN32-NEXT: addl %edx, %ebx 1721; WIN32-NEXT: movl %ebp, %ecx 1722; WIN32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1723; WIN32-NEXT: sarl $31, %ecx 1724; WIN32-NEXT: movl %eax, %edi 1725; WIN32-NEXT: imull %ecx, %edi 1726; WIN32-NEXT: mull %ecx 1727; WIN32-NEXT: addl %edi, %edx 1728; WIN32-NEXT: imull {{[0-9]+}}(%esp), %ecx 1729; WIN32-NEXT: addl %edx, %ecx 1730; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload 1731; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1732; WIN32-NEXT: adcl %ebx, %ecx 1733; WIN32-NEXT: movl %esi, %eax 1734; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi 1735; WIN32-NEXT: mull %edi 1736; WIN32-NEXT: movl %edx, %ebx 1737; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1738; WIN32-NEXT: movl %ebp, %eax 1739; WIN32-NEXT: mull %edi 1740; WIN32-NEXT: movl %edx, %ebp 1741; WIN32-NEXT: movl %eax, %edi 1742; WIN32-NEXT: addl %ebx, %edi 1743; WIN32-NEXT: adcl $0, %ebp 1744; WIN32-NEXT: movl %esi, %eax 1745; WIN32-NEXT: mull {{[0-9]+}}(%esp) 1746; WIN32-NEXT: movl %edx, %ebx 1747; WIN32-NEXT: movl %eax, %esi 1748; WIN32-NEXT: addl %edi, %esi 1749; WIN32-NEXT: adcl %ebp, %ebx 1750; WIN32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill 1751; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload 1752; WIN32-NEXT: mull {{[0-9]+}}(%esp) 1753; WIN32-NEXT: addl 
%ebx, %eax 1754; WIN32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload 1755; WIN32-NEXT: adcl %edi, %edx 1756; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload 1757; WIN32-NEXT: adcl %ecx, %edx 1758; WIN32-NEXT: movl %esi, %ecx 1759; WIN32-NEXT: sarl $31, %ecx 1760; WIN32-NEXT: xorl %ecx, %edx 1761; WIN32-NEXT: xorl %eax, %ecx 1762; WIN32-NEXT: orl %edx, %ecx 1763; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1764; WIN32-NEXT: movl %esi, 4(%eax) 1765; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1766; WIN32-NEXT: movl %ecx, (%eax) 1767; WIN32-NEXT: setne %al 1768; WIN32-NEXT: addl $16, %esp 1769; WIN32-NEXT: popl %esi 1770; WIN32-NEXT: popl %edi 1771; WIN32-NEXT: popl %ebx 1772; WIN32-NEXT: popl %ebp 1773; WIN32-NEXT: retl 1774 %v1 = load i64, ptr %ptr1 1775 %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) 1776 %val = extractvalue {i64, i1} %t, 0 1777 %obit = extractvalue {i64, i1} %t, 1 1778 store i64 %val, ptr %res 1779 ret i1 %obit 1780} 1781 1782define zeroext i1 @smuloi64_load2(i64 %v1, ptr %ptr2, ptr %res) { 1783; SDAG-LABEL: smuloi64_load2: 1784; SDAG: # %bb.0: 1785; SDAG-NEXT: imulq (%rsi), %rdi 1786; SDAG-NEXT: seto %al 1787; SDAG-NEXT: movq %rdi, (%rdx) 1788; SDAG-NEXT: retq 1789; 1790; FAST-LABEL: smuloi64_load2: 1791; FAST: # %bb.0: 1792; FAST-NEXT: imulq (%rsi), %rdi 1793; FAST-NEXT: seto %al 1794; FAST-NEXT: movq %rdi, (%rdx) 1795; FAST-NEXT: andb $1, %al 1796; FAST-NEXT: movzbl %al, %eax 1797; FAST-NEXT: retq 1798; 1799; WIN64-LABEL: smuloi64_load2: 1800; WIN64: # %bb.0: 1801; WIN64-NEXT: imulq (%rdx), %rcx 1802; WIN64-NEXT: seto %al 1803; WIN64-NEXT: movq %rcx, (%r8) 1804; WIN64-NEXT: retq 1805; 1806; WIN32-LABEL: smuloi64_load2: 1807; WIN32: # %bb.0: 1808; WIN32-NEXT: pushl %ebp 1809; WIN32-NEXT: pushl %ebx 1810; WIN32-NEXT: pushl %edi 1811; WIN32-NEXT: pushl %esi 1812; WIN32-NEXT: subl $12, %esp 1813; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1814; WIN32-NEXT: movl 
{{[0-9]+}}(%esp), %eax 1815; WIN32-NEXT: movl (%eax), %ebp 1816; WIN32-NEXT: movl 4(%eax), %ebx 1817; WIN32-NEXT: movl %ecx, %edi 1818; WIN32-NEXT: sarl $31, %edi 1819; WIN32-NEXT: movl %ebp, %esi 1820; WIN32-NEXT: imull %edi, %esi 1821; WIN32-NEXT: movl %ebp, %eax 1822; WIN32-NEXT: mull %edi 1823; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill 1824; WIN32-NEXT: addl %esi, %edx 1825; WIN32-NEXT: movl %ebx, %esi 1826; WIN32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1827; WIN32-NEXT: imull %ebx, %edi 1828; WIN32-NEXT: addl %edx, %edi 1829; WIN32-NEXT: sarl $31, %esi 1830; WIN32-NEXT: movl %esi, %ebx 1831; WIN32-NEXT: imull %ecx, %ebx 1832; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1833; WIN32-NEXT: movl %esi, %eax 1834; WIN32-NEXT: mull %ecx 1835; WIN32-NEXT: addl %ebx, %edx 1836; WIN32-NEXT: imull %ecx, %esi 1837; WIN32-NEXT: addl %edx, %esi 1838; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload 1839; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1840; WIN32-NEXT: adcl %edi, %esi 1841; WIN32-NEXT: movl %ecx, %eax 1842; WIN32-NEXT: mull %ebp 1843; WIN32-NEXT: movl %edx, %edi 1844; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill 1845; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1846; WIN32-NEXT: mull %ebp 1847; WIN32-NEXT: movl %edx, %ebx 1848; WIN32-NEXT: movl %eax, %ecx 1849; WIN32-NEXT: addl %edi, %ecx 1850; WIN32-NEXT: adcl $0, %ebx 1851; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1852; WIN32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload 1853; WIN32-NEXT: movl %edx, %edi 1854; WIN32-NEXT: movl %eax, %ebp 1855; WIN32-NEXT: addl %ecx, %ebp 1856; WIN32-NEXT: adcl %ebx, %edi 1857; WIN32-NEXT: setb %cl 1858; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1859; WIN32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload 1860; WIN32-NEXT: addl %edi, %eax 1861; WIN32-NEXT: movzbl %cl, %ecx 1862; WIN32-NEXT: adcl %ecx, %edx 1863; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload 1864; WIN32-NEXT: adcl %esi, %edx 1865; 
WIN32-NEXT: movl %ebp, %ecx 1866; WIN32-NEXT: sarl $31, %ecx 1867; WIN32-NEXT: xorl %ecx, %edx 1868; WIN32-NEXT: xorl %eax, %ecx 1869; WIN32-NEXT: orl %edx, %ecx 1870; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1871; WIN32-NEXT: movl %ebp, 4(%eax) 1872; WIN32-NEXT: movl (%esp), %ecx # 4-byte Reload 1873; WIN32-NEXT: movl %ecx, (%eax) 1874; WIN32-NEXT: setne %al 1875; WIN32-NEXT: addl $12, %esp 1876; WIN32-NEXT: popl %esi 1877; WIN32-NEXT: popl %edi 1878; WIN32-NEXT: popl %ebx 1879; WIN32-NEXT: popl %ebp 1880; WIN32-NEXT: retl 1881 %v2 = load i64, ptr %ptr2 1882 %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) 1883 %val = extractvalue {i64, i1} %t, 0 1884 %obit = extractvalue {i64, i1} %t, 1 1885 store i64 %val, ptr %res 1886 ret i1 %obit 1887} 1888 1889define zeroext i1 @umuloi8_load(ptr %ptr1, i8 %v2, ptr %res) { 1890; SDAG-LABEL: umuloi8_load: 1891; SDAG: # %bb.0: 1892; SDAG-NEXT: movl %esi, %eax 1893; SDAG-NEXT: # kill: def $al killed $al killed $eax 1894; SDAG-NEXT: mulb (%rdi) 1895; SDAG-NEXT: seto %cl 1896; SDAG-NEXT: movb %al, (%rdx) 1897; SDAG-NEXT: movl %ecx, %eax 1898; SDAG-NEXT: retq 1899; 1900; FAST-LABEL: umuloi8_load: 1901; FAST: # %bb.0: 1902; FAST-NEXT: movzbl (%rdi), %eax 1903; FAST-NEXT: mulb %sil 1904; FAST-NEXT: seto %cl 1905; FAST-NEXT: movb %al, (%rdx) 1906; FAST-NEXT: andb $1, %cl 1907; FAST-NEXT: movzbl %cl, %eax 1908; FAST-NEXT: retq 1909; 1910; WIN64-LABEL: umuloi8_load: 1911; WIN64: # %bb.0: 1912; WIN64-NEXT: movl %edx, %eax 1913; WIN64-NEXT: mulb (%rcx) 1914; WIN64-NEXT: seto %cl 1915; WIN64-NEXT: movb %al, (%r8) 1916; WIN64-NEXT: movl %ecx, %eax 1917; WIN64-NEXT: retq 1918; 1919; WIN32-LABEL: umuloi8_load: 1920; WIN32: # %bb.0: 1921; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 1922; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1923; WIN32-NEXT: movzbl (%eax), %eax 1924; WIN32-NEXT: mulb {{[0-9]+}}(%esp) 1925; WIN32-NEXT: seto %cl 1926; WIN32-NEXT: movb %al, (%edx) 1927; WIN32-NEXT: movl %ecx, %eax 1928; WIN32-NEXT: retl 1929 %v1 = 
load i8, ptr %ptr1 1930 %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2) 1931 %val = extractvalue {i8, i1} %t, 0 1932 %obit = extractvalue {i8, i1} %t, 1 1933 store i8 %val, ptr %res 1934 ret i1 %obit 1935} 1936 1937define zeroext i1 @umuloi8_load2(i8 %v1, ptr %ptr2, ptr %res) { 1938; SDAG-LABEL: umuloi8_load2: 1939; SDAG: # %bb.0: 1940; SDAG-NEXT: movl %edi, %eax 1941; SDAG-NEXT: # kill: def $al killed $al killed $eax 1942; SDAG-NEXT: mulb (%rsi) 1943; SDAG-NEXT: seto %cl 1944; SDAG-NEXT: movb %al, (%rdx) 1945; SDAG-NEXT: movl %ecx, %eax 1946; SDAG-NEXT: retq 1947; 1948; FAST-LABEL: umuloi8_load2: 1949; FAST: # %bb.0: 1950; FAST-NEXT: movl %edi, %eax 1951; FAST-NEXT: # kill: def $al killed $al killed $eax 1952; FAST-NEXT: mulb (%rsi) 1953; FAST-NEXT: seto %cl 1954; FAST-NEXT: movb %al, (%rdx) 1955; FAST-NEXT: andb $1, %cl 1956; FAST-NEXT: movzbl %cl, %eax 1957; FAST-NEXT: retq 1958; 1959; WIN64-LABEL: umuloi8_load2: 1960; WIN64: # %bb.0: 1961; WIN64-NEXT: movl %ecx, %eax 1962; WIN64-NEXT: mulb (%rdx) 1963; WIN64-NEXT: seto %cl 1964; WIN64-NEXT: movb %al, (%r8) 1965; WIN64-NEXT: movl %ecx, %eax 1966; WIN64-NEXT: retq 1967; 1968; WIN32-LABEL: umuloi8_load2: 1969; WIN32: # %bb.0: 1970; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 1971; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1972; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1973; WIN32-NEXT: mulb (%ecx) 1974; WIN32-NEXT: seto %cl 1975; WIN32-NEXT: movb %al, (%edx) 1976; WIN32-NEXT: movl %ecx, %eax 1977; WIN32-NEXT: retl 1978 %v2 = load i8, ptr %ptr2 1979 %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2) 1980 %val = extractvalue {i8, i1} %t, 0 1981 %obit = extractvalue {i8, i1} %t, 1 1982 store i8 %val, ptr %res 1983 ret i1 %obit 1984} 1985 1986define zeroext i1 @umuloi16_load(ptr %ptr1, i16 %v2, ptr %res) { 1987; SDAG-LABEL: umuloi16_load: 1988; SDAG: # %bb.0: 1989; SDAG-NEXT: movq %rdx, %rcx 1990; SDAG-NEXT: movl %esi, %eax 1991; SDAG-NEXT: # kill: def $ax killed $ax killed $eax 1992; SDAG-NEXT: 
mulw (%rdi) 1993; SDAG-NEXT: seto %dl 1994; SDAG-NEXT: movw %ax, (%rcx) 1995; SDAG-NEXT: movl %edx, %eax 1996; SDAG-NEXT: retq 1997; 1998; FAST-LABEL: umuloi16_load: 1999; FAST: # %bb.0: 2000; FAST-NEXT: movq %rdx, %rcx 2001; FAST-NEXT: movzwl (%rdi), %eax 2002; FAST-NEXT: mulw %si 2003; FAST-NEXT: seto %dl 2004; FAST-NEXT: movw %ax, (%rcx) 2005; FAST-NEXT: andb $1, %dl 2006; FAST-NEXT: movzbl %dl, %eax 2007; FAST-NEXT: retq 2008; 2009; WIN64-LABEL: umuloi16_load: 2010; WIN64: # %bb.0: 2011; WIN64-NEXT: movl %edx, %eax 2012; WIN64-NEXT: mulw (%rcx) 2013; WIN64-NEXT: seto %cl 2014; WIN64-NEXT: movw %ax, (%r8) 2015; WIN64-NEXT: movl %ecx, %eax 2016; WIN64-NEXT: retq 2017; 2018; WIN32-LABEL: umuloi16_load: 2019; WIN32: # %bb.0: 2020; WIN32-NEXT: pushl %esi 2021; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 2022; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 2023; WIN32-NEXT: movzwl (%eax), %eax 2024; WIN32-NEXT: mulw {{[0-9]+}}(%esp) 2025; WIN32-NEXT: seto %cl 2026; WIN32-NEXT: movw %ax, (%esi) 2027; WIN32-NEXT: movl %ecx, %eax 2028; WIN32-NEXT: popl %esi 2029; WIN32-NEXT: retl 2030 %v1 = load i16, ptr %ptr1 2031 %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2) 2032 %val = extractvalue {i16, i1} %t, 0 2033 %obit = extractvalue {i16, i1} %t, 1 2034 store i16 %val, ptr %res 2035 ret i1 %obit 2036} 2037 2038define zeroext i1 @umuloi16_load2(i16 %v1, ptr %ptr2, ptr %res) { 2039; SDAG-LABEL: umuloi16_load2: 2040; SDAG: # %bb.0: 2041; SDAG-NEXT: movq %rdx, %rcx 2042; SDAG-NEXT: movl %edi, %eax 2043; SDAG-NEXT: # kill: def $ax killed $ax killed $eax 2044; SDAG-NEXT: mulw (%rsi) 2045; SDAG-NEXT: seto %dl 2046; SDAG-NEXT: movw %ax, (%rcx) 2047; SDAG-NEXT: movl %edx, %eax 2048; SDAG-NEXT: retq 2049; 2050; FAST-LABEL: umuloi16_load2: 2051; FAST: # %bb.0: 2052; FAST-NEXT: movq %rdx, %rcx 2053; FAST-NEXT: movl %edi, %eax 2054; FAST-NEXT: # kill: def $ax killed $ax killed $eax 2055; FAST-NEXT: mulw (%rsi) 2056; FAST-NEXT: seto %dl 2057; FAST-NEXT: movw %ax, (%rcx) 2058; 
FAST-NEXT: andb $1, %dl 2059; FAST-NEXT: movzbl %dl, %eax 2060; FAST-NEXT: retq 2061; 2062; WIN64-LABEL: umuloi16_load2: 2063; WIN64: # %bb.0: 2064; WIN64-NEXT: movl %ecx, %eax 2065; WIN64-NEXT: mulw (%rdx) 2066; WIN64-NEXT: seto %cl 2067; WIN64-NEXT: movw %ax, (%r8) 2068; WIN64-NEXT: movl %ecx, %eax 2069; WIN64-NEXT: retq 2070; 2071; WIN32-LABEL: umuloi16_load2: 2072; WIN32: # %bb.0: 2073; WIN32-NEXT: pushl %esi 2074; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 2075; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %eax 2076; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 2077; WIN32-NEXT: mulw (%ecx) 2078; WIN32-NEXT: seto %cl 2079; WIN32-NEXT: movw %ax, (%esi) 2080; WIN32-NEXT: movl %ecx, %eax 2081; WIN32-NEXT: popl %esi 2082; WIN32-NEXT: retl 2083 %v2 = load i16, ptr %ptr2 2084 %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2) 2085 %val = extractvalue {i16, i1} %t, 0 2086 %obit = extractvalue {i16, i1} %t, 1 2087 store i16 %val, ptr %res 2088 ret i1 %obit 2089} 2090 2091define zeroext i1 @umuloi32_load(ptr %ptr1, i32 %v2, ptr %res) { 2092; SDAG-LABEL: umuloi32_load: 2093; SDAG: # %bb.0: 2094; SDAG-NEXT: movq %rdx, %rcx 2095; SDAG-NEXT: movl %esi, %eax 2096; SDAG-NEXT: mull (%rdi) 2097; SDAG-NEXT: seto %dl 2098; SDAG-NEXT: movl %eax, (%rcx) 2099; SDAG-NEXT: movl %edx, %eax 2100; SDAG-NEXT: retq 2101; 2102; FAST-LABEL: umuloi32_load: 2103; FAST: # %bb.0: 2104; FAST-NEXT: movq %rdx, %rcx 2105; FAST-NEXT: movl (%rdi), %eax 2106; FAST-NEXT: mull %esi 2107; FAST-NEXT: seto %dl 2108; FAST-NEXT: movl %eax, (%rcx) 2109; FAST-NEXT: andb $1, %dl 2110; FAST-NEXT: movzbl %dl, %eax 2111; FAST-NEXT: retq 2112; 2113; WIN64-LABEL: umuloi32_load: 2114; WIN64: # %bb.0: 2115; WIN64-NEXT: movl %edx, %eax 2116; WIN64-NEXT: mull (%rcx) 2117; WIN64-NEXT: seto %cl 2118; WIN64-NEXT: movl %eax, (%r8) 2119; WIN64-NEXT: movl %ecx, %eax 2120; WIN64-NEXT: retq 2121; 2122; WIN32-LABEL: umuloi32_load: 2123; WIN32: # %bb.0: 2124; WIN32-NEXT: pushl %esi 2125; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl (%eax), %eax
; WIN32-NEXT: mull {{[0-9]+}}(%esp)
; WIN32-NEXT: seto %cl
; WIN32-NEXT: movl %eax, (%esi)
; WIN32-NEXT: movl %ecx, %eax
; WIN32-NEXT: popl %esi
; WIN32-NEXT: retl
  %v1 = load i32, ptr %ptr1
  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, ptr %res
  ret i1 %obit
}

; Unsigned 32-bit multiply-with-overflow whose SECOND operand is loaded from
; %ptr2. The product is stored through %res and the overflow bit is returned.
; Every run configuration below expects the load to be folded into the memory
; operand of the mull instruction rather than emitted as a separate move.
define zeroext i1 @umuloi32_load2(i32 %v1, ptr %ptr2, ptr %res) {
; SDAG-LABEL: umuloi32_load2:
; SDAG: # %bb.0:
; SDAG-NEXT: movq %rdx, %rcx
; SDAG-NEXT: movl %edi, %eax
; SDAG-NEXT: mull (%rsi)
; SDAG-NEXT: seto %dl
; SDAG-NEXT: movl %eax, (%rcx)
; SDAG-NEXT: movl %edx, %eax
; SDAG-NEXT: retq
;
; FAST-LABEL: umuloi32_load2:
; FAST: # %bb.0:
; FAST-NEXT: movq %rdx, %rcx
; FAST-NEXT: movl %edi, %eax
; FAST-NEXT: mull (%rsi)
; FAST-NEXT: seto %dl
; FAST-NEXT: movl %eax, (%rcx)
; FAST-NEXT: andb $1, %dl
; FAST-NEXT: movzbl %dl, %eax
; FAST-NEXT: retq
;
; WIN64-LABEL: umuloi32_load2:
; WIN64: # %bb.0:
; WIN64-NEXT: movl %ecx, %eax
; WIN64-NEXT: mull (%rdx)
; WIN64-NEXT: seto %cl
; WIN64-NEXT: movl %eax, (%r8)
; WIN64-NEXT: movl %ecx, %eax
; WIN64-NEXT: retq
;
; WIN32-LABEL: umuloi32_load2:
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: mull (%ecx)
; WIN32-NEXT: seto %cl
; WIN32-NEXT: movl %eax, (%esi)
; WIN32-NEXT: movl %ecx, %eax
; WIN32-NEXT: popl %esi
; WIN32-NEXT: retl
  %v2 = load i32, ptr %ptr2
  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, ptr %res
  ret i1 %obit
}

; Unsigned 64-bit multiply-with-overflow whose FIRST operand is loaded from
; %ptr1. The product is stored through %res and the overflow bit is returned.
; The SDAG and WIN64 runs expect the load folded into mulq; the fast-isel run
; instead expects a separate movq load followed by a register mulq. The i386
; run expects the i64 multiply expanded into three 32-bit mull instructions,
; with the overflow bit assembled from the setne/seto/setb results.
define zeroext i1 @umuloi64_load(ptr %ptr1, i64 %v2, ptr %res) {
; SDAG-LABEL: umuloi64_load:
; SDAG: # %bb.0:
; SDAG-NEXT: movq %rdx, %rcx
; SDAG-NEXT: movq %rsi, %rax
; SDAG-NEXT: mulq (%rdi)
; SDAG-NEXT: seto %dl
; SDAG-NEXT: movq %rax, (%rcx)
; SDAG-NEXT: movl %edx, %eax
; SDAG-NEXT: retq
;
; FAST-LABEL: umuloi64_load:
; FAST: # %bb.0:
; FAST-NEXT: movq %rdx, %rcx
; FAST-NEXT: movq (%rdi), %rax
; FAST-NEXT: mulq %rsi
; FAST-NEXT: seto %dl
; FAST-NEXT: movq %rax, (%rcx)
; FAST-NEXT: andb $1, %dl
; FAST-NEXT: movzbl %dl, %eax
; FAST-NEXT: retq
;
; WIN64-LABEL: umuloi64_load:
; WIN64: # %bb.0:
; WIN64-NEXT: movq %rdx, %rax
; WIN64-NEXT: mulq (%rcx)
; WIN64-NEXT: seto %cl
; WIN64-NEXT: movq %rax, (%r8)
; WIN64-NEXT: movl %ecx, %eax
; WIN64-NEXT: retq
;
; WIN32-LABEL: umuloi64_load:
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: pushl %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl (%eax), %ecx
; WIN32-NEXT: movl 4(%eax), %eax
; WIN32-NEXT: testl %esi, %esi
; WIN32-NEXT: setne %dl
; WIN32-NEXT: testl %eax, %eax
; WIN32-NEXT: setne %bl
; WIN32-NEXT: andb %dl, %bl
; WIN32-NEXT: mull %ebp
; WIN32-NEXT: movl %eax, %edi
; WIN32-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; WIN32-NEXT: movl %esi, %eax
; WIN32-NEXT: mull %ecx
; WIN32-NEXT: seto %bh
; WIN32-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Folded Reload
; WIN32-NEXT: leal (%edi,%eax), %esi
; WIN32-NEXT: movl %ecx, %eax
; WIN32-NEXT: mull %ebp
; WIN32-NEXT: addl %esi, %edx
; WIN32-NEXT: setb %cl
; WIN32-NEXT: orb %bh, %cl
; WIN32-NEXT: orb %bl, %cl
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: movl %eax, (%esi)
; WIN32-NEXT: movl %edx, 4(%esi)
; WIN32-NEXT: movl %ecx, %eax
; WIN32-NEXT: addl $4, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
; WIN32-NEXT: popl %ebx
; WIN32-NEXT: popl %ebp
; WIN32-NEXT: retl
  %v1 = load i64, ptr %ptr1
  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, ptr %res
  ret i1 %obit
}

; Unsigned 64-bit multiply-with-overflow whose SECOND operand is loaded from
; %ptr2. The product is stored through %res and the overflow bit is returned.
; All three x86-64 configurations (SDAG, fast-isel, WIN64) expect the load
; folded into the memory operand of mulq. The i386 run expects the same
; three-mull expansion as the first-operand variant, with both halves of the
; loaded value read into registers up front.
define zeroext i1 @umuloi64_load2(i64 %v1, ptr %ptr2, ptr %res) {
; SDAG-LABEL: umuloi64_load2:
; SDAG: # %bb.0:
; SDAG-NEXT: movq %rdx, %rcx
; SDAG-NEXT: movq %rdi, %rax
; SDAG-NEXT: mulq (%rsi)
; SDAG-NEXT: seto %dl
; SDAG-NEXT: movq %rax, (%rcx)
; SDAG-NEXT: movl %edx, %eax
; SDAG-NEXT: retq
;
; FAST-LABEL: umuloi64_load2:
; FAST: # %bb.0:
; FAST-NEXT: movq %rdx, %rcx
; FAST-NEXT: movq %rdi, %rax
; FAST-NEXT: mulq (%rsi)
; FAST-NEXT: seto %dl
; FAST-NEXT: movq %rax, (%rcx)
; FAST-NEXT: andb $1, %dl
; FAST-NEXT: movzbl %dl, %eax
; FAST-NEXT: retq
;
; WIN64-LABEL: umuloi64_load2:
; WIN64: # %bb.0:
; WIN64-NEXT: movq %rcx, %rax
; WIN64-NEXT: mulq (%rdx)
; WIN64-NEXT: seto %cl
; WIN64-NEXT: movq %rax, (%r8)
; WIN64-NEXT: movl %ecx, %eax
; WIN64-NEXT: retq
;
; WIN32-LABEL: umuloi64_load2:
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: pushl %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT: movl (%edx), %ecx
; WIN32-NEXT: movl 4(%edx), %esi
; WIN32-NEXT: testl %eax, %eax
; WIN32-NEXT: setne %dl
; WIN32-NEXT: testl %esi, %esi
; WIN32-NEXT: setne %bl
; WIN32-NEXT: andb %dl, %bl
; WIN32-NEXT: mull %ecx
; WIN32-NEXT: movl %eax, %edi
; WIN32-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; WIN32-NEXT: movl %esi, %eax
; WIN32-NEXT: mull %ebp
; WIN32-NEXT: seto %bh
; WIN32-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Folded Reload
; WIN32-NEXT: leal (%edi,%eax), %esi
; WIN32-NEXT: movl %ebp, %eax
; WIN32-NEXT: mull %ecx
; WIN32-NEXT: addl %esi, %edx
; WIN32-NEXT: setb %cl
; WIN32-NEXT: orb %bh, %cl
; WIN32-NEXT: orb %bl, %cl
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: movl %eax, (%esi)
; WIN32-NEXT: movl %edx, 4(%esi)
; WIN32-NEXT: movl %ecx, %eax
; WIN32-NEXT: addl $4, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
; WIN32-NEXT: popl %ebx
; WIN32-NEXT: popl %ebp
; WIN32-NEXT: retl
  %v2 = load i64, ptr %ptr2
  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, ptr %res
  ret i1 %obit
}

; Declarations of the signed and unsigned multiply-with-overflow intrinsics
; for i8/i16/i32/i64. Each returns a {result, overflow-bit} pair.
declare {i8, i1} @llvm.smul.with.overflow.i8 (i8, i8 ) nounwind readnone
declare {i16, i1} @llvm.smul.with.overflow.i16(i16, i16) nounwind readnone
declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
declare {i8, i1} @llvm.umul.with.overflow.i8 (i8, i8 ) nounwind readnone
declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16) nounwind readnone
declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone

; Branch-weight metadata node (weights 0 and 2147483647).
; NOTE(review): its users are not visible in this part of the file; presumably
; attached via !prof to conditional branches in earlier tests - confirm.
!0 = !{!"branch_weights", i32 0, i32 2147483647}