; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64

define double @f1(double %a) {
; X86-LABEL: f1:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %ebp, -8
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    .cfi_def_cfa_register %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT:    movsd %xmm0, (%esp)
; X86-NEXT:    fldl (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    .cfi_def_cfa %esp, 4
; X86-NEXT:    retl
;
; X64-LABEL: f1:
; X64:       # %bb.0:
; X64-NEXT:    mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    retq
  %1 = fadd fast double %a, %a
  %2 = fadd fast double %a, %a
  %3 = fadd fast double %1, %2
  ret double %3
}

define double @f2(double %a) {
; X86-LABEL: f2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %ebp, -8
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    .cfi_def_cfa_register %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    addsd %xmm0, %xmm0
; X86-NEXT:    movapd %xmm0, %xmm1
; X86-NEXT:    #ARITH_FENCE
; X86-NEXT:    addsd %xmm0, %xmm1
; X86-NEXT:    movsd %xmm1, (%esp)
; X86-NEXT:    fldl (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    .cfi_def_cfa %esp, 4
; X86-NEXT:    retl
;
; X64-LABEL: f2:
; X64:       # %bb.0:
; X64-NEXT:    addsd %xmm0, %xmm0
; X64-NEXT:    movapd %xmm0, %xmm1
; X64-NEXT:    #ARITH_FENCE
; X64-NEXT:    addsd %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = fadd fast double %a, %a
  %t = call double @llvm.arithmetic.fence.f64(double %1)
  %2 = fadd fast double %a, %a
  %3 = fadd fast double %t, %2
  ret double %3
}

define <2 x float> @f3(<2 x float> %a) {
; X86-LABEL: f3:
; X86:       # %bb.0:
; X86-NEXT:    mulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: f3:
; X64:       # %bb.0:
; X64-NEXT:    mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    retq
  %1 = fadd fast <2 x float> %a, %a
  %2 = fadd fast <2 x float> %a, %a
  %3 = fadd fast <2 x float> %1, %2
  ret <2 x float> %3
}

define <2 x float> @f4(<2 x float> %a) {
; X86-LABEL: f4:
; X86:       # %bb.0:
; X86-NEXT:    addps %xmm0, %xmm0
; X86-NEXT:    movaps %xmm0, %xmm1
; X86-NEXT:    #ARITH_FENCE
; X86-NEXT:    addps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: f4:
; X64:       # %bb.0:
; X64-NEXT:    addps %xmm0, %xmm0
; X64-NEXT:    movaps %xmm0, %xmm1
; X64-NEXT:    #ARITH_FENCE
; X64-NEXT:    addps %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = fadd fast <2 x float> %a, %a
  %t = call <2 x float> @llvm.arithmetic.fence.v2f32(<2 x float> %1)
  %2 = fadd fast <2 x float> %a, %a
  %3 = fadd fast <2 x float> %t, %2
  ret <2 x float> %3
}

define <8 x float> @f5(<8 x float> %a) {
; X86-LABEL: f5:
; X86:       # %bb.0:
; X86-NEXT:    movaps {{.*#+}} xmm2 = [4.0E+0,4.0E+0,4.0E+0,4.0E+0]
; X86-NEXT:    mulps %xmm2, %xmm0
; X86-NEXT:    mulps %xmm2, %xmm1
; X86-NEXT:    retl
;
; X64-LABEL: f5:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm2 = [4.0E+0,4.0E+0,4.0E+0,4.0E+0]
; X64-NEXT:    mulps %xmm2, %xmm0
; X64-NEXT:    mulps %xmm2, %xmm1
; X64-NEXT:    retq
  %1 = fadd fast <8 x float> %a, %a
  %2 = fadd fast <8 x float> %a, %a
  %3 = fadd fast <8 x float> %1, %2
  ret <8 x float> %3
}

define <8 x float> @f6(<8 x float> %a) {
; X86-LABEL: f6:
; X86:       # %bb.0:
; X86-NEXT:    addps %xmm0, %xmm0
; X86-NEXT:    addps %xmm1, %xmm1
; X86-NEXT:    movaps %xmm1, %xmm2
; X86-NEXT:    #ARITH_FENCE
; X86-NEXT:    movaps %xmm0, %xmm3
; X86-NEXT:    #ARITH_FENCE
; X86-NEXT:    addps %xmm3, %xmm0
; X86-NEXT:    addps %xmm2, %xmm1
; X86-NEXT:    retl
;
; X64-LABEL: f6:
; X64:       # %bb.0:
; X64-NEXT:    addps %xmm0, %xmm0
; X64-NEXT:    addps %xmm1, %xmm1
; X64-NEXT:    movaps %xmm1, %xmm2
; X64-NEXT:    #ARITH_FENCE
; X64-NEXT:    movaps %xmm0, %xmm3
; X64-NEXT:    #ARITH_FENCE
; X64-NEXT:    addps %xmm3, %xmm0
; X64-NEXT:    addps %xmm2, %xmm1
; X64-NEXT:    retq
  %1 = fadd fast <8 x float> %a, %a
  %t = call <8 x float> @llvm.arithmetic.fence.v8f32(<8 x float> %1)
  %2 = fadd fast <8 x float> %a, %a
  %3 = fadd fast <8 x float> %t, %2
  ret <8 x float> %3
}

declare float @llvm.arithmetic.fence.f32(float)
declare double @llvm.arithmetic.fence.f64(double)
declare <2 x float> @llvm.arithmetic.fence.v2f32(<2 x float>)
declare <8 x float> @llvm.arithmetic.fence.v8f32(<8 x float>)