; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s

; Each function below spells out a 4-element dot product as scalar code:
; four pairs of extractelement, four 'fmul fast', and a chain of three
; 'fadd fast'. The expected output for every function is a single vector
; 'fmul fast' followed by a shuffle-and-add reduction whose result is read
; back from lane 0.
;
; The functions differ only in element type (float / double) and in how the
; two input vectors are obtained (passed directly, or loaded through
; pointer arguments).

; float elements; both vectors are passed directly as <4 x float> arguments.
define float @dotf(<4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @dotf(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = fmul fast <4 x float> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    ret float [[TMP2]]
;
entry:
  %vecext = extractelement <4 x float> %x, i32 0
  %vecext1 = extractelement <4 x float> %y, i32 0
  %mul = fmul fast float %vecext, %vecext1
  %vecext.1 = extractelement <4 x float> %x, i32 1
  %vecext1.1 = extractelement <4 x float> %y, i32 1
  %mul.1 = fmul fast float %vecext.1, %vecext1.1
  %add.1 = fadd fast float %mul.1, %mul
  %vecext.2 = extractelement <4 x float> %x, i32 2
  %vecext1.2 = extractelement <4 x float> %y, i32 2
  %mul.2 = fmul fast float %vecext.2, %vecext1.2
  %add.2 = fadd fast float %mul.2, %add.1
  %vecext.3 = extractelement <4 x float> %x, i32 3
  %vecext1.3 = extractelement <4 x float> %y, i32 3
  %mul.3 = fmul fast float %vecext.3, %vecext1.3
  %add.3 = fadd fast float %mul.3, %add.2
  ret float %add.3
}

; double elements; both vectors are loaded (align 32) from unnamed byval
; pointer arguments, referenced in the body as %0 and %1.
define double @dotd(<4 x double>* byval nocapture readonly align 32, <4 x double>* byval nocapture readonly align 32) {
; CHECK-LABEL: @dotd(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, <4 x double>* [[TMP0:%.*]], align 32
; CHECK-NEXT:    [[Y:%.*]] = load <4 x double>, <4 x double>* [[TMP1:%.*]], align 32
; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <4 x double> [[X]], [[Y]]
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x double> [[TMP3]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x double> [[BIN_RDX]], <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x double> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x double> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    ret double [[TMP4]]
;
entry:
  %x = load <4 x double>, <4 x double>* %0, align 32
  %y = load <4 x double>, <4 x double>* %1, align 32
  %vecext = extractelement <4 x double> %x, i32 0
  %vecext1 = extractelement <4 x double> %y, i32 0
  %mul = fmul fast double %vecext, %vecext1
  %vecext.1 = extractelement <4 x double> %x, i32 1
  %vecext1.1 = extractelement <4 x double> %y, i32 1
  %mul.1 = fmul fast double %vecext.1, %vecext1.1
  %add.1 = fadd fast double %mul.1, %mul
  %vecext.2 = extractelement <4 x double> %x, i32 2
  %vecext1.2 = extractelement <4 x double> %y, i32 2
  %mul.2 = fmul fast double %vecext.2, %vecext1.2
  %add.2 = fadd fast double %mul.2, %add.1
  %vecext.3 = extractelement <4 x double> %x, i32 3
  %vecext1.3 = extractelement <4 x double> %y, i32 3
  %mul.3 = fmul fast double %vecext.3, %vecext1.3
  %add.3 = fadd fast double %mul.3, %add.2
  ret double %add.3
}

; float elements; both vectors are loaded (align 16) from the named pointer
; arguments %x and %y. Each scalar fmul takes the %y element as its first
; operand, and the expected vector fmul keeps that operand order.
define float @dotfq(<4 x float>* nocapture readonly %x, <4 x float>* nocapture readonly %y) {
; CHECK-LABEL: @dotfq(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[X:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[Y:%.*]], align 16
; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    ret float [[TMP4]]
;
entry:
  %0 = load <4 x float>, <4 x float>* %x, align 16
  %1 = load <4 x float>, <4 x float>* %y, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %vecext1 = extractelement <4 x float> %1, i32 0
  %mul = fmul fast float %vecext1, %vecext
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %vecext1.1 = extractelement <4 x float> %1, i32 1
  %mul.1 = fmul fast float %vecext1.1, %vecext.1
  %add.1 = fadd fast float %mul.1, %mul
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %vecext1.2 = extractelement <4 x float> %1, i32 2
  %mul.2 = fmul fast float %vecext1.2, %vecext.2
  %add.2 = fadd fast float %mul.2, %add.1
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %vecext1.3 = extractelement <4 x float> %1, i32 3
  %mul.3 = fmul fast float %vecext1.3, %vecext.3
  %add.3 = fadd fast float %mul.3, %add.2
  ret float %add.3
}

; double elements; both vectors are loaded (align 32) from the named pointer
; arguments %x and %y. Each scalar fmul takes the %y element as its first
; operand, and the expected vector fmul keeps that operand order.
define double @dotdq(<4 x double>* nocapture readonly %x, <4 x double>* nocapture readonly %y) {
; CHECK-LABEL: @dotdq(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x double>, <4 x double>* [[X:%.*]], align 32
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[Y:%.*]], align 32
; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <4 x double> [[TMP1]], [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x double> [[TMP3]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x double> [[BIN_RDX]], <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x double> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x double> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    ret double [[TMP4]]
;
entry:
  %0 = load <4 x double>, <4 x double>* %x, align 32
  %1 = load <4 x double>, <4 x double>* %y, align 32
  %vecext = extractelement <4 x double> %0, i32 0
  %vecext1 = extractelement <4 x double> %1, i32 0
  %mul = fmul fast double %vecext1, %vecext
  %vecext.1 = extractelement <4 x double> %0, i32 1
  %vecext1.1 = extractelement <4 x double> %1, i32 1
  %mul.1 = fmul fast double %vecext1.1, %vecext.1
  %add.1 = fadd fast double %mul.1, %mul
  %vecext.2 = extractelement <4 x double> %0, i32 2
  %vecext1.2 = extractelement <4 x double> %1, i32 2
  %mul.2 = fmul fast double %vecext1.2, %vecext.2
  %add.2 = fadd fast double %mul.2, %add.1
  %vecext.3 = extractelement <4 x double> %0, i32 3
  %vecext1.3 = extractelement <4 x double> %1, i32 3
  %mul.3 = fmul fast double %vecext1.3, %vecext.3
  %add.3 = fadd fast double %mul.3, %add.2
  ret double %add.3
}