; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basic-aa -slp-vectorizer -slp-threshold=-999 -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

declare double @sin(double) nounwind willreturn
declare double @cos(double) nounwind willreturn
declare double @pow(double, double) nounwind willreturn
declare double @exp2(double) nounwind willreturn
declare double @sqrt(double) nounwind willreturn
declare i64 @round(i64) nounwind willreturn


define void @sin_libm(double* %a, double* %b) {
; CHECK-LABEL: @sin_libm(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, double* %a, align 8
  %idx1 = getelementptr inbounds double, double* %a, i64 1
  %a1 = load double, double* %idx1, align 8
  %sin1 = tail call double @sin(double %a0) nounwind readnone
  %sin2 = tail call double @sin(double %a1) nounwind readnone
  store double %sin1, double* %b, align 8
  %idx2 = getelementptr inbounds double, double* %b, i64 1
  store double %sin2, double* %idx2, align 8
  ret void
}

define void @cos_libm(double* %a, double* %b) {
; CHECK-LABEL: @cos_libm(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.cos.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, double* %a, align 8
  %idx1 = getelementptr inbounds double, double* %a, i64 1
  %a1 = load double, double* %idx1, align 8
  %cos1 = tail call double @cos(double %a0) nounwind readnone
  %cos2 = tail call double @cos(double %a1) nounwind readnone
  store double %cos1, double* %b, align 8
  %idx2 = getelementptr inbounds double, double* %b, i64 1
  store double %cos2, double* %idx2, align 8
  ret void
}

define void @pow_libm(double* %a, double* %b) {
; CHECK-LABEL: @pow_libm(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[TMP2]], <2 x double> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, double* %a, align 8
  %idx1 = getelementptr inbounds double, double* %a, i64 1
  %a1 = load double, double* %idx1, align 8
  %pow1 = tail call double @pow(double %a0, double %a0) nounwind readnone
  %pow2 = tail call double @pow(double %a1, double %a1) nounwind readnone
  store double %pow1, double* %b, align 8
  %idx2 = getelementptr inbounds double, double* %b, i64 1
  store double %pow2, double* %idx2, align 8
  ret void
}

define void @exp_libm(double* %a, double* %b) {
; CHECK-LABEL: @exp_libm(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.exp2.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, double* %a, align 8
  %idx1 = getelementptr inbounds double, double* %a, i64 1
  %a1 = load double, double* %idx1, align 8
  %exp1 = tail call double @exp2(double %a0) nounwind readnone
  %exp2 = tail call double @exp2(double %a1) nounwind readnone
  store double %exp1, double* %b, align 8
  %idx2 = getelementptr inbounds double, double* %b, i64 1
  store double %exp2, double* %idx2, align 8
  ret void
}

; No fast-math-flags are required to convert sqrt library calls to an intrinsic.
; We just need to know that errno is not set (readnone).

define void @sqrt_libm_no_errno(double* %a, double* %b) {
; CHECK-LABEL: @sqrt_libm_no_errno(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, double* %a, align 8
  %idx1 = getelementptr inbounds double, double* %a, i64 1
  %a1 = load double, double* %idx1, align 8
  %sqrt1 = tail call double @sqrt(double %a0) nounwind readnone
  %sqrt2 = tail call double @sqrt(double %a1) nounwind readnone
  store double %sqrt1, double* %b, align 8
  %idx2 = getelementptr inbounds double, double* %b, i64 1
  store double %sqrt2, double* %idx2, align 8
  ret void
}

; The sqrt intrinsic does not set errno, but a non-constant sqrt call might, so this can't vectorize.
; The nnan on the call does not matter because there's no guarantee in the C standard that a negative
; input would result in a nan output ("On a domain error, the function returns an
; implementation-defined value.")

define void @sqrt_libm_errno(double* %a, double* %b) {
; CHECK-LABEL: @sqrt_libm_errno(
; CHECK-NEXT:    [[A0:%.*]] = load double, double* [[A:%.*]], align 8
; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
; CHECK-NEXT:    [[A1:%.*]] = load double, double* [[IDX1]], align 8
; CHECK-NEXT:    [[SQRT1:%.*]] = tail call nnan double @sqrt(double [[A0]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT:    [[SQRT2:%.*]] = tail call nnan double @sqrt(double [[A1]]) #[[ATTR3]]
; CHECK-NEXT:    store double [[SQRT1]], double* [[B:%.*]], align 8
; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
; CHECK-NEXT:    store double [[SQRT2]], double* [[IDX2]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, double* %a, align 8
  %idx1 = getelementptr inbounds double, double* %a, i64 1
  %a1 = load double, double* %idx1, align 8
  %sqrt1 = tail call nnan double @sqrt(double %a0) nounwind
  %sqrt2 = tail call nnan double @sqrt(double %a1) nounwind
  store double %sqrt1, double* %b, align 8
  %idx2 = getelementptr inbounds double, double* %b, i64 1
  store double %sqrt2, double* %idx2, align 8
  ret void
}

; Negative test case
define void @round_custom(i64* %a, i64* %b) {
; CHECK-LABEL: @round_custom(
; CHECK-NEXT:    [[A0:%.*]] = load i64, i64* [[A:%.*]], align 8
; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 1
; CHECK-NEXT:    [[A1:%.*]] = load i64, i64* [[IDX1]], align 8
; CHECK-NEXT:    [[ROUND1:%.*]] = tail call i64 @round(i64 [[A0]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT:    [[ROUND2:%.*]] = tail call i64 @round(i64 [[A1]]) #[[ATTR4]]
; CHECK-NEXT:    store i64 [[ROUND1]], i64* [[B:%.*]], align 8
; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 1
; CHECK-NEXT:    store i64 [[ROUND2]], i64* [[IDX2]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load i64, i64* %a, align 8
  %idx1 = getelementptr inbounds i64, i64* %a, i64 1
  %a1 = load i64, i64* %idx1, align 8
  %round1 = tail call i64 @round(i64 %a0) nounwind readnone
  %round2 = tail call i64 @round(i64 %a1) nounwind readnone
  store i64 %round1, i64* %b, align 8
  %idx2 = getelementptr inbounds i64, i64* %b, i64 1
  store i64 %round2, i64* %idx2, align 8
  ret void
}