1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt -inject-tli-mappings -slp-vectorizer -vector-library=Accelerate -S %s | FileCheck %s 3; RUN: opt -inject-tli-mappings -slp-vectorizer -S %s | FileCheck --check-prefix NOACCELERATE %s 4 5target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" 6target triple = "arm64-apple-ios14.0.0" 7 8declare float @llvm.sin.f32(float) 9 10; Accelerate provides sin() for <4 x float> 11define <4 x float> @int_sin_4x(<4 x float>* %a) { 12; CHECK-LABEL: @int_sin_4x( 13; CHECK-NEXT: entry: 14; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 15; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vsinf(<4 x float> [[TMP0]]) 16; CHECK-NEXT: ret <4 x float> [[TMP1]] 17; 18; NOACCELERATE-LABEL: @int_sin_4x( 19; NOACCELERATE-NEXT: entry: 20; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 21; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 22; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]]) 23; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 24; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 25; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]]) 26; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 27; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 28; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 29; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 30; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 31; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4]]) 32; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x 
float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 33; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5> 34; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] 35; 36entry: 37 %0 = load <4 x float>, <4 x float>* %a, align 16 38 %vecext = extractelement <4 x float> %0, i32 0 39 %1 = tail call fast float @llvm.sin.f32(float %vecext) 40 %vecins = insertelement <4 x float> undef, float %1, i32 0 41 %vecext.1 = extractelement <4 x float> %0, i32 1 42 %2 = tail call fast float @llvm.sin.f32(float %vecext.1) 43 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 44 %vecext.2 = extractelement <4 x float> %0, i32 2 45 %3 = tail call fast float @llvm.sin.f32(float %vecext.2) 46 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 47 %vecext.3 = extractelement <4 x float> %0, i32 3 48 %4 = tail call fast float @llvm.sin.f32(float %vecext.3) 49 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 50 ret <4 x float> %vecins.3 51} 52 53declare float @ceilf(float) readonly nounwind willreturn 54 55define <4 x float> @ceil_4x(<4 x float>* %a) { 56; CHECK-LABEL: @ceil_4x( 57; CHECK-NEXT: entry: 58; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 59; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]]) 60; CHECK-NEXT: ret <4 x float> [[TMP1]] 61; 62; NOACCELERATE-LABEL: @ceil_4x( 63; NOACCELERATE-NEXT: entry: 64; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 65; NOACCELERATE-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]]) 66; NOACCELERATE-NEXT: ret <4 x float> [[TMP1]] 67; 68entry: 69 %0 = load <4 x float>, <4 x float>* %a, align 16 70 %vecext = extractelement <4 x float> %0, i32 0 71 %1 = tail call fast float @ceilf(float %vecext) 72 %vecins = insertelement <4 x float> undef, float %1, i32 0 
73 %vecext.1 = extractelement <4 x float> %0, i32 1 74 %2 = tail call fast float @ceilf(float %vecext.1) 75 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 76 %vecext.2 = extractelement <4 x float> %0, i32 2 77 %3 = tail call fast float @ceilf(float %vecext.2) 78 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 79 %vecext.3 = extractelement <4 x float> %0, i32 3 80 %4 = tail call fast float @ceilf(float %vecext.3) 81 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 82 ret <4 x float> %vecins.3 83} 84 85declare float @fabsf(float) readonly nounwind willreturn 86 87define <4 x float> @fabs_4x(<4 x float>* %a) { 88; CHECK-LABEL: @fabs_4x( 89; CHECK-NEXT: entry: 90; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 91; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) 92; CHECK-NEXT: ret <4 x float> [[TMP1]] 93; 94; NOACCELERATE-LABEL: @fabs_4x( 95; NOACCELERATE-NEXT: entry: 96; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 97; NOACCELERATE-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) 98; NOACCELERATE-NEXT: ret <4 x float> [[TMP1]] 99; 100entry: 101 %0 = load <4 x float>, <4 x float>* %a, align 16 102 %vecext = extractelement <4 x float> %0, i32 0 103 %1 = tail call fast float @fabsf(float %vecext) 104 %vecins = insertelement <4 x float> undef, float %1, i32 0 105 %vecext.1 = extractelement <4 x float> %0, i32 1 106 %2 = tail call fast float @fabsf(float %vecext.1) 107 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 108 %vecext.2 = extractelement <4 x float> %0, i32 2 109 %3 = tail call fast float @fabsf(float %vecext.2) 110 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 111 %vecext.3 = extractelement <4 x float> %0, i32 3 112 %4 = tail call fast float @fabsf(float %vecext.3) 113 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 114 ret 
<4 x float> %vecins.3 115} 116declare float @llvm.fabs.f32(float) 117define <4 x float> @int_fabs_4x(<4 x float>* %a) { 118; CHECK-LABEL: @int_fabs_4x( 119; CHECK-NEXT: entry: 120; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 121; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) 122; CHECK-NEXT: ret <4 x float> [[TMP1]] 123; 124; NOACCELERATE-LABEL: @int_fabs_4x( 125; NOACCELERATE-NEXT: entry: 126; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 127; NOACCELERATE-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) 128; NOACCELERATE-NEXT: ret <4 x float> [[TMP1]] 129; 130entry: 131 %0 = load <4 x float>, <4 x float>* %a, align 16 132 %vecext = extractelement <4 x float> %0, i32 0 133 %1 = tail call fast float @llvm.fabs.f32(float %vecext) 134 %vecins = insertelement <4 x float> undef, float %1, i32 0 135 %vecext.1 = extractelement <4 x float> %0, i32 1 136 %2 = tail call fast float @llvm.fabs.f32(float %vecext.1) 137 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 138 %vecext.2 = extractelement <4 x float> %0, i32 2 139 %3 = tail call fast float @llvm.fabs.f32(float %vecext.2) 140 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 141 %vecext.3 = extractelement <4 x float> %0, i32 3 142 %4 = tail call fast float @llvm.fabs.f32(float %vecext.3) 143 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 144 ret <4 x float> %vecins.3 145} 146declare float @floorf(float) readonly nounwind willreturn 147define <4 x float> @floor_4x(<4 x float>* %a) { 148; CHECK-LABEL: @floor_4x( 149; CHECK-NEXT: entry: 150; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 151; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]]) 152; CHECK-NEXT: ret <4 x float> [[TMP1]] 153; 154; NOACCELERATE-LABEL: @floor_4x( 155; NOACCELERATE-NEXT: entry: 156; 
NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 157; NOACCELERATE-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]]) 158; NOACCELERATE-NEXT: ret <4 x float> [[TMP1]] 159; 160entry: 161 %0 = load <4 x float>, <4 x float>* %a, align 16 162 %vecext = extractelement <4 x float> %0, i32 0 163 %1 = tail call fast float @floorf(float %vecext) 164 %vecins = insertelement <4 x float> undef, float %1, i32 0 165 %vecext.1 = extractelement <4 x float> %0, i32 1 166 %2 = tail call fast float @floorf(float %vecext.1) 167 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 168 %vecext.2 = extractelement <4 x float> %0, i32 2 169 %3 = tail call fast float @floorf(float %vecext.2) 170 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 171 %vecext.3 = extractelement <4 x float> %0, i32 3 172 %4 = tail call fast float @floorf(float %vecext.3) 173 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 174 ret <4 x float> %vecins.3 175} 176declare float @sqrtf(float) readonly nounwind willreturn 177define <4 x float> @sqrt_4x(<4 x float>* %a) { 178; CHECK-LABEL: @sqrt_4x( 179; CHECK-NEXT: entry: 180; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 181; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) 182; CHECK-NEXT: ret <4 x float> [[TMP1]] 183; 184; NOACCELERATE-LABEL: @sqrt_4x( 185; NOACCELERATE-NEXT: entry: 186; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 187; NOACCELERATE-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) 188; NOACCELERATE-NEXT: ret <4 x float> [[TMP1]] 189; 190entry: 191 %0 = load <4 x float>, <4 x float>* %a, align 16 192 %vecext = extractelement <4 x float> %0, i32 0 193 %1 = tail call fast float @sqrtf(float %vecext) 194 %vecins = insertelement <4 x float> undef, float %1, i32 0 195 %vecext.1 = extractelement <4 x 
float> %0, i32 1 196 %2 = tail call fast float @sqrtf(float %vecext.1) 197 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 198 %vecext.2 = extractelement <4 x float> %0, i32 2 199 %3 = tail call fast float @sqrtf(float %vecext.2) 200 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 201 %vecext.3 = extractelement <4 x float> %0, i32 3 202 %4 = tail call fast float @sqrtf(float %vecext.3) 203 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 204 ret <4 x float> %vecins.3 205} 206declare float @expf(float) readonly nounwind willreturn 207define <4 x float> @exp_4x(<4 x float>* %a) { 208; CHECK-LABEL: @exp_4x( 209; CHECK-NEXT: entry: 210; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 211; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vexpf(<4 x float> [[TMP0]]) 212; CHECK-NEXT: ret <4 x float> [[TMP1]] 213; 214; NOACCELERATE-LABEL: @exp_4x( 215; NOACCELERATE-NEXT: entry: 216; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 217; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 218; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]]) 219; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 220; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 221; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]]) 222; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 223; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 224; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 225; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 226; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 227; NOACCELERATE-NEXT: [[TMP5:%.*]] 
= call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP4]]) 228; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 229; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5> 230; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] 231; 232entry: 233 %0 = load <4 x float>, <4 x float>* %a, align 16 234 %vecext = extractelement <4 x float> %0, i32 0 235 %1 = tail call fast float @expf(float %vecext) 236 %vecins = insertelement <4 x float> undef, float %1, i32 0 237 %vecext.1 = extractelement <4 x float> %0, i32 1 238 %2 = tail call fast float @expf(float %vecext.1) 239 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 240 %vecext.2 = extractelement <4 x float> %0, i32 2 241 %3 = tail call fast float @expf(float %vecext.2) 242 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 243 %vecext.3 = extractelement <4 x float> %0, i32 3 244 %4 = tail call fast float @expf(float %vecext.3) 245 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 246 ret <4 x float> %vecins.3 247} 248declare float @expm1f(float) readonly nounwind willreturn 249define <4 x float> @expm1_4x(<4 x float>* %a) { 250; CHECK-LABEL: @expm1_4x( 251; CHECK-NEXT: entry: 252; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 253; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vexpm1f(<4 x float> [[TMP0]]) 254; CHECK-NEXT: ret <4 x float> [[TMP1]] 255; 256; NOACCELERATE-LABEL: @expm1_4x( 257; NOACCELERATE-NEXT: entry: 258; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 259; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 260; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @expm1f(float [[VECEXT]]) 261; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 
262; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 263; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @expm1f(float [[VECEXT_1]]) 264; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 265; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 266; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @expm1f(float [[VECEXT_2]]) 267; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 268; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 269; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @expm1f(float [[VECEXT_3]]) 270; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 271; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]] 272; 273entry: 274 %0 = load <4 x float>, <4 x float>* %a, align 16 275 %vecext = extractelement <4 x float> %0, i32 0 276 %1 = tail call fast float @expm1f(float %vecext) 277 %vecins = insertelement <4 x float> undef, float %1, i32 0 278 %vecext.1 = extractelement <4 x float> %0, i32 1 279 %2 = tail call fast float @expm1f(float %vecext.1) 280 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 281 %vecext.2 = extractelement <4 x float> %0, i32 2 282 %3 = tail call fast float @expm1f(float %vecext.2) 283 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 284 %vecext.3 = extractelement <4 x float> %0, i32 3 285 %4 = tail call fast float @expm1f(float %vecext.3) 286 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 287 ret <4 x float> %vecins.3 288} 289declare float @logf(float) readonly nounwind willreturn 290define <4 x float> @log_4x(<4 x float>* %a) { 291; CHECK-LABEL: @log_4x( 292; CHECK-NEXT: entry: 293; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 294; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vlogf(<4 x 
float> [[TMP0]]) 295; CHECK-NEXT: ret <4 x float> [[TMP1]] 296; 297; NOACCELERATE-LABEL: @log_4x( 298; NOACCELERATE-NEXT: entry: 299; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 300; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 301; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]]) 302; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 303; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 304; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]]) 305; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 306; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 307; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 308; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 309; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 310; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP4]]) 311; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 312; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5> 313; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] 314; 315entry: 316 %0 = load <4 x float>, <4 x float>* %a, align 16 317 %vecext = extractelement <4 x float> %0, i32 0 318 %1 = tail call fast float @logf(float %vecext) 319 %vecins = insertelement <4 x float> undef, float %1, i32 0 320 %vecext.1 = extractelement <4 x float> %0, i32 1 321 %2 = tail call fast float @logf(float %vecext.1) 322 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 323 %vecext.2 = 
extractelement <4 x float> %0, i32 2 324 %3 = tail call fast float @logf(float %vecext.2) 325 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 326 %vecext.3 = extractelement <4 x float> %0, i32 3 327 %4 = tail call fast float @logf(float %vecext.3) 328 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 329 ret <4 x float> %vecins.3 330} 331declare float @log1pf(float) readonly nounwind willreturn 332define <4 x float> @log1p_4x(<4 x float>* %a) { 333; CHECK-LABEL: @log1p_4x( 334; CHECK-NEXT: entry: 335; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 336; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vlog1pf(<4 x float> [[TMP0]]) 337; CHECK-NEXT: ret <4 x float> [[TMP1]] 338; 339; NOACCELERATE-LABEL: @log1p_4x( 340; NOACCELERATE-NEXT: entry: 341; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 342; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 343; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @log1pf(float [[VECEXT]]) 344; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 345; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 346; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @log1pf(float [[VECEXT_1]]) 347; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 348; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 349; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @log1pf(float [[VECEXT_2]]) 350; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 351; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 352; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @log1pf(float [[VECEXT_3]]) 353; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float 
[[TMP4]], i32 3 354; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]] 355; 356entry: 357 %0 = load <4 x float>, <4 x float>* %a, align 16 358 %vecext = extractelement <4 x float> %0, i32 0 359 %1 = tail call fast float @log1pf(float %vecext) 360 %vecins = insertelement <4 x float> undef, float %1, i32 0 361 %vecext.1 = extractelement <4 x float> %0, i32 1 362 %2 = tail call fast float @log1pf(float %vecext.1) 363 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 364 %vecext.2 = extractelement <4 x float> %0, i32 2 365 %3 = tail call fast float @log1pf(float %vecext.2) 366 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 367 %vecext.3 = extractelement <4 x float> %0, i32 3 368 %4 = tail call fast float @log1pf(float %vecext.3) 369 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 370 ret <4 x float> %vecins.3 371} 372declare float @log10pf(float) readonly nounwind willreturn 373define <4 x float> @log10p_4x(<4 x float>* %a) { 374; CHECK-LABEL: @log10p_4x( 375; CHECK-NEXT: entry: 376; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 377; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 378; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @log10pf(float [[VECEXT]]) 379; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 380; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 381; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @log10pf(float [[VECEXT_1]]) 382; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 383; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 384; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @log10pf(float [[VECEXT_2]]) 385; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 386; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 387; CHECK-NEXT: [[TMP4:%.*]] = tail 
call fast float @log10pf(float [[VECEXT_3]]) 388; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 389; CHECK-NEXT: ret <4 x float> [[VECINS_3]] 390; 391; NOACCELERATE-LABEL: @log10p_4x( 392; NOACCELERATE-NEXT: entry: 393; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 394; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 395; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @log10pf(float [[VECEXT]]) 396; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 397; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 398; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @log10pf(float [[VECEXT_1]]) 399; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 400; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 401; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @log10pf(float [[VECEXT_2]]) 402; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 403; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 404; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @log10pf(float [[VECEXT_3]]) 405; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 406; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]] 407; 408entry: 409 %0 = load <4 x float>, <4 x float>* %a, align 16 410 %vecext = extractelement <4 x float> %0, i32 0 411 %1 = tail call fast float @log10pf(float %vecext) 412 %vecins = insertelement <4 x float> undef, float %1, i32 0 413 %vecext.1 = extractelement <4 x float> %0, i32 1 414 %2 = tail call fast float @log10pf(float %vecext.1) 415 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 416 %vecext.2 = extractelement <4 x float> %0, i32 2 417 %3 = tail call fast 
float @log10pf(float %vecext.2) 418 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 419 %vecext.3 = extractelement <4 x float> %0, i32 3 420 %4 = tail call fast float @log10pf(float %vecext.3) 421 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 422 ret <4 x float> %vecins.3 423} 424declare float @logbf(float) readonly nounwind willreturn 425define <4 x float> @logb_4x(<4 x float>* %a) { 426; CHECK-LABEL: @logb_4x( 427; CHECK-NEXT: entry: 428; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 429; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vlogbf(<4 x float> [[TMP0]]) 430; CHECK-NEXT: ret <4 x float> [[TMP1]] 431; 432; NOACCELERATE-LABEL: @logb_4x( 433; NOACCELERATE-NEXT: entry: 434; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 435; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 436; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @logbf(float [[VECEXT]]) 437; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 438; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 439; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @logbf(float [[VECEXT_1]]) 440; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 441; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 442; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @logbf(float [[VECEXT_2]]) 443; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 444; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 445; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @logbf(float [[VECEXT_3]]) 446; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 447; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]] 
448; 449entry: 450 %0 = load <4 x float>, <4 x float>* %a, align 16 451 %vecext = extractelement <4 x float> %0, i32 0 452 %1 = tail call fast float @logbf(float %vecext) 453 %vecins = insertelement <4 x float> undef, float %1, i32 0 454 %vecext.1 = extractelement <4 x float> %0, i32 1 455 %2 = tail call fast float @logbf(float %vecext.1) 456 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 457 %vecext.2 = extractelement <4 x float> %0, i32 2 458 %3 = tail call fast float @logbf(float %vecext.2) 459 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 460 %vecext.3 = extractelement <4 x float> %0, i32 3 461 %4 = tail call fast float @logbf(float %vecext.3) 462 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 463 ret <4 x float> %vecins.3 464} 465declare float @sinf(float) readonly nounwind willreturn 466define <4 x float> @sin_4x(<4 x float>* %a) { 467; CHECK-LABEL: @sin_4x( 468; CHECK-NEXT: entry: 469; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 470; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vsinf(<4 x float> [[TMP0]]) 471; CHECK-NEXT: ret <4 x float> [[TMP1]] 472; 473; NOACCELERATE-LABEL: @sin_4x( 474; NOACCELERATE-NEXT: entry: 475; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 476; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 477; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]]) 478; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 479; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 480; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]]) 481; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 482; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 483; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = 
extractelement <4 x float> [[TMP0]], i32 3 484; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 485; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 486; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4]]) 487; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 488; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5> 489; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] 490; 491entry: 492 %0 = load <4 x float>, <4 x float>* %a, align 16 493 %vecext = extractelement <4 x float> %0, i32 0 494 %1 = tail call fast float @sinf(float %vecext) 495 %vecins = insertelement <4 x float> undef, float %1, i32 0 496 %vecext.1 = extractelement <4 x float> %0, i32 1 497 %2 = tail call fast float @sinf(float %vecext.1) 498 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 499 %vecext.2 = extractelement <4 x float> %0, i32 2 500 %3 = tail call fast float @sinf(float %vecext.2) 501 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 502 %vecext.3 = extractelement <4 x float> %0, i32 3 503 %4 = tail call fast float @sinf(float %vecext.3) 504 %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 505 ret <4 x float> %vecins.3 506} 507declare float @cosf(float) readonly nounwind willreturn 508define <4 x float> @cos_4x(<4 x float>* %a) { 509; CHECK-LABEL: @cos_4x( 510; CHECK-NEXT: entry: 511; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16 512; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vcosf(<4 x float> [[TMP0]]) 513; CHECK-NEXT: ret <4 x float> [[TMP1]] 514; 515; NOACCELERATE-LABEL: @cos_4x( 516; NOACCELERATE-NEXT: entry: 517; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 
16 518; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 519; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]]) 520; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 521; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 522; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]]) 523; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 524; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 525; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 526; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 527; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 528; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4]]) 529; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 530; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5> 531; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] 532; 533entry: 534 %0 = load <4 x float>, <4 x float>* %a, align 16 535 %vecext = extractelement <4 x float> %0, i32 0 536 %1 = tail call fast float @cosf(float %vecext) 537 %vecins = insertelement <4 x float> undef, float %1, i32 0 538 %vecext.1 = extractelement <4 x float> %0, i32 1 539 %2 = tail call fast float @cosf(float %vecext.1) 540 %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 541 %vecext.2 = extractelement <4 x float> %0, i32 2 542 %3 = tail call fast float @cosf(float %vecext.2) 543 %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 544 %vecext.3 = extractelement <4 x float> %0, i32 3 545 %4 = 
tail call fast float @cosf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}
; tan_4x: @tanf is called on each of the four lanes of a loaded <4 x float>.
; Expected with -vector-library=Accelerate: a single <4 x float> @vtanf call.
; Expected without a vector library: the four scalar @tanf calls are kept.
declare float @tanf(float) readonly nounwind willreturn
define <4 x float> @tan_4x(<4 x float>* %a) {
; CHECK-LABEL: @tan_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vtanf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @tan_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, <4 x float>* %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @tanf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @tanf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @tanf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @tanf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}
; asin_4x: @asinf is called on each of the four lanes of a loaded <4 x float>.
; Expected with -vector-library=Accelerate: a single <4 x float> @vasinf call.
; Expected without a vector library: the four scalar @asinf calls are kept.
declare float @asinf(float) readonly nounwind willreturn
define <4 x float> @asin_4x(<4 x float>* %a) {
; CHECK-LABEL: @asin_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vasinf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @asin_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, <4 x float>* %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @asinf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @asinf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @asinf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @asinf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}
; acos_4x: @acosf is called on each of the four lanes of a loaded <4 x float>.
; Expected with -vector-library=Accelerate: a single <4 x float> @vacosf call.
; Expected without a vector library: the four scalar @acosf calls are kept.
declare float @acosf(float) readonly nounwind willreturn
define <4 x float> @acos_4x(<4 x float>* %a) {
; CHECK-LABEL: @acos_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vacosf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @acos_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, <4 x float>* %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @acosf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @acosf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @acosf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @acosf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}
; atan_4x: @atanf is called on each of the four lanes of a loaded <4 x float>.
; Expected with -vector-library=Accelerate: a single <4 x float> @vatanf call.
; Expected without a vector library: the four scalar @atanf calls are kept.
declare float @atanf(float) readonly nounwind willreturn
define <4 x float> @atan_4x(<4 x float>* %a) {
; CHECK-LABEL: @atan_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vatanf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @atan_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, <4 x float>* %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @atanf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @atanf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @atanf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @atanf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}
; sinh_4x: @sinhf is called on each of the four lanes of a loaded <4 x float>.
; Expected with -vector-library=Accelerate: a single <4 x float> @vsinhf call.
; Expected without a vector library: the four scalar @sinhf calls are kept.
declare float @sinhf(float) readonly nounwind willreturn
define <4 x float> @sinh_4x(<4 x float>* %a) {
; CHECK-LABEL: @sinh_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vsinhf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @sinh_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, <4 x float>* %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @sinhf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @sinhf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @sinhf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @sinhf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}
; cosh_4x: @coshf is called on each of the four lanes of a loaded <4 x float>.
; Expected with -vector-library=Accelerate: a single <4 x float> @vcoshf call.
; Expected without a vector library: the four scalar @coshf calls are kept.
declare float @coshf(float) readonly nounwind willreturn
define <4 x float> @cosh_4x(<4 x float>* %a) {
; CHECK-LABEL: @cosh_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vcoshf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @cosh_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, <4 x float>* %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @coshf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @coshf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @coshf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @coshf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}
; tanh_4x: @tanhf is called on each of the four lanes of a loaded <4 x float>.
; Expected with -vector-library=Accelerate: a single <4 x float> @vtanhf call.
; Expected without a vector library: the four scalar @tanhf calls are kept.
declare float @tanhf(float) readonly nounwind willreturn
define <4 x float> @tanh_4x(<4 x float>* %a) {
; CHECK-LABEL: @tanh_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vtanhf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @tanh_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, <4 x float>* %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @tanhf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @tanhf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @tanhf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @tanhf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}
; asinh_4x: @asinhf is called on each of the four lanes of a loaded <4 x float>.
; Expected with -vector-library=Accelerate: a single <4 x float> @vasinhf call.
; Expected without a vector library: the four scalar @asinhf calls are kept.
declare float @asinhf(float) readonly nounwind willreturn
define <4 x float> @asinh_4x(<4 x float>* %a) {
; CHECK-LABEL: @asinh_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vasinhf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @asinh_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]])
; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]])
; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, <4 x float>* %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @asinhf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @asinhf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @asinhf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @asinhf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}
; acosh_4x: @acoshf is called on each of the four lanes of a loaded <4 x float>.
; Expected with -vector-library=Accelerate: a single <4 x float> @vacoshf call.
; Expected without a vector library: the four scalar @acoshf calls are kept.
declare float @acoshf(float) readonly nounwind willreturn
define <4 x float> @acosh_4x(<4 x float>* %a) {
; CHECK-LABEL: @acosh_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vacoshf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @acosh_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]])
; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]])
; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, <4 x float>* %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @acoshf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @acoshf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @acoshf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @acoshf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}
; atanh_4x: @atanhf is called on each of the four lanes of a loaded <4 x float>.
; Expected with -vector-library=Accelerate: a single <4 x float> @vatanhf call.
; Expected without a vector library: the four scalar @atanhf calls are kept.
declare float @atanhf(float) readonly nounwind willreturn
define <4 x float> @atanh_4x(<4 x float>* %a) {
; CHECK-LABEL: @atanh_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vatanhf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @atanh_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, <4 x float>* %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @atanhf(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @atanhf(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @atanhf(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @atanhf(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; Accelerate *does not* provide sin() for <2 x float>.
; sin_2x: @llvm.sin.f32 is called on each of the two lanes of a loaded
; <2 x float>. Both scalar calls are expected to remain in both RUN
; configurations; with -vector-library=Accelerate each call is additionally
; expected to carry an attribute-group reference.
define <2 x float> @sin_2x(<2 x float>* %a) {
; CHECK-LABEL: @sin_2x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, <2 x float>* [[A:%.*]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]]) #[[ATTR2:[0-9]+]]
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]]) #[[ATTR2]]
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <2 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    ret <2 x float> [[VECINS_1]]
;
; NOACCELERATE-LABEL: @sin_2x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <2 x float>, <2 x float>* [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <2 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    ret <2 x float> [[VECINS_1]]
;
entry:
  %0 = load <2 x float>, <2 x float>* %a, align 16
  %vecext = extractelement <2 x float> %0, i32 0
  %1 = tail call fast float @llvm.sin.f32(float %vecext)
  %vecins = insertelement <2 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <2 x float> %0, i32 1
  %2 = tail call fast float @llvm.sin.f32(float %vecext.1)
  %vecins.1 = insertelement <2 x float> %vecins, float %2, i32 1
  ret <2 x float> %vecins.1
}


declare float @llvm.cos.f32(float)

; Accelerate provides cos() for <4 x float>
; int_cos_4x: @llvm.cos.f32 is called on each of the four lanes of a loaded
; <4 x float>. With -vector-library=Accelerate the expected output is a single
; <4 x float> @vcosf call. Without a vector library, lanes 0 and 1 are expected
; to stay scalar while lanes 2 and 3 are combined into one @llvm.cos.v2f32 call
; whose result is shuffled back into the <4 x float> result.
define <4 x float> @int_cos_4x(<4 x float>* %a) {
; CHECK-LABEL: @int_cos_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vcosf(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
; NOACCELERATE-LABEL: @int_cos_4x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; NOACCELERATE-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0
; NOACCELERATE-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1
; NOACCELERATE-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4]])
; NOACCELERATE-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
;
entry:
  %0 = load <4 x float>, <4 x float>* %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @llvm.cos.f32(float %vecext)
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @llvm.cos.f32(float %vecext.1)
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @llvm.cos.f32(float %vecext.2)
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @llvm.cos.f32(float %vecext.3)
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; Accelerate *does not* provide cos() for <2 x float>.
; cos_2x: @llvm.cos.f32 is called on each of the two lanes of a loaded
; <2 x float>. Both scalar calls are expected to remain in both RUN
; configurations; with -vector-library=Accelerate each call is additionally
; expected to carry an attribute-group reference.
define <2 x float> @cos_2x(<2 x float>* %a) {
; CHECK-LABEL: @cos_2x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, <2 x float>* [[A:%.*]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]]) #[[ATTR3]]
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <2 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    ret <2 x float> [[VECINS_1]]
;
; NOACCELERATE-LABEL: @cos_2x(
; NOACCELERATE-NEXT:  entry:
; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <2 x float>, <2 x float>* [[A:%.*]], align 16
; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <2 x float> [[VECINS]], float [[TMP2]], i32 1
; NOACCELERATE-NEXT:    ret <2 x float> [[VECINS_1]]
;
entry:
  %0 = load <2 x float>, <2 x float>* %a, align 16
  %vecext = extractelement <2 x float> %0, i32 0
  %1 = tail call fast float @llvm.cos.f32(float %vecext)
  %vecins = insertelement <2 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <2 x float> %0, i32 1
  %2 = tail call fast float @llvm.cos.f32(float %vecext.1)
  %vecins.1 = insertelement <2 x float> %vecins, float %2, i32 1
  ret <2 x float> %vecins.1
}