//===-- Single-precision sin function -------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "src/math/sinf.h" #include "src/__support/FPUtil/BasicOperations.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/PolyEval.h" #include "src/__support/FPUtil/except_value_utils.h" #include "src/__support/FPUtil/multiply_add.h" #include "src/__support/common.h" #include #if defined(LIBC_TARGET_HAS_FMA) #include "range_reduction_fma.h" // using namespace __llvm_libc::fma; using __llvm_libc::fma::FAST_PASS_BOUND; using __llvm_libc::fma::large_range_reduction; using __llvm_libc::fma::LargeExcepts; using __llvm_libc::fma::N_EXCEPT_LARGE; using __llvm_libc::fma::N_EXCEPT_SMALL; using __llvm_libc::fma::small_range_reduction; using __llvm_libc::fma::SmallExcepts; #else #include "range_reduction.h" // using namespace __llvm_libc::generic; using __llvm_libc::generic::FAST_PASS_BOUND; using __llvm_libc::generic::large_range_reduction; using __llvm_libc::generic::LargeExcepts; using __llvm_libc::generic::N_EXCEPT_LARGE; using __llvm_libc::generic::N_EXCEPT_SMALL; using __llvm_libc::generic::small_range_reduction; using __llvm_libc::generic::SmallExcepts; #endif namespace __llvm_libc { LLVM_LIBC_FUNCTION(float, sinf, (float x)) { using FPBits = typename fputil::FPBits; FPBits xbits(x); uint32_t x_u = xbits.uintval(); uint32_t x_abs = x_u & 0x7fff'ffffU; double xd, y; // Range reduction: // For |x| > pi/16, we perform range reduction as follows: // Find k and y such that: // x = (k + y) * pi // k is an integer // |y| < 0.5 // For small range (|x| < 2^50 when FMA instructions are available, 2^26 // otherwise), this is done by performing: // k = round(x * 1/pi) // y = x * 1/pi - k // For large range, we will omit all the higher parts of 1/pi such that the // least significant bits of their full products with x are larger than 1, // since sin(x + i * 2pi) = sin(x). // // When FMA instructions are not available, we store the digits of 1/pi in // chunks of 28-bit precision. This will make sure that the products: // x * ONE_OVER_PI_28[i] are all exact. // When FMA instructions are available, we simply store the digits of 1/pi in // chunks of doubles (53-bit of precision). // So when multiplying by the largest values of single precision, the // resulting output should be correct up to 2^(-208 + 128) ~ 2^-80. By the // worst-case analysis of range reduction, |y| >= 2^-38, so this should give // us more than 40 bits of accuracy. For the worst-case estimation of range // reduction, see for instances: // Elementary Functions by J-M. Muller, Chapter 11, // Handbook of Floating-Point Arithmetic by J-M. Muller et. al., // Chapter 10.2. // // Once k and y are computed, we then deduce the answer by the sine of sum // formula: // sin(x) = sin((k + y)*pi) // = sin(y*pi) * cos(k*pi) + cos(y*pi) * sin(k*pi) // = (-1)^(k & 1) * sin(y*pi) // ~ (-1)^(k & 1) * y * P(y^2) // where y*P(y^2) is a degree-15 minimax polynomial generated by Sollya // with: > Q = fpminimax(sin(x*pi)/x, [|0, 2, 4, 6, 8, 10, 12, 14|], // [|D...|], [0, 0.5]); // |x| <= pi/16 if (x_abs <= 0x3e49'0fdbU) { xd = static_cast(x); // |x| < 0x1.d12ed2p-12f if (x_abs < 0x39e8'9769U) { if (unlikely(x_abs == 0U)) { // For signed zeros. return x; } // When |x| < 2^-12, the relative error of the approximation sin(x) ~ x // is: // |sin(x) - x| / |sin(x)| < |x^3| / (6|x|) // = x^2 / 6 // < 2^-25 // < epsilon(1)/2. // So the correctly rounded values of sin(x) are: // = x - sign(x)*eps(x) if rounding mode = FE_TOWARDZERO, // or (rounding mode = FE_UPWARD and x is // negative), // = x otherwise. // To simplify the rounding decision and make it more efficient, we use // fma(x, -2^-25, x) instead. // An exhaustive test shows that this formula work correctly for all // rounding modes up to |x| < 0x1.c555dep-11f. // Note: to use the formula x - 2^-25*x to decide the correct rounding, we // do need fma(x, -2^-25, x) to prevent underflow caused by -2^-25*x when // |x| < 2^-125. For targets without FMA instructions, we simply use // double for intermediate results as it is more efficient than using an // emulated version of FMA. #if defined(LIBC_TARGET_HAS_FMA) return fputil::multiply_add(x, -0x1.0p-25f, x); #else return static_cast(fputil::multiply_add(xd, -0x1.0p-25, xd)); #endif // LIBC_TARGET_HAS_FMA } // |x| < pi/16. double xsq = xd * xd; // Degree-9 polynomial approximation: // sin(x) ~ x + a_3 x^3 + a_5 x^5 + a_7 x^7 + a_9 x^9 // = x (1 + a_3 x^2 + ... + a_9 x^8) // = x * P(x^2) // generated by Sollya with the following commands: // > display = hexadecimal; // > Q = fpminimax(sin(x)/x, [|0, 2, 4, 6, 8|], [|1, D...|], [0, pi/16]); double result = fputil::polyeval(xsq, 1.0, -0x1.55555555554c6p-3, 0x1.1111111085e65p-7, -0x1.a019f70fb4d4fp-13, 0x1.718d179815e74p-19); return xd * result; } bool x_sign = xbits.get_sign(); int64_t k; xd = static_cast(x); if (x_abs < FAST_PASS_BOUND) { using ExceptChecker = typename fputil::ExceptionChecker; { float result; if (ExceptChecker::check_odd_func(SmallExcepts, x_abs, x_sign, result)) { return result; } } k = small_range_reduction(xd, y); } else { // x is inf or nan. if (unlikely(x_abs >= 0x7f80'0000U)) { if (x_abs == 0x7f80'0000U) { errno = EDOM; fputil::set_except(FE_INVALID); } return x + FPBits::build_nan(1 << (fputil::MantissaWidth::VALUE - 1)); } using ExceptChecker = typename fputil::ExceptionChecker; { float result; if (ExceptChecker::check_odd_func(LargeExcepts, x_abs, x_sign, result)) return result; } k = large_range_reduction(xd, xbits.get_exponent(), y); } // After range reduction, k = round(x / pi) and y = (x/pi) - k. // So k is an integer and -0.5 <= y <= 0.5. // Then sin(x) = sin(y*pi + k*pi) // = (-1)^(k & 1) * sin(y*pi) // ~ (-1)^(k & 1) * y * P(y^2) // where y*P(y^2) is a degree-15 minimax polynomial generated by Sollya // with: > P = fpminimax(sin(x*pi)/x, [|0, 2, 4, 6, 8, 10, 12, 14|], // [|D...|], [0, 0.5]); constexpr double SIGN[2] = {1.0, -1.0}; double ysq = y * y; double result = y * fputil::polyeval(ysq, 0x1.921fb54442d17p1, -0x1.4abbce625bd4bp2, 0x1.466bc67750a3fp1, -0x1.32d2cce1612b5p-1, 0x1.507832417bce6p-4, -0x1.e3062119b6071p-8, 0x1.e89c7aa14122dp-12, -0x1.625b1709dece6p-16); return SIGN[k & 1] * result; // } } } // namespace __llvm_libc