19e7688c7STue Ly //===-- Single-precision log1p(x) function --------------------------------===//
29e7688c7STue Ly //
39e7688c7STue Ly // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
49e7688c7STue Ly // See https://llvm.org/LICENSE.txt for license information.
59e7688c7STue Ly // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
69e7688c7STue Ly //
79e7688c7STue Ly //===----------------------------------------------------------------------===//
89e7688c7STue Ly
99e7688c7STue Ly #include "src/math/log1pf.h"
109e7688c7STue Ly #include "common_constants.h" // Lookup table for (1/f) and log(f)
119e7688c7STue Ly #include "src/__support/FPUtil/BasicOperations.h"
1276ec69a9STue Ly #include "src/__support/FPUtil/FEnvImpl.h"
139e7688c7STue Ly #include "src/__support/FPUtil/FMA.h"
149e7688c7STue Ly #include "src/__support/FPUtil/FPBits.h"
159e7688c7STue Ly #include "src/__support/FPUtil/PolyEval.h"
169e7688c7STue Ly #include "src/__support/common.h"
179e7688c7STue Ly
// This is an algorithm for log1p(x) in single precision which is
// correctly rounded for all rounding modes.
// - An exhaustive test shows that when x >= 2^45, log1pf(x) == logf(x)
//   for all rounding modes.
229e7688c7STue Ly // - When 2^(-8) <= |x| < 2^45, the sum (double(x) + 1.0) is exact,
239e7688c7STue Ly // so we can adapt the correctly rounded algorithm of logf to compute
249e7688c7STue Ly // log(double(x) + 1.0) correctly. For more information about the logf
259e7688c7STue Ly // algorithm, see `libc/src/math/generic/logf.cpp`.
269e7688c7STue Ly // - When |x| < 2^(-8), we use a degree-6 polynomial in double precision
279e7688c7STue Ly // generated with Sollya using the following command:
289e7688c7STue Ly // fpminimax(log(1 + x)/x, 5, [|D...|], [-2^-8; 2^-8]);
299e7688c7STue Ly
309e7688c7STue Ly namespace __llvm_libc {
319e7688c7STue Ly
329e7688c7STue Ly namespace internal {
339e7688c7STue Ly
349e7688c7STue Ly // We don't need to treat denormal
log(double x)35*614567a7STue Ly static inline float log(double x) {
369e7688c7STue Ly constexpr double LOG_2 = 0x1.62e42fefa39efp-1;
379e7688c7STue Ly
389e7688c7STue Ly using FPBits = typename fputil::FPBits<double>;
399e7688c7STue Ly FPBits xbits(x);
409e7688c7STue Ly
419e7688c7STue Ly if (xbits.is_zero()) {
429e7688c7STue Ly return static_cast<float>(fputil::FPBits<float>::neg_inf());
439e7688c7STue Ly }
449e7688c7STue Ly
459e7688c7STue Ly if (xbits.uintval() > FPBits::MAX_NORMAL) {
469e7688c7STue Ly if (xbits.get_sign() && !xbits.is_nan()) {
47e5e93f60STue Ly return fputil::FPBits<float>::build_nan(
48e5e93f60STue Ly 1 << (fputil::MantissaWidth<float>::VALUE - 1));
499e7688c7STue Ly }
509e7688c7STue Ly return static_cast<float>(x);
519e7688c7STue Ly }
529e7688c7STue Ly
539e7688c7STue Ly double m = static_cast<double>(xbits.get_exponent());
549e7688c7STue Ly
559e7688c7STue Ly // Set bits to 1.m
569e7688c7STue Ly xbits.set_unbiased_exponent(0x3FF);
579e7688c7STue Ly // Get the 8 highest bits, use 7 bits (excluding the implicit hidden bit) for
589e7688c7STue Ly // lookup tables.
599e7688c7STue Ly int f_index =
609e7688c7STue Ly xbits.get_mantissa() >> 45; // fputil::MantissaWidth<double>::VALUE - 7
619e7688c7STue Ly
627e7ecef9SGuillaume Chatelet FPBits f = xbits;
639e7688c7STue Ly // Clear the lowest 45 bits.
649e7688c7STue Ly f.bits &= ~0x0000'1FFF'FFFF'FFFFULL;
659e7688c7STue Ly
669e7688c7STue Ly double d = static_cast<double>(xbits) - static_cast<double>(f);
679e7688c7STue Ly d *= ONE_OVER_F[f_index];
689e7688c7STue Ly
69c5f8a0a1STue Ly double extra_factor = fputil::multiply_add(m, LOG_2, LOG_F[f_index]);
709e7688c7STue Ly
719e7688c7STue Ly double r = fputil::polyeval(d, extra_factor, 0x1.fffffffffffacp-1,
729e7688c7STue Ly -0x1.fffffffef9cb2p-2, 0x1.5555513bc679ap-2,
739e7688c7STue Ly -0x1.fff4805ea441p-3, 0x1.930180dbde91ap-3);
749e7688c7STue Ly
759e7688c7STue Ly return static_cast<float>(r);
769e7688c7STue Ly }
779e7688c7STue Ly
789e7688c7STue Ly } // namespace internal
799e7688c7STue Ly
809e7688c7STue Ly LLVM_LIBC_FUNCTION(float, log1pf, (float x)) {
819e7688c7STue Ly using FPBits = typename fputil::FPBits<float>;
829e7688c7STue Ly FPBits xbits(x);
839e7688c7STue Ly double xd = static_cast<double>(x);
849e7688c7STue Ly
859e7688c7STue Ly if (xbits.get_exponent() >= -8) {
869e7688c7STue Ly // Hard-to-round cases.
879e7688c7STue Ly switch (xbits.uintval()) {
889e7688c7STue Ly case 0x3b9315c8U: // x = 0x1.262b9p-8f
899e7688c7STue Ly if (fputil::get_round() != FE_UPWARD)
909e7688c7STue Ly return 0x1.25830cp-8f;
919e7688c7STue Ly break;
929e7688c7STue Ly case 0x3c6eb7afU: // x = 0x1.dd6f5ep-7f
939e7688c7STue Ly if (fputil::get_round() == FE_UPWARD)
949e7688c7STue Ly return 0x1.d9fd86p-7f;
959e7688c7STue Ly return 0x1.d9fd84p-7f;
969e7688c7STue Ly case 0x41078febU: // x = 0x1.0f1fd6p+3f
979e7688c7STue Ly if (fputil::get_round() != FE_UPWARD)
989e7688c7STue Ly return 0x1.1fcbcep+1f;
999e7688c7STue Ly break;
1009e7688c7STue Ly case 0x5cd69e88U: // x = 0x1.ad3d1p+58f
1019e7688c7STue Ly if (fputil::get_round() != FE_UPWARD)
1029e7688c7STue Ly return 0x1.45c146p+5f;
1039e7688c7STue Ly break;
1049e7688c7STue Ly case 0x65d890d3U: // x = 0x1.b121a6p+76f
1059e7688c7STue Ly if (fputil::get_round() == FE_TONEAREST)
1069e7688c7STue Ly return 0x1.a9a3f2p+5f;
1079e7688c7STue Ly break;
1089e7688c7STue Ly case 0x6f31a8ecU: // x = 0x1.6351d8p+95f
1099e7688c7STue Ly if (fputil::get_round() == FE_TONEAREST)
1109e7688c7STue Ly return 0x1.08b512p+6f;
1119e7688c7STue Ly break;
1129e7688c7STue Ly case 0x7a17f30aU: // x = 0x1.2fe614p+117f
1139e7688c7STue Ly if (fputil::get_round() != FE_UPWARD)
1149e7688c7STue Ly return 0x1.451436p+6f;
1159e7688c7STue Ly break;
1169e7688c7STue Ly case 0xbc4d092cU: // x = -0x1.9a1258p-7f
1179e7688c7STue Ly if (fputil::get_round() == FE_TONEAREST)
1189e7688c7STue Ly return -0x1.9ca8bep-7f;
1199e7688c7STue Ly break;
1209e7688c7STue Ly case 0xbc657728U: // x = -0x1.caee5p-7f
1219e7688c7STue Ly if (fputil::get_round() != FE_DOWNWARD)
1229e7688c7STue Ly return -0x1.ce2cccp-7f;
1239e7688c7STue Ly break;
1249e7688c7STue Ly case 0xbd1d20afU: // x = -0x1.3a415ep-5f
1259e7688c7STue Ly int round_mode = fputil::get_round();
1269e7688c7STue Ly if (round_mode == FE_UPWARD || round_mode == FE_TOWARDZERO)
1279e7688c7STue Ly return -0x1.40711p-5f;
1289e7688c7STue Ly return -0x1.407112p-5f;
1299e7688c7STue Ly }
1309e7688c7STue Ly
1319e7688c7STue Ly return internal::log(xd + 1.0);
1329e7688c7STue Ly }
1339e7688c7STue Ly
1349e7688c7STue Ly // Hard-to round cases.
1359e7688c7STue Ly switch (xbits.uintval()) {
1369e7688c7STue Ly case 0x35400003U: // x = 0x1.800006p-21f
1379e7688c7STue Ly if (fputil::get_round() == FE_TONEAREST)
1389e7688c7STue Ly return 0x1.7ffffep-21f;
1399e7688c7STue Ly break;
1409e7688c7STue Ly case 0x3710001bU: // x = 0x1.200036p-17f
1419e7688c7STue Ly if (fputil::get_round() == FE_TONEAREST)
1429e7688c7STue Ly return 0x1.1fffe6p-17f;
1439e7688c7STue Ly break;
1449e7688c7STue Ly case 0xb53ffffdU: // x = -0x1.7ffffap-21f
1459e7688c7STue Ly if (fputil::get_round() != FE_DOWNWARD)
1469e7688c7STue Ly return -0x1.800002p-21f;
1479e7688c7STue Ly break;
1489e7688c7STue Ly case 0xb70fffe5U: // x = -0x1.1fffcap-17f
1499e7688c7STue Ly if (fputil::get_round() != FE_DOWNWARD)
1509e7688c7STue Ly return -0x1.20001ap-17f;
1519e7688c7STue Ly break;
1529e7688c7STue Ly case 0xbb0ec8c4U: // x = -0x1.1d9188p-9f
1539e7688c7STue Ly if (fputil::get_round() == FE_TONEAREST)
1549e7688c7STue Ly return -0x1.1de14ap-9f;
1559e7688c7STue Ly break;
1569e7688c7STue Ly }
1579e7688c7STue Ly
1589e7688c7STue Ly double r;
1599e7688c7STue Ly // Polymial generated with Sollya:
1609e7688c7STue Ly // > fpminimax(log(1 + x)/x, 5, [|D...|], [-2^-8; 2^-8]);
1619e7688c7STue Ly r = fputil::polyeval(xd, -0x1p-1, 0x1.5555555515551p-2, -0x1.ffffffff82bdap-3,
1629e7688c7STue Ly 0x1.999b33348d3aep-3, -0x1.5556cae3adcc3p-3);
163c5f8a0a1STue Ly return static_cast<float>(fputil::multiply_add(r, xd * xd, xd));
1649e7688c7STue Ly }
1659e7688c7STue Ly
1669e7688c7STue Ly } // namespace __llvm_libc
167