164af346bSTue Ly //===-- Single-precision e^x - 1 function ---------------------------------===//
24e5f8b4dSTue Ly //
34e5f8b4dSTue Ly // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44e5f8b4dSTue Ly // See https://llvm.org/LICENSE.txt for license information.
54e5f8b4dSTue Ly // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
64e5f8b4dSTue Ly //
74e5f8b4dSTue Ly //===----------------------------------------------------------------------===//
84e5f8b4dSTue Ly 
94e5f8b4dSTue Ly #include "src/math/expm1f.h"
1064af346bSTue Ly #include "common_constants.h" // Lookup tables EXP_M1 and EXP_M2.
11c120edc7SMichael Jones #include "src/__support/FPUtil/BasicOperations.h"
1264af346bSTue Ly #include "src/__support/FPUtil/FEnvImpl.h"
1364af346bSTue Ly #include "src/__support/FPUtil/FMA.h"
1464af346bSTue Ly #include "src/__support/FPUtil/FPBits.h"
15c120edc7SMichael Jones #include "src/__support/FPUtil/PolyEval.h"
16*628fbbefSTue Ly #include "src/__support/FPUtil/multiply_add.h"
17*628fbbefSTue Ly #include "src/__support/FPUtil/nearest_integer.h"
184e5f8b4dSTue Ly #include "src/__support/common.h"
1964af346bSTue Ly 
2064af346bSTue Ly #include <errno.h>
214e5f8b4dSTue Ly 
224e5f8b4dSTue Ly namespace __llvm_libc {
234e5f8b4dSTue Ly 
244e5f8b4dSTue Ly LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
2564af346bSTue Ly   using FPBits = typename fputil::FPBits<float>;
2664af346bSTue Ly   FPBits xbits(x);
274e5f8b4dSTue Ly 
28a5466f04STue Ly   uint32_t x_u = xbits.uintval();
29a5466f04STue Ly   uint32_t x_abs = x_u & 0x7fff'ffffU;
30a5466f04STue Ly 
31a5466f04STue Ly   // Exceptional value
32a5466f04STue Ly   if (unlikely(x_u == 0x3e35'bec5U)) { // x = 0x1.6b7d8ap-3f
33a5466f04STue Ly     int round_mode = fputil::get_round();
34a5466f04STue Ly     if (round_mode == FE_TONEAREST || round_mode == FE_UPWARD)
35a5466f04STue Ly       return 0x1.8dbe64p-3f;
36a5466f04STue Ly     return 0x1.8dbe62p-3f;
37a5466f04STue Ly   }
38a5466f04STue Ly 
39484319f4STue Ly #if !defined(LIBC_TARGET_HAS_FMA)
40484319f4STue Ly   if (unlikely(x_u == 0xbdc1'c6cbU)) { // x = -0x1.838d96p-4f
41484319f4STue Ly     int round_mode = fputil::get_round();
42484319f4STue Ly     if (round_mode == FE_TONEAREST || round_mode == FE_DOWNWARD)
43484319f4STue Ly       return -0x1.71c884p-4f;
44484319f4STue Ly     return -0x1.71c882p-4f;
45484319f4STue Ly   }
46484319f4STue Ly #endif // LIBC_TARGET_HAS_FMA
47484319f4STue Ly 
48a5466f04STue Ly   // When |x| > 25*log(2), or nan
49a5466f04STue Ly   if (unlikely(x_abs >= 0x418a'a123U)) {
50a5466f04STue Ly     // x < log(2^-25)
51a5466f04STue Ly     if (xbits.get_sign()) {
5264af346bSTue Ly       // exp(-Inf) = 0
5364af346bSTue Ly       if (xbits.is_inf())
5464af346bSTue Ly         return -1.0f;
5564af346bSTue Ly       // exp(nan) = nan
5664af346bSTue Ly       if (xbits.is_nan())
5764af346bSTue Ly         return x;
5864af346bSTue Ly       int round_mode = fputil::get_round();
5964af346bSTue Ly       if (round_mode == FE_UPWARD || round_mode == FE_TOWARDZERO)
6064af346bSTue Ly         return -0x1.ffff'fep-1f; // -1.0f + 0x1.0p-24f
6164af346bSTue Ly       return -1.0f;
62a5466f04STue Ly     } else {
6364af346bSTue Ly       // x >= 89 or nan
64a5466f04STue Ly       if (xbits.uintval() >= 0x42b2'0000) {
6564af346bSTue Ly         if (xbits.uintval() < 0x7f80'0000U) {
6664af346bSTue Ly           int rounding = fputil::get_round();
6764af346bSTue Ly           if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO)
6864af346bSTue Ly             return static_cast<float>(FPBits(FPBits::MAX_NORMAL));
6964af346bSTue Ly 
7064af346bSTue Ly           errno = ERANGE;
714e5f8b4dSTue Ly         }
7264af346bSTue Ly         return x + static_cast<float>(FPBits::inf());
734e5f8b4dSTue Ly       }
74a5466f04STue Ly     }
75a5466f04STue Ly   }
7664af346bSTue Ly 
7764af346bSTue Ly   // |x| < 2^-4
78a5466f04STue Ly   if (x_abs < 0x3d80'0000U) {
7964af346bSTue Ly     // |x| < 2^-25
80a5466f04STue Ly     if (x_abs < 0x3300'0000U) {
8164af346bSTue Ly       // x = -0.0f
8264af346bSTue Ly       if (unlikely(xbits.uintval() == 0x8000'0000U))
8364af346bSTue Ly         return x;
84a5466f04STue Ly         // When |x| < 2^-25, the relative error of the approximation e^x - 1 ~ x
85a5466f04STue Ly         // is:
86a5466f04STue Ly         //   |(e^x - 1) - x| / |e^x - 1| < |x^2| / |x|
87a5466f04STue Ly         //                               = |x|
88a5466f04STue Ly         //                               < 2^-25
89a5466f04STue Ly         //                               < epsilon(1)/2.
9064af346bSTue Ly         // So the correctly rounded values of expm1(x) are:
9164af346bSTue Ly         //   = x + eps(x) if rounding mode = FE_UPWARD,
92484319f4STue Ly         //                   or (rounding mode = FE_TOWARDZERO and x is
93484319f4STue Ly         //                   negative),
9464af346bSTue Ly         //   = x otherwise.
9564af346bSTue Ly         // To simplify the rounding decision and make it more efficient, we use
9664af346bSTue Ly         //   fma(x, x, x) ~ x + x^2 instead.
97484319f4STue Ly         // Note: to use the formula x + x^2 to decide the correct rounding, we
98484319f4STue Ly         // do need fma(x, x, x) to prevent underflow caused by x*x when |x| <
99484319f4STue Ly         // 2^-76. For targets without FMA instructions, we simply use double for
100484319f4STue Ly         // intermediate results as it is more efficient than using an emulated
101484319f4STue Ly         // version of FMA.
102484319f4STue Ly #if defined(LIBC_TARGET_HAS_FMA)
103484319f4STue Ly       return fputil::fma(x, x, x);
104484319f4STue Ly #else
105484319f4STue Ly       double xd = x;
106484319f4STue Ly       return static_cast<float>(fputil::multiply_add(xd, xd, xd));
107484319f4STue Ly #endif // LIBC_TARGET_HAS_FMA
10864af346bSTue Ly     }
109a5466f04STue Ly 
11064af346bSTue Ly     // 2^-25 <= |x| < 2^-4
11164af346bSTue Ly     double xd = static_cast<double>(x);
11264af346bSTue Ly     double xsq = xd * xd;
11364af346bSTue Ly     // Degree-8 minimax polynomial generated by Sollya with:
11464af346bSTue Ly     // > display = hexadecimal;
115a5466f04STue Ly     // > P = fpminimax((expm1(x) - x)/x^2, 6, [|D...|], [-2^-4, 2^-4]);
11664af346bSTue Ly     double r =
117a5466f04STue Ly         fputil::polyeval(xd, 0x1p-1, 0x1.55555555557ddp-3, 0x1.55555555552fap-5,
118a5466f04STue Ly                          0x1.111110fcd58b7p-7, 0x1.6c16c1717660bp-10,
119a5466f04STue Ly                          0x1.a0241f0006d62p-13, 0x1.a01e3f8d3c06p-16);
120c5f8a0a1STue Ly     return static_cast<float>(fputil::multiply_add(r, xsq, xd));
12164af346bSTue Ly   }
12264af346bSTue Ly 
123a5466f04STue Ly   // For -18 < x < 89, to compute expm1(x), we perform the following range
12464af346bSTue Ly   // reduction: find hi, mid, lo such that:
12564af346bSTue Ly   //   x = hi + mid + lo, in which
12664af346bSTue Ly   //     hi is an integer,
12764af346bSTue Ly   //     mid * 2^7 is an integer
12864af346bSTue Ly   //     -2^(-8) <= lo < 2^-8.
12964af346bSTue Ly   // In particular,
13064af346bSTue Ly   //   hi + mid = round(x * 2^7) * 2^(-7).
13164af346bSTue Ly   // Then,
132a5466f04STue Ly   //   expm1(x) = exp(hi + mid + lo) - 1 = exp(hi) * exp(mid) * exp(lo) - 1.
13364af346bSTue Ly   // We store exp(hi) and exp(mid) in the lookup tables EXP_M1 and EXP_M2
134a5466f04STue Ly   // respectively.  exp(lo) is computed using a degree-4 minimax polynomial
13564af346bSTue Ly   // generated by Sollya.
13664af346bSTue Ly 
13764af346bSTue Ly   // x_hi = hi + mid.
138*628fbbefSTue Ly   float kf = fputil::nearest_integer(x * 0x1.0p7f);
139*628fbbefSTue Ly   int x_hi = static_cast<int>(kf);
14064af346bSTue Ly   // Subtract (hi + mid) from x to get lo.
141*628fbbefSTue Ly   double xd = static_cast<double>(fputil::multiply_add(kf, -0x1.0p-7f, x));
14264af346bSTue Ly   x_hi += 104 << 7;
14364af346bSTue Ly   // hi = x_hi >> 7
14464af346bSTue Ly   double exp_hi = EXP_M1[x_hi >> 7];
14564af346bSTue Ly   // lo = x_hi & 0x0000'007fU;
14664af346bSTue Ly   double exp_mid = EXP_M2[x_hi & 0x7f];
14764af346bSTue Ly   double exp_hi_mid = exp_hi * exp_mid;
148a5466f04STue Ly   // Degree-4 minimax polynomial generated by Sollya with the following
14964af346bSTue Ly   // commands:
15064af346bSTue Ly   //   > display = hexadecimal;
151a5466f04STue Ly   //   > Q = fpminimax(expm1(x)/x, 3, [|D...|], [-2^-8, 2^-8]);
15264af346bSTue Ly   //   > Q;
153a5466f04STue Ly   double exp_lo =
154a5466f04STue Ly       fputil::polyeval(xd, 0x1.0p0, 0x1.ffffffffff777p-1, 0x1.000000000071cp-1,
155a5466f04STue Ly                        0x1.555566668e5e7p-3, 0x1.55555555ef243p-5);
156c5f8a0a1STue Ly   return static_cast<float>(fputil::multiply_add(exp_hi_mid, exp_lo, -1.0));
1574e5f8b4dSTue Ly }
1584e5f8b4dSTue Ly 
1594e5f8b4dSTue Ly } // namespace __llvm_libc
160