1 //===-- Single-precision e^x function -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "src/math/expf.h"
10 #include "src/__support/FPUtil/BasicOperations.h"
11 #include "src/__support/FPUtil/FEnvImpl.h"
12 #include "src/__support/FPUtil/FMA.h"
13 #include "src/__support/FPUtil/FPBits.h"
14 #include "src/__support/FPUtil/PolyEval.h"
15 #include "src/__support/common.h"
16 
17 #include <errno.h>
18 
19 namespace __llvm_libc {
20 
21 // Lookup table for exp(m) with m = -104, ..., 89.
22 //   -104 = floor(log(single precision's min denormal))
23 //     89 = ceil(log(single precision's max normal))
24 // Table is generated with Sollya as follow:
25 // > display = hexadecimal;
26 // > for i from -104 to 89 do { D(exp(i)); };
27 static constexpr double EXP_M1[195] = {
28     0x1.f1e6b68529e33p-151, 0x1.525be4e4e601dp-149, 0x1.cbe0a45f75eb1p-148,
29     0x1.3884e838aea68p-146, 0x1.a8c1f14e2af5dp-145, 0x1.20a717e64a9bdp-143,
30     0x1.8851d84118908p-142, 0x1.0a9bdfb02d240p-140, 0x1.6a5bea046b42ep-139,
31     0x1.ec7f3b269efa8p-138, 0x1.4eafb87eab0f2p-136, 0x1.c6e2d05bbc000p-135,
32     0x1.35208867c2683p-133, 0x1.a425b317eeacdp-132, 0x1.1d8508fa8246ap-130,
33     0x1.840fbc08fdc8ap-129, 0x1.07b7112bc1ffep-127, 0x1.666d0dad2961dp-126,
34     0x1.e726c3f64d0fep-125, 0x1.4b0dc07cabf98p-123, 0x1.c1f2daf3b6a46p-122,
35     0x1.31c5957a47de2p-120, 0x1.9f96445648b9fp-119, 0x1.1a6baeadb4fd1p-117,
36     0x1.7fd974d372e45p-116, 0x1.04da4d1452919p-114, 0x1.62891f06b3450p-113,
37     0x1.e1dd273aa8a4ap-112, 0x1.4775e0840bfddp-110, 0x1.bd109d9d94bdap-109,
38     0x1.2e73f53fba844p-107, 0x1.9b138170d6bfep-106, 0x1.175af0cf60ec5p-104,
39     0x1.7baee1bffa80bp-103, 0x1.02057d1245cebp-101, 0x1.5eafffb34ba31p-100,
40     0x1.dca23bae16424p-99,  0x1.43e7fc88b8056p-97,  0x1.b83bf23a9a9ebp-96,
41     0x1.2b2b8dd05b318p-94,  0x1.969d47321e4ccp-93,  0x1.1452b7723aed2p-91,
42     0x1.778fe2497184cp-90,  0x1.fe7116182e9ccp-89,  0x1.5ae191a99585ap-87,
43     0x1.d775d87da854dp-86,  0x1.4063f8cc8bb98p-84,  0x1.b374b315f87c1p-83,
44     0x1.27ec458c65e3cp-81,  0x1.923372c67a074p-80,  0x1.1152eaeb73c08p-78,
45     0x1.737c5645114b5p-77,  0x1.f8e6c24b5592ep-76,  0x1.571db733a9d61p-74,
46     0x1.d257d547e083fp-73,  0x1.3ce9b9de78f85p-71,  0x1.aebabae3a41b5p-70,
47     0x1.24b6031b49bdap-68,  0x1.8dd5e1bb09d7ep-67,  0x1.0e5b73d1ff53dp-65,
48     0x1.6f741de1748ecp-64,  0x1.f36bd37f42f3ep-63,  0x1.536452ee2f75cp-61,
49     0x1.cd480a1b74820p-60,  0x1.39792499b1a24p-58,  0x1.aa0de4bf35b38p-57,
50     0x1.2188ad6ae3303p-55,  0x1.898471fca6055p-54,  0x1.0b6c3afdde064p-52,
51     0x1.6b7719a59f0e0p-51,  0x1.ee001eed62aa0p-50,  0x1.4fb547c775da8p-48,
52     0x1.c8464f7616468p-47,  0x1.36121e24d3bbap-45,  0x1.a56e0c2ac7f75p-44,
53     0x1.1e642baeb84a0p-42,  0x1.853f01d6d53bap-41,  0x1.0885298767e9ap-39,
54     0x1.67852a7007e42p-38,  0x1.e8a37a45fc32ep-37,  0x1.4c1078fe9228ap-35,
55     0x1.c3527e433fab1p-34,  0x1.32b48bf117da2p-32,  0x1.a0db0d0ddb3ecp-31,
56     0x1.1b48655f37267p-29,  0x1.81056ff2c5772p-28,  0x1.05a628c699fa1p-26,
57     0x1.639e3175a689dp-25,  0x1.e355bbaee85cbp-24,  0x1.4875ca227ec38p-22,
58     0x1.be6c6fdb01612p-21,  0x1.2f6053b981d98p-19,  0x1.9c54c3b43bc8bp-18,
59     0x1.18354238f6764p-16,  0x1.7cd79b5647c9bp-15,  0x1.02cf22526545ap-13,
60     0x1.5fc21041027adp-12,  0x1.de16b9c24a98fp-11,  0x1.44e51f113d4d6p-9,
61     0x1.b993fe00d5376p-8,   0x1.2c155b8213cf4p-6,   0x1.97db0ccceb0afp-5,
62     0x1.152aaa3bf81ccp-3,   0x1.78b56362cef38p-2,   0x1.0000000000000p+0,
63     0x1.5bf0a8b145769p+1,   0x1.d8e64b8d4ddaep+2,   0x1.415e5bf6fb106p+4,
64     0x1.b4c902e273a58p+5,   0x1.28d389970338fp+7,   0x1.936dc5690c08fp+8,
65     0x1.122885aaeddaap+10,  0x1.749ea7d470c6ep+11,  0x1.fa7157c470f82p+12,
66     0x1.5829dcf950560p+14,  0x1.d3c4488ee4f7fp+15,  0x1.3de1654d37c9ap+17,
67     0x1.b00b5916ac955p+18,  0x1.259ac48bf05d7p+20,  0x1.8f0ccafad2a87p+21,
68     0x1.0f2ebd0a80020p+23,  0x1.709348c0ea4f9p+24,  0x1.f4f22091940bdp+25,
69     0x1.546d8f9ed26e1p+27,  0x1.ceb088b68e804p+28,  0x1.3a6e1fd9eecfdp+30,
70     0x1.ab5adb9c43600p+31,  0x1.226af33b1fdc1p+33,  0x1.8ab7fb5475fb7p+34,
71     0x1.0c3d3920962c9p+36,  0x1.6c932696a6b5dp+37,  0x1.ef822f7f6731dp+38,
72     0x1.50bba3796379ap+40,  0x1.c9aae4631c056p+41,  0x1.370470aec28edp+43,
73     0x1.a6b765d8cdf6dp+44,  0x1.1f43fcc4b662cp+46,  0x1.866f34a725782p+47,
74     0x1.0953e2f3a1ef7p+49,  0x1.689e221bc8d5bp+50,  0x1.ea215a1d20d76p+51,
75     0x1.4d13fbb1a001ap+53,  0x1.c4b334617cc67p+54,  0x1.33a43d282a519p+56,
76     0x1.a220d397972ebp+57,  0x1.1c25c88df6862p+59,  0x1.8232558201159p+60,
77     0x1.0672a3c9eb871p+62,  0x1.64b41c6d37832p+63,  0x1.e4cf766fe49bep+64,
78     0x1.49767bc0483e3p+66,  0x1.bfc951eb8bb76p+67,  0x1.304d6aeca254bp+69,
79     0x1.9d97010884251p+70,  0x1.19103e4080b45p+72,  0x1.7e013cd114461p+73,
80     0x1.03996528e074cp+75,  0x1.60d4f6fdac731p+76,  0x1.df8c5af17ba3bp+77,
81     0x1.45e3076d61699p+79,  0x1.baed16a6e0da7p+80,  0x1.2cffdfebde1a1p+82,
82     0x1.9919cabefcb69p+83,  0x1.160345c9953e3p+85,  0x1.79dbc9dc53c66p+86,
83     0x1.00c810d464097p+88,  0x1.5d009394c5c27p+89,  0x1.da57de8f107a8p+90,
84     0x1.425982cf597cdp+92,  0x1.b61e5ca3a5e31p+93,  0x1.29bb825dfcf87p+95,
85     0x1.94a90db0d6fe2p+96,  0x1.12fec759586fdp+98,  0x1.75c1dc469e3afp+99,
86     0x1.fbfd219c43b04p+100, 0x1.5936d44e1a146p+102, 0x1.d531d8a7ee79cp+103,
87     0x1.3ed9d24a2d51bp+105, 0x1.b15cfe5b6e17bp+106, 0x1.268038c2c0e00p+108,
88     0x1.9044a73545d48p+109, 0x1.1002ab6218b38p+111, 0x1.71b3540cbf921p+112,
89     0x1.f6799ea9c414ap+113, 0x1.55779b984f3ebp+115, 0x1.d01a210c44aa4p+116,
90     0x1.3b63da8e91210p+118, 0x1.aca8d6b0116b8p+119, 0x1.234de9e0c74e9p+121,
91     0x1.8bec7503ca477p+122, 0x1.0d0eda9796b90p+124, 0x1.6db0118477245p+125,
92     0x1.f1056dc7bf22dp+126, 0x1.51c2cc3433801p+128, 0x1.cb108ffbec164p+129,
93 };
94 
95 // Lookup table for exp(m * 2^(-7)) with m = 0, ..., 127.
96 // Table is generated with Sollya as follow:
97 // > display = hexadecimal;
98 // > for i from 0 to 127 do { D(exp(i / 128)); };
99 static constexpr double EXP_M2[128] = {
100     0x1.0000000000000p0, 0x1.0202015600446p0, 0x1.04080ab55de39p0,
101     0x1.06122436410ddp0, 0x1.08205601127edp0, 0x1.0a32a84e9c1f6p0,
102     0x1.0c49236829e8cp0, 0x1.0e63cfa7ab09dp0, 0x1.1082b577d34edp0,
103     0x1.12a5dd543ccc5p0, 0x1.14cd4fc989cd6p0, 0x1.16f9157587069p0,
104     0x1.192937074e0cdp0, 0x1.1b5dbd3f68122p0, 0x1.1d96b0eff0e79p0,
105     0x1.1fd41afcba45ep0, 0x1.2216045b6f5cdp0, 0x1.245c7613b8a9bp0,
106     0x1.26a7793f60164p0, 0x1.28f7170a755fdp0, 0x1.2b4b58b372c79p0,
107     0x1.2da4478b620c7p0, 0x1.3001ecf601af7p0, 0x1.32645269ea829p0,
108     0x1.34cb8170b5835p0, 0x1.373783a722012p0, 0x1.39a862bd3c106p0,
109     0x1.3c1e2876834aap0, 0x1.3e98deaa11dccp0, 0x1.41188f42c3e32p0,
110     0x1.439d443f5f159p0, 0x1.462707b2bac21p0, 0x1.48b5e3c3e8186p0,
111     0x1.4b49e2ae5ac67p0, 0x1.4de30ec211e60p0, 0x1.50817263c13cdp0,
112     0x1.5325180cfacf7p0, 0x1.55ce0a4c58c7cp0, 0x1.587c53c5a7af0p0,
113     0x1.5b2fff3210fd9p0, 0x1.5de9176045ff5p0, 0x1.60a7a734ab0e8p0,
114     0x1.636bb9a983258p0, 0x1.663559cf1bc7cp0, 0x1.690492cbf9433p0,
115     0x1.6bd96fdd034a2p0, 0x1.6eb3fc55b1e76p0, 0x1.719443a03acb9p0,
116     0x1.747a513dbef6ap0, 0x1.776630c678bc1p0, 0x1.7a57ede9ea23ep0,
117     0x1.7d4f946f0ba8dp0, 0x1.804d30347b546p0, 0x1.8350cd30ac390p0,
118     0x1.865a7772164c5p0, 0x1.896a3b1f66a0ep0, 0x1.8c802477b0010p0,
119     0x1.8f9c3fd29beafp0, 0x1.92be99a09bf00p0, 0x1.95e73e6b1b75ep0,
120     0x1.99163ad4b1dccp0, 0x1.9c4b9b995509bp0, 0x1.9f876d8e8c566p0,
121     0x1.a2c9bda3a3e78p0, 0x1.a61298e1e069cp0, 0x1.a9620c6cb3374p0,
122     0x1.acb82581eee54p0, 0x1.b014f179fc3b8p0, 0x1.b3787dc80f95fp0,
123     0x1.b6e2d7fa5eb18p0, 0x1.ba540dba56e56p0, 0x1.bdcc2cccd3c85p0,
124     0x1.c14b431256446p0, 0x1.c4d15e873c193p0, 0x1.c85e8d43f7cd0p0,
125     0x1.cbf2dd7d490f2p0, 0x1.cf8e5d84758a9p0, 0x1.d3311bc7822b4p0,
126     0x1.d6db26d16cd67p0, 0x1.da8c8d4a66969p0, 0x1.de455df80e3c0p0,
127     0x1.e205a7bdab73ep0, 0x1.e5cd799c6a54ep0, 0x1.e99ce2b397649p0,
128     0x1.ed73f240dc142p0, 0x1.f152b7a07bb76p0, 0x1.f539424d90f5ep0,
129     0x1.f927a1e24bb76p0, 0x1.fd1de6182f8c9p0, 0x1.008e0f64294abp1,
130     0x1.02912df5ce72ap1, 0x1.049856cd84339p1, 0x1.06a39207f0a09p1,
131     0x1.08b2e7d2035cfp1, 0x1.0ac6606916501p1, 0x1.0cde041b0e9aep1,
132     0x1.0ef9db467dcf8p1, 0x1.1119ee5ac36b6p1, 0x1.133e45d82e952p1,
133     0x1.1566ea50201d7p1, 0x1.1793e4652cc50p1, 0x1.19c53ccb3fc6bp1,
134     0x1.1bfafc47bda73p1, 0x1.1e352bb1a74adp1, 0x1.2073d3f1bd518p1,
135     0x1.22b6fe02a3b9cp1, 0x1.24feb2f105cb8p1, 0x1.274afbdbba4a6p1,
136     0x1.299be1f3e7f1cp1, 0x1.2bf16e7d2a38cp1, 0x1.2e4baacdb6614p1,
137     0x1.30aaa04e80d05p1, 0x1.330e587b62b28p1, 0x1.3576dce33feadp1,
138     0x1.37e437282d4eep1, 0x1.3a5670ff972edp1, 0x1.3ccd9432682b4p1,
139     0x1.3f49aa9d30590p1, 0x1.41cabe304cb34p1, 0x1.4450d8f00edd4p1,
140     0x1.46dc04f4e5338p1, 0x1.496c4c6b832dap1, 0x1.4c01b9950a111p1,
141     0x1.4e9c56c731f5dp1, 0x1.513c2e6c731d7p1, 0x1.53e14b042f9cap1,
142     0x1.568bb722dd593p1, 0x1.593b7d72305bbp1,
143 };
144 
145 INLINE_FMA
146 LLVM_LIBC_FUNCTION(float, expf, (float x)) {
147   using FPBits = typename fputil::FPBits<float>;
148   FPBits xbits(x);
149 
150   // When x < log(2^-150) or nan
151   if (unlikely(xbits.uintval() >= 0xc2cf'f1b5U)) {
152     // exp(-Inf) = 0
153     if (xbits.is_inf())
154       return 0.0f;
155     // exp(nan) = nan
156     if (xbits.is_nan())
157       return x;
158     if (fputil::get_round() == FE_UPWARD)
159       return static_cast<float>(FPBits(FPBits::MIN_SUBNORMAL));
160     if (x != 0.0f)
161       errno = ERANGE;
162     return 0.0f;
163   }
164   // x >= 89 or nan
165   if (unlikely(!xbits.get_sign() && (xbits.uintval() >= 0x42b2'0000))) {
166     if (xbits.uintval() < 0x7f80'0000U) {
167       int rounding = fputil::get_round();
168       if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO)
169         return static_cast<float>(FPBits(FPBits::MAX_NORMAL));
170 
171       errno = ERANGE;
172     }
173     return x + static_cast<float>(FPBits::inf());
174   }
175   // |x| < 2^-25
176   if (unlikely(xbits.get_unbiased_exponent() <= 101)) {
177     return 1.0f + x;
178   }
179 
180   // For -104 < x < 89, to compute exp(x), we perform the following range
181   // reduction: find hi, mid, lo such that:
182   //   x = hi + mid + lo, in which
183   //     hi is an integer,
184   //     mid * 2^7 is an integer
185   //     -2^(-8) <= lo < 2^-8.
186   // In particular,
187   //   hi + mid = round(x * 2^7) * 2^(-7).
188   // Then,
189   //   exp(x) = exp(hi + mid + lo) = exp(hi) * exp(mid) * exp(lo).
190   // We store exp(hi) and exp(mid) in the lookup tables EXP_M1 and EXP_M2
191   // respectively.  exp(lo) is computed using a degree-7 minimax polynomial
192   // generated by Sollya.
193 
194   // x_hi = hi + mid.
195   int x_hi = static_cast<int>(x * 0x1.0p7f);
196   // Subtract (hi + mid) from x to get lo.
197   x -= static_cast<float>(x_hi) * 0x1.0p-7f;
198   double xd = static_cast<double>(x);
199   // Make sure that -2^(-8) <= lo < 2^-8.
200   if (x >= 0x1.0p-8f) {
201     ++x_hi;
202     xd -= 0x1.0p-7;
203   }
204   if (x < -0x1.0p-8f) {
205     --x_hi;
206     xd += 0x1.0p-7;
207   }
208   x_hi += 104 << 7;
209   // hi = x_hi >> 7
210   double exp_hi = EXP_M1[x_hi >> 7];
211   // lo = x_hi & 0x0000'007fU;
212   double exp_mid = EXP_M2[x_hi & 0x7f];
213   // Degree-7 minimax polynomial generated by Sollya with the following
214   // commands:
215   //   > display = hexadecimal;
216   //   > Q = fpminimax(expm1(x)/x, 6, [|D...|], [-2^-8, 2^-8]);
217   //   > Q;
218   double exp_lo = fputil::polyeval(
219       xd, 0x1p0, 0x1p0, 0x1p-1, 0x1.5555555555555p-3, 0x1.55555555553ap-5,
220       0x1.1111111204dfcp-7, 0x1.6c16cb2da593ap-10, 0x1.9ff1648996d2ep-13);
221   return static_cast<float>(exp_hi * exp_mid * exp_lo);
222 }
223 
224 } // namespace __llvm_libc
225