1bbb75554SSiva Chandra //===-- Implementation of hypotf function ---------------------------------===//
2bbb75554SSiva Chandra //
3bbb75554SSiva Chandra // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4bbb75554SSiva Chandra // See https://llvm.org/LICENSE.txt for license information.
5bbb75554SSiva Chandra // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6bbb75554SSiva Chandra //
7bbb75554SSiva Chandra //===----------------------------------------------------------------------===//
8bbb75554SSiva Chandra #include "src/math/hypotf.h"
9*f1ec99f9STue Ly #include "src/__support/FPUtil/BasicOperations.h"
10*f1ec99f9STue Ly #include "src/__support/FPUtil/FPBits.h"
11*f1ec99f9STue Ly #include "src/__support/FPUtil/sqrt.h"
12bbb75554SSiva Chandra #include "src/__support/common.h"
13bbb75554SSiva Chandra 
14bbb75554SSiva Chandra namespace __llvm_libc {
15bbb75554SSiva Chandra 
16bbb75554SSiva Chandra LLVM_LIBC_FUNCTION(float, hypotf, (float x, float y)) {
17*f1ec99f9STue Ly   using DoubleBits = fputil::FPBits<double>;
18*f1ec99f9STue Ly   using FPBits = fputil::FPBits<float>;
19*f1ec99f9STue Ly 
20*f1ec99f9STue Ly   FPBits x_bits(x), y_bits(y);
21*f1ec99f9STue Ly 
22*f1ec99f9STue Ly   uint16_t x_exp = x_bits.get_unbiased_exponent();
23*f1ec99f9STue Ly   uint16_t y_exp = y_bits.get_unbiased_exponent();
24*f1ec99f9STue Ly   uint16_t exp_diff = (x_exp > y_exp) ? (x_exp - y_exp) : (y_exp - x_exp);
25*f1ec99f9STue Ly 
26*f1ec99f9STue Ly   if (exp_diff >= fputil::MantissaWidth<float>::VALUE + 2) {
27*f1ec99f9STue Ly     return fputil::abs(x) + fputil::abs(y);
28*f1ec99f9STue Ly   }
29*f1ec99f9STue Ly 
30*f1ec99f9STue Ly   double xd = static_cast<double>(x);
31*f1ec99f9STue Ly   double yd = static_cast<double>(y);
32*f1ec99f9STue Ly 
33*f1ec99f9STue Ly   // These squares are exact.
34*f1ec99f9STue Ly   double x_sq = xd * xd;
35*f1ec99f9STue Ly   double y_sq = yd * yd;
36*f1ec99f9STue Ly 
37*f1ec99f9STue Ly   // Compute the sum of squares.
38*f1ec99f9STue Ly   double sum_sq = x_sq + y_sq;
39*f1ec99f9STue Ly 
40*f1ec99f9STue Ly   // Compute the rounding error with Fast2Sum algorithm:
41*f1ec99f9STue Ly   // x_sq + y_sq = sum_sq - err
42*f1ec99f9STue Ly   double err = (x_sq >= y_sq) ? (sum_sq - x_sq) - y_sq : (sum_sq - y_sq) - x_sq;
43*f1ec99f9STue Ly 
44*f1ec99f9STue Ly   // Take sqrt in double precision.
45*f1ec99f9STue Ly   DoubleBits result(fputil::sqrt(sum_sq));
46*f1ec99f9STue Ly 
47*f1ec99f9STue Ly   if (!DoubleBits(sum_sq).is_inf_or_nan()) {
48*f1ec99f9STue Ly     // Correct rounding.
49*f1ec99f9STue Ly     double r_sq = static_cast<double>(result) * static_cast<double>(result);
50*f1ec99f9STue Ly     double diff = sum_sq - r_sq;
51*f1ec99f9STue Ly     constexpr uint64_t mask = 0x0000'0000'3FFF'FFFFULL;
52*f1ec99f9STue Ly     uint64_t lrs = result.uintval() & mask;
53*f1ec99f9STue Ly 
54*f1ec99f9STue Ly     if (lrs == 0x0000'0000'1000'0000ULL && err < diff) {
55*f1ec99f9STue Ly       result.bits |= 1ULL;
56*f1ec99f9STue Ly     } else if (lrs == 0x0000'0000'3000'0000ULL && err > diff) {
57*f1ec99f9STue Ly       result.bits -= 1ULL;
58*f1ec99f9STue Ly     }
59*f1ec99f9STue Ly   } else {
60*f1ec99f9STue Ly     FPBits bits_x(x), bits_y(y);
61*f1ec99f9STue Ly     if (bits_x.is_inf_or_nan() || bits_y.is_inf_or_nan()) {
62*f1ec99f9STue Ly       if (bits_x.is_inf() || bits_y.is_inf())
63*f1ec99f9STue Ly         return static_cast<float>(FPBits::inf());
64*f1ec99f9STue Ly       if (bits_x.is_nan())
65*f1ec99f9STue Ly         return x;
66*f1ec99f9STue Ly       return y;
67*f1ec99f9STue Ly     }
68*f1ec99f9STue Ly   }
69*f1ec99f9STue Ly 
70*f1ec99f9STue Ly   return static_cast<float>(static_cast<double>(result));
71bbb75554SSiva Chandra }
72bbb75554SSiva Chandra 
73bbb75554SSiva Chandra } // namespace __llvm_libc
74