1 //===-- Utility class to manipulate wide floats. ----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "FPBits.h"
10 #include "NormalFloat.h"
11 #include "src/__support/CPP/UInt.h"
12 
13 #include <stdint.h>
14 
15 namespace __llvm_libc {
16 namespace fputil {
17 
18 // Store and manipulate positive double precision numbers at |Precision| bits.
19 template <size_t Precision> class XFloat {
20   static constexpr uint64_t OneMask = (uint64_t(1) << 63);
21   UInt<Precision> man;
22   static constexpr uint64_t WordCount = Precision / 64;
23   int exp;
24 
bit_width(uint64_t x)25   size_t bit_width(uint64_t x) {
26     if (x == 0)
27       return 0;
28     size_t shift = 0;
29     while ((OneMask & x) == 0) {
30       ++shift;
31       x <<= 1;
32     }
33     return 64 - shift;
34   }
35 
36 public:
XFloat()37   XFloat() : exp(0) {
38     for (int i = 0; i < WordCount; ++i)
39       man[i] = 0;
40   }
41 
XFloat(const XFloat & other)42   XFloat(const XFloat &other) : exp(other.exp) {
43     for (int i = 0; i < WordCount; ++i)
44       man[i] = other.man[i];
45   }
46 
XFloat(double x)47   explicit XFloat(double x) {
48     auto xn = NormalFloat<double>(x);
49     exp = xn.exponent;
50     man[WordCount - 1] = xn.mantissa << 11;
51     for (int i = 0; i < WordCount - 1; ++i)
52       man[i] = 0;
53   }
54 
XFloat(int e,const UInt<Precision> & bits)55   XFloat(int e, const UInt<Precision> &bits) : exp(e) {
56     for (size_t i = 0; i < WordCount; ++i)
57       man[i] = bits[i];
58   }
59 
60   // Multiply this number with x and store the result in this number.
mul(double x)61   void mul(double x) {
62     auto xn = NormalFloat<double>(x);
63     exp += xn.exponent;
64     uint64_t carry = man.mul(xn.mantissa << 11);
65     size_t carry_width = bit_width(carry);
66     carry_width = (carry_width == 64 ? 64 : 63);
67     man.shift_right(carry_width);
68     man[WordCount - 1] = man[WordCount - 1] + (carry << (64 - carry_width));
69     exp += carry_width == 64 ? 1 : 0;
70     normalize();
71   }
72 
drop_int()73   void drop_int() {
74     if (exp < 0)
75       return;
76     if (exp > int(Precision - 1)) {
77       for (size_t i = 0; i < WordCount; ++i)
78         man[i] = 0;
79       return;
80     }
81 
82     man.shift_left(exp + 1);
83     man.shift_right(exp + 1);
84 
85     normalize();
86   }
87 
mul(const XFloat<Precision> & other)88   double mul(const XFloat<Precision> &other) {
89     constexpr size_t row_words = 2 * WordCount + 1;
90     constexpr size_t row_precision = row_words * 64;
91     constexpr size_t result_bits = 2 * Precision;
92     UInt<row_precision> rows[WordCount];
93 
94     for (size_t r = 0; r < WordCount; ++r) {
95       for (size_t i = 0; i < row_words; ++i) {
96         if (i < WordCount)
97           rows[r][i] = man[i];
98         else
99           rows[r][i] = 0;
100       }
101       rows[r].mul(other.man[r]);
102       rows[r].shift_left(r * 64);
103     }
104 
105     for (size_t r = 1; r < WordCount; ++r) {
106       rows[0].add(rows[r]);
107     }
108     int result_exp = exp + other.exp;
109     uint64_t carry = rows[0][row_words - 1];
110     if (carry) {
111       size_t carry_width = bit_width(carry);
112       rows[0].shift_right(carry_width);
113       rows[0][row_words - 2] =
114           rows[0][row_words - 2] + (carry << (64 - carry_width));
115       result_exp += carry_width;
116     }
117 
118     if (rows[0][row_words - 2] & OneMask) {
119       ++result_exp;
120     } else {
121       rows[0].shift_left(1);
122     }
123 
124     UInt<result_bits> result_man;
125     for (size_t i = 0; i < result_bits / 64; ++i)
126       result_man[i] = rows[0][i];
127     XFloat<result_bits> result(result_exp, result_man);
128     result.normalize();
129     return double(result);
130   }
131 
132   explicit operator double() {
133     normalize();
134 
135     constexpr uint64_t one = uint64_t(1) << 10;
136     constexpr uint64_t excess_mask = (one << 1) - 1;
137     uint64_t excess = man[WordCount - 1] & excess_mask;
138     uint64_t new_man = man[WordCount - 1] >> 11;
139     if (excess > one) {
140       // We have to round up.
141       ++new_man;
142     } else if (excess == one) {
143       bool greater_than_one = false;
144       for (size_t i = 0; i < WordCount - 1; ++i) {
145         greater_than_one = (man[i] != 0);
146         if (greater_than_one)
147           break;
148       }
149       if (greater_than_one || (new_man & 1) != 0) {
150         ++new_man;
151       }
152     }
153 
154     if (new_man == (uint64_t(1) << 53))
155       ++exp;
156 
157     // We use NormalFloat as it can produce subnormal numbers or underflow to 0
158     // if necessary.
159     NormalFloat<double> d(exp, new_man, 0);
160     return double(d);
161   }
162 
163   // Normalizes this number.
normalize()164   void normalize() {
165     uint64_t man_bits = 0;
166     for (size_t i = 0; i < WordCount; ++i)
167       man_bits |= man[i];
168 
169     if (man_bits == 0)
170       return;
171 
172     while ((man[WordCount - 1] & OneMask) == 0) {
173       man.shift_left(1);
174       --exp;
175     }
176   }
177 };
178 
179 } // namespace fputil
180 } // namespace __llvm_libc
181