1 //===-- Common header for fmod implementations ------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_GENERIC_FMOD_H 10 #define LLVM_LIBC_SRC_SUPPORT_FPUTIL_GENERIC_FMOD_H 11 12 #include "src/__support/CPP/Limits.h" 13 #include "src/__support/CPP/TypeTraits.h" 14 #include "src/__support/FPUtil/FEnvImpl.h" 15 #include "src/__support/FPUtil/FPBits.h" 16 #include "src/__support/FPUtil/builtin_wrappers.h" 17 #include "src/__support/common.h" 18 #include "src/math/generic/math_utils.h" 19 20 namespace __llvm_libc { 21 namespace fputil { 22 namespace generic { 23 24 // Objective: 25 // The algorithm uses integer arithmetic (max uint64_t) for general case. 26 // Some common cases, like abs(x) < abs(y) or abs(x) < 1000 * abs(y) are 27 // treated specially to increase performance. The part of checking special 28 // cases, numbers NaN, INF etc. treated separately. 29 // 30 // Objective: 31 // 1) FMod definition (https://cplusplus.com/reference/cmath/fmod/): 32 // fmod = numer - tquot * denom, where tquot is the truncated 33 // (i.e., rounded towards zero) result of: numer/denom. 34 // 2) FMod with negative x and/or y can be trivially converted to fmod for 35 // positive x and y. Therefore the algorithm below works only with 36 // positive numbers. 37 // 3) All positive floating point numbers can be represented as m * 2^e, 38 // where "m" is positive integer and "e" is signed. 39 // 4) FMod function can be calculated in integer numbers (x > y): 40 // fmod = m_x * 2^e_x - tquot * m_y * 2^e_y 41 // = 2^e_y * (m_x * 2^(e_x - e^y) - tquot * m_y). 42 // All variables in parentheses are unsigned integers. 43 // 44 // Mathematical background: 45 // Input x,y in the algorithm is represented (mathematically) like m_x*2^e_x 46 // and m_y*2^e_y. This is an ambiguous number representation. For example: 47 // m * 2^e = (2 * m) * 2^(e-1) 48 // The algorithm uses the facts that 49 // r = a % b = (a % (N * b)) % b, 50 // (a * c) % (b * c) = (a % b) * c 51 // where N is positive integer number. a, b and c - positive. Let's adopt 52 // the formula for representation above. 53 // a = m_x * 2^e_x, b = m_y * 2^e_y, N = 2^k 54 // r(k) = a % b = (m_x * 2^e_x) % (2^k * m_y * 2^e_y) 55 // = 2^(e_y + k) * (m_x * 2^(e_x - e_y - k) % m_y) 56 // r(k) = m_r * 2^e_r = (m_x % m_y) * 2^(m_y + k) 57 // = (2^p * (m_x % m_y) * 2^(e_y + k - p)) 58 // m_r = 2^p * (m_x % m_y), e_r = m_y + k - p 59 // 60 // Algorithm description: 61 // First, let write x = m_x * 2^e_x and y = m_y * 2^e_y with m_x, m_y, e_x, e_y 62 // are integers (m_x amd m_y positive). 63 // Then the naive implementation of the fmod function with a simple 64 // for/while loop: 65 // while (e_x > e_y) { 66 // m_x *= 2; --e_x; // m_x * 2^e_x == 2 * m_x * 2^(e_x - 1) 67 // m_x %= m_y; 68 // } 69 // On the other hand, the algorithm exploits the fact that m_x, m_y are the 70 // mantissas of floating point numbers, which use less bits than the storage 71 // integers: 24 / 32 for floats and 53 / 64 for doubles, so if in each step of 72 // the iteration, we can left shift m_x as many bits as the storage integer 73 // type can hold, the exponent reduction per step will be at least 32 - 24 = 8 74 // for floats and 64 - 53 = 11 for doubles (double example below): 75 // while (e_x > e_y) { 76 // m_x <<= 11; e_x -= 11; // m_x * 2^e_x == 2^11 * m_x * 2^(e_x - 11) 77 // m_x %= m_y; 78 // } 79 // Some extra improvements are done: 80 // 1) Shift m_y maximum to the right, which can significantly improve 81 // performance for small integer numbers (y = 3 for example). 82 // The m_x shift in the loop can be 62 instead of 11 for double. 83 // 2) For some architectures with very slow division, it can be better to 84 // calculate inverse value ones, and after do multiplication in the loop. 85 // 3) "likely" special cases are treated specially to improve performance. 86 // 87 // Simple example: 88 // The examples below use byte for simplicity. 89 // 1) Shift hy maximum to right without losing bits and increase iy value 90 // m_y = 0b00101100 e_y = 20 after shift m_y = 0b00001011 e_y = 22. 91 // 2) m_x = m_x % m_y. 92 // 3) Move m_x maximum to left. Note that after (m_x = m_x % m_y) CLZ in m_x 93 // is not lower than CLZ in m_y. m_x=0b00001001 e_x = 100, m_x=0b10010000, 94 // e_x = 100-4 = 96. 95 // 4) Repeat (2) until e_x == e_y. 96 // 97 // Complexity analysis (double): 98 // Converting x,y to (m_x,e_x),(m_y, e_y): CTZ/shift/AND/OR/if. Loop count: 99 // (m_x - m_y) / (64 - "length of m_y"). 100 // max("length of m_y") = 53, 101 // max(e_x - e_y) = 2048 102 // Maximum operation is 186. For rare "unrealistic" cases. 103 // 104 // Special cases (double): 105 // Supposing that case where |y| > 1e-292 and |x/y|<2000 is very common 106 // special processing is implemented. No m_y alignment, no loop: 107 // result = (m_x * 2^(e_x - e_y)) % m_y. 108 // When x and y are both subnormal (rare case but...) the 109 // result = m_x % m_y. 110 // Simplified conversion back to double. 111 112 // Exceptional cases handler according to cppreference.com 113 // https://en.cppreference.com/w/cpp/numeric/math/fmod 114 // and POSIX standard described in Linux man 115 // https://man7.org/linux/man-pages/man3/fmod.3p.html 116 // C standard for the function is not full, so not by default (although it can 117 // be implemented in another handler. 118 // Signaling NaN converted to quiet NaN with FE_INVALID exception. 119 // https://www.open-std.org/JTC1/SC22/WG14/www/docs/n1011.htm 120 template <typename T> struct FModExceptionalInputHandler { 121 122 static_assert(cpp::IsFloatingPointType<T>::Value, 123 "FModCStandardWrapper instantiated with invalid type."); 124 PreCheckFModExceptionalInputHandler125 static bool PreCheck(T x, T y, T &out) { 126 using FPB = fputil::FPBits<T>; 127 const T quiet_NaN = FPB::build_nan(FPB::FloatProp::QUIET_NAN_MASK); 128 FPB sx(x), sy(y); 129 if (likely(!sy.is_zero() && !sy.is_inf_or_nan() && !sx.is_inf_or_nan())) { 130 return false; 131 } 132 133 if (sx.is_nan() || sy.is_nan()) { 134 if ((sx.is_nan() && !sx.is_quiet_nan()) || 135 (sy.is_nan() && !sy.is_quiet_nan())) 136 fputil::set_except(FE_INVALID); 137 out = quiet_NaN; 138 return true; 139 } 140 141 if (sx.is_inf() || sy.is_zero()) { 142 fputil::set_except(FE_INVALID); 143 out = with_errno(quiet_NaN, EDOM); 144 return true; 145 } 146 147 if (sy.is_inf()) { 148 out = x; 149 return true; 150 } 151 152 // case where x == 0 153 out = x; 154 return true; 155 } 156 }; 157 158 template <typename T> struct FModFastMathWrapper { 159 160 static_assert(cpp::IsFloatingPointType<T>::Value, 161 "FModFastMathWrapper instantiated with invalid type."); 162 PreCheckFModFastMathWrapper163 static bool PreCheck(T, T, T &) { return false; } 164 }; 165 166 template <typename T> class FModDivisionSimpleHelper { 167 private: 168 using intU_t = typename FPBits<T>::UIntType; 169 170 public: execute(int exp_diff,int sides_zeroes_count,intU_t m_x,intU_t m_y)171 inline constexpr static intU_t execute(int exp_diff, int sides_zeroes_count, 172 intU_t m_x, intU_t m_y) { 173 while (exp_diff > sides_zeroes_count) { 174 exp_diff -= sides_zeroes_count; 175 m_x <<= sides_zeroes_count; 176 m_x %= m_y; 177 } 178 m_x <<= exp_diff; 179 m_x %= m_y; 180 return m_x; 181 } 182 }; 183 184 template <typename T> class FModDivisionInvMultHelper { 185 private: 186 using FPB = FPBits<T>; 187 using intU_t = typename FPB::UIntType; 188 189 public: execute(int exp_diff,int sides_zeroes_count,intU_t m_x,intU_t m_y)190 inline constexpr static intU_t execute(int exp_diff, int sides_zeroes_count, 191 intU_t m_x, intU_t m_y) { 192 if (exp_diff > sides_zeroes_count) { 193 intU_t inv_hy = (cpp::NumericLimits<intU_t>::max() / m_y); 194 while (exp_diff > sides_zeroes_count) { 195 exp_diff -= sides_zeroes_count; 196 intU_t hd = 197 (m_x * inv_hy) >> (FPB::FloatProp::BIT_WIDTH - sides_zeroes_count); 198 m_x <<= sides_zeroes_count; 199 m_x -= hd * m_y; 200 while (unlikely(m_x > m_y)) 201 m_x -= m_y; 202 } 203 intU_t hd = (m_x * inv_hy) >> (FPB::FloatProp::BIT_WIDTH - exp_diff); 204 m_x <<= exp_diff; 205 m_x -= hd * m_y; 206 while (unlikely(m_x > m_y)) 207 m_x -= m_y; 208 } else { 209 m_x <<= exp_diff; 210 m_x %= m_y; 211 } 212 return m_x; 213 } 214 }; 215 216 template <typename T, class Wrapper = FModExceptionalInputHandler<T>, 217 class DivisionHelper = FModDivisionSimpleHelper<T>> 218 class FMod { 219 static_assert(cpp::IsFloatingPointType<T>::Value, 220 "FMod instantiated with invalid type."); 221 222 private: 223 using FPB = FPBits<T>; 224 using intU_t = typename FPB::UIntType; 225 eval_internal(FPB sx,FPB sy)226 inline static constexpr FPB eval_internal(FPB sx, FPB sy) { 227 228 if (likely(sx.uintval() <= sy.uintval())) { 229 if (sx.uintval() < sy.uintval()) 230 return sx; // |x|<|y| return x 231 return FPB::zero(); // |x|=|y| return 0.0 232 } 233 234 int e_x = sx.get_unbiased_exponent(); 235 int e_y = sy.get_unbiased_exponent(); 236 237 // Most common case where |y| is "very normal" and |x/y| < 2^EXPONENT_WIDTH 238 if (likely(e_y > int(FPB::FloatProp::MANTISSA_WIDTH) && 239 e_x - e_y <= int(FPB::FloatProp::EXPONENT_WIDTH))) { 240 intU_t m_x = sx.get_explicit_mantissa(); 241 intU_t m_y = sy.get_explicit_mantissa(); 242 intU_t d = (e_x == e_y) ? (m_x - m_y) : (m_x << (e_x - e_y)) % m_y; 243 if (d == 0) 244 return FPB::zero(); 245 // iy - 1 because of "zero power" for number with power 1 246 return FPB::make_value(d, e_y - 1); 247 } 248 /* Both subnormal special case. */ 249 if (unlikely(e_x == 0 && e_y == 0)) { 250 FPB d; 251 d.set_mantissa(sx.uintval() % sy.uintval()); 252 return d; 253 } 254 255 // Note that hx is not subnormal by conditions above. 256 intU_t m_x = sx.get_explicit_mantissa(); 257 e_x--; 258 259 intU_t m_y = sy.get_explicit_mantissa(); 260 int lead_zeros_m_y = FPB::FloatProp::EXPONENT_WIDTH; 261 if (likely(e_y > 0)) { 262 e_y--; 263 } else { 264 m_y = sy.get_mantissa(); 265 lead_zeros_m_y = unsafe_clz(m_y); 266 } 267 268 // Assume hy != 0 269 int tail_zeros_m_y = unsafe_ctz(m_y); 270 int sides_zeroes_count = lead_zeros_m_y + tail_zeros_m_y; 271 // n > 0 by conditions above 272 int exp_diff = e_x - e_y; 273 { 274 // Shift hy right until the end or n = 0 275 int right_shift = exp_diff < tail_zeros_m_y ? exp_diff : tail_zeros_m_y; 276 m_y >>= right_shift; 277 exp_diff -= right_shift; 278 e_y += right_shift; 279 } 280 281 { 282 // Shift hx left until the end or n = 0 283 int left_shift = exp_diff < int(FPB::FloatProp::EXPONENT_WIDTH) 284 ? exp_diff 285 : FPB::FloatProp::EXPONENT_WIDTH; 286 m_x <<= left_shift; 287 exp_diff -= left_shift; 288 } 289 290 m_x %= m_y; 291 if (unlikely(m_x == 0)) 292 return FPB::zero(); 293 294 if (exp_diff == 0) 295 return FPB::make_value(m_x, e_y); 296 297 /* hx next can't be 0, because hx < hy, hy % 2 == 1 hx * 2^i % hy != 0 */ 298 m_x = DivisionHelper::execute(exp_diff, sides_zeroes_count, m_x, m_y); 299 return FPB::make_value(m_x, e_y); 300 } 301 302 public: eval(T x,T y)303 static inline T eval(T x, T y) { 304 if (T out; Wrapper::PreCheck(x, y, out)) 305 return out; 306 FPB sx(x), sy(y); 307 bool sign = sx.get_sign(); 308 sx.set_sign(false); 309 sy.set_sign(false); 310 FPB result = eval_internal(sx, sy); 311 result.set_sign(sign); 312 return result.get_val(); 313 } 314 }; 315 316 } // namespace generic 317 } // namespace fputil 318 } // namespace __llvm_libc 319 320 #endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_GENERIC_FMOD_H 321