1 //===-- String to float conversion utils ------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LIBC_SRC_SUPPORT_STR_TO_FLOAT_H 10 #define LIBC_SRC_SUPPORT_STR_TO_FLOAT_H 11 12 #include "src/__support/CPP/Limits.h" 13 #include "src/__support/CPP/UInt128.h" 14 #include "src/__support/FPUtil/FPBits.h" 15 #include "src/__support/FPUtil/builtin_wrappers.h" 16 #include "src/__support/ctype_utils.h" 17 #include "src/__support/detailed_powers_of_ten.h" 18 #include "src/__support/high_precision_decimal.h" 19 #include "src/__support/str_to_integer.h" 20 #include <errno.h> 21 22 namespace __llvm_libc { 23 namespace internal { 24 25 template <class T> uint32_t inline leading_zeroes(T inputNumber) { 26 constexpr uint32_t BITS_IN_T = sizeof(T) * 8; 27 if (inputNumber == 0) { 28 return BITS_IN_T; 29 } 30 uint32_t cur_guess = BITS_IN_T / 2; 31 uint32_t range_size = BITS_IN_T / 2; 32 // while either shifting by curGuess does not get rid of all of the bits or 33 // shifting by one less also gets rid of all of the bits then we have not 34 // found the first bit. 35 while (((inputNumber >> cur_guess) > 0) || 36 ((inputNumber >> (cur_guess - 1)) == 0)) { 37 // Binary search for the first set bit 38 range_size /= 2; 39 if (range_size == 0) { 40 break; 41 } 42 if ((inputNumber >> cur_guess) > 0) { 43 cur_guess += range_size; 44 } else { 45 cur_guess -= range_size; 46 } 47 } 48 if (inputNumber >> cur_guess > 0) { 49 cur_guess++; 50 } 51 return BITS_IN_T - cur_guess; 52 } 53 54 template <> uint32_t inline leading_zeroes<uint32_t>(uint32_t inputNumber) { 55 return fputil::safe_clz(inputNumber); 56 } 57 58 template <> uint32_t inline leading_zeroes<uint64_t>(uint64_t inputNumber) { 59 return fputil::safe_clz(inputNumber); 60 } 61 62 static inline uint64_t low64(const UInt128 &num) { 63 return static_cast<uint64_t>(num & 0xffffffffffffffff); 64 } 65 66 static inline uint64_t high64(const UInt128 &num) { 67 return static_cast<uint64_t>(num >> 64); 68 } 69 70 template <class T> inline void set_implicit_bit(fputil::FPBits<T> &result) { 71 return; 72 } 73 74 #if defined(SPECIAL_X86_LONG_DOUBLE) 75 template <> 76 inline void set_implicit_bit<long double>(fputil::FPBits<long double> &result) { 77 result.set_implicit_bit(result.get_unbiased_exponent() != 0); 78 } 79 #endif 80 81 // This Eisel-Lemire implementation is based on the algorithm described in the 82 // paper Number Parsing at a Gigabyte per Second, Software: Practice and 83 // Experience 51 (8), 2021 (https://arxiv.org/abs/2101.11408), as well as the 84 // description by Nigel Tao 85 // (https://nigeltao.github.io/blog/2020/eisel-lemire.html) and the golang 86 // implementation, also by Nigel Tao 87 // (https://github.com/golang/go/blob/release-branch.go1.16/src/strconv/eisel_lemire.go#L25) 88 // for some optimizations as well as handling 32 bit floats. 89 template <class T> 90 static inline bool 91 eisel_lemire(typename fputil::FPBits<T>::UIntType mantissa, int32_t exp10, 92 typename fputil::FPBits<T>::UIntType *outputMantissa, 93 uint32_t *outputExp2) { 94 95 using BitsType = typename fputil::FPBits<T>::UIntType; 96 constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8; 97 98 if (sizeof(T) > 8) { // This algorithm cannot handle anything longer than a 99 // double, so we skip straight to the fallback. 100 return false; 101 } 102 103 // Exp10 Range 104 if (exp10 < DETAILED_POWERS_OF_TEN_MIN_EXP_10 || 105 exp10 > DETAILED_POWERS_OF_TEN_MAX_EXP_10) { 106 return false; 107 } 108 109 // Normalization 110 uint32_t clz = leading_zeroes<BitsType>(mantissa); 111 mantissa <<= clz; 112 113 uint32_t exp2 = static_cast<uint32_t>(exp10_to_exp2(exp10)) + BITS_IN_MANTISSA + 114 fputil::FloatProperties<T>::EXPONENT_BIAS - clz; 115 116 // Multiplication 117 const uint64_t *power_of_ten = 118 DETAILED_POWERS_OF_TEN[exp10 - DETAILED_POWERS_OF_TEN_MIN_EXP_10]; 119 120 UInt128 first_approx = 121 static_cast<UInt128>(mantissa) * static_cast<UInt128>(power_of_ten[1]); 122 123 // Wider Approximation 124 UInt128 final_approx; 125 // The halfway constant is used to check if the bits that will be shifted away 126 // intially are all 1. For doubles this is 64 (bitstype size) - 52 (final 127 // mantissa size) - 3 (we shift away the last two bits separately for 128 // accuracy, and the most significant bit is ignored.) = 9 bits. Similarly, 129 // it's 6 bits for floats in this case. 130 const uint64_t halfway_constant = 131 (uint64_t(1) << (BITS_IN_MANTISSA - 132 fputil::FloatProperties<T>::MANTISSA_WIDTH - 3)) - 133 1; 134 if ((high64(first_approx) & halfway_constant) == halfway_constant && 135 low64(first_approx) + mantissa < mantissa) { 136 UInt128 low_bits = 137 static_cast<UInt128>(mantissa) * static_cast<UInt128>(power_of_ten[0]); 138 UInt128 second_approx = 139 first_approx + static_cast<UInt128>(high64(low_bits)); 140 141 if ((high64(second_approx) & halfway_constant) == halfway_constant && 142 low64(second_approx) + 1 == 0 && 143 low64(low_bits) + mantissa < mantissa) { 144 return false; 145 } 146 final_approx = second_approx; 147 } else { 148 final_approx = first_approx; 149 } 150 151 // Shifting to 54 bits for doubles and 25 bits for floats 152 BitsType msb = static_cast<BitsType>(high64(final_approx) >> (BITS_IN_MANTISSA - 1)); 153 BitsType final_mantissa = static_cast<BitsType>(high64(final_approx) >> 154 (msb + BITS_IN_MANTISSA - 155 (fputil::FloatProperties<T>::MANTISSA_WIDTH + 3))); 156 exp2 -= static_cast<uint32_t>(1 ^ msb); // same as !msb 157 158 // Half-way ambiguity 159 if (low64(final_approx) == 0 && 160 (high64(final_approx) & halfway_constant) == 0 && 161 (final_mantissa & 3) == 1) { 162 return false; 163 } 164 165 // From 54 to 53 bits for doubles and 25 to 24 bits for floats 166 final_mantissa += final_mantissa & 1; 167 final_mantissa >>= 1; 168 if ((final_mantissa >> (fputil::FloatProperties<T>::MANTISSA_WIDTH + 1)) > 169 0) { 170 final_mantissa >>= 1; 171 ++exp2; 172 } 173 174 // The if block is equivalent to (but has fewer branches than): 175 // if exp2 <= 0 || exp2 >= 0x7FF { etc } 176 if (exp2 - 1 >= (1 << fputil::FloatProperties<T>::EXPONENT_WIDTH) - 2) { 177 return false; 178 } 179 180 *outputMantissa = final_mantissa; 181 *outputExp2 = exp2; 182 return true; 183 } 184 185 #if !defined(LONG_DOUBLE_IS_DOUBLE) 186 template <> 187 inline bool eisel_lemire<long double>( 188 typename fputil::FPBits<long double>::UIntType mantissa, int32_t exp10, 189 typename fputil::FPBits<long double>::UIntType *outputMantissa, 190 uint32_t *outputExp2) { 191 using BitsType = typename fputil::FPBits<long double>::UIntType; 192 constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8; 193 194 // Exp10 Range 195 // This doesn't reach very far into the range for long doubles, since it's 196 // sized for doubles and their 11 exponent bits, and not for long doubles and 197 // their 15 exponent bits (max exponent of ~300 for double vs ~5000 for long 198 // double). This is a known tradeoff, and was made because a proper long 199 // double table would be approximately 16 times larger. This would have 200 // significant memory and storage costs all the time to speed up a relatively 201 // uncommon path. In addition the exp10_to_exp2 function only approximates 202 // multiplying by log(10)/log(2), and that approximation may not be accurate 203 // out to the full long double range. 204 if (exp10 < DETAILED_POWERS_OF_TEN_MIN_EXP_10 || 205 exp10 > DETAILED_POWERS_OF_TEN_MAX_EXP_10) { 206 return false; 207 } 208 209 // Normalization 210 uint32_t clz = leading_zeroes<BitsType>(mantissa); 211 mantissa <<= clz; 212 213 uint32_t exp2 = static_cast<uint32_t>(exp10_to_exp2(exp10)) + BITS_IN_MANTISSA + 214 fputil::FloatProperties<long double>::EXPONENT_BIAS - clz; 215 216 // Multiplication 217 const uint64_t *power_of_ten = 218 DETAILED_POWERS_OF_TEN[exp10 - DETAILED_POWERS_OF_TEN_MIN_EXP_10]; 219 220 // Since the input mantissa is more than 64 bits, we have to multiply with the 221 // full 128 bits of the power of ten to get an approximation with the same 222 // number of significant bits. This means that we only get the one 223 // approximation, and that approximation is 256 bits long. 224 UInt128 approx_upper = static_cast<UInt128>(high64(mantissa)) * 225 static_cast<UInt128>(power_of_ten[1]); 226 227 UInt128 approx_middle = static_cast<UInt128>(high64(mantissa)) * 228 static_cast<UInt128>(power_of_ten[0]) + 229 static_cast<UInt128>(low64(mantissa)) * 230 static_cast<UInt128>(power_of_ten[1]); 231 232 UInt128 approx_lower = static_cast<UInt128>(low64(mantissa)) * 233 static_cast<UInt128>(power_of_ten[0]); 234 235 UInt128 final_approx_lower = 236 approx_lower + (static_cast<UInt128>(low64(approx_middle)) << 64); 237 UInt128 final_approx_upper = approx_upper + high64(approx_middle) + 238 (final_approx_lower < approx_lower ? 1 : 0); 239 240 // The halfway constant is used to check if the bits that will be shifted away 241 // intially are all 1. For 80 bit floats this is 128 (bitstype size) - 64 242 // (final mantissa size) - 3 (we shift away the last two bits separately for 243 // accuracy, and the most significant bit is ignored.) = 61 bits. Similarly, 244 // it's 12 bits for 128 bit floats in this case. 245 constexpr UInt128 HALFWAY_CONSTANT = 246 (UInt128(1) << (BITS_IN_MANTISSA - 247 fputil::FloatProperties<long double>::MANTISSA_WIDTH - 248 3)) - 249 1; 250 251 if ((final_approx_upper & HALFWAY_CONSTANT) == HALFWAY_CONSTANT && 252 final_approx_lower + mantissa < mantissa) { 253 return false; 254 } 255 256 // Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats 257 BitsType msb = final_approx_upper >> (BITS_IN_MANTISSA - 1); 258 BitsType final_mantissa = 259 final_approx_upper >> 260 (msb + BITS_IN_MANTISSA - 261 (fputil::FloatProperties<long double>::MANTISSA_WIDTH + 3)); 262 exp2 -= static_cast<uint32_t>(1 ^ msb); // same as !msb 263 264 // Half-way ambiguity 265 if (final_approx_lower == 0 && (final_approx_upper & HALFWAY_CONSTANT) == 0 && 266 (final_mantissa & 3) == 1) { 267 return false; 268 } 269 270 // From 65 to 64 bits for 80 bit floats and 113 to 112 bits for 128 bit 271 // floats 272 final_mantissa += final_mantissa & 1; 273 final_mantissa >>= 1; 274 if ((final_mantissa >> 275 (fputil::FloatProperties<long double>::MANTISSA_WIDTH + 1)) > 0) { 276 final_mantissa >>= 1; 277 ++exp2; 278 } 279 280 // The if block is equivalent to (but has fewer branches than): 281 // if exp2 <= 0 || exp2 >= MANTISSA_MAX { etc } 282 if (exp2 - 1 >= 283 (1 << fputil::FloatProperties<long double>::EXPONENT_WIDTH) - 2) { 284 return false; 285 } 286 287 *outputMantissa = final_mantissa; 288 *outputExp2 = exp2; 289 return true; 290 } 291 #endif 292 293 // The nth item in POWERS_OF_TWO represents the greatest power of two less than 294 // 10^n. This tells us how much we can safely shift without overshooting. 295 constexpr uint8_t POWERS_OF_TWO[19] = { 296 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, 33, 36, 39, 43, 46, 49, 53, 56, 59, 297 }; 298 constexpr int32_t NUM_POWERS_OF_TWO = 299 sizeof(POWERS_OF_TWO) / sizeof(POWERS_OF_TWO[0]); 300 301 // Takes a mantissa and base 10 exponent and converts it into its closest 302 // floating point type T equivalent. This is the fallback algorithm used when 303 // the Eisel-Lemire algorithm fails, it's slower but more accurate. It's based 304 // on the Simple Decimal Conversion algorithm by Nigel Tao, described at this 305 // link: https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html 306 template <class T> 307 static inline void 308 simple_decimal_conversion(const char *__restrict numStart, 309 typename fputil::FPBits<T>::UIntType *outputMantissa, 310 uint32_t *outputExp2) { 311 312 int32_t exp2 = 0; 313 HighPrecisionDecimal hpd = HighPrecisionDecimal(numStart); 314 315 if (hpd.get_num_digits() == 0) { 316 *outputMantissa = 0; 317 *outputExp2 = 0; 318 return; 319 } 320 321 // If the exponent is too large and can't be represented in this size of 322 // float, return inf. 323 if (hpd.get_decimal_point() > 0 && 324 exp10_to_exp2(hpd.get_decimal_point() - 1) > 325 static_cast<int64_t>(fputil::FloatProperties<T>::EXPONENT_BIAS)) { 326 *outputMantissa = 0; 327 *outputExp2 = fputil::FPBits<T>::MAX_EXPONENT; 328 errno = ERANGE; 329 return; 330 } 331 // If the exponent is too small even for a subnormal, return 0. 332 if (hpd.get_decimal_point() < 0 && 333 exp10_to_exp2(-hpd.get_decimal_point()) > 334 static_cast<int64_t>(fputil::FloatProperties<T>::EXPONENT_BIAS + 335 fputil::FloatProperties<T>::MANTISSA_WIDTH)) { 336 *outputMantissa = 0; 337 *outputExp2 = 0; 338 errno = ERANGE; 339 return; 340 } 341 342 // Right shift until the number is smaller than 1. 343 while (hpd.get_decimal_point() > 0) { 344 int32_t shift_amount = 0; 345 if (hpd.get_decimal_point() >= NUM_POWERS_OF_TWO) { 346 shift_amount = 60; 347 } else { 348 shift_amount = POWERS_OF_TWO[hpd.get_decimal_point()]; 349 } 350 exp2 += shift_amount; 351 hpd.shift(-shift_amount); 352 } 353 354 // Left shift until the number is between 1/2 and 1 355 while (hpd.get_decimal_point() < 0 || 356 (hpd.get_decimal_point() == 0 && hpd.get_digits()[0] < 5)) { 357 int32_t shift_amount = 0; 358 359 if (-hpd.get_decimal_point() >= NUM_POWERS_OF_TWO) { 360 shift_amount = 60; 361 } else if (hpd.get_decimal_point() != 0) { 362 shift_amount = POWERS_OF_TWO[-hpd.get_decimal_point()]; 363 } else { // This handles the case of the number being between .1 and .5 364 shift_amount = 1; 365 } 366 exp2 -= shift_amount; 367 hpd.shift(shift_amount); 368 } 369 370 // Left shift once so that the number is between 1 and 2 371 --exp2; 372 hpd.shift(1); 373 374 // Get the biased exponent 375 exp2 += fputil::FloatProperties<T>::EXPONENT_BIAS; 376 377 // Handle the exponent being too large (and return inf). 378 if (exp2 >= fputil::FPBits<T>::MAX_EXPONENT) { 379 *outputMantissa = 0; 380 *outputExp2 = fputil::FPBits<T>::MAX_EXPONENT; 381 errno = ERANGE; 382 return; 383 } 384 385 // Shift left to fill the mantissa 386 hpd.shift(fputil::FloatProperties<T>::MANTISSA_WIDTH); 387 typename fputil::FPBits<T>::UIntType final_mantissa = 388 hpd.round_to_integer_type<typename fputil::FPBits<T>::UIntType>(); 389 390 // Handle subnormals 391 if (exp2 <= 0) { 392 // Shift right until there is a valid exponent 393 while (exp2 < 0) { 394 hpd.shift(-1); 395 ++exp2; 396 } 397 // Shift right one more time to compensate for the left shift to get it 398 // between 1 and 2. 399 hpd.shift(-1); 400 final_mantissa = 401 hpd.round_to_integer_type<typename fputil::FPBits<T>::UIntType>(); 402 403 // Check if by shifting right we've caused this to round to a normal number. 404 if ((final_mantissa >> fputil::FloatProperties<T>::MANTISSA_WIDTH) != 0) { 405 ++exp2; 406 } 407 } 408 409 // Check if rounding added a bit, and shift down if that's the case. 410 if (final_mantissa == typename fputil::FPBits<T>::UIntType(2) 411 << fputil::FloatProperties<T>::MANTISSA_WIDTH) { 412 final_mantissa >>= 1; 413 ++exp2; 414 415 // Check if this rounding causes exp2 to go out of range and make the result 416 // INF. If this is the case, then finalMantissa and exp2 are already the 417 // correct values for an INF result. 418 if (exp2 >= fputil::FPBits<T>::MAX_EXPONENT) { 419 errno = ERANGE; // NOLINT 420 } 421 } 422 423 if (exp2 == 0) { 424 errno = ERANGE; 425 } 426 427 *outputMantissa = final_mantissa; 428 *outputExp2 = exp2; 429 } 430 431 // This class is used for templating the constants for Clinger's Fast Path, 432 // described as a method of approximation in 433 // Clinger WD. How to Read Floating Point Numbers Accurately. SIGPLAN Not 1990 434 // Jun;25(6):92–101. https://doi.org/10.1145/93548.93557. 435 // As well as the additions by Gay that extend the useful range by the number of 436 // exact digits stored by the float type, described in 437 // Gay DM, Correctly rounded binary-decimal and decimal-binary conversions; 438 // 1990. AT&T Bell Laboratories Numerical Analysis Manuscript 90-10. 439 template <class T> class ClingerConsts; 440 441 template <> class ClingerConsts<float> { 442 public: 443 static constexpr float POWERS_OF_TEN_ARRAY[] = {1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 444 1e6, 1e7, 1e8, 1e9, 1e10}; 445 static constexpr int32_t EXACT_POWERS_OF_TEN = 10; 446 static constexpr int32_t DIGITS_IN_MANTISSA = 7; 447 static constexpr float MAX_EXACT_INT = 16777215.0; 448 }; 449 450 template <> class ClingerConsts<double> { 451 public: 452 static constexpr double POWERS_OF_TEN_ARRAY[] = { 453 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 454 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; 455 static constexpr int32_t EXACT_POWERS_OF_TEN = 22; 456 static constexpr int32_t DIGITS_IN_MANTISSA = 15; 457 static constexpr double MAX_EXACT_INT = 9007199254740991.0; 458 }; 459 460 #if defined(LONG_DOUBLE_IS_DOUBLE) 461 template <> class ClingerConsts<long double> { 462 public: 463 static constexpr long double POWERS_OF_TEN_ARRAY[] = { 464 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 465 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; 466 static constexpr int32_t EXACT_POWERS_OF_TEN = 467 ClingerConsts<double>::EXACT_POWERS_OF_TEN; 468 static constexpr int32_t DIGITS_IN_MANTISSA = 469 ClingerConsts<double>::DIGITS_IN_MANTISSA; 470 static constexpr long double MAX_EXACT_INT = 471 ClingerConsts<double>::MAX_EXACT_INT; 472 }; 473 #elif defined(SPECIAL_X86_LONG_DOUBLE) 474 template <> class ClingerConsts<long double> { 475 public: 476 static constexpr long double POWERS_OF_TEN_ARRAY[] = { 477 1e0L, 1e1L, 1e2L, 1e3L, 1e4L, 1e5L, 1e6L, 1e7L, 1e8L, 1e9L, 478 1e10L, 1e11L, 1e12L, 1e13L, 1e14L, 1e15L, 1e16L, 1e17L, 1e18L, 1e19L, 479 1e20L, 1e21L, 1e22L, 1e23L, 1e24L, 1e25L, 1e26L, 1e27L}; 480 static constexpr int32_t EXACT_POWERS_OF_TEN = 27; 481 static constexpr int32_t DIGITS_IN_MANTISSA = 21; 482 static constexpr long double MAX_EXACT_INT = 18446744073709551615.0L; 483 }; 484 #else 485 template <> class ClingerConsts<long double> { 486 public: 487 static constexpr long double POWERS_OF_TEN_ARRAY[] = { 488 1e0L, 1e1L, 1e2L, 1e3L, 1e4L, 1e5L, 1e6L, 1e7L, 1e8L, 1e9L, 489 1e10L, 1e11L, 1e12L, 1e13L, 1e14L, 1e15L, 1e16L, 1e17L, 1e18L, 1e19L, 490 1e20L, 1e21L, 1e22L, 1e23L, 1e24L, 1e25L, 1e26L, 1e27L, 1e28L, 1e29L, 491 1e30L, 1e31L, 1e32L, 1e33L, 1e34L, 1e35L, 1e36L, 1e37L, 1e38L, 1e39L, 492 1e40L, 1e41L, 1e42L, 1e43L, 1e44L, 1e45L, 1e46L, 1e47L, 1e48L}; 493 static constexpr int32_t EXACT_POWERS_OF_TEN = 48; 494 static constexpr int32_t DIGITS_IN_MANTISSA = 33; 495 static constexpr long double MAX_EXACT_INT = 496 10384593717069655257060992658440191.0L; 497 }; 498 #endif 499 500 // Take an exact mantissa and exponent and attempt to convert it using only 501 // exact floating point arithmetic. This only handles numbers with low 502 // exponents, but handles them quickly. This is an implementation of Clinger's 503 // Fast Path, as described above. 504 template <class T> 505 static inline bool 506 clinger_fast_path(typename fputil::FPBits<T>::UIntType mantissa, int32_t exp10, 507 typename fputil::FPBits<T>::UIntType *outputMantissa, 508 uint32_t *outputExp2) { 509 if (mantissa >> fputil::FloatProperties<T>::MANTISSA_WIDTH > 0) { 510 return false; 511 } 512 513 fputil::FPBits<T> result; 514 T float_mantissa = static_cast<T>(mantissa); 515 516 if (exp10 == 0) { 517 result = fputil::FPBits<T>(float_mantissa); 518 } 519 if (exp10 > 0) { 520 if (exp10 > ClingerConsts<T>::EXACT_POWERS_OF_TEN + 521 ClingerConsts<T>::DIGITS_IN_MANTISSA) { 522 return false; 523 } 524 if (exp10 > ClingerConsts<T>::EXACT_POWERS_OF_TEN) { 525 float_mantissa = float_mantissa * 526 ClingerConsts<T>::POWERS_OF_TEN_ARRAY 527 [exp10 - ClingerConsts<T>::EXACT_POWERS_OF_TEN]; 528 exp10 = ClingerConsts<T>::EXACT_POWERS_OF_TEN; 529 } 530 if (float_mantissa > ClingerConsts<T>::MAX_EXACT_INT) { 531 return false; 532 } 533 result = fputil::FPBits<T>(float_mantissa * 534 ClingerConsts<T>::POWERS_OF_TEN_ARRAY[exp10]); 535 } else if (exp10 < 0) { 536 if (-exp10 > ClingerConsts<T>::EXACT_POWERS_OF_TEN) { 537 return false; 538 } 539 result = fputil::FPBits<T>(float_mantissa / 540 ClingerConsts<T>::POWERS_OF_TEN_ARRAY[-exp10]); 541 } 542 *outputMantissa = result.get_mantissa(); 543 *outputExp2 = result.get_unbiased_exponent(); 544 return true; 545 } 546 547 // Takes a mantissa and base 10 exponent and converts it into its closest 548 // floating point type T equivalient. First we try the Eisel-Lemire algorithm, 549 // then if that fails then we fall back to a more accurate algorithm for 550 // accuracy. The resulting mantissa and exponent are placed in outputMantissa 551 // and outputExp2. 552 template <class T> 553 static inline void 554 decimal_exp_to_float(typename fputil::FPBits<T>::UIntType mantissa, 555 int32_t exp10, const char *__restrict numStart, 556 bool truncated, 557 typename fputil::FPBits<T>::UIntType *outputMantissa, 558 uint32_t *outputExp2) { 559 // If the exponent is too large and can't be represented in this size of 560 // float, return inf. These bounds are very loose, but are mostly serving as a 561 // first pass. Some close numbers getting through is okay. 562 if (exp10 > 563 static_cast<int64_t>(fputil::FloatProperties<T>::EXPONENT_BIAS) / 3) { 564 *outputMantissa = 0; 565 *outputExp2 = fputil::FPBits<T>::MAX_EXPONENT; 566 errno = ERANGE; 567 return; 568 } 569 // If the exponent is too small even for a subnormal, return 0. 570 if (exp10 < 0 && 571 -static_cast<int64_t>(exp10) > 572 static_cast<int64_t>(fputil::FloatProperties<T>::EXPONENT_BIAS + 573 fputil::FloatProperties<T>::MANTISSA_WIDTH) / 574 2) { 575 *outputMantissa = 0; 576 *outputExp2 = 0; 577 errno = ERANGE; 578 return; 579 } 580 581 if (!truncated) { 582 if (clinger_fast_path<T>(mantissa, exp10, outputMantissa, outputExp2)) { 583 return; 584 } 585 } 586 587 // Try Eisel-Lemire 588 if (eisel_lemire<T>(mantissa, exp10, outputMantissa, outputExp2)) { 589 if (!truncated) { 590 return; 591 } 592 // If the mantissa is truncated, then the result may be off by the LSB, so 593 // check if rounding the mantissa up changes the result. If not, then it's 594 // safe, else use the fallback. 595 typename fputil::FPBits<T>::UIntType first_mantissa = *outputMantissa; 596 uint32_t first_exp2 = *outputExp2; 597 if (eisel_lemire<T>(mantissa + 1, exp10, outputMantissa, outputExp2)) { 598 if (*outputMantissa == first_mantissa && *outputExp2 == first_exp2) { 599 return; 600 } 601 } 602 } 603 604 simple_decimal_conversion<T>(numStart, outputMantissa, outputExp2); 605 606 return; 607 } 608 609 // Takes a mantissa and base 2 exponent and converts it into its closest 610 // floating point type T equivalient. Since the exponent is already in the right 611 // form, this is mostly just shifting and rounding. This is used for hexadecimal 612 // numbers since a base 16 exponent multiplied by 4 is the base 2 exponent. 613 template <class T> 614 static inline void 615 binary_exp_to_float(typename fputil::FPBits<T>::UIntType mantissa, int32_t exp2, 616 bool truncated, 617 typename fputil::FPBits<T>::UIntType *outputMantissa, 618 uint32_t *outputExp2) { 619 using BitsType = typename fputil::FPBits<T>::UIntType; 620 621 // This is the number of leading zeroes a properly normalized float of type T 622 // should have. 623 constexpr int32_t NUMBITS = sizeof(BitsType) * 8; 624 constexpr int32_t INF_EXP = 625 (1 << fputil::FloatProperties<T>::EXPONENT_WIDTH) - 1; 626 627 // Normalization step 1: Bring the leading bit to the highest bit of BitsType. 628 uint32_t amount_to_shift_left = leading_zeroes<BitsType>(mantissa); 629 mantissa <<= amount_to_shift_left; 630 631 // Keep exp2 representing the exponent of the lowest bit of BitsType. 632 exp2 -= amount_to_shift_left; 633 634 // biasedExponent represents the biased exponent of the most significant bit. 635 int32_t biased_exponent = 636 exp2 + NUMBITS + fputil::FPBits<T>::EXPONENT_BIAS - 1; 637 638 // Handle numbers that're too large and get squashed to inf 639 if (biased_exponent >= INF_EXP) { 640 // This indicates an overflow, so we make the result INF and set errno. 641 *outputExp2 = (1 << fputil::FloatProperties<T>::EXPONENT_WIDTH) - 1; 642 *outputMantissa = 0; 643 errno = ERANGE; 644 return; 645 } 646 647 uint32_t amount_to_shift_right = 648 NUMBITS - fputil::FloatProperties<T>::MANTISSA_WIDTH - 1; 649 650 // Handle subnormals. 651 if (biased_exponent <= 0) { 652 amount_to_shift_right += 1 - biased_exponent; 653 biased_exponent = 0; 654 655 if (amount_to_shift_right > NUMBITS) { 656 // Return 0 if the exponent is too small. 657 *outputMantissa = 0; 658 *outputExp2 = 0; 659 errno = ERANGE; 660 return; 661 } 662 } 663 664 BitsType round_bit_mask = BitsType(1) << (amount_to_shift_right - 1); 665 BitsType sticky_mask = round_bit_mask - 1; 666 bool round_bit = mantissa & round_bit_mask; 667 bool sticky_bit = static_cast<bool>(mantissa & sticky_mask) || truncated; 668 669 if (amount_to_shift_right < NUMBITS) { 670 // Shift the mantissa and clear the implicit bit. 671 mantissa >>= amount_to_shift_right; 672 mantissa &= fputil::FloatProperties<T>::MANTISSA_MASK; 673 } else { 674 mantissa = 0; 675 } 676 bool least_significant_bit = mantissa & BitsType(1); 677 // Perform rounding-to-nearest, tie-to-even. 678 if (round_bit && (least_significant_bit || sticky_bit)) { 679 ++mantissa; 680 } 681 682 if (mantissa > fputil::FloatProperties<T>::MANTISSA_MASK) { 683 // Rounding causes the exponent to increase. 684 ++biased_exponent; 685 686 if (biased_exponent == INF_EXP) { 687 errno = ERANGE; 688 } 689 } 690 691 if (biased_exponent == 0) { 692 errno = ERANGE; 693 } 694 695 *outputMantissa = mantissa & fputil::FloatProperties<T>::MANTISSA_MASK; 696 *outputExp2 = biased_exponent; 697 } 698 699 // checks if the next 4 characters of the string pointer are the start of a 700 // hexadecimal floating point number. Does not advance the string pointer. 701 static inline bool is_float_hex_start(const char *__restrict src, 702 const char decimalPoint) { 703 if (!(*src == '0' && (*(src + 1) | 32) == 'x')) { 704 return false; 705 } 706 if (*(src + 2) == decimalPoint) { 707 return isalnum(*(src + 3)) && b36_char_to_int(*(src + 3)) < 16; 708 } else { 709 return isalnum(*(src + 2)) && b36_char_to_int(*(src + 2)) < 16; 710 } 711 } 712 713 // Takes the start of a string representing a decimal float, as well as the 714 // local decimalPoint. It returns if it suceeded in parsing any digits, and if 715 // the return value is true then the outputs are pointer to the end of the 716 // number, and the mantissa and exponent for the closest float T representation. 717 // If the return value is false, then it is assumed that there is no number 718 // here. 719 template <class T> 720 static inline bool 721 decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT, 722 char **__restrict strEnd, 723 typename fputil::FPBits<T>::UIntType *outputMantissa, 724 uint32_t *outputExponent) { 725 using BitsType = typename fputil::FPBits<T>::UIntType; 726 constexpr uint32_t BASE = 10; 727 constexpr char EXPONENT_MARKER = 'e'; 728 729 const char *__restrict num_start = src; 730 bool truncated = false; 731 bool seen_digit = false; 732 bool after_decimal = false; 733 BitsType mantissa = 0; 734 int32_t exponent = 0; 735 736 // The goal for the first step of parsing is to convert the number in src to 737 // the format mantissa * (base ^ exponent) 738 739 // The loop fills the mantissa with as many digits as it can hold 740 const BitsType bitstype_max_div_by_base = 741 __llvm_libc::cpp::NumericLimits<BitsType>::max() / BASE; 742 while (true) { 743 if (isdigit(*src)) { 744 uint32_t digit = *src - '0'; 745 seen_digit = true; 746 747 if (mantissa < bitstype_max_div_by_base) { 748 mantissa = (mantissa * BASE) + digit; 749 if (after_decimal) { 750 --exponent; 751 } 752 } else { 753 if (digit > 0) 754 truncated = true; 755 if (!after_decimal) 756 ++exponent; 757 } 758 759 ++src; 760 continue; 761 } 762 if (*src == DECIMAL_POINT) { 763 if (after_decimal) { 764 break; // this means that *src points to a second decimal point, ending 765 // the number. 766 } 767 after_decimal = true; 768 ++src; 769 continue; 770 } 771 // The character is neither a digit nor a decimal point. 772 break; 773 } 774 775 if (!seen_digit) 776 return false; 777 778 if ((*src | 32) == EXPONENT_MARKER) { 779 if (*(src + 1) == '+' || *(src + 1) == '-' || isdigit(*(src + 1))) { 780 ++src; 781 char *temp_str_end; 782 int32_t add_to_exponent = strtointeger<int32_t>(src, &temp_str_end, 10); 783 if (add_to_exponent > 100000) 784 add_to_exponent = 100000; 785 else if (add_to_exponent < -100000) 786 add_to_exponent = -100000; 787 788 src = temp_str_end; 789 exponent += add_to_exponent; 790 } 791 } 792 793 *strEnd = const_cast<char *>(src); 794 if (mantissa == 0) { // if we have a 0, then also 0 the exponent. 795 *outputMantissa = 0; 796 *outputExponent = 0; 797 } else { 798 decimal_exp_to_float<T>(mantissa, exponent, num_start, truncated, 799 outputMantissa, outputExponent); 800 } 801 return true; 802 } 803 804 // Takes the start of a string representing a hexadecimal float, as well as the 805 // local decimal point. It returns if it suceeded in parsing any digits, and if 806 // the return value is true then the outputs are pointer to the end of the 807 // number, and the mantissa and exponent for the closest float T representation. 808 // If the return value is false, then it is assumed that there is no number 809 // here. 810 template <class T> 811 static inline bool hexadecimal_string_to_float( 812 const char *__restrict src, const char DECIMAL_POINT, 813 char **__restrict strEnd, 814 typename fputil::FPBits<T>::UIntType *outputMantissa, 815 uint32_t *outputExponent) { 816 using BitsType = typename fputil::FPBits<T>::UIntType; 817 constexpr uint32_t BASE = 16; 818 constexpr char EXPONENT_MARKER = 'p'; 819 820 bool truncated = false; 821 bool seen_digit = false; 822 bool after_decimal = false; 823 BitsType mantissa = 0; 824 int32_t exponent = 0; 825 826 // The goal for the first step of parsing is to convert the number in src to 827 // the format mantissa * (base ^ exponent) 828 829 // The loop fills the mantissa with as many digits as it can hold 830 const BitsType bitstype_max_div_by_base = 831 __llvm_libc::cpp::NumericLimits<BitsType>::max() / BASE; 832 while (true) { 833 if (isalnum(*src)) { 834 uint32_t digit = b36_char_to_int(*src); 835 if (digit < BASE) 836 seen_digit = true; 837 else 838 break; 839 840 if (mantissa < bitstype_max_div_by_base) { 841 mantissa = (mantissa * BASE) + digit; 842 if (after_decimal) 843 --exponent; 844 } else { 845 if (digit > 0) 846 truncated = true; 847 if (!after_decimal) 848 ++exponent; 849 } 850 ++src; 851 continue; 852 } 853 if (*src == DECIMAL_POINT) { 854 if (after_decimal) { 855 break; // this means that *src points to a second decimal point, ending 856 // the number. 857 } 858 after_decimal = true; 859 ++src; 860 continue; 861 } 862 // The character is neither a hexadecimal digit nor a decimal point. 863 break; 864 } 865 866 if (!seen_digit) 867 return false; 868 869 // Convert the exponent from having a base of 16 to having a base of 2. 870 exponent *= 4; 871 872 if ((*src | 32) == EXPONENT_MARKER) { 873 if (*(src + 1) == '+' || *(src + 1) == '-' || isdigit(*(src + 1))) { 874 ++src; 875 char *temp_str_end; 876 int32_t add_to_exponent = strtointeger<int32_t>(src, &temp_str_end, 10); 877 if (add_to_exponent > 100000) 878 add_to_exponent = 100000; 879 else if (add_to_exponent < -100000) 880 add_to_exponent = -100000; 881 src = temp_str_end; 882 exponent += add_to_exponent; 883 } 884 } 885 *strEnd = const_cast<char *>(src); 886 if (mantissa == 0) { // if we have a 0, then also 0 the exponent. 887 *outputMantissa = 0; 888 *outputExponent = 0; 889 } else { 890 binary_exp_to_float<T>(mantissa, exponent, truncated, outputMantissa, 891 outputExponent); 892 } 893 return true; 894 } 895 896 // Takes a pointer to a string and a pointer to a string pointer. This function 897 // is used as the backend for all of the string to float functions. 898 template <class T> 899 static inline T strtofloatingpoint(const char *__restrict src, 900 char **__restrict strEnd) { 901 using BitsType = typename fputil::FPBits<T>::UIntType; 902 fputil::FPBits<T> result = fputil::FPBits<T>(); 903 const char *original_src = src; 904 bool seen_digit = false; 905 src = first_non_whitespace(src); 906 907 if (*src == '+' || *src == '-') { 908 if (*src == '-') { 909 result.set_sign(true); 910 } 911 ++src; 912 } 913 914 static constexpr char DECIMAL_POINT = '.'; 915 static const char *inf_string = "infinity"; 916 static const char *nan_string = "nan"; 917 918 // bool truncated = false; 919 920 if (isdigit(*src) || *src == DECIMAL_POINT) { // regular number 921 int base = 10; 922 if (is_float_hex_start(src, DECIMAL_POINT)) { 923 base = 16; 924 src += 2; 925 seen_digit = true; 926 } 927 char *new_str_end = nullptr; 928 929 BitsType output_mantissa = 0; 930 uint32_t output_exponent = 0; 931 if (base == 16) { 932 seen_digit = hexadecimal_string_to_float<T>( 933 src, DECIMAL_POINT, &new_str_end, &output_mantissa, &output_exponent); 934 } else { // base is 10 935 seen_digit = decimal_string_to_float<T>( 936 src, DECIMAL_POINT, &new_str_end, &output_mantissa, &output_exponent); 937 } 938 939 if (seen_digit) { 940 src += new_str_end - src; 941 result.set_mantissa(output_mantissa); 942 result.set_unbiased_exponent(output_exponent); 943 } 944 } else if ((*src | 32) == 'n') { // NaN 945 if ((src[1] | 32) == nan_string[1] && (src[2] | 32) == nan_string[2]) { 946 seen_digit = true; 947 src += 3; 948 BitsType nan_mantissa = 0; 949 // this handles the case of `NaN(n-character-sequence)`, where the 950 // n-character-sequence is made of 0 or more letters and numbers in any 951 // order. 952 if (*src == '(') { 953 const char *left_paren = src; 954 ++src; 955 while (isalnum(*src)) 956 ++src; 957 if (*src == ')') { 958 ++src; 959 char *temp_src = 0; 960 if (isdigit(*(left_paren + 1))) { 961 // This is to prevent errors when BitsType is larger than 64 bits, 962 // since strtointeger only supports up to 64 bits. This is actually 963 // more than is required by the specification, which says for the 964 // input type "NAN(n-char-sequence)" that "the meaning of 965 // the n-char sequence is implementation-defined." 966 nan_mantissa = static_cast<BitsType>( 967 strtointeger<uint64_t>(left_paren + 1, &temp_src, 0)); 968 if (*temp_src != ')') 969 nan_mantissa = 0; 970 } 971 } else 972 src = left_paren; 973 } 974 nan_mantissa |= fputil::FloatProperties<T>::QUIET_NAN_MASK; 975 if (result.get_sign()) { 976 result = fputil::FPBits<T>(result.build_nan(nan_mantissa)); 977 result.set_sign(true); 978 } else { 979 result.set_sign(false); 980 result = fputil::FPBits<T>(result.build_nan(nan_mantissa)); 981 } 982 } 983 } else if ((*src | 32) == 'i') { // INF 984 if ((src[1] | 32) == inf_string[1] && (src[2] | 32) == inf_string[2]) { 985 seen_digit = true; 986 if (result.get_sign()) 987 result = result.neg_inf(); 988 else 989 result = result.inf(); 990 if ((src[3] | 32) == inf_string[3] && (src[4] | 32) == inf_string[4] && 991 (src[5] | 32) == inf_string[5] && (src[6] | 32) == inf_string[6] && 992 (src[7] | 32) == inf_string[7]) { 993 // if the string is "INFINITY" then strEnd needs to be set to src + 8. 994 src += 8; 995 } else { 996 src += 3; 997 } 998 } 999 } 1000 if (!seen_digit) { // If there is nothing to actually parse, then return 0. 1001 if (strEnd != nullptr) 1002 *strEnd = const_cast<char *>(original_src); 1003 return T(0); 1004 } 1005 1006 if (strEnd != nullptr) 1007 *strEnd = const_cast<char *>(src); 1008 1009 // This function only does something if T is long double and the platform uses 1010 // special 80 bit long doubles. Otherwise it should be inlined out. 1011 set_implicit_bit<T>(result); 1012 1013 return T(result); 1014 } 1015 1016 } // namespace internal 1017 } // namespace __llvm_libc 1018 1019 #endif // LIBC_SRC_SUPPORT_STR_TO_FLOAT_H 1020