1 //===-- String to float conversion utils ------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LIBC_SRC_SUPPORT_STR_TO_FLOAT_H 10 #define LIBC_SRC_SUPPORT_STR_TO_FLOAT_H 11 12 #include "src/__support/CPP/Limits.h" 13 #include "src/__support/FPUtil/FPBits.h" 14 #include "src/__support/FPUtil/builtin_wrappers.h" 15 #include "src/__support/ctype_utils.h" 16 #include "src/__support/detailed_powers_of_ten.h" 17 #include "src/__support/high_precision_decimal.h" 18 #include "src/__support/str_to_integer.h" 19 #include <errno.h> 20 21 namespace __llvm_libc { 22 namespace internal { 23 24 template <class T> uint32_t inline leading_zeroes(T inputNumber) { 25 constexpr uint32_t BITS_IN_T = sizeof(T) * 8; 26 if (inputNumber == 0) { 27 return BITS_IN_T; 28 } 29 uint32_t cur_guess = BITS_IN_T / 2; 30 uint32_t range_size = BITS_IN_T / 2; 31 // while either shifting by curGuess does not get rid of all of the bits or 32 // shifting by one less also gets rid of all of the bits then we have not 33 // found the first bit. 34 while (((inputNumber >> cur_guess) > 0) || 35 ((inputNumber >> (cur_guess - 1)) == 0)) { 36 // Binary search for the first set bit 37 range_size /= 2; 38 if (range_size == 0) { 39 break; 40 } 41 if ((inputNumber >> cur_guess) > 0) { 42 cur_guess += range_size; 43 } else { 44 cur_guess -= range_size; 45 } 46 } 47 if (inputNumber >> cur_guess > 0) { 48 cur_guess++; 49 } 50 return BITS_IN_T - cur_guess; 51 } 52 53 template <> uint32_t inline leading_zeroes<uint32_t>(uint32_t inputNumber) { 54 return inputNumber == 0 ? 32 : fputil::clz(inputNumber); 55 } 56 57 template <> uint32_t inline leading_zeroes<uint64_t>(uint64_t inputNumber) { 58 return inputNumber == 0 ? 64 : fputil::clz(inputNumber); 59 } 60 61 static inline uint64_t low64(__uint128_t num) { 62 return static_cast<uint64_t>(num & 0xffffffffffffffff); 63 } 64 65 static inline uint64_t high64(__uint128_t num) { 66 return static_cast<uint64_t>(num >> 64); 67 } 68 69 template <class T> inline void set_implicit_bit(fputil::FPBits<T> &result) { 70 return; 71 } 72 73 #if defined(SPECIAL_X86_LONG_DOUBLE) 74 template <> 75 inline void set_implicit_bit<long double>(fputil::FPBits<long double> &result) { 76 result.set_implicit_bit(result.get_unbiased_exponent() != 0); 77 } 78 #endif 79 80 // This Eisel-Lemire implementation is based on the algorithm described in the 81 // paper Number Parsing at a Gigabyte per Second, Software: Practice and 82 // Experience 51 (8), 2021 (https://arxiv.org/abs/2101.11408), as well as the 83 // description by Nigel Tao 84 // (https://nigeltao.github.io/blog/2020/eisel-lemire.html) and the golang 85 // implementation, also by Nigel Tao 86 // (https://github.com/golang/go/blob/release-branch.go1.16/src/strconv/eisel_lemire.go#L25) 87 // for some optimizations as well as handling 32 bit floats. 88 template <class T> 89 static inline bool 90 eisel_lemire(typename fputil::FPBits<T>::UIntType mantissa, int32_t exp10, 91 typename fputil::FPBits<T>::UIntType *outputMantissa, 92 uint32_t *outputExp2) { 93 94 using BitsType = typename fputil::FPBits<T>::UIntType; 95 constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8; 96 97 if (sizeof(T) > 8) { // This algorithm cannot handle anything longer than a 98 // double, so we skip straight to the fallback. 99 return false; 100 } 101 102 // Exp10 Range 103 if (exp10 < DETAILED_POWERS_OF_TEN_MIN_EXP_10 || 104 exp10 > DETAILED_POWERS_OF_TEN_MAX_EXP_10) { 105 return false; 106 } 107 108 // Normalization 109 uint32_t clz = leading_zeroes<BitsType>(mantissa); 110 mantissa <<= clz; 111 112 uint32_t exp2 = exp10_to_exp2(exp10) + BITS_IN_MANTISSA + 113 fputil::FloatProperties<T>::EXPONENT_BIAS - clz; 114 115 // Multiplication 116 const uint64_t *power_of_ten = 117 DETAILED_POWERS_OF_TEN[exp10 - DETAILED_POWERS_OF_TEN_MIN_EXP_10]; 118 119 __uint128_t first_approx = static_cast<__uint128_t>(mantissa) * 120 static_cast<__uint128_t>(power_of_ten[1]); 121 122 // Wider Approximation 123 __uint128_t final_approx; 124 // The halfway constant is used to check if the bits that will be shifted away 125 // intially are all 1. For doubles this is 64 (bitstype size) - 52 (final 126 // mantissa size) - 3 (we shift away the last two bits separately for 127 // accuracy, and the most significant bit is ignored.) = 9 bits. Similarly, 128 // it's 6 bits for floats in this case. 129 const uint64_t halfway_constant = 130 (uint64_t(1) << (BITS_IN_MANTISSA - 131 fputil::FloatProperties<T>::MANTISSA_WIDTH - 3)) - 132 1; 133 if ((high64(first_approx) & halfway_constant) == halfway_constant && 134 low64(first_approx) + mantissa < mantissa) { 135 __uint128_t low_bits = static_cast<__uint128_t>(mantissa) * 136 static_cast<__uint128_t>(power_of_ten[0]); 137 __uint128_t second_approx = 138 first_approx + static_cast<__uint128_t>(high64(low_bits)); 139 140 if ((high64(second_approx) & halfway_constant) == halfway_constant && 141 low64(second_approx) + 1 == 0 && 142 low64(low_bits) + mantissa < mantissa) { 143 return false; 144 } 145 final_approx = second_approx; 146 } else { 147 final_approx = first_approx; 148 } 149 150 // Shifting to 54 bits for doubles and 25 bits for floats 151 BitsType msb = high64(final_approx) >> (BITS_IN_MANTISSA - 1); 152 BitsType final_mantissa = high64(final_approx) >> 153 (msb + BITS_IN_MANTISSA - 154 (fputil::FloatProperties<T>::MANTISSA_WIDTH + 3)); 155 exp2 -= 1 ^ msb; // same as !msb 156 157 // Half-way ambiguity 158 if (low64(final_approx) == 0 && 159 (high64(final_approx) & halfway_constant) == 0 && 160 (final_mantissa & 3) == 1) { 161 return false; 162 } 163 164 // From 54 to 53 bits for doubles and 25 to 24 bits for floats 165 final_mantissa += final_mantissa & 1; 166 final_mantissa >>= 1; 167 if ((final_mantissa >> (fputil::FloatProperties<T>::MANTISSA_WIDTH + 1)) > 168 0) { 169 final_mantissa >>= 1; 170 ++exp2; 171 } 172 173 // The if block is equivalent to (but has fewer branches than): 174 // if exp2 <= 0 || exp2 >= 0x7FF { etc } 175 if (exp2 - 1 >= (1 << fputil::FloatProperties<T>::EXPONENT_WIDTH) - 2) { 176 return false; 177 } 178 179 *outputMantissa = final_mantissa; 180 *outputExp2 = exp2; 181 return true; 182 } 183 184 #if !defined(LONG_DOUBLE_IS_DOUBLE) 185 template <> 186 inline bool eisel_lemire<long double>( 187 typename fputil::FPBits<long double>::UIntType mantissa, int32_t exp10, 188 typename fputil::FPBits<long double>::UIntType *outputMantissa, 189 uint32_t *outputExp2) { 190 using BitsType = typename fputil::FPBits<long double>::UIntType; 191 constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8; 192 193 // Exp10 Range 194 // This doesn't reach very far into the range for long doubles, since it's 195 // sized for doubles and their 11 exponent bits, and not for long doubles and 196 // their 15 exponent bits (max exponent of ~300 for double vs ~5000 for long 197 // double). This is a known tradeoff, and was made because a proper long 198 // double table would be approximately 16 times larger. This would have 199 // significant memory and storage costs all the time to speed up a relatively 200 // uncommon path. In addition the exp10_to_exp2 function only approximates 201 // multiplying by log(10)/log(2), and that approximation may not be accurate 202 // out to the full long double range. 203 if (exp10 < DETAILED_POWERS_OF_TEN_MIN_EXP_10 || 204 exp10 > DETAILED_POWERS_OF_TEN_MAX_EXP_10) { 205 return false; 206 } 207 208 // Normalization 209 uint32_t clz = leading_zeroes<BitsType>(mantissa); 210 mantissa <<= clz; 211 212 uint32_t exp2 = exp10_to_exp2(exp10) + BITS_IN_MANTISSA + 213 fputil::FloatProperties<long double>::EXPONENT_BIAS - clz; 214 215 // Multiplication 216 const uint64_t *power_of_ten = 217 DETAILED_POWERS_OF_TEN[exp10 - DETAILED_POWERS_OF_TEN_MIN_EXP_10]; 218 219 // Since the input mantissa is more than 64 bits, we have to multiply with the 220 // full 128 bits of the power of ten to get an approximation with the same 221 // number of significant bits. This means that we only get the one 222 // approximation, and that approximation is 256 bits long. 223 __uint128_t approx_upper = static_cast<__uint128_t>(high64(mantissa)) * 224 static_cast<__uint128_t>(power_of_ten[1]); 225 226 __uint128_t approx_middle = static_cast<__uint128_t>(high64(mantissa)) * 227 static_cast<__uint128_t>(power_of_ten[0]) + 228 static_cast<__uint128_t>(low64(mantissa)) * 229 static_cast<__uint128_t>(power_of_ten[1]); 230 231 __uint128_t approx_lower = static_cast<__uint128_t>(low64(mantissa)) * 232 static_cast<__uint128_t>(power_of_ten[0]); 233 234 __uint128_t final_approx_lower = 235 approx_lower + (static_cast<__uint128_t>(low64(approx_middle)) << 64); 236 __uint128_t final_approx_upper = approx_upper + high64(approx_middle) + 237 (final_approx_lower < approx_lower ? 1 : 0); 238 239 // The halfway constant is used to check if the bits that will be shifted away 240 // intially are all 1. For 80 bit floats this is 128 (bitstype size) - 64 241 // (final mantissa size) - 3 (we shift away the last two bits separately for 242 // accuracy, and the most significant bit is ignored.) = 61 bits. Similarly, 243 // it's 12 bits for 128 bit floats in this case. 244 constexpr __uint128_t HALFWAY_CONSTANT = 245 (__uint128_t(1) << (BITS_IN_MANTISSA - 246 fputil::FloatProperties<long double>::MANTISSA_WIDTH - 247 3)) - 248 1; 249 250 if ((final_approx_upper & HALFWAY_CONSTANT) == HALFWAY_CONSTANT && 251 final_approx_lower + mantissa < mantissa) { 252 return false; 253 } 254 255 // Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats 256 BitsType msb = final_approx_upper >> (BITS_IN_MANTISSA - 1); 257 BitsType final_mantissa = 258 final_approx_upper >> 259 (msb + BITS_IN_MANTISSA - 260 (fputil::FloatProperties<long double>::MANTISSA_WIDTH + 3)); 261 exp2 -= 1 ^ msb; // same as !msb 262 263 // Half-way ambiguity 264 if (final_approx_lower == 0 && (final_approx_upper & HALFWAY_CONSTANT) == 0 && 265 (final_mantissa & 3) == 1) { 266 return false; 267 } 268 269 // From 65 to 64 bits for 80 bit floats and 113 to 112 bits for 128 bit 270 // floats 271 final_mantissa += final_mantissa & 1; 272 final_mantissa >>= 1; 273 if ((final_mantissa >> 274 (fputil::FloatProperties<long double>::MANTISSA_WIDTH + 1)) > 0) { 275 final_mantissa >>= 1; 276 ++exp2; 277 } 278 279 // The if block is equivalent to (but has fewer branches than): 280 // if exp2 <= 0 || exp2 >= MANTISSA_MAX { etc } 281 if (exp2 - 1 >= 282 (1 << fputil::FloatProperties<long double>::EXPONENT_WIDTH) - 2) { 283 return false; 284 } 285 286 *outputMantissa = final_mantissa; 287 *outputExp2 = exp2; 288 return true; 289 } 290 #endif 291 292 // The nth item in POWERS_OF_TWO represents the greatest power of two less than 293 // 10^n. This tells us how much we can safely shift without overshooting. 294 constexpr uint8_t POWERS_OF_TWO[19] = { 295 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, 33, 36, 39, 43, 46, 49, 53, 56, 59, 296 }; 297 constexpr int32_t NUM_POWERS_OF_TWO = 298 sizeof(POWERS_OF_TWO) / sizeof(POWERS_OF_TWO[0]); 299 300 // Takes a mantissa and base 10 exponent and converts it into its closest 301 // floating point type T equivalent. This is the fallback algorithm used when 302 // the Eisel-Lemire algorithm fails, it's slower but more accurate. It's based 303 // on the Simple Decimal Conversion algorithm by Nigel Tao, described at this 304 // link: https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html 305 template <class T> 306 static inline void 307 simple_decimal_conversion(const char *__restrict numStart, 308 typename fputil::FPBits<T>::UIntType *outputMantissa, 309 uint32_t *outputExp2) { 310 311 int32_t exp2 = 0; 312 HighPrecisionDecimal hpd = HighPrecisionDecimal(numStart); 313 314 if (hpd.get_num_digits() == 0) { 315 *outputMantissa = 0; 316 *outputExp2 = 0; 317 return; 318 } 319 320 // If the exponent is too large and can't be represented in this size of 321 // float, return inf. 322 if (hpd.get_decimal_point() > 0 && 323 exp10_to_exp2(hpd.get_decimal_point() - 1) > 324 static_cast<int64_t>(fputil::FloatProperties<T>::EXPONENT_BIAS)) { 325 *outputMantissa = 0; 326 *outputExp2 = fputil::FPBits<T>::MAX_EXPONENT; 327 errno = ERANGE; 328 return; 329 } 330 // If the exponent is too small even for a subnormal, return 0. 331 if (hpd.get_decimal_point() < 0 && 332 exp10_to_exp2(-hpd.get_decimal_point()) > 333 static_cast<int64_t>(fputil::FloatProperties<T>::EXPONENT_BIAS + 334 fputil::FloatProperties<T>::MANTISSA_WIDTH)) { 335 *outputMantissa = 0; 336 *outputExp2 = 0; 337 errno = ERANGE; 338 return; 339 } 340 341 // Right shift until the number is smaller than 1. 342 while (hpd.get_decimal_point() > 0) { 343 int32_t shift_amount = 0; 344 if (hpd.get_decimal_point() >= NUM_POWERS_OF_TWO) { 345 shift_amount = 60; 346 } else { 347 shift_amount = POWERS_OF_TWO[hpd.get_decimal_point()]; 348 } 349 exp2 += shift_amount; 350 hpd.shift(-shift_amount); 351 } 352 353 // Left shift until the number is between 1/2 and 1 354 while (hpd.get_decimal_point() < 0 || 355 (hpd.get_decimal_point() == 0 && hpd.get_digits()[0] < 5)) { 356 int32_t shift_amount = 0; 357 358 if (-hpd.get_decimal_point() >= NUM_POWERS_OF_TWO) { 359 shift_amount = 60; 360 } else if (hpd.get_decimal_point() != 0) { 361 shift_amount = POWERS_OF_TWO[-hpd.get_decimal_point()]; 362 } else { // This handles the case of the number being between .1 and .5 363 shift_amount = 1; 364 } 365 exp2 -= shift_amount; 366 hpd.shift(shift_amount); 367 } 368 369 // Left shift once so that the number is between 1 and 2 370 --exp2; 371 hpd.shift(1); 372 373 // Get the biased exponent 374 exp2 += fputil::FloatProperties<T>::EXPONENT_BIAS; 375 376 // Handle the exponent being too large (and return inf). 377 if (exp2 >= fputil::FPBits<T>::MAX_EXPONENT) { 378 *outputMantissa = 0; 379 *outputExp2 = fputil::FPBits<T>::MAX_EXPONENT; 380 errno = ERANGE; 381 return; 382 } 383 384 // Shift left to fill the mantissa 385 hpd.shift(fputil::FloatProperties<T>::MANTISSA_WIDTH); 386 typename fputil::FPBits<T>::UIntType final_mantissa = 387 hpd.round_to_integer_type<typename fputil::FPBits<T>::UIntType>(); 388 389 // Handle subnormals 390 if (exp2 <= 0) { 391 // Shift right until there is a valid exponent 392 while (exp2 < 0) { 393 hpd.shift(-1); 394 ++exp2; 395 } 396 // Shift right one more time to compensate for the left shift to get it 397 // between 1 and 2. 398 hpd.shift(-1); 399 final_mantissa = 400 hpd.round_to_integer_type<typename fputil::FPBits<T>::UIntType>(); 401 402 // Check if by shifting right we've caused this to round to a normal number. 403 if ((final_mantissa >> fputil::FloatProperties<T>::MANTISSA_WIDTH) != 0) { 404 ++exp2; 405 } 406 } 407 408 // Check if rounding added a bit, and shift down if that's the case. 409 if (final_mantissa == typename fputil::FPBits<T>::UIntType(2) 410 << fputil::FloatProperties<T>::MANTISSA_WIDTH) { 411 final_mantissa >>= 1; 412 ++exp2; 413 414 // Check if this rounding causes exp2 to go out of range and make the result 415 // INF. If this is the case, then finalMantissa and exp2 are already the 416 // correct values for an INF result. 417 if (exp2 >= fputil::FPBits<T>::MAX_EXPONENT) { 418 errno = ERANGE; // NOLINT 419 } 420 } 421 422 if (exp2 == 0) { 423 errno = ERANGE; 424 } 425 426 *outputMantissa = final_mantissa; 427 *outputExp2 = exp2; 428 } 429 430 // This class is used for templating the constants for Clinger's Fast Path, 431 // described as a method of approximation in 432 // Clinger WD. How to Read Floating Point Numbers Accurately. SIGPLAN Not 1990 433 // Jun;25(6):92–101. https://doi.org/10.1145/93548.93557. 434 // As well as the additions by Gay that extend the useful range by the number of 435 // exact digits stored by the float type, described in 436 // Gay DM, Correctly rounded binary-decimal and decimal-binary conversions; 437 // 1990. AT&T Bell Laboratories Numerical Analysis Manuscript 90-10. 438 template <class T> class ClingerConsts; 439 440 template <> class ClingerConsts<float> { 441 public: 442 static constexpr float POWERS_OF_TEN_ARRAY[] = {1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 443 1e6, 1e7, 1e8, 1e9, 1e10}; 444 static constexpr int32_t EXACT_POWERS_OF_TEN = 10; 445 static constexpr int32_t DIGITS_IN_MANTISSA = 7; 446 static constexpr float MAX_EXACT_INT = 16777215.0; 447 }; 448 449 template <> class ClingerConsts<double> { 450 public: 451 static constexpr double POWERS_OF_TEN_ARRAY[] = { 452 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 453 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; 454 static constexpr int32_t EXACT_POWERS_OF_TEN = 22; 455 static constexpr int32_t DIGITS_IN_MANTISSA = 15; 456 static constexpr double MAX_EXACT_INT = 9007199254740991.0; 457 }; 458 459 #if defined(LONG_DOUBLE_IS_DOUBLE) 460 template <> class ClingerConsts<long double> { 461 public: 462 static constexpr long double POWERS_OF_TEN_ARRAY[] = { 463 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 464 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; 465 static constexpr int32_t EXACT_POWERS_OF_TEN = 466 ClingerConsts<double>::EXACT_POWERS_OF_TEN; 467 static constexpr int32_t DIGITS_IN_MANTISSA = 468 ClingerConsts<double>::DIGITS_IN_MANTISSA; 469 static constexpr long double MAX_EXACT_INT = 470 ClingerConsts<double>::MAX_EXACT_INT; 471 }; 472 #elif defined(SPECIAL_X86_LONG_DOUBLE) 473 template <> class ClingerConsts<long double> { 474 public: 475 static constexpr long double POWERS_OF_TEN_ARRAY[] = { 476 1e0L, 1e1L, 1e2L, 1e3L, 1e4L, 1e5L, 1e6L, 1e7L, 1e8L, 1e9L, 477 1e10L, 1e11L, 1e12L, 1e13L, 1e14L, 1e15L, 1e16L, 1e17L, 1e18L, 1e19L, 478 1e20L, 1e21L, 1e22L, 1e23L, 1e24L, 1e25L, 1e26L, 1e27L}; 479 static constexpr int32_t EXACT_POWERS_OF_TEN = 27; 480 static constexpr int32_t DIGITS_IN_MANTISSA = 21; 481 static constexpr long double MAX_EXACT_INT = 18446744073709551615.0L; 482 }; 483 #else 484 template <> class ClingerConsts<long double> { 485 public: 486 static constexpr long double POWERS_OF_TEN_ARRAY[] = { 487 1e0L, 1e1L, 1e2L, 1e3L, 1e4L, 1e5L, 1e6L, 1e7L, 1e8L, 1e9L, 488 1e10L, 1e11L, 1e12L, 1e13L, 1e14L, 1e15L, 1e16L, 1e17L, 1e18L, 1e19L, 489 1e20L, 1e21L, 1e22L, 1e23L, 1e24L, 1e25L, 1e26L, 1e27L, 1e28L, 1e29L, 490 1e30L, 1e31L, 1e32L, 1e33L, 1e34L, 1e35L, 1e36L, 1e37L, 1e38L, 1e39L, 491 1e40L, 1e41L, 1e42L, 1e43L, 1e44L, 1e45L, 1e46L, 1e47L, 1e48L}; 492 static constexpr int32_t EXACT_POWERS_OF_TEN = 48; 493 static constexpr int32_t DIGITS_IN_MANTISSA = 33; 494 static constexpr long double MAX_EXACT_INT = 495 10384593717069655257060992658440191.0L; 496 }; 497 #endif 498 499 // Take an exact mantissa and exponent and attempt to convert it using only 500 // exact floating point arithmetic. This only handles numbers with low 501 // exponents, but handles them quickly. This is an implementation of Clinger's 502 // Fast Path, as described above. 503 template <class T> 504 static inline bool 505 clinger_fast_path(typename fputil::FPBits<T>::UIntType mantissa, int32_t exp10, 506 typename fputil::FPBits<T>::UIntType *outputMantissa, 507 uint32_t *outputExp2) { 508 if (mantissa >> fputil::FloatProperties<T>::MANTISSA_WIDTH > 0) { 509 return false; 510 } 511 512 fputil::FPBits<T> result; 513 T float_mantissa = static_cast<T>(mantissa); 514 515 if (exp10 == 0) { 516 result = fputil::FPBits<T>(float_mantissa); 517 } 518 if (exp10 > 0) { 519 if (exp10 > ClingerConsts<T>::EXACT_POWERS_OF_TEN + 520 ClingerConsts<T>::DIGITS_IN_MANTISSA) { 521 return false; 522 } 523 if (exp10 > ClingerConsts<T>::EXACT_POWERS_OF_TEN) { 524 float_mantissa = float_mantissa * 525 ClingerConsts<T>::POWERS_OF_TEN_ARRAY 526 [exp10 - ClingerConsts<T>::EXACT_POWERS_OF_TEN]; 527 exp10 = ClingerConsts<T>::EXACT_POWERS_OF_TEN; 528 } 529 if (float_mantissa > ClingerConsts<T>::MAX_EXACT_INT) { 530 return false; 531 } 532 result = fputil::FPBits<T>(float_mantissa * 533 ClingerConsts<T>::POWERS_OF_TEN_ARRAY[exp10]); 534 } else if (exp10 < 0) { 535 if (-exp10 > ClingerConsts<T>::EXACT_POWERS_OF_TEN) { 536 return false; 537 } 538 result = fputil::FPBits<T>(float_mantissa / 539 ClingerConsts<T>::POWERS_OF_TEN_ARRAY[-exp10]); 540 } 541 *outputMantissa = result.get_mantissa(); 542 *outputExp2 = result.get_unbiased_exponent(); 543 return true; 544 } 545 546 // Takes a mantissa and base 10 exponent and converts it into its closest 547 // floating point type T equivalient. First we try the Eisel-Lemire algorithm, 548 // then if that fails then we fall back to a more accurate algorithm for 549 // accuracy. The resulting mantissa and exponent are placed in outputMantissa 550 // and outputExp2. 551 template <class T> 552 static inline void 553 decimal_exp_to_float(typename fputil::FPBits<T>::UIntType mantissa, 554 int32_t exp10, const char *__restrict numStart, 555 bool truncated, 556 typename fputil::FPBits<T>::UIntType *outputMantissa, 557 uint32_t *outputExp2) { 558 // If the exponent is too large and can't be represented in this size of 559 // float, return inf. These bounds are very loose, but are mostly serving as a 560 // first pass. Some close numbers getting through is okay. 561 if (exp10 > 562 static_cast<int64_t>(fputil::FloatProperties<T>::EXPONENT_BIAS) / 3) { 563 *outputMantissa = 0; 564 *outputExp2 = fputil::FPBits<T>::MAX_EXPONENT; 565 errno = ERANGE; 566 return; 567 } 568 // If the exponent is too small even for a subnormal, return 0. 569 if (exp10 < 0 && 570 -static_cast<int64_t>(exp10) > 571 static_cast<int64_t>(fputil::FloatProperties<T>::EXPONENT_BIAS + 572 fputil::FloatProperties<T>::MANTISSA_WIDTH) / 573 2) { 574 *outputMantissa = 0; 575 *outputExp2 = 0; 576 errno = ERANGE; 577 return; 578 } 579 580 if (!truncated) { 581 if (clinger_fast_path<T>(mantissa, exp10, outputMantissa, outputExp2)) { 582 return; 583 } 584 } 585 586 // Try Eisel-Lemire 587 if (eisel_lemire<T>(mantissa, exp10, outputMantissa, outputExp2)) { 588 if (!truncated) { 589 return; 590 } 591 // If the mantissa is truncated, then the result may be off by the LSB, so 592 // check if rounding the mantissa up changes the result. If not, then it's 593 // safe, else use the fallback. 594 typename fputil::FPBits<T>::UIntType first_mantissa = *outputMantissa; 595 uint32_t first_exp2 = *outputExp2; 596 if (eisel_lemire<T>(mantissa + 1, exp10, outputMantissa, outputExp2)) { 597 if (*outputMantissa == first_mantissa && *outputExp2 == first_exp2) { 598 return; 599 } 600 } 601 } 602 603 simple_decimal_conversion<T>(numStart, outputMantissa, outputExp2); 604 605 return; 606 } 607 608 // Takes a mantissa and base 2 exponent and converts it into its closest 609 // floating point type T equivalient. Since the exponent is already in the right 610 // form, this is mostly just shifting and rounding. This is used for hexadecimal 611 // numbers since a base 16 exponent multiplied by 4 is the base 2 exponent. 612 template <class T> 613 static inline void 614 binary_exp_to_float(typename fputil::FPBits<T>::UIntType mantissa, int32_t exp2, 615 bool truncated, 616 typename fputil::FPBits<T>::UIntType *outputMantissa, 617 uint32_t *outputExp2) { 618 using BitsType = typename fputil::FPBits<T>::UIntType; 619 620 // This is the number of leading zeroes a properly normalized float of type T 621 // should have. 622 constexpr int32_t NUMBITS = sizeof(BitsType) * 8; 623 constexpr int32_t INF_EXP = 624 (1 << fputil::FloatProperties<T>::EXPONENT_WIDTH) - 1; 625 626 // Normalization step 1: Bring the leading bit to the highest bit of BitsType. 627 uint32_t amount_to_shift_left = leading_zeroes<BitsType>(mantissa); 628 mantissa <<= amount_to_shift_left; 629 630 // Keep exp2 representing the exponent of the lowest bit of BitsType. 631 exp2 -= amount_to_shift_left; 632 633 // biasedExponent represents the biased exponent of the most significant bit. 634 int32_t biased_exponent = 635 exp2 + NUMBITS + fputil::FPBits<T>::EXPONENT_BIAS - 1; 636 637 // Handle numbers that're too large and get squashed to inf 638 if (biased_exponent >= INF_EXP) { 639 // This indicates an overflow, so we make the result INF and set errno. 640 *outputExp2 = (1 << fputil::FloatProperties<T>::EXPONENT_WIDTH) - 1; 641 *outputMantissa = 0; 642 errno = ERANGE; 643 return; 644 } 645 646 uint32_t amount_to_shift_right = 647 NUMBITS - fputil::FloatProperties<T>::MANTISSA_WIDTH - 1; 648 649 // Handle subnormals. 650 if (biased_exponent <= 0) { 651 amount_to_shift_right += 1 - biased_exponent; 652 biased_exponent = 0; 653 654 if (amount_to_shift_right > NUMBITS) { 655 // Return 0 if the exponent is too small. 656 *outputMantissa = 0; 657 *outputExp2 = 0; 658 errno = ERANGE; 659 return; 660 } 661 } 662 663 BitsType round_bit_mask = BitsType(1) << (amount_to_shift_right - 1); 664 BitsType sticky_mask = round_bit_mask - 1; 665 bool round_bit = mantissa & round_bit_mask; 666 bool sticky_bit = static_cast<bool>(mantissa & sticky_mask) || truncated; 667 668 if (amount_to_shift_right < NUMBITS) { 669 // Shift the mantissa and clear the implicit bit. 670 mantissa >>= amount_to_shift_right; 671 mantissa &= fputil::FloatProperties<T>::MANTISSA_MASK; 672 } else { 673 mantissa = 0; 674 } 675 bool least_significant_bit = mantissa & BitsType(1); 676 // Perform rounding-to-nearest, tie-to-even. 677 if (round_bit && (least_significant_bit || sticky_bit)) { 678 ++mantissa; 679 } 680 681 if (mantissa > fputil::FloatProperties<T>::MANTISSA_MASK) { 682 // Rounding causes the exponent to increase. 683 ++biased_exponent; 684 685 if (biased_exponent == INF_EXP) { 686 errno = ERANGE; 687 } 688 } 689 690 if (biased_exponent == 0) { 691 errno = ERANGE; 692 } 693 694 *outputMantissa = mantissa & fputil::FloatProperties<T>::MANTISSA_MASK; 695 *outputExp2 = biased_exponent; 696 } 697 698 // checks if the next 4 characters of the string pointer are the start of a 699 // hexadecimal floating point number. Does not advance the string pointer. 700 static inline bool is_float_hex_start(const char *__restrict src, 701 const char decimalPoint) { 702 if (!(*src == '0' && (*(src + 1) | 32) == 'x')) { 703 return false; 704 } 705 if (*(src + 2) == decimalPoint) { 706 return isalnum(*(src + 3)) && b36_char_to_int(*(src + 3)) < 16; 707 } else { 708 return isalnum(*(src + 2)) && b36_char_to_int(*(src + 2)) < 16; 709 } 710 } 711 712 // Takes the start of a string representing a decimal float, as well as the 713 // local decimalPoint. It returns if it suceeded in parsing any digits, and if 714 // the return value is true then the outputs are pointer to the end of the 715 // number, and the mantissa and exponent for the closest float T representation. 716 // If the return value is false, then it is assumed that there is no number 717 // here. 718 template <class T> 719 static inline bool 720 decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT, 721 char **__restrict strEnd, 722 typename fputil::FPBits<T>::UIntType *outputMantissa, 723 uint32_t *outputExponent) { 724 using BitsType = typename fputil::FPBits<T>::UIntType; 725 constexpr uint32_t BASE = 10; 726 constexpr char EXPONENT_MARKER = 'e'; 727 728 const char *__restrict num_start = src; 729 bool truncated = false; 730 bool seen_digit = false; 731 bool after_decimal = false; 732 BitsType mantissa = 0; 733 int32_t exponent = 0; 734 735 // The goal for the first step of parsing is to convert the number in src to 736 // the format mantissa * (base ^ exponent) 737 738 // The loop fills the mantissa with as many digits as it can hold 739 const BitsType bitstype_max_div_by_base = 740 __llvm_libc::cpp::NumericLimits<BitsType>::max() / BASE; 741 while (true) { 742 if (isdigit(*src)) { 743 uint32_t digit = *src - '0'; 744 seen_digit = true; 745 746 if (mantissa < bitstype_max_div_by_base) { 747 mantissa = (mantissa * BASE) + digit; 748 if (after_decimal) { 749 --exponent; 750 } 751 } else { 752 if (digit > 0) 753 truncated = true; 754 if (!after_decimal) 755 ++exponent; 756 } 757 758 ++src; 759 continue; 760 } 761 if (*src == DECIMAL_POINT) { 762 if (after_decimal) { 763 break; // this means that *src points to a second decimal point, ending 764 // the number. 765 } 766 after_decimal = true; 767 ++src; 768 continue; 769 } 770 // The character is neither a digit nor a decimal point. 771 break; 772 } 773 774 if (!seen_digit) 775 return false; 776 777 if ((*src | 32) == EXPONENT_MARKER) { 778 if (*(src + 1) == '+' || *(src + 1) == '-' || isdigit(*(src + 1))) { 779 ++src; 780 char *temp_str_end; 781 int32_t add_to_exponent = strtointeger<int32_t>(src, &temp_str_end, 10); 782 if (add_to_exponent > 100000) 783 add_to_exponent = 100000; 784 else if (add_to_exponent < -100000) 785 add_to_exponent = -100000; 786 787 src = temp_str_end; 788 exponent += add_to_exponent; 789 } 790 } 791 792 *strEnd = const_cast<char *>(src); 793 if (mantissa == 0) { // if we have a 0, then also 0 the exponent. 794 *outputMantissa = 0; 795 *outputExponent = 0; 796 } else { 797 decimal_exp_to_float<T>(mantissa, exponent, num_start, truncated, 798 outputMantissa, outputExponent); 799 } 800 return true; 801 } 802 803 // Takes the start of a string representing a hexadecimal float, as well as the 804 // local decimal point. It returns if it suceeded in parsing any digits, and if 805 // the return value is true then the outputs are pointer to the end of the 806 // number, and the mantissa and exponent for the closest float T representation. 807 // If the return value is false, then it is assumed that there is no number 808 // here. 809 template <class T> 810 static inline bool hexadecimal_string_to_float( 811 const char *__restrict src, const char DECIMAL_POINT, 812 char **__restrict strEnd, 813 typename fputil::FPBits<T>::UIntType *outputMantissa, 814 uint32_t *outputExponent) { 815 using BitsType = typename fputil::FPBits<T>::UIntType; 816 constexpr uint32_t BASE = 16; 817 constexpr char EXPONENT_MARKER = 'p'; 818 819 bool truncated = false; 820 bool seen_digit = false; 821 bool after_decimal = false; 822 BitsType mantissa = 0; 823 int32_t exponent = 0; 824 825 // The goal for the first step of parsing is to convert the number in src to 826 // the format mantissa * (base ^ exponent) 827 828 // The loop fills the mantissa with as many digits as it can hold 829 const BitsType bitstype_max_div_by_base = 830 __llvm_libc::cpp::NumericLimits<BitsType>::max() / BASE; 831 while (true) { 832 if (isalnum(*src)) { 833 uint32_t digit = b36_char_to_int(*src); 834 if (digit < BASE) 835 seen_digit = true; 836 else 837 break; 838 839 if (mantissa < bitstype_max_div_by_base) { 840 mantissa = (mantissa * BASE) + digit; 841 if (after_decimal) 842 --exponent; 843 } else { 844 if (digit > 0) 845 truncated = true; 846 if (!after_decimal) 847 ++exponent; 848 } 849 ++src; 850 continue; 851 } 852 if (*src == DECIMAL_POINT) { 853 if (after_decimal) { 854 break; // this means that *src points to a second decimal point, ending 855 // the number. 856 } 857 after_decimal = true; 858 ++src; 859 continue; 860 } 861 // The character is neither a hexadecimal digit nor a decimal point. 862 break; 863 } 864 865 if (!seen_digit) 866 return false; 867 868 // Convert the exponent from having a base of 16 to having a base of 2. 869 exponent *= 4; 870 871 if ((*src | 32) == EXPONENT_MARKER) { 872 if (*(src + 1) == '+' || *(src + 1) == '-' || isdigit(*(src + 1))) { 873 ++src; 874 char *temp_str_end; 875 int32_t add_to_exponent = strtointeger<int32_t>(src, &temp_str_end, 10); 876 if (add_to_exponent > 100000) 877 add_to_exponent = 100000; 878 else if (add_to_exponent < -100000) 879 add_to_exponent = -100000; 880 src = temp_str_end; 881 exponent += add_to_exponent; 882 } 883 } 884 *strEnd = const_cast<char *>(src); 885 if (mantissa == 0) { // if we have a 0, then also 0 the exponent. 886 *outputMantissa = 0; 887 *outputExponent = 0; 888 } else { 889 binary_exp_to_float<T>(mantissa, exponent, truncated, outputMantissa, 890 outputExponent); 891 } 892 return true; 893 } 894 895 // Takes a pointer to a string and a pointer to a string pointer. This function 896 // is used as the backend for all of the string to float functions. 897 template <class T> 898 static inline T strtofloatingpoint(const char *__restrict src, 899 char **__restrict strEnd) { 900 using BitsType = typename fputil::FPBits<T>::UIntType; 901 fputil::FPBits<T> result = fputil::FPBits<T>(); 902 const char *original_src = src; 903 bool seen_digit = false; 904 src = first_non_whitespace(src); 905 906 if (*src == '+' || *src == '-') { 907 if (*src == '-') { 908 result.set_sign(true); 909 } 910 ++src; 911 } 912 913 static constexpr char DECIMAL_POINT = '.'; 914 static const char *inf_string = "infinity"; 915 static const char *nan_string = "nan"; 916 917 // bool truncated = false; 918 919 if (isdigit(*src) || *src == DECIMAL_POINT) { // regular number 920 int base = 10; 921 if (is_float_hex_start(src, DECIMAL_POINT)) { 922 base = 16; 923 src += 2; 924 seen_digit = true; 925 } 926 char *new_str_end = nullptr; 927 928 BitsType output_mantissa = 0; 929 uint32_t output_exponent = 0; 930 if (base == 16) { 931 seen_digit = hexadecimal_string_to_float<T>( 932 src, DECIMAL_POINT, &new_str_end, &output_mantissa, &output_exponent); 933 } else { // base is 10 934 seen_digit = decimal_string_to_float<T>( 935 src, DECIMAL_POINT, &new_str_end, &output_mantissa, &output_exponent); 936 } 937 938 if (seen_digit) { 939 src += new_str_end - src; 940 result.set_mantissa(output_mantissa); 941 result.set_unbiased_exponent(output_exponent); 942 } 943 } else if ((*src | 32) == 'n') { // NaN 944 if ((src[1] | 32) == nan_string[1] && (src[2] | 32) == nan_string[2]) { 945 seen_digit = true; 946 src += 3; 947 BitsType nan_mantissa = 0; 948 // this handles the case of `NaN(n-character-sequence)`, where the 949 // n-character-sequence is made of 0 or more letters and numbers in any 950 // order. 951 if (*src == '(') { 952 const char *left_paren = src; 953 ++src; 954 while (isalnum(*src)) 955 ++src; 956 if (*src == ')') { 957 ++src; 958 char *temp_src = 0; 959 if (isdigit(*(left_paren + 1))) { 960 // This is to prevent errors when BitsType is larger than 64 bits, 961 // since strtointeger only supports up to 64 bits. This is actually 962 // more than is required by the specification, which says for the 963 // input type "NAN(n-char-sequence)" that "the meaning of 964 // the n-char sequence is implementation-defined." 965 nan_mantissa = static_cast<BitsType>( 966 strtointeger<uint64_t>(left_paren + 1, &temp_src, 0)); 967 if (*temp_src != ')') 968 nan_mantissa = 0; 969 } 970 } else 971 src = left_paren; 972 } 973 nan_mantissa |= fputil::FloatProperties<T>::QUIET_NAN_MASK; 974 if (result.get_sign()) { 975 result = fputil::FPBits<T>(result.build_nan(nan_mantissa)); 976 result.set_sign(true); 977 } else { 978 result.set_sign(false); 979 result = fputil::FPBits<T>(result.build_nan(nan_mantissa)); 980 } 981 } 982 } else if ((*src | 32) == 'i') { // INF 983 if ((src[1] | 32) == inf_string[1] && (src[2] | 32) == inf_string[2]) { 984 seen_digit = true; 985 if (result.get_sign()) 986 result = result.neg_inf(); 987 else 988 result = result.inf(); 989 if ((src[3] | 32) == inf_string[3] && (src[4] | 32) == inf_string[4] && 990 (src[5] | 32) == inf_string[5] && (src[6] | 32) == inf_string[6] && 991 (src[7] | 32) == inf_string[7]) { 992 // if the string is "INFINITY" then strEnd needs to be set to src + 8. 993 src += 8; 994 } else { 995 src += 3; 996 } 997 } 998 } 999 if (!seen_digit) { // If there is nothing to actually parse, then return 0. 1000 if (strEnd != nullptr) 1001 *strEnd = const_cast<char *>(original_src); 1002 return T(0); 1003 } 1004 1005 if (strEnd != nullptr) 1006 *strEnd = const_cast<char *>(src); 1007 1008 // This function only does something if T is long double and the platform uses 1009 // special 80 bit long doubles. Otherwise it should be inlined out. 1010 set_implicit_bit<T>(result); 1011 1012 return T(result); 1013 } 1014 1015 } // namespace internal 1016 } // namespace __llvm_libc 1017 1018 #endif // LIBC_SRC_SUPPORT_STR_TO_FLOAT_H 1019