1 //===-- String to float conversion utils ------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LIBC_SRC_SUPPORT_STR_TO_FLOAT_H 10 #define LIBC_SRC_SUPPORT_STR_TO_FLOAT_H 11 12 #include "src/__support/CPP/Limits.h" 13 #include "src/__support/FPUtil/FPBits.h" 14 #include "src/__support/ctype_utils.h" 15 #include "src/__support/detailed_powers_of_ten.h" 16 #include "src/__support/high_precision_decimal.h" 17 #include "src/__support/str_to_integer.h" 18 #include <errno.h> 19 20 namespace __llvm_libc { 21 namespace internal { 22 23 template <class T> uint32_t inline leading_zeroes(T inputNumber) { 24 constexpr uint32_t BITS_IN_T = sizeof(T) * 8; 25 if (inputNumber == 0) { 26 return BITS_IN_T; 27 } 28 uint32_t cur_guess = BITS_IN_T / 2; 29 uint32_t range_size = BITS_IN_T / 2; 30 // while either shifting by curGuess does not get rid of all of the bits or 31 // shifting by one less also gets rid of all of the bits then we have not 32 // found the first bit. 33 while (((inputNumber >> cur_guess) > 0) || 34 ((inputNumber >> (cur_guess - 1)) == 0)) { 35 // Binary search for the first set bit 36 range_size /= 2; 37 if (range_size == 0) { 38 break; 39 } 40 if ((inputNumber >> cur_guess) > 0) { 41 cur_guess += range_size; 42 } else { 43 cur_guess -= range_size; 44 } 45 } 46 if (inputNumber >> cur_guess > 0) { 47 cur_guess++; 48 } 49 return BITS_IN_T - cur_guess; 50 } 51 52 template <> uint32_t inline leading_zeroes<uint32_t>(uint32_t inputNumber) { 53 return inputNumber == 0 ? 32 : __builtin_clz(inputNumber); 54 } 55 56 template <> uint32_t inline leading_zeroes<uint64_t>(uint64_t inputNumber) { 57 return inputNumber == 0 ? 64 : __builtin_clzll(inputNumber); 58 } 59 60 static inline uint64_t low64(__uint128_t num) { 61 return static_cast<uint64_t>(num & 0xffffffffffffffff); 62 } 63 64 static inline uint64_t high64(__uint128_t num) { 65 return static_cast<uint64_t>(num >> 64); 66 } 67 68 template <class T> inline void set_implicit_bit(fputil::FPBits<T> &result) { 69 return; 70 } 71 72 #if defined(SPECIAL_X86_LONG_DOUBLE) 73 template <> 74 inline void set_implicit_bit<long double>(fputil::FPBits<long double> &result) { 75 result.set_implicit_bit(result.get_unbiased_exponent() != 0); 76 } 77 #endif 78 79 // This Eisel-Lemire implementation is based on the algorithm described in the 80 // paper Number Parsing at a Gigabyte per Second, Software: Practice and 81 // Experience 51 (8), 2021 (https://arxiv.org/abs/2101.11408), as well as the 82 // description by Nigel Tao 83 // (https://nigeltao.github.io/blog/2020/eisel-lemire.html) and the golang 84 // implementation, also by Nigel Tao 85 // (https://github.com/golang/go/blob/release-branch.go1.16/src/strconv/eisel_lemire.go#L25) 86 // for some optimizations as well as handling 32 bit floats. 87 template <class T> 88 static inline bool 89 eisel_lemire(typename fputil::FPBits<T>::UIntType mantissa, int32_t exp10, 90 typename fputil::FPBits<T>::UIntType *outputMantissa, 91 uint32_t *outputExp2) { 92 93 using BitsType = typename fputil::FPBits<T>::UIntType; 94 constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8; 95 96 if (sizeof(T) > 8) { // This algorithm cannot handle anything longer than a 97 // double, so we skip straight to the fallback. 98 return false; 99 } 100 101 // Exp10 Range 102 if (exp10 < DETAILED_POWERS_OF_TEN_MIN_EXP_10 || 103 exp10 > DETAILED_POWERS_OF_TEN_MAX_EXP_10) { 104 return false; 105 } 106 107 // Normalization 108 uint32_t clz = leading_zeroes<BitsType>(mantissa); 109 mantissa <<= clz; 110 111 uint32_t exp2 = exp10_to_exp2(exp10) + BITS_IN_MANTISSA + 112 fputil::FloatProperties<T>::EXPONENT_BIAS - clz; 113 114 // Multiplication 115 const uint64_t *power_of_ten = 116 DETAILED_POWERS_OF_TEN[exp10 - DETAILED_POWERS_OF_TEN_MIN_EXP_10]; 117 118 __uint128_t first_approx = static_cast<__uint128_t>(mantissa) * 119 static_cast<__uint128_t>(power_of_ten[1]); 120 121 // Wider Approximation 122 __uint128_t final_approx; 123 // The halfway constant is used to check if the bits that will be shifted away 124 // intially are all 1. For doubles this is 64 (bitstype size) - 52 (final 125 // mantissa size) - 3 (we shift away the last two bits separately for 126 // accuracy, and the most significant bit is ignored.) = 9 bits. Similarly, 127 // it's 6 bits for floats in this case. 128 const uint64_t halfway_constant = 129 (uint64_t(1) << (BITS_IN_MANTISSA - 130 fputil::FloatProperties<T>::MANTISSA_WIDTH - 3)) - 131 1; 132 if ((high64(first_approx) & halfway_constant) == halfway_constant && 133 low64(first_approx) + mantissa < mantissa) { 134 __uint128_t low_bits = static_cast<__uint128_t>(mantissa) * 135 static_cast<__uint128_t>(power_of_ten[0]); 136 __uint128_t second_approx = 137 first_approx + static_cast<__uint128_t>(high64(low_bits)); 138 139 if ((high64(second_approx) & halfway_constant) == halfway_constant && 140 low64(second_approx) + 1 == 0 && 141 low64(low_bits) + mantissa < mantissa) { 142 return false; 143 } 144 final_approx = second_approx; 145 } else { 146 final_approx = first_approx; 147 } 148 149 // Shifting to 54 bits for doubles and 25 bits for floats 150 BitsType msb = high64(final_approx) >> (BITS_IN_MANTISSA - 1); 151 BitsType final_mantissa = high64(final_approx) >> 152 (msb + BITS_IN_MANTISSA - 153 (fputil::FloatProperties<T>::MANTISSA_WIDTH + 3)); 154 exp2 -= 1 ^ msb; // same as !msb 155 156 // Half-way ambiguity 157 if (low64(final_approx) == 0 && 158 (high64(final_approx) & halfway_constant) == 0 && 159 (final_mantissa & 3) == 1) { 160 return false; 161 } 162 163 // From 54 to 53 bits for doubles and 25 to 24 bits for floats 164 final_mantissa += final_mantissa & 1; 165 final_mantissa >>= 1; 166 if ((final_mantissa >> (fputil::FloatProperties<T>::MANTISSA_WIDTH + 1)) > 167 0) { 168 final_mantissa >>= 1; 169 ++exp2; 170 } 171 172 // The if block is equivalent to (but has fewer branches than): 173 // if exp2 <= 0 || exp2 >= 0x7FF { etc } 174 if (exp2 - 1 >= (1 << fputil::FloatProperties<T>::EXPONENT_WIDTH) - 2) { 175 return false; 176 } 177 178 *outputMantissa = final_mantissa; 179 *outputExp2 = exp2; 180 return true; 181 } 182 183 #if !defined(LONG_DOUBLE_IS_DOUBLE) 184 template <> 185 inline bool eisel_lemire<long double>( 186 typename fputil::FPBits<long double>::UIntType mantissa, int32_t exp10, 187 typename fputil::FPBits<long double>::UIntType *outputMantissa, 188 uint32_t *outputExp2) { 189 using BitsType = typename fputil::FPBits<long double>::UIntType; 190 constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8; 191 192 // Exp10 Range 193 // This doesn't reach very far into the range for long doubles, since it's 194 // sized for doubles and their 11 exponent bits, and not for long doubles and 195 // their 15 exponent bits (max exponent of ~300 for double vs ~5000 for long 196 // double). This is a known tradeoff, and was made because a proper long 197 // double table would be approximately 16 times larger. This would have 198 // significant memory and storage costs all the time to speed up a relatively 199 // uncommon path. In addition the exp10_to_exp2 function only approximates 200 // multiplying by log(10)/log(2), and that approximation may not be accurate 201 // out to the full long double range. 202 if (exp10 < DETAILED_POWERS_OF_TEN_MIN_EXP_10 || 203 exp10 > DETAILED_POWERS_OF_TEN_MAX_EXP_10) { 204 return false; 205 } 206 207 // Normalization 208 uint32_t clz = leading_zeroes<BitsType>(mantissa); 209 mantissa <<= clz; 210 211 uint32_t exp2 = exp10_to_exp2(exp10) + BITS_IN_MANTISSA + 212 fputil::FloatProperties<long double>::EXPONENT_BIAS - clz; 213 214 // Multiplication 215 const uint64_t *power_of_ten = 216 DETAILED_POWERS_OF_TEN[exp10 - DETAILED_POWERS_OF_TEN_MIN_EXP_10]; 217 218 // Since the input mantissa is more than 64 bits, we have to multiply with the 219 // full 128 bits of the power of ten to get an approximation with the same 220 // number of significant bits. This means that we only get the one 221 // approximation, and that approximation is 256 bits long. 222 __uint128_t approx_upper = static_cast<__uint128_t>(high64(mantissa)) * 223 static_cast<__uint128_t>(power_of_ten[1]); 224 225 __uint128_t approx_middle = static_cast<__uint128_t>(high64(mantissa)) * 226 static_cast<__uint128_t>(power_of_ten[0]) + 227 static_cast<__uint128_t>(low64(mantissa)) * 228 static_cast<__uint128_t>(power_of_ten[1]); 229 230 __uint128_t approx_lower = static_cast<__uint128_t>(low64(mantissa)) * 231 static_cast<__uint128_t>(power_of_ten[0]); 232 233 __uint128_t final_approx_lower = 234 approx_lower + (static_cast<__uint128_t>(low64(approx_middle)) << 64); 235 __uint128_t final_approx_upper = approx_upper + high64(approx_middle) + 236 (final_approx_lower < approx_lower ? 1 : 0); 237 238 // The halfway constant is used to check if the bits that will be shifted away 239 // intially are all 1. For 80 bit floats this is 128 (bitstype size) - 64 240 // (final mantissa size) - 3 (we shift away the last two bits separately for 241 // accuracy, and the most significant bit is ignored.) = 61 bits. Similarly, 242 // it's 12 bits for 128 bit floats in this case. 243 constexpr __uint128_t HALFWAY_CONSTANT = 244 (__uint128_t(1) << (BITS_IN_MANTISSA - 245 fputil::FloatProperties<long double>::MANTISSA_WIDTH - 246 3)) - 247 1; 248 249 if ((final_approx_upper & HALFWAY_CONSTANT) == HALFWAY_CONSTANT && 250 final_approx_lower + mantissa < mantissa) { 251 return false; 252 } 253 254 // Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats 255 BitsType msb = final_approx_upper >> (BITS_IN_MANTISSA - 1); 256 BitsType final_mantissa = 257 final_approx_upper >> 258 (msb + BITS_IN_MANTISSA - 259 (fputil::FloatProperties<long double>::MANTISSA_WIDTH + 3)); 260 exp2 -= 1 ^ msb; // same as !msb 261 262 // Half-way ambiguity 263 if (final_approx_lower == 0 && (final_approx_upper & HALFWAY_CONSTANT) == 0 && 264 (final_mantissa & 3) == 1) { 265 return false; 266 } 267 268 // From 65 to 64 bits for 80 bit floats and 113 to 112 bits for 128 bit 269 // floats 270 final_mantissa += final_mantissa & 1; 271 final_mantissa >>= 1; 272 if ((final_mantissa >> 273 (fputil::FloatProperties<long double>::MANTISSA_WIDTH + 1)) > 0) { 274 final_mantissa >>= 1; 275 ++exp2; 276 } 277 278 // The if block is equivalent to (but has fewer branches than): 279 // if exp2 <= 0 || exp2 >= MANTISSA_MAX { etc } 280 if (exp2 - 1 >= 281 (1 << fputil::FloatProperties<long double>::EXPONENT_WIDTH) - 2) { 282 return false; 283 } 284 285 *outputMantissa = final_mantissa; 286 *outputExp2 = exp2; 287 return true; 288 } 289 #endif 290 291 // The nth item in POWERS_OF_TWO represents the greatest power of two less than 292 // 10^n. This tells us how much we can safely shift without overshooting. 293 constexpr uint8_t POWERS_OF_TWO[19] = { 294 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, 33, 36, 39, 43, 46, 49, 53, 56, 59, 295 }; 296 constexpr int32_t NUM_POWERS_OF_TWO = 297 sizeof(POWERS_OF_TWO) / sizeof(POWERS_OF_TWO[0]); 298 299 // Takes a mantissa and base 10 exponent and converts it into its closest 300 // floating point type T equivalent. This is the fallback algorithm used when 301 // the Eisel-Lemire algorithm fails, it's slower but more accurate. It's based 302 // on the Simple Decimal Conversion algorithm by Nigel Tao, described at this 303 // link: https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html 304 template <class T> 305 static inline void 306 simple_decimal_conversion(const char *__restrict numStart, 307 typename fputil::FPBits<T>::UIntType *outputMantissa, 308 uint32_t *outputExp2) { 309 310 int32_t exp2 = 0; 311 HighPrecisionDecimal hpd = HighPrecisionDecimal(numStart); 312 313 if (hpd.get_num_digits() == 0) { 314 *outputMantissa = 0; 315 *outputExp2 = 0; 316 return; 317 } 318 319 // If the exponent is too large and can't be represented in this size of 320 // float, return inf. 321 if (hpd.get_decimal_point() > 0 && 322 exp10_to_exp2(hpd.get_decimal_point() - 1) > 323 static_cast<int64_t>(fputil::FloatProperties<T>::EXPONENT_BIAS)) { 324 *outputMantissa = 0; 325 *outputExp2 = fputil::FPBits<T>::MAX_EXPONENT; 326 errno = ERANGE; 327 return; 328 } 329 // If the exponent is too small even for a subnormal, return 0. 330 if (hpd.get_decimal_point() < 0 && 331 exp10_to_exp2(-hpd.get_decimal_point()) > 332 static_cast<int64_t>(fputil::FloatProperties<T>::EXPONENT_BIAS + 333 fputil::FloatProperties<T>::MANTISSA_WIDTH)) { 334 *outputMantissa = 0; 335 *outputExp2 = 0; 336 errno = ERANGE; 337 return; 338 } 339 340 // Right shift until the number is smaller than 1. 341 while (hpd.get_decimal_point() > 0) { 342 int32_t shift_amount = 0; 343 if (hpd.get_decimal_point() >= NUM_POWERS_OF_TWO) { 344 shift_amount = 60; 345 } else { 346 shift_amount = POWERS_OF_TWO[hpd.get_decimal_point()]; 347 } 348 exp2 += shift_amount; 349 hpd.shift(-shift_amount); 350 } 351 352 // Left shift until the number is between 1/2 and 1 353 while (hpd.get_decimal_point() < 0 || 354 (hpd.get_decimal_point() == 0 && hpd.get_digits()[0] < 5)) { 355 int32_t shift_amount = 0; 356 357 if (-hpd.get_decimal_point() >= NUM_POWERS_OF_TWO) { 358 shift_amount = 60; 359 } else if (hpd.get_decimal_point() != 0) { 360 shift_amount = POWERS_OF_TWO[-hpd.get_decimal_point()]; 361 } else { // This handles the case of the number being between .1 and .5 362 shift_amount = 1; 363 } 364 exp2 -= shift_amount; 365 hpd.shift(shift_amount); 366 } 367 368 // Left shift once so that the number is between 1 and 2 369 --exp2; 370 hpd.shift(1); 371 372 // Get the biased exponent 373 exp2 += fputil::FloatProperties<T>::EXPONENT_BIAS; 374 375 // Handle the exponent being too large (and return inf). 376 if (exp2 >= fputil::FPBits<T>::MAX_EXPONENT) { 377 *outputMantissa = 0; 378 *outputExp2 = fputil::FPBits<T>::MAX_EXPONENT; 379 errno = ERANGE; 380 return; 381 } 382 383 // Shift left to fill the mantissa 384 hpd.shift(fputil::FloatProperties<T>::MANTISSA_WIDTH); 385 typename fputil::FPBits<T>::UIntType final_mantissa = 386 hpd.round_to_integer_type<typename fputil::FPBits<T>::UIntType>(); 387 388 // Handle subnormals 389 if (exp2 <= 0) { 390 // Shift right until there is a valid exponent 391 while (exp2 < 0) { 392 hpd.shift(-1); 393 ++exp2; 394 } 395 // Shift right one more time to compensate for the left shift to get it 396 // between 1 and 2. 397 hpd.shift(-1); 398 final_mantissa = 399 hpd.round_to_integer_type<typename fputil::FPBits<T>::UIntType>(); 400 401 // Check if by shifting right we've caused this to round to a normal number. 402 if ((final_mantissa >> fputil::FloatProperties<T>::MANTISSA_WIDTH) != 0) { 403 ++exp2; 404 } 405 } 406 407 // Check if rounding added a bit, and shift down if that's the case. 408 if (final_mantissa == typename fputil::FPBits<T>::UIntType(2) 409 << fputil::FloatProperties<T>::MANTISSA_WIDTH) { 410 final_mantissa >>= 1; 411 ++exp2; 412 413 // Check if this rounding causes exp2 to go out of range and make the result 414 // INF. If this is the case, then finalMantissa and exp2 are already the 415 // correct values for an INF result. 416 if (exp2 >= fputil::FPBits<T>::MAX_EXPONENT) { 417 errno = ERANGE; // NOLINT 418 } 419 } 420 421 if (exp2 == 0) { 422 errno = ERANGE; 423 } 424 425 *outputMantissa = final_mantissa; 426 *outputExp2 = exp2; 427 } 428 429 // This class is used for templating the constants for Clinger's Fast Path, 430 // described as a method of approximation in 431 // Clinger WD. How to Read Floating Point Numbers Accurately. SIGPLAN Not 1990 432 // Jun;25(6):92–101. https://doi.org/10.1145/93548.93557. 433 // As well as the additions by Gay that extend the useful range by the number of 434 // exact digits stored by the float type, described in 435 // Gay DM, Correctly rounded binary-decimal and decimal-binary conversions; 436 // 1990. AT&T Bell Laboratories Numerical Analysis Manuscript 90-10. 437 template <class T> class ClingerConsts; 438 439 template <> class ClingerConsts<float> { 440 public: 441 static constexpr float POWERS_OF_TEN_ARRAY[] = {1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 442 1e6, 1e7, 1e8, 1e9, 1e10}; 443 static constexpr int32_t EXACT_POWERS_OF_TEN = 10; 444 static constexpr int32_t DIGITS_IN_MANTISSA = 7; 445 static constexpr float MAX_EXACT_INT = 16777215.0; 446 }; 447 448 template <> class ClingerConsts<double> { 449 public: 450 static constexpr double POWERS_OF_TEN_ARRAY[] = { 451 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 452 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; 453 static constexpr int32_t EXACT_POWERS_OF_TEN = 22; 454 static constexpr int32_t DIGITS_IN_MANTISSA = 15; 455 static constexpr double MAX_EXACT_INT = 9007199254740991.0; 456 }; 457 458 #if defined(LONG_DOUBLE_IS_DOUBLE) 459 template <> class ClingerConsts<long double> { 460 public: 461 static constexpr long double POWERS_OF_TEN_ARRAY[] = { 462 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 463 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; 464 static constexpr int32_t EXACT_POWERS_OF_TEN = 465 ClingerConsts<double>::EXACT_POWERS_OF_TEN; 466 static constexpr int32_t DIGITS_IN_MANTISSA = 467 ClingerConsts<double>::DIGITS_IN_MANTISSA; 468 static constexpr long double MAX_EXACT_INT = 469 ClingerConsts<double>::MAX_EXACT_INT; 470 }; 471 #elif defined(SPECIAL_X86_LONG_DOUBLE) 472 template <> class ClingerConsts<long double> { 473 public: 474 static constexpr long double POWERS_OF_TEN_ARRAY[] = { 475 1e0L, 1e1L, 1e2L, 1e3L, 1e4L, 1e5L, 1e6L, 1e7L, 1e8L, 1e9L, 476 1e10L, 1e11L, 1e12L, 1e13L, 1e14L, 1e15L, 1e16L, 1e17L, 1e18L, 1e19L, 477 1e20L, 1e21L, 1e22L, 1e23L, 1e24L, 1e25L, 1e26L, 1e27L}; 478 static constexpr int32_t EXACT_POWERS_OF_TEN = 27; 479 static constexpr int32_t DIGITS_IN_MANTISSA = 21; 480 static constexpr long double MAX_EXACT_INT = 18446744073709551615.0L; 481 }; 482 #else 483 template <> class ClingerConsts<long double> { 484 public: 485 static constexpr long double POWERS_OF_TEN_ARRAY[] = { 486 1e0L, 1e1L, 1e2L, 1e3L, 1e4L, 1e5L, 1e6L, 1e7L, 1e8L, 1e9L, 487 1e10L, 1e11L, 1e12L, 1e13L, 1e14L, 1e15L, 1e16L, 1e17L, 1e18L, 1e19L, 488 1e20L, 1e21L, 1e22L, 1e23L, 1e24L, 1e25L, 1e26L, 1e27L, 1e28L, 1e29L, 489 1e30L, 1e31L, 1e32L, 1e33L, 1e34L, 1e35L, 1e36L, 1e37L, 1e38L, 1e39L, 490 1e40L, 1e41L, 1e42L, 1e43L, 1e44L, 1e45L, 1e46L, 1e47L, 1e48L}; 491 static constexpr int32_t EXACT_POWERS_OF_TEN = 48; 492 static constexpr int32_t DIGITS_IN_MANTISSA = 33; 493 static constexpr long double MAX_EXACT_INT = 494 10384593717069655257060992658440191.0L; 495 }; 496 #endif 497 498 // Take an exact mantissa and exponent and attempt to convert it using only 499 // exact floating point arithmetic. This only handles numbers with low 500 // exponents, but handles them quickly. This is an implementation of Clinger's 501 // Fast Path, as described above. 502 template <class T> 503 static inline bool 504 clinger_fast_path(typename fputil::FPBits<T>::UIntType mantissa, int32_t exp10, 505 typename fputil::FPBits<T>::UIntType *outputMantissa, 506 uint32_t *outputExp2) { 507 if (mantissa >> fputil::FloatProperties<T>::MANTISSA_WIDTH > 0) { 508 return false; 509 } 510 511 fputil::FPBits<T> result; 512 T float_mantissa = static_cast<T>(mantissa); 513 514 if (exp10 == 0) { 515 result = fputil::FPBits<T>(float_mantissa); 516 } 517 if (exp10 > 0) { 518 if (exp10 > ClingerConsts<T>::EXACT_POWERS_OF_TEN + 519 ClingerConsts<T>::DIGITS_IN_MANTISSA) { 520 return false; 521 } 522 if (exp10 > ClingerConsts<T>::EXACT_POWERS_OF_TEN) { 523 float_mantissa = float_mantissa * 524 ClingerConsts<T>::POWERS_OF_TEN_ARRAY 525 [exp10 - ClingerConsts<T>::EXACT_POWERS_OF_TEN]; 526 exp10 = ClingerConsts<T>::EXACT_POWERS_OF_TEN; 527 } 528 if (float_mantissa > ClingerConsts<T>::MAX_EXACT_INT) { 529 return false; 530 } 531 result = fputil::FPBits<T>(float_mantissa * 532 ClingerConsts<T>::POWERS_OF_TEN_ARRAY[exp10]); 533 } else if (exp10 < 0) { 534 if (-exp10 > ClingerConsts<T>::EXACT_POWERS_OF_TEN) { 535 return false; 536 } 537 result = fputil::FPBits<T>(float_mantissa / 538 ClingerConsts<T>::POWERS_OF_TEN_ARRAY[-exp10]); 539 } 540 *outputMantissa = result.get_mantissa(); 541 *outputExp2 = result.get_unbiased_exponent(); 542 return true; 543 } 544 545 // Takes a mantissa and base 10 exponent and converts it into its closest 546 // floating point type T equivalient. First we try the Eisel-Lemire algorithm, 547 // then if that fails then we fall back to a more accurate algorithm for 548 // accuracy. The resulting mantissa and exponent are placed in outputMantissa 549 // and outputExp2. 550 template <class T> 551 static inline void 552 decimal_exp_to_float(typename fputil::FPBits<T>::UIntType mantissa, 553 int32_t exp10, const char *__restrict numStart, 554 bool truncated, 555 typename fputil::FPBits<T>::UIntType *outputMantissa, 556 uint32_t *outputExp2) { 557 // If the exponent is too large and can't be represented in this size of 558 // float, return inf. These bounds are very loose, but are mostly serving as a 559 // first pass. Some close numbers getting through is okay. 560 if (exp10 > 561 static_cast<int64_t>(fputil::FloatProperties<T>::EXPONENT_BIAS) / 3) { 562 *outputMantissa = 0; 563 *outputExp2 = fputil::FPBits<T>::MAX_EXPONENT; 564 errno = ERANGE; 565 return; 566 } 567 // If the exponent is too small even for a subnormal, return 0. 568 if (exp10 < 0 && 569 -static_cast<int64_t>(exp10) > 570 static_cast<int64_t>(fputil::FloatProperties<T>::EXPONENT_BIAS + 571 fputil::FloatProperties<T>::MANTISSA_WIDTH) / 572 2) { 573 *outputMantissa = 0; 574 *outputExp2 = 0; 575 errno = ERANGE; 576 return; 577 } 578 579 if (!truncated) { 580 if (clinger_fast_path<T>(mantissa, exp10, outputMantissa, outputExp2)) { 581 return; 582 } 583 } 584 585 // Try Eisel-Lemire 586 if (eisel_lemire<T>(mantissa, exp10, outputMantissa, outputExp2)) { 587 if (!truncated) { 588 return; 589 } 590 // If the mantissa is truncated, then the result may be off by the LSB, so 591 // check if rounding the mantissa up changes the result. If not, then it's 592 // safe, else use the fallback. 593 typename fputil::FPBits<T>::UIntType first_mantissa = *outputMantissa; 594 uint32_t first_exp2 = *outputExp2; 595 if (eisel_lemire<T>(mantissa + 1, exp10, outputMantissa, outputExp2)) { 596 if (*outputMantissa == first_mantissa && *outputExp2 == first_exp2) { 597 return; 598 } 599 } 600 } 601 602 simple_decimal_conversion<T>(numStart, outputMantissa, outputExp2); 603 604 return; 605 } 606 607 // Takes a mantissa and base 2 exponent and converts it into its closest 608 // floating point type T equivalient. Since the exponent is already in the right 609 // form, this is mostly just shifting and rounding. This is used for hexadecimal 610 // numbers since a base 16 exponent multiplied by 4 is the base 2 exponent. 611 template <class T> 612 static inline void 613 binary_exp_to_float(typename fputil::FPBits<T>::UIntType mantissa, int32_t exp2, 614 bool truncated, 615 typename fputil::FPBits<T>::UIntType *outputMantissa, 616 uint32_t *outputExp2) { 617 using BitsType = typename fputil::FPBits<T>::UIntType; 618 619 // This is the number of leading zeroes a properly normalized float of type T 620 // should have. 621 constexpr int32_t NUMBITS = sizeof(BitsType) * 8; 622 constexpr int32_t INF_EXP = 623 (1 << fputil::FloatProperties<T>::EXPONENT_WIDTH) - 1; 624 625 // Normalization step 1: Bring the leading bit to the highest bit of BitsType. 626 uint32_t amount_to_shift_left = leading_zeroes<BitsType>(mantissa); 627 mantissa <<= amount_to_shift_left; 628 629 // Keep exp2 representing the exponent of the lowest bit of BitsType. 630 exp2 -= amount_to_shift_left; 631 632 // biasedExponent represents the biased exponent of the most significant bit. 633 int32_t biased_exponent = 634 exp2 + NUMBITS + fputil::FPBits<T>::EXPONENT_BIAS - 1; 635 636 // Handle numbers that're too large and get squashed to inf 637 if (biased_exponent >= INF_EXP) { 638 // This indicates an overflow, so we make the result INF and set errno. 639 *outputExp2 = (1 << fputil::FloatProperties<T>::EXPONENT_WIDTH) - 1; 640 *outputMantissa = 0; 641 errno = ERANGE; 642 return; 643 } 644 645 uint32_t amount_to_shift_right = 646 NUMBITS - fputil::FloatProperties<T>::MANTISSA_WIDTH - 1; 647 648 // Handle subnormals. 649 if (biased_exponent <= 0) { 650 amount_to_shift_right += 1 - biased_exponent; 651 biased_exponent = 0; 652 653 if (amount_to_shift_right > NUMBITS) { 654 // Return 0 if the exponent is too small. 655 *outputMantissa = 0; 656 *outputExp2 = 0; 657 errno = ERANGE; 658 return; 659 } 660 } 661 662 BitsType round_bit_mask = BitsType(1) << (amount_to_shift_right - 1); 663 BitsType sticky_mask = round_bit_mask - 1; 664 bool round_bit = mantissa & round_bit_mask; 665 bool sticky_bit = static_cast<bool>(mantissa & sticky_mask) || truncated; 666 667 if (amount_to_shift_right < NUMBITS) { 668 // Shift the mantissa and clear the implicit bit. 669 mantissa >>= amount_to_shift_right; 670 mantissa &= fputil::FloatProperties<T>::MANTISSA_MASK; 671 } else { 672 mantissa = 0; 673 } 674 bool least_significant_bit = mantissa & BitsType(1); 675 // Perform rounding-to-nearest, tie-to-even. 676 if (round_bit && (least_significant_bit || sticky_bit)) { 677 ++mantissa; 678 } 679 680 if (mantissa > fputil::FloatProperties<T>::MANTISSA_MASK) { 681 // Rounding causes the exponent to increase. 682 ++biased_exponent; 683 684 if (biased_exponent == INF_EXP) { 685 errno = ERANGE; 686 } 687 } 688 689 if (biased_exponent == 0) { 690 errno = ERANGE; 691 } 692 693 *outputMantissa = mantissa & fputil::FloatProperties<T>::MANTISSA_MASK; 694 *outputExp2 = biased_exponent; 695 } 696 697 // checks if the next 4 characters of the string pointer are the start of a 698 // hexadecimal floating point number. Does not advance the string pointer. 699 static inline bool is_float_hex_start(const char *__restrict src, 700 const char decimalPoint) { 701 if (!(*src == '0' && (*(src + 1) | 32) == 'x')) { 702 return false; 703 } 704 if (*(src + 2) == decimalPoint) { 705 return isalnum(*(src + 3)) && b36_char_to_int(*(src + 3)) < 16; 706 } else { 707 return isalnum(*(src + 2)) && b36_char_to_int(*(src + 2)) < 16; 708 } 709 } 710 711 // Takes the start of a string representing a decimal float, as well as the 712 // local decimalPoint. It returns if it suceeded in parsing any digits, and if 713 // the return value is true then the outputs are pointer to the end of the 714 // number, and the mantissa and exponent for the closest float T representation. 715 // If the return value is false, then it is assumed that there is no number 716 // here. 717 template <class T> 718 static inline bool 719 decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT, 720 char **__restrict strEnd, 721 typename fputil::FPBits<T>::UIntType *outputMantissa, 722 uint32_t *outputExponent) { 723 using BitsType = typename fputil::FPBits<T>::UIntType; 724 constexpr uint32_t BASE = 10; 725 constexpr char EXPONENT_MARKER = 'e'; 726 727 const char *__restrict num_start = src; 728 bool truncated = false; 729 bool seen_digit = false; 730 bool after_decimal = false; 731 BitsType mantissa = 0; 732 int32_t exponent = 0; 733 734 // The goal for the first step of parsing is to convert the number in src to 735 // the format mantissa * (base ^ exponent) 736 737 // The loop fills the mantissa with as many digits as it can hold 738 const BitsType bitstype_max_div_by_base = 739 __llvm_libc::cpp::NumericLimits<BitsType>::max() / BASE; 740 while (true) { 741 if (isdigit(*src)) { 742 uint32_t digit = *src - '0'; 743 seen_digit = true; 744 745 if (mantissa < bitstype_max_div_by_base) { 746 mantissa = (mantissa * BASE) + digit; 747 if (after_decimal) { 748 --exponent; 749 } 750 } else { 751 if (digit > 0) 752 truncated = true; 753 if (!after_decimal) 754 ++exponent; 755 } 756 757 ++src; 758 continue; 759 } 760 if (*src == DECIMAL_POINT) { 761 if (after_decimal) { 762 break; // this means that *src points to a second decimal point, ending 763 // the number. 764 } 765 after_decimal = true; 766 ++src; 767 continue; 768 } 769 // The character is neither a digit nor a decimal point. 770 break; 771 } 772 773 if (!seen_digit) 774 return false; 775 776 if ((*src | 32) == EXPONENT_MARKER) { 777 if (*(src + 1) == '+' || *(src + 1) == '-' || isdigit(*(src + 1))) { 778 ++src; 779 char *temp_str_end; 780 int32_t add_to_exponent = strtointeger<int32_t>(src, &temp_str_end, 10); 781 if (add_to_exponent > 100000) 782 add_to_exponent = 100000; 783 else if (add_to_exponent < -100000) 784 add_to_exponent = -100000; 785 786 src = temp_str_end; 787 exponent += add_to_exponent; 788 } 789 } 790 791 *strEnd = const_cast<char *>(src); 792 if (mantissa == 0) { // if we have a 0, then also 0 the exponent. 793 *outputMantissa = 0; 794 *outputExponent = 0; 795 } else { 796 decimal_exp_to_float<T>(mantissa, exponent, num_start, truncated, 797 outputMantissa, outputExponent); 798 } 799 return true; 800 } 801 802 // Takes the start of a string representing a hexadecimal float, as well as the 803 // local decimal point. It returns if it suceeded in parsing any digits, and if 804 // the return value is true then the outputs are pointer to the end of the 805 // number, and the mantissa and exponent for the closest float T representation. 806 // If the return value is false, then it is assumed that there is no number 807 // here. 808 template <class T> 809 static inline bool hexadecimal_string_to_float( 810 const char *__restrict src, const char DECIMAL_POINT, 811 char **__restrict strEnd, 812 typename fputil::FPBits<T>::UIntType *outputMantissa, 813 uint32_t *outputExponent) { 814 using BitsType = typename fputil::FPBits<T>::UIntType; 815 constexpr uint32_t BASE = 16; 816 constexpr char EXPONENT_MARKER = 'p'; 817 818 bool truncated = false; 819 bool seen_digit = false; 820 bool after_decimal = false; 821 BitsType mantissa = 0; 822 int32_t exponent = 0; 823 824 // The goal for the first step of parsing is to convert the number in src to 825 // the format mantissa * (base ^ exponent) 826 827 // The loop fills the mantissa with as many digits as it can hold 828 const BitsType bitstype_max_div_by_base = 829 __llvm_libc::cpp::NumericLimits<BitsType>::max() / BASE; 830 while (true) { 831 if (isalnum(*src)) { 832 uint32_t digit = b36_char_to_int(*src); 833 if (digit < BASE) 834 seen_digit = true; 835 else 836 break; 837 838 if (mantissa < bitstype_max_div_by_base) { 839 mantissa = (mantissa * BASE) + digit; 840 if (after_decimal) 841 --exponent; 842 } else { 843 if (digit > 0) 844 truncated = true; 845 if (!after_decimal) 846 ++exponent; 847 } 848 ++src; 849 continue; 850 } 851 if (*src == DECIMAL_POINT) { 852 if (after_decimal) { 853 break; // this means that *src points to a second decimal point, ending 854 // the number. 855 } 856 after_decimal = true; 857 ++src; 858 continue; 859 } 860 // The character is neither a hexadecimal digit nor a decimal point. 861 break; 862 } 863 864 if (!seen_digit) 865 return false; 866 867 // Convert the exponent from having a base of 16 to having a base of 2. 868 exponent *= 4; 869 870 if ((*src | 32) == EXPONENT_MARKER) { 871 if (*(src + 1) == '+' || *(src + 1) == '-' || isdigit(*(src + 1))) { 872 ++src; 873 char *temp_str_end; 874 int32_t add_to_exponent = strtointeger<int32_t>(src, &temp_str_end, 10); 875 if (add_to_exponent > 100000) 876 add_to_exponent = 100000; 877 else if (add_to_exponent < -100000) 878 add_to_exponent = -100000; 879 src = temp_str_end; 880 exponent += add_to_exponent; 881 } 882 } 883 *strEnd = const_cast<char *>(src); 884 if (mantissa == 0) { // if we have a 0, then also 0 the exponent. 885 *outputMantissa = 0; 886 *outputExponent = 0; 887 } else { 888 binary_exp_to_float<T>(mantissa, exponent, truncated, outputMantissa, 889 outputExponent); 890 } 891 return true; 892 } 893 894 // Takes a pointer to a string and a pointer to a string pointer. This function 895 // is used as the backend for all of the string to float functions. 896 template <class T> 897 static inline T strtofloatingpoint(const char *__restrict src, 898 char **__restrict strEnd) { 899 using BitsType = typename fputil::FPBits<T>::UIntType; 900 fputil::FPBits<T> result = fputil::FPBits<T>(); 901 const char *original_src = src; 902 bool seen_digit = false; 903 src = first_non_whitespace(src); 904 905 if (*src == '+' || *src == '-') { 906 if (*src == '-') { 907 result.set_sign(true); 908 } 909 ++src; 910 } 911 912 static constexpr char DECIMAL_POINT = '.'; 913 static const char *inf_string = "infinity"; 914 static const char *nan_string = "nan"; 915 916 // bool truncated = false; 917 918 if (isdigit(*src) || *src == DECIMAL_POINT) { // regular number 919 int base = 10; 920 if (is_float_hex_start(src, DECIMAL_POINT)) { 921 base = 16; 922 src += 2; 923 seen_digit = true; 924 } 925 char *new_str_end = nullptr; 926 927 BitsType output_mantissa = 0; 928 uint32_t output_exponent = 0; 929 if (base == 16) { 930 seen_digit = hexadecimal_string_to_float<T>( 931 src, DECIMAL_POINT, &new_str_end, &output_mantissa, &output_exponent); 932 } else { // base is 10 933 seen_digit = decimal_string_to_float<T>( 934 src, DECIMAL_POINT, &new_str_end, &output_mantissa, &output_exponent); 935 } 936 937 if (seen_digit) { 938 src += new_str_end - src; 939 result.set_mantissa(output_mantissa); 940 result.set_unbiased_exponent(output_exponent); 941 } 942 } else if ((*src | 32) == 'n') { // NaN 943 if ((src[1] | 32) == nan_string[1] && (src[2] | 32) == nan_string[2]) { 944 seen_digit = true; 945 src += 3; 946 BitsType nan_mantissa = 0; 947 // this handles the case of `NaN(n-character-sequence)`, where the 948 // n-character-sequence is made of 0 or more letters and numbers in any 949 // order. 950 if (*src == '(') { 951 const char *left_paren = src; 952 ++src; 953 while (isalnum(*src)) 954 ++src; 955 if (*src == ')') { 956 ++src; 957 char *temp_src = 0; 958 if (isdigit(*(left_paren + 1))) { 959 // This is to prevent errors when BitsType is larger than 64 bits, 960 // since strtointeger only supports up to 64 bits. This is actually 961 // more than is required by the specification, which says for the 962 // input type "NAN(n-char-sequence)" that "the meaning of 963 // the n-char sequence is implementation-defined." 964 nan_mantissa = static_cast<BitsType>( 965 strtointeger<uint64_t>(left_paren + 1, &temp_src, 0)); 966 if (*temp_src != ')') 967 nan_mantissa = 0; 968 } 969 } else 970 src = left_paren; 971 } 972 nan_mantissa |= fputil::FloatProperties<T>::QUIET_NAN_MASK; 973 if (result.get_sign()) { 974 result = fputil::FPBits<T>(result.build_nan(nan_mantissa)); 975 result.set_sign(true); 976 } else { 977 result.set_sign(false); 978 result = fputil::FPBits<T>(result.build_nan(nan_mantissa)); 979 } 980 } 981 } else if ((*src | 32) == 'i') { // INF 982 if ((src[1] | 32) == inf_string[1] && (src[2] | 32) == inf_string[2]) { 983 seen_digit = true; 984 if (result.get_sign()) 985 result = result.neg_inf(); 986 else 987 result = result.inf(); 988 if ((src[3] | 32) == inf_string[3] && (src[4] | 32) == inf_string[4] && 989 (src[5] | 32) == inf_string[5] && (src[6] | 32) == inf_string[6] && 990 (src[7] | 32) == inf_string[7]) { 991 // if the string is "INFINITY" then strEnd needs to be set to src + 8. 992 src += 8; 993 } else { 994 src += 3; 995 } 996 } 997 } 998 if (!seen_digit) { // If there is nothing to actually parse, then return 0. 999 if (strEnd != nullptr) 1000 *strEnd = const_cast<char *>(original_src); 1001 return T(0); 1002 } 1003 1004 if (strEnd != nullptr) 1005 *strEnd = const_cast<char *>(src); 1006 1007 // This function only does something if T is long double and the platform uses 1008 // special 80 bit long doubles. Otherwise it should be inlined out. 1009 set_implicit_bit<T>(result); 1010 1011 return T(result); 1012 } 1013 1014 } // namespace internal 1015 } // namespace __llvm_libc 1016 1017 #endif // LIBC_SRC_SUPPORT_STR_TO_FLOAT_H 1018