1 //===-- lib/Decimal/decimal-to-binary.cpp ---------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "big-radix-floating-point.h" 10 #include "flang/Common/bit-population-count.h" 11 #include "flang/Common/leading-zero-bit-count.h" 12 #include "flang/Decimal/binary-floating-point.h" 13 #include "flang/Decimal/decimal.h" 14 #include <cinttypes> 15 #include <cstring> 16 #include <ctype.h> 17 18 namespace Fortran::decimal { 19 20 template <int PREC, int LOG10RADIX> 21 bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber( 22 const char *&p, bool &inexact) { 23 SetToZero(); 24 while (*p == ' ') { 25 ++p; 26 } 27 const char *q{p}; 28 isNegative_ = *q == '-'; 29 if (*q == '-' || *q == '+') { 30 ++q; 31 } 32 const char *start{q}; 33 while (*q == '0') { 34 ++q; 35 } 36 const char *first{q}; 37 for (; *q >= '0' && *q <= '9'; ++q) { 38 } 39 const char *point{nullptr}; 40 if (*q == '.') { 41 point = q; 42 for (++q; *q >= '0' && *q <= '9'; ++q) { 43 } 44 } 45 if (q == start || (q == start + 1 && *start == '.')) { 46 return false; // require at least one digit 47 } 48 // There's a valid number here; set the reference argument to point to 49 // the first character afterward. 50 p = q; 51 // Strip off trailing zeroes 52 if (point) { 53 while (q[-1] == '0') { 54 --q; 55 } 56 if (q[-1] == '.') { 57 point = nullptr; 58 --q; 59 } 60 } 61 if (!point) { 62 while (q > first && q[-1] == '0') { 63 --q; 64 ++exponent_; 65 } 66 } 67 // Trim any excess digits 68 const char *limit{first + maxDigits * log10Radix + (point != nullptr)}; 69 if (q > limit) { 70 inexact = true; 71 if (point >= limit) { 72 q = point; 73 point = nullptr; 74 } 75 if (!point) { 76 exponent_ += q - limit; 77 } 78 q = limit; 79 } 80 if (point) { 81 exponent_ -= static_cast<int>(q - point - 1); 82 } 83 if (q == first) { 84 exponent_ = 0; // all zeros 85 } 86 // Rack the decimal digits up into big Digits. 87 for (auto times{radix}; q-- > first;) { 88 if (*q != '.') { 89 if (times == radix) { 90 digit_[digits_++] = *q - '0'; 91 times = 10; 92 } else { 93 digit_[digits_ - 1] += times * (*q - '0'); 94 times *= 10; 95 } 96 } 97 } 98 // Look for an optional exponent field. 99 q = p; 100 switch (*q) { 101 case 'e': 102 case 'E': 103 case 'd': 104 case 'D': 105 case 'q': 106 case 'Q': { 107 bool negExpo{*++q == '-'}; 108 if (*q == '-' || *q == '+') { 109 ++q; 110 } 111 if (*q >= '0' && *q <= '9') { 112 int expo{0}; 113 while (*q == '0') { 114 ++q; 115 } 116 const char *expDig{q}; 117 while (*q >= '0' && *q <= '9') { 118 expo = 10 * expo + *q++ - '0'; 119 } 120 if (q >= expDig + 8) { 121 // There's a ridiculous number of nonzero exponent digits. 122 // The decimal->binary conversion routine will cope with 123 // returning 0 or Inf, but we must ensure that "expo" didn't 124 // overflow back around to something legal. 125 expo = 10 * Real::decimalRange; 126 exponent_ = 0; 127 } 128 p = q; // exponent was valid 129 if (negExpo) { 130 exponent_ -= expo; 131 } else { 132 exponent_ += expo; 133 } 134 } 135 } break; 136 default: 137 break; 138 } 139 return true; 140 } 141 142 template <int PREC, int LOG10RADIX> 143 void BigRadixFloatingPointNumber<PREC, 144 LOG10RADIX>::LoseLeastSignificantDigit() { 145 Digit LSD{digit_[0]}; 146 for (int j{0}; j < digits_ - 1; ++j) { 147 digit_[j] = digit_[j + 1]; 148 } 149 digit_[digits_ - 1] = 0; 150 bool incr{false}; 151 switch (rounding_) { 152 case RoundNearest: 153 case RoundDefault: 154 incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0); 155 break; 156 case RoundUp: 157 incr = LSD > 0 && !isNegative_; 158 break; 159 case RoundDown: 160 incr = LSD > 0 && isNegative_; 161 break; 162 case RoundToZero: 163 break; 164 case RoundCompatible: 165 incr = LSD >= radix / 2; 166 break; 167 } 168 for (int j{0}; (digit_[j] += incr) == radix; ++j) { 169 digit_[j] = 0; 170 } 171 } 172 173 // This local utility class represents an unrounded nonnegative 174 // binary floating-point value with an unbiased (i.e., signed) 175 // binary exponent, an integer value (not a fraction) with an implied 176 // binary point to its *right*, and some guard bits for rounding. 177 template <int PREC> class IntermediateFloat { 178 public: 179 static constexpr int precision{PREC}; 180 using IntType = common::HostUnsignedIntType<precision>; 181 static constexpr IntType topBit{IntType{1} << (precision - 1)}; 182 static constexpr IntType mask{topBit + (topBit - 1)}; 183 184 IntermediateFloat() {} 185 IntermediateFloat(const IntermediateFloat &) = default; 186 187 // Assumes that exponent_ is valid on entry, and may increment it. 188 // Returns the number of guard_ bits that have been determined. 189 template <typename UINT> bool SetTo(UINT n) { 190 static constexpr int nBits{CHAR_BIT * sizeof n}; 191 if constexpr (precision >= nBits) { 192 value_ = n; 193 guard_ = 0; 194 return 0; 195 } else { 196 int shift{common::BitsNeededFor(n) - precision}; 197 if (shift <= 0) { 198 value_ = n; 199 guard_ = 0; 200 return 0; 201 } else { 202 value_ = n >> shift; 203 exponent_ += shift; 204 n <<= nBits - shift; 205 guard_ = (n >> (nBits - guardBits)) | ((n << guardBits) != 0); 206 return shift; 207 } 208 } 209 } 210 211 void ShiftIn(int bit = 0) { value_ = value_ + value_ + bit; } 212 bool IsFull() const { return value_ >= topBit; } 213 void AdjustExponent(int by) { exponent_ += by; } 214 void SetGuard(int g) { 215 guard_ |= (static_cast<GuardType>(g & 6) << (guardBits - 3)) | (g & 1); 216 } 217 218 ConversionToBinaryResult<PREC> ToBinary( 219 bool isNegative, FortranRounding) const; 220 221 private: 222 static constexpr int guardBits{3}; // guard, round, sticky 223 using GuardType = int; 224 static constexpr GuardType oneHalf{GuardType{1} << (guardBits - 1)}; 225 226 IntType value_{0}; 227 GuardType guard_{0}; 228 int exponent_{0}; 229 }; 230 231 template <int PREC> 232 ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary( 233 bool isNegative, FortranRounding rounding) const { 234 using Binary = BinaryFloatingPointNumber<PREC>; 235 // Create a fraction with a binary point to the left of the integer 236 // value_, and bias the exponent. 237 IntType fraction{value_}; 238 GuardType guard{guard_}; 239 int expo{exponent_ + Binary::exponentBias + (precision - 1)}; 240 while (expo < 1 && (fraction > 0 || guard > oneHalf)) { 241 guard = (guard & 1) | (guard >> 1) | 242 ((static_cast<GuardType>(fraction) & 1) << (guardBits - 1)); 243 fraction >>= 1; 244 ++expo; 245 } 246 int flags{Exact}; 247 if (guard != 0) { 248 flags |= Inexact; 249 } 250 if (fraction == 0 && guard <= oneHalf) { 251 return {Binary{}, static_cast<enum ConversionResultFlags>(flags)}; 252 } 253 // The value is nonzero; normalize it. 254 while (fraction < topBit && expo > 1) { 255 --expo; 256 fraction = fraction * 2 + (guard >> (guardBits - 2)); 257 guard = (((guard >> (guardBits - 2)) & 1) << (guardBits - 1)) | (guard & 1); 258 } 259 // Apply rounding 260 bool incr{false}; 261 switch (rounding) { 262 case RoundNearest: 263 case RoundDefault: 264 incr = guard > oneHalf || (guard == oneHalf && (fraction & 1)); 265 break; 266 case RoundUp: 267 incr = guard != 0 && !isNegative; 268 break; 269 case RoundDown: 270 incr = guard != 0 && isNegative; 271 break; 272 case RoundToZero: 273 break; 274 case RoundCompatible: 275 incr = guard >= oneHalf; 276 break; 277 } 278 if (incr) { 279 if (fraction == mask) { 280 // rounding causes a carry 281 ++expo; 282 fraction = topBit; 283 } else { 284 ++fraction; 285 } 286 } 287 if (expo == 1 && fraction < topBit) { 288 expo = 0; // subnormal 289 } 290 if (expo >= Binary::maxExponent) { 291 expo = Binary::maxExponent; // Inf 292 flags |= Overflow; 293 fraction = 0; 294 } 295 using Raw = typename Binary::RawType; 296 Raw raw = static_cast<Raw>(isNegative) << (Binary::bits - 1); 297 raw |= static_cast<Raw>(expo) << Binary::significandBits; 298 if constexpr (Binary::isImplicitMSB) { 299 fraction &= ~topBit; 300 } 301 raw |= fraction; 302 return {Binary(raw), static_cast<enum ConversionResultFlags>(flags)}; 303 } 304 305 template <int PREC, int LOG10RADIX> 306 ConversionToBinaryResult<PREC> 307 BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() { 308 // On entry, *this holds a multi-precision integer value in a radix of a 309 // large power of ten. Its radix point is defined to be to the right of its 310 // digits, and "exponent_" is the power of ten by which it is to be scaled. 311 Normalize(); 312 if (digits_ == 0) { // zero value 313 return {Real{SignBit()}}; 314 } 315 // The value is not zero: x = D. * 10.**E 316 // Shift our perspective on the radix (& decimal) point so that 317 // it sits to the *left* of the digits: i.e., x = .D * 10.**E 318 exponent_ += digits_ * log10Radix; 319 // Sanity checks for ridiculous exponents 320 static constexpr int crazy{2 * Real::decimalRange + log10Radix}; 321 if (exponent_ < -crazy) { // underflow to +/-0. 322 return {Real{SignBit()}, Inexact}; 323 } else if (exponent_ > crazy) { // overflow to +/-Inf. 324 return {Real{Infinity()}, Overflow}; 325 } 326 // Apply any negative decimal exponent by multiplication 327 // by a power of two, adjusting the binary exponent to compensate. 328 IntermediateFloat<PREC> f; 329 while (exponent_ < log10Radix) { 330 // x = 0.D * 10.**E * 2.**(f.ex) -> 512 * 0.D * 10.**E * 2.**(f.ex-9) 331 f.AdjustExponent(-9); 332 digitLimit_ = digits_; 333 if (int carry{MultiplyWithoutNormalization<512>()}) { 334 // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex) 335 PushCarry(carry); 336 exponent_ += log10Radix; 337 } 338 } 339 // Apply any positive decimal exponent greater than 340 // is needed to treat the topmost digit as an integer 341 // part by multiplying by 10 or 10000 repeatedly. 342 while (exponent_ > log10Radix) { 343 digitLimit_ = digits_; 344 int carry; 345 if (exponent_ >= log10Radix + 4) { 346 // x = 0.D * 10.**E * 2.**(f.ex) -> 625 * .D * 10.**(E-4) * 2.**(f.ex+4) 347 exponent_ -= 4; 348 carry = MultiplyWithoutNormalization<(5 * 5 * 5 * 5)>(); 349 f.AdjustExponent(4); 350 } else { 351 // x = 0.D * 10.**E * 2.**(f.ex) -> 5 * .D * 10.**(E-1) * 2.**(f.ex+1) 352 --exponent_; 353 carry = MultiplyWithoutNormalization<5>(); 354 f.AdjustExponent(1); 355 } 356 if (carry != 0) { 357 // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex) 358 PushCarry(carry); 359 exponent_ += log10Radix; 360 } 361 } 362 // So exponent_ is now log10Radix, meaning that the 363 // MSD can be taken as an integer part and transferred 364 // to the binary result. 365 // x = .jD * 10.**16 * 2.**(f.ex) -> .D * j * 2.**(f.ex) 366 int guardShift{f.SetTo(digit_[--digits_])}; 367 // Transfer additional bits until the result is normal. 368 digitLimit_ = digits_; 369 while (!f.IsFull()) { 370 // x = ((b.D)/2) * j * 2.**(f.ex) -> .D * (2j + b) * 2.**(f.ex-1) 371 f.AdjustExponent(-1); 372 std::uint32_t carry = MultiplyWithoutNormalization<2>(); 373 f.ShiftIn(carry); 374 } 375 // Get the next few bits for rounding. Allow for some guard bits 376 // that may have already been set in f.SetTo() above. 377 int guard{0}; 378 if (guardShift == 0) { 379 guard = MultiplyWithoutNormalization<4>(); 380 } else if (guardShift == 1) { 381 guard = MultiplyWithoutNormalization<2>(); 382 } 383 guard = guard + guard + !IsZero(); 384 f.SetGuard(guard); 385 return f.ToBinary(isNegative_, rounding_); 386 } 387 388 template <int PREC, int LOG10RADIX> 389 ConversionToBinaryResult<PREC> 390 BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary(const char *&p) { 391 bool inexact{false}; 392 if (ParseNumber(p, inexact)) { 393 auto result{ConvertToBinary()}; 394 if (inexact) { 395 result.flags = 396 static_cast<enum ConversionResultFlags>(result.flags | Inexact); 397 } 398 return result; 399 } else { 400 // Could not parse a decimal floating-point number. p has been 401 // advanced over any leading spaces. 402 if (toupper(p[0]) == 'N' && toupper(p[1]) == 'A' && toupper(p[2]) == 'N') { 403 // NaN 404 p += 3; 405 return {Real{NaN()}}; 406 } else { 407 // Try to parse Inf, maybe with a sign 408 const char *q{p}; 409 isNegative_ = *q == '-'; 410 if (*q == '-' || *q == '+') { 411 ++q; 412 } 413 if (toupper(q[0]) == 'I' && toupper(q[1]) == 'N' && 414 toupper(q[2]) == 'F') { 415 p = q + 3; 416 return {Real{Infinity()}}; 417 } else { 418 // Invalid input 419 return {Real{NaN()}, Invalid}; 420 } 421 } 422 } 423 } 424 425 template <int PREC> 426 ConversionToBinaryResult<PREC> ConvertToBinary( 427 const char *&p, enum FortranRounding rounding) { 428 return BigRadixFloatingPointNumber<PREC>{rounding}.ConvertToBinary(p); 429 } 430 431 template ConversionToBinaryResult<8> ConvertToBinary<8>( 432 const char *&, enum FortranRounding); 433 template ConversionToBinaryResult<11> ConvertToBinary<11>( 434 const char *&, enum FortranRounding); 435 template ConversionToBinaryResult<24> ConvertToBinary<24>( 436 const char *&, enum FortranRounding); 437 template ConversionToBinaryResult<53> ConvertToBinary<53>( 438 const char *&, enum FortranRounding); 439 template ConversionToBinaryResult<64> ConvertToBinary<64>( 440 const char *&, enum FortranRounding); 441 template ConversionToBinaryResult<113> ConvertToBinary<113>( 442 const char *&, enum FortranRounding); 443 444 extern "C" { 445 enum ConversionResultFlags ConvertDecimalToFloat( 446 const char **p, float *f, enum FortranRounding rounding) { 447 auto result{Fortran::decimal::ConvertToBinary<24>(*p, rounding)}; 448 std::memcpy(reinterpret_cast<void *>(f), 449 reinterpret_cast<const void *>(&result.binary), sizeof *f); 450 return result.flags; 451 } 452 enum ConversionResultFlags ConvertDecimalToDouble( 453 const char **p, double *d, enum FortranRounding rounding) { 454 auto result{Fortran::decimal::ConvertToBinary<53>(*p, rounding)}; 455 std::memcpy(reinterpret_cast<void *>(d), 456 reinterpret_cast<const void *>(&result.binary), sizeof *d); 457 return result.flags; 458 } 459 #if __x86_64__ && !defined(_MSC_VER) 460 enum ConversionResultFlags ConvertDecimalToLongDouble( 461 const char **p, long double *ld, enum FortranRounding rounding) { 462 auto result{Fortran::decimal::ConvertToBinary<64>(*p, rounding)}; 463 std::memcpy(reinterpret_cast<void *>(ld), 464 reinterpret_cast<const void *>(&result.binary), sizeof *ld); 465 return result.flags; 466 } 467 #endif 468 } 469 } // namespace Fortran::decimal 470