1 //===-- lib/Decimal/decimal-to-binary.cpp ---------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "big-radix-floating-point.h" 10 #include "flang/Common/bit-population-count.h" 11 #include "flang/Common/leading-zero-bit-count.h" 12 #include "flang/Decimal/binary-floating-point.h" 13 #include "flang/Decimal/decimal.h" 14 #include <cinttypes> 15 #include <cstring> 16 #include <ctype.h> 17 18 namespace Fortran::decimal { 19 20 template <int PREC, int LOG10RADIX> 21 bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber( 22 const char *&p, bool &inexact) { 23 SetToZero(); 24 while (*p == ' ') { 25 ++p; 26 } 27 const char *q{p}; 28 isNegative_ = *q == '-'; 29 if (*q == '-' || *q == '+') { 30 ++q; 31 } 32 const char *start{q}; 33 while (*q == '0') { 34 ++q; 35 } 36 const char *first{q}; 37 for (; *q >= '0' && *q <= '9'; ++q) { 38 } 39 const char *point{nullptr}; 40 if (*q == '.') { 41 point = q; 42 for (++q; *q >= '0' && *q <= '9'; ++q) { 43 } 44 } 45 if (q == start || (q == start + 1 && *start == '.')) { 46 return false; // require at least one digit 47 } 48 // There's a valid number here; set the reference argument to point to 49 // the first character afterward. 50 p = q; 51 // Strip off trailing zeroes 52 if (point) { 53 while (q[-1] == '0') { 54 --q; 55 } 56 if (q[-1] == '.') { 57 point = nullptr; 58 --q; 59 } 60 } 61 if (!point) { 62 while (q > first && q[-1] == '0') { 63 --q; 64 ++exponent_; 65 } 66 } 67 // Trim any excess digits 68 const char *limit{first + maxDigits * log10Radix + (point != nullptr)}; 69 if (q > limit) { 70 inexact = true; 71 if (point >= limit) { 72 q = point; 73 point = nullptr; 74 } 75 if (!point) { 76 exponent_ += q - limit; 77 } 78 q = limit; 79 } 80 if (point) { 81 exponent_ -= static_cast<int>(q - point - 1); 82 } 83 if (q == first) { 84 exponent_ = 0; // all zeros 85 } 86 // Rack the decimal digits up into big Digits. 87 for (auto times{radix}; q-- > first;) { 88 if (*q != '.') { 89 if (times == radix) { 90 digit_[digits_++] = *q - '0'; 91 times = 10; 92 } else { 93 digit_[digits_ - 1] += times * (*q - '0'); 94 times *= 10; 95 } 96 } 97 } 98 // Look for an optional exponent field. 99 q = p; 100 switch (*q) { 101 case 'e': 102 case 'E': 103 case 'd': 104 case 'D': 105 case 'q': 106 case 'Q': { 107 bool negExpo{*++q == '-'}; 108 if (*q == '-' || *q == '+') { 109 ++q; 110 } 111 if (*q >= '0' && *q <= '9') { 112 int expo{0}; 113 while (*q == '0') { 114 ++q; 115 } 116 const char *expDig{q}; 117 while (*q >= '0' && *q <= '9') { 118 expo = 10 * expo + *q++ - '0'; 119 } 120 if (q >= expDig + 8) { 121 // There's a ridiculous number of nonzero exponent digits. 122 // The decimal->binary conversion routine will cope with 123 // returning 0 or Inf, but we must ensure that "expo" didn't 124 // overflow back around to something legal. 125 expo = 10 * Real::decimalRange; 126 exponent_ = 0; 127 } 128 p = q; // exponent was valid 129 if (negExpo) { 130 exponent_ -= expo; 131 } else { 132 exponent_ += expo; 133 } 134 } 135 } break; 136 default: 137 break; 138 } 139 return true; 140 } 141 142 template <int PREC, int LOG10RADIX> 143 void BigRadixFloatingPointNumber<PREC, 144 LOG10RADIX>::LoseLeastSignificantDigit() { 145 Digit LSD{digit_[0]}; 146 for (int j{0}; j < digits_ - 1; ++j) { 147 digit_[j] = digit_[j + 1]; 148 } 149 digit_[digits_ - 1] = 0; 150 bool incr{false}; 151 switch (rounding_) { 152 case RoundNearest: 153 incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0); 154 break; 155 case RoundUp: 156 incr = LSD > 0 && !isNegative_; 157 break; 158 case RoundDown: 159 incr = LSD > 0 && isNegative_; 160 break; 161 case RoundToZero: 162 break; 163 case RoundCompatible: 164 incr = LSD >= radix / 2; 165 break; 166 } 167 for (int j{0}; (digit_[j] += incr) == radix; ++j) { 168 digit_[j] = 0; 169 } 170 } 171 172 // This local utility class represents an unrounded nonnegative 173 // binary floating-point value with an unbiased (i.e., signed) 174 // binary exponent, an integer value (not a fraction) with an implied 175 // binary point to its *right*, and some guard bits for rounding. 176 template <int PREC> class IntermediateFloat { 177 public: 178 static constexpr int precision{PREC}; 179 using IntType = common::HostUnsignedIntType<precision>; 180 static constexpr IntType topBit{IntType{1} << (precision - 1)}; 181 static constexpr IntType mask{topBit + (topBit - 1)}; 182 183 IntermediateFloat() {} 184 IntermediateFloat(const IntermediateFloat &) = default; 185 186 // Assumes that exponent_ is valid on entry, and may increment it. 187 // Returns the number of guard_ bits that have been determined. 188 template <typename UINT> bool SetTo(UINT n) { 189 static constexpr int nBits{CHAR_BIT * sizeof n}; 190 if constexpr (precision >= nBits) { 191 value_ = n; 192 guard_ = 0; 193 return 0; 194 } else { 195 int shift{common::BitsNeededFor(n) - precision}; 196 if (shift <= 0) { 197 value_ = n; 198 guard_ = 0; 199 return 0; 200 } else { 201 value_ = n >> shift; 202 exponent_ += shift; 203 n <<= nBits - shift; 204 guard_ = (n >> (nBits - guardBits)) | ((n << guardBits) != 0); 205 return shift; 206 } 207 } 208 } 209 210 void ShiftIn(int bit = 0) { value_ = value_ + value_ + bit; } 211 bool IsFull() const { return value_ >= topBit; } 212 void AdjustExponent(int by) { exponent_ += by; } 213 void SetGuard(int g) { 214 guard_ |= (static_cast<GuardType>(g & 6) << (guardBits - 3)) | (g & 1); 215 } 216 217 ConversionToBinaryResult<PREC> ToBinary( 218 bool isNegative, FortranRounding) const; 219 220 private: 221 static constexpr int guardBits{3}; // guard, round, sticky 222 using GuardType = int; 223 static constexpr GuardType oneHalf{GuardType{1} << (guardBits - 1)}; 224 225 IntType value_{0}; 226 GuardType guard_{0}; 227 int exponent_{0}; 228 }; 229 230 template <int PREC> 231 ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary( 232 bool isNegative, FortranRounding rounding) const { 233 using Binary = BinaryFloatingPointNumber<PREC>; 234 // Create a fraction with a binary point to the left of the integer 235 // value_, and bias the exponent. 236 IntType fraction{value_}; 237 GuardType guard{guard_}; 238 int expo{exponent_ + Binary::exponentBias + (precision - 1)}; 239 while (expo < 1 && (fraction > 0 || guard > oneHalf)) { 240 guard = (guard & 1) | (guard >> 1) | 241 ((static_cast<GuardType>(fraction) & 1) << (guardBits - 1)); 242 fraction >>= 1; 243 ++expo; 244 } 245 int flags{Exact}; 246 if (guard != 0) { 247 flags |= Inexact; 248 } 249 if (fraction == 0 && guard <= oneHalf) { 250 return {Binary{}, static_cast<enum ConversionResultFlags>(flags)}; 251 } 252 // The value is nonzero; normalize it. 253 while (fraction < topBit && expo > 1) { 254 --expo; 255 fraction = fraction * 2 + (guard >> (guardBits - 2)); 256 guard = (((guard >> (guardBits - 2)) & 1) << (guardBits - 1)) | (guard & 1); 257 } 258 // Apply rounding 259 bool incr{false}; 260 switch (rounding) { 261 case RoundNearest: 262 incr = guard > oneHalf || (guard == oneHalf && (fraction & 1)); 263 break; 264 case RoundUp: 265 incr = guard != 0 && !isNegative; 266 break; 267 case RoundDown: 268 incr = guard != 0 && isNegative; 269 break; 270 case RoundToZero: 271 break; 272 case RoundCompatible: 273 incr = guard >= oneHalf; 274 break; 275 } 276 if (incr) { 277 if (fraction == mask) { 278 // rounding causes a carry 279 ++expo; 280 fraction = topBit; 281 } else { 282 ++fraction; 283 } 284 } 285 if (expo == 1 && fraction < topBit) { 286 expo = 0; // subnormal 287 } 288 if (expo >= Binary::maxExponent) { 289 expo = Binary::maxExponent; // Inf 290 flags |= Overflow; 291 fraction = 0; 292 } 293 using Raw = typename Binary::RawType; 294 Raw raw = static_cast<Raw>(isNegative) << (Binary::bits - 1); 295 raw |= static_cast<Raw>(expo) << Binary::significandBits; 296 if constexpr (Binary::isImplicitMSB) { 297 fraction &= ~topBit; 298 } 299 raw |= fraction; 300 return {Binary(raw), static_cast<enum ConversionResultFlags>(flags)}; 301 } 302 303 template <int PREC, int LOG10RADIX> 304 ConversionToBinaryResult<PREC> 305 BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() { 306 // On entry, *this holds a multi-precision integer value in a radix of a 307 // large power of ten. Its radix point is defined to be to the right of its 308 // digits, and "exponent_" is the power of ten by which it is to be scaled. 309 Normalize(); 310 if (digits_ == 0) { // zero value 311 return {Real{SignBit()}}; 312 } 313 // The value is not zero: x = D. * 10.**E 314 // Shift our perspective on the radix (& decimal) point so that 315 // it sits to the *left* of the digits: i.e., x = .D * 10.**E 316 exponent_ += digits_ * log10Radix; 317 // Sanity checks for ridiculous exponents 318 static constexpr int crazy{2 * Real::decimalRange + log10Radix}; 319 if (exponent_ < -crazy) { // underflow to +/-0. 320 return {Real{SignBit()}, Inexact}; 321 } else if (exponent_ > crazy) { // overflow to +/-Inf. 322 return {Real{Infinity()}, Overflow}; 323 } 324 // Apply any negative decimal exponent by multiplication 325 // by a power of two, adjusting the binary exponent to compensate. 326 IntermediateFloat<PREC> f; 327 while (exponent_ < log10Radix) { 328 // x = 0.D * 10.**E * 2.**(f.ex) -> 512 * 0.D * 10.**E * 2.**(f.ex-9) 329 f.AdjustExponent(-9); 330 digitLimit_ = digits_; 331 if (int carry{MultiplyWithoutNormalization<512>()}) { 332 // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex) 333 PushCarry(carry); 334 exponent_ += log10Radix; 335 } 336 } 337 // Apply any positive decimal exponent greater than 338 // is needed to treat the topmost digit as an integer 339 // part by multiplying by 10 or 10000 repeatedly. 340 while (exponent_ > log10Radix) { 341 digitLimit_ = digits_; 342 int carry; 343 if (exponent_ >= log10Radix + 4) { 344 // x = 0.D * 10.**E * 2.**(f.ex) -> 625 * .D * 10.**(E-4) * 2.**(f.ex+4) 345 exponent_ -= 4; 346 carry = MultiplyWithoutNormalization<(5 * 5 * 5 * 5)>(); 347 f.AdjustExponent(4); 348 } else { 349 // x = 0.D * 10.**E * 2.**(f.ex) -> 5 * .D * 10.**(E-1) * 2.**(f.ex+1) 350 --exponent_; 351 carry = MultiplyWithoutNormalization<5>(); 352 f.AdjustExponent(1); 353 } 354 if (carry != 0) { 355 // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex) 356 PushCarry(carry); 357 exponent_ += log10Radix; 358 } 359 } 360 // So exponent_ is now log10Radix, meaning that the 361 // MSD can be taken as an integer part and transferred 362 // to the binary result. 363 // x = .jD * 10.**16 * 2.**(f.ex) -> .D * j * 2.**(f.ex) 364 int guardShift{f.SetTo(digit_[--digits_])}; 365 // Transfer additional bits until the result is normal. 366 digitLimit_ = digits_; 367 while (!f.IsFull()) { 368 // x = ((b.D)/2) * j * 2.**(f.ex) -> .D * (2j + b) * 2.**(f.ex-1) 369 f.AdjustExponent(-1); 370 std::uint32_t carry = MultiplyWithoutNormalization<2>(); 371 f.ShiftIn(carry); 372 } 373 // Get the next few bits for rounding. Allow for some guard bits 374 // that may have already been set in f.SetTo() above. 375 int guard{0}; 376 if (guardShift == 0) { 377 guard = MultiplyWithoutNormalization<4>(); 378 } else if (guardShift == 1) { 379 guard = MultiplyWithoutNormalization<2>(); 380 } 381 guard = guard + guard + !IsZero(); 382 f.SetGuard(guard); 383 return f.ToBinary(isNegative_, rounding_); 384 } 385 386 template <int PREC, int LOG10RADIX> 387 ConversionToBinaryResult<PREC> 388 BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary(const char *&p) { 389 bool inexact{false}; 390 if (ParseNumber(p, inexact)) { 391 auto result{ConvertToBinary()}; 392 if (inexact) { 393 result.flags = 394 static_cast<enum ConversionResultFlags>(result.flags | Inexact); 395 } 396 return result; 397 } else { 398 // Could not parse a decimal floating-point number. p has been 399 // advanced over any leading spaces. 400 if (toupper(p[0]) == 'N' && toupper(p[1]) == 'A' && toupper(p[2]) == 'N') { 401 // NaN 402 p += 3; 403 return {Real{NaN()}}; 404 } else { 405 // Try to parse Inf, maybe with a sign 406 const char *q{p}; 407 isNegative_ = *q == '-'; 408 if (*q == '-' || *q == '+') { 409 ++q; 410 } 411 if (toupper(q[0]) == 'I' && toupper(q[1]) == 'N' && 412 toupper(q[2]) == 'F') { 413 p = q + 3; 414 return {Real{Infinity()}}; 415 } else { 416 // Invalid input 417 return {Real{NaN()}, Invalid}; 418 } 419 } 420 } 421 } 422 423 template <int PREC> 424 ConversionToBinaryResult<PREC> ConvertToBinary( 425 const char *&p, enum FortranRounding rounding) { 426 return BigRadixFloatingPointNumber<PREC>{rounding}.ConvertToBinary(p); 427 } 428 429 template ConversionToBinaryResult<8> ConvertToBinary<8>( 430 const char *&, enum FortranRounding); 431 template ConversionToBinaryResult<11> ConvertToBinary<11>( 432 const char *&, enum FortranRounding); 433 template ConversionToBinaryResult<24> ConvertToBinary<24>( 434 const char *&, enum FortranRounding); 435 template ConversionToBinaryResult<53> ConvertToBinary<53>( 436 const char *&, enum FortranRounding); 437 template ConversionToBinaryResult<64> ConvertToBinary<64>( 438 const char *&, enum FortranRounding); 439 template ConversionToBinaryResult<113> ConvertToBinary<113>( 440 const char *&, enum FortranRounding); 441 442 extern "C" { 443 enum ConversionResultFlags ConvertDecimalToFloat( 444 const char **p, float *f, enum FortranRounding rounding) { 445 auto result{Fortran::decimal::ConvertToBinary<24>(*p, rounding)}; 446 std::memcpy(reinterpret_cast<void *>(f), 447 reinterpret_cast<const void *>(&result.binary), sizeof *f); 448 return result.flags; 449 } 450 enum ConversionResultFlags ConvertDecimalToDouble( 451 const char **p, double *d, enum FortranRounding rounding) { 452 auto result{Fortran::decimal::ConvertToBinary<53>(*p, rounding)}; 453 std::memcpy(reinterpret_cast<void *>(d), 454 reinterpret_cast<const void *>(&result.binary), sizeof *d); 455 return result.flags; 456 } 457 #if __x86_64__ && !defined(_MSC_VER) 458 enum ConversionResultFlags ConvertDecimalToLongDouble( 459 const char **p, long double *ld, enum FortranRounding rounding) { 460 auto result{Fortran::decimal::ConvertToBinary<64>(*p, rounding)}; 461 std::memcpy(reinterpret_cast<void *>(ld), 462 reinterpret_cast<const void *>(&result.binary), sizeof *ld); 463 return result.flags; 464 } 465 #endif 466 } 467 } // namespace Fortran::decimal 468