1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a class to represent arbitrary precision floating 10 // point values and provide a variety of arithmetic operations on them. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/APFloat.h" 15 #include "llvm/ADT/APSInt.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/FoldingSet.h" 18 #include "llvm/ADT/Hashing.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Config/llvm-config.h" 22 #include "llvm/Support/Debug.h" 23 #include "llvm/Support/Error.h" 24 #include "llvm/Support/MathExtras.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include <cstring> 27 #include <limits.h> 28 29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \ 30 do { \ 31 if (usesLayout<IEEEFloat>(getSemantics())) \ 32 return U.IEEE.METHOD_CALL; \ 33 if (usesLayout<DoubleAPFloat>(getSemantics())) \ 34 return U.Double.METHOD_CALL; \ 35 llvm_unreachable("Unexpected semantics"); \ 36 } while (false) 37 38 using namespace llvm; 39 40 /// A macro used to combine two fcCategory enums into one key which can be used 41 /// in a switch statement to classify how the interaction of two APFloat's 42 /// categories affects an operation. 43 /// 44 /// TODO: If clang source code is ever allowed to use constexpr in its own 45 /// codebase, change this into a static inline function. 46 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs)) 47 48 /* Assumed in hexadecimal significand parsing, and conversion to 49 hexadecimal strings. */ 50 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); 51 52 namespace llvm { 53 /* Represents floating point arithmetic semantics. */ 54 struct fltSemantics { 55 /* The largest E such that 2^E is representable; this matches the 56 definition of IEEE 754. */ 57 APFloatBase::ExponentType maxExponent; 58 59 /* The smallest E such that 2^E is a normalized number; this 60 matches the definition of IEEE 754. */ 61 APFloatBase::ExponentType minExponent; 62 63 /* Number of bits in the significand. This includes the integer 64 bit. */ 65 unsigned int precision; 66 67 /* Number of bits actually used in the semantics. */ 68 unsigned int sizeInBits; 69 70 // Returns true if any number described by this semantics can be precisely 71 // represented by the specified semantics. 72 bool isRepresentableBy(const fltSemantics &S) const { 73 return maxExponent <= S.maxExponent && minExponent >= S.minExponent && 74 precision <= S.precision; 75 } 76 }; 77 78 static const fltSemantics semIEEEhalf = {15, -14, 11, 16}; 79 static const fltSemantics semBFloat = {127, -126, 8, 16}; 80 static const fltSemantics semIEEEsingle = {127, -126, 24, 32}; 81 static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64}; 82 static const fltSemantics semIEEEquad = {16383, -16382, 113, 128}; 83 static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80}; 84 static const fltSemantics semBogus = {0, 0, 0, 0}; 85 86 /* The IBM double-double semantics. Such a number consists of a pair of IEEE 87 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal, 88 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo. 89 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent 90 to each other, and two 11-bit exponents. 91 92 Note: we need to make the value different from semBogus as otherwise 93 an unsafe optimization may collapse both values to a single address, 94 and we heavily rely on them having distinct addresses. */ 95 static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0}; 96 97 /* These are legacy semantics for the fallback, inaccrurate implementation of 98 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the 99 operation. It's equivalent to having an IEEE number with consecutive 106 100 bits of mantissa and 11 bits of exponent. 101 102 It's not equivalent to IBM double-double. For example, a legit IBM 103 double-double, 1 + epsilon: 104 105 1 + epsilon = 1 + (1 >> 1076) 106 107 is not representable by a consecutive 106 bits of mantissa. 108 109 Currently, these semantics are used in the following way: 110 111 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) -> 112 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) -> 113 semPPCDoubleDoubleLegacy -> IEEE operations 114 115 We use bitcastToAPInt() to get the bit representation (in APInt) of the 116 underlying IEEEdouble, then use the APInt constructor to construct the 117 legacy IEEE float. 118 119 TODO: Implement all operations in semPPCDoubleDouble, and delete these 120 semantics. */ 121 static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, 122 53 + 53, 128}; 123 124 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { 125 switch (S) { 126 case S_IEEEhalf: 127 return IEEEhalf(); 128 case S_BFloat: 129 return BFloat(); 130 case S_IEEEsingle: 131 return IEEEsingle(); 132 case S_IEEEdouble: 133 return IEEEdouble(); 134 case S_x87DoubleExtended: 135 return x87DoubleExtended(); 136 case S_IEEEquad: 137 return IEEEquad(); 138 case S_PPCDoubleDouble: 139 return PPCDoubleDouble(); 140 } 141 llvm_unreachable("Unrecognised floating semantics"); 142 } 143 144 APFloatBase::Semantics 145 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { 146 if (&Sem == &llvm::APFloat::IEEEhalf()) 147 return S_IEEEhalf; 148 else if (&Sem == &llvm::APFloat::BFloat()) 149 return S_BFloat; 150 else if (&Sem == &llvm::APFloat::IEEEsingle()) 151 return S_IEEEsingle; 152 else if (&Sem == &llvm::APFloat::IEEEdouble()) 153 return S_IEEEdouble; 154 else if (&Sem == &llvm::APFloat::x87DoubleExtended()) 155 return S_x87DoubleExtended; 156 else if (&Sem == &llvm::APFloat::IEEEquad()) 157 return S_IEEEquad; 158 else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) 159 return S_PPCDoubleDouble; 160 else 161 llvm_unreachable("Unknown floating semantics"); 162 } 163 164 const fltSemantics &APFloatBase::IEEEhalf() { 165 return semIEEEhalf; 166 } 167 const fltSemantics &APFloatBase::BFloat() { 168 return semBFloat; 169 } 170 const fltSemantics &APFloatBase::IEEEsingle() { 171 return semIEEEsingle; 172 } 173 const fltSemantics &APFloatBase::IEEEdouble() { 174 return semIEEEdouble; 175 } 176 const fltSemantics &APFloatBase::IEEEquad() { 177 return semIEEEquad; 178 } 179 const fltSemantics &APFloatBase::x87DoubleExtended() { 180 return semX87DoubleExtended; 181 } 182 const fltSemantics &APFloatBase::Bogus() { 183 return semBogus; 184 } 185 const fltSemantics &APFloatBase::PPCDoubleDouble() { 186 return semPPCDoubleDouble; 187 } 188 189 constexpr RoundingMode APFloatBase::rmNearestTiesToEven; 190 constexpr RoundingMode APFloatBase::rmTowardPositive; 191 constexpr RoundingMode APFloatBase::rmTowardNegative; 192 constexpr RoundingMode APFloatBase::rmTowardZero; 193 constexpr RoundingMode APFloatBase::rmNearestTiesToAway; 194 195 /* A tight upper bound on number of parts required to hold the value 196 pow(5, power) is 197 198 power * 815 / (351 * integerPartWidth) + 1 199 200 However, whilst the result may require only this many parts, 201 because we are multiplying two values to get it, the 202 multiplication may require an extra part with the excess part 203 being zero (consider the trivial case of 1 * 1, tcFullMultiply 204 requires two parts to hold the single-part result). So we add an 205 extra one to guarantee enough space whilst multiplying. */ 206 const unsigned int maxExponent = 16383; 207 const unsigned int maxPrecision = 113; 208 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 209 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); 210 211 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { 212 return semantics.precision; 213 } 214 APFloatBase::ExponentType 215 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { 216 return semantics.maxExponent; 217 } 218 APFloatBase::ExponentType 219 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { 220 return semantics.minExponent; 221 } 222 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { 223 return semantics.sizeInBits; 224 } 225 226 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { 227 return Sem.sizeInBits; 228 } 229 230 /* A bunch of private, handy routines. */ 231 232 static inline Error createError(const Twine &Err) { 233 return make_error<StringError>(Err, inconvertibleErrorCode()); 234 } 235 236 static inline unsigned int 237 partCountForBits(unsigned int bits) 238 { 239 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth; 240 } 241 242 /* Returns 0U-9U. Return values >= 10U are not digits. */ 243 static inline unsigned int 244 decDigitValue(unsigned int c) 245 { 246 return c - '0'; 247 } 248 249 /* Return the value of a decimal exponent of the form 250 [+-]ddddddd. 251 252 If the exponent overflows, returns a large exponent with the 253 appropriate sign. */ 254 static Expected<int> readExponent(StringRef::iterator begin, 255 StringRef::iterator end) { 256 bool isNegative; 257 unsigned int absExponent; 258 const unsigned int overlargeExponent = 24000; /* FIXME. */ 259 StringRef::iterator p = begin; 260 261 // Treat no exponent as 0 to match binutils 262 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) { 263 return 0; 264 } 265 266 isNegative = (*p == '-'); 267 if (*p == '-' || *p == '+') { 268 p++; 269 if (p == end) 270 return createError("Exponent has no digits"); 271 } 272 273 absExponent = decDigitValue(*p++); 274 if (absExponent >= 10U) 275 return createError("Invalid character in exponent"); 276 277 for (; p != end; ++p) { 278 unsigned int value; 279 280 value = decDigitValue(*p); 281 if (value >= 10U) 282 return createError("Invalid character in exponent"); 283 284 absExponent = absExponent * 10U + value; 285 if (absExponent >= overlargeExponent) { 286 absExponent = overlargeExponent; 287 break; 288 } 289 } 290 291 if (isNegative) 292 return -(int) absExponent; 293 else 294 return (int) absExponent; 295 } 296 297 /* This is ugly and needs cleaning up, but I don't immediately see 298 how whilst remaining safe. */ 299 static Expected<int> totalExponent(StringRef::iterator p, 300 StringRef::iterator end, 301 int exponentAdjustment) { 302 int unsignedExponent; 303 bool negative, overflow; 304 int exponent = 0; 305 306 if (p == end) 307 return createError("Exponent has no digits"); 308 309 negative = *p == '-'; 310 if (*p == '-' || *p == '+') { 311 p++; 312 if (p == end) 313 return createError("Exponent has no digits"); 314 } 315 316 unsignedExponent = 0; 317 overflow = false; 318 for (; p != end; ++p) { 319 unsigned int value; 320 321 value = decDigitValue(*p); 322 if (value >= 10U) 323 return createError("Invalid character in exponent"); 324 325 unsignedExponent = unsignedExponent * 10 + value; 326 if (unsignedExponent > 32767) { 327 overflow = true; 328 break; 329 } 330 } 331 332 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 333 overflow = true; 334 335 if (!overflow) { 336 exponent = unsignedExponent; 337 if (negative) 338 exponent = -exponent; 339 exponent += exponentAdjustment; 340 if (exponent > 32767 || exponent < -32768) 341 overflow = true; 342 } 343 344 if (overflow) 345 exponent = negative ? -32768: 32767; 346 347 return exponent; 348 } 349 350 static Expected<StringRef::iterator> 351 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 352 StringRef::iterator *dot) { 353 StringRef::iterator p = begin; 354 *dot = end; 355 while (p != end && *p == '0') 356 p++; 357 358 if (p != end && *p == '.') { 359 *dot = p++; 360 361 if (end - begin == 1) 362 return createError("Significand has no digits"); 363 364 while (p != end && *p == '0') 365 p++; 366 } 367 368 return p; 369 } 370 371 /* Given a normal decimal floating point number of the form 372 373 dddd.dddd[eE][+-]ddd 374 375 where the decimal point and exponent are optional, fill out the 376 structure D. Exponent is appropriate if the significand is 377 treated as an integer, and normalizedExponent if the significand 378 is taken to have the decimal point after a single leading 379 non-zero digit. 380 381 If the value is zero, V->firstSigDigit points to a non-digit, and 382 the return exponent is zero. 383 */ 384 struct decimalInfo { 385 const char *firstSigDigit; 386 const char *lastSigDigit; 387 int exponent; 388 int normalizedExponent; 389 }; 390 391 static Error interpretDecimal(StringRef::iterator begin, 392 StringRef::iterator end, decimalInfo *D) { 393 StringRef::iterator dot = end; 394 395 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); 396 if (!PtrOrErr) 397 return PtrOrErr.takeError(); 398 StringRef::iterator p = *PtrOrErr; 399 400 D->firstSigDigit = p; 401 D->exponent = 0; 402 D->normalizedExponent = 0; 403 404 for (; p != end; ++p) { 405 if (*p == '.') { 406 if (dot != end) 407 return createError("String contains multiple dots"); 408 dot = p++; 409 if (p == end) 410 break; 411 } 412 if (decDigitValue(*p) >= 10U) 413 break; 414 } 415 416 if (p != end) { 417 if (*p != 'e' && *p != 'E') 418 return createError("Invalid character in significand"); 419 if (p == begin) 420 return createError("Significand has no digits"); 421 if (dot != end && p - begin == 1) 422 return createError("Significand has no digits"); 423 424 /* p points to the first non-digit in the string */ 425 auto ExpOrErr = readExponent(p + 1, end); 426 if (!ExpOrErr) 427 return ExpOrErr.takeError(); 428 D->exponent = *ExpOrErr; 429 430 /* Implied decimal point? */ 431 if (dot == end) 432 dot = p; 433 } 434 435 /* If number is all zeroes accept any exponent. */ 436 if (p != D->firstSigDigit) { 437 /* Drop insignificant trailing zeroes. */ 438 if (p != begin) { 439 do 440 do 441 p--; 442 while (p != begin && *p == '0'); 443 while (p != begin && *p == '.'); 444 } 445 446 /* Adjust the exponents for any decimal point. */ 447 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p)); 448 D->normalizedExponent = (D->exponent + 449 static_cast<APFloat::ExponentType>((p - D->firstSigDigit) 450 - (dot > D->firstSigDigit && dot < p))); 451 } 452 453 D->lastSigDigit = p; 454 return Error::success(); 455 } 456 457 /* Return the trailing fraction of a hexadecimal number. 458 DIGITVALUE is the first hex digit of the fraction, P points to 459 the next digit. */ 460 static Expected<lostFraction> 461 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 462 unsigned int digitValue) { 463 unsigned int hexDigit; 464 465 /* If the first trailing digit isn't 0 or 8 we can work out the 466 fraction immediately. */ 467 if (digitValue > 8) 468 return lfMoreThanHalf; 469 else if (digitValue < 8 && digitValue > 0) 470 return lfLessThanHalf; 471 472 // Otherwise we need to find the first non-zero digit. 473 while (p != end && (*p == '0' || *p == '.')) 474 p++; 475 476 if (p == end) 477 return createError("Invalid trailing hexadecimal fraction!"); 478 479 hexDigit = hexDigitValue(*p); 480 481 /* If we ran off the end it is exactly zero or one-half, otherwise 482 a little more. */ 483 if (hexDigit == -1U) 484 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 485 else 486 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 487 } 488 489 /* Return the fraction lost were a bignum truncated losing the least 490 significant BITS bits. */ 491 static lostFraction 492 lostFractionThroughTruncation(const APFloatBase::integerPart *parts, 493 unsigned int partCount, 494 unsigned int bits) 495 { 496 unsigned int lsb; 497 498 lsb = APInt::tcLSB(parts, partCount); 499 500 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 501 if (bits <= lsb) 502 return lfExactlyZero; 503 if (bits == lsb + 1) 504 return lfExactlyHalf; 505 if (bits <= partCount * APFloatBase::integerPartWidth && 506 APInt::tcExtractBit(parts, bits - 1)) 507 return lfMoreThanHalf; 508 509 return lfLessThanHalf; 510 } 511 512 /* Shift DST right BITS bits noting lost fraction. */ 513 static lostFraction 514 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits) 515 { 516 lostFraction lost_fraction; 517 518 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 519 520 APInt::tcShiftRight(dst, parts, bits); 521 522 return lost_fraction; 523 } 524 525 /* Combine the effect of two lost fractions. */ 526 static lostFraction 527 combineLostFractions(lostFraction moreSignificant, 528 lostFraction lessSignificant) 529 { 530 if (lessSignificant != lfExactlyZero) { 531 if (moreSignificant == lfExactlyZero) 532 moreSignificant = lfLessThanHalf; 533 else if (moreSignificant == lfExactlyHalf) 534 moreSignificant = lfMoreThanHalf; 535 } 536 537 return moreSignificant; 538 } 539 540 /* The error from the true value, in half-ulps, on multiplying two 541 floating point numbers, which differ from the value they 542 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 543 than the returned value. 544 545 See "How to Read Floating Point Numbers Accurately" by William D 546 Clinger. */ 547 static unsigned int 548 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 549 { 550 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 551 552 if (HUerr1 + HUerr2 == 0) 553 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 554 else 555 return inexactMultiply + 2 * (HUerr1 + HUerr2); 556 } 557 558 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 559 when the least significant BITS are truncated. BITS cannot be 560 zero. */ 561 static APFloatBase::integerPart 562 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, 563 bool isNearest) { 564 unsigned int count, partBits; 565 APFloatBase::integerPart part, boundary; 566 567 assert(bits != 0); 568 569 bits--; 570 count = bits / APFloatBase::integerPartWidth; 571 partBits = bits % APFloatBase::integerPartWidth + 1; 572 573 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits)); 574 575 if (isNearest) 576 boundary = (APFloatBase::integerPart) 1 << (partBits - 1); 577 else 578 boundary = 0; 579 580 if (count == 0) { 581 if (part - boundary <= boundary - part) 582 return part - boundary; 583 else 584 return boundary - part; 585 } 586 587 if (part == boundary) { 588 while (--count) 589 if (parts[count]) 590 return ~(APFloatBase::integerPart) 0; /* A lot. */ 591 592 return parts[0]; 593 } else if (part == boundary - 1) { 594 while (--count) 595 if (~parts[count]) 596 return ~(APFloatBase::integerPart) 0; /* A lot. */ 597 598 return -parts[0]; 599 } 600 601 return ~(APFloatBase::integerPart) 0; /* A lot. */ 602 } 603 604 /* Place pow(5, power) in DST, and return the number of parts used. 605 DST must be at least one part larger than size of the answer. */ 606 static unsigned int 607 powerOf5(APFloatBase::integerPart *dst, unsigned int power) { 608 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 }; 609 APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; 610 pow5s[0] = 78125 * 5; 611 612 unsigned int partsCount[16] = { 1 }; 613 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 614 unsigned int result; 615 assert(power <= maxExponent); 616 617 p1 = dst; 618 p2 = scratch; 619 620 *p1 = firstEightPowers[power & 7]; 621 power >>= 3; 622 623 result = 1; 624 pow5 = pow5s; 625 626 for (unsigned int n = 0; power; power >>= 1, n++) { 627 unsigned int pc; 628 629 pc = partsCount[n]; 630 631 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ 632 if (pc == 0) { 633 pc = partsCount[n - 1]; 634 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); 635 pc *= 2; 636 if (pow5[pc - 1] == 0) 637 pc--; 638 partsCount[n] = pc; 639 } 640 641 if (power & 1) { 642 APFloatBase::integerPart *tmp; 643 644 APInt::tcFullMultiply(p2, p1, pow5, result, pc); 645 result += pc; 646 if (p2[result - 1] == 0) 647 result--; 648 649 /* Now result is in p1 with partsCount parts and p2 is scratch 650 space. */ 651 tmp = p1; 652 p1 = p2; 653 p2 = tmp; 654 } 655 656 pow5 += pc; 657 } 658 659 if (p1 != dst) 660 APInt::tcAssign(dst, p1, result); 661 662 return result; 663 } 664 665 /* Zero at the end to avoid modular arithmetic when adding one; used 666 when rounding up during hexadecimal output. */ 667 static const char hexDigitsLower[] = "0123456789abcdef0"; 668 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 669 static const char infinityL[] = "infinity"; 670 static const char infinityU[] = "INFINITY"; 671 static const char NaNL[] = "nan"; 672 static const char NaNU[] = "NAN"; 673 674 /* Write out an integerPart in hexadecimal, starting with the most 675 significant nibble. Write out exactly COUNT hexdigits, return 676 COUNT. */ 677 static unsigned int 678 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count, 679 const char *hexDigitChars) 680 { 681 unsigned int result = count; 682 683 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4); 684 685 part >>= (APFloatBase::integerPartWidth - 4 * count); 686 while (count--) { 687 dst[count] = hexDigitChars[part & 0xf]; 688 part >>= 4; 689 } 690 691 return result; 692 } 693 694 /* Write out an unsigned decimal integer. */ 695 static char * 696 writeUnsignedDecimal (char *dst, unsigned int n) 697 { 698 char buff[40], *p; 699 700 p = buff; 701 do 702 *p++ = '0' + n % 10; 703 while (n /= 10); 704 705 do 706 *dst++ = *--p; 707 while (p != buff); 708 709 return dst; 710 } 711 712 /* Write out a signed decimal integer. */ 713 static char * 714 writeSignedDecimal (char *dst, int value) 715 { 716 if (value < 0) { 717 *dst++ = '-'; 718 dst = writeUnsignedDecimal(dst, -(unsigned) value); 719 } else 720 dst = writeUnsignedDecimal(dst, value); 721 722 return dst; 723 } 724 725 namespace detail { 726 /* Constructors. */ 727 void IEEEFloat::initialize(const fltSemantics *ourSemantics) { 728 unsigned int count; 729 730 semantics = ourSemantics; 731 count = partCount(); 732 if (count > 1) 733 significand.parts = new integerPart[count]; 734 } 735 736 void IEEEFloat::freeSignificand() { 737 if (needsCleanup()) 738 delete [] significand.parts; 739 } 740 741 void IEEEFloat::assign(const IEEEFloat &rhs) { 742 assert(semantics == rhs.semantics); 743 744 sign = rhs.sign; 745 category = rhs.category; 746 exponent = rhs.exponent; 747 if (isFiniteNonZero() || category == fcNaN) 748 copySignificand(rhs); 749 } 750 751 void IEEEFloat::copySignificand(const IEEEFloat &rhs) { 752 assert(isFiniteNonZero() || category == fcNaN); 753 assert(rhs.partCount() >= partCount()); 754 755 APInt::tcAssign(significandParts(), rhs.significandParts(), 756 partCount()); 757 } 758 759 /* Make this number a NaN, with an arbitrary but deterministic value 760 for the significand. If double or longer, this is a signalling NaN, 761 which may not be ideal. If float, this is QNaN(0). */ 762 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { 763 category = fcNaN; 764 sign = Negative; 765 exponent = exponentNaN(); 766 767 integerPart *significand = significandParts(); 768 unsigned numParts = partCount(); 769 770 // Set the significand bits to the fill. 771 if (!fill || fill->getNumWords() < numParts) 772 APInt::tcSet(significand, 0, numParts); 773 if (fill) { 774 APInt::tcAssign(significand, fill->getRawData(), 775 std::min(fill->getNumWords(), numParts)); 776 777 // Zero out the excess bits of the significand. 778 unsigned bitsToPreserve = semantics->precision - 1; 779 unsigned part = bitsToPreserve / 64; 780 bitsToPreserve %= 64; 781 significand[part] &= ((1ULL << bitsToPreserve) - 1); 782 for (part++; part != numParts; ++part) 783 significand[part] = 0; 784 } 785 786 unsigned QNaNBit = semantics->precision - 2; 787 788 if (SNaN) { 789 // We always have to clear the QNaN bit to make it an SNaN. 790 APInt::tcClearBit(significand, QNaNBit); 791 792 // If there are no bits set in the payload, we have to set 793 // *something* to make it a NaN instead of an infinity; 794 // conventionally, this is the next bit down from the QNaN bit. 795 if (APInt::tcIsZero(significand, numParts)) 796 APInt::tcSetBit(significand, QNaNBit - 1); 797 } else { 798 // We always have to set the QNaN bit to make it a QNaN. 799 APInt::tcSetBit(significand, QNaNBit); 800 } 801 802 // For x87 extended precision, we want to make a NaN, not a 803 // pseudo-NaN. Maybe we should expose the ability to make 804 // pseudo-NaNs? 805 if (semantics == &semX87DoubleExtended) 806 APInt::tcSetBit(significand, QNaNBit + 1); 807 } 808 809 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) { 810 if (this != &rhs) { 811 if (semantics != rhs.semantics) { 812 freeSignificand(); 813 initialize(rhs.semantics); 814 } 815 assign(rhs); 816 } 817 818 return *this; 819 } 820 821 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) { 822 freeSignificand(); 823 824 semantics = rhs.semantics; 825 significand = rhs.significand; 826 exponent = rhs.exponent; 827 category = rhs.category; 828 sign = rhs.sign; 829 830 rhs.semantics = &semBogus; 831 return *this; 832 } 833 834 bool IEEEFloat::isDenormal() const { 835 return isFiniteNonZero() && (exponent == semantics->minExponent) && 836 (APInt::tcExtractBit(significandParts(), 837 semantics->precision - 1) == 0); 838 } 839 840 bool IEEEFloat::isSmallest() const { 841 // The smallest number by magnitude in our format will be the smallest 842 // denormal, i.e. the floating point number with exponent being minimum 843 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 844 return isFiniteNonZero() && exponent == semantics->minExponent && 845 significandMSB() == 0; 846 } 847 848 bool IEEEFloat::isSignificandAllOnes() const { 849 // Test if the significand excluding the integral bit is all ones. This allows 850 // us to test for binade boundaries. 851 const integerPart *Parts = significandParts(); 852 const unsigned PartCount = partCountForBits(semantics->precision); 853 for (unsigned i = 0; i < PartCount - 1; i++) 854 if (~Parts[i]) 855 return false; 856 857 // Set the unused high bits to all ones when we compare. 858 const unsigned NumHighBits = 859 PartCount*integerPartWidth - semantics->precision + 1; 860 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 861 "Can not have more high bits to fill than integerPartWidth"); 862 const integerPart HighBitFill = 863 ~integerPart(0) << (integerPartWidth - NumHighBits); 864 if (~(Parts[PartCount - 1] | HighBitFill)) 865 return false; 866 867 return true; 868 } 869 870 bool IEEEFloat::isSignificandAllZeros() const { 871 // Test if the significand excluding the integral bit is all zeros. This 872 // allows us to test for binade boundaries. 873 const integerPart *Parts = significandParts(); 874 const unsigned PartCount = partCountForBits(semantics->precision); 875 876 for (unsigned i = 0; i < PartCount - 1; i++) 877 if (Parts[i]) 878 return false; 879 880 // Compute how many bits are used in the final word. 881 const unsigned NumHighBits = 882 PartCount*integerPartWidth - semantics->precision + 1; 883 assert(NumHighBits < integerPartWidth && "Can not have more high bits to " 884 "clear than integerPartWidth"); 885 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 886 887 if (Parts[PartCount - 1] & HighBitMask) 888 return false; 889 890 return true; 891 } 892 893 bool IEEEFloat::isLargest() const { 894 // The largest number by magnitude in our format will be the floating point 895 // number with maximum exponent and with significand that is all ones. 896 return isFiniteNonZero() && exponent == semantics->maxExponent 897 && isSignificandAllOnes(); 898 } 899 900 bool IEEEFloat::isInteger() const { 901 // This could be made more efficient; I'm going for obviously correct. 902 if (!isFinite()) return false; 903 IEEEFloat truncated = *this; 904 truncated.roundToIntegral(rmTowardZero); 905 return compare(truncated) == cmpEqual; 906 } 907 908 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const { 909 if (this == &rhs) 910 return true; 911 if (semantics != rhs.semantics || 912 category != rhs.category || 913 sign != rhs.sign) 914 return false; 915 if (category==fcZero || category==fcInfinity) 916 return true; 917 918 if (isFiniteNonZero() && exponent != rhs.exponent) 919 return false; 920 921 return std::equal(significandParts(), significandParts() + partCount(), 922 rhs.significandParts()); 923 } 924 925 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) { 926 initialize(&ourSemantics); 927 sign = 0; 928 category = fcNormal; 929 zeroSignificand(); 930 exponent = ourSemantics.precision - 1; 931 significandParts()[0] = value; 932 normalize(rmNearestTiesToEven, lfExactlyZero); 933 } 934 935 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) { 936 initialize(&ourSemantics); 937 makeZero(false); 938 } 939 940 // Delegate to the previous constructor, because later copy constructor may 941 // actually inspects category, which can't be garbage. 942 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag) 943 : IEEEFloat(ourSemantics) {} 944 945 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) { 946 initialize(rhs.semantics); 947 assign(rhs); 948 } 949 950 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) { 951 *this = std::move(rhs); 952 } 953 954 IEEEFloat::~IEEEFloat() { freeSignificand(); } 955 956 unsigned int IEEEFloat::partCount() const { 957 return partCountForBits(semantics->precision + 1); 958 } 959 960 const IEEEFloat::integerPart *IEEEFloat::significandParts() const { 961 return const_cast<IEEEFloat *>(this)->significandParts(); 962 } 963 964 IEEEFloat::integerPart *IEEEFloat::significandParts() { 965 if (partCount() > 1) 966 return significand.parts; 967 else 968 return &significand.part; 969 } 970 971 void IEEEFloat::zeroSignificand() { 972 APInt::tcSet(significandParts(), 0, partCount()); 973 } 974 975 /* Increment an fcNormal floating point number's significand. */ 976 void IEEEFloat::incrementSignificand() { 977 integerPart carry; 978 979 carry = APInt::tcIncrement(significandParts(), partCount()); 980 981 /* Our callers should never cause us to overflow. */ 982 assert(carry == 0); 983 (void)carry; 984 } 985 986 /* Add the significand of the RHS. Returns the carry flag. */ 987 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { 988 integerPart *parts; 989 990 parts = significandParts(); 991 992 assert(semantics == rhs.semantics); 993 assert(exponent == rhs.exponent); 994 995 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 996 } 997 998 /* Subtract the significand of the RHS with a borrow flag. Returns 999 the borrow flag. */ 1000 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs, 1001 integerPart borrow) { 1002 integerPart *parts; 1003 1004 parts = significandParts(); 1005 1006 assert(semantics == rhs.semantics); 1007 assert(exponent == rhs.exponent); 1008 1009 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 1010 partCount()); 1011 } 1012 1013 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 1014 on to the full-precision result of the multiplication. Returns the 1015 lost fraction. */ 1016 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs, 1017 IEEEFloat addend) { 1018 unsigned int omsb; // One, not zero, based MSB. 1019 unsigned int partsCount, newPartsCount, precision; 1020 integerPart *lhsSignificand; 1021 integerPart scratch[4]; 1022 integerPart *fullSignificand; 1023 lostFraction lost_fraction; 1024 bool ignored; 1025 1026 assert(semantics == rhs.semantics); 1027 1028 precision = semantics->precision; 1029 1030 // Allocate space for twice as many bits as the original significand, plus one 1031 // extra bit for the addition to overflow into. 1032 newPartsCount = partCountForBits(precision * 2 + 1); 1033 1034 if (newPartsCount > 4) 1035 fullSignificand = new integerPart[newPartsCount]; 1036 else 1037 fullSignificand = scratch; 1038 1039 lhsSignificand = significandParts(); 1040 partsCount = partCount(); 1041 1042 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 1043 rhs.significandParts(), partsCount, partsCount); 1044 1045 lost_fraction = lfExactlyZero; 1046 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1047 exponent += rhs.exponent; 1048 1049 // Assume the operands involved in the multiplication are single-precision 1050 // FP, and the two multiplicants are: 1051 // *this = a23 . a22 ... a0 * 2^e1 1052 // rhs = b23 . b22 ... b0 * 2^e2 1053 // the result of multiplication is: 1054 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) 1055 // Note that there are three significant bits at the left-hand side of the 1056 // radix point: two for the multiplication, and an overflow bit for the 1057 // addition (that will always be zero at this point). Move the radix point 1058 // toward left by two bits, and adjust exponent accordingly. 1059 exponent += 2; 1060 1061 if (addend.isNonZero()) { 1062 // The intermediate result of the multiplication has "2 * precision" 1063 // signicant bit; adjust the addend to be consistent with mul result. 1064 // 1065 Significand savedSignificand = significand; 1066 const fltSemantics *savedSemantics = semantics; 1067 fltSemantics extendedSemantics; 1068 opStatus status; 1069 unsigned int extendedPrecision; 1070 1071 // Normalize our MSB to one below the top bit to allow for overflow. 1072 extendedPrecision = 2 * precision + 1; 1073 if (omsb != extendedPrecision - 1) { 1074 assert(extendedPrecision > omsb); 1075 APInt::tcShiftLeft(fullSignificand, newPartsCount, 1076 (extendedPrecision - 1) - omsb); 1077 exponent -= (extendedPrecision - 1) - omsb; 1078 } 1079 1080 /* Create new semantics. */ 1081 extendedSemantics = *semantics; 1082 extendedSemantics.precision = extendedPrecision; 1083 1084 if (newPartsCount == 1) 1085 significand.part = fullSignificand[0]; 1086 else 1087 significand.parts = fullSignificand; 1088 semantics = &extendedSemantics; 1089 1090 // Make a copy so we can convert it to the extended semantics. 1091 // Note that we cannot convert the addend directly, as the extendedSemantics 1092 // is a local variable (which we take a reference to). 1093 IEEEFloat extendedAddend(addend); 1094 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); 1095 assert(status == opOK); 1096 (void)status; 1097 1098 // Shift the significand of the addend right by one bit. This guarantees 1099 // that the high bit of the significand is zero (same as fullSignificand), 1100 // so the addition will overflow (if it does overflow at all) into the top bit. 1101 lost_fraction = extendedAddend.shiftSignificandRight(1); 1102 assert(lost_fraction == lfExactlyZero && 1103 "Lost precision while shifting addend for fused-multiply-add."); 1104 1105 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 1106 1107 /* Restore our state. */ 1108 if (newPartsCount == 1) 1109 fullSignificand[0] = significand.part; 1110 significand = savedSignificand; 1111 semantics = savedSemantics; 1112 1113 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1114 } 1115 1116 // Convert the result having "2 * precision" significant-bits back to the one 1117 // having "precision" significant-bits. First, move the radix point from 1118 // poision "2*precision - 1" to "precision - 1". The exponent need to be 1119 // adjusted by "2*precision - 1" - "precision - 1" = "precision". 1120 exponent -= precision + 1; 1121 1122 // In case MSB resides at the left-hand side of radix point, shift the 1123 // mantissa right by some amount to make sure the MSB reside right before 1124 // the radix point (i.e. "MSB . rest-significant-bits"). 1125 // 1126 // Note that the result is not normalized when "omsb < precision". So, the 1127 // caller needs to call IEEEFloat::normalize() if normalized value is 1128 // expected. 1129 if (omsb > precision) { 1130 unsigned int bits, significantParts; 1131 lostFraction lf; 1132 1133 bits = omsb - precision; 1134 significantParts = partCountForBits(omsb); 1135 lf = shiftRight(fullSignificand, significantParts, bits); 1136 lost_fraction = combineLostFractions(lf, lost_fraction); 1137 exponent += bits; 1138 } 1139 1140 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 1141 1142 if (newPartsCount > 4) 1143 delete [] fullSignificand; 1144 1145 return lost_fraction; 1146 } 1147 1148 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) { 1149 return multiplySignificand(rhs, IEEEFloat(*semantics)); 1150 } 1151 1152 /* Multiply the significands of LHS and RHS to DST. */ 1153 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) { 1154 unsigned int bit, i, partsCount; 1155 const integerPart *rhsSignificand; 1156 integerPart *lhsSignificand, *dividend, *divisor; 1157 integerPart scratch[4]; 1158 lostFraction lost_fraction; 1159 1160 assert(semantics == rhs.semantics); 1161 1162 lhsSignificand = significandParts(); 1163 rhsSignificand = rhs.significandParts(); 1164 partsCount = partCount(); 1165 1166 if (partsCount > 2) 1167 dividend = new integerPart[partsCount * 2]; 1168 else 1169 dividend = scratch; 1170 1171 divisor = dividend + partsCount; 1172 1173 /* Copy the dividend and divisor as they will be modified in-place. */ 1174 for (i = 0; i < partsCount; i++) { 1175 dividend[i] = lhsSignificand[i]; 1176 divisor[i] = rhsSignificand[i]; 1177 lhsSignificand[i] = 0; 1178 } 1179 1180 exponent -= rhs.exponent; 1181 1182 unsigned int precision = semantics->precision; 1183 1184 /* Normalize the divisor. */ 1185 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 1186 if (bit) { 1187 exponent += bit; 1188 APInt::tcShiftLeft(divisor, partsCount, bit); 1189 } 1190 1191 /* Normalize the dividend. */ 1192 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 1193 if (bit) { 1194 exponent -= bit; 1195 APInt::tcShiftLeft(dividend, partsCount, bit); 1196 } 1197 1198 /* Ensure the dividend >= divisor initially for the loop below. 1199 Incidentally, this means that the division loop below is 1200 guaranteed to set the integer bit to one. */ 1201 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { 1202 exponent--; 1203 APInt::tcShiftLeft(dividend, partsCount, 1); 1204 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 1205 } 1206 1207 /* Long division. */ 1208 for (bit = precision; bit; bit -= 1) { 1209 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 1210 APInt::tcSubtract(dividend, divisor, 0, partsCount); 1211 APInt::tcSetBit(lhsSignificand, bit - 1); 1212 } 1213 1214 APInt::tcShiftLeft(dividend, partsCount, 1); 1215 } 1216 1217 /* Figure out the lost fraction. */ 1218 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 1219 1220 if (cmp > 0) 1221 lost_fraction = lfMoreThanHalf; 1222 else if (cmp == 0) 1223 lost_fraction = lfExactlyHalf; 1224 else if (APInt::tcIsZero(dividend, partsCount)) 1225 lost_fraction = lfExactlyZero; 1226 else 1227 lost_fraction = lfLessThanHalf; 1228 1229 if (partsCount > 2) 1230 delete [] dividend; 1231 1232 return lost_fraction; 1233 } 1234 1235 unsigned int IEEEFloat::significandMSB() const { 1236 return APInt::tcMSB(significandParts(), partCount()); 1237 } 1238 1239 unsigned int IEEEFloat::significandLSB() const { 1240 return APInt::tcLSB(significandParts(), partCount()); 1241 } 1242 1243 /* Note that a zero result is NOT normalized to fcZero. */ 1244 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) { 1245 /* Our exponent should not overflow. */ 1246 assert((ExponentType) (exponent + bits) >= exponent); 1247 1248 exponent += bits; 1249 1250 return shiftRight(significandParts(), partCount(), bits); 1251 } 1252 1253 /* Shift the significand left BITS bits, subtract BITS from its exponent. */ 1254 void IEEEFloat::shiftSignificandLeft(unsigned int bits) { 1255 assert(bits < semantics->precision); 1256 1257 if (bits) { 1258 unsigned int partsCount = partCount(); 1259 1260 APInt::tcShiftLeft(significandParts(), partsCount, bits); 1261 exponent -= bits; 1262 1263 assert(!APInt::tcIsZero(significandParts(), partsCount)); 1264 } 1265 } 1266 1267 IEEEFloat::cmpResult 1268 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const { 1269 int compare; 1270 1271 assert(semantics == rhs.semantics); 1272 assert(isFiniteNonZero()); 1273 assert(rhs.isFiniteNonZero()); 1274 1275 compare = exponent - rhs.exponent; 1276 1277 /* If exponents are equal, do an unsigned bignum comparison of the 1278 significands. */ 1279 if (compare == 0) 1280 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1281 partCount()); 1282 1283 if (compare > 0) 1284 return cmpGreaterThan; 1285 else if (compare < 0) 1286 return cmpLessThan; 1287 else 1288 return cmpEqual; 1289 } 1290 1291 /* Set the least significant BITS bits of a bignum, clear the 1292 rest. */ 1293 static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts, 1294 unsigned bits) { 1295 unsigned i = 0; 1296 while (bits > APInt::APINT_BITS_PER_WORD) { 1297 dst[i++] = ~(APInt::WordType)0; 1298 bits -= APInt::APINT_BITS_PER_WORD; 1299 } 1300 1301 if (bits) 1302 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits); 1303 1304 while (i < parts) 1305 dst[i++] = 0; 1306 } 1307 1308 /* Handle overflow. Sign is preserved. We either become infinity or 1309 the largest finite number. */ 1310 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) { 1311 /* Infinity? */ 1312 if (rounding_mode == rmNearestTiesToEven || 1313 rounding_mode == rmNearestTiesToAway || 1314 (rounding_mode == rmTowardPositive && !sign) || 1315 (rounding_mode == rmTowardNegative && sign)) { 1316 category = fcInfinity; 1317 return (opStatus) (opOverflow | opInexact); 1318 } 1319 1320 /* Otherwise we become the largest finite number. */ 1321 category = fcNormal; 1322 exponent = semantics->maxExponent; 1323 tcSetLeastSignificantBits(significandParts(), partCount(), 1324 semantics->precision); 1325 1326 return opInexact; 1327 } 1328 1329 /* Returns TRUE if, when truncating the current number, with BIT the 1330 new LSB, with the given lost fraction and rounding mode, the result 1331 would need to be rounded away from zero (i.e., by increasing the 1332 signficand). This routine must work for fcZero of both signs, and 1333 fcNormal numbers. */ 1334 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode, 1335 lostFraction lost_fraction, 1336 unsigned int bit) const { 1337 /* NaNs and infinities should not have lost fractions. */ 1338 assert(isFiniteNonZero() || category == fcZero); 1339 1340 /* Current callers never pass this so we don't handle it. */ 1341 assert(lost_fraction != lfExactlyZero); 1342 1343 switch (rounding_mode) { 1344 case rmNearestTiesToAway: 1345 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1346 1347 case rmNearestTiesToEven: 1348 if (lost_fraction == lfMoreThanHalf) 1349 return true; 1350 1351 /* Our zeroes don't have a significand to test. */ 1352 if (lost_fraction == lfExactlyHalf && category != fcZero) 1353 return APInt::tcExtractBit(significandParts(), bit); 1354 1355 return false; 1356 1357 case rmTowardZero: 1358 return false; 1359 1360 case rmTowardPositive: 1361 return !sign; 1362 1363 case rmTowardNegative: 1364 return sign; 1365 1366 default: 1367 break; 1368 } 1369 llvm_unreachable("Invalid rounding mode found"); 1370 } 1371 1372 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, 1373 lostFraction lost_fraction) { 1374 unsigned int omsb; /* One, not zero, based MSB. */ 1375 int exponentChange; 1376 1377 if (!isFiniteNonZero()) 1378 return opOK; 1379 1380 /* Before rounding normalize the exponent of fcNormal numbers. */ 1381 omsb = significandMSB() + 1; 1382 1383 if (omsb) { 1384 /* OMSB is numbered from 1. We want to place it in the integer 1385 bit numbered PRECISION if possible, with a compensating change in 1386 the exponent. */ 1387 exponentChange = omsb - semantics->precision; 1388 1389 /* If the resulting exponent is too high, overflow according to 1390 the rounding mode. */ 1391 if (exponent + exponentChange > semantics->maxExponent) 1392 return handleOverflow(rounding_mode); 1393 1394 /* Subnormal numbers have exponent minExponent, and their MSB 1395 is forced based on that. */ 1396 if (exponent + exponentChange < semantics->minExponent) 1397 exponentChange = semantics->minExponent - exponent; 1398 1399 /* Shifting left is easy as we don't lose precision. */ 1400 if (exponentChange < 0) { 1401 assert(lost_fraction == lfExactlyZero); 1402 1403 shiftSignificandLeft(-exponentChange); 1404 1405 return opOK; 1406 } 1407 1408 if (exponentChange > 0) { 1409 lostFraction lf; 1410 1411 /* Shift right and capture any new lost fraction. */ 1412 lf = shiftSignificandRight(exponentChange); 1413 1414 lost_fraction = combineLostFractions(lf, lost_fraction); 1415 1416 /* Keep OMSB up-to-date. */ 1417 if (omsb > (unsigned) exponentChange) 1418 omsb -= exponentChange; 1419 else 1420 omsb = 0; 1421 } 1422 } 1423 1424 /* Now round the number according to rounding_mode given the lost 1425 fraction. */ 1426 1427 /* As specified in IEEE 754, since we do not trap we do not report 1428 underflow for exact results. */ 1429 if (lost_fraction == lfExactlyZero) { 1430 /* Canonicalize zeroes. */ 1431 if (omsb == 0) 1432 category = fcZero; 1433 1434 return opOK; 1435 } 1436 1437 /* Increment the significand if we're rounding away from zero. */ 1438 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1439 if (omsb == 0) 1440 exponent = semantics->minExponent; 1441 1442 incrementSignificand(); 1443 omsb = significandMSB() + 1; 1444 1445 /* Did the significand increment overflow? */ 1446 if (omsb == (unsigned) semantics->precision + 1) { 1447 /* Renormalize by incrementing the exponent and shifting our 1448 significand right one. However if we already have the 1449 maximum exponent we overflow to infinity. */ 1450 if (exponent == semantics->maxExponent) { 1451 category = fcInfinity; 1452 1453 return (opStatus) (opOverflow | opInexact); 1454 } 1455 1456 shiftSignificandRight(1); 1457 1458 return opInexact; 1459 } 1460 } 1461 1462 /* The normal case - we were and are not denormal, and any 1463 significand increment above didn't overflow. */ 1464 if (omsb == semantics->precision) 1465 return opInexact; 1466 1467 /* We have a non-zero denormal. */ 1468 assert(omsb < semantics->precision); 1469 1470 /* Canonicalize zeroes. */ 1471 if (omsb == 0) 1472 category = fcZero; 1473 1474 /* The fcZero case is a denormal that underflowed to zero. */ 1475 return (opStatus) (opUnderflow | opInexact); 1476 } 1477 1478 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs, 1479 bool subtract) { 1480 switch (PackCategoriesIntoKey(category, rhs.category)) { 1481 default: 1482 llvm_unreachable(nullptr); 1483 1484 case PackCategoriesIntoKey(fcZero, fcNaN): 1485 case PackCategoriesIntoKey(fcNormal, fcNaN): 1486 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1487 assign(rhs); 1488 LLVM_FALLTHROUGH; 1489 case PackCategoriesIntoKey(fcNaN, fcZero): 1490 case PackCategoriesIntoKey(fcNaN, fcNormal): 1491 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1492 case PackCategoriesIntoKey(fcNaN, fcNaN): 1493 if (isSignaling()) { 1494 makeQuiet(); 1495 return opInvalidOp; 1496 } 1497 return rhs.isSignaling() ? opInvalidOp : opOK; 1498 1499 case PackCategoriesIntoKey(fcNormal, fcZero): 1500 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1501 case PackCategoriesIntoKey(fcInfinity, fcZero): 1502 return opOK; 1503 1504 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1505 case PackCategoriesIntoKey(fcZero, fcInfinity): 1506 category = fcInfinity; 1507 sign = rhs.sign ^ subtract; 1508 return opOK; 1509 1510 case PackCategoriesIntoKey(fcZero, fcNormal): 1511 assign(rhs); 1512 sign = rhs.sign ^ subtract; 1513 return opOK; 1514 1515 case PackCategoriesIntoKey(fcZero, fcZero): 1516 /* Sign depends on rounding mode; handled by caller. */ 1517 return opOK; 1518 1519 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1520 /* Differently signed infinities can only be validly 1521 subtracted. */ 1522 if (((sign ^ rhs.sign)!=0) != subtract) { 1523 makeNaN(); 1524 return opInvalidOp; 1525 } 1526 1527 return opOK; 1528 1529 case PackCategoriesIntoKey(fcNormal, fcNormal): 1530 return opDivByZero; 1531 } 1532 } 1533 1534 /* Add or subtract two normal numbers. */ 1535 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs, 1536 bool subtract) { 1537 integerPart carry; 1538 lostFraction lost_fraction; 1539 int bits; 1540 1541 /* Determine if the operation on the absolute values is effectively 1542 an addition or subtraction. */ 1543 subtract ^= static_cast<bool>(sign ^ rhs.sign); 1544 1545 /* Are we bigger exponent-wise than the RHS? */ 1546 bits = exponent - rhs.exponent; 1547 1548 /* Subtraction is more subtle than one might naively expect. */ 1549 if (subtract) { 1550 IEEEFloat temp_rhs(rhs); 1551 1552 if (bits == 0) 1553 lost_fraction = lfExactlyZero; 1554 else if (bits > 0) { 1555 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1556 shiftSignificandLeft(1); 1557 } else { 1558 lost_fraction = shiftSignificandRight(-bits - 1); 1559 temp_rhs.shiftSignificandLeft(1); 1560 } 1561 1562 // Should we reverse the subtraction. 1563 if (compareAbsoluteValue(temp_rhs) == cmpLessThan) { 1564 carry = temp_rhs.subtractSignificand 1565 (*this, lost_fraction != lfExactlyZero); 1566 copySignificand(temp_rhs); 1567 sign = !sign; 1568 } else { 1569 carry = subtractSignificand 1570 (temp_rhs, lost_fraction != lfExactlyZero); 1571 } 1572 1573 /* Invert the lost fraction - it was on the RHS and 1574 subtracted. */ 1575 if (lost_fraction == lfLessThanHalf) 1576 lost_fraction = lfMoreThanHalf; 1577 else if (lost_fraction == lfMoreThanHalf) 1578 lost_fraction = lfLessThanHalf; 1579 1580 /* The code above is intended to ensure that no borrow is 1581 necessary. */ 1582 assert(!carry); 1583 (void)carry; 1584 } else { 1585 if (bits > 0) { 1586 IEEEFloat temp_rhs(rhs); 1587 1588 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1589 carry = addSignificand(temp_rhs); 1590 } else { 1591 lost_fraction = shiftSignificandRight(-bits); 1592 carry = addSignificand(rhs); 1593 } 1594 1595 /* We have a guard bit; generating a carry cannot happen. */ 1596 assert(!carry); 1597 (void)carry; 1598 } 1599 1600 return lost_fraction; 1601 } 1602 1603 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) { 1604 switch (PackCategoriesIntoKey(category, rhs.category)) { 1605 default: 1606 llvm_unreachable(nullptr); 1607 1608 case PackCategoriesIntoKey(fcZero, fcNaN): 1609 case PackCategoriesIntoKey(fcNormal, fcNaN): 1610 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1611 assign(rhs); 1612 sign = false; 1613 LLVM_FALLTHROUGH; 1614 case PackCategoriesIntoKey(fcNaN, fcZero): 1615 case PackCategoriesIntoKey(fcNaN, fcNormal): 1616 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1617 case PackCategoriesIntoKey(fcNaN, fcNaN): 1618 sign ^= rhs.sign; // restore the original sign 1619 if (isSignaling()) { 1620 makeQuiet(); 1621 return opInvalidOp; 1622 } 1623 return rhs.isSignaling() ? opInvalidOp : opOK; 1624 1625 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1626 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1627 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1628 category = fcInfinity; 1629 return opOK; 1630 1631 case PackCategoriesIntoKey(fcZero, fcNormal): 1632 case PackCategoriesIntoKey(fcNormal, fcZero): 1633 case PackCategoriesIntoKey(fcZero, fcZero): 1634 category = fcZero; 1635 return opOK; 1636 1637 case PackCategoriesIntoKey(fcZero, fcInfinity): 1638 case PackCategoriesIntoKey(fcInfinity, fcZero): 1639 makeNaN(); 1640 return opInvalidOp; 1641 1642 case PackCategoriesIntoKey(fcNormal, fcNormal): 1643 return opOK; 1644 } 1645 } 1646 1647 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) { 1648 switch (PackCategoriesIntoKey(category, rhs.category)) { 1649 default: 1650 llvm_unreachable(nullptr); 1651 1652 case PackCategoriesIntoKey(fcZero, fcNaN): 1653 case PackCategoriesIntoKey(fcNormal, fcNaN): 1654 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1655 assign(rhs); 1656 sign = false; 1657 LLVM_FALLTHROUGH; 1658 case PackCategoriesIntoKey(fcNaN, fcZero): 1659 case PackCategoriesIntoKey(fcNaN, fcNormal): 1660 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1661 case PackCategoriesIntoKey(fcNaN, fcNaN): 1662 sign ^= rhs.sign; // restore the original sign 1663 if (isSignaling()) { 1664 makeQuiet(); 1665 return opInvalidOp; 1666 } 1667 return rhs.isSignaling() ? opInvalidOp : opOK; 1668 1669 case PackCategoriesIntoKey(fcInfinity, fcZero): 1670 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1671 case PackCategoriesIntoKey(fcZero, fcInfinity): 1672 case PackCategoriesIntoKey(fcZero, fcNormal): 1673 return opOK; 1674 1675 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1676 category = fcZero; 1677 return opOK; 1678 1679 case PackCategoriesIntoKey(fcNormal, fcZero): 1680 category = fcInfinity; 1681 return opDivByZero; 1682 1683 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1684 case PackCategoriesIntoKey(fcZero, fcZero): 1685 makeNaN(); 1686 return opInvalidOp; 1687 1688 case PackCategoriesIntoKey(fcNormal, fcNormal): 1689 return opOK; 1690 } 1691 } 1692 1693 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) { 1694 switch (PackCategoriesIntoKey(category, rhs.category)) { 1695 default: 1696 llvm_unreachable(nullptr); 1697 1698 case PackCategoriesIntoKey(fcZero, fcNaN): 1699 case PackCategoriesIntoKey(fcNormal, fcNaN): 1700 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1701 assign(rhs); 1702 LLVM_FALLTHROUGH; 1703 case PackCategoriesIntoKey(fcNaN, fcZero): 1704 case PackCategoriesIntoKey(fcNaN, fcNormal): 1705 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1706 case PackCategoriesIntoKey(fcNaN, fcNaN): 1707 if (isSignaling()) { 1708 makeQuiet(); 1709 return opInvalidOp; 1710 } 1711 return rhs.isSignaling() ? opInvalidOp : opOK; 1712 1713 case PackCategoriesIntoKey(fcZero, fcInfinity): 1714 case PackCategoriesIntoKey(fcZero, fcNormal): 1715 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1716 return opOK; 1717 1718 case PackCategoriesIntoKey(fcNormal, fcZero): 1719 case PackCategoriesIntoKey(fcInfinity, fcZero): 1720 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1721 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1722 case PackCategoriesIntoKey(fcZero, fcZero): 1723 makeNaN(); 1724 return opInvalidOp; 1725 1726 case PackCategoriesIntoKey(fcNormal, fcNormal): 1727 return opOK; 1728 } 1729 } 1730 1731 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) { 1732 switch (PackCategoriesIntoKey(category, rhs.category)) { 1733 default: 1734 llvm_unreachable(nullptr); 1735 1736 case PackCategoriesIntoKey(fcZero, fcNaN): 1737 case PackCategoriesIntoKey(fcNormal, fcNaN): 1738 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1739 assign(rhs); 1740 LLVM_FALLTHROUGH; 1741 case PackCategoriesIntoKey(fcNaN, fcZero): 1742 case PackCategoriesIntoKey(fcNaN, fcNormal): 1743 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1744 case PackCategoriesIntoKey(fcNaN, fcNaN): 1745 if (isSignaling()) { 1746 makeQuiet(); 1747 return opInvalidOp; 1748 } 1749 return rhs.isSignaling() ? opInvalidOp : opOK; 1750 1751 case PackCategoriesIntoKey(fcZero, fcInfinity): 1752 case PackCategoriesIntoKey(fcZero, fcNormal): 1753 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1754 return opOK; 1755 1756 case PackCategoriesIntoKey(fcNormal, fcZero): 1757 case PackCategoriesIntoKey(fcInfinity, fcZero): 1758 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1759 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1760 case PackCategoriesIntoKey(fcZero, fcZero): 1761 makeNaN(); 1762 return opInvalidOp; 1763 1764 case PackCategoriesIntoKey(fcNormal, fcNormal): 1765 return opDivByZero; // fake status, indicating this is not a special case 1766 } 1767 } 1768 1769 /* Change sign. */ 1770 void IEEEFloat::changeSign() { 1771 /* Look mummy, this one's easy. */ 1772 sign = !sign; 1773 } 1774 1775 /* Normalized addition or subtraction. */ 1776 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs, 1777 roundingMode rounding_mode, 1778 bool subtract) { 1779 opStatus fs; 1780 1781 fs = addOrSubtractSpecials(rhs, subtract); 1782 1783 /* This return code means it was not a simple case. */ 1784 if (fs == opDivByZero) { 1785 lostFraction lost_fraction; 1786 1787 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1788 fs = normalize(rounding_mode, lost_fraction); 1789 1790 /* Can only be zero if we lost no fraction. */ 1791 assert(category != fcZero || lost_fraction == lfExactlyZero); 1792 } 1793 1794 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1795 positive zero unless rounding to minus infinity, except that 1796 adding two like-signed zeroes gives that zero. */ 1797 if (category == fcZero) { 1798 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 1799 sign = (rounding_mode == rmTowardNegative); 1800 } 1801 1802 return fs; 1803 } 1804 1805 /* Normalized addition. */ 1806 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs, 1807 roundingMode rounding_mode) { 1808 return addOrSubtract(rhs, rounding_mode, false); 1809 } 1810 1811 /* Normalized subtraction. */ 1812 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs, 1813 roundingMode rounding_mode) { 1814 return addOrSubtract(rhs, rounding_mode, true); 1815 } 1816 1817 /* Normalized multiply. */ 1818 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs, 1819 roundingMode rounding_mode) { 1820 opStatus fs; 1821 1822 sign ^= rhs.sign; 1823 fs = multiplySpecials(rhs); 1824 1825 if (isFiniteNonZero()) { 1826 lostFraction lost_fraction = multiplySignificand(rhs); 1827 fs = normalize(rounding_mode, lost_fraction); 1828 if (lost_fraction != lfExactlyZero) 1829 fs = (opStatus) (fs | opInexact); 1830 } 1831 1832 return fs; 1833 } 1834 1835 /* Normalized divide. */ 1836 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs, 1837 roundingMode rounding_mode) { 1838 opStatus fs; 1839 1840 sign ^= rhs.sign; 1841 fs = divideSpecials(rhs); 1842 1843 if (isFiniteNonZero()) { 1844 lostFraction lost_fraction = divideSignificand(rhs); 1845 fs = normalize(rounding_mode, lost_fraction); 1846 if (lost_fraction != lfExactlyZero) 1847 fs = (opStatus) (fs | opInexact); 1848 } 1849 1850 return fs; 1851 } 1852 1853 /* Normalized remainder. */ 1854 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) { 1855 opStatus fs; 1856 unsigned int origSign = sign; 1857 1858 // First handle the special cases. 1859 fs = remainderSpecials(rhs); 1860 if (fs != opDivByZero) 1861 return fs; 1862 1863 fs = opOK; 1864 1865 // Make sure the current value is less than twice the denom. If the addition 1866 // did not succeed (an overflow has happened), which means that the finite 1867 // value we currently posses must be less than twice the denom (as we are 1868 // using the same semantics). 1869 IEEEFloat P2 = rhs; 1870 if (P2.add(rhs, rmNearestTiesToEven) == opOK) { 1871 fs = mod(P2); 1872 assert(fs == opOK); 1873 } 1874 1875 // Lets work with absolute numbers. 1876 IEEEFloat P = rhs; 1877 P.sign = false; 1878 sign = false; 1879 1880 // 1881 // To calculate the remainder we use the following scheme. 1882 // 1883 // The remainder is defained as follows: 1884 // 1885 // remainder = numer - rquot * denom = x - r * p 1886 // 1887 // Where r is the result of: x/p, rounded toward the nearest integral value 1888 // (with halfway cases rounded toward the even number). 1889 // 1890 // Currently, (after x mod 2p): 1891 // r is the number of 2p's present inside x, which is inherently, an even 1892 // number of p's. 1893 // 1894 // We may split the remaining calculation into 4 options: 1895 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 1896 // - if x == 0.5p then we round to the nearest even number which is 0, and we 1897 // are done as well. 1898 // - if 0.5p < x < p then we round to nearest number which is 1, and we have 1899 // to subtract 1p at least once. 1900 // - if x >= p then we must subtract p at least once, as x must be a 1901 // remainder. 1902 // 1903 // By now, we were done, or we added 1 to r, which in turn, now an odd number. 1904 // 1905 // We can now split the remaining calculation to the following 3 options: 1906 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 1907 // - if x == 0.5p then we round to the nearest even number. As r is odd, we 1908 // must round up to the next even number. so we must subtract p once more. 1909 // - if x > 0.5p (and inherently x < p) then we must round r up to the next 1910 // integral, and subtract p once more. 1911 // 1912 1913 // Extend the semantics to prevent an overflow/underflow or inexact result. 1914 bool losesInfo; 1915 fltSemantics extendedSemantics = *semantics; 1916 extendedSemantics.maxExponent++; 1917 extendedSemantics.minExponent--; 1918 extendedSemantics.precision += 2; 1919 1920 IEEEFloat VEx = *this; 1921 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 1922 assert(fs == opOK && !losesInfo); 1923 IEEEFloat PEx = P; 1924 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 1925 assert(fs == opOK && !losesInfo); 1926 1927 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose 1928 // any fraction. 1929 fs = VEx.add(VEx, rmNearestTiesToEven); 1930 assert(fs == opOK); 1931 1932 if (VEx.compare(PEx) == cmpGreaterThan) { 1933 fs = subtract(P, rmNearestTiesToEven); 1934 assert(fs == opOK); 1935 1936 // Make VEx = this.add(this), but because we have different semantics, we do 1937 // not want to `convert` again, so we just subtract PEx twice (which equals 1938 // to the desired value). 1939 fs = VEx.subtract(PEx, rmNearestTiesToEven); 1940 assert(fs == opOK); 1941 fs = VEx.subtract(PEx, rmNearestTiesToEven); 1942 assert(fs == opOK); 1943 1944 cmpResult result = VEx.compare(PEx); 1945 if (result == cmpGreaterThan || result == cmpEqual) { 1946 fs = subtract(P, rmNearestTiesToEven); 1947 assert(fs == opOK); 1948 } 1949 } 1950 1951 if (isZero()) 1952 sign = origSign; // IEEE754 requires this 1953 else 1954 sign ^= origSign; 1955 return fs; 1956 } 1957 1958 /* Normalized llvm frem (C fmod). */ 1959 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) { 1960 opStatus fs; 1961 fs = modSpecials(rhs); 1962 unsigned int origSign = sign; 1963 1964 while (isFiniteNonZero() && rhs.isFiniteNonZero() && 1965 compareAbsoluteValue(rhs) != cmpLessThan) { 1966 IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven); 1967 if (compareAbsoluteValue(V) == cmpLessThan) 1968 V = scalbn(V, -1, rmNearestTiesToEven); 1969 V.sign = sign; 1970 1971 fs = subtract(V, rmNearestTiesToEven); 1972 assert(fs==opOK); 1973 } 1974 if (isZero()) 1975 sign = origSign; // fmod requires this 1976 return fs; 1977 } 1978 1979 /* Normalized fused-multiply-add. */ 1980 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand, 1981 const IEEEFloat &addend, 1982 roundingMode rounding_mode) { 1983 opStatus fs; 1984 1985 /* Post-multiplication sign, before addition. */ 1986 sign ^= multiplicand.sign; 1987 1988 /* If and only if all arguments are normal do we need to do an 1989 extended-precision calculation. */ 1990 if (isFiniteNonZero() && 1991 multiplicand.isFiniteNonZero() && 1992 addend.isFinite()) { 1993 lostFraction lost_fraction; 1994 1995 lost_fraction = multiplySignificand(multiplicand, addend); 1996 fs = normalize(rounding_mode, lost_fraction); 1997 if (lost_fraction != lfExactlyZero) 1998 fs = (opStatus) (fs | opInexact); 1999 2000 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 2001 positive zero unless rounding to minus infinity, except that 2002 adding two like-signed zeroes gives that zero. */ 2003 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) 2004 sign = (rounding_mode == rmTowardNegative); 2005 } else { 2006 fs = multiplySpecials(multiplicand); 2007 2008 /* FS can only be opOK or opInvalidOp. There is no more work 2009 to do in the latter case. The IEEE-754R standard says it is 2010 implementation-defined in this case whether, if ADDEND is a 2011 quiet NaN, we raise invalid op; this implementation does so. 2012 2013 If we need to do the addition we can do so with normal 2014 precision. */ 2015 if (fs == opOK) 2016 fs = addOrSubtract(addend, rounding_mode, false); 2017 } 2018 2019 return fs; 2020 } 2021 2022 /* Rounding-mode correct round to integral value. */ 2023 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) { 2024 opStatus fs; 2025 2026 if (isInfinity()) 2027 // [IEEE Std 754-2008 6.1]: 2028 // The behavior of infinity in floating-point arithmetic is derived from the 2029 // limiting cases of real arithmetic with operands of arbitrarily 2030 // large magnitude, when such a limit exists. 2031 // ... 2032 // Operations on infinite operands are usually exact and therefore signal no 2033 // exceptions ... 2034 return opOK; 2035 2036 if (isNaN()) { 2037 if (isSignaling()) { 2038 // [IEEE Std 754-2008 6.2]: 2039 // Under default exception handling, any operation signaling an invalid 2040 // operation exception and for which a floating-point result is to be 2041 // delivered shall deliver a quiet NaN. 2042 makeQuiet(); 2043 // [IEEE Std 754-2008 6.2]: 2044 // Signaling NaNs shall be reserved operands that, under default exception 2045 // handling, signal the invalid operation exception(see 7.2) for every 2046 // general-computational and signaling-computational operation except for 2047 // the conversions described in 5.12. 2048 return opInvalidOp; 2049 } else { 2050 // [IEEE Std 754-2008 6.2]: 2051 // For an operation with quiet NaN inputs, other than maximum and minimum 2052 // operations, if a floating-point result is to be delivered the result 2053 // shall be a quiet NaN which should be one of the input NaNs. 2054 // ... 2055 // Every general-computational and quiet-computational operation involving 2056 // one or more input NaNs, none of them signaling, shall signal no 2057 // exception, except fusedMultiplyAdd might signal the invalid operation 2058 // exception(see 7.2). 2059 return opOK; 2060 } 2061 } 2062 2063 if (isZero()) { 2064 // [IEEE Std 754-2008 6.3]: 2065 // ... the sign of the result of conversions, the quantize operation, the 2066 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is 2067 // the sign of the first or only operand. 2068 return opOK; 2069 } 2070 2071 // If the exponent is large enough, we know that this value is already 2072 // integral, and the arithmetic below would potentially cause it to saturate 2073 // to +/-Inf. Bail out early instead. 2074 if (exponent+1 >= (int)semanticsPrecision(*semantics)) 2075 return opOK; 2076 2077 // The algorithm here is quite simple: we add 2^(p-1), where p is the 2078 // precision of our format, and then subtract it back off again. The choice 2079 // of rounding modes for the addition/subtraction determines the rounding mode 2080 // for our integral rounding as well. 2081 // NOTE: When the input value is negative, we do subtraction followed by 2082 // addition instead. 2083 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); 2084 IntegerConstant <<= semanticsPrecision(*semantics)-1; 2085 IEEEFloat MagicConstant(*semantics); 2086 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 2087 rmNearestTiesToEven); 2088 assert(fs == opOK); 2089 MagicConstant.sign = sign; 2090 2091 // Preserve the input sign so that we can handle the case of zero result 2092 // correctly. 2093 bool inputSign = isNegative(); 2094 2095 fs = add(MagicConstant, rounding_mode); 2096 2097 // Current value and 'MagicConstant' are both integers, so the result of the 2098 // subtraction is always exact according to Sterbenz' lemma. 2099 subtract(MagicConstant, rounding_mode); 2100 2101 // Restore the input sign. 2102 if (inputSign != isNegative()) 2103 changeSign(); 2104 2105 return fs; 2106 } 2107 2108 2109 /* Comparison requires normalized numbers. */ 2110 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const { 2111 cmpResult result; 2112 2113 assert(semantics == rhs.semantics); 2114 2115 switch (PackCategoriesIntoKey(category, rhs.category)) { 2116 default: 2117 llvm_unreachable(nullptr); 2118 2119 case PackCategoriesIntoKey(fcNaN, fcZero): 2120 case PackCategoriesIntoKey(fcNaN, fcNormal): 2121 case PackCategoriesIntoKey(fcNaN, fcInfinity): 2122 case PackCategoriesIntoKey(fcNaN, fcNaN): 2123 case PackCategoriesIntoKey(fcZero, fcNaN): 2124 case PackCategoriesIntoKey(fcNormal, fcNaN): 2125 case PackCategoriesIntoKey(fcInfinity, fcNaN): 2126 return cmpUnordered; 2127 2128 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2129 case PackCategoriesIntoKey(fcInfinity, fcZero): 2130 case PackCategoriesIntoKey(fcNormal, fcZero): 2131 if (sign) 2132 return cmpLessThan; 2133 else 2134 return cmpGreaterThan; 2135 2136 case PackCategoriesIntoKey(fcNormal, fcInfinity): 2137 case PackCategoriesIntoKey(fcZero, fcInfinity): 2138 case PackCategoriesIntoKey(fcZero, fcNormal): 2139 if (rhs.sign) 2140 return cmpGreaterThan; 2141 else 2142 return cmpLessThan; 2143 2144 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2145 if (sign == rhs.sign) 2146 return cmpEqual; 2147 else if (sign) 2148 return cmpLessThan; 2149 else 2150 return cmpGreaterThan; 2151 2152 case PackCategoriesIntoKey(fcZero, fcZero): 2153 return cmpEqual; 2154 2155 case PackCategoriesIntoKey(fcNormal, fcNormal): 2156 break; 2157 } 2158 2159 /* Two normal numbers. Do they have the same sign? */ 2160 if (sign != rhs.sign) { 2161 if (sign) 2162 result = cmpLessThan; 2163 else 2164 result = cmpGreaterThan; 2165 } else { 2166 /* Compare absolute values; invert result if negative. */ 2167 result = compareAbsoluteValue(rhs); 2168 2169 if (sign) { 2170 if (result == cmpLessThan) 2171 result = cmpGreaterThan; 2172 else if (result == cmpGreaterThan) 2173 result = cmpLessThan; 2174 } 2175 } 2176 2177 return result; 2178 } 2179 2180 /// IEEEFloat::convert - convert a value of one floating point type to another. 2181 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 2182 /// records whether the transformation lost information, i.e. whether 2183 /// converting the result back to the original type will produce the 2184 /// original value (this is almost the same as return value==fsOK, but there 2185 /// are edge cases where this is not so). 2186 2187 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, 2188 roundingMode rounding_mode, 2189 bool *losesInfo) { 2190 lostFraction lostFraction; 2191 unsigned int newPartCount, oldPartCount; 2192 opStatus fs; 2193 int shift; 2194 const fltSemantics &fromSemantics = *semantics; 2195 2196 lostFraction = lfExactlyZero; 2197 newPartCount = partCountForBits(toSemantics.precision + 1); 2198 oldPartCount = partCount(); 2199 shift = toSemantics.precision - fromSemantics.precision; 2200 2201 bool X86SpecialNan = false; 2202 if (&fromSemantics == &semX87DoubleExtended && 2203 &toSemantics != &semX87DoubleExtended && category == fcNaN && 2204 (!(*significandParts() & 0x8000000000000000ULL) || 2205 !(*significandParts() & 0x4000000000000000ULL))) { 2206 // x86 has some unusual NaNs which cannot be represented in any other 2207 // format; note them here. 2208 X86SpecialNan = true; 2209 } 2210 2211 // If this is a truncation of a denormal number, and the target semantics 2212 // has larger exponent range than the source semantics (this can happen 2213 // when truncating from PowerPC double-double to double format), the 2214 // right shift could lose result mantissa bits. Adjust exponent instead 2215 // of performing excessive shift. 2216 if (shift < 0 && isFiniteNonZero()) { 2217 int exponentChange = significandMSB() + 1 - fromSemantics.precision; 2218 if (exponent + exponentChange < toSemantics.minExponent) 2219 exponentChange = toSemantics.minExponent - exponent; 2220 if (exponentChange < shift) 2221 exponentChange = shift; 2222 if (exponentChange < 0) { 2223 shift -= exponentChange; 2224 exponent += exponentChange; 2225 } 2226 } 2227 2228 // If this is a truncation, perform the shift before we narrow the storage. 2229 if (shift < 0 && (isFiniteNonZero() || category==fcNaN)) 2230 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 2231 2232 // Fix the storage so it can hold to new value. 2233 if (newPartCount > oldPartCount) { 2234 // The new type requires more storage; make it available. 2235 integerPart *newParts; 2236 newParts = new integerPart[newPartCount]; 2237 APInt::tcSet(newParts, 0, newPartCount); 2238 if (isFiniteNonZero() || category==fcNaN) 2239 APInt::tcAssign(newParts, significandParts(), oldPartCount); 2240 freeSignificand(); 2241 significand.parts = newParts; 2242 } else if (newPartCount == 1 && oldPartCount != 1) { 2243 // Switch to built-in storage for a single part. 2244 integerPart newPart = 0; 2245 if (isFiniteNonZero() || category==fcNaN) 2246 newPart = significandParts()[0]; 2247 freeSignificand(); 2248 significand.part = newPart; 2249 } 2250 2251 // Now that we have the right storage, switch the semantics. 2252 semantics = &toSemantics; 2253 2254 // If this is an extension, perform the shift now that the storage is 2255 // available. 2256 if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) 2257 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 2258 2259 if (isFiniteNonZero()) { 2260 fs = normalize(rounding_mode, lostFraction); 2261 *losesInfo = (fs != opOK); 2262 } else if (category == fcNaN) { 2263 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 2264 2265 // For x87 extended precision, we want to make a NaN, not a special NaN if 2266 // the input wasn't special either. 2267 if (!X86SpecialNan && semantics == &semX87DoubleExtended) 2268 APInt::tcSetBit(significandParts(), semantics->precision - 1); 2269 2270 // Convert of sNaN creates qNaN and raises an exception (invalid op). 2271 // This also guarantees that a sNaN does not become Inf on a truncation 2272 // that loses all payload bits. 2273 if (isSignaling()) { 2274 makeQuiet(); 2275 fs = opInvalidOp; 2276 } else { 2277 fs = opOK; 2278 } 2279 } else { 2280 *losesInfo = false; 2281 fs = opOK; 2282 } 2283 2284 return fs; 2285 } 2286 2287 /* Convert a floating point number to an integer according to the 2288 rounding mode. If the rounded integer value is out of range this 2289 returns an invalid operation exception and the contents of the 2290 destination parts are unspecified. If the rounded value is in 2291 range but the floating point number is not the exact integer, the C 2292 standard doesn't require an inexact exception to be raised. IEEE 2293 854 does require it so we do that. 2294 2295 Note that for conversions to integer type the C standard requires 2296 round-to-zero to always be used. */ 2297 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger( 2298 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned, 2299 roundingMode rounding_mode, bool *isExact) const { 2300 lostFraction lost_fraction; 2301 const integerPart *src; 2302 unsigned int dstPartsCount, truncatedBits; 2303 2304 *isExact = false; 2305 2306 /* Handle the three special cases first. */ 2307 if (category == fcInfinity || category == fcNaN) 2308 return opInvalidOp; 2309 2310 dstPartsCount = partCountForBits(width); 2311 assert(dstPartsCount <= parts.size() && "Integer too big"); 2312 2313 if (category == fcZero) { 2314 APInt::tcSet(parts.data(), 0, dstPartsCount); 2315 // Negative zero can't be represented as an int. 2316 *isExact = !sign; 2317 return opOK; 2318 } 2319 2320 src = significandParts(); 2321 2322 /* Step 1: place our absolute value, with any fraction truncated, in 2323 the destination. */ 2324 if (exponent < 0) { 2325 /* Our absolute value is less than one; truncate everything. */ 2326 APInt::tcSet(parts.data(), 0, dstPartsCount); 2327 /* For exponent -1 the integer bit represents .5, look at that. 2328 For smaller exponents leftmost truncated bit is 0. */ 2329 truncatedBits = semantics->precision -1U - exponent; 2330 } else { 2331 /* We want the most significant (exponent + 1) bits; the rest are 2332 truncated. */ 2333 unsigned int bits = exponent + 1U; 2334 2335 /* Hopelessly large in magnitude? */ 2336 if (bits > width) 2337 return opInvalidOp; 2338 2339 if (bits < semantics->precision) { 2340 /* We truncate (semantics->precision - bits) bits. */ 2341 truncatedBits = semantics->precision - bits; 2342 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits); 2343 } else { 2344 /* We want at least as many bits as are available. */ 2345 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision, 2346 0); 2347 APInt::tcShiftLeft(parts.data(), dstPartsCount, 2348 bits - semantics->precision); 2349 truncatedBits = 0; 2350 } 2351 } 2352 2353 /* Step 2: work out any lost fraction, and increment the absolute 2354 value if we would round away from zero. */ 2355 if (truncatedBits) { 2356 lost_fraction = lostFractionThroughTruncation(src, partCount(), 2357 truncatedBits); 2358 if (lost_fraction != lfExactlyZero && 2359 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2360 if (APInt::tcIncrement(parts.data(), dstPartsCount)) 2361 return opInvalidOp; /* Overflow. */ 2362 } 2363 } else { 2364 lost_fraction = lfExactlyZero; 2365 } 2366 2367 /* Step 3: check if we fit in the destination. */ 2368 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1; 2369 2370 if (sign) { 2371 if (!isSigned) { 2372 /* Negative numbers cannot be represented as unsigned. */ 2373 if (omsb != 0) 2374 return opInvalidOp; 2375 } else { 2376 /* It takes omsb bits to represent the unsigned integer value. 2377 We lose a bit for the sign, but care is needed as the 2378 maximally negative integer is a special case. */ 2379 if (omsb == width && 2380 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb) 2381 return opInvalidOp; 2382 2383 /* This case can happen because of rounding. */ 2384 if (omsb > width) 2385 return opInvalidOp; 2386 } 2387 2388 APInt::tcNegate (parts.data(), dstPartsCount); 2389 } else { 2390 if (omsb >= width + !isSigned) 2391 return opInvalidOp; 2392 } 2393 2394 if (lost_fraction == lfExactlyZero) { 2395 *isExact = true; 2396 return opOK; 2397 } else 2398 return opInexact; 2399 } 2400 2401 /* Same as convertToSignExtendedInteger, except we provide 2402 deterministic values in case of an invalid operation exception, 2403 namely zero for NaNs and the minimal or maximal value respectively 2404 for underflow or overflow. 2405 The *isExact output tells whether the result is exact, in the sense 2406 that converting it back to the original floating point type produces 2407 the original value. This is almost equivalent to result==opOK, 2408 except for negative zeroes. 2409 */ 2410 IEEEFloat::opStatus 2411 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts, 2412 unsigned int width, bool isSigned, 2413 roundingMode rounding_mode, bool *isExact) const { 2414 opStatus fs; 2415 2416 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2417 isExact); 2418 2419 if (fs == opInvalidOp) { 2420 unsigned int bits, dstPartsCount; 2421 2422 dstPartsCount = partCountForBits(width); 2423 assert(dstPartsCount <= parts.size() && "Integer too big"); 2424 2425 if (category == fcNaN) 2426 bits = 0; 2427 else if (sign) 2428 bits = isSigned; 2429 else 2430 bits = width - isSigned; 2431 2432 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits); 2433 if (sign && isSigned) 2434 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1); 2435 } 2436 2437 return fs; 2438 } 2439 2440 /* Convert an unsigned integer SRC to a floating point number, 2441 rounding according to ROUNDING_MODE. The sign of the floating 2442 point number is not modified. */ 2443 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts( 2444 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) { 2445 unsigned int omsb, precision, dstCount; 2446 integerPart *dst; 2447 lostFraction lost_fraction; 2448 2449 category = fcNormal; 2450 omsb = APInt::tcMSB(src, srcCount) + 1; 2451 dst = significandParts(); 2452 dstCount = partCount(); 2453 precision = semantics->precision; 2454 2455 /* We want the most significant PRECISION bits of SRC. There may not 2456 be that many; extract what we can. */ 2457 if (precision <= omsb) { 2458 exponent = omsb - 1; 2459 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2460 omsb - precision); 2461 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2462 } else { 2463 exponent = precision - 1; 2464 lost_fraction = lfExactlyZero; 2465 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2466 } 2467 2468 return normalize(rounding_mode, lost_fraction); 2469 } 2470 2471 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned, 2472 roundingMode rounding_mode) { 2473 unsigned int partCount = Val.getNumWords(); 2474 APInt api = Val; 2475 2476 sign = false; 2477 if (isSigned && api.isNegative()) { 2478 sign = true; 2479 api = -api; 2480 } 2481 2482 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2483 } 2484 2485 /* Convert a two's complement integer SRC to a floating point number, 2486 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2487 integer is signed, in which case it must be sign-extended. */ 2488 IEEEFloat::opStatus 2489 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src, 2490 unsigned int srcCount, bool isSigned, 2491 roundingMode rounding_mode) { 2492 opStatus status; 2493 2494 if (isSigned && 2495 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2496 integerPart *copy; 2497 2498 /* If we're signed and negative negate a copy. */ 2499 sign = true; 2500 copy = new integerPart[srcCount]; 2501 APInt::tcAssign(copy, src, srcCount); 2502 APInt::tcNegate(copy, srcCount); 2503 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2504 delete [] copy; 2505 } else { 2506 sign = false; 2507 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2508 } 2509 2510 return status; 2511 } 2512 2513 /* FIXME: should this just take a const APInt reference? */ 2514 IEEEFloat::opStatus 2515 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2516 unsigned int width, bool isSigned, 2517 roundingMode rounding_mode) { 2518 unsigned int partCount = partCountForBits(width); 2519 APInt api = APInt(width, makeArrayRef(parts, partCount)); 2520 2521 sign = false; 2522 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2523 sign = true; 2524 api = -api; 2525 } 2526 2527 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2528 } 2529 2530 Expected<IEEEFloat::opStatus> 2531 IEEEFloat::convertFromHexadecimalString(StringRef s, 2532 roundingMode rounding_mode) { 2533 lostFraction lost_fraction = lfExactlyZero; 2534 2535 category = fcNormal; 2536 zeroSignificand(); 2537 exponent = 0; 2538 2539 integerPart *significand = significandParts(); 2540 unsigned partsCount = partCount(); 2541 unsigned bitPos = partsCount * integerPartWidth; 2542 bool computedTrailingFraction = false; 2543 2544 // Skip leading zeroes and any (hexa)decimal point. 2545 StringRef::iterator begin = s.begin(); 2546 StringRef::iterator end = s.end(); 2547 StringRef::iterator dot; 2548 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2549 if (!PtrOrErr) 2550 return PtrOrErr.takeError(); 2551 StringRef::iterator p = *PtrOrErr; 2552 StringRef::iterator firstSignificantDigit = p; 2553 2554 while (p != end) { 2555 integerPart hex_value; 2556 2557 if (*p == '.') { 2558 if (dot != end) 2559 return createError("String contains multiple dots"); 2560 dot = p++; 2561 continue; 2562 } 2563 2564 hex_value = hexDigitValue(*p); 2565 if (hex_value == -1U) 2566 break; 2567 2568 p++; 2569 2570 // Store the number while we have space. 2571 if (bitPos) { 2572 bitPos -= 4; 2573 hex_value <<= bitPos % integerPartWidth; 2574 significand[bitPos / integerPartWidth] |= hex_value; 2575 } else if (!computedTrailingFraction) { 2576 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value); 2577 if (!FractOrErr) 2578 return FractOrErr.takeError(); 2579 lost_fraction = *FractOrErr; 2580 computedTrailingFraction = true; 2581 } 2582 } 2583 2584 /* Hex floats require an exponent but not a hexadecimal point. */ 2585 if (p == end) 2586 return createError("Hex strings require an exponent"); 2587 if (*p != 'p' && *p != 'P') 2588 return createError("Invalid character in significand"); 2589 if (p == begin) 2590 return createError("Significand has no digits"); 2591 if (dot != end && p - begin == 1) 2592 return createError("Significand has no digits"); 2593 2594 /* Ignore the exponent if we are zero. */ 2595 if (p != firstSignificantDigit) { 2596 int expAdjustment; 2597 2598 /* Implicit hexadecimal point? */ 2599 if (dot == end) 2600 dot = p; 2601 2602 /* Calculate the exponent adjustment implicit in the number of 2603 significant digits. */ 2604 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 2605 if (expAdjustment < 0) 2606 expAdjustment++; 2607 expAdjustment = expAdjustment * 4 - 1; 2608 2609 /* Adjust for writing the significand starting at the most 2610 significant nibble. */ 2611 expAdjustment += semantics->precision; 2612 expAdjustment -= partsCount * integerPartWidth; 2613 2614 /* Adjust for the given exponent. */ 2615 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment); 2616 if (!ExpOrErr) 2617 return ExpOrErr.takeError(); 2618 exponent = *ExpOrErr; 2619 } 2620 2621 return normalize(rounding_mode, lost_fraction); 2622 } 2623 2624 IEEEFloat::opStatus 2625 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2626 unsigned sigPartCount, int exp, 2627 roundingMode rounding_mode) { 2628 unsigned int parts, pow5PartCount; 2629 fltSemantics calcSemantics = { 32767, -32767, 0, 0 }; 2630 integerPart pow5Parts[maxPowerOfFiveParts]; 2631 bool isNearest; 2632 2633 isNearest = (rounding_mode == rmNearestTiesToEven || 2634 rounding_mode == rmNearestTiesToAway); 2635 2636 parts = partCountForBits(semantics->precision + 11); 2637 2638 /* Calculate pow(5, abs(exp)). */ 2639 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2640 2641 for (;; parts *= 2) { 2642 opStatus sigStatus, powStatus; 2643 unsigned int excessPrecision, truncatedBits; 2644 2645 calcSemantics.precision = parts * integerPartWidth - 1; 2646 excessPrecision = calcSemantics.precision - semantics->precision; 2647 truncatedBits = excessPrecision; 2648 2649 IEEEFloat decSig(calcSemantics, uninitialized); 2650 decSig.makeZero(sign); 2651 IEEEFloat pow5(calcSemantics); 2652 2653 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2654 rmNearestTiesToEven); 2655 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2656 rmNearestTiesToEven); 2657 /* Add exp, as 10^n = 5^n * 2^n. */ 2658 decSig.exponent += exp; 2659 2660 lostFraction calcLostFraction; 2661 integerPart HUerr, HUdistance; 2662 unsigned int powHUerr; 2663 2664 if (exp >= 0) { 2665 /* multiplySignificand leaves the precision-th bit set to 1. */ 2666 calcLostFraction = decSig.multiplySignificand(pow5); 2667 powHUerr = powStatus != opOK; 2668 } else { 2669 calcLostFraction = decSig.divideSignificand(pow5); 2670 /* Denormal numbers have less precision. */ 2671 if (decSig.exponent < semantics->minExponent) { 2672 excessPrecision += (semantics->minExponent - decSig.exponent); 2673 truncatedBits = excessPrecision; 2674 if (excessPrecision > calcSemantics.precision) 2675 excessPrecision = calcSemantics.precision; 2676 } 2677 /* Extra half-ulp lost in reciprocal of exponent. */ 2678 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 2679 } 2680 2681 /* Both multiplySignificand and divideSignificand return the 2682 result with the integer bit set. */ 2683 assert(APInt::tcExtractBit 2684 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2685 2686 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2687 powHUerr); 2688 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2689 excessPrecision, isNearest); 2690 2691 /* Are we guaranteed to round correctly if we truncate? */ 2692 if (HUdistance >= HUerr) { 2693 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2694 calcSemantics.precision - excessPrecision, 2695 excessPrecision); 2696 /* Take the exponent of decSig. If we tcExtract-ed less bits 2697 above we must adjust our exponent to compensate for the 2698 implicit right shift. */ 2699 exponent = (decSig.exponent + semantics->precision 2700 - (calcSemantics.precision - excessPrecision)); 2701 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2702 decSig.partCount(), 2703 truncatedBits); 2704 return normalize(rounding_mode, calcLostFraction); 2705 } 2706 } 2707 } 2708 2709 Expected<IEEEFloat::opStatus> 2710 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) { 2711 decimalInfo D; 2712 opStatus fs; 2713 2714 /* Scan the text. */ 2715 StringRef::iterator p = str.begin(); 2716 if (Error Err = interpretDecimal(p, str.end(), &D)) 2717 return std::move(Err); 2718 2719 /* Handle the quick cases. First the case of no significant digits, 2720 i.e. zero, and then exponents that are obviously too large or too 2721 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 2722 definitely overflows if 2723 2724 (exp - 1) * L >= maxExponent 2725 2726 and definitely underflows to zero where 2727 2728 (exp + 1) * L <= minExponent - precision 2729 2730 With integer arithmetic the tightest bounds for L are 2731 2732 93/28 < L < 196/59 [ numerator <= 256 ] 2733 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 2734 */ 2735 2736 // Test if we have a zero number allowing for strings with no null terminators 2737 // and zero decimals with non-zero exponents. 2738 // 2739 // We computed firstSigDigit by ignoring all zeros and dots. Thus if 2740 // D->firstSigDigit equals str.end(), every digit must be a zero and there can 2741 // be at most one dot. On the other hand, if we have a zero with a non-zero 2742 // exponent, then we know that D.firstSigDigit will be non-numeric. 2743 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { 2744 category = fcZero; 2745 fs = opOK; 2746 2747 /* Check whether the normalized exponent is high enough to overflow 2748 max during the log-rebasing in the max-exponent check below. */ 2749 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 2750 fs = handleOverflow(rounding_mode); 2751 2752 /* If it wasn't, then it also wasn't high enough to overflow max 2753 during the log-rebasing in the min-exponent check. Check that it 2754 won't overflow min in either check, then perform the min-exponent 2755 check. */ 2756 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 2757 (D.normalizedExponent + 1) * 28738 <= 2758 8651 * (semantics->minExponent - (int) semantics->precision)) { 2759 /* Underflow to zero and round. */ 2760 category = fcNormal; 2761 zeroSignificand(); 2762 fs = normalize(rounding_mode, lfLessThanHalf); 2763 2764 /* We can finally safely perform the max-exponent check. */ 2765 } else if ((D.normalizedExponent - 1) * 42039 2766 >= 12655 * semantics->maxExponent) { 2767 /* Overflow and round. */ 2768 fs = handleOverflow(rounding_mode); 2769 } else { 2770 integerPart *decSignificand; 2771 unsigned int partCount; 2772 2773 /* A tight upper bound on number of bits required to hold an 2774 N-digit decimal integer is N * 196 / 59. Allocate enough space 2775 to hold the full significand, and an extra part required by 2776 tcMultiplyPart. */ 2777 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 2778 partCount = partCountForBits(1 + 196 * partCount / 59); 2779 decSignificand = new integerPart[partCount + 1]; 2780 partCount = 0; 2781 2782 /* Convert to binary efficiently - we do almost all multiplication 2783 in an integerPart. When this would overflow do we do a single 2784 bignum multiplication, and then revert again to multiplication 2785 in an integerPart. */ 2786 do { 2787 integerPart decValue, val, multiplier; 2788 2789 val = 0; 2790 multiplier = 1; 2791 2792 do { 2793 if (*p == '.') { 2794 p++; 2795 if (p == str.end()) { 2796 break; 2797 } 2798 } 2799 decValue = decDigitValue(*p++); 2800 if (decValue >= 10U) { 2801 delete[] decSignificand; 2802 return createError("Invalid character in significand"); 2803 } 2804 multiplier *= 10; 2805 val = val * 10 + decValue; 2806 /* The maximum number that can be multiplied by ten with any 2807 digit added without overflowing an integerPart. */ 2808 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 2809 2810 /* Multiply out the current part. */ 2811 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 2812 partCount, partCount + 1, false); 2813 2814 /* If we used another part (likely but not guaranteed), increase 2815 the count. */ 2816 if (decSignificand[partCount]) 2817 partCount++; 2818 } while (p <= D.lastSigDigit); 2819 2820 category = fcNormal; 2821 fs = roundSignificandWithExponent(decSignificand, partCount, 2822 D.exponent, rounding_mode); 2823 2824 delete [] decSignificand; 2825 } 2826 2827 return fs; 2828 } 2829 2830 bool IEEEFloat::convertFromStringSpecials(StringRef str) { 2831 const size_t MIN_NAME_SIZE = 3; 2832 2833 if (str.size() < MIN_NAME_SIZE) 2834 return false; 2835 2836 if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) { 2837 makeInf(false); 2838 return true; 2839 } 2840 2841 bool IsNegative = str.front() == '-'; 2842 if (IsNegative) { 2843 str = str.drop_front(); 2844 if (str.size() < MIN_NAME_SIZE) 2845 return false; 2846 2847 if (str.equals("inf") || str.equals("INFINITY") || str.equals("Inf")) { 2848 makeInf(true); 2849 return true; 2850 } 2851 } 2852 2853 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN. 2854 bool IsSignaling = str.front() == 's' || str.front() == 'S'; 2855 if (IsSignaling) { 2856 str = str.drop_front(); 2857 if (str.size() < MIN_NAME_SIZE) 2858 return false; 2859 } 2860 2861 if (str.startswith("nan") || str.startswith("NaN")) { 2862 str = str.drop_front(3); 2863 2864 // A NaN without payload. 2865 if (str.empty()) { 2866 makeNaN(IsSignaling, IsNegative); 2867 return true; 2868 } 2869 2870 // Allow the payload to be inside parentheses. 2871 if (str.front() == '(') { 2872 // Parentheses should be balanced (and not empty). 2873 if (str.size() <= 2 || str.back() != ')') 2874 return false; 2875 2876 str = str.slice(1, str.size() - 1); 2877 } 2878 2879 // Determine the payload number's radix. 2880 unsigned Radix = 10; 2881 if (str[0] == '0') { 2882 if (str.size() > 1 && tolower(str[1]) == 'x') { 2883 str = str.drop_front(2); 2884 Radix = 16; 2885 } else 2886 Radix = 8; 2887 } 2888 2889 // Parse the payload and make the NaN. 2890 APInt Payload; 2891 if (!str.getAsInteger(Radix, Payload)) { 2892 makeNaN(IsSignaling, IsNegative, &Payload); 2893 return true; 2894 } 2895 } 2896 2897 return false; 2898 } 2899 2900 Expected<IEEEFloat::opStatus> 2901 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) { 2902 if (str.empty()) 2903 return createError("Invalid string length"); 2904 2905 // Handle special cases. 2906 if (convertFromStringSpecials(str)) 2907 return opOK; 2908 2909 /* Handle a leading minus sign. */ 2910 StringRef::iterator p = str.begin(); 2911 size_t slen = str.size(); 2912 sign = *p == '-' ? 1 : 0; 2913 if (*p == '-' || *p == '+') { 2914 p++; 2915 slen--; 2916 if (!slen) 2917 return createError("String has no digits"); 2918 } 2919 2920 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 2921 if (slen == 2) 2922 return createError("Invalid string"); 2923 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 2924 rounding_mode); 2925 } 2926 2927 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 2928 } 2929 2930 /* Write out a hexadecimal representation of the floating point value 2931 to DST, which must be of sufficient size, in the C99 form 2932 [-]0xh.hhhhp[+-]d. Return the number of characters written, 2933 excluding the terminating NUL. 2934 2935 If UPPERCASE, the output is in upper case, otherwise in lower case. 2936 2937 HEXDIGITS digits appear altogether, rounding the value if 2938 necessary. If HEXDIGITS is 0, the minimal precision to display the 2939 number precisely is used instead. If nothing would appear after 2940 the decimal point it is suppressed. 2941 2942 The decimal exponent is always printed and has at least one digit. 2943 Zero values display an exponent of zero. Infinities and NaNs 2944 appear as "infinity" or "nan" respectively. 2945 2946 The above rules are as specified by C99. There is ambiguity about 2947 what the leading hexadecimal digit should be. This implementation 2948 uses whatever is necessary so that the exponent is displayed as 2949 stored. This implies the exponent will fall within the IEEE format 2950 range, and the leading hexadecimal digit will be 0 (for denormals), 2951 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 2952 any other digits zero). 2953 */ 2954 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits, 2955 bool upperCase, 2956 roundingMode rounding_mode) const { 2957 char *p; 2958 2959 p = dst; 2960 if (sign) 2961 *dst++ = '-'; 2962 2963 switch (category) { 2964 case fcInfinity: 2965 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 2966 dst += sizeof infinityL - 1; 2967 break; 2968 2969 case fcNaN: 2970 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 2971 dst += sizeof NaNU - 1; 2972 break; 2973 2974 case fcZero: 2975 *dst++ = '0'; 2976 *dst++ = upperCase ? 'X': 'x'; 2977 *dst++ = '0'; 2978 if (hexDigits > 1) { 2979 *dst++ = '.'; 2980 memset (dst, '0', hexDigits - 1); 2981 dst += hexDigits - 1; 2982 } 2983 *dst++ = upperCase ? 'P': 'p'; 2984 *dst++ = '0'; 2985 break; 2986 2987 case fcNormal: 2988 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 2989 break; 2990 } 2991 2992 *dst = 0; 2993 2994 return static_cast<unsigned int>(dst - p); 2995 } 2996 2997 /* Does the hard work of outputting the correctly rounded hexadecimal 2998 form of a normal floating point number with the specified number of 2999 hexadecimal digits. If HEXDIGITS is zero the minimum number of 3000 digits necessary to print the value precisely is output. */ 3001 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 3002 bool upperCase, 3003 roundingMode rounding_mode) const { 3004 unsigned int count, valueBits, shift, partsCount, outputDigits; 3005 const char *hexDigitChars; 3006 const integerPart *significand; 3007 char *p; 3008 bool roundUp; 3009 3010 *dst++ = '0'; 3011 *dst++ = upperCase ? 'X': 'x'; 3012 3013 roundUp = false; 3014 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 3015 3016 significand = significandParts(); 3017 partsCount = partCount(); 3018 3019 /* +3 because the first digit only uses the single integer bit, so 3020 we have 3 virtual zero most-significant-bits. */ 3021 valueBits = semantics->precision + 3; 3022 shift = integerPartWidth - valueBits % integerPartWidth; 3023 3024 /* The natural number of digits required ignoring trailing 3025 insignificant zeroes. */ 3026 outputDigits = (valueBits - significandLSB () + 3) / 4; 3027 3028 /* hexDigits of zero means use the required number for the 3029 precision. Otherwise, see if we are truncating. If we are, 3030 find out if we need to round away from zero. */ 3031 if (hexDigits) { 3032 if (hexDigits < outputDigits) { 3033 /* We are dropping non-zero bits, so need to check how to round. 3034 "bits" is the number of dropped bits. */ 3035 unsigned int bits; 3036 lostFraction fraction; 3037 3038 bits = valueBits - hexDigits * 4; 3039 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 3040 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 3041 } 3042 outputDigits = hexDigits; 3043 } 3044 3045 /* Write the digits consecutively, and start writing in the location 3046 of the hexadecimal point. We move the most significant digit 3047 left and add the hexadecimal point later. */ 3048 p = ++dst; 3049 3050 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 3051 3052 while (outputDigits && count) { 3053 integerPart part; 3054 3055 /* Put the most significant integerPartWidth bits in "part". */ 3056 if (--count == partsCount) 3057 part = 0; /* An imaginary higher zero part. */ 3058 else 3059 part = significand[count] << shift; 3060 3061 if (count && shift) 3062 part |= significand[count - 1] >> (integerPartWidth - shift); 3063 3064 /* Convert as much of "part" to hexdigits as we can. */ 3065 unsigned int curDigits = integerPartWidth / 4; 3066 3067 if (curDigits > outputDigits) 3068 curDigits = outputDigits; 3069 dst += partAsHex (dst, part, curDigits, hexDigitChars); 3070 outputDigits -= curDigits; 3071 } 3072 3073 if (roundUp) { 3074 char *q = dst; 3075 3076 /* Note that hexDigitChars has a trailing '0'. */ 3077 do { 3078 q--; 3079 *q = hexDigitChars[hexDigitValue (*q) + 1]; 3080 } while (*q == '0'); 3081 assert(q >= p); 3082 } else { 3083 /* Add trailing zeroes. */ 3084 memset (dst, '0', outputDigits); 3085 dst += outputDigits; 3086 } 3087 3088 /* Move the most significant digit to before the point, and if there 3089 is something after the decimal point add it. This must come 3090 after rounding above. */ 3091 p[-1] = p[0]; 3092 if (dst -1 == p) 3093 dst--; 3094 else 3095 p[0] = '.'; 3096 3097 /* Finally output the exponent. */ 3098 *dst++ = upperCase ? 'P': 'p'; 3099 3100 return writeSignedDecimal (dst, exponent); 3101 } 3102 3103 hash_code hash_value(const IEEEFloat &Arg) { 3104 if (!Arg.isFiniteNonZero()) 3105 return hash_combine((uint8_t)Arg.category, 3106 // NaN has no sign, fix it at zero. 3107 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, 3108 Arg.semantics->precision); 3109 3110 // Normal floats need their exponent and significand hashed. 3111 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, 3112 Arg.semantics->precision, Arg.exponent, 3113 hash_combine_range( 3114 Arg.significandParts(), 3115 Arg.significandParts() + Arg.partCount())); 3116 } 3117 3118 // Conversion from APFloat to/from host float/double. It may eventually be 3119 // possible to eliminate these and have everybody deal with APFloats, but that 3120 // will take a while. This approach will not easily extend to long double. 3121 // Current implementation requires integerPartWidth==64, which is correct at 3122 // the moment but could be made more general. 3123 3124 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 3125 // the actual IEEE respresentations. We compensate for that here. 3126 3127 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const { 3128 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended); 3129 assert(partCount()==2); 3130 3131 uint64_t myexponent, mysignificand; 3132 3133 if (isFiniteNonZero()) { 3134 myexponent = exponent+16383; //bias 3135 mysignificand = significandParts()[0]; 3136 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 3137 myexponent = 0; // denormal 3138 } else if (category==fcZero) { 3139 myexponent = 0; 3140 mysignificand = 0; 3141 } else if (category==fcInfinity) { 3142 myexponent = 0x7fff; 3143 mysignificand = 0x8000000000000000ULL; 3144 } else { 3145 assert(category == fcNaN && "Unknown category"); 3146 myexponent = 0x7fff; 3147 mysignificand = significandParts()[0]; 3148 } 3149 3150 uint64_t words[2]; 3151 words[0] = mysignificand; 3152 words[1] = ((uint64_t)(sign & 1) << 15) | 3153 (myexponent & 0x7fffLL); 3154 return APInt(80, words); 3155 } 3156 3157 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const { 3158 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy); 3159 assert(partCount()==2); 3160 3161 uint64_t words[2]; 3162 opStatus fs; 3163 bool losesInfo; 3164 3165 // Convert number to double. To avoid spurious underflows, we re- 3166 // normalize against the "double" minExponent first, and only *then* 3167 // truncate the mantissa. The result of that second conversion 3168 // may be inexact, but should never underflow. 3169 // Declare fltSemantics before APFloat that uses it (and 3170 // saves pointer to it) to ensure correct destruction order. 3171 fltSemantics extendedSemantics = *semantics; 3172 extendedSemantics.minExponent = semIEEEdouble.minExponent; 3173 IEEEFloat extended(*this); 3174 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3175 assert(fs == opOK && !losesInfo); 3176 (void)fs; 3177 3178 IEEEFloat u(extended); 3179 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3180 assert(fs == opOK || fs == opInexact); 3181 (void)fs; 3182 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 3183 3184 // If conversion was exact or resulted in a special case, we're done; 3185 // just set the second double to zero. Otherwise, re-convert back to 3186 // the extended format and compute the difference. This now should 3187 // convert exactly to double. 3188 if (u.isFiniteNonZero() && losesInfo) { 3189 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3190 assert(fs == opOK && !losesInfo); 3191 (void)fs; 3192 3193 IEEEFloat v(extended); 3194 v.subtract(u, rmNearestTiesToEven); 3195 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3196 assert(fs == opOK && !losesInfo); 3197 (void)fs; 3198 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 3199 } else { 3200 words[1] = 0; 3201 } 3202 3203 return APInt(128, words); 3204 } 3205 3206 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const { 3207 assert(semantics == (const llvm::fltSemantics*)&semIEEEquad); 3208 assert(partCount()==2); 3209 3210 uint64_t myexponent, mysignificand, mysignificand2; 3211 3212 if (isFiniteNonZero()) { 3213 myexponent = exponent+16383; //bias 3214 mysignificand = significandParts()[0]; 3215 mysignificand2 = significandParts()[1]; 3216 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) 3217 myexponent = 0; // denormal 3218 } else if (category==fcZero) { 3219 myexponent = 0; 3220 mysignificand = mysignificand2 = 0; 3221 } else if (category==fcInfinity) { 3222 myexponent = 0x7fff; 3223 mysignificand = mysignificand2 = 0; 3224 } else { 3225 assert(category == fcNaN && "Unknown category!"); 3226 myexponent = 0x7fff; 3227 mysignificand = significandParts()[0]; 3228 mysignificand2 = significandParts()[1]; 3229 } 3230 3231 uint64_t words[2]; 3232 words[0] = mysignificand; 3233 words[1] = ((uint64_t)(sign & 1) << 63) | 3234 ((myexponent & 0x7fff) << 48) | 3235 (mysignificand2 & 0xffffffffffffLL); 3236 3237 return APInt(128, words); 3238 } 3239 3240 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const { 3241 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble); 3242 assert(partCount()==1); 3243 3244 uint64_t myexponent, mysignificand; 3245 3246 if (isFiniteNonZero()) { 3247 myexponent = exponent+1023; //bias 3248 mysignificand = *significandParts(); 3249 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 3250 myexponent = 0; // denormal 3251 } else if (category==fcZero) { 3252 myexponent = 0; 3253 mysignificand = 0; 3254 } else if (category==fcInfinity) { 3255 myexponent = 0x7ff; 3256 mysignificand = 0; 3257 } else { 3258 assert(category == fcNaN && "Unknown category!"); 3259 myexponent = 0x7ff; 3260 mysignificand = *significandParts(); 3261 } 3262 3263 return APInt(64, ((((uint64_t)(sign & 1) << 63) | 3264 ((myexponent & 0x7ff) << 52) | 3265 (mysignificand & 0xfffffffffffffLL)))); 3266 } 3267 3268 APInt IEEEFloat::convertFloatAPFloatToAPInt() const { 3269 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle); 3270 assert(partCount()==1); 3271 3272 uint32_t myexponent, mysignificand; 3273 3274 if (isFiniteNonZero()) { 3275 myexponent = exponent+127; //bias 3276 mysignificand = (uint32_t)*significandParts(); 3277 if (myexponent == 1 && !(mysignificand & 0x800000)) 3278 myexponent = 0; // denormal 3279 } else if (category==fcZero) { 3280 myexponent = 0; 3281 mysignificand = 0; 3282 } else if (category==fcInfinity) { 3283 myexponent = 0xff; 3284 mysignificand = 0; 3285 } else { 3286 assert(category == fcNaN && "Unknown category!"); 3287 myexponent = 0xff; 3288 mysignificand = (uint32_t)*significandParts(); 3289 } 3290 3291 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 3292 (mysignificand & 0x7fffff))); 3293 } 3294 3295 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const { 3296 assert(semantics == (const llvm::fltSemantics *)&semBFloat); 3297 assert(partCount() == 1); 3298 3299 uint32_t myexponent, mysignificand; 3300 3301 if (isFiniteNonZero()) { 3302 myexponent = exponent + 127; // bias 3303 mysignificand = (uint32_t)*significandParts(); 3304 if (myexponent == 1 && !(mysignificand & 0x80)) 3305 myexponent = 0; // denormal 3306 } else if (category == fcZero) { 3307 myexponent = 0; 3308 mysignificand = 0; 3309 } else if (category == fcInfinity) { 3310 myexponent = 0xff; 3311 mysignificand = 0; 3312 } else { 3313 assert(category == fcNaN && "Unknown category!"); 3314 myexponent = 0xff; 3315 mysignificand = (uint32_t)*significandParts(); 3316 } 3317 3318 return APInt(16, (((sign & 1) << 15) | ((myexponent & 0xff) << 7) | 3319 (mysignificand & 0x7f))); 3320 } 3321 3322 APInt IEEEFloat::convertHalfAPFloatToAPInt() const { 3323 assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf); 3324 assert(partCount()==1); 3325 3326 uint32_t myexponent, mysignificand; 3327 3328 if (isFiniteNonZero()) { 3329 myexponent = exponent+15; //bias 3330 mysignificand = (uint32_t)*significandParts(); 3331 if (myexponent == 1 && !(mysignificand & 0x400)) 3332 myexponent = 0; // denormal 3333 } else if (category==fcZero) { 3334 myexponent = 0; 3335 mysignificand = 0; 3336 } else if (category==fcInfinity) { 3337 myexponent = 0x1f; 3338 mysignificand = 0; 3339 } else { 3340 assert(category == fcNaN && "Unknown category!"); 3341 myexponent = 0x1f; 3342 mysignificand = (uint32_t)*significandParts(); 3343 } 3344 3345 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | 3346 (mysignificand & 0x3ff))); 3347 } 3348 3349 // This function creates an APInt that is just a bit map of the floating 3350 // point constant as it would appear in memory. It is not a conversion, 3351 // and treating the result as a normal integer is unlikely to be useful. 3352 3353 APInt IEEEFloat::bitcastToAPInt() const { 3354 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf) 3355 return convertHalfAPFloatToAPInt(); 3356 3357 if (semantics == (const llvm::fltSemantics *)&semBFloat) 3358 return convertBFloatAPFloatToAPInt(); 3359 3360 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle) 3361 return convertFloatAPFloatToAPInt(); 3362 3363 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble) 3364 return convertDoubleAPFloatToAPInt(); 3365 3366 if (semantics == (const llvm::fltSemantics*)&semIEEEquad) 3367 return convertQuadrupleAPFloatToAPInt(); 3368 3369 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy) 3370 return convertPPCDoubleDoubleAPFloatToAPInt(); 3371 3372 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended && 3373 "unknown format!"); 3374 return convertF80LongDoubleAPFloatToAPInt(); 3375 } 3376 3377 float IEEEFloat::convertToFloat() const { 3378 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle && 3379 "Float semantics are not IEEEsingle"); 3380 APInt api = bitcastToAPInt(); 3381 return api.bitsToFloat(); 3382 } 3383 3384 double IEEEFloat::convertToDouble() const { 3385 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble && 3386 "Float semantics are not IEEEdouble"); 3387 APInt api = bitcastToAPInt(); 3388 return api.bitsToDouble(); 3389 } 3390 3391 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3392 /// does not support these bit patterns: 3393 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3394 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3395 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3396 /// exponent = 0, integer bit 1 ("pseudodenormal") 3397 /// At the moment, the first three are treated as NaNs, the last one as Normal. 3398 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) { 3399 assert(api.getBitWidth()==80); 3400 uint64_t i1 = api.getRawData()[0]; 3401 uint64_t i2 = api.getRawData()[1]; 3402 uint64_t myexponent = (i2 & 0x7fff); 3403 uint64_t mysignificand = i1; 3404 uint8_t myintegerbit = mysignificand >> 63; 3405 3406 initialize(&semX87DoubleExtended); 3407 assert(partCount()==2); 3408 3409 sign = static_cast<unsigned int>(i2>>15); 3410 if (myexponent == 0 && mysignificand == 0) { 3411 makeZero(sign); 3412 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3413 makeInf(sign); 3414 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) || 3415 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) { 3416 category = fcNaN; 3417 exponent = exponentNaN(); 3418 significandParts()[0] = mysignificand; 3419 significandParts()[1] = 0; 3420 } else { 3421 category = fcNormal; 3422 exponent = myexponent - 16383; 3423 significandParts()[0] = mysignificand; 3424 significandParts()[1] = 0; 3425 if (myexponent==0) // denormal 3426 exponent = -16382; 3427 } 3428 } 3429 3430 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) { 3431 assert(api.getBitWidth()==128); 3432 uint64_t i1 = api.getRawData()[0]; 3433 uint64_t i2 = api.getRawData()[1]; 3434 opStatus fs; 3435 bool losesInfo; 3436 3437 // Get the first double and convert to our format. 3438 initFromDoubleAPInt(APInt(64, i1)); 3439 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3440 assert(fs == opOK && !losesInfo); 3441 (void)fs; 3442 3443 // Unless we have a special case, add in second double. 3444 if (isFiniteNonZero()) { 3445 IEEEFloat v(semIEEEdouble, APInt(64, i2)); 3446 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3447 assert(fs == opOK && !losesInfo); 3448 (void)fs; 3449 3450 add(v, rmNearestTiesToEven); 3451 } 3452 } 3453 3454 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) { 3455 assert(api.getBitWidth()==128); 3456 uint64_t i1 = api.getRawData()[0]; 3457 uint64_t i2 = api.getRawData()[1]; 3458 uint64_t myexponent = (i2 >> 48) & 0x7fff; 3459 uint64_t mysignificand = i1; 3460 uint64_t mysignificand2 = i2 & 0xffffffffffffLL; 3461 3462 initialize(&semIEEEquad); 3463 assert(partCount()==2); 3464 3465 sign = static_cast<unsigned int>(i2>>63); 3466 if (myexponent==0 && 3467 (mysignificand==0 && mysignificand2==0)) { 3468 makeZero(sign); 3469 } else if (myexponent==0x7fff && 3470 (mysignificand==0 && mysignificand2==0)) { 3471 makeInf(sign); 3472 } else if (myexponent==0x7fff && 3473 (mysignificand!=0 || mysignificand2 !=0)) { 3474 category = fcNaN; 3475 exponent = exponentNaN(); 3476 significandParts()[0] = mysignificand; 3477 significandParts()[1] = mysignificand2; 3478 } else { 3479 category = fcNormal; 3480 exponent = myexponent - 16383; 3481 significandParts()[0] = mysignificand; 3482 significandParts()[1] = mysignificand2; 3483 if (myexponent==0) // denormal 3484 exponent = -16382; 3485 else 3486 significandParts()[1] |= 0x1000000000000LL; // integer bit 3487 } 3488 } 3489 3490 void IEEEFloat::initFromDoubleAPInt(const APInt &api) { 3491 assert(api.getBitWidth()==64); 3492 uint64_t i = *api.getRawData(); 3493 uint64_t myexponent = (i >> 52) & 0x7ff; 3494 uint64_t mysignificand = i & 0xfffffffffffffLL; 3495 3496 initialize(&semIEEEdouble); 3497 assert(partCount()==1); 3498 3499 sign = static_cast<unsigned int>(i>>63); 3500 if (myexponent==0 && mysignificand==0) { 3501 makeZero(sign); 3502 } else if (myexponent==0x7ff && mysignificand==0) { 3503 makeInf(sign); 3504 } else if (myexponent==0x7ff && mysignificand!=0) { 3505 category = fcNaN; 3506 exponent = exponentNaN(); 3507 *significandParts() = mysignificand; 3508 } else { 3509 category = fcNormal; 3510 exponent = myexponent - 1023; 3511 *significandParts() = mysignificand; 3512 if (myexponent==0) // denormal 3513 exponent = -1022; 3514 else 3515 *significandParts() |= 0x10000000000000LL; // integer bit 3516 } 3517 } 3518 3519 void IEEEFloat::initFromFloatAPInt(const APInt &api) { 3520 assert(api.getBitWidth()==32); 3521 uint32_t i = (uint32_t)*api.getRawData(); 3522 uint32_t myexponent = (i >> 23) & 0xff; 3523 uint32_t mysignificand = i & 0x7fffff; 3524 3525 initialize(&semIEEEsingle); 3526 assert(partCount()==1); 3527 3528 sign = i >> 31; 3529 if (myexponent==0 && mysignificand==0) { 3530 makeZero(sign); 3531 } else if (myexponent==0xff && mysignificand==0) { 3532 makeInf(sign); 3533 } else if (myexponent==0xff && mysignificand!=0) { 3534 category = fcNaN; 3535 exponent = exponentNaN(); 3536 *significandParts() = mysignificand; 3537 } else { 3538 category = fcNormal; 3539 exponent = myexponent - 127; //bias 3540 *significandParts() = mysignificand; 3541 if (myexponent==0) // denormal 3542 exponent = -126; 3543 else 3544 *significandParts() |= 0x800000; // integer bit 3545 } 3546 } 3547 3548 void IEEEFloat::initFromBFloatAPInt(const APInt &api) { 3549 assert(api.getBitWidth() == 16); 3550 uint32_t i = (uint32_t)*api.getRawData(); 3551 uint32_t myexponent = (i >> 7) & 0xff; 3552 uint32_t mysignificand = i & 0x7f; 3553 3554 initialize(&semBFloat); 3555 assert(partCount() == 1); 3556 3557 sign = i >> 15; 3558 if (myexponent == 0 && mysignificand == 0) { 3559 makeZero(sign); 3560 } else if (myexponent == 0xff && mysignificand == 0) { 3561 makeInf(sign); 3562 } else if (myexponent == 0xff && mysignificand != 0) { 3563 category = fcNaN; 3564 exponent = exponentNaN(); 3565 *significandParts() = mysignificand; 3566 } else { 3567 category = fcNormal; 3568 exponent = myexponent - 127; // bias 3569 *significandParts() = mysignificand; 3570 if (myexponent == 0) // denormal 3571 exponent = -126; 3572 else 3573 *significandParts() |= 0x80; // integer bit 3574 } 3575 } 3576 3577 void IEEEFloat::initFromHalfAPInt(const APInt &api) { 3578 assert(api.getBitWidth()==16); 3579 uint32_t i = (uint32_t)*api.getRawData(); 3580 uint32_t myexponent = (i >> 10) & 0x1f; 3581 uint32_t mysignificand = i & 0x3ff; 3582 3583 initialize(&semIEEEhalf); 3584 assert(partCount()==1); 3585 3586 sign = i >> 15; 3587 if (myexponent==0 && mysignificand==0) { 3588 makeZero(sign); 3589 } else if (myexponent==0x1f && mysignificand==0) { 3590 makeInf(sign); 3591 } else if (myexponent==0x1f && mysignificand!=0) { 3592 category = fcNaN; 3593 exponent = exponentNaN(); 3594 *significandParts() = mysignificand; 3595 } else { 3596 category = fcNormal; 3597 exponent = myexponent - 15; //bias 3598 *significandParts() = mysignificand; 3599 if (myexponent==0) // denormal 3600 exponent = -14; 3601 else 3602 *significandParts() |= 0x400; // integer bit 3603 } 3604 } 3605 3606 /// Treat api as containing the bits of a floating point number. Currently 3607 /// we infer the floating point type from the size of the APInt. The 3608 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful 3609 /// when the size is anything else). 3610 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { 3611 if (Sem == &semIEEEhalf) 3612 return initFromHalfAPInt(api); 3613 if (Sem == &semBFloat) 3614 return initFromBFloatAPInt(api); 3615 if (Sem == &semIEEEsingle) 3616 return initFromFloatAPInt(api); 3617 if (Sem == &semIEEEdouble) 3618 return initFromDoubleAPInt(api); 3619 if (Sem == &semX87DoubleExtended) 3620 return initFromF80LongDoubleAPInt(api); 3621 if (Sem == &semIEEEquad) 3622 return initFromQuadrupleAPInt(api); 3623 if (Sem == &semPPCDoubleDoubleLegacy) 3624 return initFromPPCDoubleDoubleAPInt(api); 3625 3626 llvm_unreachable(nullptr); 3627 } 3628 3629 /// Make this number the largest magnitude normal number in the given 3630 /// semantics. 3631 void IEEEFloat::makeLargest(bool Negative) { 3632 // We want (in interchange format): 3633 // sign = {Negative} 3634 // exponent = 1..10 3635 // significand = 1..1 3636 category = fcNormal; 3637 sign = Negative; 3638 exponent = semantics->maxExponent; 3639 3640 // Use memset to set all but the highest integerPart to all ones. 3641 integerPart *significand = significandParts(); 3642 unsigned PartCount = partCount(); 3643 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); 3644 3645 // Set the high integerPart especially setting all unused top bits for 3646 // internal consistency. 3647 const unsigned NumUnusedHighBits = 3648 PartCount*integerPartWidth - semantics->precision; 3649 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth) 3650 ? (~integerPart(0) >> NumUnusedHighBits) 3651 : 0; 3652 } 3653 3654 /// Make this number the smallest magnitude denormal number in the given 3655 /// semantics. 3656 void IEEEFloat::makeSmallest(bool Negative) { 3657 // We want (in interchange format): 3658 // sign = {Negative} 3659 // exponent = 0..0 3660 // significand = 0..01 3661 category = fcNormal; 3662 sign = Negative; 3663 exponent = semantics->minExponent; 3664 APInt::tcSet(significandParts(), 1, partCount()); 3665 } 3666 3667 void IEEEFloat::makeSmallestNormalized(bool Negative) { 3668 // We want (in interchange format): 3669 // sign = {Negative} 3670 // exponent = 0..0 3671 // significand = 10..0 3672 3673 category = fcNormal; 3674 zeroSignificand(); 3675 sign = Negative; 3676 exponent = semantics->minExponent; 3677 significandParts()[partCountForBits(semantics->precision) - 1] |= 3678 (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth)); 3679 } 3680 3681 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) { 3682 initFromAPInt(&Sem, API); 3683 } 3684 3685 IEEEFloat::IEEEFloat(float f) { 3686 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f)); 3687 } 3688 3689 IEEEFloat::IEEEFloat(double d) { 3690 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d)); 3691 } 3692 3693 namespace { 3694 void append(SmallVectorImpl<char> &Buffer, StringRef Str) { 3695 Buffer.append(Str.begin(), Str.end()); 3696 } 3697 3698 /// Removes data from the given significand until it is no more 3699 /// precise than is required for the desired precision. 3700 void AdjustToPrecision(APInt &significand, 3701 int &exp, unsigned FormatPrecision) { 3702 unsigned bits = significand.getActiveBits(); 3703 3704 // 196/59 is a very slight overestimate of lg_2(10). 3705 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59; 3706 3707 if (bits <= bitsRequired) return; 3708 3709 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; 3710 if (!tensRemovable) return; 3711 3712 exp += tensRemovable; 3713 3714 APInt divisor(significand.getBitWidth(), 1); 3715 APInt powten(significand.getBitWidth(), 10); 3716 while (true) { 3717 if (tensRemovable & 1) 3718 divisor *= powten; 3719 tensRemovable >>= 1; 3720 if (!tensRemovable) break; 3721 powten *= powten; 3722 } 3723 3724 significand = significand.udiv(divisor); 3725 3726 // Truncate the significand down to its active bit count. 3727 significand = significand.trunc(significand.getActiveBits()); 3728 } 3729 3730 3731 void AdjustToPrecision(SmallVectorImpl<char> &buffer, 3732 int &exp, unsigned FormatPrecision) { 3733 unsigned N = buffer.size(); 3734 if (N <= FormatPrecision) return; 3735 3736 // The most significant figures are the last ones in the buffer. 3737 unsigned FirstSignificant = N - FormatPrecision; 3738 3739 // Round. 3740 // FIXME: this probably shouldn't use 'round half up'. 3741 3742 // Rounding down is just a truncation, except we also want to drop 3743 // trailing zeros from the new result. 3744 if (buffer[FirstSignificant - 1] < '5') { 3745 while (FirstSignificant < N && buffer[FirstSignificant] == '0') 3746 FirstSignificant++; 3747 3748 exp += FirstSignificant; 3749 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3750 return; 3751 } 3752 3753 // Rounding up requires a decimal add-with-carry. If we continue 3754 // the carry, the newly-introduced zeros will just be truncated. 3755 for (unsigned I = FirstSignificant; I != N; ++I) { 3756 if (buffer[I] == '9') { 3757 FirstSignificant++; 3758 } else { 3759 buffer[I]++; 3760 break; 3761 } 3762 } 3763 3764 // If we carried through, we have exactly one digit of precision. 3765 if (FirstSignificant == N) { 3766 exp += FirstSignificant; 3767 buffer.clear(); 3768 buffer.push_back('1'); 3769 return; 3770 } 3771 3772 exp += FirstSignificant; 3773 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3774 } 3775 } // namespace 3776 3777 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, 3778 unsigned FormatMaxPadding, bool TruncateZero) const { 3779 switch (category) { 3780 case fcInfinity: 3781 if (isNegative()) 3782 return append(Str, "-Inf"); 3783 else 3784 return append(Str, "+Inf"); 3785 3786 case fcNaN: return append(Str, "NaN"); 3787 3788 case fcZero: 3789 if (isNegative()) 3790 Str.push_back('-'); 3791 3792 if (!FormatMaxPadding) { 3793 if (TruncateZero) 3794 append(Str, "0.0E+0"); 3795 else { 3796 append(Str, "0.0"); 3797 if (FormatPrecision > 1) 3798 Str.append(FormatPrecision - 1, '0'); 3799 append(Str, "e+00"); 3800 } 3801 } else 3802 Str.push_back('0'); 3803 return; 3804 3805 case fcNormal: 3806 break; 3807 } 3808 3809 if (isNegative()) 3810 Str.push_back('-'); 3811 3812 // Decompose the number into an APInt and an exponent. 3813 int exp = exponent - ((int) semantics->precision - 1); 3814 APInt significand(semantics->precision, 3815 makeArrayRef(significandParts(), 3816 partCountForBits(semantics->precision))); 3817 3818 // Set FormatPrecision if zero. We want to do this before we 3819 // truncate trailing zeros, as those are part of the precision. 3820 if (!FormatPrecision) { 3821 // We use enough digits so the number can be round-tripped back to an 3822 // APFloat. The formula comes from "How to Print Floating-Point Numbers 3823 // Accurately" by Steele and White. 3824 // FIXME: Using a formula based purely on the precision is conservative; 3825 // we can print fewer digits depending on the actual value being printed. 3826 3827 // FormatPrecision = 2 + floor(significandBits / lg_2(10)) 3828 FormatPrecision = 2 + semantics->precision * 59 / 196; 3829 } 3830 3831 // Ignore trailing binary zeros. 3832 int trailingZeros = significand.countTrailingZeros(); 3833 exp += trailingZeros; 3834 significand.lshrInPlace(trailingZeros); 3835 3836 // Change the exponent from 2^e to 10^e. 3837 if (exp == 0) { 3838 // Nothing to do. 3839 } else if (exp > 0) { 3840 // Just shift left. 3841 significand = significand.zext(semantics->precision + exp); 3842 significand <<= exp; 3843 exp = 0; 3844 } else { /* exp < 0 */ 3845 int texp = -exp; 3846 3847 // We transform this using the identity: 3848 // (N)(2^-e) == (N)(5^e)(10^-e) 3849 // This means we have to multiply N (the significand) by 5^e. 3850 // To avoid overflow, we have to operate on numbers large 3851 // enough to store N * 5^e: 3852 // log2(N * 5^e) == log2(N) + e * log2(5) 3853 // <= semantics->precision + e * 137 / 59 3854 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) 3855 3856 unsigned precision = semantics->precision + (137 * texp + 136) / 59; 3857 3858 // Multiply significand by 5^e. 3859 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) 3860 significand = significand.zext(precision); 3861 APInt five_to_the_i(precision, 5); 3862 while (true) { 3863 if (texp & 1) significand *= five_to_the_i; 3864 3865 texp >>= 1; 3866 if (!texp) break; 3867 five_to_the_i *= five_to_the_i; 3868 } 3869 } 3870 3871 AdjustToPrecision(significand, exp, FormatPrecision); 3872 3873 SmallVector<char, 256> buffer; 3874 3875 // Fill the buffer. 3876 unsigned precision = significand.getBitWidth(); 3877 APInt ten(precision, 10); 3878 APInt digit(precision, 0); 3879 3880 bool inTrail = true; 3881 while (significand != 0) { 3882 // digit <- significand % 10 3883 // significand <- significand / 10 3884 APInt::udivrem(significand, ten, significand, digit); 3885 3886 unsigned d = digit.getZExtValue(); 3887 3888 // Drop trailing zeros. 3889 if (inTrail && !d) exp++; 3890 else { 3891 buffer.push_back((char) ('0' + d)); 3892 inTrail = false; 3893 } 3894 } 3895 3896 assert(!buffer.empty() && "no characters in buffer!"); 3897 3898 // Drop down to FormatPrecision. 3899 // TODO: don't do more precise calculations above than are required. 3900 AdjustToPrecision(buffer, exp, FormatPrecision); 3901 3902 unsigned NDigits = buffer.size(); 3903 3904 // Check whether we should use scientific notation. 3905 bool FormatScientific; 3906 if (!FormatMaxPadding) 3907 FormatScientific = true; 3908 else { 3909 if (exp >= 0) { 3910 // 765e3 --> 765000 3911 // ^^^ 3912 // But we shouldn't make the number look more precise than it is. 3913 FormatScientific = ((unsigned) exp > FormatMaxPadding || 3914 NDigits + (unsigned) exp > FormatPrecision); 3915 } else { 3916 // Power of the most significant digit. 3917 int MSD = exp + (int) (NDigits - 1); 3918 if (MSD >= 0) { 3919 // 765e-2 == 7.65 3920 FormatScientific = false; 3921 } else { 3922 // 765e-5 == 0.00765 3923 // ^ ^^ 3924 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; 3925 } 3926 } 3927 } 3928 3929 // Scientific formatting is pretty straightforward. 3930 if (FormatScientific) { 3931 exp += (NDigits - 1); 3932 3933 Str.push_back(buffer[NDigits-1]); 3934 Str.push_back('.'); 3935 if (NDigits == 1 && TruncateZero) 3936 Str.push_back('0'); 3937 else 3938 for (unsigned I = 1; I != NDigits; ++I) 3939 Str.push_back(buffer[NDigits-1-I]); 3940 // Fill with zeros up to FormatPrecision. 3941 if (!TruncateZero && FormatPrecision > NDigits - 1) 3942 Str.append(FormatPrecision - NDigits + 1, '0'); 3943 // For !TruncateZero we use lower 'e'. 3944 Str.push_back(TruncateZero ? 'E' : 'e'); 3945 3946 Str.push_back(exp >= 0 ? '+' : '-'); 3947 if (exp < 0) exp = -exp; 3948 SmallVector<char, 6> expbuf; 3949 do { 3950 expbuf.push_back((char) ('0' + (exp % 10))); 3951 exp /= 10; 3952 } while (exp); 3953 // Exponent always at least two digits if we do not truncate zeros. 3954 if (!TruncateZero && expbuf.size() < 2) 3955 expbuf.push_back('0'); 3956 for (unsigned I = 0, E = expbuf.size(); I != E; ++I) 3957 Str.push_back(expbuf[E-1-I]); 3958 return; 3959 } 3960 3961 // Non-scientific, positive exponents. 3962 if (exp >= 0) { 3963 for (unsigned I = 0; I != NDigits; ++I) 3964 Str.push_back(buffer[NDigits-1-I]); 3965 for (unsigned I = 0; I != (unsigned) exp; ++I) 3966 Str.push_back('0'); 3967 return; 3968 } 3969 3970 // Non-scientific, negative exponents. 3971 3972 // The number of digits to the left of the decimal point. 3973 int NWholeDigits = exp + (int) NDigits; 3974 3975 unsigned I = 0; 3976 if (NWholeDigits > 0) { 3977 for (; I != (unsigned) NWholeDigits; ++I) 3978 Str.push_back(buffer[NDigits-I-1]); 3979 Str.push_back('.'); 3980 } else { 3981 unsigned NZeros = 1 + (unsigned) -NWholeDigits; 3982 3983 Str.push_back('0'); 3984 Str.push_back('.'); 3985 for (unsigned Z = 1; Z != NZeros; ++Z) 3986 Str.push_back('0'); 3987 } 3988 3989 for (; I != NDigits; ++I) 3990 Str.push_back(buffer[NDigits-I-1]); 3991 } 3992 3993 bool IEEEFloat::getExactInverse(APFloat *inv) const { 3994 // Special floats and denormals have no exact inverse. 3995 if (!isFiniteNonZero()) 3996 return false; 3997 3998 // Check that the number is a power of two by making sure that only the 3999 // integer bit is set in the significand. 4000 if (significandLSB() != semantics->precision - 1) 4001 return false; 4002 4003 // Get the inverse. 4004 IEEEFloat reciprocal(*semantics, 1ULL); 4005 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) 4006 return false; 4007 4008 // Avoid multiplication with a denormal, it is not safe on all platforms and 4009 // may be slower than a normal division. 4010 if (reciprocal.isDenormal()) 4011 return false; 4012 4013 assert(reciprocal.isFiniteNonZero() && 4014 reciprocal.significandLSB() == reciprocal.semantics->precision - 1); 4015 4016 if (inv) 4017 *inv = APFloat(reciprocal, *semantics); 4018 4019 return true; 4020 } 4021 4022 bool IEEEFloat::isSignaling() const { 4023 if (!isNaN()) 4024 return false; 4025 4026 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the 4027 // first bit of the trailing significand being 0. 4028 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2); 4029 } 4030 4031 /// IEEE-754R 2008 5.3.1: nextUp/nextDown. 4032 /// 4033 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with 4034 /// appropriate sign switching before/after the computation. 4035 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) { 4036 // If we are performing nextDown, swap sign so we have -x. 4037 if (nextDown) 4038 changeSign(); 4039 4040 // Compute nextUp(x) 4041 opStatus result = opOK; 4042 4043 // Handle each float category separately. 4044 switch (category) { 4045 case fcInfinity: 4046 // nextUp(+inf) = +inf 4047 if (!isNegative()) 4048 break; 4049 // nextUp(-inf) = -getLargest() 4050 makeLargest(true); 4051 break; 4052 case fcNaN: 4053 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. 4054 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not 4055 // change the payload. 4056 if (isSignaling()) { 4057 result = opInvalidOp; 4058 // For consistency, propagate the sign of the sNaN to the qNaN. 4059 makeNaN(false, isNegative(), nullptr); 4060 } 4061 break; 4062 case fcZero: 4063 // nextUp(pm 0) = +getSmallest() 4064 makeSmallest(false); 4065 break; 4066 case fcNormal: 4067 // nextUp(-getSmallest()) = -0 4068 if (isSmallest() && isNegative()) { 4069 APInt::tcSet(significandParts(), 0, partCount()); 4070 category = fcZero; 4071 exponent = 0; 4072 break; 4073 } 4074 4075 // nextUp(getLargest()) == INFINITY 4076 if (isLargest() && !isNegative()) { 4077 APInt::tcSet(significandParts(), 0, partCount()); 4078 category = fcInfinity; 4079 exponent = semantics->maxExponent + 1; 4080 break; 4081 } 4082 4083 // nextUp(normal) == normal + inc. 4084 if (isNegative()) { 4085 // If we are negative, we need to decrement the significand. 4086 4087 // We only cross a binade boundary that requires adjusting the exponent 4088 // if: 4089 // 1. exponent != semantics->minExponent. This implies we are not in the 4090 // smallest binade or are dealing with denormals. 4091 // 2. Our significand excluding the integral bit is all zeros. 4092 bool WillCrossBinadeBoundary = 4093 exponent != semantics->minExponent && isSignificandAllZeros(); 4094 4095 // Decrement the significand. 4096 // 4097 // We always do this since: 4098 // 1. If we are dealing with a non-binade decrement, by definition we 4099 // just decrement the significand. 4100 // 2. If we are dealing with a normal -> normal binade decrement, since 4101 // we have an explicit integral bit the fact that all bits but the 4102 // integral bit are zero implies that subtracting one will yield a 4103 // significand with 0 integral bit and 1 in all other spots. Thus we 4104 // must just adjust the exponent and set the integral bit to 1. 4105 // 3. If we are dealing with a normal -> denormal binade decrement, 4106 // since we set the integral bit to 0 when we represent denormals, we 4107 // just decrement the significand. 4108 integerPart *Parts = significandParts(); 4109 APInt::tcDecrement(Parts, partCount()); 4110 4111 if (WillCrossBinadeBoundary) { 4112 // Our result is a normal number. Do the following: 4113 // 1. Set the integral bit to 1. 4114 // 2. Decrement the exponent. 4115 APInt::tcSetBit(Parts, semantics->precision - 1); 4116 exponent--; 4117 } 4118 } else { 4119 // If we are positive, we need to increment the significand. 4120 4121 // We only cross a binade boundary that requires adjusting the exponent if 4122 // the input is not a denormal and all of said input's significand bits 4123 // are set. If all of said conditions are true: clear the significand, set 4124 // the integral bit to 1, and increment the exponent. If we have a 4125 // denormal always increment since moving denormals and the numbers in the 4126 // smallest normal binade have the same exponent in our representation. 4127 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes(); 4128 4129 if (WillCrossBinadeBoundary) { 4130 integerPart *Parts = significandParts(); 4131 APInt::tcSet(Parts, 0, partCount()); 4132 APInt::tcSetBit(Parts, semantics->precision - 1); 4133 assert(exponent != semantics->maxExponent && 4134 "We can not increment an exponent beyond the maxExponent allowed" 4135 " by the given floating point semantics."); 4136 exponent++; 4137 } else { 4138 incrementSignificand(); 4139 } 4140 } 4141 break; 4142 } 4143 4144 // If we are performing nextDown, swap sign so we have -nextUp(-x) 4145 if (nextDown) 4146 changeSign(); 4147 4148 return result; 4149 } 4150 4151 APFloatBase::ExponentType IEEEFloat::exponentNaN() const { 4152 return semantics->maxExponent + 1; 4153 } 4154 4155 APFloatBase::ExponentType IEEEFloat::exponentInf() const { 4156 return semantics->maxExponent + 1; 4157 } 4158 4159 APFloatBase::ExponentType IEEEFloat::exponentZero() const { 4160 return semantics->minExponent - 1; 4161 } 4162 4163 void IEEEFloat::makeInf(bool Negative) { 4164 category = fcInfinity; 4165 sign = Negative; 4166 exponent = exponentInf(); 4167 APInt::tcSet(significandParts(), 0, partCount()); 4168 } 4169 4170 void IEEEFloat::makeZero(bool Negative) { 4171 category = fcZero; 4172 sign = Negative; 4173 exponent = exponentZero(); 4174 APInt::tcSet(significandParts(), 0, partCount()); 4175 } 4176 4177 void IEEEFloat::makeQuiet() { 4178 assert(isNaN()); 4179 APInt::tcSetBit(significandParts(), semantics->precision - 2); 4180 } 4181 4182 int ilogb(const IEEEFloat &Arg) { 4183 if (Arg.isNaN()) 4184 return IEEEFloat::IEK_NaN; 4185 if (Arg.isZero()) 4186 return IEEEFloat::IEK_Zero; 4187 if (Arg.isInfinity()) 4188 return IEEEFloat::IEK_Inf; 4189 if (!Arg.isDenormal()) 4190 return Arg.exponent; 4191 4192 IEEEFloat Normalized(Arg); 4193 int SignificandBits = Arg.getSemantics().precision - 1; 4194 4195 Normalized.exponent += SignificandBits; 4196 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero); 4197 return Normalized.exponent - SignificandBits; 4198 } 4199 4200 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) { 4201 auto MaxExp = X.getSemantics().maxExponent; 4202 auto MinExp = X.getSemantics().minExponent; 4203 4204 // If Exp is wildly out-of-scale, simply adding it to X.exponent will 4205 // overflow; clamp it to a safe range before adding, but ensure that the range 4206 // is large enough that the clamp does not change the result. The range we 4207 // need to support is the difference between the largest possible exponent and 4208 // the normalized exponent of half the smallest denormal. 4209 4210 int SignificandBits = X.getSemantics().precision - 1; 4211 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1; 4212 4213 // Clamp to one past the range ends to let normalize handle overlflow. 4214 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement); 4215 X.normalize(RoundingMode, lfExactlyZero); 4216 if (X.isNaN()) 4217 X.makeQuiet(); 4218 return X; 4219 } 4220 4221 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) { 4222 Exp = ilogb(Val); 4223 4224 // Quiet signalling nans. 4225 if (Exp == IEEEFloat::IEK_NaN) { 4226 IEEEFloat Quiet(Val); 4227 Quiet.makeQuiet(); 4228 return Quiet; 4229 } 4230 4231 if (Exp == IEEEFloat::IEK_Inf) 4232 return Val; 4233 4234 // 1 is added because frexp is defined to return a normalized fraction in 4235 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0). 4236 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1; 4237 return scalbn(Val, -Exp, RM); 4238 } 4239 4240 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S) 4241 : Semantics(&S), 4242 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) { 4243 assert(Semantics == &semPPCDoubleDouble); 4244 } 4245 4246 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag) 4247 : Semantics(&S), 4248 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized), 4249 APFloat(semIEEEdouble, uninitialized)}) { 4250 assert(Semantics == &semPPCDoubleDouble); 4251 } 4252 4253 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I) 4254 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I), 4255 APFloat(semIEEEdouble)}) { 4256 assert(Semantics == &semPPCDoubleDouble); 4257 } 4258 4259 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I) 4260 : Semantics(&S), 4261 Floats(new APFloat[2]{ 4262 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])), 4263 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) { 4264 assert(Semantics == &semPPCDoubleDouble); 4265 } 4266 4267 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First, 4268 APFloat &&Second) 4269 : Semantics(&S), 4270 Floats(new APFloat[2]{std::move(First), std::move(Second)}) { 4271 assert(Semantics == &semPPCDoubleDouble); 4272 assert(&Floats[0].getSemantics() == &semIEEEdouble); 4273 assert(&Floats[1].getSemantics() == &semIEEEdouble); 4274 } 4275 4276 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS) 4277 : Semantics(RHS.Semantics), 4278 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]), 4279 APFloat(RHS.Floats[1])} 4280 : nullptr) { 4281 assert(Semantics == &semPPCDoubleDouble); 4282 } 4283 4284 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS) 4285 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) { 4286 RHS.Semantics = &semBogus; 4287 assert(Semantics == &semPPCDoubleDouble); 4288 } 4289 4290 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) { 4291 if (Semantics == RHS.Semantics && RHS.Floats) { 4292 Floats[0] = RHS.Floats[0]; 4293 Floats[1] = RHS.Floats[1]; 4294 } else if (this != &RHS) { 4295 this->~DoubleAPFloat(); 4296 new (this) DoubleAPFloat(RHS); 4297 } 4298 return *this; 4299 } 4300 4301 // Implement addition, subtraction, multiplication and division based on: 4302 // "Software for Doubled-Precision Floating-Point Computations", 4303 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. 4304 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa, 4305 const APFloat &c, const APFloat &cc, 4306 roundingMode RM) { 4307 int Status = opOK; 4308 APFloat z = a; 4309 Status |= z.add(c, RM); 4310 if (!z.isFinite()) { 4311 if (!z.isInfinity()) { 4312 Floats[0] = std::move(z); 4313 Floats[1].makeZero(/* Neg = */ false); 4314 return (opStatus)Status; 4315 } 4316 Status = opOK; 4317 auto AComparedToC = a.compareAbsoluteValue(c); 4318 z = cc; 4319 Status |= z.add(aa, RM); 4320 if (AComparedToC == APFloat::cmpGreaterThan) { 4321 // z = cc + aa + c + a; 4322 Status |= z.add(c, RM); 4323 Status |= z.add(a, RM); 4324 } else { 4325 // z = cc + aa + a + c; 4326 Status |= z.add(a, RM); 4327 Status |= z.add(c, RM); 4328 } 4329 if (!z.isFinite()) { 4330 Floats[0] = std::move(z); 4331 Floats[1].makeZero(/* Neg = */ false); 4332 return (opStatus)Status; 4333 } 4334 Floats[0] = z; 4335 APFloat zz = aa; 4336 Status |= zz.add(cc, RM); 4337 if (AComparedToC == APFloat::cmpGreaterThan) { 4338 // Floats[1] = a - z + c + zz; 4339 Floats[1] = a; 4340 Status |= Floats[1].subtract(z, RM); 4341 Status |= Floats[1].add(c, RM); 4342 Status |= Floats[1].add(zz, RM); 4343 } else { 4344 // Floats[1] = c - z + a + zz; 4345 Floats[1] = c; 4346 Status |= Floats[1].subtract(z, RM); 4347 Status |= Floats[1].add(a, RM); 4348 Status |= Floats[1].add(zz, RM); 4349 } 4350 } else { 4351 // q = a - z; 4352 APFloat q = a; 4353 Status |= q.subtract(z, RM); 4354 4355 // zz = q + c + (a - (q + z)) + aa + cc; 4356 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. 4357 auto zz = q; 4358 Status |= zz.add(c, RM); 4359 Status |= q.add(z, RM); 4360 Status |= q.subtract(a, RM); 4361 q.changeSign(); 4362 Status |= zz.add(q, RM); 4363 Status |= zz.add(aa, RM); 4364 Status |= zz.add(cc, RM); 4365 if (zz.isZero() && !zz.isNegative()) { 4366 Floats[0] = std::move(z); 4367 Floats[1].makeZero(/* Neg = */ false); 4368 return opOK; 4369 } 4370 Floats[0] = z; 4371 Status |= Floats[0].add(zz, RM); 4372 if (!Floats[0].isFinite()) { 4373 Floats[1].makeZero(/* Neg = */ false); 4374 return (opStatus)Status; 4375 } 4376 Floats[1] = std::move(z); 4377 Status |= Floats[1].subtract(Floats[0], RM); 4378 Status |= Floats[1].add(zz, RM); 4379 } 4380 return (opStatus)Status; 4381 } 4382 4383 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS, 4384 const DoubleAPFloat &RHS, 4385 DoubleAPFloat &Out, 4386 roundingMode RM) { 4387 if (LHS.getCategory() == fcNaN) { 4388 Out = LHS; 4389 return opOK; 4390 } 4391 if (RHS.getCategory() == fcNaN) { 4392 Out = RHS; 4393 return opOK; 4394 } 4395 if (LHS.getCategory() == fcZero) { 4396 Out = RHS; 4397 return opOK; 4398 } 4399 if (RHS.getCategory() == fcZero) { 4400 Out = LHS; 4401 return opOK; 4402 } 4403 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity && 4404 LHS.isNegative() != RHS.isNegative()) { 4405 Out.makeNaN(false, Out.isNegative(), nullptr); 4406 return opInvalidOp; 4407 } 4408 if (LHS.getCategory() == fcInfinity) { 4409 Out = LHS; 4410 return opOK; 4411 } 4412 if (RHS.getCategory() == fcInfinity) { 4413 Out = RHS; 4414 return opOK; 4415 } 4416 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal); 4417 4418 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]), 4419 CC(RHS.Floats[1]); 4420 assert(&A.getSemantics() == &semIEEEdouble); 4421 assert(&AA.getSemantics() == &semIEEEdouble); 4422 assert(&C.getSemantics() == &semIEEEdouble); 4423 assert(&CC.getSemantics() == &semIEEEdouble); 4424 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble); 4425 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble); 4426 return Out.addImpl(A, AA, C, CC, RM); 4427 } 4428 4429 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS, 4430 roundingMode RM) { 4431 return addWithSpecial(*this, RHS, *this, RM); 4432 } 4433 4434 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS, 4435 roundingMode RM) { 4436 changeSign(); 4437 auto Ret = add(RHS, RM); 4438 changeSign(); 4439 return Ret; 4440 } 4441 4442 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS, 4443 APFloat::roundingMode RM) { 4444 const auto &LHS = *this; 4445 auto &Out = *this; 4446 /* Interesting observation: For special categories, finding the lowest 4447 common ancestor of the following layered graph gives the correct 4448 return category: 4449 4450 NaN 4451 / \ 4452 Zero Inf 4453 \ / 4454 Normal 4455 4456 e.g. NaN * NaN = NaN 4457 Zero * Inf = NaN 4458 Normal * Zero = Zero 4459 Normal * Inf = Inf 4460 */ 4461 if (LHS.getCategory() == fcNaN) { 4462 Out = LHS; 4463 return opOK; 4464 } 4465 if (RHS.getCategory() == fcNaN) { 4466 Out = RHS; 4467 return opOK; 4468 } 4469 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) || 4470 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) { 4471 Out.makeNaN(false, false, nullptr); 4472 return opOK; 4473 } 4474 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) { 4475 Out = LHS; 4476 return opOK; 4477 } 4478 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) { 4479 Out = RHS; 4480 return opOK; 4481 } 4482 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal && 4483 "Special cases not handled exhaustively"); 4484 4485 int Status = opOK; 4486 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1]; 4487 // t = a * c 4488 APFloat T = A; 4489 Status |= T.multiply(C, RM); 4490 if (!T.isFiniteNonZero()) { 4491 Floats[0] = T; 4492 Floats[1].makeZero(/* Neg = */ false); 4493 return (opStatus)Status; 4494 } 4495 4496 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t). 4497 APFloat Tau = A; 4498 T.changeSign(); 4499 Status |= Tau.fusedMultiplyAdd(C, T, RM); 4500 T.changeSign(); 4501 { 4502 // v = a * d 4503 APFloat V = A; 4504 Status |= V.multiply(D, RM); 4505 // w = b * c 4506 APFloat W = B; 4507 Status |= W.multiply(C, RM); 4508 Status |= V.add(W, RM); 4509 // tau += v + w 4510 Status |= Tau.add(V, RM); 4511 } 4512 // u = t + tau 4513 APFloat U = T; 4514 Status |= U.add(Tau, RM); 4515 4516 Floats[0] = U; 4517 if (!U.isFinite()) { 4518 Floats[1].makeZero(/* Neg = */ false); 4519 } else { 4520 // Floats[1] = (t - u) + tau 4521 Status |= T.subtract(U, RM); 4522 Status |= T.add(Tau, RM); 4523 Floats[1] = T; 4524 } 4525 return (opStatus)Status; 4526 } 4527 4528 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS, 4529 APFloat::roundingMode RM) { 4530 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4531 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4532 auto Ret = 4533 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM); 4534 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4535 return Ret; 4536 } 4537 4538 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) { 4539 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4540 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4541 auto Ret = 4542 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 4543 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4544 return Ret; 4545 } 4546 4547 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) { 4548 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4549 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4550 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 4551 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4552 return Ret; 4553 } 4554 4555 APFloat::opStatus 4556 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, 4557 const DoubleAPFloat &Addend, 4558 APFloat::roundingMode RM) { 4559 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4560 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4561 auto Ret = Tmp.fusedMultiplyAdd( 4562 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()), 4563 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM); 4564 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4565 return Ret; 4566 } 4567 4568 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) { 4569 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4570 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4571 auto Ret = Tmp.roundToIntegral(RM); 4572 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4573 return Ret; 4574 } 4575 4576 void DoubleAPFloat::changeSign() { 4577 Floats[0].changeSign(); 4578 Floats[1].changeSign(); 4579 } 4580 4581 APFloat::cmpResult 4582 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const { 4583 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]); 4584 if (Result != cmpEqual) 4585 return Result; 4586 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]); 4587 if (Result == cmpLessThan || Result == cmpGreaterThan) { 4588 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative(); 4589 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative(); 4590 if (Against && !RHSAgainst) 4591 return cmpLessThan; 4592 if (!Against && RHSAgainst) 4593 return cmpGreaterThan; 4594 if (!Against && !RHSAgainst) 4595 return Result; 4596 if (Against && RHSAgainst) 4597 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result); 4598 } 4599 return Result; 4600 } 4601 4602 APFloat::fltCategory DoubleAPFloat::getCategory() const { 4603 return Floats[0].getCategory(); 4604 } 4605 4606 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); } 4607 4608 void DoubleAPFloat::makeInf(bool Neg) { 4609 Floats[0].makeInf(Neg); 4610 Floats[1].makeZero(/* Neg = */ false); 4611 } 4612 4613 void DoubleAPFloat::makeZero(bool Neg) { 4614 Floats[0].makeZero(Neg); 4615 Floats[1].makeZero(/* Neg = */ false); 4616 } 4617 4618 void DoubleAPFloat::makeLargest(bool Neg) { 4619 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4620 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull)); 4621 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull)); 4622 if (Neg) 4623 changeSign(); 4624 } 4625 4626 void DoubleAPFloat::makeSmallest(bool Neg) { 4627 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4628 Floats[0].makeSmallest(Neg); 4629 Floats[1].makeZero(/* Neg = */ false); 4630 } 4631 4632 void DoubleAPFloat::makeSmallestNormalized(bool Neg) { 4633 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4634 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull)); 4635 if (Neg) 4636 Floats[0].changeSign(); 4637 Floats[1].makeZero(/* Neg = */ false); 4638 } 4639 4640 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) { 4641 Floats[0].makeNaN(SNaN, Neg, fill); 4642 Floats[1].makeZero(/* Neg = */ false); 4643 } 4644 4645 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const { 4646 auto Result = Floats[0].compare(RHS.Floats[0]); 4647 // |Float[0]| > |Float[1]| 4648 if (Result == APFloat::cmpEqual) 4649 return Floats[1].compare(RHS.Floats[1]); 4650 return Result; 4651 } 4652 4653 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const { 4654 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) && 4655 Floats[1].bitwiseIsEqual(RHS.Floats[1]); 4656 } 4657 4658 hash_code hash_value(const DoubleAPFloat &Arg) { 4659 if (Arg.Floats) 4660 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1])); 4661 return hash_combine(Arg.Semantics); 4662 } 4663 4664 APInt DoubleAPFloat::bitcastToAPInt() const { 4665 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4666 uint64_t Data[] = { 4667 Floats[0].bitcastToAPInt().getRawData()[0], 4668 Floats[1].bitcastToAPInt().getRawData()[0], 4669 }; 4670 return APInt(128, 2, Data); 4671 } 4672 4673 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S, 4674 roundingMode RM) { 4675 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4676 APFloat Tmp(semPPCDoubleDoubleLegacy); 4677 auto Ret = Tmp.convertFromString(S, RM); 4678 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4679 return Ret; 4680 } 4681 4682 APFloat::opStatus DoubleAPFloat::next(bool nextDown) { 4683 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4684 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4685 auto Ret = Tmp.next(nextDown); 4686 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4687 return Ret; 4688 } 4689 4690 APFloat::opStatus 4691 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input, 4692 unsigned int Width, bool IsSigned, 4693 roundingMode RM, bool *IsExact) const { 4694 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4695 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 4696 .convertToInteger(Input, Width, IsSigned, RM, IsExact); 4697 } 4698 4699 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input, 4700 bool IsSigned, 4701 roundingMode RM) { 4702 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4703 APFloat Tmp(semPPCDoubleDoubleLegacy); 4704 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM); 4705 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4706 return Ret; 4707 } 4708 4709 APFloat::opStatus 4710 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input, 4711 unsigned int InputSize, 4712 bool IsSigned, roundingMode RM) { 4713 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4714 APFloat Tmp(semPPCDoubleDoubleLegacy); 4715 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM); 4716 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4717 return Ret; 4718 } 4719 4720 APFloat::opStatus 4721 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input, 4722 unsigned int InputSize, 4723 bool IsSigned, roundingMode RM) { 4724 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4725 APFloat Tmp(semPPCDoubleDoubleLegacy); 4726 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM); 4727 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4728 return Ret; 4729 } 4730 4731 unsigned int DoubleAPFloat::convertToHexString(char *DST, 4732 unsigned int HexDigits, 4733 bool UpperCase, 4734 roundingMode RM) const { 4735 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4736 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 4737 .convertToHexString(DST, HexDigits, UpperCase, RM); 4738 } 4739 4740 bool DoubleAPFloat::isDenormal() const { 4741 return getCategory() == fcNormal && 4742 (Floats[0].isDenormal() || Floats[1].isDenormal() || 4743 // (double)(Hi + Lo) == Hi defines a normal number. 4744 Floats[0] != Floats[0] + Floats[1]); 4745 } 4746 4747 bool DoubleAPFloat::isSmallest() const { 4748 if (getCategory() != fcNormal) 4749 return false; 4750 DoubleAPFloat Tmp(*this); 4751 Tmp.makeSmallest(this->isNegative()); 4752 return Tmp.compare(*this) == cmpEqual; 4753 } 4754 4755 bool DoubleAPFloat::isLargest() const { 4756 if (getCategory() != fcNormal) 4757 return false; 4758 DoubleAPFloat Tmp(*this); 4759 Tmp.makeLargest(this->isNegative()); 4760 return Tmp.compare(*this) == cmpEqual; 4761 } 4762 4763 bool DoubleAPFloat::isInteger() const { 4764 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4765 return Floats[0].isInteger() && Floats[1].isInteger(); 4766 } 4767 4768 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str, 4769 unsigned FormatPrecision, 4770 unsigned FormatMaxPadding, 4771 bool TruncateZero) const { 4772 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4773 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 4774 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero); 4775 } 4776 4777 bool DoubleAPFloat::getExactInverse(APFloat *inv) const { 4778 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4779 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4780 if (!inv) 4781 return Tmp.getExactInverse(nullptr); 4782 APFloat Inv(semPPCDoubleDoubleLegacy); 4783 auto Ret = Tmp.getExactInverse(&Inv); 4784 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt()); 4785 return Ret; 4786 } 4787 4788 DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp, 4789 APFloat::roundingMode RM) { 4790 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4791 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM), 4792 scalbn(Arg.Floats[1], Exp, RM)); 4793 } 4794 4795 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp, 4796 APFloat::roundingMode RM) { 4797 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4798 APFloat First = frexp(Arg.Floats[0], Exp, RM); 4799 APFloat Second = Arg.Floats[1]; 4800 if (Arg.getCategory() == APFloat::fcNormal) 4801 Second = scalbn(Second, -Exp, RM); 4802 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second)); 4803 } 4804 4805 } // namespace detail 4806 4807 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) { 4808 if (usesLayout<IEEEFloat>(Semantics)) { 4809 new (&IEEE) IEEEFloat(std::move(F)); 4810 return; 4811 } 4812 if (usesLayout<DoubleAPFloat>(Semantics)) { 4813 const fltSemantics& S = F.getSemantics(); 4814 new (&Double) 4815 DoubleAPFloat(Semantics, APFloat(std::move(F), S), 4816 APFloat(semIEEEdouble)); 4817 return; 4818 } 4819 llvm_unreachable("Unexpected semantics"); 4820 } 4821 4822 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str, 4823 roundingMode RM) { 4824 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM)); 4825 } 4826 4827 hash_code hash_value(const APFloat &Arg) { 4828 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics())) 4829 return hash_value(Arg.U.IEEE); 4830 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics())) 4831 return hash_value(Arg.U.Double); 4832 llvm_unreachable("Unexpected semantics"); 4833 } 4834 4835 APFloat::APFloat(const fltSemantics &Semantics, StringRef S) 4836 : APFloat(Semantics) { 4837 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven); 4838 assert(StatusOrErr && "Invalid floating point representation"); 4839 consumeError(StatusOrErr.takeError()); 4840 } 4841 4842 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, 4843 roundingMode RM, bool *losesInfo) { 4844 if (&getSemantics() == &ToSemantics) { 4845 *losesInfo = false; 4846 return opOK; 4847 } 4848 if (usesLayout<IEEEFloat>(getSemantics()) && 4849 usesLayout<IEEEFloat>(ToSemantics)) 4850 return U.IEEE.convert(ToSemantics, RM, losesInfo); 4851 if (usesLayout<IEEEFloat>(getSemantics()) && 4852 usesLayout<DoubleAPFloat>(ToSemantics)) { 4853 assert(&ToSemantics == &semPPCDoubleDouble); 4854 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo); 4855 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt()); 4856 return Ret; 4857 } 4858 if (usesLayout<DoubleAPFloat>(getSemantics()) && 4859 usesLayout<IEEEFloat>(ToSemantics)) { 4860 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo); 4861 *this = APFloat(std::move(getIEEE()), ToSemantics); 4862 return Ret; 4863 } 4864 llvm_unreachable("Unexpected semantics"); 4865 } 4866 4867 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics, 4868 unsigned BitWidth) { 4869 return APFloat(Semantics, APInt::getAllOnes(BitWidth)); 4870 } 4871 4872 void APFloat::print(raw_ostream &OS) const { 4873 SmallVector<char, 16> Buffer; 4874 toString(Buffer); 4875 OS << Buffer << "\n"; 4876 } 4877 4878 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 4879 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); } 4880 #endif 4881 4882 void APFloat::Profile(FoldingSetNodeID &NID) const { 4883 NID.Add(bitcastToAPInt()); 4884 } 4885 4886 /* Same as convertToInteger(integerPart*, ...), except the result is returned in 4887 an APSInt, whose initial bit-width and signed-ness are used to determine the 4888 precision of the conversion. 4889 */ 4890 APFloat::opStatus APFloat::convertToInteger(APSInt &result, 4891 roundingMode rounding_mode, 4892 bool *isExact) const { 4893 unsigned bitWidth = result.getBitWidth(); 4894 SmallVector<uint64_t, 4> parts(result.getNumWords()); 4895 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(), 4896 rounding_mode, isExact); 4897 // Keeps the original signed-ness. 4898 result = APInt(bitWidth, parts); 4899 return status; 4900 } 4901 4902 double APFloat::convertToDouble() const { 4903 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble) 4904 return getIEEE().convertToDouble(); 4905 assert(getSemantics().isRepresentableBy(semIEEEdouble) && 4906 "Float semantics is not representable by IEEEdouble"); 4907 APFloat Temp = *this; 4908 bool LosesInfo; 4909 opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo); 4910 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 4911 (void)St; 4912 return Temp.getIEEE().convertToDouble(); 4913 } 4914 4915 float APFloat::convertToFloat() const { 4916 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle) 4917 return getIEEE().convertToFloat(); 4918 assert(getSemantics().isRepresentableBy(semIEEEsingle) && 4919 "Float semantics is not representable by IEEEsingle"); 4920 APFloat Temp = *this; 4921 bool LosesInfo; 4922 opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo); 4923 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 4924 (void)St; 4925 return Temp.getIEEE().convertToFloat(); 4926 } 4927 4928 } // namespace llvm 4929 4930 #undef APFLOAT_DISPATCH_ON_SEMANTICS 4931