1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a class to represent arbitrary precision floating 11 // point values and provide a variety of arithmetic operations on them. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/APFloat.h" 16 #include "llvm/ADT/APSInt.h" 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/FoldingSet.h" 19 #include "llvm/ADT/Hashing.h" 20 #include "llvm/ADT/StringExtras.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/Support/Debug.h" 23 #include "llvm/Support/ErrorHandling.h" 24 #include "llvm/Support/MathExtras.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include <cstring> 27 #include <limits.h> 28 29 using namespace llvm; 30 31 /// A macro used to combine two fcCategory enums into one key which can be used 32 /// in a switch statement to classify how the interaction of two APFloat's 33 /// categories affects an operation. 34 /// 35 /// TODO: If clang source code is ever allowed to use constexpr in its own 36 /// codebase, change this into a static inline function. 37 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs)) 38 39 /* Assumed in hexadecimal significand parsing, and conversion to 40 hexadecimal strings. */ 41 static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); 42 43 namespace llvm { 44 /* Represents floating point arithmetic semantics. */ 45 struct fltSemantics { 46 /* The largest E such that 2^E is representable; this matches the 47 definition of IEEE 754. */ 48 APFloatBase::ExponentType maxExponent; 49 50 /* The smallest E such that 2^E is a normalized number; this 51 matches the definition of IEEE 754. */ 52 APFloatBase::ExponentType minExponent; 53 54 /* Number of bits in the significand. This includes the integer 55 bit. */ 56 unsigned int precision; 57 58 /* Number of bits actually used in the semantics. */ 59 unsigned int sizeInBits; 60 }; 61 62 const fltSemantics APFloatBase::IEEEhalf = {15, -14, 11, 16}; 63 const fltSemantics APFloatBase::IEEEsingle = {127, -126, 24, 32}; 64 const fltSemantics APFloatBase::IEEEdouble = {1023, -1022, 53, 64}; 65 const fltSemantics APFloatBase::IEEEquad = {16383, -16382, 113, 128}; 66 const fltSemantics APFloatBase::x87DoubleExtended = {16383, -16382, 64, 80}; 67 const fltSemantics APFloatBase::Bogus = {0, 0, 0, 0}; 68 69 /* The PowerPC format consists of two doubles. It does not map cleanly 70 onto the usual format above. It is approximated using twice the 71 mantissa bits. Note that for exponents near the double minimum, 72 we no longer can represent the full 106 mantissa bits, so those 73 will be treated as denormal numbers. 74 75 FIXME: While this approximation is equivalent to what GCC uses for 76 compile-time arithmetic on PPC double-double numbers, it is not able 77 to represent all possible values held by a PPC double-double number, 78 for example: (long double) 1.0 + (long double) 0x1p-106 79 Should this be replaced by a full emulation of PPC double-double? */ 80 const fltSemantics APFloatBase::PPCDoubleDouble = {0, 0, 0, 0}; 81 82 /* There are temporary semantics for the real PPCDoubleDouble implementation. 83 Currently, APFloat of PPCDoubleDouble holds one PPCDoubleDoubleImpl as the 84 high part of double double, and one IEEEdouble as the low part, so that 85 the old operations operate on PPCDoubleDoubleImpl, while the newly added 86 operations also populate the IEEEdouble. 87 88 TODO: Once all functions support DoubleAPFloat mode, we'll change all 89 PPCDoubleDoubleImpl to IEEEdouble and remove PPCDoubleDoubleImpl. */ 90 static const fltSemantics PPCDoubleDoubleImpl = {1023, -1022 + 53, 53 + 53, 91 128}; 92 93 /* A tight upper bound on number of parts required to hold the value 94 pow(5, power) is 95 96 power * 815 / (351 * integerPartWidth) + 1 97 98 However, whilst the result may require only this many parts, 99 because we are multiplying two values to get it, the 100 multiplication may require an extra part with the excess part 101 being zero (consider the trivial case of 1 * 1, tcFullMultiply 102 requires two parts to hold the single-part result). So we add an 103 extra one to guarantee enough space whilst multiplying. */ 104 const unsigned int maxExponent = 16383; 105 const unsigned int maxPrecision = 113; 106 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 107 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) 108 / (351 * integerPartWidth)); 109 110 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { 111 return semantics.precision; 112 } 113 APFloatBase::ExponentType 114 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { 115 return semantics.maxExponent; 116 } 117 APFloatBase::ExponentType 118 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { 119 return semantics.minExponent; 120 } 121 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { 122 return semantics.sizeInBits; 123 } 124 125 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { 126 return Sem.sizeInBits; 127 } 128 129 /* A bunch of private, handy routines. */ 130 131 static inline unsigned int 132 partCountForBits(unsigned int bits) 133 { 134 return ((bits) + integerPartWidth - 1) / integerPartWidth; 135 } 136 137 /* Returns 0U-9U. Return values >= 10U are not digits. */ 138 static inline unsigned int 139 decDigitValue(unsigned int c) 140 { 141 return c - '0'; 142 } 143 144 /* Return the value of a decimal exponent of the form 145 [+-]ddddddd. 146 147 If the exponent overflows, returns a large exponent with the 148 appropriate sign. */ 149 static int 150 readExponent(StringRef::iterator begin, StringRef::iterator end) 151 { 152 bool isNegative; 153 unsigned int absExponent; 154 const unsigned int overlargeExponent = 24000; /* FIXME. */ 155 StringRef::iterator p = begin; 156 157 assert(p != end && "Exponent has no digits"); 158 159 isNegative = (*p == '-'); 160 if (*p == '-' || *p == '+') { 161 p++; 162 assert(p != end && "Exponent has no digits"); 163 } 164 165 absExponent = decDigitValue(*p++); 166 assert(absExponent < 10U && "Invalid character in exponent"); 167 168 for (; p != end; ++p) { 169 unsigned int value; 170 171 value = decDigitValue(*p); 172 assert(value < 10U && "Invalid character in exponent"); 173 174 value += absExponent * 10; 175 if (absExponent >= overlargeExponent) { 176 absExponent = overlargeExponent; 177 p = end; /* outwit assert below */ 178 break; 179 } 180 absExponent = value; 181 } 182 183 assert(p == end && "Invalid exponent in exponent"); 184 185 if (isNegative) 186 return -(int) absExponent; 187 else 188 return (int) absExponent; 189 } 190 191 /* This is ugly and needs cleaning up, but I don't immediately see 192 how whilst remaining safe. */ 193 static int 194 totalExponent(StringRef::iterator p, StringRef::iterator end, 195 int exponentAdjustment) 196 { 197 int unsignedExponent; 198 bool negative, overflow; 199 int exponent = 0; 200 201 assert(p != end && "Exponent has no digits"); 202 203 negative = *p == '-'; 204 if (*p == '-' || *p == '+') { 205 p++; 206 assert(p != end && "Exponent has no digits"); 207 } 208 209 unsignedExponent = 0; 210 overflow = false; 211 for (; p != end; ++p) { 212 unsigned int value; 213 214 value = decDigitValue(*p); 215 assert(value < 10U && "Invalid character in exponent"); 216 217 unsignedExponent = unsignedExponent * 10 + value; 218 if (unsignedExponent > 32767) { 219 overflow = true; 220 break; 221 } 222 } 223 224 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 225 overflow = true; 226 227 if (!overflow) { 228 exponent = unsignedExponent; 229 if (negative) 230 exponent = -exponent; 231 exponent += exponentAdjustment; 232 if (exponent > 32767 || exponent < -32768) 233 overflow = true; 234 } 235 236 if (overflow) 237 exponent = negative ? -32768: 32767; 238 239 return exponent; 240 } 241 242 static StringRef::iterator 243 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 244 StringRef::iterator *dot) 245 { 246 StringRef::iterator p = begin; 247 *dot = end; 248 while (p != end && *p == '0') 249 p++; 250 251 if (p != end && *p == '.') { 252 *dot = p++; 253 254 assert(end - begin != 1 && "Significand has no digits"); 255 256 while (p != end && *p == '0') 257 p++; 258 } 259 260 return p; 261 } 262 263 /* Given a normal decimal floating point number of the form 264 265 dddd.dddd[eE][+-]ddd 266 267 where the decimal point and exponent are optional, fill out the 268 structure D. Exponent is appropriate if the significand is 269 treated as an integer, and normalizedExponent if the significand 270 is taken to have the decimal point after a single leading 271 non-zero digit. 272 273 If the value is zero, V->firstSigDigit points to a non-digit, and 274 the return exponent is zero. 275 */ 276 struct decimalInfo { 277 const char *firstSigDigit; 278 const char *lastSigDigit; 279 int exponent; 280 int normalizedExponent; 281 }; 282 283 static void 284 interpretDecimal(StringRef::iterator begin, StringRef::iterator end, 285 decimalInfo *D) 286 { 287 StringRef::iterator dot = end; 288 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot); 289 290 D->firstSigDigit = p; 291 D->exponent = 0; 292 D->normalizedExponent = 0; 293 294 for (; p != end; ++p) { 295 if (*p == '.') { 296 assert(dot == end && "String contains multiple dots"); 297 dot = p++; 298 if (p == end) 299 break; 300 } 301 if (decDigitValue(*p) >= 10U) 302 break; 303 } 304 305 if (p != end) { 306 assert((*p == 'e' || *p == 'E') && "Invalid character in significand"); 307 assert(p != begin && "Significand has no digits"); 308 assert((dot == end || p - begin != 1) && "Significand has no digits"); 309 310 /* p points to the first non-digit in the string */ 311 D->exponent = readExponent(p + 1, end); 312 313 /* Implied decimal point? */ 314 if (dot == end) 315 dot = p; 316 } 317 318 /* If number is all zeroes accept any exponent. */ 319 if (p != D->firstSigDigit) { 320 /* Drop insignificant trailing zeroes. */ 321 if (p != begin) { 322 do 323 do 324 p--; 325 while (p != begin && *p == '0'); 326 while (p != begin && *p == '.'); 327 } 328 329 /* Adjust the exponents for any decimal point. */ 330 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p)); 331 D->normalizedExponent = (D->exponent + 332 static_cast<APFloat::ExponentType>((p - D->firstSigDigit) 333 - (dot > D->firstSigDigit && dot < p))); 334 } 335 336 D->lastSigDigit = p; 337 } 338 339 /* Return the trailing fraction of a hexadecimal number. 340 DIGITVALUE is the first hex digit of the fraction, P points to 341 the next digit. */ 342 static lostFraction 343 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 344 unsigned int digitValue) 345 { 346 unsigned int hexDigit; 347 348 /* If the first trailing digit isn't 0 or 8 we can work out the 349 fraction immediately. */ 350 if (digitValue > 8) 351 return lfMoreThanHalf; 352 else if (digitValue < 8 && digitValue > 0) 353 return lfLessThanHalf; 354 355 // Otherwise we need to find the first non-zero digit. 356 while (p != end && (*p == '0' || *p == '.')) 357 p++; 358 359 assert(p != end && "Invalid trailing hexadecimal fraction!"); 360 361 hexDigit = hexDigitValue(*p); 362 363 /* If we ran off the end it is exactly zero or one-half, otherwise 364 a little more. */ 365 if (hexDigit == -1U) 366 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 367 else 368 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 369 } 370 371 /* Return the fraction lost were a bignum truncated losing the least 372 significant BITS bits. */ 373 static lostFraction 374 lostFractionThroughTruncation(const integerPart *parts, 375 unsigned int partCount, 376 unsigned int bits) 377 { 378 unsigned int lsb; 379 380 lsb = APInt::tcLSB(parts, partCount); 381 382 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 383 if (bits <= lsb) 384 return lfExactlyZero; 385 if (bits == lsb + 1) 386 return lfExactlyHalf; 387 if (bits <= partCount * integerPartWidth && 388 APInt::tcExtractBit(parts, bits - 1)) 389 return lfMoreThanHalf; 390 391 return lfLessThanHalf; 392 } 393 394 /* Shift DST right BITS bits noting lost fraction. */ 395 static lostFraction 396 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) 397 { 398 lostFraction lost_fraction; 399 400 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 401 402 APInt::tcShiftRight(dst, parts, bits); 403 404 return lost_fraction; 405 } 406 407 /* Combine the effect of two lost fractions. */ 408 static lostFraction 409 combineLostFractions(lostFraction moreSignificant, 410 lostFraction lessSignificant) 411 { 412 if (lessSignificant != lfExactlyZero) { 413 if (moreSignificant == lfExactlyZero) 414 moreSignificant = lfLessThanHalf; 415 else if (moreSignificant == lfExactlyHalf) 416 moreSignificant = lfMoreThanHalf; 417 } 418 419 return moreSignificant; 420 } 421 422 /* The error from the true value, in half-ulps, on multiplying two 423 floating point numbers, which differ from the value they 424 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 425 than the returned value. 426 427 See "How to Read Floating Point Numbers Accurately" by William D 428 Clinger. */ 429 static unsigned int 430 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 431 { 432 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 433 434 if (HUerr1 + HUerr2 == 0) 435 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 436 else 437 return inexactMultiply + 2 * (HUerr1 + HUerr2); 438 } 439 440 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 441 when the least significant BITS are truncated. BITS cannot be 442 zero. */ 443 static integerPart 444 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) 445 { 446 unsigned int count, partBits; 447 integerPart part, boundary; 448 449 assert(bits != 0); 450 451 bits--; 452 count = bits / integerPartWidth; 453 partBits = bits % integerPartWidth + 1; 454 455 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits)); 456 457 if (isNearest) 458 boundary = (integerPart) 1 << (partBits - 1); 459 else 460 boundary = 0; 461 462 if (count == 0) { 463 if (part - boundary <= boundary - part) 464 return part - boundary; 465 else 466 return boundary - part; 467 } 468 469 if (part == boundary) { 470 while (--count) 471 if (parts[count]) 472 return ~(integerPart) 0; /* A lot. */ 473 474 return parts[0]; 475 } else if (part == boundary - 1) { 476 while (--count) 477 if (~parts[count]) 478 return ~(integerPart) 0; /* A lot. */ 479 480 return -parts[0]; 481 } 482 483 return ~(integerPart) 0; /* A lot. */ 484 } 485 486 /* Place pow(5, power) in DST, and return the number of parts used. 487 DST must be at least one part larger than size of the answer. */ 488 static unsigned int 489 powerOf5(integerPart *dst, unsigned int power) 490 { 491 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 492 15625, 78125 }; 493 integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; 494 pow5s[0] = 78125 * 5; 495 496 unsigned int partsCount[16] = { 1 }; 497 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 498 unsigned int result; 499 assert(power <= maxExponent); 500 501 p1 = dst; 502 p2 = scratch; 503 504 *p1 = firstEightPowers[power & 7]; 505 power >>= 3; 506 507 result = 1; 508 pow5 = pow5s; 509 510 for (unsigned int n = 0; power; power >>= 1, n++) { 511 unsigned int pc; 512 513 pc = partsCount[n]; 514 515 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ 516 if (pc == 0) { 517 pc = partsCount[n - 1]; 518 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); 519 pc *= 2; 520 if (pow5[pc - 1] == 0) 521 pc--; 522 partsCount[n] = pc; 523 } 524 525 if (power & 1) { 526 integerPart *tmp; 527 528 APInt::tcFullMultiply(p2, p1, pow5, result, pc); 529 result += pc; 530 if (p2[result - 1] == 0) 531 result--; 532 533 /* Now result is in p1 with partsCount parts and p2 is scratch 534 space. */ 535 tmp = p1; 536 p1 = p2; 537 p2 = tmp; 538 } 539 540 pow5 += pc; 541 } 542 543 if (p1 != dst) 544 APInt::tcAssign(dst, p1, result); 545 546 return result; 547 } 548 549 /* Zero at the end to avoid modular arithmetic when adding one; used 550 when rounding up during hexadecimal output. */ 551 static const char hexDigitsLower[] = "0123456789abcdef0"; 552 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 553 static const char infinityL[] = "infinity"; 554 static const char infinityU[] = "INFINITY"; 555 static const char NaNL[] = "nan"; 556 static const char NaNU[] = "NAN"; 557 558 /* Write out an integerPart in hexadecimal, starting with the most 559 significant nibble. Write out exactly COUNT hexdigits, return 560 COUNT. */ 561 static unsigned int 562 partAsHex (char *dst, integerPart part, unsigned int count, 563 const char *hexDigitChars) 564 { 565 unsigned int result = count; 566 567 assert(count != 0 && count <= integerPartWidth / 4); 568 569 part >>= (integerPartWidth - 4 * count); 570 while (count--) { 571 dst[count] = hexDigitChars[part & 0xf]; 572 part >>= 4; 573 } 574 575 return result; 576 } 577 578 /* Write out an unsigned decimal integer. */ 579 static char * 580 writeUnsignedDecimal (char *dst, unsigned int n) 581 { 582 char buff[40], *p; 583 584 p = buff; 585 do 586 *p++ = '0' + n % 10; 587 while (n /= 10); 588 589 do 590 *dst++ = *--p; 591 while (p != buff); 592 593 return dst; 594 } 595 596 /* Write out a signed decimal integer. */ 597 static char * 598 writeSignedDecimal (char *dst, int value) 599 { 600 if (value < 0) { 601 *dst++ = '-'; 602 dst = writeUnsignedDecimal(dst, -(unsigned) value); 603 } else 604 dst = writeUnsignedDecimal(dst, value); 605 606 return dst; 607 } 608 609 namespace detail { 610 /* Constructors. */ 611 void IEEEFloat::initialize(const fltSemantics *ourSemantics) { 612 unsigned int count; 613 614 semantics = ourSemantics; 615 count = partCount(); 616 if (count > 1) 617 significand.parts = new integerPart[count]; 618 } 619 620 void IEEEFloat::freeSignificand() { 621 if (needsCleanup()) 622 delete [] significand.parts; 623 } 624 625 void IEEEFloat::assign(const IEEEFloat &rhs) { 626 assert(semantics == rhs.semantics); 627 628 sign = rhs.sign; 629 category = rhs.category; 630 exponent = rhs.exponent; 631 if (isFiniteNonZero() || category == fcNaN) 632 copySignificand(rhs); 633 } 634 635 void IEEEFloat::copySignificand(const IEEEFloat &rhs) { 636 assert(isFiniteNonZero() || category == fcNaN); 637 assert(rhs.partCount() >= partCount()); 638 639 APInt::tcAssign(significandParts(), rhs.significandParts(), 640 partCount()); 641 } 642 643 /* Make this number a NaN, with an arbitrary but deterministic value 644 for the significand. If double or longer, this is a signalling NaN, 645 which may not be ideal. If float, this is QNaN(0). */ 646 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { 647 category = fcNaN; 648 sign = Negative; 649 650 integerPart *significand = significandParts(); 651 unsigned numParts = partCount(); 652 653 // Set the significand bits to the fill. 654 if (!fill || fill->getNumWords() < numParts) 655 APInt::tcSet(significand, 0, numParts); 656 if (fill) { 657 APInt::tcAssign(significand, fill->getRawData(), 658 std::min(fill->getNumWords(), numParts)); 659 660 // Zero out the excess bits of the significand. 661 unsigned bitsToPreserve = semantics->precision - 1; 662 unsigned part = bitsToPreserve / 64; 663 bitsToPreserve %= 64; 664 significand[part] &= ((1ULL << bitsToPreserve) - 1); 665 for (part++; part != numParts; ++part) 666 significand[part] = 0; 667 } 668 669 unsigned QNaNBit = semantics->precision - 2; 670 671 if (SNaN) { 672 // We always have to clear the QNaN bit to make it an SNaN. 673 APInt::tcClearBit(significand, QNaNBit); 674 675 // If there are no bits set in the payload, we have to set 676 // *something* to make it a NaN instead of an infinity; 677 // conventionally, this is the next bit down from the QNaN bit. 678 if (APInt::tcIsZero(significand, numParts)) 679 APInt::tcSetBit(significand, QNaNBit - 1); 680 } else { 681 // We always have to set the QNaN bit to make it a QNaN. 682 APInt::tcSetBit(significand, QNaNBit); 683 } 684 685 // For x87 extended precision, we want to make a NaN, not a 686 // pseudo-NaN. Maybe we should expose the ability to make 687 // pseudo-NaNs? 688 if (semantics == &APFloat::x87DoubleExtended) 689 APInt::tcSetBit(significand, QNaNBit + 1); 690 } 691 692 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) { 693 if (this != &rhs) { 694 if (semantics != rhs.semantics) { 695 freeSignificand(); 696 initialize(rhs.semantics); 697 } 698 assign(rhs); 699 } 700 701 return *this; 702 } 703 704 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) { 705 freeSignificand(); 706 707 semantics = rhs.semantics; 708 significand = rhs.significand; 709 exponent = rhs.exponent; 710 category = rhs.category; 711 sign = rhs.sign; 712 713 rhs.semantics = &Bogus; 714 return *this; 715 } 716 717 bool IEEEFloat::isDenormal() const { 718 return isFiniteNonZero() && (exponent == semantics->minExponent) && 719 (APInt::tcExtractBit(significandParts(), 720 semantics->precision - 1) == 0); 721 } 722 723 bool IEEEFloat::isSmallest() const { 724 // The smallest number by magnitude in our format will be the smallest 725 // denormal, i.e. the floating point number with exponent being minimum 726 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 727 return isFiniteNonZero() && exponent == semantics->minExponent && 728 significandMSB() == 0; 729 } 730 731 bool IEEEFloat::isSignificandAllOnes() const { 732 // Test if the significand excluding the integral bit is all ones. This allows 733 // us to test for binade boundaries. 734 const integerPart *Parts = significandParts(); 735 const unsigned PartCount = partCount(); 736 for (unsigned i = 0; i < PartCount - 1; i++) 737 if (~Parts[i]) 738 return false; 739 740 // Set the unused high bits to all ones when we compare. 741 const unsigned NumHighBits = 742 PartCount*integerPartWidth - semantics->precision + 1; 743 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " 744 "fill than integerPartWidth"); 745 const integerPart HighBitFill = 746 ~integerPart(0) << (integerPartWidth - NumHighBits); 747 if (~(Parts[PartCount - 1] | HighBitFill)) 748 return false; 749 750 return true; 751 } 752 753 bool IEEEFloat::isSignificandAllZeros() const { 754 // Test if the significand excluding the integral bit is all zeros. This 755 // allows us to test for binade boundaries. 756 const integerPart *Parts = significandParts(); 757 const unsigned PartCount = partCount(); 758 759 for (unsigned i = 0; i < PartCount - 1; i++) 760 if (Parts[i]) 761 return false; 762 763 const unsigned NumHighBits = 764 PartCount*integerPartWidth - semantics->precision + 1; 765 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " 766 "clear than integerPartWidth"); 767 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 768 769 if (Parts[PartCount - 1] & HighBitMask) 770 return false; 771 772 return true; 773 } 774 775 bool IEEEFloat::isLargest() const { 776 // The largest number by magnitude in our format will be the floating point 777 // number with maximum exponent and with significand that is all ones. 778 return isFiniteNonZero() && exponent == semantics->maxExponent 779 && isSignificandAllOnes(); 780 } 781 782 bool IEEEFloat::isInteger() const { 783 // This could be made more efficient; I'm going for obviously correct. 784 if (!isFinite()) return false; 785 IEEEFloat truncated = *this; 786 truncated.roundToIntegral(rmTowardZero); 787 return compare(truncated) == cmpEqual; 788 } 789 790 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const { 791 if (this == &rhs) 792 return true; 793 if (semantics != rhs.semantics || 794 category != rhs.category || 795 sign != rhs.sign) 796 return false; 797 if (category==fcZero || category==fcInfinity) 798 return true; 799 800 if (isFiniteNonZero() && exponent != rhs.exponent) 801 return false; 802 803 return std::equal(significandParts(), significandParts() + partCount(), 804 rhs.significandParts()); 805 } 806 807 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) { 808 initialize(&ourSemantics); 809 sign = 0; 810 category = fcNormal; 811 zeroSignificand(); 812 exponent = ourSemantics.precision - 1; 813 significandParts()[0] = value; 814 normalize(rmNearestTiesToEven, lfExactlyZero); 815 } 816 817 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) { 818 initialize(&ourSemantics); 819 category = fcZero; 820 sign = false; 821 } 822 823 // Delegate to the previous constructor, because later copy constructor may 824 // actually inspects category, which can't be garbage. 825 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag) 826 : IEEEFloat(ourSemantics) {} 827 828 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) { 829 initialize(rhs.semantics); 830 assign(rhs); 831 } 832 833 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&Bogus) { 834 *this = std::move(rhs); 835 } 836 837 IEEEFloat::~IEEEFloat() { freeSignificand(); } 838 839 // Profile - This method 'profiles' an APFloat for use with FoldingSet. 840 void IEEEFloat::Profile(FoldingSetNodeID &ID) const { 841 ID.Add(bitcastToAPInt()); 842 } 843 844 unsigned int IEEEFloat::partCount() const { 845 return partCountForBits(semantics->precision + 1); 846 } 847 848 const integerPart *IEEEFloat::significandParts() const { 849 return const_cast<IEEEFloat *>(this)->significandParts(); 850 } 851 852 integerPart *IEEEFloat::significandParts() { 853 if (partCount() > 1) 854 return significand.parts; 855 else 856 return &significand.part; 857 } 858 859 void IEEEFloat::zeroSignificand() { 860 APInt::tcSet(significandParts(), 0, partCount()); 861 } 862 863 /* Increment an fcNormal floating point number's significand. */ 864 void IEEEFloat::incrementSignificand() { 865 integerPart carry; 866 867 carry = APInt::tcIncrement(significandParts(), partCount()); 868 869 /* Our callers should never cause us to overflow. */ 870 assert(carry == 0); 871 (void)carry; 872 } 873 874 /* Add the significand of the RHS. Returns the carry flag. */ 875 integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { 876 integerPart *parts; 877 878 parts = significandParts(); 879 880 assert(semantics == rhs.semantics); 881 assert(exponent == rhs.exponent); 882 883 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 884 } 885 886 /* Subtract the significand of the RHS with a borrow flag. Returns 887 the borrow flag. */ 888 integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs, 889 integerPart borrow) { 890 integerPart *parts; 891 892 parts = significandParts(); 893 894 assert(semantics == rhs.semantics); 895 assert(exponent == rhs.exponent); 896 897 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 898 partCount()); 899 } 900 901 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 902 on to the full-precision result of the multiplication. Returns the 903 lost fraction. */ 904 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs, 905 const IEEEFloat *addend) { 906 unsigned int omsb; // One, not zero, based MSB. 907 unsigned int partsCount, newPartsCount, precision; 908 integerPart *lhsSignificand; 909 integerPart scratch[4]; 910 integerPart *fullSignificand; 911 lostFraction lost_fraction; 912 bool ignored; 913 914 assert(semantics == rhs.semantics); 915 916 precision = semantics->precision; 917 918 // Allocate space for twice as many bits as the original significand, plus one 919 // extra bit for the addition to overflow into. 920 newPartsCount = partCountForBits(precision * 2 + 1); 921 922 if (newPartsCount > 4) 923 fullSignificand = new integerPart[newPartsCount]; 924 else 925 fullSignificand = scratch; 926 927 lhsSignificand = significandParts(); 928 partsCount = partCount(); 929 930 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 931 rhs.significandParts(), partsCount, partsCount); 932 933 lost_fraction = lfExactlyZero; 934 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 935 exponent += rhs.exponent; 936 937 // Assume the operands involved in the multiplication are single-precision 938 // FP, and the two multiplicants are: 939 // *this = a23 . a22 ... a0 * 2^e1 940 // rhs = b23 . b22 ... b0 * 2^e2 941 // the result of multiplication is: 942 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) 943 // Note that there are three significant bits at the left-hand side of the 944 // radix point: two for the multiplication, and an overflow bit for the 945 // addition (that will always be zero at this point). Move the radix point 946 // toward left by two bits, and adjust exponent accordingly. 947 exponent += 2; 948 949 if (addend && addend->isNonZero()) { 950 // The intermediate result of the multiplication has "2 * precision" 951 // signicant bit; adjust the addend to be consistent with mul result. 952 // 953 Significand savedSignificand = significand; 954 const fltSemantics *savedSemantics = semantics; 955 fltSemantics extendedSemantics; 956 opStatus status; 957 unsigned int extendedPrecision; 958 959 // Normalize our MSB to one below the top bit to allow for overflow. 960 extendedPrecision = 2 * precision + 1; 961 if (omsb != extendedPrecision - 1) { 962 assert(extendedPrecision > omsb); 963 APInt::tcShiftLeft(fullSignificand, newPartsCount, 964 (extendedPrecision - 1) - omsb); 965 exponent -= (extendedPrecision - 1) - omsb; 966 } 967 968 /* Create new semantics. */ 969 extendedSemantics = *semantics; 970 extendedSemantics.precision = extendedPrecision; 971 972 if (newPartsCount == 1) 973 significand.part = fullSignificand[0]; 974 else 975 significand.parts = fullSignificand; 976 semantics = &extendedSemantics; 977 978 IEEEFloat extendedAddend(*addend); 979 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); 980 assert(status == opOK); 981 (void)status; 982 983 // Shift the significand of the addend right by one bit. This guarantees 984 // that the high bit of the significand is zero (same as fullSignificand), 985 // so the addition will overflow (if it does overflow at all) into the top bit. 986 lost_fraction = extendedAddend.shiftSignificandRight(1); 987 assert(lost_fraction == lfExactlyZero && 988 "Lost precision while shifting addend for fused-multiply-add."); 989 990 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 991 992 /* Restore our state. */ 993 if (newPartsCount == 1) 994 fullSignificand[0] = significand.part; 995 significand = savedSignificand; 996 semantics = savedSemantics; 997 998 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 999 } 1000 1001 // Convert the result having "2 * precision" significant-bits back to the one 1002 // having "precision" significant-bits. First, move the radix point from 1003 // poision "2*precision - 1" to "precision - 1". The exponent need to be 1004 // adjusted by "2*precision - 1" - "precision - 1" = "precision". 1005 exponent -= precision + 1; 1006 1007 // In case MSB resides at the left-hand side of radix point, shift the 1008 // mantissa right by some amount to make sure the MSB reside right before 1009 // the radix point (i.e. "MSB . rest-significant-bits"). 1010 // 1011 // Note that the result is not normalized when "omsb < precision". So, the 1012 // caller needs to call IEEEFloat::normalize() if normalized value is 1013 // expected. 1014 if (omsb > precision) { 1015 unsigned int bits, significantParts; 1016 lostFraction lf; 1017 1018 bits = omsb - precision; 1019 significantParts = partCountForBits(omsb); 1020 lf = shiftRight(fullSignificand, significantParts, bits); 1021 lost_fraction = combineLostFractions(lf, lost_fraction); 1022 exponent += bits; 1023 } 1024 1025 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 1026 1027 if (newPartsCount > 4) 1028 delete [] fullSignificand; 1029 1030 return lost_fraction; 1031 } 1032 1033 /* Multiply the significands of LHS and RHS to DST. */ 1034 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) { 1035 unsigned int bit, i, partsCount; 1036 const integerPart *rhsSignificand; 1037 integerPart *lhsSignificand, *dividend, *divisor; 1038 integerPart scratch[4]; 1039 lostFraction lost_fraction; 1040 1041 assert(semantics == rhs.semantics); 1042 1043 lhsSignificand = significandParts(); 1044 rhsSignificand = rhs.significandParts(); 1045 partsCount = partCount(); 1046 1047 if (partsCount > 2) 1048 dividend = new integerPart[partsCount * 2]; 1049 else 1050 dividend = scratch; 1051 1052 divisor = dividend + partsCount; 1053 1054 /* Copy the dividend and divisor as they will be modified in-place. */ 1055 for (i = 0; i < partsCount; i++) { 1056 dividend[i] = lhsSignificand[i]; 1057 divisor[i] = rhsSignificand[i]; 1058 lhsSignificand[i] = 0; 1059 } 1060 1061 exponent -= rhs.exponent; 1062 1063 unsigned int precision = semantics->precision; 1064 1065 /* Normalize the divisor. */ 1066 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 1067 if (bit) { 1068 exponent += bit; 1069 APInt::tcShiftLeft(divisor, partsCount, bit); 1070 } 1071 1072 /* Normalize the dividend. */ 1073 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 1074 if (bit) { 1075 exponent -= bit; 1076 APInt::tcShiftLeft(dividend, partsCount, bit); 1077 } 1078 1079 /* Ensure the dividend >= divisor initially for the loop below. 1080 Incidentally, this means that the division loop below is 1081 guaranteed to set the integer bit to one. */ 1082 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { 1083 exponent--; 1084 APInt::tcShiftLeft(dividend, partsCount, 1); 1085 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 1086 } 1087 1088 /* Long division. */ 1089 for (bit = precision; bit; bit -= 1) { 1090 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 1091 APInt::tcSubtract(dividend, divisor, 0, partsCount); 1092 APInt::tcSetBit(lhsSignificand, bit - 1); 1093 } 1094 1095 APInt::tcShiftLeft(dividend, partsCount, 1); 1096 } 1097 1098 /* Figure out the lost fraction. */ 1099 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 1100 1101 if (cmp > 0) 1102 lost_fraction = lfMoreThanHalf; 1103 else if (cmp == 0) 1104 lost_fraction = lfExactlyHalf; 1105 else if (APInt::tcIsZero(dividend, partsCount)) 1106 lost_fraction = lfExactlyZero; 1107 else 1108 lost_fraction = lfLessThanHalf; 1109 1110 if (partsCount > 2) 1111 delete [] dividend; 1112 1113 return lost_fraction; 1114 } 1115 1116 unsigned int IEEEFloat::significandMSB() const { 1117 return APInt::tcMSB(significandParts(), partCount()); 1118 } 1119 1120 unsigned int IEEEFloat::significandLSB() const { 1121 return APInt::tcLSB(significandParts(), partCount()); 1122 } 1123 1124 /* Note that a zero result is NOT normalized to fcZero. */ 1125 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) { 1126 /* Our exponent should not overflow. */ 1127 assert((ExponentType) (exponent + bits) >= exponent); 1128 1129 exponent += bits; 1130 1131 return shiftRight(significandParts(), partCount(), bits); 1132 } 1133 1134 /* Shift the significand left BITS bits, subtract BITS from its exponent. */ 1135 void IEEEFloat::shiftSignificandLeft(unsigned int bits) { 1136 assert(bits < semantics->precision); 1137 1138 if (bits) { 1139 unsigned int partsCount = partCount(); 1140 1141 APInt::tcShiftLeft(significandParts(), partsCount, bits); 1142 exponent -= bits; 1143 1144 assert(!APInt::tcIsZero(significandParts(), partsCount)); 1145 } 1146 } 1147 1148 IEEEFloat::cmpResult 1149 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const { 1150 int compare; 1151 1152 assert(semantics == rhs.semantics); 1153 assert(isFiniteNonZero()); 1154 assert(rhs.isFiniteNonZero()); 1155 1156 compare = exponent - rhs.exponent; 1157 1158 /* If exponents are equal, do an unsigned bignum comparison of the 1159 significands. */ 1160 if (compare == 0) 1161 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1162 partCount()); 1163 1164 if (compare > 0) 1165 return cmpGreaterThan; 1166 else if (compare < 0) 1167 return cmpLessThan; 1168 else 1169 return cmpEqual; 1170 } 1171 1172 /* Handle overflow. Sign is preserved. We either become infinity or 1173 the largest finite number. */ 1174 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) { 1175 /* Infinity? */ 1176 if (rounding_mode == rmNearestTiesToEven || 1177 rounding_mode == rmNearestTiesToAway || 1178 (rounding_mode == rmTowardPositive && !sign) || 1179 (rounding_mode == rmTowardNegative && sign)) { 1180 category = fcInfinity; 1181 return (opStatus) (opOverflow | opInexact); 1182 } 1183 1184 /* Otherwise we become the largest finite number. */ 1185 category = fcNormal; 1186 exponent = semantics->maxExponent; 1187 APInt::tcSetLeastSignificantBits(significandParts(), partCount(), 1188 semantics->precision); 1189 1190 return opInexact; 1191 } 1192 1193 /* Returns TRUE if, when truncating the current number, with BIT the 1194 new LSB, with the given lost fraction and rounding mode, the result 1195 would need to be rounded away from zero (i.e., by increasing the 1196 signficand). This routine must work for fcZero of both signs, and 1197 fcNormal numbers. */ 1198 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode, 1199 lostFraction lost_fraction, 1200 unsigned int bit) const { 1201 /* NaNs and infinities should not have lost fractions. */ 1202 assert(isFiniteNonZero() || category == fcZero); 1203 1204 /* Current callers never pass this so we don't handle it. */ 1205 assert(lost_fraction != lfExactlyZero); 1206 1207 switch (rounding_mode) { 1208 case rmNearestTiesToAway: 1209 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1210 1211 case rmNearestTiesToEven: 1212 if (lost_fraction == lfMoreThanHalf) 1213 return true; 1214 1215 /* Our zeroes don't have a significand to test. */ 1216 if (lost_fraction == lfExactlyHalf && category != fcZero) 1217 return APInt::tcExtractBit(significandParts(), bit); 1218 1219 return false; 1220 1221 case rmTowardZero: 1222 return false; 1223 1224 case rmTowardPositive: 1225 return !sign; 1226 1227 case rmTowardNegative: 1228 return sign; 1229 } 1230 llvm_unreachable("Invalid rounding mode found"); 1231 } 1232 1233 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, 1234 lostFraction lost_fraction) { 1235 unsigned int omsb; /* One, not zero, based MSB. */ 1236 int exponentChange; 1237 1238 if (!isFiniteNonZero()) 1239 return opOK; 1240 1241 /* Before rounding normalize the exponent of fcNormal numbers. */ 1242 omsb = significandMSB() + 1; 1243 1244 if (omsb) { 1245 /* OMSB is numbered from 1. We want to place it in the integer 1246 bit numbered PRECISION if possible, with a compensating change in 1247 the exponent. */ 1248 exponentChange = omsb - semantics->precision; 1249 1250 /* If the resulting exponent is too high, overflow according to 1251 the rounding mode. */ 1252 if (exponent + exponentChange > semantics->maxExponent) 1253 return handleOverflow(rounding_mode); 1254 1255 /* Subnormal numbers have exponent minExponent, and their MSB 1256 is forced based on that. */ 1257 if (exponent + exponentChange < semantics->minExponent) 1258 exponentChange = semantics->minExponent - exponent; 1259 1260 /* Shifting left is easy as we don't lose precision. */ 1261 if (exponentChange < 0) { 1262 assert(lost_fraction == lfExactlyZero); 1263 1264 shiftSignificandLeft(-exponentChange); 1265 1266 return opOK; 1267 } 1268 1269 if (exponentChange > 0) { 1270 lostFraction lf; 1271 1272 /* Shift right and capture any new lost fraction. */ 1273 lf = shiftSignificandRight(exponentChange); 1274 1275 lost_fraction = combineLostFractions(lf, lost_fraction); 1276 1277 /* Keep OMSB up-to-date. */ 1278 if (omsb > (unsigned) exponentChange) 1279 omsb -= exponentChange; 1280 else 1281 omsb = 0; 1282 } 1283 } 1284 1285 /* Now round the number according to rounding_mode given the lost 1286 fraction. */ 1287 1288 /* As specified in IEEE 754, since we do not trap we do not report 1289 underflow for exact results. */ 1290 if (lost_fraction == lfExactlyZero) { 1291 /* Canonicalize zeroes. */ 1292 if (omsb == 0) 1293 category = fcZero; 1294 1295 return opOK; 1296 } 1297 1298 /* Increment the significand if we're rounding away from zero. */ 1299 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1300 if (omsb == 0) 1301 exponent = semantics->minExponent; 1302 1303 incrementSignificand(); 1304 omsb = significandMSB() + 1; 1305 1306 /* Did the significand increment overflow? */ 1307 if (omsb == (unsigned) semantics->precision + 1) { 1308 /* Renormalize by incrementing the exponent and shifting our 1309 significand right one. However if we already have the 1310 maximum exponent we overflow to infinity. */ 1311 if (exponent == semantics->maxExponent) { 1312 category = fcInfinity; 1313 1314 return (opStatus) (opOverflow | opInexact); 1315 } 1316 1317 shiftSignificandRight(1); 1318 1319 return opInexact; 1320 } 1321 } 1322 1323 /* The normal case - we were and are not denormal, and any 1324 significand increment above didn't overflow. */ 1325 if (omsb == semantics->precision) 1326 return opInexact; 1327 1328 /* We have a non-zero denormal. */ 1329 assert(omsb < semantics->precision); 1330 1331 /* Canonicalize zeroes. */ 1332 if (omsb == 0) 1333 category = fcZero; 1334 1335 /* The fcZero case is a denormal that underflowed to zero. */ 1336 return (opStatus) (opUnderflow | opInexact); 1337 } 1338 1339 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs, 1340 bool subtract) { 1341 switch (PackCategoriesIntoKey(category, rhs.category)) { 1342 default: 1343 llvm_unreachable(nullptr); 1344 1345 case PackCategoriesIntoKey(fcNaN, fcZero): 1346 case PackCategoriesIntoKey(fcNaN, fcNormal): 1347 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1348 case PackCategoriesIntoKey(fcNaN, fcNaN): 1349 case PackCategoriesIntoKey(fcNormal, fcZero): 1350 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1351 case PackCategoriesIntoKey(fcInfinity, fcZero): 1352 return opOK; 1353 1354 case PackCategoriesIntoKey(fcZero, fcNaN): 1355 case PackCategoriesIntoKey(fcNormal, fcNaN): 1356 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1357 // We need to be sure to flip the sign here for subtraction because we 1358 // don't have a separate negate operation so -NaN becomes 0 - NaN here. 1359 sign = rhs.sign ^ subtract; 1360 category = fcNaN; 1361 copySignificand(rhs); 1362 return opOK; 1363 1364 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1365 case PackCategoriesIntoKey(fcZero, fcInfinity): 1366 category = fcInfinity; 1367 sign = rhs.sign ^ subtract; 1368 return opOK; 1369 1370 case PackCategoriesIntoKey(fcZero, fcNormal): 1371 assign(rhs); 1372 sign = rhs.sign ^ subtract; 1373 return opOK; 1374 1375 case PackCategoriesIntoKey(fcZero, fcZero): 1376 /* Sign depends on rounding mode; handled by caller. */ 1377 return opOK; 1378 1379 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1380 /* Differently signed infinities can only be validly 1381 subtracted. */ 1382 if (((sign ^ rhs.sign)!=0) != subtract) { 1383 makeNaN(); 1384 return opInvalidOp; 1385 } 1386 1387 return opOK; 1388 1389 case PackCategoriesIntoKey(fcNormal, fcNormal): 1390 return opDivByZero; 1391 } 1392 } 1393 1394 /* Add or subtract two normal numbers. */ 1395 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs, 1396 bool subtract) { 1397 integerPart carry; 1398 lostFraction lost_fraction; 1399 int bits; 1400 1401 /* Determine if the operation on the absolute values is effectively 1402 an addition or subtraction. */ 1403 subtract ^= static_cast<bool>(sign ^ rhs.sign); 1404 1405 /* Are we bigger exponent-wise than the RHS? */ 1406 bits = exponent - rhs.exponent; 1407 1408 /* Subtraction is more subtle than one might naively expect. */ 1409 if (subtract) { 1410 IEEEFloat temp_rhs(rhs); 1411 bool reverse; 1412 1413 if (bits == 0) { 1414 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan; 1415 lost_fraction = lfExactlyZero; 1416 } else if (bits > 0) { 1417 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1418 shiftSignificandLeft(1); 1419 reverse = false; 1420 } else { 1421 lost_fraction = shiftSignificandRight(-bits - 1); 1422 temp_rhs.shiftSignificandLeft(1); 1423 reverse = true; 1424 } 1425 1426 if (reverse) { 1427 carry = temp_rhs.subtractSignificand 1428 (*this, lost_fraction != lfExactlyZero); 1429 copySignificand(temp_rhs); 1430 sign = !sign; 1431 } else { 1432 carry = subtractSignificand 1433 (temp_rhs, lost_fraction != lfExactlyZero); 1434 } 1435 1436 /* Invert the lost fraction - it was on the RHS and 1437 subtracted. */ 1438 if (lost_fraction == lfLessThanHalf) 1439 lost_fraction = lfMoreThanHalf; 1440 else if (lost_fraction == lfMoreThanHalf) 1441 lost_fraction = lfLessThanHalf; 1442 1443 /* The code above is intended to ensure that no borrow is 1444 necessary. */ 1445 assert(!carry); 1446 (void)carry; 1447 } else { 1448 if (bits > 0) { 1449 IEEEFloat temp_rhs(rhs); 1450 1451 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1452 carry = addSignificand(temp_rhs); 1453 } else { 1454 lost_fraction = shiftSignificandRight(-bits); 1455 carry = addSignificand(rhs); 1456 } 1457 1458 /* We have a guard bit; generating a carry cannot happen. */ 1459 assert(!carry); 1460 (void)carry; 1461 } 1462 1463 return lost_fraction; 1464 } 1465 1466 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) { 1467 switch (PackCategoriesIntoKey(category, rhs.category)) { 1468 default: 1469 llvm_unreachable(nullptr); 1470 1471 case PackCategoriesIntoKey(fcNaN, fcZero): 1472 case PackCategoriesIntoKey(fcNaN, fcNormal): 1473 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1474 case PackCategoriesIntoKey(fcNaN, fcNaN): 1475 sign = false; 1476 return opOK; 1477 1478 case PackCategoriesIntoKey(fcZero, fcNaN): 1479 case PackCategoriesIntoKey(fcNormal, fcNaN): 1480 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1481 sign = false; 1482 category = fcNaN; 1483 copySignificand(rhs); 1484 return opOK; 1485 1486 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1487 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1488 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1489 category = fcInfinity; 1490 return opOK; 1491 1492 case PackCategoriesIntoKey(fcZero, fcNormal): 1493 case PackCategoriesIntoKey(fcNormal, fcZero): 1494 case PackCategoriesIntoKey(fcZero, fcZero): 1495 category = fcZero; 1496 return opOK; 1497 1498 case PackCategoriesIntoKey(fcZero, fcInfinity): 1499 case PackCategoriesIntoKey(fcInfinity, fcZero): 1500 makeNaN(); 1501 return opInvalidOp; 1502 1503 case PackCategoriesIntoKey(fcNormal, fcNormal): 1504 return opOK; 1505 } 1506 } 1507 1508 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) { 1509 switch (PackCategoriesIntoKey(category, rhs.category)) { 1510 default: 1511 llvm_unreachable(nullptr); 1512 1513 case PackCategoriesIntoKey(fcZero, fcNaN): 1514 case PackCategoriesIntoKey(fcNormal, fcNaN): 1515 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1516 category = fcNaN; 1517 copySignificand(rhs); 1518 case PackCategoriesIntoKey(fcNaN, fcZero): 1519 case PackCategoriesIntoKey(fcNaN, fcNormal): 1520 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1521 case PackCategoriesIntoKey(fcNaN, fcNaN): 1522 sign = false; 1523 case PackCategoriesIntoKey(fcInfinity, fcZero): 1524 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1525 case PackCategoriesIntoKey(fcZero, fcInfinity): 1526 case PackCategoriesIntoKey(fcZero, fcNormal): 1527 return opOK; 1528 1529 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1530 category = fcZero; 1531 return opOK; 1532 1533 case PackCategoriesIntoKey(fcNormal, fcZero): 1534 category = fcInfinity; 1535 return opDivByZero; 1536 1537 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1538 case PackCategoriesIntoKey(fcZero, fcZero): 1539 makeNaN(); 1540 return opInvalidOp; 1541 1542 case PackCategoriesIntoKey(fcNormal, fcNormal): 1543 return opOK; 1544 } 1545 } 1546 1547 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) { 1548 switch (PackCategoriesIntoKey(category, rhs.category)) { 1549 default: 1550 llvm_unreachable(nullptr); 1551 1552 case PackCategoriesIntoKey(fcNaN, fcZero): 1553 case PackCategoriesIntoKey(fcNaN, fcNormal): 1554 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1555 case PackCategoriesIntoKey(fcNaN, fcNaN): 1556 case PackCategoriesIntoKey(fcZero, fcInfinity): 1557 case PackCategoriesIntoKey(fcZero, fcNormal): 1558 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1559 return opOK; 1560 1561 case PackCategoriesIntoKey(fcZero, fcNaN): 1562 case PackCategoriesIntoKey(fcNormal, fcNaN): 1563 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1564 sign = false; 1565 category = fcNaN; 1566 copySignificand(rhs); 1567 return opOK; 1568 1569 case PackCategoriesIntoKey(fcNormal, fcZero): 1570 case PackCategoriesIntoKey(fcInfinity, fcZero): 1571 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1572 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1573 case PackCategoriesIntoKey(fcZero, fcZero): 1574 makeNaN(); 1575 return opInvalidOp; 1576 1577 case PackCategoriesIntoKey(fcNormal, fcNormal): 1578 return opOK; 1579 } 1580 } 1581 1582 /* Change sign. */ 1583 void IEEEFloat::changeSign() { 1584 /* Look mummy, this one's easy. */ 1585 sign = !sign; 1586 } 1587 1588 void IEEEFloat::clearSign() { 1589 /* So is this one. */ 1590 sign = 0; 1591 } 1592 1593 void IEEEFloat::copySign(const IEEEFloat &rhs) { 1594 /* And this one. */ 1595 sign = rhs.sign; 1596 } 1597 1598 /* Normalized addition or subtraction. */ 1599 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs, 1600 roundingMode rounding_mode, 1601 bool subtract) { 1602 opStatus fs; 1603 1604 fs = addOrSubtractSpecials(rhs, subtract); 1605 1606 /* This return code means it was not a simple case. */ 1607 if (fs == opDivByZero) { 1608 lostFraction lost_fraction; 1609 1610 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1611 fs = normalize(rounding_mode, lost_fraction); 1612 1613 /* Can only be zero if we lost no fraction. */ 1614 assert(category != fcZero || lost_fraction == lfExactlyZero); 1615 } 1616 1617 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1618 positive zero unless rounding to minus infinity, except that 1619 adding two like-signed zeroes gives that zero. */ 1620 if (category == fcZero) { 1621 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 1622 sign = (rounding_mode == rmTowardNegative); 1623 } 1624 1625 return fs; 1626 } 1627 1628 /* Normalized addition. */ 1629 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs, 1630 roundingMode rounding_mode) { 1631 return addOrSubtract(rhs, rounding_mode, false); 1632 } 1633 1634 /* Normalized subtraction. */ 1635 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs, 1636 roundingMode rounding_mode) { 1637 return addOrSubtract(rhs, rounding_mode, true); 1638 } 1639 1640 /* Normalized multiply. */ 1641 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs, 1642 roundingMode rounding_mode) { 1643 opStatus fs; 1644 1645 sign ^= rhs.sign; 1646 fs = multiplySpecials(rhs); 1647 1648 if (isFiniteNonZero()) { 1649 lostFraction lost_fraction = multiplySignificand(rhs, nullptr); 1650 fs = normalize(rounding_mode, lost_fraction); 1651 if (lost_fraction != lfExactlyZero) 1652 fs = (opStatus) (fs | opInexact); 1653 } 1654 1655 return fs; 1656 } 1657 1658 /* Normalized divide. */ 1659 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs, 1660 roundingMode rounding_mode) { 1661 opStatus fs; 1662 1663 sign ^= rhs.sign; 1664 fs = divideSpecials(rhs); 1665 1666 if (isFiniteNonZero()) { 1667 lostFraction lost_fraction = divideSignificand(rhs); 1668 fs = normalize(rounding_mode, lost_fraction); 1669 if (lost_fraction != lfExactlyZero) 1670 fs = (opStatus) (fs | opInexact); 1671 } 1672 1673 return fs; 1674 } 1675 1676 /* Normalized remainder. This is not currently correct in all cases. */ 1677 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) { 1678 opStatus fs; 1679 IEEEFloat V = *this; 1680 unsigned int origSign = sign; 1681 1682 fs = V.divide(rhs, rmNearestTiesToEven); 1683 if (fs == opDivByZero) 1684 return fs; 1685 1686 int parts = partCount(); 1687 integerPart *x = new integerPart[parts]; 1688 bool ignored; 1689 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1690 rmNearestTiesToEven, &ignored); 1691 if (fs==opInvalidOp) { 1692 delete[] x; 1693 return fs; 1694 } 1695 1696 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1697 rmNearestTiesToEven); 1698 assert(fs==opOK); // should always work 1699 1700 fs = V.multiply(rhs, rmNearestTiesToEven); 1701 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1702 1703 fs = subtract(V, rmNearestTiesToEven); 1704 assert(fs==opOK || fs==opInexact); // likewise 1705 1706 if (isZero()) 1707 sign = origSign; // IEEE754 requires this 1708 delete[] x; 1709 return fs; 1710 } 1711 1712 /* Normalized llvm frem (C fmod). 1713 This is not currently correct in all cases. */ 1714 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) { 1715 opStatus fs; 1716 fs = modSpecials(rhs); 1717 1718 if (isFiniteNonZero() && rhs.isFiniteNonZero()) { 1719 IEEEFloat V = *this; 1720 unsigned int origSign = sign; 1721 1722 fs = V.divide(rhs, rmNearestTiesToEven); 1723 if (fs == opDivByZero) 1724 return fs; 1725 1726 int parts = partCount(); 1727 integerPart *x = new integerPart[parts]; 1728 bool ignored; 1729 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1730 rmTowardZero, &ignored); 1731 if (fs==opInvalidOp) { 1732 delete[] x; 1733 return fs; 1734 } 1735 1736 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1737 rmNearestTiesToEven); 1738 assert(fs==opOK); // should always work 1739 1740 fs = V.multiply(rhs, rmNearestTiesToEven); 1741 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1742 1743 fs = subtract(V, rmNearestTiesToEven); 1744 assert(fs==opOK || fs==opInexact); // likewise 1745 1746 if (isZero()) 1747 sign = origSign; // IEEE754 requires this 1748 delete[] x; 1749 } 1750 return fs; 1751 } 1752 1753 /* Normalized fused-multiply-add. */ 1754 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand, 1755 const IEEEFloat &addend, 1756 roundingMode rounding_mode) { 1757 opStatus fs; 1758 1759 /* Post-multiplication sign, before addition. */ 1760 sign ^= multiplicand.sign; 1761 1762 /* If and only if all arguments are normal do we need to do an 1763 extended-precision calculation. */ 1764 if (isFiniteNonZero() && 1765 multiplicand.isFiniteNonZero() && 1766 addend.isFinite()) { 1767 lostFraction lost_fraction; 1768 1769 lost_fraction = multiplySignificand(multiplicand, &addend); 1770 fs = normalize(rounding_mode, lost_fraction); 1771 if (lost_fraction != lfExactlyZero) 1772 fs = (opStatus) (fs | opInexact); 1773 1774 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1775 positive zero unless rounding to minus infinity, except that 1776 adding two like-signed zeroes gives that zero. */ 1777 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) 1778 sign = (rounding_mode == rmTowardNegative); 1779 } else { 1780 fs = multiplySpecials(multiplicand); 1781 1782 /* FS can only be opOK or opInvalidOp. There is no more work 1783 to do in the latter case. The IEEE-754R standard says it is 1784 implementation-defined in this case whether, if ADDEND is a 1785 quiet NaN, we raise invalid op; this implementation does so. 1786 1787 If we need to do the addition we can do so with normal 1788 precision. */ 1789 if (fs == opOK) 1790 fs = addOrSubtract(addend, rounding_mode, false); 1791 } 1792 1793 return fs; 1794 } 1795 1796 /* Rounding-mode corrrect round to integral value. */ 1797 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) { 1798 opStatus fs; 1799 1800 // If the exponent is large enough, we know that this value is already 1801 // integral, and the arithmetic below would potentially cause it to saturate 1802 // to +/-Inf. Bail out early instead. 1803 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics)) 1804 return opOK; 1805 1806 // The algorithm here is quite simple: we add 2^(p-1), where p is the 1807 // precision of our format, and then subtract it back off again. The choice 1808 // of rounding modes for the addition/subtraction determines the rounding mode 1809 // for our integral rounding as well. 1810 // NOTE: When the input value is negative, we do subtraction followed by 1811 // addition instead. 1812 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); 1813 IntegerConstant <<= semanticsPrecision(*semantics)-1; 1814 IEEEFloat MagicConstant(*semantics); 1815 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 1816 rmNearestTiesToEven); 1817 MagicConstant.copySign(*this); 1818 1819 if (fs != opOK) 1820 return fs; 1821 1822 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly. 1823 bool inputSign = isNegative(); 1824 1825 fs = add(MagicConstant, rounding_mode); 1826 if (fs != opOK && fs != opInexact) 1827 return fs; 1828 1829 fs = subtract(MagicConstant, rounding_mode); 1830 1831 // Restore the input sign. 1832 if (inputSign != isNegative()) 1833 changeSign(); 1834 1835 return fs; 1836 } 1837 1838 1839 /* Comparison requires normalized numbers. */ 1840 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const { 1841 cmpResult result; 1842 1843 assert(semantics == rhs.semantics); 1844 1845 switch (PackCategoriesIntoKey(category, rhs.category)) { 1846 default: 1847 llvm_unreachable(nullptr); 1848 1849 case PackCategoriesIntoKey(fcNaN, fcZero): 1850 case PackCategoriesIntoKey(fcNaN, fcNormal): 1851 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1852 case PackCategoriesIntoKey(fcNaN, fcNaN): 1853 case PackCategoriesIntoKey(fcZero, fcNaN): 1854 case PackCategoriesIntoKey(fcNormal, fcNaN): 1855 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1856 return cmpUnordered; 1857 1858 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1859 case PackCategoriesIntoKey(fcInfinity, fcZero): 1860 case PackCategoriesIntoKey(fcNormal, fcZero): 1861 if (sign) 1862 return cmpLessThan; 1863 else 1864 return cmpGreaterThan; 1865 1866 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1867 case PackCategoriesIntoKey(fcZero, fcInfinity): 1868 case PackCategoriesIntoKey(fcZero, fcNormal): 1869 if (rhs.sign) 1870 return cmpGreaterThan; 1871 else 1872 return cmpLessThan; 1873 1874 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1875 if (sign == rhs.sign) 1876 return cmpEqual; 1877 else if (sign) 1878 return cmpLessThan; 1879 else 1880 return cmpGreaterThan; 1881 1882 case PackCategoriesIntoKey(fcZero, fcZero): 1883 return cmpEqual; 1884 1885 case PackCategoriesIntoKey(fcNormal, fcNormal): 1886 break; 1887 } 1888 1889 /* Two normal numbers. Do they have the same sign? */ 1890 if (sign != rhs.sign) { 1891 if (sign) 1892 result = cmpLessThan; 1893 else 1894 result = cmpGreaterThan; 1895 } else { 1896 /* Compare absolute values; invert result if negative. */ 1897 result = compareAbsoluteValue(rhs); 1898 1899 if (sign) { 1900 if (result == cmpLessThan) 1901 result = cmpGreaterThan; 1902 else if (result == cmpGreaterThan) 1903 result = cmpLessThan; 1904 } 1905 } 1906 1907 return result; 1908 } 1909 1910 /// IEEEFloat::convert - convert a value of one floating point type to another. 1911 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 1912 /// records whether the transformation lost information, i.e. whether 1913 /// converting the result back to the original type will produce the 1914 /// original value (this is almost the same as return value==fsOK, but there 1915 /// are edge cases where this is not so). 1916 1917 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, 1918 roundingMode rounding_mode, 1919 bool *losesInfo) { 1920 lostFraction lostFraction; 1921 unsigned int newPartCount, oldPartCount; 1922 opStatus fs; 1923 int shift; 1924 const fltSemantics &fromSemantics = *semantics; 1925 1926 lostFraction = lfExactlyZero; 1927 newPartCount = partCountForBits(toSemantics.precision + 1); 1928 oldPartCount = partCount(); 1929 shift = toSemantics.precision - fromSemantics.precision; 1930 1931 bool X86SpecialNan = false; 1932 if (&fromSemantics == &IEEEFloat::x87DoubleExtended && 1933 &toSemantics != &IEEEFloat::x87DoubleExtended && category == fcNaN && 1934 (!(*significandParts() & 0x8000000000000000ULL) || 1935 !(*significandParts() & 0x4000000000000000ULL))) { 1936 // x86 has some unusual NaNs which cannot be represented in any other 1937 // format; note them here. 1938 X86SpecialNan = true; 1939 } 1940 1941 // If this is a truncation of a denormal number, and the target semantics 1942 // has larger exponent range than the source semantics (this can happen 1943 // when truncating from PowerPC double-double to double format), the 1944 // right shift could lose result mantissa bits. Adjust exponent instead 1945 // of performing excessive shift. 1946 if (shift < 0 && isFiniteNonZero()) { 1947 int exponentChange = significandMSB() + 1 - fromSemantics.precision; 1948 if (exponent + exponentChange < toSemantics.minExponent) 1949 exponentChange = toSemantics.minExponent - exponent; 1950 if (exponentChange < shift) 1951 exponentChange = shift; 1952 if (exponentChange < 0) { 1953 shift -= exponentChange; 1954 exponent += exponentChange; 1955 } 1956 } 1957 1958 // If this is a truncation, perform the shift before we narrow the storage. 1959 if (shift < 0 && (isFiniteNonZero() || category==fcNaN)) 1960 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 1961 1962 // Fix the storage so it can hold to new value. 1963 if (newPartCount > oldPartCount) { 1964 // The new type requires more storage; make it available. 1965 integerPart *newParts; 1966 newParts = new integerPart[newPartCount]; 1967 APInt::tcSet(newParts, 0, newPartCount); 1968 if (isFiniteNonZero() || category==fcNaN) 1969 APInt::tcAssign(newParts, significandParts(), oldPartCount); 1970 freeSignificand(); 1971 significand.parts = newParts; 1972 } else if (newPartCount == 1 && oldPartCount != 1) { 1973 // Switch to built-in storage for a single part. 1974 integerPart newPart = 0; 1975 if (isFiniteNonZero() || category==fcNaN) 1976 newPart = significandParts()[0]; 1977 freeSignificand(); 1978 significand.part = newPart; 1979 } 1980 1981 // Now that we have the right storage, switch the semantics. 1982 semantics = &toSemantics; 1983 1984 // If this is an extension, perform the shift now that the storage is 1985 // available. 1986 if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) 1987 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 1988 1989 if (isFiniteNonZero()) { 1990 fs = normalize(rounding_mode, lostFraction); 1991 *losesInfo = (fs != opOK); 1992 } else if (category == fcNaN) { 1993 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 1994 1995 // For x87 extended precision, we want to make a NaN, not a special NaN if 1996 // the input wasn't special either. 1997 if (!X86SpecialNan && semantics == &IEEEFloat::x87DoubleExtended) 1998 APInt::tcSetBit(significandParts(), semantics->precision - 1); 1999 2000 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) 2001 // does not give you back the same bits. This is dubious, and we 2002 // don't currently do it. You're really supposed to get 2003 // an invalid operation signal at runtime, but nobody does that. 2004 fs = opOK; 2005 } else { 2006 *losesInfo = false; 2007 fs = opOK; 2008 } 2009 2010 return fs; 2011 } 2012 2013 /* Convert a floating point number to an integer according to the 2014 rounding mode. If the rounded integer value is out of range this 2015 returns an invalid operation exception and the contents of the 2016 destination parts are unspecified. If the rounded value is in 2017 range but the floating point number is not the exact integer, the C 2018 standard doesn't require an inexact exception to be raised. IEEE 2019 854 does require it so we do that. 2020 2021 Note that for conversions to integer type the C standard requires 2022 round-to-zero to always be used. */ 2023 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger( 2024 integerPart *parts, unsigned int width, bool isSigned, 2025 roundingMode rounding_mode, bool *isExact) const { 2026 lostFraction lost_fraction; 2027 const integerPart *src; 2028 unsigned int dstPartsCount, truncatedBits; 2029 2030 *isExact = false; 2031 2032 /* Handle the three special cases first. */ 2033 if (category == fcInfinity || category == fcNaN) 2034 return opInvalidOp; 2035 2036 dstPartsCount = partCountForBits(width); 2037 2038 if (category == fcZero) { 2039 APInt::tcSet(parts, 0, dstPartsCount); 2040 // Negative zero can't be represented as an int. 2041 *isExact = !sign; 2042 return opOK; 2043 } 2044 2045 src = significandParts(); 2046 2047 /* Step 1: place our absolute value, with any fraction truncated, in 2048 the destination. */ 2049 if (exponent < 0) { 2050 /* Our absolute value is less than one; truncate everything. */ 2051 APInt::tcSet(parts, 0, dstPartsCount); 2052 /* For exponent -1 the integer bit represents .5, look at that. 2053 For smaller exponents leftmost truncated bit is 0. */ 2054 truncatedBits = semantics->precision -1U - exponent; 2055 } else { 2056 /* We want the most significant (exponent + 1) bits; the rest are 2057 truncated. */ 2058 unsigned int bits = exponent + 1U; 2059 2060 /* Hopelessly large in magnitude? */ 2061 if (bits > width) 2062 return opInvalidOp; 2063 2064 if (bits < semantics->precision) { 2065 /* We truncate (semantics->precision - bits) bits. */ 2066 truncatedBits = semantics->precision - bits; 2067 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits); 2068 } else { 2069 /* We want at least as many bits as are available. */ 2070 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0); 2071 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision); 2072 truncatedBits = 0; 2073 } 2074 } 2075 2076 /* Step 2: work out any lost fraction, and increment the absolute 2077 value if we would round away from zero. */ 2078 if (truncatedBits) { 2079 lost_fraction = lostFractionThroughTruncation(src, partCount(), 2080 truncatedBits); 2081 if (lost_fraction != lfExactlyZero && 2082 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2083 if (APInt::tcIncrement(parts, dstPartsCount)) 2084 return opInvalidOp; /* Overflow. */ 2085 } 2086 } else { 2087 lost_fraction = lfExactlyZero; 2088 } 2089 2090 /* Step 3: check if we fit in the destination. */ 2091 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1; 2092 2093 if (sign) { 2094 if (!isSigned) { 2095 /* Negative numbers cannot be represented as unsigned. */ 2096 if (omsb != 0) 2097 return opInvalidOp; 2098 } else { 2099 /* It takes omsb bits to represent the unsigned integer value. 2100 We lose a bit for the sign, but care is needed as the 2101 maximally negative integer is a special case. */ 2102 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb) 2103 return opInvalidOp; 2104 2105 /* This case can happen because of rounding. */ 2106 if (omsb > width) 2107 return opInvalidOp; 2108 } 2109 2110 APInt::tcNegate (parts, dstPartsCount); 2111 } else { 2112 if (omsb >= width + !isSigned) 2113 return opInvalidOp; 2114 } 2115 2116 if (lost_fraction == lfExactlyZero) { 2117 *isExact = true; 2118 return opOK; 2119 } else 2120 return opInexact; 2121 } 2122 2123 /* Same as convertToSignExtendedInteger, except we provide 2124 deterministic values in case of an invalid operation exception, 2125 namely zero for NaNs and the minimal or maximal value respectively 2126 for underflow or overflow. 2127 The *isExact output tells whether the result is exact, in the sense 2128 that converting it back to the original floating point type produces 2129 the original value. This is almost equivalent to result==opOK, 2130 except for negative zeroes. 2131 */ 2132 IEEEFloat::opStatus IEEEFloat::convertToInteger(integerPart *parts, 2133 unsigned int width, 2134 bool isSigned, 2135 roundingMode rounding_mode, 2136 bool *isExact) const { 2137 opStatus fs; 2138 2139 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2140 isExact); 2141 2142 if (fs == opInvalidOp) { 2143 unsigned int bits, dstPartsCount; 2144 2145 dstPartsCount = partCountForBits(width); 2146 2147 if (category == fcNaN) 2148 bits = 0; 2149 else if (sign) 2150 bits = isSigned; 2151 else 2152 bits = width - isSigned; 2153 2154 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits); 2155 if (sign && isSigned) 2156 APInt::tcShiftLeft(parts, dstPartsCount, width - 1); 2157 } 2158 2159 return fs; 2160 } 2161 2162 /* Same as convertToInteger(integerPart*, ...), except the result is returned in 2163 an APSInt, whose initial bit-width and signed-ness are used to determine the 2164 precision of the conversion. 2165 */ 2166 IEEEFloat::opStatus IEEEFloat::convertToInteger(APSInt &result, 2167 roundingMode rounding_mode, 2168 bool *isExact) const { 2169 unsigned bitWidth = result.getBitWidth(); 2170 SmallVector<uint64_t, 4> parts(result.getNumWords()); 2171 opStatus status = convertToInteger( 2172 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact); 2173 // Keeps the original signed-ness. 2174 result = APInt(bitWidth, parts); 2175 return status; 2176 } 2177 2178 /* Convert an unsigned integer SRC to a floating point number, 2179 rounding according to ROUNDING_MODE. The sign of the floating 2180 point number is not modified. */ 2181 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts( 2182 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) { 2183 unsigned int omsb, precision, dstCount; 2184 integerPart *dst; 2185 lostFraction lost_fraction; 2186 2187 category = fcNormal; 2188 omsb = APInt::tcMSB(src, srcCount) + 1; 2189 dst = significandParts(); 2190 dstCount = partCount(); 2191 precision = semantics->precision; 2192 2193 /* We want the most significant PRECISION bits of SRC. There may not 2194 be that many; extract what we can. */ 2195 if (precision <= omsb) { 2196 exponent = omsb - 1; 2197 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2198 omsb - precision); 2199 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2200 } else { 2201 exponent = precision - 1; 2202 lost_fraction = lfExactlyZero; 2203 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2204 } 2205 2206 return normalize(rounding_mode, lost_fraction); 2207 } 2208 2209 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned, 2210 roundingMode rounding_mode) { 2211 unsigned int partCount = Val.getNumWords(); 2212 APInt api = Val; 2213 2214 sign = false; 2215 if (isSigned && api.isNegative()) { 2216 sign = true; 2217 api = -api; 2218 } 2219 2220 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2221 } 2222 2223 /* Convert a two's complement integer SRC to a floating point number, 2224 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2225 integer is signed, in which case it must be sign-extended. */ 2226 IEEEFloat::opStatus 2227 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src, 2228 unsigned int srcCount, bool isSigned, 2229 roundingMode rounding_mode) { 2230 opStatus status; 2231 2232 if (isSigned && 2233 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2234 integerPart *copy; 2235 2236 /* If we're signed and negative negate a copy. */ 2237 sign = true; 2238 copy = new integerPart[srcCount]; 2239 APInt::tcAssign(copy, src, srcCount); 2240 APInt::tcNegate(copy, srcCount); 2241 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2242 delete [] copy; 2243 } else { 2244 sign = false; 2245 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2246 } 2247 2248 return status; 2249 } 2250 2251 /* FIXME: should this just take a const APInt reference? */ 2252 IEEEFloat::opStatus 2253 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2254 unsigned int width, bool isSigned, 2255 roundingMode rounding_mode) { 2256 unsigned int partCount = partCountForBits(width); 2257 APInt api = APInt(width, makeArrayRef(parts, partCount)); 2258 2259 sign = false; 2260 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2261 sign = true; 2262 api = -api; 2263 } 2264 2265 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2266 } 2267 2268 IEEEFloat::opStatus 2269 IEEEFloat::convertFromHexadecimalString(StringRef s, 2270 roundingMode rounding_mode) { 2271 lostFraction lost_fraction = lfExactlyZero; 2272 2273 category = fcNormal; 2274 zeroSignificand(); 2275 exponent = 0; 2276 2277 integerPart *significand = significandParts(); 2278 unsigned partsCount = partCount(); 2279 unsigned bitPos = partsCount * integerPartWidth; 2280 bool computedTrailingFraction = false; 2281 2282 // Skip leading zeroes and any (hexa)decimal point. 2283 StringRef::iterator begin = s.begin(); 2284 StringRef::iterator end = s.end(); 2285 StringRef::iterator dot; 2286 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2287 StringRef::iterator firstSignificantDigit = p; 2288 2289 while (p != end) { 2290 integerPart hex_value; 2291 2292 if (*p == '.') { 2293 assert(dot == end && "String contains multiple dots"); 2294 dot = p++; 2295 continue; 2296 } 2297 2298 hex_value = hexDigitValue(*p); 2299 if (hex_value == -1U) 2300 break; 2301 2302 p++; 2303 2304 // Store the number while we have space. 2305 if (bitPos) { 2306 bitPos -= 4; 2307 hex_value <<= bitPos % integerPartWidth; 2308 significand[bitPos / integerPartWidth] |= hex_value; 2309 } else if (!computedTrailingFraction) { 2310 lost_fraction = trailingHexadecimalFraction(p, end, hex_value); 2311 computedTrailingFraction = true; 2312 } 2313 } 2314 2315 /* Hex floats require an exponent but not a hexadecimal point. */ 2316 assert(p != end && "Hex strings require an exponent"); 2317 assert((*p == 'p' || *p == 'P') && "Invalid character in significand"); 2318 assert(p != begin && "Significand has no digits"); 2319 assert((dot == end || p - begin != 1) && "Significand has no digits"); 2320 2321 /* Ignore the exponent if we are zero. */ 2322 if (p != firstSignificantDigit) { 2323 int expAdjustment; 2324 2325 /* Implicit hexadecimal point? */ 2326 if (dot == end) 2327 dot = p; 2328 2329 /* Calculate the exponent adjustment implicit in the number of 2330 significant digits. */ 2331 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 2332 if (expAdjustment < 0) 2333 expAdjustment++; 2334 expAdjustment = expAdjustment * 4 - 1; 2335 2336 /* Adjust for writing the significand starting at the most 2337 significant nibble. */ 2338 expAdjustment += semantics->precision; 2339 expAdjustment -= partsCount * integerPartWidth; 2340 2341 /* Adjust for the given exponent. */ 2342 exponent = totalExponent(p + 1, end, expAdjustment); 2343 } 2344 2345 return normalize(rounding_mode, lost_fraction); 2346 } 2347 2348 IEEEFloat::opStatus 2349 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2350 unsigned sigPartCount, int exp, 2351 roundingMode rounding_mode) { 2352 unsigned int parts, pow5PartCount; 2353 fltSemantics calcSemantics = { 32767, -32767, 0, 0 }; 2354 integerPart pow5Parts[maxPowerOfFiveParts]; 2355 bool isNearest; 2356 2357 isNearest = (rounding_mode == rmNearestTiesToEven || 2358 rounding_mode == rmNearestTiesToAway); 2359 2360 parts = partCountForBits(semantics->precision + 11); 2361 2362 /* Calculate pow(5, abs(exp)). */ 2363 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2364 2365 for (;; parts *= 2) { 2366 opStatus sigStatus, powStatus; 2367 unsigned int excessPrecision, truncatedBits; 2368 2369 calcSemantics.precision = parts * integerPartWidth - 1; 2370 excessPrecision = calcSemantics.precision - semantics->precision; 2371 truncatedBits = excessPrecision; 2372 2373 IEEEFloat decSig(calcSemantics, uninitialized); 2374 decSig.makeZero(sign); 2375 IEEEFloat pow5(calcSemantics); 2376 2377 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2378 rmNearestTiesToEven); 2379 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2380 rmNearestTiesToEven); 2381 /* Add exp, as 10^n = 5^n * 2^n. */ 2382 decSig.exponent += exp; 2383 2384 lostFraction calcLostFraction; 2385 integerPart HUerr, HUdistance; 2386 unsigned int powHUerr; 2387 2388 if (exp >= 0) { 2389 /* multiplySignificand leaves the precision-th bit set to 1. */ 2390 calcLostFraction = decSig.multiplySignificand(pow5, nullptr); 2391 powHUerr = powStatus != opOK; 2392 } else { 2393 calcLostFraction = decSig.divideSignificand(pow5); 2394 /* Denormal numbers have less precision. */ 2395 if (decSig.exponent < semantics->minExponent) { 2396 excessPrecision += (semantics->minExponent - decSig.exponent); 2397 truncatedBits = excessPrecision; 2398 if (excessPrecision > calcSemantics.precision) 2399 excessPrecision = calcSemantics.precision; 2400 } 2401 /* Extra half-ulp lost in reciprocal of exponent. */ 2402 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 2403 } 2404 2405 /* Both multiplySignificand and divideSignificand return the 2406 result with the integer bit set. */ 2407 assert(APInt::tcExtractBit 2408 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2409 2410 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2411 powHUerr); 2412 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2413 excessPrecision, isNearest); 2414 2415 /* Are we guaranteed to round correctly if we truncate? */ 2416 if (HUdistance >= HUerr) { 2417 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2418 calcSemantics.precision - excessPrecision, 2419 excessPrecision); 2420 /* Take the exponent of decSig. If we tcExtract-ed less bits 2421 above we must adjust our exponent to compensate for the 2422 implicit right shift. */ 2423 exponent = (decSig.exponent + semantics->precision 2424 - (calcSemantics.precision - excessPrecision)); 2425 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2426 decSig.partCount(), 2427 truncatedBits); 2428 return normalize(rounding_mode, calcLostFraction); 2429 } 2430 } 2431 } 2432 2433 IEEEFloat::opStatus 2434 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) { 2435 decimalInfo D; 2436 opStatus fs; 2437 2438 /* Scan the text. */ 2439 StringRef::iterator p = str.begin(); 2440 interpretDecimal(p, str.end(), &D); 2441 2442 /* Handle the quick cases. First the case of no significant digits, 2443 i.e. zero, and then exponents that are obviously too large or too 2444 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 2445 definitely overflows if 2446 2447 (exp - 1) * L >= maxExponent 2448 2449 and definitely underflows to zero where 2450 2451 (exp + 1) * L <= minExponent - precision 2452 2453 With integer arithmetic the tightest bounds for L are 2454 2455 93/28 < L < 196/59 [ numerator <= 256 ] 2456 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 2457 */ 2458 2459 // Test if we have a zero number allowing for strings with no null terminators 2460 // and zero decimals with non-zero exponents. 2461 // 2462 // We computed firstSigDigit by ignoring all zeros and dots. Thus if 2463 // D->firstSigDigit equals str.end(), every digit must be a zero and there can 2464 // be at most one dot. On the other hand, if we have a zero with a non-zero 2465 // exponent, then we know that D.firstSigDigit will be non-numeric. 2466 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { 2467 category = fcZero; 2468 fs = opOK; 2469 2470 /* Check whether the normalized exponent is high enough to overflow 2471 max during the log-rebasing in the max-exponent check below. */ 2472 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 2473 fs = handleOverflow(rounding_mode); 2474 2475 /* If it wasn't, then it also wasn't high enough to overflow max 2476 during the log-rebasing in the min-exponent check. Check that it 2477 won't overflow min in either check, then perform the min-exponent 2478 check. */ 2479 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 2480 (D.normalizedExponent + 1) * 28738 <= 2481 8651 * (semantics->minExponent - (int) semantics->precision)) { 2482 /* Underflow to zero and round. */ 2483 category = fcNormal; 2484 zeroSignificand(); 2485 fs = normalize(rounding_mode, lfLessThanHalf); 2486 2487 /* We can finally safely perform the max-exponent check. */ 2488 } else if ((D.normalizedExponent - 1) * 42039 2489 >= 12655 * semantics->maxExponent) { 2490 /* Overflow and round. */ 2491 fs = handleOverflow(rounding_mode); 2492 } else { 2493 integerPart *decSignificand; 2494 unsigned int partCount; 2495 2496 /* A tight upper bound on number of bits required to hold an 2497 N-digit decimal integer is N * 196 / 59. Allocate enough space 2498 to hold the full significand, and an extra part required by 2499 tcMultiplyPart. */ 2500 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 2501 partCount = partCountForBits(1 + 196 * partCount / 59); 2502 decSignificand = new integerPart[partCount + 1]; 2503 partCount = 0; 2504 2505 /* Convert to binary efficiently - we do almost all multiplication 2506 in an integerPart. When this would overflow do we do a single 2507 bignum multiplication, and then revert again to multiplication 2508 in an integerPart. */ 2509 do { 2510 integerPart decValue, val, multiplier; 2511 2512 val = 0; 2513 multiplier = 1; 2514 2515 do { 2516 if (*p == '.') { 2517 p++; 2518 if (p == str.end()) { 2519 break; 2520 } 2521 } 2522 decValue = decDigitValue(*p++); 2523 assert(decValue < 10U && "Invalid character in significand"); 2524 multiplier *= 10; 2525 val = val * 10 + decValue; 2526 /* The maximum number that can be multiplied by ten with any 2527 digit added without overflowing an integerPart. */ 2528 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 2529 2530 /* Multiply out the current part. */ 2531 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 2532 partCount, partCount + 1, false); 2533 2534 /* If we used another part (likely but not guaranteed), increase 2535 the count. */ 2536 if (decSignificand[partCount]) 2537 partCount++; 2538 } while (p <= D.lastSigDigit); 2539 2540 category = fcNormal; 2541 fs = roundSignificandWithExponent(decSignificand, partCount, 2542 D.exponent, rounding_mode); 2543 2544 delete [] decSignificand; 2545 } 2546 2547 return fs; 2548 } 2549 2550 bool IEEEFloat::convertFromStringSpecials(StringRef str) { 2551 if (str.equals("inf") || str.equals("INFINITY")) { 2552 makeInf(false); 2553 return true; 2554 } 2555 2556 if (str.equals("-inf") || str.equals("-INFINITY")) { 2557 makeInf(true); 2558 return true; 2559 } 2560 2561 if (str.equals("nan") || str.equals("NaN")) { 2562 makeNaN(false, false); 2563 return true; 2564 } 2565 2566 if (str.equals("-nan") || str.equals("-NaN")) { 2567 makeNaN(false, true); 2568 return true; 2569 } 2570 2571 return false; 2572 } 2573 2574 IEEEFloat::opStatus IEEEFloat::convertFromString(StringRef str, 2575 roundingMode rounding_mode) { 2576 assert(!str.empty() && "Invalid string length"); 2577 2578 // Handle special cases. 2579 if (convertFromStringSpecials(str)) 2580 return opOK; 2581 2582 /* Handle a leading minus sign. */ 2583 StringRef::iterator p = str.begin(); 2584 size_t slen = str.size(); 2585 sign = *p == '-' ? 1 : 0; 2586 if (*p == '-' || *p == '+') { 2587 p++; 2588 slen--; 2589 assert(slen && "String has no digits"); 2590 } 2591 2592 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 2593 assert(slen - 2 && "Invalid string"); 2594 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 2595 rounding_mode); 2596 } 2597 2598 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 2599 } 2600 2601 /* Write out a hexadecimal representation of the floating point value 2602 to DST, which must be of sufficient size, in the C99 form 2603 [-]0xh.hhhhp[+-]d. Return the number of characters written, 2604 excluding the terminating NUL. 2605 2606 If UPPERCASE, the output is in upper case, otherwise in lower case. 2607 2608 HEXDIGITS digits appear altogether, rounding the value if 2609 necessary. If HEXDIGITS is 0, the minimal precision to display the 2610 number precisely is used instead. If nothing would appear after 2611 the decimal point it is suppressed. 2612 2613 The decimal exponent is always printed and has at least one digit. 2614 Zero values display an exponent of zero. Infinities and NaNs 2615 appear as "infinity" or "nan" respectively. 2616 2617 The above rules are as specified by C99. There is ambiguity about 2618 what the leading hexadecimal digit should be. This implementation 2619 uses whatever is necessary so that the exponent is displayed as 2620 stored. This implies the exponent will fall within the IEEE format 2621 range, and the leading hexadecimal digit will be 0 (for denormals), 2622 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 2623 any other digits zero). 2624 */ 2625 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits, 2626 bool upperCase, 2627 roundingMode rounding_mode) const { 2628 char *p; 2629 2630 p = dst; 2631 if (sign) 2632 *dst++ = '-'; 2633 2634 switch (category) { 2635 case fcInfinity: 2636 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 2637 dst += sizeof infinityL - 1; 2638 break; 2639 2640 case fcNaN: 2641 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 2642 dst += sizeof NaNU - 1; 2643 break; 2644 2645 case fcZero: 2646 *dst++ = '0'; 2647 *dst++ = upperCase ? 'X': 'x'; 2648 *dst++ = '0'; 2649 if (hexDigits > 1) { 2650 *dst++ = '.'; 2651 memset (dst, '0', hexDigits - 1); 2652 dst += hexDigits - 1; 2653 } 2654 *dst++ = upperCase ? 'P': 'p'; 2655 *dst++ = '0'; 2656 break; 2657 2658 case fcNormal: 2659 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 2660 break; 2661 } 2662 2663 *dst = 0; 2664 2665 return static_cast<unsigned int>(dst - p); 2666 } 2667 2668 /* Does the hard work of outputting the correctly rounded hexadecimal 2669 form of a normal floating point number with the specified number of 2670 hexadecimal digits. If HEXDIGITS is zero the minimum number of 2671 digits necessary to print the value precisely is output. */ 2672 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 2673 bool upperCase, 2674 roundingMode rounding_mode) const { 2675 unsigned int count, valueBits, shift, partsCount, outputDigits; 2676 const char *hexDigitChars; 2677 const integerPart *significand; 2678 char *p; 2679 bool roundUp; 2680 2681 *dst++ = '0'; 2682 *dst++ = upperCase ? 'X': 'x'; 2683 2684 roundUp = false; 2685 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 2686 2687 significand = significandParts(); 2688 partsCount = partCount(); 2689 2690 /* +3 because the first digit only uses the single integer bit, so 2691 we have 3 virtual zero most-significant-bits. */ 2692 valueBits = semantics->precision + 3; 2693 shift = integerPartWidth - valueBits % integerPartWidth; 2694 2695 /* The natural number of digits required ignoring trailing 2696 insignificant zeroes. */ 2697 outputDigits = (valueBits - significandLSB () + 3) / 4; 2698 2699 /* hexDigits of zero means use the required number for the 2700 precision. Otherwise, see if we are truncating. If we are, 2701 find out if we need to round away from zero. */ 2702 if (hexDigits) { 2703 if (hexDigits < outputDigits) { 2704 /* We are dropping non-zero bits, so need to check how to round. 2705 "bits" is the number of dropped bits. */ 2706 unsigned int bits; 2707 lostFraction fraction; 2708 2709 bits = valueBits - hexDigits * 4; 2710 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 2711 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 2712 } 2713 outputDigits = hexDigits; 2714 } 2715 2716 /* Write the digits consecutively, and start writing in the location 2717 of the hexadecimal point. We move the most significant digit 2718 left and add the hexadecimal point later. */ 2719 p = ++dst; 2720 2721 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 2722 2723 while (outputDigits && count) { 2724 integerPart part; 2725 2726 /* Put the most significant integerPartWidth bits in "part". */ 2727 if (--count == partsCount) 2728 part = 0; /* An imaginary higher zero part. */ 2729 else 2730 part = significand[count] << shift; 2731 2732 if (count && shift) 2733 part |= significand[count - 1] >> (integerPartWidth - shift); 2734 2735 /* Convert as much of "part" to hexdigits as we can. */ 2736 unsigned int curDigits = integerPartWidth / 4; 2737 2738 if (curDigits > outputDigits) 2739 curDigits = outputDigits; 2740 dst += partAsHex (dst, part, curDigits, hexDigitChars); 2741 outputDigits -= curDigits; 2742 } 2743 2744 if (roundUp) { 2745 char *q = dst; 2746 2747 /* Note that hexDigitChars has a trailing '0'. */ 2748 do { 2749 q--; 2750 *q = hexDigitChars[hexDigitValue (*q) + 1]; 2751 } while (*q == '0'); 2752 assert(q >= p); 2753 } else { 2754 /* Add trailing zeroes. */ 2755 memset (dst, '0', outputDigits); 2756 dst += outputDigits; 2757 } 2758 2759 /* Move the most significant digit to before the point, and if there 2760 is something after the decimal point add it. This must come 2761 after rounding above. */ 2762 p[-1] = p[0]; 2763 if (dst -1 == p) 2764 dst--; 2765 else 2766 p[0] = '.'; 2767 2768 /* Finally output the exponent. */ 2769 *dst++ = upperCase ? 'P': 'p'; 2770 2771 return writeSignedDecimal (dst, exponent); 2772 } 2773 2774 hash_code hash_value(const IEEEFloat &Arg) { 2775 if (!Arg.isFiniteNonZero()) 2776 return hash_combine((uint8_t)Arg.category, 2777 // NaN has no sign, fix it at zero. 2778 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, 2779 Arg.semantics->precision); 2780 2781 // Normal floats need their exponent and significand hashed. 2782 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, 2783 Arg.semantics->precision, Arg.exponent, 2784 hash_combine_range( 2785 Arg.significandParts(), 2786 Arg.significandParts() + Arg.partCount())); 2787 } 2788 2789 // Conversion from APFloat to/from host float/double. It may eventually be 2790 // possible to eliminate these and have everybody deal with APFloats, but that 2791 // will take a while. This approach will not easily extend to long double. 2792 // Current implementation requires integerPartWidth==64, which is correct at 2793 // the moment but could be made more general. 2794 2795 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 2796 // the actual IEEE respresentations. We compensate for that here. 2797 2798 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const { 2799 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended); 2800 assert(partCount()==2); 2801 2802 uint64_t myexponent, mysignificand; 2803 2804 if (isFiniteNonZero()) { 2805 myexponent = exponent+16383; //bias 2806 mysignificand = significandParts()[0]; 2807 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 2808 myexponent = 0; // denormal 2809 } else if (category==fcZero) { 2810 myexponent = 0; 2811 mysignificand = 0; 2812 } else if (category==fcInfinity) { 2813 myexponent = 0x7fff; 2814 mysignificand = 0x8000000000000000ULL; 2815 } else { 2816 assert(category == fcNaN && "Unknown category"); 2817 myexponent = 0x7fff; 2818 mysignificand = significandParts()[0]; 2819 } 2820 2821 uint64_t words[2]; 2822 words[0] = mysignificand; 2823 words[1] = ((uint64_t)(sign & 1) << 15) | 2824 (myexponent & 0x7fffLL); 2825 return APInt(80, words); 2826 } 2827 2828 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const { 2829 assert(semantics == (const llvm::fltSemantics *)&PPCDoubleDoubleImpl); 2830 assert(partCount()==2); 2831 2832 uint64_t words[2]; 2833 opStatus fs; 2834 bool losesInfo; 2835 2836 // Convert number to double. To avoid spurious underflows, we re- 2837 // normalize against the "double" minExponent first, and only *then* 2838 // truncate the mantissa. The result of that second conversion 2839 // may be inexact, but should never underflow. 2840 // Declare fltSemantics before APFloat that uses it (and 2841 // saves pointer to it) to ensure correct destruction order. 2842 fltSemantics extendedSemantics = *semantics; 2843 extendedSemantics.minExponent = IEEEdouble.minExponent; 2844 IEEEFloat extended(*this); 2845 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2846 assert(fs == opOK && !losesInfo); 2847 (void)fs; 2848 2849 IEEEFloat u(extended); 2850 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); 2851 assert(fs == opOK || fs == opInexact); 2852 (void)fs; 2853 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 2854 2855 // If conversion was exact or resulted in a special case, we're done; 2856 // just set the second double to zero. Otherwise, re-convert back to 2857 // the extended format and compute the difference. This now should 2858 // convert exactly to double. 2859 if (u.isFiniteNonZero() && losesInfo) { 2860 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2861 assert(fs == opOK && !losesInfo); 2862 (void)fs; 2863 2864 IEEEFloat v(extended); 2865 v.subtract(u, rmNearestTiesToEven); 2866 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); 2867 assert(fs == opOK && !losesInfo); 2868 (void)fs; 2869 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 2870 } else { 2871 words[1] = 0; 2872 } 2873 2874 return APInt(128, words); 2875 } 2876 2877 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const { 2878 assert(semantics == (const llvm::fltSemantics*)&IEEEquad); 2879 assert(partCount()==2); 2880 2881 uint64_t myexponent, mysignificand, mysignificand2; 2882 2883 if (isFiniteNonZero()) { 2884 myexponent = exponent+16383; //bias 2885 mysignificand = significandParts()[0]; 2886 mysignificand2 = significandParts()[1]; 2887 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) 2888 myexponent = 0; // denormal 2889 } else if (category==fcZero) { 2890 myexponent = 0; 2891 mysignificand = mysignificand2 = 0; 2892 } else if (category==fcInfinity) { 2893 myexponent = 0x7fff; 2894 mysignificand = mysignificand2 = 0; 2895 } else { 2896 assert(category == fcNaN && "Unknown category!"); 2897 myexponent = 0x7fff; 2898 mysignificand = significandParts()[0]; 2899 mysignificand2 = significandParts()[1]; 2900 } 2901 2902 uint64_t words[2]; 2903 words[0] = mysignificand; 2904 words[1] = ((uint64_t)(sign & 1) << 63) | 2905 ((myexponent & 0x7fff) << 48) | 2906 (mysignificand2 & 0xffffffffffffLL); 2907 2908 return APInt(128, words); 2909 } 2910 2911 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const { 2912 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); 2913 assert(partCount()==1); 2914 2915 uint64_t myexponent, mysignificand; 2916 2917 if (isFiniteNonZero()) { 2918 myexponent = exponent+1023; //bias 2919 mysignificand = *significandParts(); 2920 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 2921 myexponent = 0; // denormal 2922 } else if (category==fcZero) { 2923 myexponent = 0; 2924 mysignificand = 0; 2925 } else if (category==fcInfinity) { 2926 myexponent = 0x7ff; 2927 mysignificand = 0; 2928 } else { 2929 assert(category == fcNaN && "Unknown category!"); 2930 myexponent = 0x7ff; 2931 mysignificand = *significandParts(); 2932 } 2933 2934 return APInt(64, ((((uint64_t)(sign & 1) << 63) | 2935 ((myexponent & 0x7ff) << 52) | 2936 (mysignificand & 0xfffffffffffffLL)))); 2937 } 2938 2939 APInt IEEEFloat::convertFloatAPFloatToAPInt() const { 2940 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); 2941 assert(partCount()==1); 2942 2943 uint32_t myexponent, mysignificand; 2944 2945 if (isFiniteNonZero()) { 2946 myexponent = exponent+127; //bias 2947 mysignificand = (uint32_t)*significandParts(); 2948 if (myexponent == 1 && !(mysignificand & 0x800000)) 2949 myexponent = 0; // denormal 2950 } else if (category==fcZero) { 2951 myexponent = 0; 2952 mysignificand = 0; 2953 } else if (category==fcInfinity) { 2954 myexponent = 0xff; 2955 mysignificand = 0; 2956 } else { 2957 assert(category == fcNaN && "Unknown category!"); 2958 myexponent = 0xff; 2959 mysignificand = (uint32_t)*significandParts(); 2960 } 2961 2962 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 2963 (mysignificand & 0x7fffff))); 2964 } 2965 2966 APInt IEEEFloat::convertHalfAPFloatToAPInt() const { 2967 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf); 2968 assert(partCount()==1); 2969 2970 uint32_t myexponent, mysignificand; 2971 2972 if (isFiniteNonZero()) { 2973 myexponent = exponent+15; //bias 2974 mysignificand = (uint32_t)*significandParts(); 2975 if (myexponent == 1 && !(mysignificand & 0x400)) 2976 myexponent = 0; // denormal 2977 } else if (category==fcZero) { 2978 myexponent = 0; 2979 mysignificand = 0; 2980 } else if (category==fcInfinity) { 2981 myexponent = 0x1f; 2982 mysignificand = 0; 2983 } else { 2984 assert(category == fcNaN && "Unknown category!"); 2985 myexponent = 0x1f; 2986 mysignificand = (uint32_t)*significandParts(); 2987 } 2988 2989 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | 2990 (mysignificand & 0x3ff))); 2991 } 2992 2993 // This function creates an APInt that is just a bit map of the floating 2994 // point constant as it would appear in memory. It is not a conversion, 2995 // and treating the result as a normal integer is unlikely to be useful. 2996 2997 APInt IEEEFloat::bitcastToAPInt() const { 2998 if (semantics == (const llvm::fltSemantics*)&IEEEhalf) 2999 return convertHalfAPFloatToAPInt(); 3000 3001 if (semantics == (const llvm::fltSemantics*)&IEEEsingle) 3002 return convertFloatAPFloatToAPInt(); 3003 3004 if (semantics == (const llvm::fltSemantics*)&IEEEdouble) 3005 return convertDoubleAPFloatToAPInt(); 3006 3007 if (semantics == (const llvm::fltSemantics*)&IEEEquad) 3008 return convertQuadrupleAPFloatToAPInt(); 3009 3010 if (semantics == (const llvm::fltSemantics *)&PPCDoubleDoubleImpl) 3011 return convertPPCDoubleDoubleAPFloatToAPInt(); 3012 3013 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended && 3014 "unknown format!"); 3015 return convertF80LongDoubleAPFloatToAPInt(); 3016 } 3017 3018 float IEEEFloat::convertToFloat() const { 3019 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle && 3020 "Float semantics are not IEEEsingle"); 3021 APInt api = bitcastToAPInt(); 3022 return api.bitsToFloat(); 3023 } 3024 3025 double IEEEFloat::convertToDouble() const { 3026 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble && 3027 "Float semantics are not IEEEdouble"); 3028 APInt api = bitcastToAPInt(); 3029 return api.bitsToDouble(); 3030 } 3031 3032 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3033 /// does not support these bit patterns: 3034 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3035 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3036 /// exponent = 0, integer bit 1 ("pseudodenormal") 3037 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3038 /// At the moment, the first two are treated as NaNs, the second two as Normal. 3039 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) { 3040 assert(api.getBitWidth()==80); 3041 uint64_t i1 = api.getRawData()[0]; 3042 uint64_t i2 = api.getRawData()[1]; 3043 uint64_t myexponent = (i2 & 0x7fff); 3044 uint64_t mysignificand = i1; 3045 3046 initialize(&IEEEFloat::x87DoubleExtended); 3047 assert(partCount()==2); 3048 3049 sign = static_cast<unsigned int>(i2>>15); 3050 if (myexponent==0 && mysignificand==0) { 3051 // exponent, significand meaningless 3052 category = fcZero; 3053 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3054 // exponent, significand meaningless 3055 category = fcInfinity; 3056 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) { 3057 // exponent meaningless 3058 category = fcNaN; 3059 significandParts()[0] = mysignificand; 3060 significandParts()[1] = 0; 3061 } else { 3062 category = fcNormal; 3063 exponent = myexponent - 16383; 3064 significandParts()[0] = mysignificand; 3065 significandParts()[1] = 0; 3066 if (myexponent==0) // denormal 3067 exponent = -16382; 3068 } 3069 } 3070 3071 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) { 3072 assert(api.getBitWidth()==128); 3073 uint64_t i1 = api.getRawData()[0]; 3074 uint64_t i2 = api.getRawData()[1]; 3075 opStatus fs; 3076 bool losesInfo; 3077 3078 // Get the first double and convert to our format. 3079 initFromDoubleAPInt(APInt(64, i1)); 3080 fs = convert(PPCDoubleDoubleImpl, rmNearestTiesToEven, &losesInfo); 3081 assert(fs == opOK && !losesInfo); 3082 (void)fs; 3083 3084 // Unless we have a special case, add in second double. 3085 if (isFiniteNonZero()) { 3086 IEEEFloat v(IEEEdouble, APInt(64, i2)); 3087 fs = v.convert(PPCDoubleDoubleImpl, rmNearestTiesToEven, &losesInfo); 3088 assert(fs == opOK && !losesInfo); 3089 (void)fs; 3090 3091 add(v, rmNearestTiesToEven); 3092 } 3093 } 3094 3095 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) { 3096 assert(api.getBitWidth()==128); 3097 uint64_t i1 = api.getRawData()[0]; 3098 uint64_t i2 = api.getRawData()[1]; 3099 uint64_t myexponent = (i2 >> 48) & 0x7fff; 3100 uint64_t mysignificand = i1; 3101 uint64_t mysignificand2 = i2 & 0xffffffffffffLL; 3102 3103 initialize(&IEEEFloat::IEEEquad); 3104 assert(partCount()==2); 3105 3106 sign = static_cast<unsigned int>(i2>>63); 3107 if (myexponent==0 && 3108 (mysignificand==0 && mysignificand2==0)) { 3109 // exponent, significand meaningless 3110 category = fcZero; 3111 } else if (myexponent==0x7fff && 3112 (mysignificand==0 && mysignificand2==0)) { 3113 // exponent, significand meaningless 3114 category = fcInfinity; 3115 } else if (myexponent==0x7fff && 3116 (mysignificand!=0 || mysignificand2 !=0)) { 3117 // exponent meaningless 3118 category = fcNaN; 3119 significandParts()[0] = mysignificand; 3120 significandParts()[1] = mysignificand2; 3121 } else { 3122 category = fcNormal; 3123 exponent = myexponent - 16383; 3124 significandParts()[0] = mysignificand; 3125 significandParts()[1] = mysignificand2; 3126 if (myexponent==0) // denormal 3127 exponent = -16382; 3128 else 3129 significandParts()[1] |= 0x1000000000000LL; // integer bit 3130 } 3131 } 3132 3133 void IEEEFloat::initFromDoubleAPInt(const APInt &api) { 3134 assert(api.getBitWidth()==64); 3135 uint64_t i = *api.getRawData(); 3136 uint64_t myexponent = (i >> 52) & 0x7ff; 3137 uint64_t mysignificand = i & 0xfffffffffffffLL; 3138 3139 initialize(&IEEEFloat::IEEEdouble); 3140 assert(partCount()==1); 3141 3142 sign = static_cast<unsigned int>(i>>63); 3143 if (myexponent==0 && mysignificand==0) { 3144 // exponent, significand meaningless 3145 category = fcZero; 3146 } else if (myexponent==0x7ff && mysignificand==0) { 3147 // exponent, significand meaningless 3148 category = fcInfinity; 3149 } else if (myexponent==0x7ff && mysignificand!=0) { 3150 // exponent meaningless 3151 category = fcNaN; 3152 *significandParts() = mysignificand; 3153 } else { 3154 category = fcNormal; 3155 exponent = myexponent - 1023; 3156 *significandParts() = mysignificand; 3157 if (myexponent==0) // denormal 3158 exponent = -1022; 3159 else 3160 *significandParts() |= 0x10000000000000LL; // integer bit 3161 } 3162 } 3163 3164 void IEEEFloat::initFromFloatAPInt(const APInt &api) { 3165 assert(api.getBitWidth()==32); 3166 uint32_t i = (uint32_t)*api.getRawData(); 3167 uint32_t myexponent = (i >> 23) & 0xff; 3168 uint32_t mysignificand = i & 0x7fffff; 3169 3170 initialize(&IEEEFloat::IEEEsingle); 3171 assert(partCount()==1); 3172 3173 sign = i >> 31; 3174 if (myexponent==0 && mysignificand==0) { 3175 // exponent, significand meaningless 3176 category = fcZero; 3177 } else if (myexponent==0xff && mysignificand==0) { 3178 // exponent, significand meaningless 3179 category = fcInfinity; 3180 } else if (myexponent==0xff && mysignificand!=0) { 3181 // sign, exponent, significand meaningless 3182 category = fcNaN; 3183 *significandParts() = mysignificand; 3184 } else { 3185 category = fcNormal; 3186 exponent = myexponent - 127; //bias 3187 *significandParts() = mysignificand; 3188 if (myexponent==0) // denormal 3189 exponent = -126; 3190 else 3191 *significandParts() |= 0x800000; // integer bit 3192 } 3193 } 3194 3195 void IEEEFloat::initFromHalfAPInt(const APInt &api) { 3196 assert(api.getBitWidth()==16); 3197 uint32_t i = (uint32_t)*api.getRawData(); 3198 uint32_t myexponent = (i >> 10) & 0x1f; 3199 uint32_t mysignificand = i & 0x3ff; 3200 3201 initialize(&IEEEFloat::IEEEhalf); 3202 assert(partCount()==1); 3203 3204 sign = i >> 15; 3205 if (myexponent==0 && mysignificand==0) { 3206 // exponent, significand meaningless 3207 category = fcZero; 3208 } else if (myexponent==0x1f && mysignificand==0) { 3209 // exponent, significand meaningless 3210 category = fcInfinity; 3211 } else if (myexponent==0x1f && mysignificand!=0) { 3212 // sign, exponent, significand meaningless 3213 category = fcNaN; 3214 *significandParts() = mysignificand; 3215 } else { 3216 category = fcNormal; 3217 exponent = myexponent - 15; //bias 3218 *significandParts() = mysignificand; 3219 if (myexponent==0) // denormal 3220 exponent = -14; 3221 else 3222 *significandParts() |= 0x400; // integer bit 3223 } 3224 } 3225 3226 /// Treat api as containing the bits of a floating point number. Currently 3227 /// we infer the floating point type from the size of the APInt. The 3228 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful 3229 /// when the size is anything else). 3230 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { 3231 if (Sem == &IEEEhalf) 3232 return initFromHalfAPInt(api); 3233 if (Sem == &IEEEsingle) 3234 return initFromFloatAPInt(api); 3235 if (Sem == &IEEEdouble) 3236 return initFromDoubleAPInt(api); 3237 if (Sem == &x87DoubleExtended) 3238 return initFromF80LongDoubleAPInt(api); 3239 if (Sem == &IEEEquad) 3240 return initFromQuadrupleAPInt(api); 3241 if (Sem == &PPCDoubleDoubleImpl) 3242 return initFromPPCDoubleDoubleAPInt(api); 3243 3244 llvm_unreachable(nullptr); 3245 } 3246 3247 /// Make this number the largest magnitude normal number in the given 3248 /// semantics. 3249 void IEEEFloat::makeLargest(bool Negative) { 3250 // We want (in interchange format): 3251 // sign = {Negative} 3252 // exponent = 1..10 3253 // significand = 1..1 3254 category = fcNormal; 3255 sign = Negative; 3256 exponent = semantics->maxExponent; 3257 3258 // Use memset to set all but the highest integerPart to all ones. 3259 integerPart *significand = significandParts(); 3260 unsigned PartCount = partCount(); 3261 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); 3262 3263 // Set the high integerPart especially setting all unused top bits for 3264 // internal consistency. 3265 const unsigned NumUnusedHighBits = 3266 PartCount*integerPartWidth - semantics->precision; 3267 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth) 3268 ? (~integerPart(0) >> NumUnusedHighBits) 3269 : 0; 3270 } 3271 3272 /// Make this number the smallest magnitude denormal number in the given 3273 /// semantics. 3274 void IEEEFloat::makeSmallest(bool Negative) { 3275 // We want (in interchange format): 3276 // sign = {Negative} 3277 // exponent = 0..0 3278 // significand = 0..01 3279 category = fcNormal; 3280 sign = Negative; 3281 exponent = semantics->minExponent; 3282 APInt::tcSet(significandParts(), 1, partCount()); 3283 } 3284 3285 void IEEEFloat::makeSmallestNormalized(bool Negative) { 3286 // We want (in interchange format): 3287 // sign = {Negative} 3288 // exponent = 0..0 3289 // significand = 10..0 3290 3291 category = fcNormal; 3292 zeroSignificand(); 3293 sign = Negative; 3294 exponent = semantics->minExponent; 3295 significandParts()[partCountForBits(semantics->precision) - 1] |= 3296 (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth)); 3297 } 3298 3299 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) { 3300 initFromAPInt(&Sem, API); 3301 } 3302 3303 IEEEFloat::IEEEFloat(float f) { 3304 initFromAPInt(&IEEEsingle, APInt::floatToBits(f)); 3305 } 3306 3307 IEEEFloat::IEEEFloat(double d) { 3308 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d)); 3309 } 3310 3311 namespace { 3312 void append(SmallVectorImpl<char> &Buffer, StringRef Str) { 3313 Buffer.append(Str.begin(), Str.end()); 3314 } 3315 3316 /// Removes data from the given significand until it is no more 3317 /// precise than is required for the desired precision. 3318 void AdjustToPrecision(APInt &significand, 3319 int &exp, unsigned FormatPrecision) { 3320 unsigned bits = significand.getActiveBits(); 3321 3322 // 196/59 is a very slight overestimate of lg_2(10). 3323 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59; 3324 3325 if (bits <= bitsRequired) return; 3326 3327 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; 3328 if (!tensRemovable) return; 3329 3330 exp += tensRemovable; 3331 3332 APInt divisor(significand.getBitWidth(), 1); 3333 APInt powten(significand.getBitWidth(), 10); 3334 while (true) { 3335 if (tensRemovable & 1) 3336 divisor *= powten; 3337 tensRemovable >>= 1; 3338 if (!tensRemovable) break; 3339 powten *= powten; 3340 } 3341 3342 significand = significand.udiv(divisor); 3343 3344 // Truncate the significand down to its active bit count. 3345 significand = significand.trunc(significand.getActiveBits()); 3346 } 3347 3348 3349 void AdjustToPrecision(SmallVectorImpl<char> &buffer, 3350 int &exp, unsigned FormatPrecision) { 3351 unsigned N = buffer.size(); 3352 if (N <= FormatPrecision) return; 3353 3354 // The most significant figures are the last ones in the buffer. 3355 unsigned FirstSignificant = N - FormatPrecision; 3356 3357 // Round. 3358 // FIXME: this probably shouldn't use 'round half up'. 3359 3360 // Rounding down is just a truncation, except we also want to drop 3361 // trailing zeros from the new result. 3362 if (buffer[FirstSignificant - 1] < '5') { 3363 while (FirstSignificant < N && buffer[FirstSignificant] == '0') 3364 FirstSignificant++; 3365 3366 exp += FirstSignificant; 3367 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3368 return; 3369 } 3370 3371 // Rounding up requires a decimal add-with-carry. If we continue 3372 // the carry, the newly-introduced zeros will just be truncated. 3373 for (unsigned I = FirstSignificant; I != N; ++I) { 3374 if (buffer[I] == '9') { 3375 FirstSignificant++; 3376 } else { 3377 buffer[I]++; 3378 break; 3379 } 3380 } 3381 3382 // If we carried through, we have exactly one digit of precision. 3383 if (FirstSignificant == N) { 3384 exp += FirstSignificant; 3385 buffer.clear(); 3386 buffer.push_back('1'); 3387 return; 3388 } 3389 3390 exp += FirstSignificant; 3391 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3392 } 3393 } 3394 3395 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, 3396 unsigned FormatMaxPadding) const { 3397 switch (category) { 3398 case fcInfinity: 3399 if (isNegative()) 3400 return append(Str, "-Inf"); 3401 else 3402 return append(Str, "+Inf"); 3403 3404 case fcNaN: return append(Str, "NaN"); 3405 3406 case fcZero: 3407 if (isNegative()) 3408 Str.push_back('-'); 3409 3410 if (!FormatMaxPadding) 3411 append(Str, "0.0E+0"); 3412 else 3413 Str.push_back('0'); 3414 return; 3415 3416 case fcNormal: 3417 break; 3418 } 3419 3420 if (isNegative()) 3421 Str.push_back('-'); 3422 3423 // Decompose the number into an APInt and an exponent. 3424 int exp = exponent - ((int) semantics->precision - 1); 3425 APInt significand(semantics->precision, 3426 makeArrayRef(significandParts(), 3427 partCountForBits(semantics->precision))); 3428 3429 // Set FormatPrecision if zero. We want to do this before we 3430 // truncate trailing zeros, as those are part of the precision. 3431 if (!FormatPrecision) { 3432 // We use enough digits so the number can be round-tripped back to an 3433 // APFloat. The formula comes from "How to Print Floating-Point Numbers 3434 // Accurately" by Steele and White. 3435 // FIXME: Using a formula based purely on the precision is conservative; 3436 // we can print fewer digits depending on the actual value being printed. 3437 3438 // FormatPrecision = 2 + floor(significandBits / lg_2(10)) 3439 FormatPrecision = 2 + semantics->precision * 59 / 196; 3440 } 3441 3442 // Ignore trailing binary zeros. 3443 int trailingZeros = significand.countTrailingZeros(); 3444 exp += trailingZeros; 3445 significand = significand.lshr(trailingZeros); 3446 3447 // Change the exponent from 2^e to 10^e. 3448 if (exp == 0) { 3449 // Nothing to do. 3450 } else if (exp > 0) { 3451 // Just shift left. 3452 significand = significand.zext(semantics->precision + exp); 3453 significand <<= exp; 3454 exp = 0; 3455 } else { /* exp < 0 */ 3456 int texp = -exp; 3457 3458 // We transform this using the identity: 3459 // (N)(2^-e) == (N)(5^e)(10^-e) 3460 // This means we have to multiply N (the significand) by 5^e. 3461 // To avoid overflow, we have to operate on numbers large 3462 // enough to store N * 5^e: 3463 // log2(N * 5^e) == log2(N) + e * log2(5) 3464 // <= semantics->precision + e * 137 / 59 3465 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) 3466 3467 unsigned precision = semantics->precision + (137 * texp + 136) / 59; 3468 3469 // Multiply significand by 5^e. 3470 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) 3471 significand = significand.zext(precision); 3472 APInt five_to_the_i(precision, 5); 3473 while (true) { 3474 if (texp & 1) significand *= five_to_the_i; 3475 3476 texp >>= 1; 3477 if (!texp) break; 3478 five_to_the_i *= five_to_the_i; 3479 } 3480 } 3481 3482 AdjustToPrecision(significand, exp, FormatPrecision); 3483 3484 SmallVector<char, 256> buffer; 3485 3486 // Fill the buffer. 3487 unsigned precision = significand.getBitWidth(); 3488 APInt ten(precision, 10); 3489 APInt digit(precision, 0); 3490 3491 bool inTrail = true; 3492 while (significand != 0) { 3493 // digit <- significand % 10 3494 // significand <- significand / 10 3495 APInt::udivrem(significand, ten, significand, digit); 3496 3497 unsigned d = digit.getZExtValue(); 3498 3499 // Drop trailing zeros. 3500 if (inTrail && !d) exp++; 3501 else { 3502 buffer.push_back((char) ('0' + d)); 3503 inTrail = false; 3504 } 3505 } 3506 3507 assert(!buffer.empty() && "no characters in buffer!"); 3508 3509 // Drop down to FormatPrecision. 3510 // TODO: don't do more precise calculations above than are required. 3511 AdjustToPrecision(buffer, exp, FormatPrecision); 3512 3513 unsigned NDigits = buffer.size(); 3514 3515 // Check whether we should use scientific notation. 3516 bool FormatScientific; 3517 if (!FormatMaxPadding) 3518 FormatScientific = true; 3519 else { 3520 if (exp >= 0) { 3521 // 765e3 --> 765000 3522 // ^^^ 3523 // But we shouldn't make the number look more precise than it is. 3524 FormatScientific = ((unsigned) exp > FormatMaxPadding || 3525 NDigits + (unsigned) exp > FormatPrecision); 3526 } else { 3527 // Power of the most significant digit. 3528 int MSD = exp + (int) (NDigits - 1); 3529 if (MSD >= 0) { 3530 // 765e-2 == 7.65 3531 FormatScientific = false; 3532 } else { 3533 // 765e-5 == 0.00765 3534 // ^ ^^ 3535 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; 3536 } 3537 } 3538 } 3539 3540 // Scientific formatting is pretty straightforward. 3541 if (FormatScientific) { 3542 exp += (NDigits - 1); 3543 3544 Str.push_back(buffer[NDigits-1]); 3545 Str.push_back('.'); 3546 if (NDigits == 1) 3547 Str.push_back('0'); 3548 else 3549 for (unsigned I = 1; I != NDigits; ++I) 3550 Str.push_back(buffer[NDigits-1-I]); 3551 Str.push_back('E'); 3552 3553 Str.push_back(exp >= 0 ? '+' : '-'); 3554 if (exp < 0) exp = -exp; 3555 SmallVector<char, 6> expbuf; 3556 do { 3557 expbuf.push_back((char) ('0' + (exp % 10))); 3558 exp /= 10; 3559 } while (exp); 3560 for (unsigned I = 0, E = expbuf.size(); I != E; ++I) 3561 Str.push_back(expbuf[E-1-I]); 3562 return; 3563 } 3564 3565 // Non-scientific, positive exponents. 3566 if (exp >= 0) { 3567 for (unsigned I = 0; I != NDigits; ++I) 3568 Str.push_back(buffer[NDigits-1-I]); 3569 for (unsigned I = 0; I != (unsigned) exp; ++I) 3570 Str.push_back('0'); 3571 return; 3572 } 3573 3574 // Non-scientific, negative exponents. 3575 3576 // The number of digits to the left of the decimal point. 3577 int NWholeDigits = exp + (int) NDigits; 3578 3579 unsigned I = 0; 3580 if (NWholeDigits > 0) { 3581 for (; I != (unsigned) NWholeDigits; ++I) 3582 Str.push_back(buffer[NDigits-I-1]); 3583 Str.push_back('.'); 3584 } else { 3585 unsigned NZeros = 1 + (unsigned) -NWholeDigits; 3586 3587 Str.push_back('0'); 3588 Str.push_back('.'); 3589 for (unsigned Z = 1; Z != NZeros; ++Z) 3590 Str.push_back('0'); 3591 } 3592 3593 for (; I != NDigits; ++I) 3594 Str.push_back(buffer[NDigits-I-1]); 3595 } 3596 3597 bool IEEEFloat::getExactInverse(IEEEFloat *inv) const { 3598 // Special floats and denormals have no exact inverse. 3599 if (!isFiniteNonZero()) 3600 return false; 3601 3602 // Check that the number is a power of two by making sure that only the 3603 // integer bit is set in the significand. 3604 if (significandLSB() != semantics->precision - 1) 3605 return false; 3606 3607 // Get the inverse. 3608 IEEEFloat reciprocal(*semantics, 1ULL); 3609 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) 3610 return false; 3611 3612 // Avoid multiplication with a denormal, it is not safe on all platforms and 3613 // may be slower than a normal division. 3614 if (reciprocal.isDenormal()) 3615 return false; 3616 3617 assert(reciprocal.isFiniteNonZero() && 3618 reciprocal.significandLSB() == reciprocal.semantics->precision - 1); 3619 3620 if (inv) 3621 *inv = reciprocal; 3622 3623 return true; 3624 } 3625 3626 bool IEEEFloat::isSignaling() const { 3627 if (!isNaN()) 3628 return false; 3629 3630 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the 3631 // first bit of the trailing significand being 0. 3632 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2); 3633 } 3634 3635 /// IEEE-754R 2008 5.3.1: nextUp/nextDown. 3636 /// 3637 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with 3638 /// appropriate sign switching before/after the computation. 3639 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) { 3640 // If we are performing nextDown, swap sign so we have -x. 3641 if (nextDown) 3642 changeSign(); 3643 3644 // Compute nextUp(x) 3645 opStatus result = opOK; 3646 3647 // Handle each float category separately. 3648 switch (category) { 3649 case fcInfinity: 3650 // nextUp(+inf) = +inf 3651 if (!isNegative()) 3652 break; 3653 // nextUp(-inf) = -getLargest() 3654 makeLargest(true); 3655 break; 3656 case fcNaN: 3657 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. 3658 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not 3659 // change the payload. 3660 if (isSignaling()) { 3661 result = opInvalidOp; 3662 // For consistency, propagate the sign of the sNaN to the qNaN. 3663 makeNaN(false, isNegative(), nullptr); 3664 } 3665 break; 3666 case fcZero: 3667 // nextUp(pm 0) = +getSmallest() 3668 makeSmallest(false); 3669 break; 3670 case fcNormal: 3671 // nextUp(-getSmallest()) = -0 3672 if (isSmallest() && isNegative()) { 3673 APInt::tcSet(significandParts(), 0, partCount()); 3674 category = fcZero; 3675 exponent = 0; 3676 break; 3677 } 3678 3679 // nextUp(getLargest()) == INFINITY 3680 if (isLargest() && !isNegative()) { 3681 APInt::tcSet(significandParts(), 0, partCount()); 3682 category = fcInfinity; 3683 exponent = semantics->maxExponent + 1; 3684 break; 3685 } 3686 3687 // nextUp(normal) == normal + inc. 3688 if (isNegative()) { 3689 // If we are negative, we need to decrement the significand. 3690 3691 // We only cross a binade boundary that requires adjusting the exponent 3692 // if: 3693 // 1. exponent != semantics->minExponent. This implies we are not in the 3694 // smallest binade or are dealing with denormals. 3695 // 2. Our significand excluding the integral bit is all zeros. 3696 bool WillCrossBinadeBoundary = 3697 exponent != semantics->minExponent && isSignificandAllZeros(); 3698 3699 // Decrement the significand. 3700 // 3701 // We always do this since: 3702 // 1. If we are dealing with a non-binade decrement, by definition we 3703 // just decrement the significand. 3704 // 2. If we are dealing with a normal -> normal binade decrement, since 3705 // we have an explicit integral bit the fact that all bits but the 3706 // integral bit are zero implies that subtracting one will yield a 3707 // significand with 0 integral bit and 1 in all other spots. Thus we 3708 // must just adjust the exponent and set the integral bit to 1. 3709 // 3. If we are dealing with a normal -> denormal binade decrement, 3710 // since we set the integral bit to 0 when we represent denormals, we 3711 // just decrement the significand. 3712 integerPart *Parts = significandParts(); 3713 APInt::tcDecrement(Parts, partCount()); 3714 3715 if (WillCrossBinadeBoundary) { 3716 // Our result is a normal number. Do the following: 3717 // 1. Set the integral bit to 1. 3718 // 2. Decrement the exponent. 3719 APInt::tcSetBit(Parts, semantics->precision - 1); 3720 exponent--; 3721 } 3722 } else { 3723 // If we are positive, we need to increment the significand. 3724 3725 // We only cross a binade boundary that requires adjusting the exponent if 3726 // the input is not a denormal and all of said input's significand bits 3727 // are set. If all of said conditions are true: clear the significand, set 3728 // the integral bit to 1, and increment the exponent. If we have a 3729 // denormal always increment since moving denormals and the numbers in the 3730 // smallest normal binade have the same exponent in our representation. 3731 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes(); 3732 3733 if (WillCrossBinadeBoundary) { 3734 integerPart *Parts = significandParts(); 3735 APInt::tcSet(Parts, 0, partCount()); 3736 APInt::tcSetBit(Parts, semantics->precision - 1); 3737 assert(exponent != semantics->maxExponent && 3738 "We can not increment an exponent beyond the maxExponent allowed" 3739 " by the given floating point semantics."); 3740 exponent++; 3741 } else { 3742 incrementSignificand(); 3743 } 3744 } 3745 break; 3746 } 3747 3748 // If we are performing nextDown, swap sign so we have -nextUp(-x) 3749 if (nextDown) 3750 changeSign(); 3751 3752 return result; 3753 } 3754 3755 void IEEEFloat::makeInf(bool Negative) { 3756 category = fcInfinity; 3757 sign = Negative; 3758 exponent = semantics->maxExponent + 1; 3759 APInt::tcSet(significandParts(), 0, partCount()); 3760 } 3761 3762 void IEEEFloat::makeZero(bool Negative) { 3763 category = fcZero; 3764 sign = Negative; 3765 exponent = semantics->minExponent-1; 3766 APInt::tcSet(significandParts(), 0, partCount()); 3767 } 3768 3769 void IEEEFloat::makeQuiet() { 3770 assert(isNaN()); 3771 APInt::tcSetBit(significandParts(), semantics->precision - 2); 3772 } 3773 3774 int ilogb(const IEEEFloat &Arg) { 3775 if (Arg.isNaN()) 3776 return IEEEFloat::IEK_NaN; 3777 if (Arg.isZero()) 3778 return IEEEFloat::IEK_Zero; 3779 if (Arg.isInfinity()) 3780 return IEEEFloat::IEK_Inf; 3781 if (!Arg.isDenormal()) 3782 return Arg.exponent; 3783 3784 IEEEFloat Normalized(Arg); 3785 int SignificandBits = Arg.getSemantics().precision - 1; 3786 3787 Normalized.exponent += SignificandBits; 3788 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero); 3789 return Normalized.exponent - SignificandBits; 3790 } 3791 3792 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) { 3793 auto MaxExp = X.getSemantics().maxExponent; 3794 auto MinExp = X.getSemantics().minExponent; 3795 3796 // If Exp is wildly out-of-scale, simply adding it to X.exponent will 3797 // overflow; clamp it to a safe range before adding, but ensure that the range 3798 // is large enough that the clamp does not change the result. The range we 3799 // need to support is the difference between the largest possible exponent and 3800 // the normalized exponent of half the smallest denormal. 3801 3802 int SignificandBits = X.getSemantics().precision - 1; 3803 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1; 3804 3805 // Clamp to one past the range ends to let normalize handle overlflow. 3806 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement); 3807 X.normalize(RoundingMode, lfExactlyZero); 3808 if (X.isNaN()) 3809 X.makeQuiet(); 3810 return X; 3811 } 3812 3813 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) { 3814 Exp = ilogb(Val); 3815 3816 // Quiet signalling nans. 3817 if (Exp == IEEEFloat::IEK_NaN) { 3818 IEEEFloat Quiet(Val); 3819 Quiet.makeQuiet(); 3820 return Quiet; 3821 } 3822 3823 if (Exp == IEEEFloat::IEK_Inf) 3824 return Val; 3825 3826 // 1 is added because frexp is defined to return a normalized fraction in 3827 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0). 3828 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1; 3829 return scalbn(Val, -Exp, RM); 3830 } 3831 3832 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S) 3833 : Semantics(&S), Floats(new APFloat[2]{APFloat(PPCDoubleDoubleImpl), 3834 APFloat(IEEEdouble)}) { 3835 assert(Semantics == &PPCDoubleDouble); 3836 } 3837 3838 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag) 3839 : Semantics(&S), 3840 Floats(new APFloat[2]{APFloat(PPCDoubleDoubleImpl, uninitialized), 3841 APFloat(IEEEdouble, uninitialized)}) { 3842 assert(Semantics == &PPCDoubleDouble); 3843 } 3844 3845 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I) 3846 : Semantics(&S), Floats(new APFloat[2]{APFloat(PPCDoubleDoubleImpl, I), 3847 APFloat(IEEEdouble)}) { 3848 assert(Semantics == &PPCDoubleDouble); 3849 } 3850 3851 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I) 3852 : Semantics(&S), Floats(new APFloat[2]{ 3853 APFloat(PPCDoubleDoubleImpl, I), 3854 APFloat(IEEEdouble, APInt(64, I.getRawData()[1]))}) { 3855 assert(Semantics == &PPCDoubleDouble); 3856 } 3857 3858 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First, 3859 APFloat &&Second) 3860 : Semantics(&S), 3861 Floats(new APFloat[2]{std::move(First), std::move(Second)}) { 3862 assert(Semantics == &PPCDoubleDouble); 3863 // TODO Check for First == &IEEEdouble once the transition is done. 3864 assert(&Floats[0].getSemantics() == &PPCDoubleDoubleImpl || 3865 &Floats[0].getSemantics() == &IEEEdouble); 3866 assert(&Floats[1].getSemantics() == &IEEEdouble); 3867 } 3868 3869 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS) 3870 : Semantics(RHS.Semantics), 3871 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]), 3872 APFloat(RHS.Floats[1])} 3873 : nullptr) { 3874 assert(Semantics == &PPCDoubleDouble); 3875 } 3876 3877 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS) 3878 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) { 3879 RHS.Semantics = &Bogus; 3880 assert(Semantics == &PPCDoubleDouble); 3881 } 3882 3883 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) { 3884 if (Semantics == RHS.Semantics && RHS.Floats) { 3885 Floats[0] = RHS.Floats[0]; 3886 Floats[1] = RHS.Floats[1]; 3887 } else if (this != &RHS) { 3888 this->~DoubleAPFloat(); 3889 new (this) DoubleAPFloat(RHS); 3890 } 3891 return *this; 3892 } 3893 3894 // "Software for Doubled-Precision Floating-Point Computations", 3895 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. 3896 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa, 3897 const APFloat &c, const APFloat &cc, 3898 roundingMode RM) { 3899 int Status = opOK; 3900 APFloat z = a; 3901 Status |= z.add(c, RM); 3902 if (!z.isFinite()) { 3903 if (!z.isInfinity()) { 3904 Floats[0] = std::move(z); 3905 Floats[1].makeZero(false); 3906 return (opStatus)Status; 3907 } 3908 Status = opOK; 3909 auto AComparedToC = a.compareAbsoluteValue(c); 3910 z = cc; 3911 Status |= z.add(aa, RM); 3912 if (AComparedToC == APFloat::cmpGreaterThan) { 3913 // z = cc + aa + c + a; 3914 Status |= z.add(c, RM); 3915 Status |= z.add(a, RM); 3916 } else { 3917 // z = cc + aa + a + c; 3918 Status |= z.add(a, RM); 3919 Status |= z.add(c, RM); 3920 } 3921 if (!z.isFinite()) { 3922 Floats[0] = std::move(z); 3923 Floats[1].makeZero(false); 3924 return (opStatus)Status; 3925 } 3926 Floats[0] = z; 3927 APFloat zz = aa; 3928 Status |= zz.add(cc, RM); 3929 if (AComparedToC == APFloat::cmpGreaterThan) { 3930 // Floats[1] = a - z + c + zz; 3931 Floats[1] = a; 3932 Status |= Floats[1].subtract(z, RM); 3933 Status |= Floats[1].add(c, RM); 3934 Status |= Floats[1].add(zz, RM); 3935 } else { 3936 // Floats[1] = c - z + a + zz; 3937 Floats[1] = c; 3938 Status |= Floats[1].subtract(z, RM); 3939 Status |= Floats[1].add(a, RM); 3940 Status |= Floats[1].add(zz, RM); 3941 } 3942 } else { 3943 // q = a - z; 3944 APFloat q = a; 3945 Status |= q.subtract(z, RM); 3946 3947 // zz = q + c + (a - (q + z)) + aa + cc; 3948 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. 3949 auto zz = q; 3950 Status |= zz.add(c, RM); 3951 Status |= q.add(z, RM); 3952 Status |= q.subtract(a, RM); 3953 q.changeSign(); 3954 Status |= zz.add(q, RM); 3955 Status |= zz.add(aa, RM); 3956 Status |= zz.add(cc, RM); 3957 if (zz.isZero() && !zz.isNegative()) { 3958 Floats[0] = std::move(z); 3959 Floats[1].makeZero(false); 3960 return opOK; 3961 } 3962 Floats[0] = z; 3963 Status |= Floats[0].add(zz, RM); 3964 if (!Floats[0].isFinite()) { 3965 Floats[1].makeZero(false); 3966 return (opStatus)Status; 3967 } 3968 Floats[1] = std::move(z); 3969 Status |= Floats[1].subtract(Floats[0], RM); 3970 Status |= Floats[1].add(zz, RM); 3971 } 3972 return (opStatus)Status; 3973 } 3974 3975 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS, 3976 const DoubleAPFloat &RHS, 3977 DoubleAPFloat &Out, 3978 roundingMode RM) { 3979 if (LHS.getCategory() == fcNaN) { 3980 Out = LHS; 3981 return opOK; 3982 } 3983 if (RHS.getCategory() == fcNaN) { 3984 Out = RHS; 3985 return opOK; 3986 } 3987 if (LHS.getCategory() == fcZero) { 3988 Out = RHS; 3989 return opOK; 3990 } 3991 if (RHS.getCategory() == fcZero) { 3992 Out = LHS; 3993 return opOK; 3994 } 3995 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity && 3996 LHS.isNegative() != RHS.isNegative()) { 3997 Out.makeNaN(false, Out.isNegative(), nullptr); 3998 return opInvalidOp; 3999 } 4000 if (LHS.getCategory() == fcInfinity) { 4001 Out = LHS; 4002 return opOK; 4003 } 4004 if (RHS.getCategory() == fcInfinity) { 4005 Out = RHS; 4006 return opOK; 4007 } 4008 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal); 4009 4010 // These conversions will go away once PPCDoubleDoubleImpl goes away. 4011 // (PPCDoubleDoubleImpl, IEEEDouble) -> (IEEEDouble, IEEEDouble) 4012 APFloat A(IEEEdouble, 4013 APInt(64, LHS.Floats[0].bitcastToAPInt().getRawData()[0])), 4014 AA(LHS.Floats[1]), 4015 C(IEEEdouble, APInt(64, RHS.Floats[0].bitcastToAPInt().getRawData()[0])), 4016 CC(RHS.Floats[1]); 4017 assert(&AA.getSemantics() == &IEEEdouble); 4018 assert(&CC.getSemantics() == &IEEEdouble); 4019 Out.Floats[0] = APFloat(IEEEdouble); 4020 assert(&Out.Floats[1].getSemantics() == &IEEEdouble); 4021 4022 auto Ret = Out.addImpl(A, AA, C, CC, RM); 4023 4024 // (IEEEDouble, IEEEDouble) -> (PPCDoubleDoubleImpl, IEEEDouble) 4025 uint64_t Buffer[] = {Out.Floats[0].bitcastToAPInt().getRawData()[0], 4026 Out.Floats[1].bitcastToAPInt().getRawData()[0]}; 4027 Out.Floats[0] = APFloat(PPCDoubleDoubleImpl, APInt(128, 2, Buffer)); 4028 return Ret; 4029 } 4030 4031 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS, 4032 roundingMode RM) { 4033 return addWithSpecial(*this, RHS, *this, RM); 4034 } 4035 4036 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS, 4037 roundingMode RM) { 4038 changeSign(); 4039 auto Ret = add(RHS, RM); 4040 changeSign(); 4041 return Ret; 4042 } 4043 4044 void DoubleAPFloat::changeSign() { 4045 Floats[0].changeSign(); 4046 Floats[1].changeSign(); 4047 } 4048 4049 APFloat::cmpResult 4050 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const { 4051 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]); 4052 if (Result != cmpEqual) 4053 return Result; 4054 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]); 4055 if (Result == cmpLessThan || Result == cmpGreaterThan) { 4056 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative(); 4057 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative(); 4058 if (Against && !RHSAgainst) 4059 return cmpLessThan; 4060 if (!Against && RHSAgainst) 4061 return cmpGreaterThan; 4062 if (!Against && !RHSAgainst) 4063 return Result; 4064 if (Against && RHSAgainst) 4065 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result); 4066 } 4067 return Result; 4068 } 4069 4070 APFloat::fltCategory DoubleAPFloat::getCategory() const { 4071 return Floats[0].getCategory(); 4072 } 4073 4074 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); } 4075 4076 void DoubleAPFloat::makeInf(bool Neg) { 4077 Floats[0].makeInf(Neg); 4078 Floats[1].makeZero(false); 4079 } 4080 4081 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) { 4082 Floats[0].makeNaN(SNaN, Neg, fill); 4083 Floats[1].makeZero(false); 4084 } 4085 4086 } // End detail namespace 4087 4088 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) { 4089 if (usesLayout<IEEEFloat>(Semantics)) { 4090 new (&IEEE) IEEEFloat(std::move(F)); 4091 } else if (usesLayout<DoubleAPFloat>(Semantics)) { 4092 new (&Double) 4093 DoubleAPFloat(Semantics, APFloat(std::move(F), F.getSemantics()), 4094 APFloat(IEEEdouble)); 4095 } else { 4096 llvm_unreachable("Unexpected semantics"); 4097 } 4098 } 4099 4100 APFloat::opStatus APFloat::convertFromString(StringRef Str, roundingMode RM) { 4101 return getIEEE().convertFromString(Str, RM); 4102 } 4103 4104 hash_code hash_value(const APFloat &Arg) { return hash_value(Arg.getIEEE()); } 4105 4106 APFloat::APFloat(const fltSemantics &Semantics, StringRef S) 4107 : APFloat(Semantics) { 4108 convertFromString(S, rmNearestTiesToEven); 4109 } 4110 4111 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, 4112 roundingMode RM, bool *losesInfo) { 4113 if (&getSemantics() == &ToSemantics) 4114 return opOK; 4115 if (usesLayout<IEEEFloat>(getSemantics()) && 4116 usesLayout<IEEEFloat>(ToSemantics)) { 4117 return U.IEEE.convert(ToSemantics, RM, losesInfo); 4118 } else if (usesLayout<IEEEFloat>(getSemantics()) && 4119 usesLayout<DoubleAPFloat>(ToSemantics)) { 4120 assert(&ToSemantics == &PPCDoubleDouble); 4121 auto Ret = U.IEEE.convert(PPCDoubleDoubleImpl, RM, losesInfo); 4122 *this = APFloat( 4123 DoubleAPFloat(PPCDoubleDouble, std::move(*this), APFloat(IEEEdouble)), 4124 ToSemantics); 4125 return Ret; 4126 } else if (usesLayout<DoubleAPFloat>(getSemantics()) && 4127 usesLayout<IEEEFloat>(ToSemantics)) { 4128 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo); 4129 *this = APFloat(std::move(getIEEE()), ToSemantics); 4130 return Ret; 4131 } else { 4132 llvm_unreachable("Unexpected semantics"); 4133 } 4134 } 4135 4136 APFloat APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) { 4137 if (isIEEE) { 4138 switch (BitWidth) { 4139 case 16: 4140 return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth)); 4141 case 32: 4142 return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth)); 4143 case 64: 4144 return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth)); 4145 case 80: 4146 return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth)); 4147 case 128: 4148 return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth)); 4149 default: 4150 llvm_unreachable("Unknown floating bit width"); 4151 } 4152 } else { 4153 assert(BitWidth == 128); 4154 return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth)); 4155 } 4156 } 4157 4158 void APFloat::print(raw_ostream &OS) const { 4159 SmallVector<char, 16> Buffer; 4160 toString(Buffer); 4161 OS << Buffer << "\n"; 4162 } 4163 4164 void APFloat::dump() const { print(dbgs()); } 4165 4166 } // End llvm namespace 4167