1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a class to represent arbitrary precision floating 11 // point values and provide a variety of arithmetic operations on them. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/APFloat.h" 16 #include "llvm/ADT/APSInt.h" 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/FoldingSet.h" 19 #include "llvm/ADT/Hashing.h" 20 #include "llvm/ADT/StringExtras.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/Support/ErrorHandling.h" 23 #include "llvm/Support/MathExtras.h" 24 #include <cstring> 25 #include <limits.h> 26 27 using namespace llvm; 28 29 /// A macro used to combine two fcCategory enums into one key which can be used 30 /// in a switch statement to classify how the interaction of two APFloat's 31 /// categories affects an operation. 32 /// 33 /// TODO: If clang source code is ever allowed to use constexpr in its own 34 /// codebase, change this into a static inline function. 35 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs)) 36 37 /* Assumed in hexadecimal significand parsing, and conversion to 38 hexadecimal strings. */ 39 static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); 40 41 namespace llvm { 42 /* Represents floating point arithmetic semantics. */ 43 struct fltSemantics { 44 /* The largest E such that 2^E is representable; this matches the 45 definition of IEEE 754. */ 46 APFloatBase::ExponentType maxExponent; 47 48 /* The smallest E such that 2^E is a normalized number; this 49 matches the definition of IEEE 754. */ 50 APFloatBase::ExponentType minExponent; 51 52 /* Number of bits in the significand. This includes the integer 53 bit. */ 54 unsigned int precision; 55 56 /* Number of bits actually used in the semantics. */ 57 unsigned int sizeInBits; 58 }; 59 60 const fltSemantics APFloatBase::IEEEhalf = {15, -14, 11, 16}; 61 const fltSemantics APFloatBase::IEEEsingle = {127, -126, 24, 32}; 62 const fltSemantics APFloatBase::IEEEdouble = {1023, -1022, 53, 64}; 63 const fltSemantics APFloatBase::IEEEquad = {16383, -16382, 113, 128}; 64 const fltSemantics APFloatBase::x87DoubleExtended = {16383, -16382, 64, 80}; 65 const fltSemantics APFloatBase::Bogus = {0, 0, 0, 0}; 66 67 /* The PowerPC format consists of two doubles. It does not map cleanly 68 onto the usual format above. It is approximated using twice the 69 mantissa bits. Note that for exponents near the double minimum, 70 we no longer can represent the full 106 mantissa bits, so those 71 will be treated as denormal numbers. 72 73 FIXME: While this approximation is equivalent to what GCC uses for 74 compile-time arithmetic on PPC double-double numbers, it is not able 75 to represent all possible values held by a PPC double-double number, 76 for example: (long double) 1.0 + (long double) 0x1p-106 77 Should this be replaced by a full emulation of PPC double-double? */ 78 const fltSemantics APFloatBase::PPCDoubleDouble = {1023, -1022 + 53, 53 + 53, 79 128}; 80 81 /* A tight upper bound on number of parts required to hold the value 82 pow(5, power) is 83 84 power * 815 / (351 * integerPartWidth) + 1 85 86 However, whilst the result may require only this many parts, 87 because we are multiplying two values to get it, the 88 multiplication may require an extra part with the excess part 89 being zero (consider the trivial case of 1 * 1, tcFullMultiply 90 requires two parts to hold the single-part result). So we add an 91 extra one to guarantee enough space whilst multiplying. */ 92 const unsigned int maxExponent = 16383; 93 const unsigned int maxPrecision = 113; 94 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 95 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) 96 / (351 * integerPartWidth)); 97 98 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { 99 return semantics.precision; 100 } 101 APFloatBase::ExponentType 102 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { 103 return semantics.maxExponent; 104 } 105 APFloatBase::ExponentType 106 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { 107 return semantics.minExponent; 108 } 109 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { 110 return semantics.sizeInBits; 111 } 112 113 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { 114 return Sem.sizeInBits; 115 } 116 117 /* A bunch of private, handy routines. */ 118 119 static inline unsigned int 120 partCountForBits(unsigned int bits) 121 { 122 return ((bits) + integerPartWidth - 1) / integerPartWidth; 123 } 124 125 /* Returns 0U-9U. Return values >= 10U are not digits. */ 126 static inline unsigned int 127 decDigitValue(unsigned int c) 128 { 129 return c - '0'; 130 } 131 132 /* Return the value of a decimal exponent of the form 133 [+-]ddddddd. 134 135 If the exponent overflows, returns a large exponent with the 136 appropriate sign. */ 137 static int 138 readExponent(StringRef::iterator begin, StringRef::iterator end) 139 { 140 bool isNegative; 141 unsigned int absExponent; 142 const unsigned int overlargeExponent = 24000; /* FIXME. */ 143 StringRef::iterator p = begin; 144 145 assert(p != end && "Exponent has no digits"); 146 147 isNegative = (*p == '-'); 148 if (*p == '-' || *p == '+') { 149 p++; 150 assert(p != end && "Exponent has no digits"); 151 } 152 153 absExponent = decDigitValue(*p++); 154 assert(absExponent < 10U && "Invalid character in exponent"); 155 156 for (; p != end; ++p) { 157 unsigned int value; 158 159 value = decDigitValue(*p); 160 assert(value < 10U && "Invalid character in exponent"); 161 162 value += absExponent * 10; 163 if (absExponent >= overlargeExponent) { 164 absExponent = overlargeExponent; 165 p = end; /* outwit assert below */ 166 break; 167 } 168 absExponent = value; 169 } 170 171 assert(p == end && "Invalid exponent in exponent"); 172 173 if (isNegative) 174 return -(int) absExponent; 175 else 176 return (int) absExponent; 177 } 178 179 /* This is ugly and needs cleaning up, but I don't immediately see 180 how whilst remaining safe. */ 181 static int 182 totalExponent(StringRef::iterator p, StringRef::iterator end, 183 int exponentAdjustment) 184 { 185 int unsignedExponent; 186 bool negative, overflow; 187 int exponent = 0; 188 189 assert(p != end && "Exponent has no digits"); 190 191 negative = *p == '-'; 192 if (*p == '-' || *p == '+') { 193 p++; 194 assert(p != end && "Exponent has no digits"); 195 } 196 197 unsignedExponent = 0; 198 overflow = false; 199 for (; p != end; ++p) { 200 unsigned int value; 201 202 value = decDigitValue(*p); 203 assert(value < 10U && "Invalid character in exponent"); 204 205 unsignedExponent = unsignedExponent * 10 + value; 206 if (unsignedExponent > 32767) { 207 overflow = true; 208 break; 209 } 210 } 211 212 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 213 overflow = true; 214 215 if (!overflow) { 216 exponent = unsignedExponent; 217 if (negative) 218 exponent = -exponent; 219 exponent += exponentAdjustment; 220 if (exponent > 32767 || exponent < -32768) 221 overflow = true; 222 } 223 224 if (overflow) 225 exponent = negative ? -32768: 32767; 226 227 return exponent; 228 } 229 230 static StringRef::iterator 231 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 232 StringRef::iterator *dot) 233 { 234 StringRef::iterator p = begin; 235 *dot = end; 236 while (p != end && *p == '0') 237 p++; 238 239 if (p != end && *p == '.') { 240 *dot = p++; 241 242 assert(end - begin != 1 && "Significand has no digits"); 243 244 while (p != end && *p == '0') 245 p++; 246 } 247 248 return p; 249 } 250 251 /* Given a normal decimal floating point number of the form 252 253 dddd.dddd[eE][+-]ddd 254 255 where the decimal point and exponent are optional, fill out the 256 structure D. Exponent is appropriate if the significand is 257 treated as an integer, and normalizedExponent if the significand 258 is taken to have the decimal point after a single leading 259 non-zero digit. 260 261 If the value is zero, V->firstSigDigit points to a non-digit, and 262 the return exponent is zero. 263 */ 264 struct decimalInfo { 265 const char *firstSigDigit; 266 const char *lastSigDigit; 267 int exponent; 268 int normalizedExponent; 269 }; 270 271 static void 272 interpretDecimal(StringRef::iterator begin, StringRef::iterator end, 273 decimalInfo *D) 274 { 275 StringRef::iterator dot = end; 276 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot); 277 278 D->firstSigDigit = p; 279 D->exponent = 0; 280 D->normalizedExponent = 0; 281 282 for (; p != end; ++p) { 283 if (*p == '.') { 284 assert(dot == end && "String contains multiple dots"); 285 dot = p++; 286 if (p == end) 287 break; 288 } 289 if (decDigitValue(*p) >= 10U) 290 break; 291 } 292 293 if (p != end) { 294 assert((*p == 'e' || *p == 'E') && "Invalid character in significand"); 295 assert(p != begin && "Significand has no digits"); 296 assert((dot == end || p - begin != 1) && "Significand has no digits"); 297 298 /* p points to the first non-digit in the string */ 299 D->exponent = readExponent(p + 1, end); 300 301 /* Implied decimal point? */ 302 if (dot == end) 303 dot = p; 304 } 305 306 /* If number is all zeroes accept any exponent. */ 307 if (p != D->firstSigDigit) { 308 /* Drop insignificant trailing zeroes. */ 309 if (p != begin) { 310 do 311 do 312 p--; 313 while (p != begin && *p == '0'); 314 while (p != begin && *p == '.'); 315 } 316 317 /* Adjust the exponents for any decimal point. */ 318 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p)); 319 D->normalizedExponent = (D->exponent + 320 static_cast<APFloat::ExponentType>((p - D->firstSigDigit) 321 - (dot > D->firstSigDigit && dot < p))); 322 } 323 324 D->lastSigDigit = p; 325 } 326 327 /* Return the trailing fraction of a hexadecimal number. 328 DIGITVALUE is the first hex digit of the fraction, P points to 329 the next digit. */ 330 static lostFraction 331 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 332 unsigned int digitValue) 333 { 334 unsigned int hexDigit; 335 336 /* If the first trailing digit isn't 0 or 8 we can work out the 337 fraction immediately. */ 338 if (digitValue > 8) 339 return lfMoreThanHalf; 340 else if (digitValue < 8 && digitValue > 0) 341 return lfLessThanHalf; 342 343 // Otherwise we need to find the first non-zero digit. 344 while (p != end && (*p == '0' || *p == '.')) 345 p++; 346 347 assert(p != end && "Invalid trailing hexadecimal fraction!"); 348 349 hexDigit = hexDigitValue(*p); 350 351 /* If we ran off the end it is exactly zero or one-half, otherwise 352 a little more. */ 353 if (hexDigit == -1U) 354 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 355 else 356 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 357 } 358 359 /* Return the fraction lost were a bignum truncated losing the least 360 significant BITS bits. */ 361 static lostFraction 362 lostFractionThroughTruncation(const integerPart *parts, 363 unsigned int partCount, 364 unsigned int bits) 365 { 366 unsigned int lsb; 367 368 lsb = APInt::tcLSB(parts, partCount); 369 370 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 371 if (bits <= lsb) 372 return lfExactlyZero; 373 if (bits == lsb + 1) 374 return lfExactlyHalf; 375 if (bits <= partCount * integerPartWidth && 376 APInt::tcExtractBit(parts, bits - 1)) 377 return lfMoreThanHalf; 378 379 return lfLessThanHalf; 380 } 381 382 /* Shift DST right BITS bits noting lost fraction. */ 383 static lostFraction 384 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) 385 { 386 lostFraction lost_fraction; 387 388 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 389 390 APInt::tcShiftRight(dst, parts, bits); 391 392 return lost_fraction; 393 } 394 395 /* Combine the effect of two lost fractions. */ 396 static lostFraction 397 combineLostFractions(lostFraction moreSignificant, 398 lostFraction lessSignificant) 399 { 400 if (lessSignificant != lfExactlyZero) { 401 if (moreSignificant == lfExactlyZero) 402 moreSignificant = lfLessThanHalf; 403 else if (moreSignificant == lfExactlyHalf) 404 moreSignificant = lfMoreThanHalf; 405 } 406 407 return moreSignificant; 408 } 409 410 /* The error from the true value, in half-ulps, on multiplying two 411 floating point numbers, which differ from the value they 412 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 413 than the returned value. 414 415 See "How to Read Floating Point Numbers Accurately" by William D 416 Clinger. */ 417 static unsigned int 418 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 419 { 420 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 421 422 if (HUerr1 + HUerr2 == 0) 423 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 424 else 425 return inexactMultiply + 2 * (HUerr1 + HUerr2); 426 } 427 428 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 429 when the least significant BITS are truncated. BITS cannot be 430 zero. */ 431 static integerPart 432 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) 433 { 434 unsigned int count, partBits; 435 integerPart part, boundary; 436 437 assert(bits != 0); 438 439 bits--; 440 count = bits / integerPartWidth; 441 partBits = bits % integerPartWidth + 1; 442 443 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits)); 444 445 if (isNearest) 446 boundary = (integerPart) 1 << (partBits - 1); 447 else 448 boundary = 0; 449 450 if (count == 0) { 451 if (part - boundary <= boundary - part) 452 return part - boundary; 453 else 454 return boundary - part; 455 } 456 457 if (part == boundary) { 458 while (--count) 459 if (parts[count]) 460 return ~(integerPart) 0; /* A lot. */ 461 462 return parts[0]; 463 } else if (part == boundary - 1) { 464 while (--count) 465 if (~parts[count]) 466 return ~(integerPart) 0; /* A lot. */ 467 468 return -parts[0]; 469 } 470 471 return ~(integerPart) 0; /* A lot. */ 472 } 473 474 /* Place pow(5, power) in DST, and return the number of parts used. 475 DST must be at least one part larger than size of the answer. */ 476 static unsigned int 477 powerOf5(integerPart *dst, unsigned int power) 478 { 479 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 480 15625, 78125 }; 481 integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; 482 pow5s[0] = 78125 * 5; 483 484 unsigned int partsCount[16] = { 1 }; 485 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 486 unsigned int result; 487 assert(power <= maxExponent); 488 489 p1 = dst; 490 p2 = scratch; 491 492 *p1 = firstEightPowers[power & 7]; 493 power >>= 3; 494 495 result = 1; 496 pow5 = pow5s; 497 498 for (unsigned int n = 0; power; power >>= 1, n++) { 499 unsigned int pc; 500 501 pc = partsCount[n]; 502 503 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ 504 if (pc == 0) { 505 pc = partsCount[n - 1]; 506 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); 507 pc *= 2; 508 if (pow5[pc - 1] == 0) 509 pc--; 510 partsCount[n] = pc; 511 } 512 513 if (power & 1) { 514 integerPart *tmp; 515 516 APInt::tcFullMultiply(p2, p1, pow5, result, pc); 517 result += pc; 518 if (p2[result - 1] == 0) 519 result--; 520 521 /* Now result is in p1 with partsCount parts and p2 is scratch 522 space. */ 523 tmp = p1; 524 p1 = p2; 525 p2 = tmp; 526 } 527 528 pow5 += pc; 529 } 530 531 if (p1 != dst) 532 APInt::tcAssign(dst, p1, result); 533 534 return result; 535 } 536 537 /* Zero at the end to avoid modular arithmetic when adding one; used 538 when rounding up during hexadecimal output. */ 539 static const char hexDigitsLower[] = "0123456789abcdef0"; 540 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 541 static const char infinityL[] = "infinity"; 542 static const char infinityU[] = "INFINITY"; 543 static const char NaNL[] = "nan"; 544 static const char NaNU[] = "NAN"; 545 546 /* Write out an integerPart in hexadecimal, starting with the most 547 significant nibble. Write out exactly COUNT hexdigits, return 548 COUNT. */ 549 static unsigned int 550 partAsHex (char *dst, integerPart part, unsigned int count, 551 const char *hexDigitChars) 552 { 553 unsigned int result = count; 554 555 assert(count != 0 && count <= integerPartWidth / 4); 556 557 part >>= (integerPartWidth - 4 * count); 558 while (count--) { 559 dst[count] = hexDigitChars[part & 0xf]; 560 part >>= 4; 561 } 562 563 return result; 564 } 565 566 /* Write out an unsigned decimal integer. */ 567 static char * 568 writeUnsignedDecimal (char *dst, unsigned int n) 569 { 570 char buff[40], *p; 571 572 p = buff; 573 do 574 *p++ = '0' + n % 10; 575 while (n /= 10); 576 577 do 578 *dst++ = *--p; 579 while (p != buff); 580 581 return dst; 582 } 583 584 /* Write out a signed decimal integer. */ 585 static char * 586 writeSignedDecimal (char *dst, int value) 587 { 588 if (value < 0) { 589 *dst++ = '-'; 590 dst = writeUnsignedDecimal(dst, -(unsigned) value); 591 } else 592 dst = writeUnsignedDecimal(dst, value); 593 594 return dst; 595 } 596 597 namespace detail { 598 /* Constructors. */ 599 void IEEEFloat::initialize(const fltSemantics *ourSemantics) { 600 unsigned int count; 601 602 semantics = ourSemantics; 603 count = partCount(); 604 if (count > 1) 605 significand.parts = new integerPart[count]; 606 } 607 608 void IEEEFloat::freeSignificand() { 609 if (needsCleanup()) 610 delete [] significand.parts; 611 } 612 613 void IEEEFloat::assign(const IEEEFloat &rhs) { 614 assert(semantics == rhs.semantics); 615 616 sign = rhs.sign; 617 category = rhs.category; 618 exponent = rhs.exponent; 619 if (isFiniteNonZero() || category == fcNaN) 620 copySignificand(rhs); 621 } 622 623 void IEEEFloat::copySignificand(const IEEEFloat &rhs) { 624 assert(isFiniteNonZero() || category == fcNaN); 625 assert(rhs.partCount() >= partCount()); 626 627 APInt::tcAssign(significandParts(), rhs.significandParts(), 628 partCount()); 629 } 630 631 /* Make this number a NaN, with an arbitrary but deterministic value 632 for the significand. If double or longer, this is a signalling NaN, 633 which may not be ideal. If float, this is QNaN(0). */ 634 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { 635 category = fcNaN; 636 sign = Negative; 637 638 integerPart *significand = significandParts(); 639 unsigned numParts = partCount(); 640 641 // Set the significand bits to the fill. 642 if (!fill || fill->getNumWords() < numParts) 643 APInt::tcSet(significand, 0, numParts); 644 if (fill) { 645 APInt::tcAssign(significand, fill->getRawData(), 646 std::min(fill->getNumWords(), numParts)); 647 648 // Zero out the excess bits of the significand. 649 unsigned bitsToPreserve = semantics->precision - 1; 650 unsigned part = bitsToPreserve / 64; 651 bitsToPreserve %= 64; 652 significand[part] &= ((1ULL << bitsToPreserve) - 1); 653 for (part++; part != numParts; ++part) 654 significand[part] = 0; 655 } 656 657 unsigned QNaNBit = semantics->precision - 2; 658 659 if (SNaN) { 660 // We always have to clear the QNaN bit to make it an SNaN. 661 APInt::tcClearBit(significand, QNaNBit); 662 663 // If there are no bits set in the payload, we have to set 664 // *something* to make it a NaN instead of an infinity; 665 // conventionally, this is the next bit down from the QNaN bit. 666 if (APInt::tcIsZero(significand, numParts)) 667 APInt::tcSetBit(significand, QNaNBit - 1); 668 } else { 669 // We always have to set the QNaN bit to make it a QNaN. 670 APInt::tcSetBit(significand, QNaNBit); 671 } 672 673 // For x87 extended precision, we want to make a NaN, not a 674 // pseudo-NaN. Maybe we should expose the ability to make 675 // pseudo-NaNs? 676 if (semantics == &APFloat::x87DoubleExtended) 677 APInt::tcSetBit(significand, QNaNBit + 1); 678 } 679 680 IEEEFloat IEEEFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative, 681 const APInt *fill) { 682 IEEEFloat value(Sem, uninitialized); 683 value.makeNaN(SNaN, Negative, fill); 684 return value; 685 } 686 687 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) { 688 if (this != &rhs) { 689 if (semantics != rhs.semantics) { 690 freeSignificand(); 691 initialize(rhs.semantics); 692 } 693 assign(rhs); 694 } 695 696 return *this; 697 } 698 699 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) { 700 freeSignificand(); 701 702 semantics = rhs.semantics; 703 significand = rhs.significand; 704 exponent = rhs.exponent; 705 category = rhs.category; 706 sign = rhs.sign; 707 708 rhs.semantics = &Bogus; 709 return *this; 710 } 711 712 bool IEEEFloat::isDenormal() const { 713 return isFiniteNonZero() && (exponent == semantics->minExponent) && 714 (APInt::tcExtractBit(significandParts(), 715 semantics->precision - 1) == 0); 716 } 717 718 bool IEEEFloat::isSmallest() const { 719 // The smallest number by magnitude in our format will be the smallest 720 // denormal, i.e. the floating point number with exponent being minimum 721 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 722 return isFiniteNonZero() && exponent == semantics->minExponent && 723 significandMSB() == 0; 724 } 725 726 bool IEEEFloat::isSignificandAllOnes() const { 727 // Test if the significand excluding the integral bit is all ones. This allows 728 // us to test for binade boundaries. 729 const integerPart *Parts = significandParts(); 730 const unsigned PartCount = partCount(); 731 for (unsigned i = 0; i < PartCount - 1; i++) 732 if (~Parts[i]) 733 return false; 734 735 // Set the unused high bits to all ones when we compare. 736 const unsigned NumHighBits = 737 PartCount*integerPartWidth - semantics->precision + 1; 738 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " 739 "fill than integerPartWidth"); 740 const integerPart HighBitFill = 741 ~integerPart(0) << (integerPartWidth - NumHighBits); 742 if (~(Parts[PartCount - 1] | HighBitFill)) 743 return false; 744 745 return true; 746 } 747 748 bool IEEEFloat::isSignificandAllZeros() const { 749 // Test if the significand excluding the integral bit is all zeros. This 750 // allows us to test for binade boundaries. 751 const integerPart *Parts = significandParts(); 752 const unsigned PartCount = partCount(); 753 754 for (unsigned i = 0; i < PartCount - 1; i++) 755 if (Parts[i]) 756 return false; 757 758 const unsigned NumHighBits = 759 PartCount*integerPartWidth - semantics->precision + 1; 760 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " 761 "clear than integerPartWidth"); 762 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 763 764 if (Parts[PartCount - 1] & HighBitMask) 765 return false; 766 767 return true; 768 } 769 770 bool IEEEFloat::isLargest() const { 771 // The largest number by magnitude in our format will be the floating point 772 // number with maximum exponent and with significand that is all ones. 773 return isFiniteNonZero() && exponent == semantics->maxExponent 774 && isSignificandAllOnes(); 775 } 776 777 bool IEEEFloat::isInteger() const { 778 // This could be made more efficient; I'm going for obviously correct. 779 if (!isFinite()) return false; 780 IEEEFloat truncated = *this; 781 truncated.roundToIntegral(rmTowardZero); 782 return compare(truncated) == cmpEqual; 783 } 784 785 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const { 786 if (this == &rhs) 787 return true; 788 if (semantics != rhs.semantics || 789 category != rhs.category || 790 sign != rhs.sign) 791 return false; 792 if (category==fcZero || category==fcInfinity) 793 return true; 794 795 if (isFiniteNonZero() && exponent != rhs.exponent) 796 return false; 797 798 return std::equal(significandParts(), significandParts() + partCount(), 799 rhs.significandParts()); 800 } 801 802 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) { 803 initialize(&ourSemantics); 804 sign = 0; 805 category = fcNormal; 806 zeroSignificand(); 807 exponent = ourSemantics.precision - 1; 808 significandParts()[0] = value; 809 normalize(rmNearestTiesToEven, lfExactlyZero); 810 } 811 812 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) { 813 initialize(&ourSemantics); 814 category = fcZero; 815 sign = false; 816 } 817 818 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag) { 819 // Allocates storage if necessary but does not initialize it. 820 initialize(&ourSemantics); 821 } 822 823 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, StringRef text) { 824 initialize(&ourSemantics); 825 convertFromString(text, rmNearestTiesToEven); 826 } 827 828 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) { 829 initialize(rhs.semantics); 830 assign(rhs); 831 } 832 833 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&Bogus) { 834 *this = std::move(rhs); 835 } 836 837 IEEEFloat::~IEEEFloat() { freeSignificand(); } 838 839 // Profile - This method 'profiles' an APFloat for use with FoldingSet. 840 void IEEEFloat::Profile(FoldingSetNodeID &ID) const { 841 ID.Add(bitcastToAPInt()); 842 } 843 844 unsigned int IEEEFloat::partCount() const { 845 return partCountForBits(semantics->precision + 1); 846 } 847 848 const integerPart *IEEEFloat::significandParts() const { 849 return const_cast<IEEEFloat *>(this)->significandParts(); 850 } 851 852 integerPart *IEEEFloat::significandParts() { 853 if (partCount() > 1) 854 return significand.parts; 855 else 856 return &significand.part; 857 } 858 859 void IEEEFloat::zeroSignificand() { 860 APInt::tcSet(significandParts(), 0, partCount()); 861 } 862 863 /* Increment an fcNormal floating point number's significand. */ 864 void IEEEFloat::incrementSignificand() { 865 integerPart carry; 866 867 carry = APInt::tcIncrement(significandParts(), partCount()); 868 869 /* Our callers should never cause us to overflow. */ 870 assert(carry == 0); 871 (void)carry; 872 } 873 874 /* Add the significand of the RHS. Returns the carry flag. */ 875 integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { 876 integerPart *parts; 877 878 parts = significandParts(); 879 880 assert(semantics == rhs.semantics); 881 assert(exponent == rhs.exponent); 882 883 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 884 } 885 886 /* Subtract the significand of the RHS with a borrow flag. Returns 887 the borrow flag. */ 888 integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs, 889 integerPart borrow) { 890 integerPart *parts; 891 892 parts = significandParts(); 893 894 assert(semantics == rhs.semantics); 895 assert(exponent == rhs.exponent); 896 897 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 898 partCount()); 899 } 900 901 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 902 on to the full-precision result of the multiplication. Returns the 903 lost fraction. */ 904 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs, 905 const IEEEFloat *addend) { 906 unsigned int omsb; // One, not zero, based MSB. 907 unsigned int partsCount, newPartsCount, precision; 908 integerPart *lhsSignificand; 909 integerPart scratch[4]; 910 integerPart *fullSignificand; 911 lostFraction lost_fraction; 912 bool ignored; 913 914 assert(semantics == rhs.semantics); 915 916 precision = semantics->precision; 917 918 // Allocate space for twice as many bits as the original significand, plus one 919 // extra bit for the addition to overflow into. 920 newPartsCount = partCountForBits(precision * 2 + 1); 921 922 if (newPartsCount > 4) 923 fullSignificand = new integerPart[newPartsCount]; 924 else 925 fullSignificand = scratch; 926 927 lhsSignificand = significandParts(); 928 partsCount = partCount(); 929 930 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 931 rhs.significandParts(), partsCount, partsCount); 932 933 lost_fraction = lfExactlyZero; 934 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 935 exponent += rhs.exponent; 936 937 // Assume the operands involved in the multiplication are single-precision 938 // FP, and the two multiplicants are: 939 // *this = a23 . a22 ... a0 * 2^e1 940 // rhs = b23 . b22 ... b0 * 2^e2 941 // the result of multiplication is: 942 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) 943 // Note that there are three significant bits at the left-hand side of the 944 // radix point: two for the multiplication, and an overflow bit for the 945 // addition (that will always be zero at this point). Move the radix point 946 // toward left by two bits, and adjust exponent accordingly. 947 exponent += 2; 948 949 if (addend && addend->isNonZero()) { 950 // The intermediate result of the multiplication has "2 * precision" 951 // signicant bit; adjust the addend to be consistent with mul result. 952 // 953 Significand savedSignificand = significand; 954 const fltSemantics *savedSemantics = semantics; 955 fltSemantics extendedSemantics; 956 opStatus status; 957 unsigned int extendedPrecision; 958 959 // Normalize our MSB to one below the top bit to allow for overflow. 960 extendedPrecision = 2 * precision + 1; 961 if (omsb != extendedPrecision - 1) { 962 assert(extendedPrecision > omsb); 963 APInt::tcShiftLeft(fullSignificand, newPartsCount, 964 (extendedPrecision - 1) - omsb); 965 exponent -= (extendedPrecision - 1) - omsb; 966 } 967 968 /* Create new semantics. */ 969 extendedSemantics = *semantics; 970 extendedSemantics.precision = extendedPrecision; 971 972 if (newPartsCount == 1) 973 significand.part = fullSignificand[0]; 974 else 975 significand.parts = fullSignificand; 976 semantics = &extendedSemantics; 977 978 IEEEFloat extendedAddend(*addend); 979 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); 980 assert(status == opOK); 981 (void)status; 982 983 // Shift the significand of the addend right by one bit. This guarantees 984 // that the high bit of the significand is zero (same as fullSignificand), 985 // so the addition will overflow (if it does overflow at all) into the top bit. 986 lost_fraction = extendedAddend.shiftSignificandRight(1); 987 assert(lost_fraction == lfExactlyZero && 988 "Lost precision while shifting addend for fused-multiply-add."); 989 990 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 991 992 /* Restore our state. */ 993 if (newPartsCount == 1) 994 fullSignificand[0] = significand.part; 995 significand = savedSignificand; 996 semantics = savedSemantics; 997 998 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 999 } 1000 1001 // Convert the result having "2 * precision" significant-bits back to the one 1002 // having "precision" significant-bits. First, move the radix point from 1003 // poision "2*precision - 1" to "precision - 1". The exponent need to be 1004 // adjusted by "2*precision - 1" - "precision - 1" = "precision". 1005 exponent -= precision + 1; 1006 1007 // In case MSB resides at the left-hand side of radix point, shift the 1008 // mantissa right by some amount to make sure the MSB reside right before 1009 // the radix point (i.e. "MSB . rest-significant-bits"). 1010 // 1011 // Note that the result is not normalized when "omsb < precision". So, the 1012 // caller needs to call IEEEFloat::normalize() if normalized value is 1013 // expected. 1014 if (omsb > precision) { 1015 unsigned int bits, significantParts; 1016 lostFraction lf; 1017 1018 bits = omsb - precision; 1019 significantParts = partCountForBits(omsb); 1020 lf = shiftRight(fullSignificand, significantParts, bits); 1021 lost_fraction = combineLostFractions(lf, lost_fraction); 1022 exponent += bits; 1023 } 1024 1025 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 1026 1027 if (newPartsCount > 4) 1028 delete [] fullSignificand; 1029 1030 return lost_fraction; 1031 } 1032 1033 /* Multiply the significands of LHS and RHS to DST. */ 1034 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) { 1035 unsigned int bit, i, partsCount; 1036 const integerPart *rhsSignificand; 1037 integerPart *lhsSignificand, *dividend, *divisor; 1038 integerPart scratch[4]; 1039 lostFraction lost_fraction; 1040 1041 assert(semantics == rhs.semantics); 1042 1043 lhsSignificand = significandParts(); 1044 rhsSignificand = rhs.significandParts(); 1045 partsCount = partCount(); 1046 1047 if (partsCount > 2) 1048 dividend = new integerPart[partsCount * 2]; 1049 else 1050 dividend = scratch; 1051 1052 divisor = dividend + partsCount; 1053 1054 /* Copy the dividend and divisor as they will be modified in-place. */ 1055 for (i = 0; i < partsCount; i++) { 1056 dividend[i] = lhsSignificand[i]; 1057 divisor[i] = rhsSignificand[i]; 1058 lhsSignificand[i] = 0; 1059 } 1060 1061 exponent -= rhs.exponent; 1062 1063 unsigned int precision = semantics->precision; 1064 1065 /* Normalize the divisor. */ 1066 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 1067 if (bit) { 1068 exponent += bit; 1069 APInt::tcShiftLeft(divisor, partsCount, bit); 1070 } 1071 1072 /* Normalize the dividend. */ 1073 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 1074 if (bit) { 1075 exponent -= bit; 1076 APInt::tcShiftLeft(dividend, partsCount, bit); 1077 } 1078 1079 /* Ensure the dividend >= divisor initially for the loop below. 1080 Incidentally, this means that the division loop below is 1081 guaranteed to set the integer bit to one. */ 1082 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { 1083 exponent--; 1084 APInt::tcShiftLeft(dividend, partsCount, 1); 1085 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 1086 } 1087 1088 /* Long division. */ 1089 for (bit = precision; bit; bit -= 1) { 1090 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 1091 APInt::tcSubtract(dividend, divisor, 0, partsCount); 1092 APInt::tcSetBit(lhsSignificand, bit - 1); 1093 } 1094 1095 APInt::tcShiftLeft(dividend, partsCount, 1); 1096 } 1097 1098 /* Figure out the lost fraction. */ 1099 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 1100 1101 if (cmp > 0) 1102 lost_fraction = lfMoreThanHalf; 1103 else if (cmp == 0) 1104 lost_fraction = lfExactlyHalf; 1105 else if (APInt::tcIsZero(dividend, partsCount)) 1106 lost_fraction = lfExactlyZero; 1107 else 1108 lost_fraction = lfLessThanHalf; 1109 1110 if (partsCount > 2) 1111 delete [] dividend; 1112 1113 return lost_fraction; 1114 } 1115 1116 unsigned int IEEEFloat::significandMSB() const { 1117 return APInt::tcMSB(significandParts(), partCount()); 1118 } 1119 1120 unsigned int IEEEFloat::significandLSB() const { 1121 return APInt::tcLSB(significandParts(), partCount()); 1122 } 1123 1124 /* Note that a zero result is NOT normalized to fcZero. */ 1125 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) { 1126 /* Our exponent should not overflow. */ 1127 assert((ExponentType) (exponent + bits) >= exponent); 1128 1129 exponent += bits; 1130 1131 return shiftRight(significandParts(), partCount(), bits); 1132 } 1133 1134 /* Shift the significand left BITS bits, subtract BITS from its exponent. */ 1135 void IEEEFloat::shiftSignificandLeft(unsigned int bits) { 1136 assert(bits < semantics->precision); 1137 1138 if (bits) { 1139 unsigned int partsCount = partCount(); 1140 1141 APInt::tcShiftLeft(significandParts(), partsCount, bits); 1142 exponent -= bits; 1143 1144 assert(!APInt::tcIsZero(significandParts(), partsCount)); 1145 } 1146 } 1147 1148 IEEEFloat::cmpResult 1149 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const { 1150 int compare; 1151 1152 assert(semantics == rhs.semantics); 1153 assert(isFiniteNonZero()); 1154 assert(rhs.isFiniteNonZero()); 1155 1156 compare = exponent - rhs.exponent; 1157 1158 /* If exponents are equal, do an unsigned bignum comparison of the 1159 significands. */ 1160 if (compare == 0) 1161 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1162 partCount()); 1163 1164 if (compare > 0) 1165 return cmpGreaterThan; 1166 else if (compare < 0) 1167 return cmpLessThan; 1168 else 1169 return cmpEqual; 1170 } 1171 1172 /* Handle overflow. Sign is preserved. We either become infinity or 1173 the largest finite number. */ 1174 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) { 1175 /* Infinity? */ 1176 if (rounding_mode == rmNearestTiesToEven || 1177 rounding_mode == rmNearestTiesToAway || 1178 (rounding_mode == rmTowardPositive && !sign) || 1179 (rounding_mode == rmTowardNegative && sign)) { 1180 category = fcInfinity; 1181 return (opStatus) (opOverflow | opInexact); 1182 } 1183 1184 /* Otherwise we become the largest finite number. */ 1185 category = fcNormal; 1186 exponent = semantics->maxExponent; 1187 APInt::tcSetLeastSignificantBits(significandParts(), partCount(), 1188 semantics->precision); 1189 1190 return opInexact; 1191 } 1192 1193 /* Returns TRUE if, when truncating the current number, with BIT the 1194 new LSB, with the given lost fraction and rounding mode, the result 1195 would need to be rounded away from zero (i.e., by increasing the 1196 signficand). This routine must work for fcZero of both signs, and 1197 fcNormal numbers. */ 1198 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode, 1199 lostFraction lost_fraction, 1200 unsigned int bit) const { 1201 /* NaNs and infinities should not have lost fractions. */ 1202 assert(isFiniteNonZero() || category == fcZero); 1203 1204 /* Current callers never pass this so we don't handle it. */ 1205 assert(lost_fraction != lfExactlyZero); 1206 1207 switch (rounding_mode) { 1208 case rmNearestTiesToAway: 1209 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1210 1211 case rmNearestTiesToEven: 1212 if (lost_fraction == lfMoreThanHalf) 1213 return true; 1214 1215 /* Our zeroes don't have a significand to test. */ 1216 if (lost_fraction == lfExactlyHalf && category != fcZero) 1217 return APInt::tcExtractBit(significandParts(), bit); 1218 1219 return false; 1220 1221 case rmTowardZero: 1222 return false; 1223 1224 case rmTowardPositive: 1225 return !sign; 1226 1227 case rmTowardNegative: 1228 return sign; 1229 } 1230 llvm_unreachable("Invalid rounding mode found"); 1231 } 1232 1233 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, 1234 lostFraction lost_fraction) { 1235 unsigned int omsb; /* One, not zero, based MSB. */ 1236 int exponentChange; 1237 1238 if (!isFiniteNonZero()) 1239 return opOK; 1240 1241 /* Before rounding normalize the exponent of fcNormal numbers. */ 1242 omsb = significandMSB() + 1; 1243 1244 if (omsb) { 1245 /* OMSB is numbered from 1. We want to place it in the integer 1246 bit numbered PRECISION if possible, with a compensating change in 1247 the exponent. */ 1248 exponentChange = omsb - semantics->precision; 1249 1250 /* If the resulting exponent is too high, overflow according to 1251 the rounding mode. */ 1252 if (exponent + exponentChange > semantics->maxExponent) 1253 return handleOverflow(rounding_mode); 1254 1255 /* Subnormal numbers have exponent minExponent, and their MSB 1256 is forced based on that. */ 1257 if (exponent + exponentChange < semantics->minExponent) 1258 exponentChange = semantics->minExponent - exponent; 1259 1260 /* Shifting left is easy as we don't lose precision. */ 1261 if (exponentChange < 0) { 1262 assert(lost_fraction == lfExactlyZero); 1263 1264 shiftSignificandLeft(-exponentChange); 1265 1266 return opOK; 1267 } 1268 1269 if (exponentChange > 0) { 1270 lostFraction lf; 1271 1272 /* Shift right and capture any new lost fraction. */ 1273 lf = shiftSignificandRight(exponentChange); 1274 1275 lost_fraction = combineLostFractions(lf, lost_fraction); 1276 1277 /* Keep OMSB up-to-date. */ 1278 if (omsb > (unsigned) exponentChange) 1279 omsb -= exponentChange; 1280 else 1281 omsb = 0; 1282 } 1283 } 1284 1285 /* Now round the number according to rounding_mode given the lost 1286 fraction. */ 1287 1288 /* As specified in IEEE 754, since we do not trap we do not report 1289 underflow for exact results. */ 1290 if (lost_fraction == lfExactlyZero) { 1291 /* Canonicalize zeroes. */ 1292 if (omsb == 0) 1293 category = fcZero; 1294 1295 return opOK; 1296 } 1297 1298 /* Increment the significand if we're rounding away from zero. */ 1299 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1300 if (omsb == 0) 1301 exponent = semantics->minExponent; 1302 1303 incrementSignificand(); 1304 omsb = significandMSB() + 1; 1305 1306 /* Did the significand increment overflow? */ 1307 if (omsb == (unsigned) semantics->precision + 1) { 1308 /* Renormalize by incrementing the exponent and shifting our 1309 significand right one. However if we already have the 1310 maximum exponent we overflow to infinity. */ 1311 if (exponent == semantics->maxExponent) { 1312 category = fcInfinity; 1313 1314 return (opStatus) (opOverflow | opInexact); 1315 } 1316 1317 shiftSignificandRight(1); 1318 1319 return opInexact; 1320 } 1321 } 1322 1323 /* The normal case - we were and are not denormal, and any 1324 significand increment above didn't overflow. */ 1325 if (omsb == semantics->precision) 1326 return opInexact; 1327 1328 /* We have a non-zero denormal. */ 1329 assert(omsb < semantics->precision); 1330 1331 /* Canonicalize zeroes. */ 1332 if (omsb == 0) 1333 category = fcZero; 1334 1335 /* The fcZero case is a denormal that underflowed to zero. */ 1336 return (opStatus) (opUnderflow | opInexact); 1337 } 1338 1339 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs, 1340 bool subtract) { 1341 switch (PackCategoriesIntoKey(category, rhs.category)) { 1342 default: 1343 llvm_unreachable(nullptr); 1344 1345 case PackCategoriesIntoKey(fcNaN, fcZero): 1346 case PackCategoriesIntoKey(fcNaN, fcNormal): 1347 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1348 case PackCategoriesIntoKey(fcNaN, fcNaN): 1349 case PackCategoriesIntoKey(fcNormal, fcZero): 1350 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1351 case PackCategoriesIntoKey(fcInfinity, fcZero): 1352 return opOK; 1353 1354 case PackCategoriesIntoKey(fcZero, fcNaN): 1355 case PackCategoriesIntoKey(fcNormal, fcNaN): 1356 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1357 // We need to be sure to flip the sign here for subtraction because we 1358 // don't have a separate negate operation so -NaN becomes 0 - NaN here. 1359 sign = rhs.sign ^ subtract; 1360 category = fcNaN; 1361 copySignificand(rhs); 1362 return opOK; 1363 1364 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1365 case PackCategoriesIntoKey(fcZero, fcInfinity): 1366 category = fcInfinity; 1367 sign = rhs.sign ^ subtract; 1368 return opOK; 1369 1370 case PackCategoriesIntoKey(fcZero, fcNormal): 1371 assign(rhs); 1372 sign = rhs.sign ^ subtract; 1373 return opOK; 1374 1375 case PackCategoriesIntoKey(fcZero, fcZero): 1376 /* Sign depends on rounding mode; handled by caller. */ 1377 return opOK; 1378 1379 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1380 /* Differently signed infinities can only be validly 1381 subtracted. */ 1382 if (((sign ^ rhs.sign)!=0) != subtract) { 1383 makeNaN(); 1384 return opInvalidOp; 1385 } 1386 1387 return opOK; 1388 1389 case PackCategoriesIntoKey(fcNormal, fcNormal): 1390 return opDivByZero; 1391 } 1392 } 1393 1394 /* Add or subtract two normal numbers. */ 1395 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs, 1396 bool subtract) { 1397 integerPart carry; 1398 lostFraction lost_fraction; 1399 int bits; 1400 1401 /* Determine if the operation on the absolute values is effectively 1402 an addition or subtraction. */ 1403 subtract ^= static_cast<bool>(sign ^ rhs.sign); 1404 1405 /* Are we bigger exponent-wise than the RHS? */ 1406 bits = exponent - rhs.exponent; 1407 1408 /* Subtraction is more subtle than one might naively expect. */ 1409 if (subtract) { 1410 IEEEFloat temp_rhs(rhs); 1411 bool reverse; 1412 1413 if (bits == 0) { 1414 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan; 1415 lost_fraction = lfExactlyZero; 1416 } else if (bits > 0) { 1417 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1418 shiftSignificandLeft(1); 1419 reverse = false; 1420 } else { 1421 lost_fraction = shiftSignificandRight(-bits - 1); 1422 temp_rhs.shiftSignificandLeft(1); 1423 reverse = true; 1424 } 1425 1426 if (reverse) { 1427 carry = temp_rhs.subtractSignificand 1428 (*this, lost_fraction != lfExactlyZero); 1429 copySignificand(temp_rhs); 1430 sign = !sign; 1431 } else { 1432 carry = subtractSignificand 1433 (temp_rhs, lost_fraction != lfExactlyZero); 1434 } 1435 1436 /* Invert the lost fraction - it was on the RHS and 1437 subtracted. */ 1438 if (lost_fraction == lfLessThanHalf) 1439 lost_fraction = lfMoreThanHalf; 1440 else if (lost_fraction == lfMoreThanHalf) 1441 lost_fraction = lfLessThanHalf; 1442 1443 /* The code above is intended to ensure that no borrow is 1444 necessary. */ 1445 assert(!carry); 1446 (void)carry; 1447 } else { 1448 if (bits > 0) { 1449 IEEEFloat temp_rhs(rhs); 1450 1451 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1452 carry = addSignificand(temp_rhs); 1453 } else { 1454 lost_fraction = shiftSignificandRight(-bits); 1455 carry = addSignificand(rhs); 1456 } 1457 1458 /* We have a guard bit; generating a carry cannot happen. */ 1459 assert(!carry); 1460 (void)carry; 1461 } 1462 1463 return lost_fraction; 1464 } 1465 1466 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) { 1467 switch (PackCategoriesIntoKey(category, rhs.category)) { 1468 default: 1469 llvm_unreachable(nullptr); 1470 1471 case PackCategoriesIntoKey(fcNaN, fcZero): 1472 case PackCategoriesIntoKey(fcNaN, fcNormal): 1473 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1474 case PackCategoriesIntoKey(fcNaN, fcNaN): 1475 sign = false; 1476 return opOK; 1477 1478 case PackCategoriesIntoKey(fcZero, fcNaN): 1479 case PackCategoriesIntoKey(fcNormal, fcNaN): 1480 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1481 sign = false; 1482 category = fcNaN; 1483 copySignificand(rhs); 1484 return opOK; 1485 1486 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1487 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1488 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1489 category = fcInfinity; 1490 return opOK; 1491 1492 case PackCategoriesIntoKey(fcZero, fcNormal): 1493 case PackCategoriesIntoKey(fcNormal, fcZero): 1494 case PackCategoriesIntoKey(fcZero, fcZero): 1495 category = fcZero; 1496 return opOK; 1497 1498 case PackCategoriesIntoKey(fcZero, fcInfinity): 1499 case PackCategoriesIntoKey(fcInfinity, fcZero): 1500 makeNaN(); 1501 return opInvalidOp; 1502 1503 case PackCategoriesIntoKey(fcNormal, fcNormal): 1504 return opOK; 1505 } 1506 } 1507 1508 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) { 1509 switch (PackCategoriesIntoKey(category, rhs.category)) { 1510 default: 1511 llvm_unreachable(nullptr); 1512 1513 case PackCategoriesIntoKey(fcZero, fcNaN): 1514 case PackCategoriesIntoKey(fcNormal, fcNaN): 1515 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1516 category = fcNaN; 1517 copySignificand(rhs); 1518 case PackCategoriesIntoKey(fcNaN, fcZero): 1519 case PackCategoriesIntoKey(fcNaN, fcNormal): 1520 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1521 case PackCategoriesIntoKey(fcNaN, fcNaN): 1522 sign = false; 1523 case PackCategoriesIntoKey(fcInfinity, fcZero): 1524 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1525 case PackCategoriesIntoKey(fcZero, fcInfinity): 1526 case PackCategoriesIntoKey(fcZero, fcNormal): 1527 return opOK; 1528 1529 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1530 category = fcZero; 1531 return opOK; 1532 1533 case PackCategoriesIntoKey(fcNormal, fcZero): 1534 category = fcInfinity; 1535 return opDivByZero; 1536 1537 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1538 case PackCategoriesIntoKey(fcZero, fcZero): 1539 makeNaN(); 1540 return opInvalidOp; 1541 1542 case PackCategoriesIntoKey(fcNormal, fcNormal): 1543 return opOK; 1544 } 1545 } 1546 1547 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) { 1548 switch (PackCategoriesIntoKey(category, rhs.category)) { 1549 default: 1550 llvm_unreachable(nullptr); 1551 1552 case PackCategoriesIntoKey(fcNaN, fcZero): 1553 case PackCategoriesIntoKey(fcNaN, fcNormal): 1554 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1555 case PackCategoriesIntoKey(fcNaN, fcNaN): 1556 case PackCategoriesIntoKey(fcZero, fcInfinity): 1557 case PackCategoriesIntoKey(fcZero, fcNormal): 1558 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1559 return opOK; 1560 1561 case PackCategoriesIntoKey(fcZero, fcNaN): 1562 case PackCategoriesIntoKey(fcNormal, fcNaN): 1563 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1564 sign = false; 1565 category = fcNaN; 1566 copySignificand(rhs); 1567 return opOK; 1568 1569 case PackCategoriesIntoKey(fcNormal, fcZero): 1570 case PackCategoriesIntoKey(fcInfinity, fcZero): 1571 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1572 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1573 case PackCategoriesIntoKey(fcZero, fcZero): 1574 makeNaN(); 1575 return opInvalidOp; 1576 1577 case PackCategoriesIntoKey(fcNormal, fcNormal): 1578 return opOK; 1579 } 1580 } 1581 1582 /* Change sign. */ 1583 void IEEEFloat::changeSign() { 1584 /* Look mummy, this one's easy. */ 1585 sign = !sign; 1586 } 1587 1588 void IEEEFloat::clearSign() { 1589 /* So is this one. */ 1590 sign = 0; 1591 } 1592 1593 void IEEEFloat::copySign(const IEEEFloat &rhs) { 1594 /* And this one. */ 1595 sign = rhs.sign; 1596 } 1597 1598 /* Normalized addition or subtraction. */ 1599 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs, 1600 roundingMode rounding_mode, 1601 bool subtract) { 1602 opStatus fs; 1603 1604 fs = addOrSubtractSpecials(rhs, subtract); 1605 1606 /* This return code means it was not a simple case. */ 1607 if (fs == opDivByZero) { 1608 lostFraction lost_fraction; 1609 1610 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1611 fs = normalize(rounding_mode, lost_fraction); 1612 1613 /* Can only be zero if we lost no fraction. */ 1614 assert(category != fcZero || lost_fraction == lfExactlyZero); 1615 } 1616 1617 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1618 positive zero unless rounding to minus infinity, except that 1619 adding two like-signed zeroes gives that zero. */ 1620 if (category == fcZero) { 1621 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 1622 sign = (rounding_mode == rmTowardNegative); 1623 } 1624 1625 return fs; 1626 } 1627 1628 /* Normalized addition. */ 1629 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs, 1630 roundingMode rounding_mode) { 1631 return addOrSubtract(rhs, rounding_mode, false); 1632 } 1633 1634 /* Normalized subtraction. */ 1635 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs, 1636 roundingMode rounding_mode) { 1637 return addOrSubtract(rhs, rounding_mode, true); 1638 } 1639 1640 /* Normalized multiply. */ 1641 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs, 1642 roundingMode rounding_mode) { 1643 opStatus fs; 1644 1645 sign ^= rhs.sign; 1646 fs = multiplySpecials(rhs); 1647 1648 if (isFiniteNonZero()) { 1649 lostFraction lost_fraction = multiplySignificand(rhs, nullptr); 1650 fs = normalize(rounding_mode, lost_fraction); 1651 if (lost_fraction != lfExactlyZero) 1652 fs = (opStatus) (fs | opInexact); 1653 } 1654 1655 return fs; 1656 } 1657 1658 /* Normalized divide. */ 1659 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs, 1660 roundingMode rounding_mode) { 1661 opStatus fs; 1662 1663 sign ^= rhs.sign; 1664 fs = divideSpecials(rhs); 1665 1666 if (isFiniteNonZero()) { 1667 lostFraction lost_fraction = divideSignificand(rhs); 1668 fs = normalize(rounding_mode, lost_fraction); 1669 if (lost_fraction != lfExactlyZero) 1670 fs = (opStatus) (fs | opInexact); 1671 } 1672 1673 return fs; 1674 } 1675 1676 /* Normalized remainder. This is not currently correct in all cases. */ 1677 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) { 1678 opStatus fs; 1679 IEEEFloat V = *this; 1680 unsigned int origSign = sign; 1681 1682 fs = V.divide(rhs, rmNearestTiesToEven); 1683 if (fs == opDivByZero) 1684 return fs; 1685 1686 int parts = partCount(); 1687 integerPart *x = new integerPart[parts]; 1688 bool ignored; 1689 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1690 rmNearestTiesToEven, &ignored); 1691 if (fs==opInvalidOp) 1692 return fs; 1693 1694 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1695 rmNearestTiesToEven); 1696 assert(fs==opOK); // should always work 1697 1698 fs = V.multiply(rhs, rmNearestTiesToEven); 1699 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1700 1701 fs = subtract(V, rmNearestTiesToEven); 1702 assert(fs==opOK || fs==opInexact); // likewise 1703 1704 if (isZero()) 1705 sign = origSign; // IEEE754 requires this 1706 delete[] x; 1707 return fs; 1708 } 1709 1710 /* Normalized llvm frem (C fmod). 1711 This is not currently correct in all cases. */ 1712 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) { 1713 opStatus fs; 1714 fs = modSpecials(rhs); 1715 1716 if (isFiniteNonZero() && rhs.isFiniteNonZero()) { 1717 IEEEFloat V = *this; 1718 unsigned int origSign = sign; 1719 1720 fs = V.divide(rhs, rmNearestTiesToEven); 1721 if (fs == opDivByZero) 1722 return fs; 1723 1724 int parts = partCount(); 1725 integerPart *x = new integerPart[parts]; 1726 bool ignored; 1727 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1728 rmTowardZero, &ignored); 1729 if (fs==opInvalidOp) 1730 return fs; 1731 1732 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1733 rmNearestTiesToEven); 1734 assert(fs==opOK); // should always work 1735 1736 fs = V.multiply(rhs, rmNearestTiesToEven); 1737 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1738 1739 fs = subtract(V, rmNearestTiesToEven); 1740 assert(fs==opOK || fs==opInexact); // likewise 1741 1742 if (isZero()) 1743 sign = origSign; // IEEE754 requires this 1744 delete[] x; 1745 } 1746 return fs; 1747 } 1748 1749 /* Normalized fused-multiply-add. */ 1750 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand, 1751 const IEEEFloat &addend, 1752 roundingMode rounding_mode) { 1753 opStatus fs; 1754 1755 /* Post-multiplication sign, before addition. */ 1756 sign ^= multiplicand.sign; 1757 1758 /* If and only if all arguments are normal do we need to do an 1759 extended-precision calculation. */ 1760 if (isFiniteNonZero() && 1761 multiplicand.isFiniteNonZero() && 1762 addend.isFinite()) { 1763 lostFraction lost_fraction; 1764 1765 lost_fraction = multiplySignificand(multiplicand, &addend); 1766 fs = normalize(rounding_mode, lost_fraction); 1767 if (lost_fraction != lfExactlyZero) 1768 fs = (opStatus) (fs | opInexact); 1769 1770 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1771 positive zero unless rounding to minus infinity, except that 1772 adding two like-signed zeroes gives that zero. */ 1773 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) 1774 sign = (rounding_mode == rmTowardNegative); 1775 } else { 1776 fs = multiplySpecials(multiplicand); 1777 1778 /* FS can only be opOK or opInvalidOp. There is no more work 1779 to do in the latter case. The IEEE-754R standard says it is 1780 implementation-defined in this case whether, if ADDEND is a 1781 quiet NaN, we raise invalid op; this implementation does so. 1782 1783 If we need to do the addition we can do so with normal 1784 precision. */ 1785 if (fs == opOK) 1786 fs = addOrSubtract(addend, rounding_mode, false); 1787 } 1788 1789 return fs; 1790 } 1791 1792 /* Rounding-mode corrrect round to integral value. */ 1793 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) { 1794 opStatus fs; 1795 1796 // If the exponent is large enough, we know that this value is already 1797 // integral, and the arithmetic below would potentially cause it to saturate 1798 // to +/-Inf. Bail out early instead. 1799 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics)) 1800 return opOK; 1801 1802 // The algorithm here is quite simple: we add 2^(p-1), where p is the 1803 // precision of our format, and then subtract it back off again. The choice 1804 // of rounding modes for the addition/subtraction determines the rounding mode 1805 // for our integral rounding as well. 1806 // NOTE: When the input value is negative, we do subtraction followed by 1807 // addition instead. 1808 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); 1809 IntegerConstant <<= semanticsPrecision(*semantics)-1; 1810 IEEEFloat MagicConstant(*semantics); 1811 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 1812 rmNearestTiesToEven); 1813 MagicConstant.copySign(*this); 1814 1815 if (fs != opOK) 1816 return fs; 1817 1818 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly. 1819 bool inputSign = isNegative(); 1820 1821 fs = add(MagicConstant, rounding_mode); 1822 if (fs != opOK && fs != opInexact) 1823 return fs; 1824 1825 fs = subtract(MagicConstant, rounding_mode); 1826 1827 // Restore the input sign. 1828 if (inputSign != isNegative()) 1829 changeSign(); 1830 1831 return fs; 1832 } 1833 1834 1835 /* Comparison requires normalized numbers. */ 1836 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const { 1837 cmpResult result; 1838 1839 assert(semantics == rhs.semantics); 1840 1841 switch (PackCategoriesIntoKey(category, rhs.category)) { 1842 default: 1843 llvm_unreachable(nullptr); 1844 1845 case PackCategoriesIntoKey(fcNaN, fcZero): 1846 case PackCategoriesIntoKey(fcNaN, fcNormal): 1847 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1848 case PackCategoriesIntoKey(fcNaN, fcNaN): 1849 case PackCategoriesIntoKey(fcZero, fcNaN): 1850 case PackCategoriesIntoKey(fcNormal, fcNaN): 1851 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1852 return cmpUnordered; 1853 1854 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1855 case PackCategoriesIntoKey(fcInfinity, fcZero): 1856 case PackCategoriesIntoKey(fcNormal, fcZero): 1857 if (sign) 1858 return cmpLessThan; 1859 else 1860 return cmpGreaterThan; 1861 1862 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1863 case PackCategoriesIntoKey(fcZero, fcInfinity): 1864 case PackCategoriesIntoKey(fcZero, fcNormal): 1865 if (rhs.sign) 1866 return cmpGreaterThan; 1867 else 1868 return cmpLessThan; 1869 1870 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1871 if (sign == rhs.sign) 1872 return cmpEqual; 1873 else if (sign) 1874 return cmpLessThan; 1875 else 1876 return cmpGreaterThan; 1877 1878 case PackCategoriesIntoKey(fcZero, fcZero): 1879 return cmpEqual; 1880 1881 case PackCategoriesIntoKey(fcNormal, fcNormal): 1882 break; 1883 } 1884 1885 /* Two normal numbers. Do they have the same sign? */ 1886 if (sign != rhs.sign) { 1887 if (sign) 1888 result = cmpLessThan; 1889 else 1890 result = cmpGreaterThan; 1891 } else { 1892 /* Compare absolute values; invert result if negative. */ 1893 result = compareAbsoluteValue(rhs); 1894 1895 if (sign) { 1896 if (result == cmpLessThan) 1897 result = cmpGreaterThan; 1898 else if (result == cmpGreaterThan) 1899 result = cmpLessThan; 1900 } 1901 } 1902 1903 return result; 1904 } 1905 1906 /// IEEEFloat::convert - convert a value of one floating point type to another. 1907 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 1908 /// records whether the transformation lost information, i.e. whether 1909 /// converting the result back to the original type will produce the 1910 /// original value (this is almost the same as return value==fsOK, but there 1911 /// are edge cases where this is not so). 1912 1913 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, 1914 roundingMode rounding_mode, 1915 bool *losesInfo) { 1916 lostFraction lostFraction; 1917 unsigned int newPartCount, oldPartCount; 1918 opStatus fs; 1919 int shift; 1920 const fltSemantics &fromSemantics = *semantics; 1921 1922 lostFraction = lfExactlyZero; 1923 newPartCount = partCountForBits(toSemantics.precision + 1); 1924 oldPartCount = partCount(); 1925 shift = toSemantics.precision - fromSemantics.precision; 1926 1927 bool X86SpecialNan = false; 1928 if (&fromSemantics == &IEEEFloat::x87DoubleExtended && 1929 &toSemantics != &IEEEFloat::x87DoubleExtended && category == fcNaN && 1930 (!(*significandParts() & 0x8000000000000000ULL) || 1931 !(*significandParts() & 0x4000000000000000ULL))) { 1932 // x86 has some unusual NaNs which cannot be represented in any other 1933 // format; note them here. 1934 X86SpecialNan = true; 1935 } 1936 1937 // If this is a truncation of a denormal number, and the target semantics 1938 // has larger exponent range than the source semantics (this can happen 1939 // when truncating from PowerPC double-double to double format), the 1940 // right shift could lose result mantissa bits. Adjust exponent instead 1941 // of performing excessive shift. 1942 if (shift < 0 && isFiniteNonZero()) { 1943 int exponentChange = significandMSB() + 1 - fromSemantics.precision; 1944 if (exponent + exponentChange < toSemantics.minExponent) 1945 exponentChange = toSemantics.minExponent - exponent; 1946 if (exponentChange < shift) 1947 exponentChange = shift; 1948 if (exponentChange < 0) { 1949 shift -= exponentChange; 1950 exponent += exponentChange; 1951 } 1952 } 1953 1954 // If this is a truncation, perform the shift before we narrow the storage. 1955 if (shift < 0 && (isFiniteNonZero() || category==fcNaN)) 1956 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 1957 1958 // Fix the storage so it can hold to new value. 1959 if (newPartCount > oldPartCount) { 1960 // The new type requires more storage; make it available. 1961 integerPart *newParts; 1962 newParts = new integerPart[newPartCount]; 1963 APInt::tcSet(newParts, 0, newPartCount); 1964 if (isFiniteNonZero() || category==fcNaN) 1965 APInt::tcAssign(newParts, significandParts(), oldPartCount); 1966 freeSignificand(); 1967 significand.parts = newParts; 1968 } else if (newPartCount == 1 && oldPartCount != 1) { 1969 // Switch to built-in storage for a single part. 1970 integerPart newPart = 0; 1971 if (isFiniteNonZero() || category==fcNaN) 1972 newPart = significandParts()[0]; 1973 freeSignificand(); 1974 significand.part = newPart; 1975 } 1976 1977 // Now that we have the right storage, switch the semantics. 1978 semantics = &toSemantics; 1979 1980 // If this is an extension, perform the shift now that the storage is 1981 // available. 1982 if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) 1983 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 1984 1985 if (isFiniteNonZero()) { 1986 fs = normalize(rounding_mode, lostFraction); 1987 *losesInfo = (fs != opOK); 1988 } else if (category == fcNaN) { 1989 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 1990 1991 // For x87 extended precision, we want to make a NaN, not a special NaN if 1992 // the input wasn't special either. 1993 if (!X86SpecialNan && semantics == &IEEEFloat::x87DoubleExtended) 1994 APInt::tcSetBit(significandParts(), semantics->precision - 1); 1995 1996 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) 1997 // does not give you back the same bits. This is dubious, and we 1998 // don't currently do it. You're really supposed to get 1999 // an invalid operation signal at runtime, but nobody does that. 2000 fs = opOK; 2001 } else { 2002 *losesInfo = false; 2003 fs = opOK; 2004 } 2005 2006 return fs; 2007 } 2008 2009 /* Convert a floating point number to an integer according to the 2010 rounding mode. If the rounded integer value is out of range this 2011 returns an invalid operation exception and the contents of the 2012 destination parts are unspecified. If the rounded value is in 2013 range but the floating point number is not the exact integer, the C 2014 standard doesn't require an inexact exception to be raised. IEEE 2015 854 does require it so we do that. 2016 2017 Note that for conversions to integer type the C standard requires 2018 round-to-zero to always be used. */ 2019 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger( 2020 integerPart *parts, unsigned int width, bool isSigned, 2021 roundingMode rounding_mode, bool *isExact) const { 2022 lostFraction lost_fraction; 2023 const integerPart *src; 2024 unsigned int dstPartsCount, truncatedBits; 2025 2026 *isExact = false; 2027 2028 /* Handle the three special cases first. */ 2029 if (category == fcInfinity || category == fcNaN) 2030 return opInvalidOp; 2031 2032 dstPartsCount = partCountForBits(width); 2033 2034 if (category == fcZero) { 2035 APInt::tcSet(parts, 0, dstPartsCount); 2036 // Negative zero can't be represented as an int. 2037 *isExact = !sign; 2038 return opOK; 2039 } 2040 2041 src = significandParts(); 2042 2043 /* Step 1: place our absolute value, with any fraction truncated, in 2044 the destination. */ 2045 if (exponent < 0) { 2046 /* Our absolute value is less than one; truncate everything. */ 2047 APInt::tcSet(parts, 0, dstPartsCount); 2048 /* For exponent -1 the integer bit represents .5, look at that. 2049 For smaller exponents leftmost truncated bit is 0. */ 2050 truncatedBits = semantics->precision -1U - exponent; 2051 } else { 2052 /* We want the most significant (exponent + 1) bits; the rest are 2053 truncated. */ 2054 unsigned int bits = exponent + 1U; 2055 2056 /* Hopelessly large in magnitude? */ 2057 if (bits > width) 2058 return opInvalidOp; 2059 2060 if (bits < semantics->precision) { 2061 /* We truncate (semantics->precision - bits) bits. */ 2062 truncatedBits = semantics->precision - bits; 2063 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits); 2064 } else { 2065 /* We want at least as many bits as are available. */ 2066 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0); 2067 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision); 2068 truncatedBits = 0; 2069 } 2070 } 2071 2072 /* Step 2: work out any lost fraction, and increment the absolute 2073 value if we would round away from zero. */ 2074 if (truncatedBits) { 2075 lost_fraction = lostFractionThroughTruncation(src, partCount(), 2076 truncatedBits); 2077 if (lost_fraction != lfExactlyZero && 2078 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2079 if (APInt::tcIncrement(parts, dstPartsCount)) 2080 return opInvalidOp; /* Overflow. */ 2081 } 2082 } else { 2083 lost_fraction = lfExactlyZero; 2084 } 2085 2086 /* Step 3: check if we fit in the destination. */ 2087 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1; 2088 2089 if (sign) { 2090 if (!isSigned) { 2091 /* Negative numbers cannot be represented as unsigned. */ 2092 if (omsb != 0) 2093 return opInvalidOp; 2094 } else { 2095 /* It takes omsb bits to represent the unsigned integer value. 2096 We lose a bit for the sign, but care is needed as the 2097 maximally negative integer is a special case. */ 2098 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb) 2099 return opInvalidOp; 2100 2101 /* This case can happen because of rounding. */ 2102 if (omsb > width) 2103 return opInvalidOp; 2104 } 2105 2106 APInt::tcNegate (parts, dstPartsCount); 2107 } else { 2108 if (omsb >= width + !isSigned) 2109 return opInvalidOp; 2110 } 2111 2112 if (lost_fraction == lfExactlyZero) { 2113 *isExact = true; 2114 return opOK; 2115 } else 2116 return opInexact; 2117 } 2118 2119 /* Same as convertToSignExtendedInteger, except we provide 2120 deterministic values in case of an invalid operation exception, 2121 namely zero for NaNs and the minimal or maximal value respectively 2122 for underflow or overflow. 2123 The *isExact output tells whether the result is exact, in the sense 2124 that converting it back to the original floating point type produces 2125 the original value. This is almost equivalent to result==opOK, 2126 except for negative zeroes. 2127 */ 2128 IEEEFloat::opStatus IEEEFloat::convertToInteger(integerPart *parts, 2129 unsigned int width, 2130 bool isSigned, 2131 roundingMode rounding_mode, 2132 bool *isExact) const { 2133 opStatus fs; 2134 2135 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2136 isExact); 2137 2138 if (fs == opInvalidOp) { 2139 unsigned int bits, dstPartsCount; 2140 2141 dstPartsCount = partCountForBits(width); 2142 2143 if (category == fcNaN) 2144 bits = 0; 2145 else if (sign) 2146 bits = isSigned; 2147 else 2148 bits = width - isSigned; 2149 2150 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits); 2151 if (sign && isSigned) 2152 APInt::tcShiftLeft(parts, dstPartsCount, width - 1); 2153 } 2154 2155 return fs; 2156 } 2157 2158 /* Same as convertToInteger(integerPart*, ...), except the result is returned in 2159 an APSInt, whose initial bit-width and signed-ness are used to determine the 2160 precision of the conversion. 2161 */ 2162 IEEEFloat::opStatus IEEEFloat::convertToInteger(APSInt &result, 2163 roundingMode rounding_mode, 2164 bool *isExact) const { 2165 unsigned bitWidth = result.getBitWidth(); 2166 SmallVector<uint64_t, 4> parts(result.getNumWords()); 2167 opStatus status = convertToInteger( 2168 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact); 2169 // Keeps the original signed-ness. 2170 result = APInt(bitWidth, parts); 2171 return status; 2172 } 2173 2174 /* Convert an unsigned integer SRC to a floating point number, 2175 rounding according to ROUNDING_MODE. The sign of the floating 2176 point number is not modified. */ 2177 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts( 2178 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) { 2179 unsigned int omsb, precision, dstCount; 2180 integerPart *dst; 2181 lostFraction lost_fraction; 2182 2183 category = fcNormal; 2184 omsb = APInt::tcMSB(src, srcCount) + 1; 2185 dst = significandParts(); 2186 dstCount = partCount(); 2187 precision = semantics->precision; 2188 2189 /* We want the most significant PRECISION bits of SRC. There may not 2190 be that many; extract what we can. */ 2191 if (precision <= omsb) { 2192 exponent = omsb - 1; 2193 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2194 omsb - precision); 2195 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2196 } else { 2197 exponent = precision - 1; 2198 lost_fraction = lfExactlyZero; 2199 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2200 } 2201 2202 return normalize(rounding_mode, lost_fraction); 2203 } 2204 2205 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned, 2206 roundingMode rounding_mode) { 2207 unsigned int partCount = Val.getNumWords(); 2208 APInt api = Val; 2209 2210 sign = false; 2211 if (isSigned && api.isNegative()) { 2212 sign = true; 2213 api = -api; 2214 } 2215 2216 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2217 } 2218 2219 /* Convert a two's complement integer SRC to a floating point number, 2220 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2221 integer is signed, in which case it must be sign-extended. */ 2222 IEEEFloat::opStatus 2223 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src, 2224 unsigned int srcCount, bool isSigned, 2225 roundingMode rounding_mode) { 2226 opStatus status; 2227 2228 if (isSigned && 2229 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2230 integerPart *copy; 2231 2232 /* If we're signed and negative negate a copy. */ 2233 sign = true; 2234 copy = new integerPart[srcCount]; 2235 APInt::tcAssign(copy, src, srcCount); 2236 APInt::tcNegate(copy, srcCount); 2237 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2238 delete [] copy; 2239 } else { 2240 sign = false; 2241 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2242 } 2243 2244 return status; 2245 } 2246 2247 /* FIXME: should this just take a const APInt reference? */ 2248 IEEEFloat::opStatus 2249 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2250 unsigned int width, bool isSigned, 2251 roundingMode rounding_mode) { 2252 unsigned int partCount = partCountForBits(width); 2253 APInt api = APInt(width, makeArrayRef(parts, partCount)); 2254 2255 sign = false; 2256 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2257 sign = true; 2258 api = -api; 2259 } 2260 2261 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2262 } 2263 2264 IEEEFloat::opStatus 2265 IEEEFloat::convertFromHexadecimalString(StringRef s, 2266 roundingMode rounding_mode) { 2267 lostFraction lost_fraction = lfExactlyZero; 2268 2269 category = fcNormal; 2270 zeroSignificand(); 2271 exponent = 0; 2272 2273 integerPart *significand = significandParts(); 2274 unsigned partsCount = partCount(); 2275 unsigned bitPos = partsCount * integerPartWidth; 2276 bool computedTrailingFraction = false; 2277 2278 // Skip leading zeroes and any (hexa)decimal point. 2279 StringRef::iterator begin = s.begin(); 2280 StringRef::iterator end = s.end(); 2281 StringRef::iterator dot; 2282 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2283 StringRef::iterator firstSignificantDigit = p; 2284 2285 while (p != end) { 2286 integerPart hex_value; 2287 2288 if (*p == '.') { 2289 assert(dot == end && "String contains multiple dots"); 2290 dot = p++; 2291 continue; 2292 } 2293 2294 hex_value = hexDigitValue(*p); 2295 if (hex_value == -1U) 2296 break; 2297 2298 p++; 2299 2300 // Store the number while we have space. 2301 if (bitPos) { 2302 bitPos -= 4; 2303 hex_value <<= bitPos % integerPartWidth; 2304 significand[bitPos / integerPartWidth] |= hex_value; 2305 } else if (!computedTrailingFraction) { 2306 lost_fraction = trailingHexadecimalFraction(p, end, hex_value); 2307 computedTrailingFraction = true; 2308 } 2309 } 2310 2311 /* Hex floats require an exponent but not a hexadecimal point. */ 2312 assert(p != end && "Hex strings require an exponent"); 2313 assert((*p == 'p' || *p == 'P') && "Invalid character in significand"); 2314 assert(p != begin && "Significand has no digits"); 2315 assert((dot == end || p - begin != 1) && "Significand has no digits"); 2316 2317 /* Ignore the exponent if we are zero. */ 2318 if (p != firstSignificantDigit) { 2319 int expAdjustment; 2320 2321 /* Implicit hexadecimal point? */ 2322 if (dot == end) 2323 dot = p; 2324 2325 /* Calculate the exponent adjustment implicit in the number of 2326 significant digits. */ 2327 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 2328 if (expAdjustment < 0) 2329 expAdjustment++; 2330 expAdjustment = expAdjustment * 4 - 1; 2331 2332 /* Adjust for writing the significand starting at the most 2333 significant nibble. */ 2334 expAdjustment += semantics->precision; 2335 expAdjustment -= partsCount * integerPartWidth; 2336 2337 /* Adjust for the given exponent. */ 2338 exponent = totalExponent(p + 1, end, expAdjustment); 2339 } 2340 2341 return normalize(rounding_mode, lost_fraction); 2342 } 2343 2344 IEEEFloat::opStatus 2345 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2346 unsigned sigPartCount, int exp, 2347 roundingMode rounding_mode) { 2348 unsigned int parts, pow5PartCount; 2349 fltSemantics calcSemantics = { 32767, -32767, 0, 0 }; 2350 integerPart pow5Parts[maxPowerOfFiveParts]; 2351 bool isNearest; 2352 2353 isNearest = (rounding_mode == rmNearestTiesToEven || 2354 rounding_mode == rmNearestTiesToAway); 2355 2356 parts = partCountForBits(semantics->precision + 11); 2357 2358 /* Calculate pow(5, abs(exp)). */ 2359 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2360 2361 for (;; parts *= 2) { 2362 opStatus sigStatus, powStatus; 2363 unsigned int excessPrecision, truncatedBits; 2364 2365 calcSemantics.precision = parts * integerPartWidth - 1; 2366 excessPrecision = calcSemantics.precision - semantics->precision; 2367 truncatedBits = excessPrecision; 2368 2369 IEEEFloat decSig = IEEEFloat::getZero(calcSemantics, sign); 2370 IEEEFloat pow5(calcSemantics); 2371 2372 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2373 rmNearestTiesToEven); 2374 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2375 rmNearestTiesToEven); 2376 /* Add exp, as 10^n = 5^n * 2^n. */ 2377 decSig.exponent += exp; 2378 2379 lostFraction calcLostFraction; 2380 integerPart HUerr, HUdistance; 2381 unsigned int powHUerr; 2382 2383 if (exp >= 0) { 2384 /* multiplySignificand leaves the precision-th bit set to 1. */ 2385 calcLostFraction = decSig.multiplySignificand(pow5, nullptr); 2386 powHUerr = powStatus != opOK; 2387 } else { 2388 calcLostFraction = decSig.divideSignificand(pow5); 2389 /* Denormal numbers have less precision. */ 2390 if (decSig.exponent < semantics->minExponent) { 2391 excessPrecision += (semantics->minExponent - decSig.exponent); 2392 truncatedBits = excessPrecision; 2393 if (excessPrecision > calcSemantics.precision) 2394 excessPrecision = calcSemantics.precision; 2395 } 2396 /* Extra half-ulp lost in reciprocal of exponent. */ 2397 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 2398 } 2399 2400 /* Both multiplySignificand and divideSignificand return the 2401 result with the integer bit set. */ 2402 assert(APInt::tcExtractBit 2403 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2404 2405 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2406 powHUerr); 2407 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2408 excessPrecision, isNearest); 2409 2410 /* Are we guaranteed to round correctly if we truncate? */ 2411 if (HUdistance >= HUerr) { 2412 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2413 calcSemantics.precision - excessPrecision, 2414 excessPrecision); 2415 /* Take the exponent of decSig. If we tcExtract-ed less bits 2416 above we must adjust our exponent to compensate for the 2417 implicit right shift. */ 2418 exponent = (decSig.exponent + semantics->precision 2419 - (calcSemantics.precision - excessPrecision)); 2420 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2421 decSig.partCount(), 2422 truncatedBits); 2423 return normalize(rounding_mode, calcLostFraction); 2424 } 2425 } 2426 } 2427 2428 IEEEFloat::opStatus 2429 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) { 2430 decimalInfo D; 2431 opStatus fs; 2432 2433 /* Scan the text. */ 2434 StringRef::iterator p = str.begin(); 2435 interpretDecimal(p, str.end(), &D); 2436 2437 /* Handle the quick cases. First the case of no significant digits, 2438 i.e. zero, and then exponents that are obviously too large or too 2439 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 2440 definitely overflows if 2441 2442 (exp - 1) * L >= maxExponent 2443 2444 and definitely underflows to zero where 2445 2446 (exp + 1) * L <= minExponent - precision 2447 2448 With integer arithmetic the tightest bounds for L are 2449 2450 93/28 < L < 196/59 [ numerator <= 256 ] 2451 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 2452 */ 2453 2454 // Test if we have a zero number allowing for strings with no null terminators 2455 // and zero decimals with non-zero exponents. 2456 // 2457 // We computed firstSigDigit by ignoring all zeros and dots. Thus if 2458 // D->firstSigDigit equals str.end(), every digit must be a zero and there can 2459 // be at most one dot. On the other hand, if we have a zero with a non-zero 2460 // exponent, then we know that D.firstSigDigit will be non-numeric. 2461 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { 2462 category = fcZero; 2463 fs = opOK; 2464 2465 /* Check whether the normalized exponent is high enough to overflow 2466 max during the log-rebasing in the max-exponent check below. */ 2467 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 2468 fs = handleOverflow(rounding_mode); 2469 2470 /* If it wasn't, then it also wasn't high enough to overflow max 2471 during the log-rebasing in the min-exponent check. Check that it 2472 won't overflow min in either check, then perform the min-exponent 2473 check. */ 2474 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 2475 (D.normalizedExponent + 1) * 28738 <= 2476 8651 * (semantics->minExponent - (int) semantics->precision)) { 2477 /* Underflow to zero and round. */ 2478 category = fcNormal; 2479 zeroSignificand(); 2480 fs = normalize(rounding_mode, lfLessThanHalf); 2481 2482 /* We can finally safely perform the max-exponent check. */ 2483 } else if ((D.normalizedExponent - 1) * 42039 2484 >= 12655 * semantics->maxExponent) { 2485 /* Overflow and round. */ 2486 fs = handleOverflow(rounding_mode); 2487 } else { 2488 integerPart *decSignificand; 2489 unsigned int partCount; 2490 2491 /* A tight upper bound on number of bits required to hold an 2492 N-digit decimal integer is N * 196 / 59. Allocate enough space 2493 to hold the full significand, and an extra part required by 2494 tcMultiplyPart. */ 2495 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 2496 partCount = partCountForBits(1 + 196 * partCount / 59); 2497 decSignificand = new integerPart[partCount + 1]; 2498 partCount = 0; 2499 2500 /* Convert to binary efficiently - we do almost all multiplication 2501 in an integerPart. When this would overflow do we do a single 2502 bignum multiplication, and then revert again to multiplication 2503 in an integerPart. */ 2504 do { 2505 integerPart decValue, val, multiplier; 2506 2507 val = 0; 2508 multiplier = 1; 2509 2510 do { 2511 if (*p == '.') { 2512 p++; 2513 if (p == str.end()) { 2514 break; 2515 } 2516 } 2517 decValue = decDigitValue(*p++); 2518 assert(decValue < 10U && "Invalid character in significand"); 2519 multiplier *= 10; 2520 val = val * 10 + decValue; 2521 /* The maximum number that can be multiplied by ten with any 2522 digit added without overflowing an integerPart. */ 2523 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 2524 2525 /* Multiply out the current part. */ 2526 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 2527 partCount, partCount + 1, false); 2528 2529 /* If we used another part (likely but not guaranteed), increase 2530 the count. */ 2531 if (decSignificand[partCount]) 2532 partCount++; 2533 } while (p <= D.lastSigDigit); 2534 2535 category = fcNormal; 2536 fs = roundSignificandWithExponent(decSignificand, partCount, 2537 D.exponent, rounding_mode); 2538 2539 delete [] decSignificand; 2540 } 2541 2542 return fs; 2543 } 2544 2545 bool IEEEFloat::convertFromStringSpecials(StringRef str) { 2546 if (str.equals("inf") || str.equals("INFINITY")) { 2547 makeInf(false); 2548 return true; 2549 } 2550 2551 if (str.equals("-inf") || str.equals("-INFINITY")) { 2552 makeInf(true); 2553 return true; 2554 } 2555 2556 if (str.equals("nan") || str.equals("NaN")) { 2557 makeNaN(false, false); 2558 return true; 2559 } 2560 2561 if (str.equals("-nan") || str.equals("-NaN")) { 2562 makeNaN(false, true); 2563 return true; 2564 } 2565 2566 return false; 2567 } 2568 2569 IEEEFloat::opStatus IEEEFloat::convertFromString(StringRef str, 2570 roundingMode rounding_mode) { 2571 assert(!str.empty() && "Invalid string length"); 2572 2573 // Handle special cases. 2574 if (convertFromStringSpecials(str)) 2575 return opOK; 2576 2577 /* Handle a leading minus sign. */ 2578 StringRef::iterator p = str.begin(); 2579 size_t slen = str.size(); 2580 sign = *p == '-' ? 1 : 0; 2581 if (*p == '-' || *p == '+') { 2582 p++; 2583 slen--; 2584 assert(slen && "String has no digits"); 2585 } 2586 2587 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 2588 assert(slen - 2 && "Invalid string"); 2589 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 2590 rounding_mode); 2591 } 2592 2593 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 2594 } 2595 2596 /* Write out a hexadecimal representation of the floating point value 2597 to DST, which must be of sufficient size, in the C99 form 2598 [-]0xh.hhhhp[+-]d. Return the number of characters written, 2599 excluding the terminating NUL. 2600 2601 If UPPERCASE, the output is in upper case, otherwise in lower case. 2602 2603 HEXDIGITS digits appear altogether, rounding the value if 2604 necessary. If HEXDIGITS is 0, the minimal precision to display the 2605 number precisely is used instead. If nothing would appear after 2606 the decimal point it is suppressed. 2607 2608 The decimal exponent is always printed and has at least one digit. 2609 Zero values display an exponent of zero. Infinities and NaNs 2610 appear as "infinity" or "nan" respectively. 2611 2612 The above rules are as specified by C99. There is ambiguity about 2613 what the leading hexadecimal digit should be. This implementation 2614 uses whatever is necessary so that the exponent is displayed as 2615 stored. This implies the exponent will fall within the IEEE format 2616 range, and the leading hexadecimal digit will be 0 (for denormals), 2617 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 2618 any other digits zero). 2619 */ 2620 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits, 2621 bool upperCase, 2622 roundingMode rounding_mode) const { 2623 char *p; 2624 2625 p = dst; 2626 if (sign) 2627 *dst++ = '-'; 2628 2629 switch (category) { 2630 case fcInfinity: 2631 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 2632 dst += sizeof infinityL - 1; 2633 break; 2634 2635 case fcNaN: 2636 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 2637 dst += sizeof NaNU - 1; 2638 break; 2639 2640 case fcZero: 2641 *dst++ = '0'; 2642 *dst++ = upperCase ? 'X': 'x'; 2643 *dst++ = '0'; 2644 if (hexDigits > 1) { 2645 *dst++ = '.'; 2646 memset (dst, '0', hexDigits - 1); 2647 dst += hexDigits - 1; 2648 } 2649 *dst++ = upperCase ? 'P': 'p'; 2650 *dst++ = '0'; 2651 break; 2652 2653 case fcNormal: 2654 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 2655 break; 2656 } 2657 2658 *dst = 0; 2659 2660 return static_cast<unsigned int>(dst - p); 2661 } 2662 2663 /* Does the hard work of outputting the correctly rounded hexadecimal 2664 form of a normal floating point number with the specified number of 2665 hexadecimal digits. If HEXDIGITS is zero the minimum number of 2666 digits necessary to print the value precisely is output. */ 2667 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 2668 bool upperCase, 2669 roundingMode rounding_mode) const { 2670 unsigned int count, valueBits, shift, partsCount, outputDigits; 2671 const char *hexDigitChars; 2672 const integerPart *significand; 2673 char *p; 2674 bool roundUp; 2675 2676 *dst++ = '0'; 2677 *dst++ = upperCase ? 'X': 'x'; 2678 2679 roundUp = false; 2680 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 2681 2682 significand = significandParts(); 2683 partsCount = partCount(); 2684 2685 /* +3 because the first digit only uses the single integer bit, so 2686 we have 3 virtual zero most-significant-bits. */ 2687 valueBits = semantics->precision + 3; 2688 shift = integerPartWidth - valueBits % integerPartWidth; 2689 2690 /* The natural number of digits required ignoring trailing 2691 insignificant zeroes. */ 2692 outputDigits = (valueBits - significandLSB () + 3) / 4; 2693 2694 /* hexDigits of zero means use the required number for the 2695 precision. Otherwise, see if we are truncating. If we are, 2696 find out if we need to round away from zero. */ 2697 if (hexDigits) { 2698 if (hexDigits < outputDigits) { 2699 /* We are dropping non-zero bits, so need to check how to round. 2700 "bits" is the number of dropped bits. */ 2701 unsigned int bits; 2702 lostFraction fraction; 2703 2704 bits = valueBits - hexDigits * 4; 2705 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 2706 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 2707 } 2708 outputDigits = hexDigits; 2709 } 2710 2711 /* Write the digits consecutively, and start writing in the location 2712 of the hexadecimal point. We move the most significant digit 2713 left and add the hexadecimal point later. */ 2714 p = ++dst; 2715 2716 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 2717 2718 while (outputDigits && count) { 2719 integerPart part; 2720 2721 /* Put the most significant integerPartWidth bits in "part". */ 2722 if (--count == partsCount) 2723 part = 0; /* An imaginary higher zero part. */ 2724 else 2725 part = significand[count] << shift; 2726 2727 if (count && shift) 2728 part |= significand[count - 1] >> (integerPartWidth - shift); 2729 2730 /* Convert as much of "part" to hexdigits as we can. */ 2731 unsigned int curDigits = integerPartWidth / 4; 2732 2733 if (curDigits > outputDigits) 2734 curDigits = outputDigits; 2735 dst += partAsHex (dst, part, curDigits, hexDigitChars); 2736 outputDigits -= curDigits; 2737 } 2738 2739 if (roundUp) { 2740 char *q = dst; 2741 2742 /* Note that hexDigitChars has a trailing '0'. */ 2743 do { 2744 q--; 2745 *q = hexDigitChars[hexDigitValue (*q) + 1]; 2746 } while (*q == '0'); 2747 assert(q >= p); 2748 } else { 2749 /* Add trailing zeroes. */ 2750 memset (dst, '0', outputDigits); 2751 dst += outputDigits; 2752 } 2753 2754 /* Move the most significant digit to before the point, and if there 2755 is something after the decimal point add it. This must come 2756 after rounding above. */ 2757 p[-1] = p[0]; 2758 if (dst -1 == p) 2759 dst--; 2760 else 2761 p[0] = '.'; 2762 2763 /* Finally output the exponent. */ 2764 *dst++ = upperCase ? 'P': 'p'; 2765 2766 return writeSignedDecimal (dst, exponent); 2767 } 2768 2769 hash_code hash_value(const IEEEFloat &Arg) { 2770 if (!Arg.isFiniteNonZero()) 2771 return hash_combine((uint8_t)Arg.category, 2772 // NaN has no sign, fix it at zero. 2773 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, 2774 Arg.semantics->precision); 2775 2776 // Normal floats need their exponent and significand hashed. 2777 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, 2778 Arg.semantics->precision, Arg.exponent, 2779 hash_combine_range( 2780 Arg.significandParts(), 2781 Arg.significandParts() + Arg.partCount())); 2782 } 2783 2784 // Conversion from APFloat to/from host float/double. It may eventually be 2785 // possible to eliminate these and have everybody deal with APFloats, but that 2786 // will take a while. This approach will not easily extend to long double. 2787 // Current implementation requires integerPartWidth==64, which is correct at 2788 // the moment but could be made more general. 2789 2790 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 2791 // the actual IEEE respresentations. We compensate for that here. 2792 2793 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const { 2794 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended); 2795 assert(partCount()==2); 2796 2797 uint64_t myexponent, mysignificand; 2798 2799 if (isFiniteNonZero()) { 2800 myexponent = exponent+16383; //bias 2801 mysignificand = significandParts()[0]; 2802 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 2803 myexponent = 0; // denormal 2804 } else if (category==fcZero) { 2805 myexponent = 0; 2806 mysignificand = 0; 2807 } else if (category==fcInfinity) { 2808 myexponent = 0x7fff; 2809 mysignificand = 0x8000000000000000ULL; 2810 } else { 2811 assert(category == fcNaN && "Unknown category"); 2812 myexponent = 0x7fff; 2813 mysignificand = significandParts()[0]; 2814 } 2815 2816 uint64_t words[2]; 2817 words[0] = mysignificand; 2818 words[1] = ((uint64_t)(sign & 1) << 15) | 2819 (myexponent & 0x7fffLL); 2820 return APInt(80, words); 2821 } 2822 2823 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const { 2824 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble); 2825 assert(partCount()==2); 2826 2827 uint64_t words[2]; 2828 opStatus fs; 2829 bool losesInfo; 2830 2831 // Convert number to double. To avoid spurious underflows, we re- 2832 // normalize against the "double" minExponent first, and only *then* 2833 // truncate the mantissa. The result of that second conversion 2834 // may be inexact, but should never underflow. 2835 // Declare fltSemantics before APFloat that uses it (and 2836 // saves pointer to it) to ensure correct destruction order. 2837 fltSemantics extendedSemantics = *semantics; 2838 extendedSemantics.minExponent = IEEEdouble.minExponent; 2839 IEEEFloat extended(*this); 2840 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2841 assert(fs == opOK && !losesInfo); 2842 (void)fs; 2843 2844 IEEEFloat u(extended); 2845 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); 2846 assert(fs == opOK || fs == opInexact); 2847 (void)fs; 2848 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 2849 2850 // If conversion was exact or resulted in a special case, we're done; 2851 // just set the second double to zero. Otherwise, re-convert back to 2852 // the extended format and compute the difference. This now should 2853 // convert exactly to double. 2854 if (u.isFiniteNonZero() && losesInfo) { 2855 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2856 assert(fs == opOK && !losesInfo); 2857 (void)fs; 2858 2859 IEEEFloat v(extended); 2860 v.subtract(u, rmNearestTiesToEven); 2861 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); 2862 assert(fs == opOK && !losesInfo); 2863 (void)fs; 2864 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 2865 } else { 2866 words[1] = 0; 2867 } 2868 2869 return APInt(128, words); 2870 } 2871 2872 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const { 2873 assert(semantics == (const llvm::fltSemantics*)&IEEEquad); 2874 assert(partCount()==2); 2875 2876 uint64_t myexponent, mysignificand, mysignificand2; 2877 2878 if (isFiniteNonZero()) { 2879 myexponent = exponent+16383; //bias 2880 mysignificand = significandParts()[0]; 2881 mysignificand2 = significandParts()[1]; 2882 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) 2883 myexponent = 0; // denormal 2884 } else if (category==fcZero) { 2885 myexponent = 0; 2886 mysignificand = mysignificand2 = 0; 2887 } else if (category==fcInfinity) { 2888 myexponent = 0x7fff; 2889 mysignificand = mysignificand2 = 0; 2890 } else { 2891 assert(category == fcNaN && "Unknown category!"); 2892 myexponent = 0x7fff; 2893 mysignificand = significandParts()[0]; 2894 mysignificand2 = significandParts()[1]; 2895 } 2896 2897 uint64_t words[2]; 2898 words[0] = mysignificand; 2899 words[1] = ((uint64_t)(sign & 1) << 63) | 2900 ((myexponent & 0x7fff) << 48) | 2901 (mysignificand2 & 0xffffffffffffLL); 2902 2903 return APInt(128, words); 2904 } 2905 2906 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const { 2907 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); 2908 assert(partCount()==1); 2909 2910 uint64_t myexponent, mysignificand; 2911 2912 if (isFiniteNonZero()) { 2913 myexponent = exponent+1023; //bias 2914 mysignificand = *significandParts(); 2915 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 2916 myexponent = 0; // denormal 2917 } else if (category==fcZero) { 2918 myexponent = 0; 2919 mysignificand = 0; 2920 } else if (category==fcInfinity) { 2921 myexponent = 0x7ff; 2922 mysignificand = 0; 2923 } else { 2924 assert(category == fcNaN && "Unknown category!"); 2925 myexponent = 0x7ff; 2926 mysignificand = *significandParts(); 2927 } 2928 2929 return APInt(64, ((((uint64_t)(sign & 1) << 63) | 2930 ((myexponent & 0x7ff) << 52) | 2931 (mysignificand & 0xfffffffffffffLL)))); 2932 } 2933 2934 APInt IEEEFloat::convertFloatAPFloatToAPInt() const { 2935 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); 2936 assert(partCount()==1); 2937 2938 uint32_t myexponent, mysignificand; 2939 2940 if (isFiniteNonZero()) { 2941 myexponent = exponent+127; //bias 2942 mysignificand = (uint32_t)*significandParts(); 2943 if (myexponent == 1 && !(mysignificand & 0x800000)) 2944 myexponent = 0; // denormal 2945 } else if (category==fcZero) { 2946 myexponent = 0; 2947 mysignificand = 0; 2948 } else if (category==fcInfinity) { 2949 myexponent = 0xff; 2950 mysignificand = 0; 2951 } else { 2952 assert(category == fcNaN && "Unknown category!"); 2953 myexponent = 0xff; 2954 mysignificand = (uint32_t)*significandParts(); 2955 } 2956 2957 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 2958 (mysignificand & 0x7fffff))); 2959 } 2960 2961 APInt IEEEFloat::convertHalfAPFloatToAPInt() const { 2962 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf); 2963 assert(partCount()==1); 2964 2965 uint32_t myexponent, mysignificand; 2966 2967 if (isFiniteNonZero()) { 2968 myexponent = exponent+15; //bias 2969 mysignificand = (uint32_t)*significandParts(); 2970 if (myexponent == 1 && !(mysignificand & 0x400)) 2971 myexponent = 0; // denormal 2972 } else if (category==fcZero) { 2973 myexponent = 0; 2974 mysignificand = 0; 2975 } else if (category==fcInfinity) { 2976 myexponent = 0x1f; 2977 mysignificand = 0; 2978 } else { 2979 assert(category == fcNaN && "Unknown category!"); 2980 myexponent = 0x1f; 2981 mysignificand = (uint32_t)*significandParts(); 2982 } 2983 2984 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | 2985 (mysignificand & 0x3ff))); 2986 } 2987 2988 // This function creates an APInt that is just a bit map of the floating 2989 // point constant as it would appear in memory. It is not a conversion, 2990 // and treating the result as a normal integer is unlikely to be useful. 2991 2992 APInt IEEEFloat::bitcastToAPInt() const { 2993 if (semantics == (const llvm::fltSemantics*)&IEEEhalf) 2994 return convertHalfAPFloatToAPInt(); 2995 2996 if (semantics == (const llvm::fltSemantics*)&IEEEsingle) 2997 return convertFloatAPFloatToAPInt(); 2998 2999 if (semantics == (const llvm::fltSemantics*)&IEEEdouble) 3000 return convertDoubleAPFloatToAPInt(); 3001 3002 if (semantics == (const llvm::fltSemantics*)&IEEEquad) 3003 return convertQuadrupleAPFloatToAPInt(); 3004 3005 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble) 3006 return convertPPCDoubleDoubleAPFloatToAPInt(); 3007 3008 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended && 3009 "unknown format!"); 3010 return convertF80LongDoubleAPFloatToAPInt(); 3011 } 3012 3013 float IEEEFloat::convertToFloat() const { 3014 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle && 3015 "Float semantics are not IEEEsingle"); 3016 APInt api = bitcastToAPInt(); 3017 return api.bitsToFloat(); 3018 } 3019 3020 double IEEEFloat::convertToDouble() const { 3021 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble && 3022 "Float semantics are not IEEEdouble"); 3023 APInt api = bitcastToAPInt(); 3024 return api.bitsToDouble(); 3025 } 3026 3027 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3028 /// does not support these bit patterns: 3029 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3030 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3031 /// exponent = 0, integer bit 1 ("pseudodenormal") 3032 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3033 /// At the moment, the first two are treated as NaNs, the second two as Normal. 3034 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) { 3035 assert(api.getBitWidth()==80); 3036 uint64_t i1 = api.getRawData()[0]; 3037 uint64_t i2 = api.getRawData()[1]; 3038 uint64_t myexponent = (i2 & 0x7fff); 3039 uint64_t mysignificand = i1; 3040 3041 initialize(&IEEEFloat::x87DoubleExtended); 3042 assert(partCount()==2); 3043 3044 sign = static_cast<unsigned int>(i2>>15); 3045 if (myexponent==0 && mysignificand==0) { 3046 // exponent, significand meaningless 3047 category = fcZero; 3048 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3049 // exponent, significand meaningless 3050 category = fcInfinity; 3051 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) { 3052 // exponent meaningless 3053 category = fcNaN; 3054 significandParts()[0] = mysignificand; 3055 significandParts()[1] = 0; 3056 } else { 3057 category = fcNormal; 3058 exponent = myexponent - 16383; 3059 significandParts()[0] = mysignificand; 3060 significandParts()[1] = 0; 3061 if (myexponent==0) // denormal 3062 exponent = -16382; 3063 } 3064 } 3065 3066 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) { 3067 assert(api.getBitWidth()==128); 3068 uint64_t i1 = api.getRawData()[0]; 3069 uint64_t i2 = api.getRawData()[1]; 3070 opStatus fs; 3071 bool losesInfo; 3072 3073 // Get the first double and convert to our format. 3074 initFromDoubleAPInt(APInt(64, i1)); 3075 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); 3076 assert(fs == opOK && !losesInfo); 3077 (void)fs; 3078 3079 // Unless we have a special case, add in second double. 3080 if (isFiniteNonZero()) { 3081 IEEEFloat v(IEEEdouble, APInt(64, i2)); 3082 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); 3083 assert(fs == opOK && !losesInfo); 3084 (void)fs; 3085 3086 add(v, rmNearestTiesToEven); 3087 } 3088 } 3089 3090 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) { 3091 assert(api.getBitWidth()==128); 3092 uint64_t i1 = api.getRawData()[0]; 3093 uint64_t i2 = api.getRawData()[1]; 3094 uint64_t myexponent = (i2 >> 48) & 0x7fff; 3095 uint64_t mysignificand = i1; 3096 uint64_t mysignificand2 = i2 & 0xffffffffffffLL; 3097 3098 initialize(&IEEEFloat::IEEEquad); 3099 assert(partCount()==2); 3100 3101 sign = static_cast<unsigned int>(i2>>63); 3102 if (myexponent==0 && 3103 (mysignificand==0 && mysignificand2==0)) { 3104 // exponent, significand meaningless 3105 category = fcZero; 3106 } else if (myexponent==0x7fff && 3107 (mysignificand==0 && mysignificand2==0)) { 3108 // exponent, significand meaningless 3109 category = fcInfinity; 3110 } else if (myexponent==0x7fff && 3111 (mysignificand!=0 || mysignificand2 !=0)) { 3112 // exponent meaningless 3113 category = fcNaN; 3114 significandParts()[0] = mysignificand; 3115 significandParts()[1] = mysignificand2; 3116 } else { 3117 category = fcNormal; 3118 exponent = myexponent - 16383; 3119 significandParts()[0] = mysignificand; 3120 significandParts()[1] = mysignificand2; 3121 if (myexponent==0) // denormal 3122 exponent = -16382; 3123 else 3124 significandParts()[1] |= 0x1000000000000LL; // integer bit 3125 } 3126 } 3127 3128 void IEEEFloat::initFromDoubleAPInt(const APInt &api) { 3129 assert(api.getBitWidth()==64); 3130 uint64_t i = *api.getRawData(); 3131 uint64_t myexponent = (i >> 52) & 0x7ff; 3132 uint64_t mysignificand = i & 0xfffffffffffffLL; 3133 3134 initialize(&IEEEFloat::IEEEdouble); 3135 assert(partCount()==1); 3136 3137 sign = static_cast<unsigned int>(i>>63); 3138 if (myexponent==0 && mysignificand==0) { 3139 // exponent, significand meaningless 3140 category = fcZero; 3141 } else if (myexponent==0x7ff && mysignificand==0) { 3142 // exponent, significand meaningless 3143 category = fcInfinity; 3144 } else if (myexponent==0x7ff && mysignificand!=0) { 3145 // exponent meaningless 3146 category = fcNaN; 3147 *significandParts() = mysignificand; 3148 } else { 3149 category = fcNormal; 3150 exponent = myexponent - 1023; 3151 *significandParts() = mysignificand; 3152 if (myexponent==0) // denormal 3153 exponent = -1022; 3154 else 3155 *significandParts() |= 0x10000000000000LL; // integer bit 3156 } 3157 } 3158 3159 void IEEEFloat::initFromFloatAPInt(const APInt &api) { 3160 assert(api.getBitWidth()==32); 3161 uint32_t i = (uint32_t)*api.getRawData(); 3162 uint32_t myexponent = (i >> 23) & 0xff; 3163 uint32_t mysignificand = i & 0x7fffff; 3164 3165 initialize(&IEEEFloat::IEEEsingle); 3166 assert(partCount()==1); 3167 3168 sign = i >> 31; 3169 if (myexponent==0 && mysignificand==0) { 3170 // exponent, significand meaningless 3171 category = fcZero; 3172 } else if (myexponent==0xff && mysignificand==0) { 3173 // exponent, significand meaningless 3174 category = fcInfinity; 3175 } else if (myexponent==0xff && mysignificand!=0) { 3176 // sign, exponent, significand meaningless 3177 category = fcNaN; 3178 *significandParts() = mysignificand; 3179 } else { 3180 category = fcNormal; 3181 exponent = myexponent - 127; //bias 3182 *significandParts() = mysignificand; 3183 if (myexponent==0) // denormal 3184 exponent = -126; 3185 else 3186 *significandParts() |= 0x800000; // integer bit 3187 } 3188 } 3189 3190 void IEEEFloat::initFromHalfAPInt(const APInt &api) { 3191 assert(api.getBitWidth()==16); 3192 uint32_t i = (uint32_t)*api.getRawData(); 3193 uint32_t myexponent = (i >> 10) & 0x1f; 3194 uint32_t mysignificand = i & 0x3ff; 3195 3196 initialize(&IEEEFloat::IEEEhalf); 3197 assert(partCount()==1); 3198 3199 sign = i >> 15; 3200 if (myexponent==0 && mysignificand==0) { 3201 // exponent, significand meaningless 3202 category = fcZero; 3203 } else if (myexponent==0x1f && mysignificand==0) { 3204 // exponent, significand meaningless 3205 category = fcInfinity; 3206 } else if (myexponent==0x1f && mysignificand!=0) { 3207 // sign, exponent, significand meaningless 3208 category = fcNaN; 3209 *significandParts() = mysignificand; 3210 } else { 3211 category = fcNormal; 3212 exponent = myexponent - 15; //bias 3213 *significandParts() = mysignificand; 3214 if (myexponent==0) // denormal 3215 exponent = -14; 3216 else 3217 *significandParts() |= 0x400; // integer bit 3218 } 3219 } 3220 3221 /// Treat api as containing the bits of a floating point number. Currently 3222 /// we infer the floating point type from the size of the APInt. The 3223 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful 3224 /// when the size is anything else). 3225 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { 3226 if (Sem == &IEEEhalf) 3227 return initFromHalfAPInt(api); 3228 if (Sem == &IEEEsingle) 3229 return initFromFloatAPInt(api); 3230 if (Sem == &IEEEdouble) 3231 return initFromDoubleAPInt(api); 3232 if (Sem == &x87DoubleExtended) 3233 return initFromF80LongDoubleAPInt(api); 3234 if (Sem == &IEEEquad) 3235 return initFromQuadrupleAPInt(api); 3236 if (Sem == &PPCDoubleDouble) 3237 return initFromPPCDoubleDoubleAPInt(api); 3238 3239 llvm_unreachable(nullptr); 3240 } 3241 3242 IEEEFloat IEEEFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) { 3243 switch (BitWidth) { 3244 case 16: 3245 return IEEEFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth)); 3246 case 32: 3247 return IEEEFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth)); 3248 case 64: 3249 return IEEEFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth)); 3250 case 80: 3251 return IEEEFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth)); 3252 case 128: 3253 if (isIEEE) 3254 return IEEEFloat(IEEEquad, APInt::getAllOnesValue(BitWidth)); 3255 return IEEEFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth)); 3256 default: 3257 llvm_unreachable("Unknown floating bit width"); 3258 } 3259 } 3260 3261 /// Make this number the largest magnitude normal number in the given 3262 /// semantics. 3263 void IEEEFloat::makeLargest(bool Negative) { 3264 // We want (in interchange format): 3265 // sign = {Negative} 3266 // exponent = 1..10 3267 // significand = 1..1 3268 category = fcNormal; 3269 sign = Negative; 3270 exponent = semantics->maxExponent; 3271 3272 // Use memset to set all but the highest integerPart to all ones. 3273 integerPart *significand = significandParts(); 3274 unsigned PartCount = partCount(); 3275 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); 3276 3277 // Set the high integerPart especially setting all unused top bits for 3278 // internal consistency. 3279 const unsigned NumUnusedHighBits = 3280 PartCount*integerPartWidth - semantics->precision; 3281 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth) 3282 ? (~integerPart(0) >> NumUnusedHighBits) 3283 : 0; 3284 } 3285 3286 /// Make this number the smallest magnitude denormal number in the given 3287 /// semantics. 3288 void IEEEFloat::makeSmallest(bool Negative) { 3289 // We want (in interchange format): 3290 // sign = {Negative} 3291 // exponent = 0..0 3292 // significand = 0..01 3293 category = fcNormal; 3294 sign = Negative; 3295 exponent = semantics->minExponent; 3296 APInt::tcSet(significandParts(), 1, partCount()); 3297 } 3298 3299 IEEEFloat IEEEFloat::getLargest(const fltSemantics &Sem, bool Negative) { 3300 // We want (in interchange format): 3301 // sign = {Negative} 3302 // exponent = 1..10 3303 // significand = 1..1 3304 IEEEFloat Val(Sem, uninitialized); 3305 Val.makeLargest(Negative); 3306 return Val; 3307 } 3308 3309 IEEEFloat IEEEFloat::getSmallest(const fltSemantics &Sem, bool Negative) { 3310 // We want (in interchange format): 3311 // sign = {Negative} 3312 // exponent = 0..0 3313 // significand = 0..01 3314 IEEEFloat Val(Sem, uninitialized); 3315 Val.makeSmallest(Negative); 3316 return Val; 3317 } 3318 3319 IEEEFloat IEEEFloat::getSmallestNormalized(const fltSemantics &Sem, 3320 bool Negative) { 3321 IEEEFloat Val(Sem, uninitialized); 3322 3323 // We want (in interchange format): 3324 // sign = {Negative} 3325 // exponent = 0..0 3326 // significand = 10..0 3327 3328 Val.category = fcNormal; 3329 Val.zeroSignificand(); 3330 Val.sign = Negative; 3331 Val.exponent = Sem.minExponent; 3332 Val.significandParts()[partCountForBits(Sem.precision)-1] |= 3333 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth)); 3334 3335 return Val; 3336 } 3337 3338 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) { 3339 initFromAPInt(&Sem, API); 3340 } 3341 3342 IEEEFloat::IEEEFloat(float f) { 3343 initFromAPInt(&IEEEsingle, APInt::floatToBits(f)); 3344 } 3345 3346 IEEEFloat::IEEEFloat(double d) { 3347 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d)); 3348 } 3349 3350 namespace { 3351 void append(SmallVectorImpl<char> &Buffer, StringRef Str) { 3352 Buffer.append(Str.begin(), Str.end()); 3353 } 3354 3355 /// Removes data from the given significand until it is no more 3356 /// precise than is required for the desired precision. 3357 void AdjustToPrecision(APInt &significand, 3358 int &exp, unsigned FormatPrecision) { 3359 unsigned bits = significand.getActiveBits(); 3360 3361 // 196/59 is a very slight overestimate of lg_2(10). 3362 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59; 3363 3364 if (bits <= bitsRequired) return; 3365 3366 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; 3367 if (!tensRemovable) return; 3368 3369 exp += tensRemovable; 3370 3371 APInt divisor(significand.getBitWidth(), 1); 3372 APInt powten(significand.getBitWidth(), 10); 3373 while (true) { 3374 if (tensRemovable & 1) 3375 divisor *= powten; 3376 tensRemovable >>= 1; 3377 if (!tensRemovable) break; 3378 powten *= powten; 3379 } 3380 3381 significand = significand.udiv(divisor); 3382 3383 // Truncate the significand down to its active bit count. 3384 significand = significand.trunc(significand.getActiveBits()); 3385 } 3386 3387 3388 void AdjustToPrecision(SmallVectorImpl<char> &buffer, 3389 int &exp, unsigned FormatPrecision) { 3390 unsigned N = buffer.size(); 3391 if (N <= FormatPrecision) return; 3392 3393 // The most significant figures are the last ones in the buffer. 3394 unsigned FirstSignificant = N - FormatPrecision; 3395 3396 // Round. 3397 // FIXME: this probably shouldn't use 'round half up'. 3398 3399 // Rounding down is just a truncation, except we also want to drop 3400 // trailing zeros from the new result. 3401 if (buffer[FirstSignificant - 1] < '5') { 3402 while (FirstSignificant < N && buffer[FirstSignificant] == '0') 3403 FirstSignificant++; 3404 3405 exp += FirstSignificant; 3406 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3407 return; 3408 } 3409 3410 // Rounding up requires a decimal add-with-carry. If we continue 3411 // the carry, the newly-introduced zeros will just be truncated. 3412 for (unsigned I = FirstSignificant; I != N; ++I) { 3413 if (buffer[I] == '9') { 3414 FirstSignificant++; 3415 } else { 3416 buffer[I]++; 3417 break; 3418 } 3419 } 3420 3421 // If we carried through, we have exactly one digit of precision. 3422 if (FirstSignificant == N) { 3423 exp += FirstSignificant; 3424 buffer.clear(); 3425 buffer.push_back('1'); 3426 return; 3427 } 3428 3429 exp += FirstSignificant; 3430 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3431 } 3432 } 3433 3434 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, 3435 unsigned FormatMaxPadding) const { 3436 switch (category) { 3437 case fcInfinity: 3438 if (isNegative()) 3439 return append(Str, "-Inf"); 3440 else 3441 return append(Str, "+Inf"); 3442 3443 case fcNaN: return append(Str, "NaN"); 3444 3445 case fcZero: 3446 if (isNegative()) 3447 Str.push_back('-'); 3448 3449 if (!FormatMaxPadding) 3450 append(Str, "0.0E+0"); 3451 else 3452 Str.push_back('0'); 3453 return; 3454 3455 case fcNormal: 3456 break; 3457 } 3458 3459 if (isNegative()) 3460 Str.push_back('-'); 3461 3462 // Decompose the number into an APInt and an exponent. 3463 int exp = exponent - ((int) semantics->precision - 1); 3464 APInt significand(semantics->precision, 3465 makeArrayRef(significandParts(), 3466 partCountForBits(semantics->precision))); 3467 3468 // Set FormatPrecision if zero. We want to do this before we 3469 // truncate trailing zeros, as those are part of the precision. 3470 if (!FormatPrecision) { 3471 // We use enough digits so the number can be round-tripped back to an 3472 // APFloat. The formula comes from "How to Print Floating-Point Numbers 3473 // Accurately" by Steele and White. 3474 // FIXME: Using a formula based purely on the precision is conservative; 3475 // we can print fewer digits depending on the actual value being printed. 3476 3477 // FormatPrecision = 2 + floor(significandBits / lg_2(10)) 3478 FormatPrecision = 2 + semantics->precision * 59 / 196; 3479 } 3480 3481 // Ignore trailing binary zeros. 3482 int trailingZeros = significand.countTrailingZeros(); 3483 exp += trailingZeros; 3484 significand = significand.lshr(trailingZeros); 3485 3486 // Change the exponent from 2^e to 10^e. 3487 if (exp == 0) { 3488 // Nothing to do. 3489 } else if (exp > 0) { 3490 // Just shift left. 3491 significand = significand.zext(semantics->precision + exp); 3492 significand <<= exp; 3493 exp = 0; 3494 } else { /* exp < 0 */ 3495 int texp = -exp; 3496 3497 // We transform this using the identity: 3498 // (N)(2^-e) == (N)(5^e)(10^-e) 3499 // This means we have to multiply N (the significand) by 5^e. 3500 // To avoid overflow, we have to operate on numbers large 3501 // enough to store N * 5^e: 3502 // log2(N * 5^e) == log2(N) + e * log2(5) 3503 // <= semantics->precision + e * 137 / 59 3504 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) 3505 3506 unsigned precision = semantics->precision + (137 * texp + 136) / 59; 3507 3508 // Multiply significand by 5^e. 3509 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) 3510 significand = significand.zext(precision); 3511 APInt five_to_the_i(precision, 5); 3512 while (true) { 3513 if (texp & 1) significand *= five_to_the_i; 3514 3515 texp >>= 1; 3516 if (!texp) break; 3517 five_to_the_i *= five_to_the_i; 3518 } 3519 } 3520 3521 AdjustToPrecision(significand, exp, FormatPrecision); 3522 3523 SmallVector<char, 256> buffer; 3524 3525 // Fill the buffer. 3526 unsigned precision = significand.getBitWidth(); 3527 APInt ten(precision, 10); 3528 APInt digit(precision, 0); 3529 3530 bool inTrail = true; 3531 while (significand != 0) { 3532 // digit <- significand % 10 3533 // significand <- significand / 10 3534 APInt::udivrem(significand, ten, significand, digit); 3535 3536 unsigned d = digit.getZExtValue(); 3537 3538 // Drop trailing zeros. 3539 if (inTrail && !d) exp++; 3540 else { 3541 buffer.push_back((char) ('0' + d)); 3542 inTrail = false; 3543 } 3544 } 3545 3546 assert(!buffer.empty() && "no characters in buffer!"); 3547 3548 // Drop down to FormatPrecision. 3549 // TODO: don't do more precise calculations above than are required. 3550 AdjustToPrecision(buffer, exp, FormatPrecision); 3551 3552 unsigned NDigits = buffer.size(); 3553 3554 // Check whether we should use scientific notation. 3555 bool FormatScientific; 3556 if (!FormatMaxPadding) 3557 FormatScientific = true; 3558 else { 3559 if (exp >= 0) { 3560 // 765e3 --> 765000 3561 // ^^^ 3562 // But we shouldn't make the number look more precise than it is. 3563 FormatScientific = ((unsigned) exp > FormatMaxPadding || 3564 NDigits + (unsigned) exp > FormatPrecision); 3565 } else { 3566 // Power of the most significant digit. 3567 int MSD = exp + (int) (NDigits - 1); 3568 if (MSD >= 0) { 3569 // 765e-2 == 7.65 3570 FormatScientific = false; 3571 } else { 3572 // 765e-5 == 0.00765 3573 // ^ ^^ 3574 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; 3575 } 3576 } 3577 } 3578 3579 // Scientific formatting is pretty straightforward. 3580 if (FormatScientific) { 3581 exp += (NDigits - 1); 3582 3583 Str.push_back(buffer[NDigits-1]); 3584 Str.push_back('.'); 3585 if (NDigits == 1) 3586 Str.push_back('0'); 3587 else 3588 for (unsigned I = 1; I != NDigits; ++I) 3589 Str.push_back(buffer[NDigits-1-I]); 3590 Str.push_back('E'); 3591 3592 Str.push_back(exp >= 0 ? '+' : '-'); 3593 if (exp < 0) exp = -exp; 3594 SmallVector<char, 6> expbuf; 3595 do { 3596 expbuf.push_back((char) ('0' + (exp % 10))); 3597 exp /= 10; 3598 } while (exp); 3599 for (unsigned I = 0, E = expbuf.size(); I != E; ++I) 3600 Str.push_back(expbuf[E-1-I]); 3601 return; 3602 } 3603 3604 // Non-scientific, positive exponents. 3605 if (exp >= 0) { 3606 for (unsigned I = 0; I != NDigits; ++I) 3607 Str.push_back(buffer[NDigits-1-I]); 3608 for (unsigned I = 0; I != (unsigned) exp; ++I) 3609 Str.push_back('0'); 3610 return; 3611 } 3612 3613 // Non-scientific, negative exponents. 3614 3615 // The number of digits to the left of the decimal point. 3616 int NWholeDigits = exp + (int) NDigits; 3617 3618 unsigned I = 0; 3619 if (NWholeDigits > 0) { 3620 for (; I != (unsigned) NWholeDigits; ++I) 3621 Str.push_back(buffer[NDigits-I-1]); 3622 Str.push_back('.'); 3623 } else { 3624 unsigned NZeros = 1 + (unsigned) -NWholeDigits; 3625 3626 Str.push_back('0'); 3627 Str.push_back('.'); 3628 for (unsigned Z = 1; Z != NZeros; ++Z) 3629 Str.push_back('0'); 3630 } 3631 3632 for (; I != NDigits; ++I) 3633 Str.push_back(buffer[NDigits-I-1]); 3634 } 3635 3636 bool IEEEFloat::getExactInverse(IEEEFloat *inv) const { 3637 // Special floats and denormals have no exact inverse. 3638 if (!isFiniteNonZero()) 3639 return false; 3640 3641 // Check that the number is a power of two by making sure that only the 3642 // integer bit is set in the significand. 3643 if (significandLSB() != semantics->precision - 1) 3644 return false; 3645 3646 // Get the inverse. 3647 IEEEFloat reciprocal(*semantics, 1ULL); 3648 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) 3649 return false; 3650 3651 // Avoid multiplication with a denormal, it is not safe on all platforms and 3652 // may be slower than a normal division. 3653 if (reciprocal.isDenormal()) 3654 return false; 3655 3656 assert(reciprocal.isFiniteNonZero() && 3657 reciprocal.significandLSB() == reciprocal.semantics->precision - 1); 3658 3659 if (inv) 3660 *inv = reciprocal; 3661 3662 return true; 3663 } 3664 3665 bool IEEEFloat::isSignaling() const { 3666 if (!isNaN()) 3667 return false; 3668 3669 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the 3670 // first bit of the trailing significand being 0. 3671 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2); 3672 } 3673 3674 /// IEEE-754R 2008 5.3.1: nextUp/nextDown. 3675 /// 3676 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with 3677 /// appropriate sign switching before/after the computation. 3678 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) { 3679 // If we are performing nextDown, swap sign so we have -x. 3680 if (nextDown) 3681 changeSign(); 3682 3683 // Compute nextUp(x) 3684 opStatus result = opOK; 3685 3686 // Handle each float category separately. 3687 switch (category) { 3688 case fcInfinity: 3689 // nextUp(+inf) = +inf 3690 if (!isNegative()) 3691 break; 3692 // nextUp(-inf) = -getLargest() 3693 makeLargest(true); 3694 break; 3695 case fcNaN: 3696 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. 3697 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not 3698 // change the payload. 3699 if (isSignaling()) { 3700 result = opInvalidOp; 3701 // For consistency, propagate the sign of the sNaN to the qNaN. 3702 makeNaN(false, isNegative(), nullptr); 3703 } 3704 break; 3705 case fcZero: 3706 // nextUp(pm 0) = +getSmallest() 3707 makeSmallest(false); 3708 break; 3709 case fcNormal: 3710 // nextUp(-getSmallest()) = -0 3711 if (isSmallest() && isNegative()) { 3712 APInt::tcSet(significandParts(), 0, partCount()); 3713 category = fcZero; 3714 exponent = 0; 3715 break; 3716 } 3717 3718 // nextUp(getLargest()) == INFINITY 3719 if (isLargest() && !isNegative()) { 3720 APInt::tcSet(significandParts(), 0, partCount()); 3721 category = fcInfinity; 3722 exponent = semantics->maxExponent + 1; 3723 break; 3724 } 3725 3726 // nextUp(normal) == normal + inc. 3727 if (isNegative()) { 3728 // If we are negative, we need to decrement the significand. 3729 3730 // We only cross a binade boundary that requires adjusting the exponent 3731 // if: 3732 // 1. exponent != semantics->minExponent. This implies we are not in the 3733 // smallest binade or are dealing with denormals. 3734 // 2. Our significand excluding the integral bit is all zeros. 3735 bool WillCrossBinadeBoundary = 3736 exponent != semantics->minExponent && isSignificandAllZeros(); 3737 3738 // Decrement the significand. 3739 // 3740 // We always do this since: 3741 // 1. If we are dealing with a non-binade decrement, by definition we 3742 // just decrement the significand. 3743 // 2. If we are dealing with a normal -> normal binade decrement, since 3744 // we have an explicit integral bit the fact that all bits but the 3745 // integral bit are zero implies that subtracting one will yield a 3746 // significand with 0 integral bit and 1 in all other spots. Thus we 3747 // must just adjust the exponent and set the integral bit to 1. 3748 // 3. If we are dealing with a normal -> denormal binade decrement, 3749 // since we set the integral bit to 0 when we represent denormals, we 3750 // just decrement the significand. 3751 integerPart *Parts = significandParts(); 3752 APInt::tcDecrement(Parts, partCount()); 3753 3754 if (WillCrossBinadeBoundary) { 3755 // Our result is a normal number. Do the following: 3756 // 1. Set the integral bit to 1. 3757 // 2. Decrement the exponent. 3758 APInt::tcSetBit(Parts, semantics->precision - 1); 3759 exponent--; 3760 } 3761 } else { 3762 // If we are positive, we need to increment the significand. 3763 3764 // We only cross a binade boundary that requires adjusting the exponent if 3765 // the input is not a denormal and all of said input's significand bits 3766 // are set. If all of said conditions are true: clear the significand, set 3767 // the integral bit to 1, and increment the exponent. If we have a 3768 // denormal always increment since moving denormals and the numbers in the 3769 // smallest normal binade have the same exponent in our representation. 3770 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes(); 3771 3772 if (WillCrossBinadeBoundary) { 3773 integerPart *Parts = significandParts(); 3774 APInt::tcSet(Parts, 0, partCount()); 3775 APInt::tcSetBit(Parts, semantics->precision - 1); 3776 assert(exponent != semantics->maxExponent && 3777 "We can not increment an exponent beyond the maxExponent allowed" 3778 " by the given floating point semantics."); 3779 exponent++; 3780 } else { 3781 incrementSignificand(); 3782 } 3783 } 3784 break; 3785 } 3786 3787 // If we are performing nextDown, swap sign so we have -nextUp(-x) 3788 if (nextDown) 3789 changeSign(); 3790 3791 return result; 3792 } 3793 3794 void IEEEFloat::makeInf(bool Negative) { 3795 category = fcInfinity; 3796 sign = Negative; 3797 exponent = semantics->maxExponent + 1; 3798 APInt::tcSet(significandParts(), 0, partCount()); 3799 } 3800 3801 void IEEEFloat::makeZero(bool Negative) { 3802 category = fcZero; 3803 sign = Negative; 3804 exponent = semantics->minExponent-1; 3805 APInt::tcSet(significandParts(), 0, partCount()); 3806 } 3807 3808 void IEEEFloat::makeQuiet() { 3809 assert(isNaN()); 3810 APInt::tcSetBit(significandParts(), semantics->precision - 2); 3811 } 3812 3813 int ilogb(const IEEEFloat &Arg) { 3814 if (Arg.isNaN()) 3815 return IEEEFloat::IEK_NaN; 3816 if (Arg.isZero()) 3817 return IEEEFloat::IEK_Zero; 3818 if (Arg.isInfinity()) 3819 return IEEEFloat::IEK_Inf; 3820 if (!Arg.isDenormal()) 3821 return Arg.exponent; 3822 3823 IEEEFloat Normalized(Arg); 3824 int SignificandBits = Arg.getSemantics().precision - 1; 3825 3826 Normalized.exponent += SignificandBits; 3827 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero); 3828 return Normalized.exponent - SignificandBits; 3829 } 3830 3831 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) { 3832 auto MaxExp = X.getSemantics().maxExponent; 3833 auto MinExp = X.getSemantics().minExponent; 3834 3835 // If Exp is wildly out-of-scale, simply adding it to X.exponent will 3836 // overflow; clamp it to a safe range before adding, but ensure that the range 3837 // is large enough that the clamp does not change the result. The range we 3838 // need to support is the difference between the largest possible exponent and 3839 // the normalized exponent of half the smallest denormal. 3840 3841 int SignificandBits = X.getSemantics().precision - 1; 3842 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1; 3843 3844 // Clamp to one past the range ends to let normalize handle overlflow. 3845 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement); 3846 X.normalize(RoundingMode, lfExactlyZero); 3847 if (X.isNaN()) 3848 X.makeQuiet(); 3849 return X; 3850 } 3851 3852 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) { 3853 Exp = ilogb(Val); 3854 3855 // Quiet signalling nans. 3856 if (Exp == IEEEFloat::IEK_NaN) { 3857 IEEEFloat Quiet(Val); 3858 Quiet.makeQuiet(); 3859 return Quiet; 3860 } 3861 3862 if (Exp == IEEEFloat::IEK_Inf) 3863 return Val; 3864 3865 // 1 is added because frexp is defined to return a normalized fraction in 3866 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0). 3867 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1; 3868 return scalbn(Val, -Exp, RM); 3869 } 3870 3871 } // End detail namespace 3872 3873 APFloat::opStatus APFloat::convertFromString(StringRef Str, roundingMode RM) { 3874 return IEEE.convertFromString(Str, RM); 3875 } 3876 3877 hash_code hash_value(const APFloat &Arg) { return hash_value(Arg.IEEE); } 3878 3879 APFloat::APFloat(const fltSemantics &Semantics, StringRef S) 3880 : APFloat(IEEEFloat(Semantics, S)) {} 3881 3882 } // End llvm namespace 3883