1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a class to represent arbitrary precision floating 11 // point values and provide a variety of arithmetic operations on them. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/APFloat.h" 16 #include "llvm/ADT/APSInt.h" 17 #include "llvm/ADT/FoldingSet.h" 18 #include "llvm/ADT/Hashing.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Support/ErrorHandling.h" 22 #include "llvm/Support/MathExtras.h" 23 #include <cstring> 24 #include <limits.h> 25 26 using namespace llvm; 27 28 /// A macro used to combine two fcCategory enums into one key which can be used 29 /// in a switch statement to classify how the interaction of two APFloat's 30 /// categories affects an operation. 31 /// 32 /// TODO: If clang source code is ever allowed to use constexpr in its own 33 /// codebase, change this into a static inline function. 34 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs)) 35 36 /* Assumed in hexadecimal significand parsing, and conversion to 37 hexadecimal strings. */ 38 static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); 39 40 namespace llvm { 41 42 /* Represents floating point arithmetic semantics. */ 43 struct fltSemantics { 44 /* The largest E such that 2^E is representable; this matches the 45 definition of IEEE 754. */ 46 APFloat::ExponentType maxExponent; 47 48 /* The smallest E such that 2^E is a normalized number; this 49 matches the definition of IEEE 754. */ 50 APFloat::ExponentType minExponent; 51 52 /* Number of bits in the significand. This includes the integer 53 bit. */ 54 unsigned int precision; 55 56 /* Number of bits actually used in the semantics. */ 57 unsigned int sizeInBits; 58 }; 59 60 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, 16 }; 61 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, 32 }; 62 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, 64 }; 63 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, 128 }; 64 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, 80 }; 65 const fltSemantics APFloat::Bogus = { 0, 0, 0, 0 }; 66 67 /* The PowerPC format consists of two doubles. It does not map cleanly 68 onto the usual format above. It is approximated using twice the 69 mantissa bits. Note that for exponents near the double minimum, 70 we no longer can represent the full 106 mantissa bits, so those 71 will be treated as denormal numbers. 72 73 FIXME: While this approximation is equivalent to what GCC uses for 74 compile-time arithmetic on PPC double-double numbers, it is not able 75 to represent all possible values held by a PPC double-double number, 76 for example: (long double) 1.0 + (long double) 0x1p-106 77 Should this be replaced by a full emulation of PPC double-double? */ 78 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53, 128 }; 79 80 /* A tight upper bound on number of parts required to hold the value 81 pow(5, power) is 82 83 power * 815 / (351 * integerPartWidth) + 1 84 85 However, whilst the result may require only this many parts, 86 because we are multiplying two values to get it, the 87 multiplication may require an extra part with the excess part 88 being zero (consider the trivial case of 1 * 1, tcFullMultiply 89 requires two parts to hold the single-part result). So we add an 90 extra one to guarantee enough space whilst multiplying. */ 91 const unsigned int maxExponent = 16383; 92 const unsigned int maxPrecision = 113; 93 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 94 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) 95 / (351 * integerPartWidth)); 96 } 97 98 /* A bunch of private, handy routines. */ 99 100 static inline unsigned int 101 partCountForBits(unsigned int bits) 102 { 103 return ((bits) + integerPartWidth - 1) / integerPartWidth; 104 } 105 106 /* Returns 0U-9U. Return values >= 10U are not digits. */ 107 static inline unsigned int 108 decDigitValue(unsigned int c) 109 { 110 return c - '0'; 111 } 112 113 /* Return the value of a decimal exponent of the form 114 [+-]ddddddd. 115 116 If the exponent overflows, returns a large exponent with the 117 appropriate sign. */ 118 static int 119 readExponent(StringRef::iterator begin, StringRef::iterator end) 120 { 121 bool isNegative; 122 unsigned int absExponent; 123 const unsigned int overlargeExponent = 24000; /* FIXME. */ 124 StringRef::iterator p = begin; 125 126 assert(p != end && "Exponent has no digits"); 127 128 isNegative = (*p == '-'); 129 if (*p == '-' || *p == '+') { 130 p++; 131 assert(p != end && "Exponent has no digits"); 132 } 133 134 absExponent = decDigitValue(*p++); 135 assert(absExponent < 10U && "Invalid character in exponent"); 136 137 for (; p != end; ++p) { 138 unsigned int value; 139 140 value = decDigitValue(*p); 141 assert(value < 10U && "Invalid character in exponent"); 142 143 value += absExponent * 10; 144 if (absExponent >= overlargeExponent) { 145 absExponent = overlargeExponent; 146 p = end; /* outwit assert below */ 147 break; 148 } 149 absExponent = value; 150 } 151 152 assert(p == end && "Invalid exponent in exponent"); 153 154 if (isNegative) 155 return -(int) absExponent; 156 else 157 return (int) absExponent; 158 } 159 160 /* This is ugly and needs cleaning up, but I don't immediately see 161 how whilst remaining safe. */ 162 static int 163 totalExponent(StringRef::iterator p, StringRef::iterator end, 164 int exponentAdjustment) 165 { 166 int unsignedExponent; 167 bool negative, overflow; 168 int exponent = 0; 169 170 assert(p != end && "Exponent has no digits"); 171 172 negative = *p == '-'; 173 if (*p == '-' || *p == '+') { 174 p++; 175 assert(p != end && "Exponent has no digits"); 176 } 177 178 unsignedExponent = 0; 179 overflow = false; 180 for (; p != end; ++p) { 181 unsigned int value; 182 183 value = decDigitValue(*p); 184 assert(value < 10U && "Invalid character in exponent"); 185 186 unsignedExponent = unsignedExponent * 10 + value; 187 if (unsignedExponent > 32767) { 188 overflow = true; 189 break; 190 } 191 } 192 193 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 194 overflow = true; 195 196 if (!overflow) { 197 exponent = unsignedExponent; 198 if (negative) 199 exponent = -exponent; 200 exponent += exponentAdjustment; 201 if (exponent > 32767 || exponent < -32768) 202 overflow = true; 203 } 204 205 if (overflow) 206 exponent = negative ? -32768: 32767; 207 208 return exponent; 209 } 210 211 static StringRef::iterator 212 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 213 StringRef::iterator *dot) 214 { 215 StringRef::iterator p = begin; 216 *dot = end; 217 while (p != end && *p == '0') 218 p++; 219 220 if (p != end && *p == '.') { 221 *dot = p++; 222 223 assert(end - begin != 1 && "Significand has no digits"); 224 225 while (p != end && *p == '0') 226 p++; 227 } 228 229 return p; 230 } 231 232 /* Given a normal decimal floating point number of the form 233 234 dddd.dddd[eE][+-]ddd 235 236 where the decimal point and exponent are optional, fill out the 237 structure D. Exponent is appropriate if the significand is 238 treated as an integer, and normalizedExponent if the significand 239 is taken to have the decimal point after a single leading 240 non-zero digit. 241 242 If the value is zero, V->firstSigDigit points to a non-digit, and 243 the return exponent is zero. 244 */ 245 struct decimalInfo { 246 const char *firstSigDigit; 247 const char *lastSigDigit; 248 int exponent; 249 int normalizedExponent; 250 }; 251 252 static void 253 interpretDecimal(StringRef::iterator begin, StringRef::iterator end, 254 decimalInfo *D) 255 { 256 StringRef::iterator dot = end; 257 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot); 258 259 D->firstSigDigit = p; 260 D->exponent = 0; 261 D->normalizedExponent = 0; 262 263 for (; p != end; ++p) { 264 if (*p == '.') { 265 assert(dot == end && "String contains multiple dots"); 266 dot = p++; 267 if (p == end) 268 break; 269 } 270 if (decDigitValue(*p) >= 10U) 271 break; 272 } 273 274 if (p != end) { 275 assert((*p == 'e' || *p == 'E') && "Invalid character in significand"); 276 assert(p != begin && "Significand has no digits"); 277 assert((dot == end || p - begin != 1) && "Significand has no digits"); 278 279 /* p points to the first non-digit in the string */ 280 D->exponent = readExponent(p + 1, end); 281 282 /* Implied decimal point? */ 283 if (dot == end) 284 dot = p; 285 } 286 287 /* If number is all zeroes accept any exponent. */ 288 if (p != D->firstSigDigit) { 289 /* Drop insignificant trailing zeroes. */ 290 if (p != begin) { 291 do 292 do 293 p--; 294 while (p != begin && *p == '0'); 295 while (p != begin && *p == '.'); 296 } 297 298 /* Adjust the exponents for any decimal point. */ 299 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p)); 300 D->normalizedExponent = (D->exponent + 301 static_cast<APFloat::ExponentType>((p - D->firstSigDigit) 302 - (dot > D->firstSigDigit && dot < p))); 303 } 304 305 D->lastSigDigit = p; 306 } 307 308 /* Return the trailing fraction of a hexadecimal number. 309 DIGITVALUE is the first hex digit of the fraction, P points to 310 the next digit. */ 311 static lostFraction 312 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 313 unsigned int digitValue) 314 { 315 unsigned int hexDigit; 316 317 /* If the first trailing digit isn't 0 or 8 we can work out the 318 fraction immediately. */ 319 if (digitValue > 8) 320 return lfMoreThanHalf; 321 else if (digitValue < 8 && digitValue > 0) 322 return lfLessThanHalf; 323 324 // Otherwise we need to find the first non-zero digit. 325 while (p != end && (*p == '0' || *p == '.')) 326 p++; 327 328 assert(p != end && "Invalid trailing hexadecimal fraction!"); 329 330 hexDigit = hexDigitValue(*p); 331 332 /* If we ran off the end it is exactly zero or one-half, otherwise 333 a little more. */ 334 if (hexDigit == -1U) 335 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 336 else 337 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 338 } 339 340 /* Return the fraction lost were a bignum truncated losing the least 341 significant BITS bits. */ 342 static lostFraction 343 lostFractionThroughTruncation(const integerPart *parts, 344 unsigned int partCount, 345 unsigned int bits) 346 { 347 unsigned int lsb; 348 349 lsb = APInt::tcLSB(parts, partCount); 350 351 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 352 if (bits <= lsb) 353 return lfExactlyZero; 354 if (bits == lsb + 1) 355 return lfExactlyHalf; 356 if (bits <= partCount * integerPartWidth && 357 APInt::tcExtractBit(parts, bits - 1)) 358 return lfMoreThanHalf; 359 360 return lfLessThanHalf; 361 } 362 363 /* Shift DST right BITS bits noting lost fraction. */ 364 static lostFraction 365 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) 366 { 367 lostFraction lost_fraction; 368 369 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 370 371 APInt::tcShiftRight(dst, parts, bits); 372 373 return lost_fraction; 374 } 375 376 /* Combine the effect of two lost fractions. */ 377 static lostFraction 378 combineLostFractions(lostFraction moreSignificant, 379 lostFraction lessSignificant) 380 { 381 if (lessSignificant != lfExactlyZero) { 382 if (moreSignificant == lfExactlyZero) 383 moreSignificant = lfLessThanHalf; 384 else if (moreSignificant == lfExactlyHalf) 385 moreSignificant = lfMoreThanHalf; 386 } 387 388 return moreSignificant; 389 } 390 391 /* The error from the true value, in half-ulps, on multiplying two 392 floating point numbers, which differ from the value they 393 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 394 than the returned value. 395 396 See "How to Read Floating Point Numbers Accurately" by William D 397 Clinger. */ 398 static unsigned int 399 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 400 { 401 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 402 403 if (HUerr1 + HUerr2 == 0) 404 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 405 else 406 return inexactMultiply + 2 * (HUerr1 + HUerr2); 407 } 408 409 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 410 when the least significant BITS are truncated. BITS cannot be 411 zero. */ 412 static integerPart 413 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) 414 { 415 unsigned int count, partBits; 416 integerPart part, boundary; 417 418 assert(bits != 0); 419 420 bits--; 421 count = bits / integerPartWidth; 422 partBits = bits % integerPartWidth + 1; 423 424 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits)); 425 426 if (isNearest) 427 boundary = (integerPart) 1 << (partBits - 1); 428 else 429 boundary = 0; 430 431 if (count == 0) { 432 if (part - boundary <= boundary - part) 433 return part - boundary; 434 else 435 return boundary - part; 436 } 437 438 if (part == boundary) { 439 while (--count) 440 if (parts[count]) 441 return ~(integerPart) 0; /* A lot. */ 442 443 return parts[0]; 444 } else if (part == boundary - 1) { 445 while (--count) 446 if (~parts[count]) 447 return ~(integerPart) 0; /* A lot. */ 448 449 return -parts[0]; 450 } 451 452 return ~(integerPart) 0; /* A lot. */ 453 } 454 455 /* Place pow(5, power) in DST, and return the number of parts used. 456 DST must be at least one part larger than size of the answer. */ 457 static unsigned int 458 powerOf5(integerPart *dst, unsigned int power) 459 { 460 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 461 15625, 78125 }; 462 integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; 463 pow5s[0] = 78125 * 5; 464 465 unsigned int partsCount[16] = { 1 }; 466 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 467 unsigned int result; 468 assert(power <= maxExponent); 469 470 p1 = dst; 471 p2 = scratch; 472 473 *p1 = firstEightPowers[power & 7]; 474 power >>= 3; 475 476 result = 1; 477 pow5 = pow5s; 478 479 for (unsigned int n = 0; power; power >>= 1, n++) { 480 unsigned int pc; 481 482 pc = partsCount[n]; 483 484 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ 485 if (pc == 0) { 486 pc = partsCount[n - 1]; 487 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); 488 pc *= 2; 489 if (pow5[pc - 1] == 0) 490 pc--; 491 partsCount[n] = pc; 492 } 493 494 if (power & 1) { 495 integerPart *tmp; 496 497 APInt::tcFullMultiply(p2, p1, pow5, result, pc); 498 result += pc; 499 if (p2[result - 1] == 0) 500 result--; 501 502 /* Now result is in p1 with partsCount parts and p2 is scratch 503 space. */ 504 tmp = p1, p1 = p2, p2 = tmp; 505 } 506 507 pow5 += pc; 508 } 509 510 if (p1 != dst) 511 APInt::tcAssign(dst, p1, result); 512 513 return result; 514 } 515 516 /* Zero at the end to avoid modular arithmetic when adding one; used 517 when rounding up during hexadecimal output. */ 518 static const char hexDigitsLower[] = "0123456789abcdef0"; 519 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 520 static const char infinityL[] = "infinity"; 521 static const char infinityU[] = "INFINITY"; 522 static const char NaNL[] = "nan"; 523 static const char NaNU[] = "NAN"; 524 525 /* Write out an integerPart in hexadecimal, starting with the most 526 significant nibble. Write out exactly COUNT hexdigits, return 527 COUNT. */ 528 static unsigned int 529 partAsHex (char *dst, integerPart part, unsigned int count, 530 const char *hexDigitChars) 531 { 532 unsigned int result = count; 533 534 assert(count != 0 && count <= integerPartWidth / 4); 535 536 part >>= (integerPartWidth - 4 * count); 537 while (count--) { 538 dst[count] = hexDigitChars[part & 0xf]; 539 part >>= 4; 540 } 541 542 return result; 543 } 544 545 /* Write out an unsigned decimal integer. */ 546 static char * 547 writeUnsignedDecimal (char *dst, unsigned int n) 548 { 549 char buff[40], *p; 550 551 p = buff; 552 do 553 *p++ = '0' + n % 10; 554 while (n /= 10); 555 556 do 557 *dst++ = *--p; 558 while (p != buff); 559 560 return dst; 561 } 562 563 /* Write out a signed decimal integer. */ 564 static char * 565 writeSignedDecimal (char *dst, int value) 566 { 567 if (value < 0) { 568 *dst++ = '-'; 569 dst = writeUnsignedDecimal(dst, -(unsigned) value); 570 } else 571 dst = writeUnsignedDecimal(dst, value); 572 573 return dst; 574 } 575 576 /* Constructors. */ 577 void 578 APFloat::initialize(const fltSemantics *ourSemantics) 579 { 580 unsigned int count; 581 582 semantics = ourSemantics; 583 count = partCount(); 584 if (count > 1) 585 significand.parts = new integerPart[count]; 586 } 587 588 void 589 APFloat::freeSignificand() 590 { 591 if (needsCleanup()) 592 delete [] significand.parts; 593 } 594 595 void 596 APFloat::assign(const APFloat &rhs) 597 { 598 assert(semantics == rhs.semantics); 599 600 sign = rhs.sign; 601 category = rhs.category; 602 exponent = rhs.exponent; 603 if (isFiniteNonZero() || category == fcNaN) 604 copySignificand(rhs); 605 } 606 607 void 608 APFloat::copySignificand(const APFloat &rhs) 609 { 610 assert(isFiniteNonZero() || category == fcNaN); 611 assert(rhs.partCount() >= partCount()); 612 613 APInt::tcAssign(significandParts(), rhs.significandParts(), 614 partCount()); 615 } 616 617 /* Make this number a NaN, with an arbitrary but deterministic value 618 for the significand. If double or longer, this is a signalling NaN, 619 which may not be ideal. If float, this is QNaN(0). */ 620 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) 621 { 622 category = fcNaN; 623 sign = Negative; 624 625 integerPart *significand = significandParts(); 626 unsigned numParts = partCount(); 627 628 // Set the significand bits to the fill. 629 if (!fill || fill->getNumWords() < numParts) 630 APInt::tcSet(significand, 0, numParts); 631 if (fill) { 632 APInt::tcAssign(significand, fill->getRawData(), 633 std::min(fill->getNumWords(), numParts)); 634 635 // Zero out the excess bits of the significand. 636 unsigned bitsToPreserve = semantics->precision - 1; 637 unsigned part = bitsToPreserve / 64; 638 bitsToPreserve %= 64; 639 significand[part] &= ((1ULL << bitsToPreserve) - 1); 640 for (part++; part != numParts; ++part) 641 significand[part] = 0; 642 } 643 644 unsigned QNaNBit = semantics->precision - 2; 645 646 if (SNaN) { 647 // We always have to clear the QNaN bit to make it an SNaN. 648 APInt::tcClearBit(significand, QNaNBit); 649 650 // If there are no bits set in the payload, we have to set 651 // *something* to make it a NaN instead of an infinity; 652 // conventionally, this is the next bit down from the QNaN bit. 653 if (APInt::tcIsZero(significand, numParts)) 654 APInt::tcSetBit(significand, QNaNBit - 1); 655 } else { 656 // We always have to set the QNaN bit to make it a QNaN. 657 APInt::tcSetBit(significand, QNaNBit); 658 } 659 660 // For x87 extended precision, we want to make a NaN, not a 661 // pseudo-NaN. Maybe we should expose the ability to make 662 // pseudo-NaNs? 663 if (semantics == &APFloat::x87DoubleExtended) 664 APInt::tcSetBit(significand, QNaNBit + 1); 665 } 666 667 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative, 668 const APInt *fill) { 669 APFloat value(Sem, uninitialized); 670 value.makeNaN(SNaN, Negative, fill); 671 return value; 672 } 673 674 APFloat & 675 APFloat::operator=(const APFloat &rhs) 676 { 677 if (this != &rhs) { 678 if (semantics != rhs.semantics) { 679 freeSignificand(); 680 initialize(rhs.semantics); 681 } 682 assign(rhs); 683 } 684 685 return *this; 686 } 687 688 APFloat & 689 APFloat::operator=(APFloat &&rhs) { 690 freeSignificand(); 691 692 semantics = rhs.semantics; 693 significand = rhs.significand; 694 exponent = rhs.exponent; 695 category = rhs.category; 696 sign = rhs.sign; 697 698 rhs.semantics = &Bogus; 699 return *this; 700 } 701 702 bool 703 APFloat::isDenormal() const { 704 return isFiniteNonZero() && (exponent == semantics->minExponent) && 705 (APInt::tcExtractBit(significandParts(), 706 semantics->precision - 1) == 0); 707 } 708 709 bool 710 APFloat::isSmallest() const { 711 // The smallest number by magnitude in our format will be the smallest 712 // denormal, i.e. the floating point number with exponent being minimum 713 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 714 return isFiniteNonZero() && exponent == semantics->minExponent && 715 significandMSB() == 0; 716 } 717 718 bool APFloat::isSignificandAllOnes() const { 719 // Test if the significand excluding the integral bit is all ones. This allows 720 // us to test for binade boundaries. 721 const integerPart *Parts = significandParts(); 722 const unsigned PartCount = partCount(); 723 for (unsigned i = 0; i < PartCount - 1; i++) 724 if (~Parts[i]) 725 return false; 726 727 // Set the unused high bits to all ones when we compare. 728 const unsigned NumHighBits = 729 PartCount*integerPartWidth - semantics->precision + 1; 730 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " 731 "fill than integerPartWidth"); 732 const integerPart HighBitFill = 733 ~integerPart(0) << (integerPartWidth - NumHighBits); 734 if (~(Parts[PartCount - 1] | HighBitFill)) 735 return false; 736 737 return true; 738 } 739 740 bool APFloat::isSignificandAllZeros() const { 741 // Test if the significand excluding the integral bit is all zeros. This 742 // allows us to test for binade boundaries. 743 const integerPart *Parts = significandParts(); 744 const unsigned PartCount = partCount(); 745 746 for (unsigned i = 0; i < PartCount - 1; i++) 747 if (Parts[i]) 748 return false; 749 750 const unsigned NumHighBits = 751 PartCount*integerPartWidth - semantics->precision + 1; 752 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " 753 "clear than integerPartWidth"); 754 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 755 756 if (Parts[PartCount - 1] & HighBitMask) 757 return false; 758 759 return true; 760 } 761 762 bool 763 APFloat::isLargest() const { 764 // The largest number by magnitude in our format will be the floating point 765 // number with maximum exponent and with significand that is all ones. 766 return isFiniteNonZero() && exponent == semantics->maxExponent 767 && isSignificandAllOnes(); 768 } 769 770 bool 771 APFloat::bitwiseIsEqual(const APFloat &rhs) const { 772 if (this == &rhs) 773 return true; 774 if (semantics != rhs.semantics || 775 category != rhs.category || 776 sign != rhs.sign) 777 return false; 778 if (category==fcZero || category==fcInfinity) 779 return true; 780 781 if (isFiniteNonZero() && exponent != rhs.exponent) 782 return false; 783 784 return std::equal(significandParts(), significandParts() + partCount(), 785 rhs.significandParts()); 786 } 787 788 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) { 789 initialize(&ourSemantics); 790 sign = 0; 791 category = fcNormal; 792 zeroSignificand(); 793 exponent = ourSemantics.precision - 1; 794 significandParts()[0] = value; 795 normalize(rmNearestTiesToEven, lfExactlyZero); 796 } 797 798 APFloat::APFloat(const fltSemantics &ourSemantics) { 799 initialize(&ourSemantics); 800 category = fcZero; 801 sign = false; 802 } 803 804 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) { 805 // Allocates storage if necessary but does not initialize it. 806 initialize(&ourSemantics); 807 } 808 809 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) { 810 initialize(&ourSemantics); 811 convertFromString(text, rmNearestTiesToEven); 812 } 813 814 APFloat::APFloat(const APFloat &rhs) { 815 initialize(rhs.semantics); 816 assign(rhs); 817 } 818 819 APFloat::APFloat(APFloat &&rhs) : semantics(&Bogus) { 820 *this = std::move(rhs); 821 } 822 823 APFloat::~APFloat() 824 { 825 freeSignificand(); 826 } 827 828 // Profile - This method 'profiles' an APFloat for use with FoldingSet. 829 void APFloat::Profile(FoldingSetNodeID& ID) const { 830 ID.Add(bitcastToAPInt()); 831 } 832 833 unsigned int 834 APFloat::partCount() const 835 { 836 return partCountForBits(semantics->precision + 1); 837 } 838 839 unsigned int 840 APFloat::semanticsPrecision(const fltSemantics &semantics) 841 { 842 return semantics.precision; 843 } 844 APFloat::ExponentType 845 APFloat::semanticsMaxExponent(const fltSemantics &semantics) 846 { 847 return semantics.maxExponent; 848 } 849 APFloat::ExponentType 850 APFloat::semanticsMinExponent(const fltSemantics &semantics) 851 { 852 return semantics.minExponent; 853 } 854 unsigned int 855 APFloat::semanticsSizeInBits(const fltSemantics &semantics) 856 { 857 return semantics.sizeInBits; 858 } 859 860 const integerPart * 861 APFloat::significandParts() const 862 { 863 return const_cast<APFloat *>(this)->significandParts(); 864 } 865 866 integerPart * 867 APFloat::significandParts() 868 { 869 if (partCount() > 1) 870 return significand.parts; 871 else 872 return &significand.part; 873 } 874 875 void 876 APFloat::zeroSignificand() 877 { 878 APInt::tcSet(significandParts(), 0, partCount()); 879 } 880 881 /* Increment an fcNormal floating point number's significand. */ 882 void 883 APFloat::incrementSignificand() 884 { 885 integerPart carry; 886 887 carry = APInt::tcIncrement(significandParts(), partCount()); 888 889 /* Our callers should never cause us to overflow. */ 890 assert(carry == 0); 891 (void)carry; 892 } 893 894 /* Add the significand of the RHS. Returns the carry flag. */ 895 integerPart 896 APFloat::addSignificand(const APFloat &rhs) 897 { 898 integerPart *parts; 899 900 parts = significandParts(); 901 902 assert(semantics == rhs.semantics); 903 assert(exponent == rhs.exponent); 904 905 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 906 } 907 908 /* Subtract the significand of the RHS with a borrow flag. Returns 909 the borrow flag. */ 910 integerPart 911 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow) 912 { 913 integerPart *parts; 914 915 parts = significandParts(); 916 917 assert(semantics == rhs.semantics); 918 assert(exponent == rhs.exponent); 919 920 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 921 partCount()); 922 } 923 924 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 925 on to the full-precision result of the multiplication. Returns the 926 lost fraction. */ 927 lostFraction 928 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) 929 { 930 unsigned int omsb; // One, not zero, based MSB. 931 unsigned int partsCount, newPartsCount, precision; 932 integerPart *lhsSignificand; 933 integerPart scratch[4]; 934 integerPart *fullSignificand; 935 lostFraction lost_fraction; 936 bool ignored; 937 938 assert(semantics == rhs.semantics); 939 940 precision = semantics->precision; 941 942 // Allocate space for twice as many bits as the original significand, plus one 943 // extra bit for the addition to overflow into. 944 newPartsCount = partCountForBits(precision * 2 + 1); 945 946 if (newPartsCount > 4) 947 fullSignificand = new integerPart[newPartsCount]; 948 else 949 fullSignificand = scratch; 950 951 lhsSignificand = significandParts(); 952 partsCount = partCount(); 953 954 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 955 rhs.significandParts(), partsCount, partsCount); 956 957 lost_fraction = lfExactlyZero; 958 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 959 exponent += rhs.exponent; 960 961 // Assume the operands involved in the multiplication are single-precision 962 // FP, and the two multiplicants are: 963 // *this = a23 . a22 ... a0 * 2^e1 964 // rhs = b23 . b22 ... b0 * 2^e2 965 // the result of multiplication is: 966 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) 967 // Note that there are three significant bits at the left-hand side of the 968 // radix point: two for the multiplication, and an overflow bit for the 969 // addition (that will always be zero at this point). Move the radix point 970 // toward left by two bits, and adjust exponent accordingly. 971 exponent += 2; 972 973 if (addend && addend->isNonZero()) { 974 // The intermediate result of the multiplication has "2 * precision" 975 // signicant bit; adjust the addend to be consistent with mul result. 976 // 977 Significand savedSignificand = significand; 978 const fltSemantics *savedSemantics = semantics; 979 fltSemantics extendedSemantics; 980 opStatus status; 981 unsigned int extendedPrecision; 982 983 // Normalize our MSB to one below the top bit to allow for overflow. 984 extendedPrecision = 2 * precision + 1; 985 if (omsb != extendedPrecision - 1) { 986 assert(extendedPrecision > omsb); 987 APInt::tcShiftLeft(fullSignificand, newPartsCount, 988 (extendedPrecision - 1) - omsb); 989 exponent -= (extendedPrecision - 1) - omsb; 990 } 991 992 /* Create new semantics. */ 993 extendedSemantics = *semantics; 994 extendedSemantics.precision = extendedPrecision; 995 996 if (newPartsCount == 1) 997 significand.part = fullSignificand[0]; 998 else 999 significand.parts = fullSignificand; 1000 semantics = &extendedSemantics; 1001 1002 APFloat extendedAddend(*addend); 1003 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); 1004 assert(status == opOK); 1005 (void)status; 1006 1007 // Shift the significand of the addend right by one bit. This guarantees 1008 // that the high bit of the significand is zero (same as fullSignificand), 1009 // so the addition will overflow (if it does overflow at all) into the top bit. 1010 lost_fraction = extendedAddend.shiftSignificandRight(1); 1011 assert(lost_fraction == lfExactlyZero && 1012 "Lost precision while shifting addend for fused-multiply-add."); 1013 1014 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 1015 1016 /* Restore our state. */ 1017 if (newPartsCount == 1) 1018 fullSignificand[0] = significand.part; 1019 significand = savedSignificand; 1020 semantics = savedSemantics; 1021 1022 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1023 } 1024 1025 // Convert the result having "2 * precision" significant-bits back to the one 1026 // having "precision" significant-bits. First, move the radix point from 1027 // poision "2*precision - 1" to "precision - 1". The exponent need to be 1028 // adjusted by "2*precision - 1" - "precision - 1" = "precision". 1029 exponent -= precision + 1; 1030 1031 // In case MSB resides at the left-hand side of radix point, shift the 1032 // mantissa right by some amount to make sure the MSB reside right before 1033 // the radix point (i.e. "MSB . rest-significant-bits"). 1034 // 1035 // Note that the result is not normalized when "omsb < precision". So, the 1036 // caller needs to call APFloat::normalize() if normalized value is expected. 1037 if (omsb > precision) { 1038 unsigned int bits, significantParts; 1039 lostFraction lf; 1040 1041 bits = omsb - precision; 1042 significantParts = partCountForBits(omsb); 1043 lf = shiftRight(fullSignificand, significantParts, bits); 1044 lost_fraction = combineLostFractions(lf, lost_fraction); 1045 exponent += bits; 1046 } 1047 1048 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 1049 1050 if (newPartsCount > 4) 1051 delete [] fullSignificand; 1052 1053 return lost_fraction; 1054 } 1055 1056 /* Multiply the significands of LHS and RHS to DST. */ 1057 lostFraction 1058 APFloat::divideSignificand(const APFloat &rhs) 1059 { 1060 unsigned int bit, i, partsCount; 1061 const integerPart *rhsSignificand; 1062 integerPart *lhsSignificand, *dividend, *divisor; 1063 integerPart scratch[4]; 1064 lostFraction lost_fraction; 1065 1066 assert(semantics == rhs.semantics); 1067 1068 lhsSignificand = significandParts(); 1069 rhsSignificand = rhs.significandParts(); 1070 partsCount = partCount(); 1071 1072 if (partsCount > 2) 1073 dividend = new integerPart[partsCount * 2]; 1074 else 1075 dividend = scratch; 1076 1077 divisor = dividend + partsCount; 1078 1079 /* Copy the dividend and divisor as they will be modified in-place. */ 1080 for (i = 0; i < partsCount; i++) { 1081 dividend[i] = lhsSignificand[i]; 1082 divisor[i] = rhsSignificand[i]; 1083 lhsSignificand[i] = 0; 1084 } 1085 1086 exponent -= rhs.exponent; 1087 1088 unsigned int precision = semantics->precision; 1089 1090 /* Normalize the divisor. */ 1091 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 1092 if (bit) { 1093 exponent += bit; 1094 APInt::tcShiftLeft(divisor, partsCount, bit); 1095 } 1096 1097 /* Normalize the dividend. */ 1098 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 1099 if (bit) { 1100 exponent -= bit; 1101 APInt::tcShiftLeft(dividend, partsCount, bit); 1102 } 1103 1104 /* Ensure the dividend >= divisor initially for the loop below. 1105 Incidentally, this means that the division loop below is 1106 guaranteed to set the integer bit to one. */ 1107 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { 1108 exponent--; 1109 APInt::tcShiftLeft(dividend, partsCount, 1); 1110 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 1111 } 1112 1113 /* Long division. */ 1114 for (bit = precision; bit; bit -= 1) { 1115 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 1116 APInt::tcSubtract(dividend, divisor, 0, partsCount); 1117 APInt::tcSetBit(lhsSignificand, bit - 1); 1118 } 1119 1120 APInt::tcShiftLeft(dividend, partsCount, 1); 1121 } 1122 1123 /* Figure out the lost fraction. */ 1124 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 1125 1126 if (cmp > 0) 1127 lost_fraction = lfMoreThanHalf; 1128 else if (cmp == 0) 1129 lost_fraction = lfExactlyHalf; 1130 else if (APInt::tcIsZero(dividend, partsCount)) 1131 lost_fraction = lfExactlyZero; 1132 else 1133 lost_fraction = lfLessThanHalf; 1134 1135 if (partsCount > 2) 1136 delete [] dividend; 1137 1138 return lost_fraction; 1139 } 1140 1141 unsigned int 1142 APFloat::significandMSB() const 1143 { 1144 return APInt::tcMSB(significandParts(), partCount()); 1145 } 1146 1147 unsigned int 1148 APFloat::significandLSB() const 1149 { 1150 return APInt::tcLSB(significandParts(), partCount()); 1151 } 1152 1153 /* Note that a zero result is NOT normalized to fcZero. */ 1154 lostFraction 1155 APFloat::shiftSignificandRight(unsigned int bits) 1156 { 1157 /* Our exponent should not overflow. */ 1158 assert((ExponentType) (exponent + bits) >= exponent); 1159 1160 exponent += bits; 1161 1162 return shiftRight(significandParts(), partCount(), bits); 1163 } 1164 1165 /* Shift the significand left BITS bits, subtract BITS from its exponent. */ 1166 void 1167 APFloat::shiftSignificandLeft(unsigned int bits) 1168 { 1169 assert(bits < semantics->precision); 1170 1171 if (bits) { 1172 unsigned int partsCount = partCount(); 1173 1174 APInt::tcShiftLeft(significandParts(), partsCount, bits); 1175 exponent -= bits; 1176 1177 assert(!APInt::tcIsZero(significandParts(), partsCount)); 1178 } 1179 } 1180 1181 APFloat::cmpResult 1182 APFloat::compareAbsoluteValue(const APFloat &rhs) const 1183 { 1184 int compare; 1185 1186 assert(semantics == rhs.semantics); 1187 assert(isFiniteNonZero()); 1188 assert(rhs.isFiniteNonZero()); 1189 1190 compare = exponent - rhs.exponent; 1191 1192 /* If exponents are equal, do an unsigned bignum comparison of the 1193 significands. */ 1194 if (compare == 0) 1195 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1196 partCount()); 1197 1198 if (compare > 0) 1199 return cmpGreaterThan; 1200 else if (compare < 0) 1201 return cmpLessThan; 1202 else 1203 return cmpEqual; 1204 } 1205 1206 /* Handle overflow. Sign is preserved. We either become infinity or 1207 the largest finite number. */ 1208 APFloat::opStatus 1209 APFloat::handleOverflow(roundingMode rounding_mode) 1210 { 1211 /* Infinity? */ 1212 if (rounding_mode == rmNearestTiesToEven || 1213 rounding_mode == rmNearestTiesToAway || 1214 (rounding_mode == rmTowardPositive && !sign) || 1215 (rounding_mode == rmTowardNegative && sign)) { 1216 category = fcInfinity; 1217 return (opStatus) (opOverflow | opInexact); 1218 } 1219 1220 /* Otherwise we become the largest finite number. */ 1221 category = fcNormal; 1222 exponent = semantics->maxExponent; 1223 APInt::tcSetLeastSignificantBits(significandParts(), partCount(), 1224 semantics->precision); 1225 1226 return opInexact; 1227 } 1228 1229 /* Returns TRUE if, when truncating the current number, with BIT the 1230 new LSB, with the given lost fraction and rounding mode, the result 1231 would need to be rounded away from zero (i.e., by increasing the 1232 signficand). This routine must work for fcZero of both signs, and 1233 fcNormal numbers. */ 1234 bool 1235 APFloat::roundAwayFromZero(roundingMode rounding_mode, 1236 lostFraction lost_fraction, 1237 unsigned int bit) const 1238 { 1239 /* NaNs and infinities should not have lost fractions. */ 1240 assert(isFiniteNonZero() || category == fcZero); 1241 1242 /* Current callers never pass this so we don't handle it. */ 1243 assert(lost_fraction != lfExactlyZero); 1244 1245 switch (rounding_mode) { 1246 case rmNearestTiesToAway: 1247 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1248 1249 case rmNearestTiesToEven: 1250 if (lost_fraction == lfMoreThanHalf) 1251 return true; 1252 1253 /* Our zeroes don't have a significand to test. */ 1254 if (lost_fraction == lfExactlyHalf && category != fcZero) 1255 return APInt::tcExtractBit(significandParts(), bit); 1256 1257 return false; 1258 1259 case rmTowardZero: 1260 return false; 1261 1262 case rmTowardPositive: 1263 return !sign; 1264 1265 case rmTowardNegative: 1266 return sign; 1267 } 1268 llvm_unreachable("Invalid rounding mode found"); 1269 } 1270 1271 APFloat::opStatus 1272 APFloat::normalize(roundingMode rounding_mode, 1273 lostFraction lost_fraction) 1274 { 1275 unsigned int omsb; /* One, not zero, based MSB. */ 1276 int exponentChange; 1277 1278 if (!isFiniteNonZero()) 1279 return opOK; 1280 1281 /* Before rounding normalize the exponent of fcNormal numbers. */ 1282 omsb = significandMSB() + 1; 1283 1284 if (omsb) { 1285 /* OMSB is numbered from 1. We want to place it in the integer 1286 bit numbered PRECISION if possible, with a compensating change in 1287 the exponent. */ 1288 exponentChange = omsb - semantics->precision; 1289 1290 /* If the resulting exponent is too high, overflow according to 1291 the rounding mode. */ 1292 if (exponent + exponentChange > semantics->maxExponent) 1293 return handleOverflow(rounding_mode); 1294 1295 /* Subnormal numbers have exponent minExponent, and their MSB 1296 is forced based on that. */ 1297 if (exponent + exponentChange < semantics->minExponent) 1298 exponentChange = semantics->minExponent - exponent; 1299 1300 /* Shifting left is easy as we don't lose precision. */ 1301 if (exponentChange < 0) { 1302 assert(lost_fraction == lfExactlyZero); 1303 1304 shiftSignificandLeft(-exponentChange); 1305 1306 return opOK; 1307 } 1308 1309 if (exponentChange > 0) { 1310 lostFraction lf; 1311 1312 /* Shift right and capture any new lost fraction. */ 1313 lf = shiftSignificandRight(exponentChange); 1314 1315 lost_fraction = combineLostFractions(lf, lost_fraction); 1316 1317 /* Keep OMSB up-to-date. */ 1318 if (omsb > (unsigned) exponentChange) 1319 omsb -= exponentChange; 1320 else 1321 omsb = 0; 1322 } 1323 } 1324 1325 /* Now round the number according to rounding_mode given the lost 1326 fraction. */ 1327 1328 /* As specified in IEEE 754, since we do not trap we do not report 1329 underflow for exact results. */ 1330 if (lost_fraction == lfExactlyZero) { 1331 /* Canonicalize zeroes. */ 1332 if (omsb == 0) 1333 category = fcZero; 1334 1335 return opOK; 1336 } 1337 1338 /* Increment the significand if we're rounding away from zero. */ 1339 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1340 if (omsb == 0) 1341 exponent = semantics->minExponent; 1342 1343 incrementSignificand(); 1344 omsb = significandMSB() + 1; 1345 1346 /* Did the significand increment overflow? */ 1347 if (omsb == (unsigned) semantics->precision + 1) { 1348 /* Renormalize by incrementing the exponent and shifting our 1349 significand right one. However if we already have the 1350 maximum exponent we overflow to infinity. */ 1351 if (exponent == semantics->maxExponent) { 1352 category = fcInfinity; 1353 1354 return (opStatus) (opOverflow | opInexact); 1355 } 1356 1357 shiftSignificandRight(1); 1358 1359 return opInexact; 1360 } 1361 } 1362 1363 /* The normal case - we were and are not denormal, and any 1364 significand increment above didn't overflow. */ 1365 if (omsb == semantics->precision) 1366 return opInexact; 1367 1368 /* We have a non-zero denormal. */ 1369 assert(omsb < semantics->precision); 1370 1371 /* Canonicalize zeroes. */ 1372 if (omsb == 0) 1373 category = fcZero; 1374 1375 /* The fcZero case is a denormal that underflowed to zero. */ 1376 return (opStatus) (opUnderflow | opInexact); 1377 } 1378 1379 APFloat::opStatus 1380 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) 1381 { 1382 switch (PackCategoriesIntoKey(category, rhs.category)) { 1383 default: 1384 llvm_unreachable(nullptr); 1385 1386 case PackCategoriesIntoKey(fcNaN, fcZero): 1387 case PackCategoriesIntoKey(fcNaN, fcNormal): 1388 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1389 case PackCategoriesIntoKey(fcNaN, fcNaN): 1390 case PackCategoriesIntoKey(fcNormal, fcZero): 1391 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1392 case PackCategoriesIntoKey(fcInfinity, fcZero): 1393 return opOK; 1394 1395 case PackCategoriesIntoKey(fcZero, fcNaN): 1396 case PackCategoriesIntoKey(fcNormal, fcNaN): 1397 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1398 // We need to be sure to flip the sign here for subtraction because we 1399 // don't have a separate negate operation so -NaN becomes 0 - NaN here. 1400 sign = rhs.sign ^ subtract; 1401 category = fcNaN; 1402 copySignificand(rhs); 1403 return opOK; 1404 1405 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1406 case PackCategoriesIntoKey(fcZero, fcInfinity): 1407 category = fcInfinity; 1408 sign = rhs.sign ^ subtract; 1409 return opOK; 1410 1411 case PackCategoriesIntoKey(fcZero, fcNormal): 1412 assign(rhs); 1413 sign = rhs.sign ^ subtract; 1414 return opOK; 1415 1416 case PackCategoriesIntoKey(fcZero, fcZero): 1417 /* Sign depends on rounding mode; handled by caller. */ 1418 return opOK; 1419 1420 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1421 /* Differently signed infinities can only be validly 1422 subtracted. */ 1423 if (((sign ^ rhs.sign)!=0) != subtract) { 1424 makeNaN(); 1425 return opInvalidOp; 1426 } 1427 1428 return opOK; 1429 1430 case PackCategoriesIntoKey(fcNormal, fcNormal): 1431 return opDivByZero; 1432 } 1433 } 1434 1435 /* Add or subtract two normal numbers. */ 1436 lostFraction 1437 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) 1438 { 1439 integerPart carry; 1440 lostFraction lost_fraction; 1441 int bits; 1442 1443 /* Determine if the operation on the absolute values is effectively 1444 an addition or subtraction. */ 1445 subtract ^= static_cast<bool>(sign ^ rhs.sign); 1446 1447 /* Are we bigger exponent-wise than the RHS? */ 1448 bits = exponent - rhs.exponent; 1449 1450 /* Subtraction is more subtle than one might naively expect. */ 1451 if (subtract) { 1452 APFloat temp_rhs(rhs); 1453 bool reverse; 1454 1455 if (bits == 0) { 1456 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan; 1457 lost_fraction = lfExactlyZero; 1458 } else if (bits > 0) { 1459 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1460 shiftSignificandLeft(1); 1461 reverse = false; 1462 } else { 1463 lost_fraction = shiftSignificandRight(-bits - 1); 1464 temp_rhs.shiftSignificandLeft(1); 1465 reverse = true; 1466 } 1467 1468 if (reverse) { 1469 carry = temp_rhs.subtractSignificand 1470 (*this, lost_fraction != lfExactlyZero); 1471 copySignificand(temp_rhs); 1472 sign = !sign; 1473 } else { 1474 carry = subtractSignificand 1475 (temp_rhs, lost_fraction != lfExactlyZero); 1476 } 1477 1478 /* Invert the lost fraction - it was on the RHS and 1479 subtracted. */ 1480 if (lost_fraction == lfLessThanHalf) 1481 lost_fraction = lfMoreThanHalf; 1482 else if (lost_fraction == lfMoreThanHalf) 1483 lost_fraction = lfLessThanHalf; 1484 1485 /* The code above is intended to ensure that no borrow is 1486 necessary. */ 1487 assert(!carry); 1488 (void)carry; 1489 } else { 1490 if (bits > 0) { 1491 APFloat temp_rhs(rhs); 1492 1493 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1494 carry = addSignificand(temp_rhs); 1495 } else { 1496 lost_fraction = shiftSignificandRight(-bits); 1497 carry = addSignificand(rhs); 1498 } 1499 1500 /* We have a guard bit; generating a carry cannot happen. */ 1501 assert(!carry); 1502 (void)carry; 1503 } 1504 1505 return lost_fraction; 1506 } 1507 1508 APFloat::opStatus 1509 APFloat::multiplySpecials(const APFloat &rhs) 1510 { 1511 switch (PackCategoriesIntoKey(category, rhs.category)) { 1512 default: 1513 llvm_unreachable(nullptr); 1514 1515 case PackCategoriesIntoKey(fcNaN, fcZero): 1516 case PackCategoriesIntoKey(fcNaN, fcNormal): 1517 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1518 case PackCategoriesIntoKey(fcNaN, fcNaN): 1519 sign = false; 1520 return opOK; 1521 1522 case PackCategoriesIntoKey(fcZero, fcNaN): 1523 case PackCategoriesIntoKey(fcNormal, fcNaN): 1524 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1525 sign = false; 1526 category = fcNaN; 1527 copySignificand(rhs); 1528 return opOK; 1529 1530 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1531 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1532 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1533 category = fcInfinity; 1534 return opOK; 1535 1536 case PackCategoriesIntoKey(fcZero, fcNormal): 1537 case PackCategoriesIntoKey(fcNormal, fcZero): 1538 case PackCategoriesIntoKey(fcZero, fcZero): 1539 category = fcZero; 1540 return opOK; 1541 1542 case PackCategoriesIntoKey(fcZero, fcInfinity): 1543 case PackCategoriesIntoKey(fcInfinity, fcZero): 1544 makeNaN(); 1545 return opInvalidOp; 1546 1547 case PackCategoriesIntoKey(fcNormal, fcNormal): 1548 return opOK; 1549 } 1550 } 1551 1552 APFloat::opStatus 1553 APFloat::divideSpecials(const APFloat &rhs) 1554 { 1555 switch (PackCategoriesIntoKey(category, rhs.category)) { 1556 default: 1557 llvm_unreachable(nullptr); 1558 1559 case PackCategoriesIntoKey(fcZero, fcNaN): 1560 case PackCategoriesIntoKey(fcNormal, fcNaN): 1561 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1562 category = fcNaN; 1563 copySignificand(rhs); 1564 case PackCategoriesIntoKey(fcNaN, fcZero): 1565 case PackCategoriesIntoKey(fcNaN, fcNormal): 1566 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1567 case PackCategoriesIntoKey(fcNaN, fcNaN): 1568 sign = false; 1569 case PackCategoriesIntoKey(fcInfinity, fcZero): 1570 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1571 case PackCategoriesIntoKey(fcZero, fcInfinity): 1572 case PackCategoriesIntoKey(fcZero, fcNormal): 1573 return opOK; 1574 1575 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1576 category = fcZero; 1577 return opOK; 1578 1579 case PackCategoriesIntoKey(fcNormal, fcZero): 1580 category = fcInfinity; 1581 return opDivByZero; 1582 1583 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1584 case PackCategoriesIntoKey(fcZero, fcZero): 1585 makeNaN(); 1586 return opInvalidOp; 1587 1588 case PackCategoriesIntoKey(fcNormal, fcNormal): 1589 return opOK; 1590 } 1591 } 1592 1593 APFloat::opStatus 1594 APFloat::modSpecials(const APFloat &rhs) 1595 { 1596 switch (PackCategoriesIntoKey(category, rhs.category)) { 1597 default: 1598 llvm_unreachable(nullptr); 1599 1600 case PackCategoriesIntoKey(fcNaN, fcZero): 1601 case PackCategoriesIntoKey(fcNaN, fcNormal): 1602 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1603 case PackCategoriesIntoKey(fcNaN, fcNaN): 1604 case PackCategoriesIntoKey(fcZero, fcInfinity): 1605 case PackCategoriesIntoKey(fcZero, fcNormal): 1606 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1607 return opOK; 1608 1609 case PackCategoriesIntoKey(fcZero, fcNaN): 1610 case PackCategoriesIntoKey(fcNormal, fcNaN): 1611 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1612 sign = false; 1613 category = fcNaN; 1614 copySignificand(rhs); 1615 return opOK; 1616 1617 case PackCategoriesIntoKey(fcNormal, fcZero): 1618 case PackCategoriesIntoKey(fcInfinity, fcZero): 1619 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1620 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1621 case PackCategoriesIntoKey(fcZero, fcZero): 1622 makeNaN(); 1623 return opInvalidOp; 1624 1625 case PackCategoriesIntoKey(fcNormal, fcNormal): 1626 return opOK; 1627 } 1628 } 1629 1630 /* Change sign. */ 1631 void 1632 APFloat::changeSign() 1633 { 1634 /* Look mummy, this one's easy. */ 1635 sign = !sign; 1636 } 1637 1638 void 1639 APFloat::clearSign() 1640 { 1641 /* So is this one. */ 1642 sign = 0; 1643 } 1644 1645 void 1646 APFloat::copySign(const APFloat &rhs) 1647 { 1648 /* And this one. */ 1649 sign = rhs.sign; 1650 } 1651 1652 /* Normalized addition or subtraction. */ 1653 APFloat::opStatus 1654 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, 1655 bool subtract) 1656 { 1657 opStatus fs; 1658 1659 fs = addOrSubtractSpecials(rhs, subtract); 1660 1661 /* This return code means it was not a simple case. */ 1662 if (fs == opDivByZero) { 1663 lostFraction lost_fraction; 1664 1665 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1666 fs = normalize(rounding_mode, lost_fraction); 1667 1668 /* Can only be zero if we lost no fraction. */ 1669 assert(category != fcZero || lost_fraction == lfExactlyZero); 1670 } 1671 1672 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1673 positive zero unless rounding to minus infinity, except that 1674 adding two like-signed zeroes gives that zero. */ 1675 if (category == fcZero) { 1676 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 1677 sign = (rounding_mode == rmTowardNegative); 1678 } 1679 1680 return fs; 1681 } 1682 1683 /* Normalized addition. */ 1684 APFloat::opStatus 1685 APFloat::add(const APFloat &rhs, roundingMode rounding_mode) 1686 { 1687 return addOrSubtract(rhs, rounding_mode, false); 1688 } 1689 1690 /* Normalized subtraction. */ 1691 APFloat::opStatus 1692 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode) 1693 { 1694 return addOrSubtract(rhs, rounding_mode, true); 1695 } 1696 1697 /* Normalized multiply. */ 1698 APFloat::opStatus 1699 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) 1700 { 1701 opStatus fs; 1702 1703 sign ^= rhs.sign; 1704 fs = multiplySpecials(rhs); 1705 1706 if (isFiniteNonZero()) { 1707 lostFraction lost_fraction = multiplySignificand(rhs, nullptr); 1708 fs = normalize(rounding_mode, lost_fraction); 1709 if (lost_fraction != lfExactlyZero) 1710 fs = (opStatus) (fs | opInexact); 1711 } 1712 1713 return fs; 1714 } 1715 1716 /* Normalized divide. */ 1717 APFloat::opStatus 1718 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) 1719 { 1720 opStatus fs; 1721 1722 sign ^= rhs.sign; 1723 fs = divideSpecials(rhs); 1724 1725 if (isFiniteNonZero()) { 1726 lostFraction lost_fraction = divideSignificand(rhs); 1727 fs = normalize(rounding_mode, lost_fraction); 1728 if (lost_fraction != lfExactlyZero) 1729 fs = (opStatus) (fs | opInexact); 1730 } 1731 1732 return fs; 1733 } 1734 1735 /* Normalized remainder. This is not currently correct in all cases. */ 1736 APFloat::opStatus 1737 APFloat::remainder(const APFloat &rhs) 1738 { 1739 opStatus fs; 1740 APFloat V = *this; 1741 unsigned int origSign = sign; 1742 1743 fs = V.divide(rhs, rmNearestTiesToEven); 1744 if (fs == opDivByZero) 1745 return fs; 1746 1747 int parts = partCount(); 1748 integerPart *x = new integerPart[parts]; 1749 bool ignored; 1750 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1751 rmNearestTiesToEven, &ignored); 1752 if (fs==opInvalidOp) 1753 return fs; 1754 1755 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1756 rmNearestTiesToEven); 1757 assert(fs==opOK); // should always work 1758 1759 fs = V.multiply(rhs, rmNearestTiesToEven); 1760 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1761 1762 fs = subtract(V, rmNearestTiesToEven); 1763 assert(fs==opOK || fs==opInexact); // likewise 1764 1765 if (isZero()) 1766 sign = origSign; // IEEE754 requires this 1767 delete[] x; 1768 return fs; 1769 } 1770 1771 /* Normalized llvm frem (C fmod). 1772 This is not currently correct in all cases. */ 1773 APFloat::opStatus 1774 APFloat::mod(const APFloat &rhs) 1775 { 1776 opStatus fs; 1777 fs = modSpecials(rhs); 1778 1779 if (isFiniteNonZero() && rhs.isFiniteNonZero()) { 1780 APFloat V = *this; 1781 unsigned int origSign = sign; 1782 1783 fs = V.divide(rhs, rmNearestTiesToEven); 1784 if (fs == opDivByZero) 1785 return fs; 1786 1787 int parts = partCount(); 1788 integerPart *x = new integerPart[parts]; 1789 bool ignored; 1790 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1791 rmTowardZero, &ignored); 1792 if (fs==opInvalidOp) 1793 return fs; 1794 1795 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1796 rmNearestTiesToEven); 1797 assert(fs==opOK); // should always work 1798 1799 fs = V.multiply(rhs, rmNearestTiesToEven); 1800 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1801 1802 fs = subtract(V, rmNearestTiesToEven); 1803 assert(fs==opOK || fs==opInexact); // likewise 1804 1805 if (isZero()) 1806 sign = origSign; // IEEE754 requires this 1807 delete[] x; 1808 } 1809 return fs; 1810 } 1811 1812 /* Normalized fused-multiply-add. */ 1813 APFloat::opStatus 1814 APFloat::fusedMultiplyAdd(const APFloat &multiplicand, 1815 const APFloat &addend, 1816 roundingMode rounding_mode) 1817 { 1818 opStatus fs; 1819 1820 /* Post-multiplication sign, before addition. */ 1821 sign ^= multiplicand.sign; 1822 1823 /* If and only if all arguments are normal do we need to do an 1824 extended-precision calculation. */ 1825 if (isFiniteNonZero() && 1826 multiplicand.isFiniteNonZero() && 1827 addend.isFinite()) { 1828 lostFraction lost_fraction; 1829 1830 lost_fraction = multiplySignificand(multiplicand, &addend); 1831 fs = normalize(rounding_mode, lost_fraction); 1832 if (lost_fraction != lfExactlyZero) 1833 fs = (opStatus) (fs | opInexact); 1834 1835 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1836 positive zero unless rounding to minus infinity, except that 1837 adding two like-signed zeroes gives that zero. */ 1838 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) 1839 sign = (rounding_mode == rmTowardNegative); 1840 } else { 1841 fs = multiplySpecials(multiplicand); 1842 1843 /* FS can only be opOK or opInvalidOp. There is no more work 1844 to do in the latter case. The IEEE-754R standard says it is 1845 implementation-defined in this case whether, if ADDEND is a 1846 quiet NaN, we raise invalid op; this implementation does so. 1847 1848 If we need to do the addition we can do so with normal 1849 precision. */ 1850 if (fs == opOK) 1851 fs = addOrSubtract(addend, rounding_mode, false); 1852 } 1853 1854 return fs; 1855 } 1856 1857 /* Rounding-mode corrrect round to integral value. */ 1858 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) { 1859 opStatus fs; 1860 1861 // If the exponent is large enough, we know that this value is already 1862 // integral, and the arithmetic below would potentially cause it to saturate 1863 // to +/-Inf. Bail out early instead. 1864 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics)) 1865 return opOK; 1866 1867 // The algorithm here is quite simple: we add 2^(p-1), where p is the 1868 // precision of our format, and then subtract it back off again. The choice 1869 // of rounding modes for the addition/subtraction determines the rounding mode 1870 // for our integral rounding as well. 1871 // NOTE: When the input value is negative, we do subtraction followed by 1872 // addition instead. 1873 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); 1874 IntegerConstant <<= semanticsPrecision(*semantics)-1; 1875 APFloat MagicConstant(*semantics); 1876 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 1877 rmNearestTiesToEven); 1878 MagicConstant.copySign(*this); 1879 1880 if (fs != opOK) 1881 return fs; 1882 1883 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly. 1884 bool inputSign = isNegative(); 1885 1886 fs = add(MagicConstant, rounding_mode); 1887 if (fs != opOK && fs != opInexact) 1888 return fs; 1889 1890 fs = subtract(MagicConstant, rounding_mode); 1891 1892 // Restore the input sign. 1893 if (inputSign != isNegative()) 1894 changeSign(); 1895 1896 return fs; 1897 } 1898 1899 1900 /* Comparison requires normalized numbers. */ 1901 APFloat::cmpResult 1902 APFloat::compare(const APFloat &rhs) const 1903 { 1904 cmpResult result; 1905 1906 assert(semantics == rhs.semantics); 1907 1908 switch (PackCategoriesIntoKey(category, rhs.category)) { 1909 default: 1910 llvm_unreachable(nullptr); 1911 1912 case PackCategoriesIntoKey(fcNaN, fcZero): 1913 case PackCategoriesIntoKey(fcNaN, fcNormal): 1914 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1915 case PackCategoriesIntoKey(fcNaN, fcNaN): 1916 case PackCategoriesIntoKey(fcZero, fcNaN): 1917 case PackCategoriesIntoKey(fcNormal, fcNaN): 1918 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1919 return cmpUnordered; 1920 1921 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1922 case PackCategoriesIntoKey(fcInfinity, fcZero): 1923 case PackCategoriesIntoKey(fcNormal, fcZero): 1924 if (sign) 1925 return cmpLessThan; 1926 else 1927 return cmpGreaterThan; 1928 1929 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1930 case PackCategoriesIntoKey(fcZero, fcInfinity): 1931 case PackCategoriesIntoKey(fcZero, fcNormal): 1932 if (rhs.sign) 1933 return cmpGreaterThan; 1934 else 1935 return cmpLessThan; 1936 1937 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1938 if (sign == rhs.sign) 1939 return cmpEqual; 1940 else if (sign) 1941 return cmpLessThan; 1942 else 1943 return cmpGreaterThan; 1944 1945 case PackCategoriesIntoKey(fcZero, fcZero): 1946 return cmpEqual; 1947 1948 case PackCategoriesIntoKey(fcNormal, fcNormal): 1949 break; 1950 } 1951 1952 /* Two normal numbers. Do they have the same sign? */ 1953 if (sign != rhs.sign) { 1954 if (sign) 1955 result = cmpLessThan; 1956 else 1957 result = cmpGreaterThan; 1958 } else { 1959 /* Compare absolute values; invert result if negative. */ 1960 result = compareAbsoluteValue(rhs); 1961 1962 if (sign) { 1963 if (result == cmpLessThan) 1964 result = cmpGreaterThan; 1965 else if (result == cmpGreaterThan) 1966 result = cmpLessThan; 1967 } 1968 } 1969 1970 return result; 1971 } 1972 1973 /// APFloat::convert - convert a value of one floating point type to another. 1974 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 1975 /// records whether the transformation lost information, i.e. whether 1976 /// converting the result back to the original type will produce the 1977 /// original value (this is almost the same as return value==fsOK, but there 1978 /// are edge cases where this is not so). 1979 1980 APFloat::opStatus 1981 APFloat::convert(const fltSemantics &toSemantics, 1982 roundingMode rounding_mode, bool *losesInfo) 1983 { 1984 lostFraction lostFraction; 1985 unsigned int newPartCount, oldPartCount; 1986 opStatus fs; 1987 int shift; 1988 const fltSemantics &fromSemantics = *semantics; 1989 1990 lostFraction = lfExactlyZero; 1991 newPartCount = partCountForBits(toSemantics.precision + 1); 1992 oldPartCount = partCount(); 1993 shift = toSemantics.precision - fromSemantics.precision; 1994 1995 bool X86SpecialNan = false; 1996 if (&fromSemantics == &APFloat::x87DoubleExtended && 1997 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN && 1998 (!(*significandParts() & 0x8000000000000000ULL) || 1999 !(*significandParts() & 0x4000000000000000ULL))) { 2000 // x86 has some unusual NaNs which cannot be represented in any other 2001 // format; note them here. 2002 X86SpecialNan = true; 2003 } 2004 2005 // If this is a truncation of a denormal number, and the target semantics 2006 // has larger exponent range than the source semantics (this can happen 2007 // when truncating from PowerPC double-double to double format), the 2008 // right shift could lose result mantissa bits. Adjust exponent instead 2009 // of performing excessive shift. 2010 if (shift < 0 && isFiniteNonZero()) { 2011 int exponentChange = significandMSB() + 1 - fromSemantics.precision; 2012 if (exponent + exponentChange < toSemantics.minExponent) 2013 exponentChange = toSemantics.minExponent - exponent; 2014 if (exponentChange < shift) 2015 exponentChange = shift; 2016 if (exponentChange < 0) { 2017 shift -= exponentChange; 2018 exponent += exponentChange; 2019 } 2020 } 2021 2022 // If this is a truncation, perform the shift before we narrow the storage. 2023 if (shift < 0 && (isFiniteNonZero() || category==fcNaN)) 2024 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 2025 2026 // Fix the storage so it can hold to new value. 2027 if (newPartCount > oldPartCount) { 2028 // The new type requires more storage; make it available. 2029 integerPart *newParts; 2030 newParts = new integerPart[newPartCount]; 2031 APInt::tcSet(newParts, 0, newPartCount); 2032 if (isFiniteNonZero() || category==fcNaN) 2033 APInt::tcAssign(newParts, significandParts(), oldPartCount); 2034 freeSignificand(); 2035 significand.parts = newParts; 2036 } else if (newPartCount == 1 && oldPartCount != 1) { 2037 // Switch to built-in storage for a single part. 2038 integerPart newPart = 0; 2039 if (isFiniteNonZero() || category==fcNaN) 2040 newPart = significandParts()[0]; 2041 freeSignificand(); 2042 significand.part = newPart; 2043 } 2044 2045 // Now that we have the right storage, switch the semantics. 2046 semantics = &toSemantics; 2047 2048 // If this is an extension, perform the shift now that the storage is 2049 // available. 2050 if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) 2051 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 2052 2053 if (isFiniteNonZero()) { 2054 fs = normalize(rounding_mode, lostFraction); 2055 *losesInfo = (fs != opOK); 2056 } else if (category == fcNaN) { 2057 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 2058 2059 // For x87 extended precision, we want to make a NaN, not a special NaN if 2060 // the input wasn't special either. 2061 if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended) 2062 APInt::tcSetBit(significandParts(), semantics->precision - 1); 2063 2064 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) 2065 // does not give you back the same bits. This is dubious, and we 2066 // don't currently do it. You're really supposed to get 2067 // an invalid operation signal at runtime, but nobody does that. 2068 fs = opOK; 2069 } else { 2070 *losesInfo = false; 2071 fs = opOK; 2072 } 2073 2074 return fs; 2075 } 2076 2077 /* Convert a floating point number to an integer according to the 2078 rounding mode. If the rounded integer value is out of range this 2079 returns an invalid operation exception and the contents of the 2080 destination parts are unspecified. If the rounded value is in 2081 range but the floating point number is not the exact integer, the C 2082 standard doesn't require an inexact exception to be raised. IEEE 2083 854 does require it so we do that. 2084 2085 Note that for conversions to integer type the C standard requires 2086 round-to-zero to always be used. */ 2087 APFloat::opStatus 2088 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width, 2089 bool isSigned, 2090 roundingMode rounding_mode, 2091 bool *isExact) const 2092 { 2093 lostFraction lost_fraction; 2094 const integerPart *src; 2095 unsigned int dstPartsCount, truncatedBits; 2096 2097 *isExact = false; 2098 2099 /* Handle the three special cases first. */ 2100 if (category == fcInfinity || category == fcNaN) 2101 return opInvalidOp; 2102 2103 dstPartsCount = partCountForBits(width); 2104 2105 if (category == fcZero) { 2106 APInt::tcSet(parts, 0, dstPartsCount); 2107 // Negative zero can't be represented as an int. 2108 *isExact = !sign; 2109 return opOK; 2110 } 2111 2112 src = significandParts(); 2113 2114 /* Step 1: place our absolute value, with any fraction truncated, in 2115 the destination. */ 2116 if (exponent < 0) { 2117 /* Our absolute value is less than one; truncate everything. */ 2118 APInt::tcSet(parts, 0, dstPartsCount); 2119 /* For exponent -1 the integer bit represents .5, look at that. 2120 For smaller exponents leftmost truncated bit is 0. */ 2121 truncatedBits = semantics->precision -1U - exponent; 2122 } else { 2123 /* We want the most significant (exponent + 1) bits; the rest are 2124 truncated. */ 2125 unsigned int bits = exponent + 1U; 2126 2127 /* Hopelessly large in magnitude? */ 2128 if (bits > width) 2129 return opInvalidOp; 2130 2131 if (bits < semantics->precision) { 2132 /* We truncate (semantics->precision - bits) bits. */ 2133 truncatedBits = semantics->precision - bits; 2134 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits); 2135 } else { 2136 /* We want at least as many bits as are available. */ 2137 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0); 2138 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision); 2139 truncatedBits = 0; 2140 } 2141 } 2142 2143 /* Step 2: work out any lost fraction, and increment the absolute 2144 value if we would round away from zero. */ 2145 if (truncatedBits) { 2146 lost_fraction = lostFractionThroughTruncation(src, partCount(), 2147 truncatedBits); 2148 if (lost_fraction != lfExactlyZero && 2149 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2150 if (APInt::tcIncrement(parts, dstPartsCount)) 2151 return opInvalidOp; /* Overflow. */ 2152 } 2153 } else { 2154 lost_fraction = lfExactlyZero; 2155 } 2156 2157 /* Step 3: check if we fit in the destination. */ 2158 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1; 2159 2160 if (sign) { 2161 if (!isSigned) { 2162 /* Negative numbers cannot be represented as unsigned. */ 2163 if (omsb != 0) 2164 return opInvalidOp; 2165 } else { 2166 /* It takes omsb bits to represent the unsigned integer value. 2167 We lose a bit for the sign, but care is needed as the 2168 maximally negative integer is a special case. */ 2169 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb) 2170 return opInvalidOp; 2171 2172 /* This case can happen because of rounding. */ 2173 if (omsb > width) 2174 return opInvalidOp; 2175 } 2176 2177 APInt::tcNegate (parts, dstPartsCount); 2178 } else { 2179 if (omsb >= width + !isSigned) 2180 return opInvalidOp; 2181 } 2182 2183 if (lost_fraction == lfExactlyZero) { 2184 *isExact = true; 2185 return opOK; 2186 } else 2187 return opInexact; 2188 } 2189 2190 /* Same as convertToSignExtendedInteger, except we provide 2191 deterministic values in case of an invalid operation exception, 2192 namely zero for NaNs and the minimal or maximal value respectively 2193 for underflow or overflow. 2194 The *isExact output tells whether the result is exact, in the sense 2195 that converting it back to the original floating point type produces 2196 the original value. This is almost equivalent to result==opOK, 2197 except for negative zeroes. 2198 */ 2199 APFloat::opStatus 2200 APFloat::convertToInteger(integerPart *parts, unsigned int width, 2201 bool isSigned, 2202 roundingMode rounding_mode, bool *isExact) const 2203 { 2204 opStatus fs; 2205 2206 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2207 isExact); 2208 2209 if (fs == opInvalidOp) { 2210 unsigned int bits, dstPartsCount; 2211 2212 dstPartsCount = partCountForBits(width); 2213 2214 if (category == fcNaN) 2215 bits = 0; 2216 else if (sign) 2217 bits = isSigned; 2218 else 2219 bits = width - isSigned; 2220 2221 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits); 2222 if (sign && isSigned) 2223 APInt::tcShiftLeft(parts, dstPartsCount, width - 1); 2224 } 2225 2226 return fs; 2227 } 2228 2229 /* Same as convertToInteger(integerPart*, ...), except the result is returned in 2230 an APSInt, whose initial bit-width and signed-ness are used to determine the 2231 precision of the conversion. 2232 */ 2233 APFloat::opStatus 2234 APFloat::convertToInteger(APSInt &result, 2235 roundingMode rounding_mode, bool *isExact) const 2236 { 2237 unsigned bitWidth = result.getBitWidth(); 2238 SmallVector<uint64_t, 4> parts(result.getNumWords()); 2239 opStatus status = convertToInteger( 2240 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact); 2241 // Keeps the original signed-ness. 2242 result = APInt(bitWidth, parts); 2243 return status; 2244 } 2245 2246 /* Convert an unsigned integer SRC to a floating point number, 2247 rounding according to ROUNDING_MODE. The sign of the floating 2248 point number is not modified. */ 2249 APFloat::opStatus 2250 APFloat::convertFromUnsignedParts(const integerPart *src, 2251 unsigned int srcCount, 2252 roundingMode rounding_mode) 2253 { 2254 unsigned int omsb, precision, dstCount; 2255 integerPart *dst; 2256 lostFraction lost_fraction; 2257 2258 category = fcNormal; 2259 omsb = APInt::tcMSB(src, srcCount) + 1; 2260 dst = significandParts(); 2261 dstCount = partCount(); 2262 precision = semantics->precision; 2263 2264 /* We want the most significant PRECISION bits of SRC. There may not 2265 be that many; extract what we can. */ 2266 if (precision <= omsb) { 2267 exponent = omsb - 1; 2268 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2269 omsb - precision); 2270 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2271 } else { 2272 exponent = precision - 1; 2273 lost_fraction = lfExactlyZero; 2274 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2275 } 2276 2277 return normalize(rounding_mode, lost_fraction); 2278 } 2279 2280 APFloat::opStatus 2281 APFloat::convertFromAPInt(const APInt &Val, 2282 bool isSigned, 2283 roundingMode rounding_mode) 2284 { 2285 unsigned int partCount = Val.getNumWords(); 2286 APInt api = Val; 2287 2288 sign = false; 2289 if (isSigned && api.isNegative()) { 2290 sign = true; 2291 api = -api; 2292 } 2293 2294 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2295 } 2296 2297 /* Convert a two's complement integer SRC to a floating point number, 2298 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2299 integer is signed, in which case it must be sign-extended. */ 2300 APFloat::opStatus 2301 APFloat::convertFromSignExtendedInteger(const integerPart *src, 2302 unsigned int srcCount, 2303 bool isSigned, 2304 roundingMode rounding_mode) 2305 { 2306 opStatus status; 2307 2308 if (isSigned && 2309 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2310 integerPart *copy; 2311 2312 /* If we're signed and negative negate a copy. */ 2313 sign = true; 2314 copy = new integerPart[srcCount]; 2315 APInt::tcAssign(copy, src, srcCount); 2316 APInt::tcNegate(copy, srcCount); 2317 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2318 delete [] copy; 2319 } else { 2320 sign = false; 2321 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2322 } 2323 2324 return status; 2325 } 2326 2327 /* FIXME: should this just take a const APInt reference? */ 2328 APFloat::opStatus 2329 APFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2330 unsigned int width, bool isSigned, 2331 roundingMode rounding_mode) 2332 { 2333 unsigned int partCount = partCountForBits(width); 2334 APInt api = APInt(width, makeArrayRef(parts, partCount)); 2335 2336 sign = false; 2337 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2338 sign = true; 2339 api = -api; 2340 } 2341 2342 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2343 } 2344 2345 APFloat::opStatus 2346 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode) 2347 { 2348 lostFraction lost_fraction = lfExactlyZero; 2349 2350 category = fcNormal; 2351 zeroSignificand(); 2352 exponent = 0; 2353 2354 integerPart *significand = significandParts(); 2355 unsigned partsCount = partCount(); 2356 unsigned bitPos = partsCount * integerPartWidth; 2357 bool computedTrailingFraction = false; 2358 2359 // Skip leading zeroes and any (hexa)decimal point. 2360 StringRef::iterator begin = s.begin(); 2361 StringRef::iterator end = s.end(); 2362 StringRef::iterator dot; 2363 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2364 StringRef::iterator firstSignificantDigit = p; 2365 2366 while (p != end) { 2367 integerPart hex_value; 2368 2369 if (*p == '.') { 2370 assert(dot == end && "String contains multiple dots"); 2371 dot = p++; 2372 continue; 2373 } 2374 2375 hex_value = hexDigitValue(*p); 2376 if (hex_value == -1U) 2377 break; 2378 2379 p++; 2380 2381 // Store the number while we have space. 2382 if (bitPos) { 2383 bitPos -= 4; 2384 hex_value <<= bitPos % integerPartWidth; 2385 significand[bitPos / integerPartWidth] |= hex_value; 2386 } else if (!computedTrailingFraction) { 2387 lost_fraction = trailingHexadecimalFraction(p, end, hex_value); 2388 computedTrailingFraction = true; 2389 } 2390 } 2391 2392 /* Hex floats require an exponent but not a hexadecimal point. */ 2393 assert(p != end && "Hex strings require an exponent"); 2394 assert((*p == 'p' || *p == 'P') && "Invalid character in significand"); 2395 assert(p != begin && "Significand has no digits"); 2396 assert((dot == end || p - begin != 1) && "Significand has no digits"); 2397 2398 /* Ignore the exponent if we are zero. */ 2399 if (p != firstSignificantDigit) { 2400 int expAdjustment; 2401 2402 /* Implicit hexadecimal point? */ 2403 if (dot == end) 2404 dot = p; 2405 2406 /* Calculate the exponent adjustment implicit in the number of 2407 significant digits. */ 2408 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 2409 if (expAdjustment < 0) 2410 expAdjustment++; 2411 expAdjustment = expAdjustment * 4 - 1; 2412 2413 /* Adjust for writing the significand starting at the most 2414 significant nibble. */ 2415 expAdjustment += semantics->precision; 2416 expAdjustment -= partsCount * integerPartWidth; 2417 2418 /* Adjust for the given exponent. */ 2419 exponent = totalExponent(p + 1, end, expAdjustment); 2420 } 2421 2422 return normalize(rounding_mode, lost_fraction); 2423 } 2424 2425 APFloat::opStatus 2426 APFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2427 unsigned sigPartCount, int exp, 2428 roundingMode rounding_mode) 2429 { 2430 unsigned int parts, pow5PartCount; 2431 fltSemantics calcSemantics = { 32767, -32767, 0, 0 }; 2432 integerPart pow5Parts[maxPowerOfFiveParts]; 2433 bool isNearest; 2434 2435 isNearest = (rounding_mode == rmNearestTiesToEven || 2436 rounding_mode == rmNearestTiesToAway); 2437 2438 parts = partCountForBits(semantics->precision + 11); 2439 2440 /* Calculate pow(5, abs(exp)). */ 2441 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2442 2443 for (;; parts *= 2) { 2444 opStatus sigStatus, powStatus; 2445 unsigned int excessPrecision, truncatedBits; 2446 2447 calcSemantics.precision = parts * integerPartWidth - 1; 2448 excessPrecision = calcSemantics.precision - semantics->precision; 2449 truncatedBits = excessPrecision; 2450 2451 APFloat decSig = APFloat::getZero(calcSemantics, sign); 2452 APFloat pow5(calcSemantics); 2453 2454 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2455 rmNearestTiesToEven); 2456 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2457 rmNearestTiesToEven); 2458 /* Add exp, as 10^n = 5^n * 2^n. */ 2459 decSig.exponent += exp; 2460 2461 lostFraction calcLostFraction; 2462 integerPart HUerr, HUdistance; 2463 unsigned int powHUerr; 2464 2465 if (exp >= 0) { 2466 /* multiplySignificand leaves the precision-th bit set to 1. */ 2467 calcLostFraction = decSig.multiplySignificand(pow5, nullptr); 2468 powHUerr = powStatus != opOK; 2469 } else { 2470 calcLostFraction = decSig.divideSignificand(pow5); 2471 /* Denormal numbers have less precision. */ 2472 if (decSig.exponent < semantics->minExponent) { 2473 excessPrecision += (semantics->minExponent - decSig.exponent); 2474 truncatedBits = excessPrecision; 2475 if (excessPrecision > calcSemantics.precision) 2476 excessPrecision = calcSemantics.precision; 2477 } 2478 /* Extra half-ulp lost in reciprocal of exponent. */ 2479 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 2480 } 2481 2482 /* Both multiplySignificand and divideSignificand return the 2483 result with the integer bit set. */ 2484 assert(APInt::tcExtractBit 2485 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2486 2487 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2488 powHUerr); 2489 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2490 excessPrecision, isNearest); 2491 2492 /* Are we guaranteed to round correctly if we truncate? */ 2493 if (HUdistance >= HUerr) { 2494 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2495 calcSemantics.precision - excessPrecision, 2496 excessPrecision); 2497 /* Take the exponent of decSig. If we tcExtract-ed less bits 2498 above we must adjust our exponent to compensate for the 2499 implicit right shift. */ 2500 exponent = (decSig.exponent + semantics->precision 2501 - (calcSemantics.precision - excessPrecision)); 2502 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2503 decSig.partCount(), 2504 truncatedBits); 2505 return normalize(rounding_mode, calcLostFraction); 2506 } 2507 } 2508 } 2509 2510 APFloat::opStatus 2511 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) 2512 { 2513 decimalInfo D; 2514 opStatus fs; 2515 2516 /* Scan the text. */ 2517 StringRef::iterator p = str.begin(); 2518 interpretDecimal(p, str.end(), &D); 2519 2520 /* Handle the quick cases. First the case of no significant digits, 2521 i.e. zero, and then exponents that are obviously too large or too 2522 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 2523 definitely overflows if 2524 2525 (exp - 1) * L >= maxExponent 2526 2527 and definitely underflows to zero where 2528 2529 (exp + 1) * L <= minExponent - precision 2530 2531 With integer arithmetic the tightest bounds for L are 2532 2533 93/28 < L < 196/59 [ numerator <= 256 ] 2534 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 2535 */ 2536 2537 // Test if we have a zero number allowing for strings with no null terminators 2538 // and zero decimals with non-zero exponents. 2539 // 2540 // We computed firstSigDigit by ignoring all zeros and dots. Thus if 2541 // D->firstSigDigit equals str.end(), every digit must be a zero and there can 2542 // be at most one dot. On the other hand, if we have a zero with a non-zero 2543 // exponent, then we know that D.firstSigDigit will be non-numeric. 2544 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { 2545 category = fcZero; 2546 fs = opOK; 2547 2548 /* Check whether the normalized exponent is high enough to overflow 2549 max during the log-rebasing in the max-exponent check below. */ 2550 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 2551 fs = handleOverflow(rounding_mode); 2552 2553 /* If it wasn't, then it also wasn't high enough to overflow max 2554 during the log-rebasing in the min-exponent check. Check that it 2555 won't overflow min in either check, then perform the min-exponent 2556 check. */ 2557 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 2558 (D.normalizedExponent + 1) * 28738 <= 2559 8651 * (semantics->minExponent - (int) semantics->precision)) { 2560 /* Underflow to zero and round. */ 2561 category = fcNormal; 2562 zeroSignificand(); 2563 fs = normalize(rounding_mode, lfLessThanHalf); 2564 2565 /* We can finally safely perform the max-exponent check. */ 2566 } else if ((D.normalizedExponent - 1) * 42039 2567 >= 12655 * semantics->maxExponent) { 2568 /* Overflow and round. */ 2569 fs = handleOverflow(rounding_mode); 2570 } else { 2571 integerPart *decSignificand; 2572 unsigned int partCount; 2573 2574 /* A tight upper bound on number of bits required to hold an 2575 N-digit decimal integer is N * 196 / 59. Allocate enough space 2576 to hold the full significand, and an extra part required by 2577 tcMultiplyPart. */ 2578 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 2579 partCount = partCountForBits(1 + 196 * partCount / 59); 2580 decSignificand = new integerPart[partCount + 1]; 2581 partCount = 0; 2582 2583 /* Convert to binary efficiently - we do almost all multiplication 2584 in an integerPart. When this would overflow do we do a single 2585 bignum multiplication, and then revert again to multiplication 2586 in an integerPart. */ 2587 do { 2588 integerPart decValue, val, multiplier; 2589 2590 val = 0; 2591 multiplier = 1; 2592 2593 do { 2594 if (*p == '.') { 2595 p++; 2596 if (p == str.end()) { 2597 break; 2598 } 2599 } 2600 decValue = decDigitValue(*p++); 2601 assert(decValue < 10U && "Invalid character in significand"); 2602 multiplier *= 10; 2603 val = val * 10 + decValue; 2604 /* The maximum number that can be multiplied by ten with any 2605 digit added without overflowing an integerPart. */ 2606 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 2607 2608 /* Multiply out the current part. */ 2609 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 2610 partCount, partCount + 1, false); 2611 2612 /* If we used another part (likely but not guaranteed), increase 2613 the count. */ 2614 if (decSignificand[partCount]) 2615 partCount++; 2616 } while (p <= D.lastSigDigit); 2617 2618 category = fcNormal; 2619 fs = roundSignificandWithExponent(decSignificand, partCount, 2620 D.exponent, rounding_mode); 2621 2622 delete [] decSignificand; 2623 } 2624 2625 return fs; 2626 } 2627 2628 bool 2629 APFloat::convertFromStringSpecials(StringRef str) { 2630 if (str.equals("inf") || str.equals("INFINITY")) { 2631 makeInf(false); 2632 return true; 2633 } 2634 2635 if (str.equals("-inf") || str.equals("-INFINITY")) { 2636 makeInf(true); 2637 return true; 2638 } 2639 2640 if (str.equals("nan") || str.equals("NaN")) { 2641 makeNaN(false, false); 2642 return true; 2643 } 2644 2645 if (str.equals("-nan") || str.equals("-NaN")) { 2646 makeNaN(false, true); 2647 return true; 2648 } 2649 2650 return false; 2651 } 2652 2653 APFloat::opStatus 2654 APFloat::convertFromString(StringRef str, roundingMode rounding_mode) 2655 { 2656 assert(!str.empty() && "Invalid string length"); 2657 2658 // Handle special cases. 2659 if (convertFromStringSpecials(str)) 2660 return opOK; 2661 2662 /* Handle a leading minus sign. */ 2663 StringRef::iterator p = str.begin(); 2664 size_t slen = str.size(); 2665 sign = *p == '-' ? 1 : 0; 2666 if (*p == '-' || *p == '+') { 2667 p++; 2668 slen--; 2669 assert(slen && "String has no digits"); 2670 } 2671 2672 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 2673 assert(slen - 2 && "Invalid string"); 2674 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 2675 rounding_mode); 2676 } 2677 2678 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 2679 } 2680 2681 /* Write out a hexadecimal representation of the floating point value 2682 to DST, which must be of sufficient size, in the C99 form 2683 [-]0xh.hhhhp[+-]d. Return the number of characters written, 2684 excluding the terminating NUL. 2685 2686 If UPPERCASE, the output is in upper case, otherwise in lower case. 2687 2688 HEXDIGITS digits appear altogether, rounding the value if 2689 necessary. If HEXDIGITS is 0, the minimal precision to display the 2690 number precisely is used instead. If nothing would appear after 2691 the decimal point it is suppressed. 2692 2693 The decimal exponent is always printed and has at least one digit. 2694 Zero values display an exponent of zero. Infinities and NaNs 2695 appear as "infinity" or "nan" respectively. 2696 2697 The above rules are as specified by C99. There is ambiguity about 2698 what the leading hexadecimal digit should be. This implementation 2699 uses whatever is necessary so that the exponent is displayed as 2700 stored. This implies the exponent will fall within the IEEE format 2701 range, and the leading hexadecimal digit will be 0 (for denormals), 2702 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 2703 any other digits zero). 2704 */ 2705 unsigned int 2706 APFloat::convertToHexString(char *dst, unsigned int hexDigits, 2707 bool upperCase, roundingMode rounding_mode) const 2708 { 2709 char *p; 2710 2711 p = dst; 2712 if (sign) 2713 *dst++ = '-'; 2714 2715 switch (category) { 2716 case fcInfinity: 2717 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 2718 dst += sizeof infinityL - 1; 2719 break; 2720 2721 case fcNaN: 2722 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 2723 dst += sizeof NaNU - 1; 2724 break; 2725 2726 case fcZero: 2727 *dst++ = '0'; 2728 *dst++ = upperCase ? 'X': 'x'; 2729 *dst++ = '0'; 2730 if (hexDigits > 1) { 2731 *dst++ = '.'; 2732 memset (dst, '0', hexDigits - 1); 2733 dst += hexDigits - 1; 2734 } 2735 *dst++ = upperCase ? 'P': 'p'; 2736 *dst++ = '0'; 2737 break; 2738 2739 case fcNormal: 2740 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 2741 break; 2742 } 2743 2744 *dst = 0; 2745 2746 return static_cast<unsigned int>(dst - p); 2747 } 2748 2749 /* Does the hard work of outputting the correctly rounded hexadecimal 2750 form of a normal floating point number with the specified number of 2751 hexadecimal digits. If HEXDIGITS is zero the minimum number of 2752 digits necessary to print the value precisely is output. */ 2753 char * 2754 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 2755 bool upperCase, 2756 roundingMode rounding_mode) const 2757 { 2758 unsigned int count, valueBits, shift, partsCount, outputDigits; 2759 const char *hexDigitChars; 2760 const integerPart *significand; 2761 char *p; 2762 bool roundUp; 2763 2764 *dst++ = '0'; 2765 *dst++ = upperCase ? 'X': 'x'; 2766 2767 roundUp = false; 2768 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 2769 2770 significand = significandParts(); 2771 partsCount = partCount(); 2772 2773 /* +3 because the first digit only uses the single integer bit, so 2774 we have 3 virtual zero most-significant-bits. */ 2775 valueBits = semantics->precision + 3; 2776 shift = integerPartWidth - valueBits % integerPartWidth; 2777 2778 /* The natural number of digits required ignoring trailing 2779 insignificant zeroes. */ 2780 outputDigits = (valueBits - significandLSB () + 3) / 4; 2781 2782 /* hexDigits of zero means use the required number for the 2783 precision. Otherwise, see if we are truncating. If we are, 2784 find out if we need to round away from zero. */ 2785 if (hexDigits) { 2786 if (hexDigits < outputDigits) { 2787 /* We are dropping non-zero bits, so need to check how to round. 2788 "bits" is the number of dropped bits. */ 2789 unsigned int bits; 2790 lostFraction fraction; 2791 2792 bits = valueBits - hexDigits * 4; 2793 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 2794 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 2795 } 2796 outputDigits = hexDigits; 2797 } 2798 2799 /* Write the digits consecutively, and start writing in the location 2800 of the hexadecimal point. We move the most significant digit 2801 left and add the hexadecimal point later. */ 2802 p = ++dst; 2803 2804 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 2805 2806 while (outputDigits && count) { 2807 integerPart part; 2808 2809 /* Put the most significant integerPartWidth bits in "part". */ 2810 if (--count == partsCount) 2811 part = 0; /* An imaginary higher zero part. */ 2812 else 2813 part = significand[count] << shift; 2814 2815 if (count && shift) 2816 part |= significand[count - 1] >> (integerPartWidth - shift); 2817 2818 /* Convert as much of "part" to hexdigits as we can. */ 2819 unsigned int curDigits = integerPartWidth / 4; 2820 2821 if (curDigits > outputDigits) 2822 curDigits = outputDigits; 2823 dst += partAsHex (dst, part, curDigits, hexDigitChars); 2824 outputDigits -= curDigits; 2825 } 2826 2827 if (roundUp) { 2828 char *q = dst; 2829 2830 /* Note that hexDigitChars has a trailing '0'. */ 2831 do { 2832 q--; 2833 *q = hexDigitChars[hexDigitValue (*q) + 1]; 2834 } while (*q == '0'); 2835 assert(q >= p); 2836 } else { 2837 /* Add trailing zeroes. */ 2838 memset (dst, '0', outputDigits); 2839 dst += outputDigits; 2840 } 2841 2842 /* Move the most significant digit to before the point, and if there 2843 is something after the decimal point add it. This must come 2844 after rounding above. */ 2845 p[-1] = p[0]; 2846 if (dst -1 == p) 2847 dst--; 2848 else 2849 p[0] = '.'; 2850 2851 /* Finally output the exponent. */ 2852 *dst++ = upperCase ? 'P': 'p'; 2853 2854 return writeSignedDecimal (dst, exponent); 2855 } 2856 2857 hash_code llvm::hash_value(const APFloat &Arg) { 2858 if (!Arg.isFiniteNonZero()) 2859 return hash_combine((uint8_t)Arg.category, 2860 // NaN has no sign, fix it at zero. 2861 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, 2862 Arg.semantics->precision); 2863 2864 // Normal floats need their exponent and significand hashed. 2865 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, 2866 Arg.semantics->precision, Arg.exponent, 2867 hash_combine_range( 2868 Arg.significandParts(), 2869 Arg.significandParts() + Arg.partCount())); 2870 } 2871 2872 // Conversion from APFloat to/from host float/double. It may eventually be 2873 // possible to eliminate these and have everybody deal with APFloats, but that 2874 // will take a while. This approach will not easily extend to long double. 2875 // Current implementation requires integerPartWidth==64, which is correct at 2876 // the moment but could be made more general. 2877 2878 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 2879 // the actual IEEE respresentations. We compensate for that here. 2880 2881 APInt 2882 APFloat::convertF80LongDoubleAPFloatToAPInt() const 2883 { 2884 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended); 2885 assert(partCount()==2); 2886 2887 uint64_t myexponent, mysignificand; 2888 2889 if (isFiniteNonZero()) { 2890 myexponent = exponent+16383; //bias 2891 mysignificand = significandParts()[0]; 2892 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 2893 myexponent = 0; // denormal 2894 } else if (category==fcZero) { 2895 myexponent = 0; 2896 mysignificand = 0; 2897 } else if (category==fcInfinity) { 2898 myexponent = 0x7fff; 2899 mysignificand = 0x8000000000000000ULL; 2900 } else { 2901 assert(category == fcNaN && "Unknown category"); 2902 myexponent = 0x7fff; 2903 mysignificand = significandParts()[0]; 2904 } 2905 2906 uint64_t words[2]; 2907 words[0] = mysignificand; 2908 words[1] = ((uint64_t)(sign & 1) << 15) | 2909 (myexponent & 0x7fffLL); 2910 return APInt(80, words); 2911 } 2912 2913 APInt 2914 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const 2915 { 2916 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble); 2917 assert(partCount()==2); 2918 2919 uint64_t words[2]; 2920 opStatus fs; 2921 bool losesInfo; 2922 2923 // Convert number to double. To avoid spurious underflows, we re- 2924 // normalize against the "double" minExponent first, and only *then* 2925 // truncate the mantissa. The result of that second conversion 2926 // may be inexact, but should never underflow. 2927 // Declare fltSemantics before APFloat that uses it (and 2928 // saves pointer to it) to ensure correct destruction order. 2929 fltSemantics extendedSemantics = *semantics; 2930 extendedSemantics.minExponent = IEEEdouble.minExponent; 2931 APFloat extended(*this); 2932 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2933 assert(fs == opOK && !losesInfo); 2934 (void)fs; 2935 2936 APFloat u(extended); 2937 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); 2938 assert(fs == opOK || fs == opInexact); 2939 (void)fs; 2940 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 2941 2942 // If conversion was exact or resulted in a special case, we're done; 2943 // just set the second double to zero. Otherwise, re-convert back to 2944 // the extended format and compute the difference. This now should 2945 // convert exactly to double. 2946 if (u.isFiniteNonZero() && losesInfo) { 2947 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2948 assert(fs == opOK && !losesInfo); 2949 (void)fs; 2950 2951 APFloat v(extended); 2952 v.subtract(u, rmNearestTiesToEven); 2953 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); 2954 assert(fs == opOK && !losesInfo); 2955 (void)fs; 2956 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 2957 } else { 2958 words[1] = 0; 2959 } 2960 2961 return APInt(128, words); 2962 } 2963 2964 APInt 2965 APFloat::convertQuadrupleAPFloatToAPInt() const 2966 { 2967 assert(semantics == (const llvm::fltSemantics*)&IEEEquad); 2968 assert(partCount()==2); 2969 2970 uint64_t myexponent, mysignificand, mysignificand2; 2971 2972 if (isFiniteNonZero()) { 2973 myexponent = exponent+16383; //bias 2974 mysignificand = significandParts()[0]; 2975 mysignificand2 = significandParts()[1]; 2976 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) 2977 myexponent = 0; // denormal 2978 } else if (category==fcZero) { 2979 myexponent = 0; 2980 mysignificand = mysignificand2 = 0; 2981 } else if (category==fcInfinity) { 2982 myexponent = 0x7fff; 2983 mysignificand = mysignificand2 = 0; 2984 } else { 2985 assert(category == fcNaN && "Unknown category!"); 2986 myexponent = 0x7fff; 2987 mysignificand = significandParts()[0]; 2988 mysignificand2 = significandParts()[1]; 2989 } 2990 2991 uint64_t words[2]; 2992 words[0] = mysignificand; 2993 words[1] = ((uint64_t)(sign & 1) << 63) | 2994 ((myexponent & 0x7fff) << 48) | 2995 (mysignificand2 & 0xffffffffffffLL); 2996 2997 return APInt(128, words); 2998 } 2999 3000 APInt 3001 APFloat::convertDoubleAPFloatToAPInt() const 3002 { 3003 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); 3004 assert(partCount()==1); 3005 3006 uint64_t myexponent, mysignificand; 3007 3008 if (isFiniteNonZero()) { 3009 myexponent = exponent+1023; //bias 3010 mysignificand = *significandParts(); 3011 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 3012 myexponent = 0; // denormal 3013 } else if (category==fcZero) { 3014 myexponent = 0; 3015 mysignificand = 0; 3016 } else if (category==fcInfinity) { 3017 myexponent = 0x7ff; 3018 mysignificand = 0; 3019 } else { 3020 assert(category == fcNaN && "Unknown category!"); 3021 myexponent = 0x7ff; 3022 mysignificand = *significandParts(); 3023 } 3024 3025 return APInt(64, ((((uint64_t)(sign & 1) << 63) | 3026 ((myexponent & 0x7ff) << 52) | 3027 (mysignificand & 0xfffffffffffffLL)))); 3028 } 3029 3030 APInt 3031 APFloat::convertFloatAPFloatToAPInt() const 3032 { 3033 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); 3034 assert(partCount()==1); 3035 3036 uint32_t myexponent, mysignificand; 3037 3038 if (isFiniteNonZero()) { 3039 myexponent = exponent+127; //bias 3040 mysignificand = (uint32_t)*significandParts(); 3041 if (myexponent == 1 && !(mysignificand & 0x800000)) 3042 myexponent = 0; // denormal 3043 } else if (category==fcZero) { 3044 myexponent = 0; 3045 mysignificand = 0; 3046 } else if (category==fcInfinity) { 3047 myexponent = 0xff; 3048 mysignificand = 0; 3049 } else { 3050 assert(category == fcNaN && "Unknown category!"); 3051 myexponent = 0xff; 3052 mysignificand = (uint32_t)*significandParts(); 3053 } 3054 3055 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 3056 (mysignificand & 0x7fffff))); 3057 } 3058 3059 APInt 3060 APFloat::convertHalfAPFloatToAPInt() const 3061 { 3062 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf); 3063 assert(partCount()==1); 3064 3065 uint32_t myexponent, mysignificand; 3066 3067 if (isFiniteNonZero()) { 3068 myexponent = exponent+15; //bias 3069 mysignificand = (uint32_t)*significandParts(); 3070 if (myexponent == 1 && !(mysignificand & 0x400)) 3071 myexponent = 0; // denormal 3072 } else if (category==fcZero) { 3073 myexponent = 0; 3074 mysignificand = 0; 3075 } else if (category==fcInfinity) { 3076 myexponent = 0x1f; 3077 mysignificand = 0; 3078 } else { 3079 assert(category == fcNaN && "Unknown category!"); 3080 myexponent = 0x1f; 3081 mysignificand = (uint32_t)*significandParts(); 3082 } 3083 3084 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | 3085 (mysignificand & 0x3ff))); 3086 } 3087 3088 // This function creates an APInt that is just a bit map of the floating 3089 // point constant as it would appear in memory. It is not a conversion, 3090 // and treating the result as a normal integer is unlikely to be useful. 3091 3092 APInt 3093 APFloat::bitcastToAPInt() const 3094 { 3095 if (semantics == (const llvm::fltSemantics*)&IEEEhalf) 3096 return convertHalfAPFloatToAPInt(); 3097 3098 if (semantics == (const llvm::fltSemantics*)&IEEEsingle) 3099 return convertFloatAPFloatToAPInt(); 3100 3101 if (semantics == (const llvm::fltSemantics*)&IEEEdouble) 3102 return convertDoubleAPFloatToAPInt(); 3103 3104 if (semantics == (const llvm::fltSemantics*)&IEEEquad) 3105 return convertQuadrupleAPFloatToAPInt(); 3106 3107 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble) 3108 return convertPPCDoubleDoubleAPFloatToAPInt(); 3109 3110 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended && 3111 "unknown format!"); 3112 return convertF80LongDoubleAPFloatToAPInt(); 3113 } 3114 3115 float 3116 APFloat::convertToFloat() const 3117 { 3118 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle && 3119 "Float semantics are not IEEEsingle"); 3120 APInt api = bitcastToAPInt(); 3121 return api.bitsToFloat(); 3122 } 3123 3124 double 3125 APFloat::convertToDouble() const 3126 { 3127 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble && 3128 "Float semantics are not IEEEdouble"); 3129 APInt api = bitcastToAPInt(); 3130 return api.bitsToDouble(); 3131 } 3132 3133 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3134 /// does not support these bit patterns: 3135 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3136 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3137 /// exponent = 0, integer bit 1 ("pseudodenormal") 3138 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3139 /// At the moment, the first two are treated as NaNs, the second two as Normal. 3140 void 3141 APFloat::initFromF80LongDoubleAPInt(const APInt &api) 3142 { 3143 assert(api.getBitWidth()==80); 3144 uint64_t i1 = api.getRawData()[0]; 3145 uint64_t i2 = api.getRawData()[1]; 3146 uint64_t myexponent = (i2 & 0x7fff); 3147 uint64_t mysignificand = i1; 3148 3149 initialize(&APFloat::x87DoubleExtended); 3150 assert(partCount()==2); 3151 3152 sign = static_cast<unsigned int>(i2>>15); 3153 if (myexponent==0 && mysignificand==0) { 3154 // exponent, significand meaningless 3155 category = fcZero; 3156 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3157 // exponent, significand meaningless 3158 category = fcInfinity; 3159 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) { 3160 // exponent meaningless 3161 category = fcNaN; 3162 significandParts()[0] = mysignificand; 3163 significandParts()[1] = 0; 3164 } else { 3165 category = fcNormal; 3166 exponent = myexponent - 16383; 3167 significandParts()[0] = mysignificand; 3168 significandParts()[1] = 0; 3169 if (myexponent==0) // denormal 3170 exponent = -16382; 3171 } 3172 } 3173 3174 void 3175 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) 3176 { 3177 assert(api.getBitWidth()==128); 3178 uint64_t i1 = api.getRawData()[0]; 3179 uint64_t i2 = api.getRawData()[1]; 3180 opStatus fs; 3181 bool losesInfo; 3182 3183 // Get the first double and convert to our format. 3184 initFromDoubleAPInt(APInt(64, i1)); 3185 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); 3186 assert(fs == opOK && !losesInfo); 3187 (void)fs; 3188 3189 // Unless we have a special case, add in second double. 3190 if (isFiniteNonZero()) { 3191 APFloat v(IEEEdouble, APInt(64, i2)); 3192 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); 3193 assert(fs == opOK && !losesInfo); 3194 (void)fs; 3195 3196 add(v, rmNearestTiesToEven); 3197 } 3198 } 3199 3200 void 3201 APFloat::initFromQuadrupleAPInt(const APInt &api) 3202 { 3203 assert(api.getBitWidth()==128); 3204 uint64_t i1 = api.getRawData()[0]; 3205 uint64_t i2 = api.getRawData()[1]; 3206 uint64_t myexponent = (i2 >> 48) & 0x7fff; 3207 uint64_t mysignificand = i1; 3208 uint64_t mysignificand2 = i2 & 0xffffffffffffLL; 3209 3210 initialize(&APFloat::IEEEquad); 3211 assert(partCount()==2); 3212 3213 sign = static_cast<unsigned int>(i2>>63); 3214 if (myexponent==0 && 3215 (mysignificand==0 && mysignificand2==0)) { 3216 // exponent, significand meaningless 3217 category = fcZero; 3218 } else if (myexponent==0x7fff && 3219 (mysignificand==0 && mysignificand2==0)) { 3220 // exponent, significand meaningless 3221 category = fcInfinity; 3222 } else if (myexponent==0x7fff && 3223 (mysignificand!=0 || mysignificand2 !=0)) { 3224 // exponent meaningless 3225 category = fcNaN; 3226 significandParts()[0] = mysignificand; 3227 significandParts()[1] = mysignificand2; 3228 } else { 3229 category = fcNormal; 3230 exponent = myexponent - 16383; 3231 significandParts()[0] = mysignificand; 3232 significandParts()[1] = mysignificand2; 3233 if (myexponent==0) // denormal 3234 exponent = -16382; 3235 else 3236 significandParts()[1] |= 0x1000000000000LL; // integer bit 3237 } 3238 } 3239 3240 void 3241 APFloat::initFromDoubleAPInt(const APInt &api) 3242 { 3243 assert(api.getBitWidth()==64); 3244 uint64_t i = *api.getRawData(); 3245 uint64_t myexponent = (i >> 52) & 0x7ff; 3246 uint64_t mysignificand = i & 0xfffffffffffffLL; 3247 3248 initialize(&APFloat::IEEEdouble); 3249 assert(partCount()==1); 3250 3251 sign = static_cast<unsigned int>(i>>63); 3252 if (myexponent==0 && mysignificand==0) { 3253 // exponent, significand meaningless 3254 category = fcZero; 3255 } else if (myexponent==0x7ff && mysignificand==0) { 3256 // exponent, significand meaningless 3257 category = fcInfinity; 3258 } else if (myexponent==0x7ff && mysignificand!=0) { 3259 // exponent meaningless 3260 category = fcNaN; 3261 *significandParts() = mysignificand; 3262 } else { 3263 category = fcNormal; 3264 exponent = myexponent - 1023; 3265 *significandParts() = mysignificand; 3266 if (myexponent==0) // denormal 3267 exponent = -1022; 3268 else 3269 *significandParts() |= 0x10000000000000LL; // integer bit 3270 } 3271 } 3272 3273 void 3274 APFloat::initFromFloatAPInt(const APInt & api) 3275 { 3276 assert(api.getBitWidth()==32); 3277 uint32_t i = (uint32_t)*api.getRawData(); 3278 uint32_t myexponent = (i >> 23) & 0xff; 3279 uint32_t mysignificand = i & 0x7fffff; 3280 3281 initialize(&APFloat::IEEEsingle); 3282 assert(partCount()==1); 3283 3284 sign = i >> 31; 3285 if (myexponent==0 && mysignificand==0) { 3286 // exponent, significand meaningless 3287 category = fcZero; 3288 } else if (myexponent==0xff && mysignificand==0) { 3289 // exponent, significand meaningless 3290 category = fcInfinity; 3291 } else if (myexponent==0xff && mysignificand!=0) { 3292 // sign, exponent, significand meaningless 3293 category = fcNaN; 3294 *significandParts() = mysignificand; 3295 } else { 3296 category = fcNormal; 3297 exponent = myexponent - 127; //bias 3298 *significandParts() = mysignificand; 3299 if (myexponent==0) // denormal 3300 exponent = -126; 3301 else 3302 *significandParts() |= 0x800000; // integer bit 3303 } 3304 } 3305 3306 void 3307 APFloat::initFromHalfAPInt(const APInt & api) 3308 { 3309 assert(api.getBitWidth()==16); 3310 uint32_t i = (uint32_t)*api.getRawData(); 3311 uint32_t myexponent = (i >> 10) & 0x1f; 3312 uint32_t mysignificand = i & 0x3ff; 3313 3314 initialize(&APFloat::IEEEhalf); 3315 assert(partCount()==1); 3316 3317 sign = i >> 15; 3318 if (myexponent==0 && mysignificand==0) { 3319 // exponent, significand meaningless 3320 category = fcZero; 3321 } else if (myexponent==0x1f && mysignificand==0) { 3322 // exponent, significand meaningless 3323 category = fcInfinity; 3324 } else if (myexponent==0x1f && mysignificand!=0) { 3325 // sign, exponent, significand meaningless 3326 category = fcNaN; 3327 *significandParts() = mysignificand; 3328 } else { 3329 category = fcNormal; 3330 exponent = myexponent - 15; //bias 3331 *significandParts() = mysignificand; 3332 if (myexponent==0) // denormal 3333 exponent = -14; 3334 else 3335 *significandParts() |= 0x400; // integer bit 3336 } 3337 } 3338 3339 /// Treat api as containing the bits of a floating point number. Currently 3340 /// we infer the floating point type from the size of the APInt. The 3341 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful 3342 /// when the size is anything else). 3343 void 3344 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api) 3345 { 3346 if (Sem == &IEEEhalf) 3347 return initFromHalfAPInt(api); 3348 if (Sem == &IEEEsingle) 3349 return initFromFloatAPInt(api); 3350 if (Sem == &IEEEdouble) 3351 return initFromDoubleAPInt(api); 3352 if (Sem == &x87DoubleExtended) 3353 return initFromF80LongDoubleAPInt(api); 3354 if (Sem == &IEEEquad) 3355 return initFromQuadrupleAPInt(api); 3356 if (Sem == &PPCDoubleDouble) 3357 return initFromPPCDoubleDoubleAPInt(api); 3358 3359 llvm_unreachable(nullptr); 3360 } 3361 3362 APFloat 3363 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) 3364 { 3365 switch (BitWidth) { 3366 case 16: 3367 return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth)); 3368 case 32: 3369 return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth)); 3370 case 64: 3371 return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth)); 3372 case 80: 3373 return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth)); 3374 case 128: 3375 if (isIEEE) 3376 return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth)); 3377 return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth)); 3378 default: 3379 llvm_unreachable("Unknown floating bit width"); 3380 } 3381 } 3382 3383 unsigned APFloat::getSizeInBits(const fltSemantics &Sem) { 3384 return Sem.sizeInBits; 3385 } 3386 3387 /// Make this number the largest magnitude normal number in the given 3388 /// semantics. 3389 void APFloat::makeLargest(bool Negative) { 3390 // We want (in interchange format): 3391 // sign = {Negative} 3392 // exponent = 1..10 3393 // significand = 1..1 3394 category = fcNormal; 3395 sign = Negative; 3396 exponent = semantics->maxExponent; 3397 3398 // Use memset to set all but the highest integerPart to all ones. 3399 integerPart *significand = significandParts(); 3400 unsigned PartCount = partCount(); 3401 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); 3402 3403 // Set the high integerPart especially setting all unused top bits for 3404 // internal consistency. 3405 const unsigned NumUnusedHighBits = 3406 PartCount*integerPartWidth - semantics->precision; 3407 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth) 3408 ? (~integerPart(0) >> NumUnusedHighBits) 3409 : 0; 3410 } 3411 3412 /// Make this number the smallest magnitude denormal number in the given 3413 /// semantics. 3414 void APFloat::makeSmallest(bool Negative) { 3415 // We want (in interchange format): 3416 // sign = {Negative} 3417 // exponent = 0..0 3418 // significand = 0..01 3419 category = fcNormal; 3420 sign = Negative; 3421 exponent = semantics->minExponent; 3422 APInt::tcSet(significandParts(), 1, partCount()); 3423 } 3424 3425 3426 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) { 3427 // We want (in interchange format): 3428 // sign = {Negative} 3429 // exponent = 1..10 3430 // significand = 1..1 3431 APFloat Val(Sem, uninitialized); 3432 Val.makeLargest(Negative); 3433 return Val; 3434 } 3435 3436 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) { 3437 // We want (in interchange format): 3438 // sign = {Negative} 3439 // exponent = 0..0 3440 // significand = 0..01 3441 APFloat Val(Sem, uninitialized); 3442 Val.makeSmallest(Negative); 3443 return Val; 3444 } 3445 3446 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) { 3447 APFloat Val(Sem, uninitialized); 3448 3449 // We want (in interchange format): 3450 // sign = {Negative} 3451 // exponent = 0..0 3452 // significand = 10..0 3453 3454 Val.category = fcNormal; 3455 Val.zeroSignificand(); 3456 Val.sign = Negative; 3457 Val.exponent = Sem.minExponent; 3458 Val.significandParts()[partCountForBits(Sem.precision)-1] |= 3459 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth)); 3460 3461 return Val; 3462 } 3463 3464 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) { 3465 initFromAPInt(&Sem, API); 3466 } 3467 3468 APFloat::APFloat(float f) { 3469 initFromAPInt(&IEEEsingle, APInt::floatToBits(f)); 3470 } 3471 3472 APFloat::APFloat(double d) { 3473 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d)); 3474 } 3475 3476 namespace { 3477 void append(SmallVectorImpl<char> &Buffer, StringRef Str) { 3478 Buffer.append(Str.begin(), Str.end()); 3479 } 3480 3481 /// Removes data from the given significand until it is no more 3482 /// precise than is required for the desired precision. 3483 void AdjustToPrecision(APInt &significand, 3484 int &exp, unsigned FormatPrecision) { 3485 unsigned bits = significand.getActiveBits(); 3486 3487 // 196/59 is a very slight overestimate of lg_2(10). 3488 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59; 3489 3490 if (bits <= bitsRequired) return; 3491 3492 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; 3493 if (!tensRemovable) return; 3494 3495 exp += tensRemovable; 3496 3497 APInt divisor(significand.getBitWidth(), 1); 3498 APInt powten(significand.getBitWidth(), 10); 3499 while (true) { 3500 if (tensRemovable & 1) 3501 divisor *= powten; 3502 tensRemovable >>= 1; 3503 if (!tensRemovable) break; 3504 powten *= powten; 3505 } 3506 3507 significand = significand.udiv(divisor); 3508 3509 // Truncate the significand down to its active bit count. 3510 significand = significand.trunc(significand.getActiveBits()); 3511 } 3512 3513 3514 void AdjustToPrecision(SmallVectorImpl<char> &buffer, 3515 int &exp, unsigned FormatPrecision) { 3516 unsigned N = buffer.size(); 3517 if (N <= FormatPrecision) return; 3518 3519 // The most significant figures are the last ones in the buffer. 3520 unsigned FirstSignificant = N - FormatPrecision; 3521 3522 // Round. 3523 // FIXME: this probably shouldn't use 'round half up'. 3524 3525 // Rounding down is just a truncation, except we also want to drop 3526 // trailing zeros from the new result. 3527 if (buffer[FirstSignificant - 1] < '5') { 3528 while (FirstSignificant < N && buffer[FirstSignificant] == '0') 3529 FirstSignificant++; 3530 3531 exp += FirstSignificant; 3532 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3533 return; 3534 } 3535 3536 // Rounding up requires a decimal add-with-carry. If we continue 3537 // the carry, the newly-introduced zeros will just be truncated. 3538 for (unsigned I = FirstSignificant; I != N; ++I) { 3539 if (buffer[I] == '9') { 3540 FirstSignificant++; 3541 } else { 3542 buffer[I]++; 3543 break; 3544 } 3545 } 3546 3547 // If we carried through, we have exactly one digit of precision. 3548 if (FirstSignificant == N) { 3549 exp += FirstSignificant; 3550 buffer.clear(); 3551 buffer.push_back('1'); 3552 return; 3553 } 3554 3555 exp += FirstSignificant; 3556 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3557 } 3558 } 3559 3560 void APFloat::toString(SmallVectorImpl<char> &Str, 3561 unsigned FormatPrecision, 3562 unsigned FormatMaxPadding) const { 3563 switch (category) { 3564 case fcInfinity: 3565 if (isNegative()) 3566 return append(Str, "-Inf"); 3567 else 3568 return append(Str, "+Inf"); 3569 3570 case fcNaN: return append(Str, "NaN"); 3571 3572 case fcZero: 3573 if (isNegative()) 3574 Str.push_back('-'); 3575 3576 if (!FormatMaxPadding) 3577 append(Str, "0.0E+0"); 3578 else 3579 Str.push_back('0'); 3580 return; 3581 3582 case fcNormal: 3583 break; 3584 } 3585 3586 if (isNegative()) 3587 Str.push_back('-'); 3588 3589 // Decompose the number into an APInt and an exponent. 3590 int exp = exponent - ((int) semantics->precision - 1); 3591 APInt significand(semantics->precision, 3592 makeArrayRef(significandParts(), 3593 partCountForBits(semantics->precision))); 3594 3595 // Set FormatPrecision if zero. We want to do this before we 3596 // truncate trailing zeros, as those are part of the precision. 3597 if (!FormatPrecision) { 3598 // We use enough digits so the number can be round-tripped back to an 3599 // APFloat. The formula comes from "How to Print Floating-Point Numbers 3600 // Accurately" by Steele and White. 3601 // FIXME: Using a formula based purely on the precision is conservative; 3602 // we can print fewer digits depending on the actual value being printed. 3603 3604 // FormatPrecision = 2 + floor(significandBits / lg_2(10)) 3605 FormatPrecision = 2 + semantics->precision * 59 / 196; 3606 } 3607 3608 // Ignore trailing binary zeros. 3609 int trailingZeros = significand.countTrailingZeros(); 3610 exp += trailingZeros; 3611 significand = significand.lshr(trailingZeros); 3612 3613 // Change the exponent from 2^e to 10^e. 3614 if (exp == 0) { 3615 // Nothing to do. 3616 } else if (exp > 0) { 3617 // Just shift left. 3618 significand = significand.zext(semantics->precision + exp); 3619 significand <<= exp; 3620 exp = 0; 3621 } else { /* exp < 0 */ 3622 int texp = -exp; 3623 3624 // We transform this using the identity: 3625 // (N)(2^-e) == (N)(5^e)(10^-e) 3626 // This means we have to multiply N (the significand) by 5^e. 3627 // To avoid overflow, we have to operate on numbers large 3628 // enough to store N * 5^e: 3629 // log2(N * 5^e) == log2(N) + e * log2(5) 3630 // <= semantics->precision + e * 137 / 59 3631 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) 3632 3633 unsigned precision = semantics->precision + (137 * texp + 136) / 59; 3634 3635 // Multiply significand by 5^e. 3636 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) 3637 significand = significand.zext(precision); 3638 APInt five_to_the_i(precision, 5); 3639 while (true) { 3640 if (texp & 1) significand *= five_to_the_i; 3641 3642 texp >>= 1; 3643 if (!texp) break; 3644 five_to_the_i *= five_to_the_i; 3645 } 3646 } 3647 3648 AdjustToPrecision(significand, exp, FormatPrecision); 3649 3650 SmallVector<char, 256> buffer; 3651 3652 // Fill the buffer. 3653 unsigned precision = significand.getBitWidth(); 3654 APInt ten(precision, 10); 3655 APInt digit(precision, 0); 3656 3657 bool inTrail = true; 3658 while (significand != 0) { 3659 // digit <- significand % 10 3660 // significand <- significand / 10 3661 APInt::udivrem(significand, ten, significand, digit); 3662 3663 unsigned d = digit.getZExtValue(); 3664 3665 // Drop trailing zeros. 3666 if (inTrail && !d) exp++; 3667 else { 3668 buffer.push_back((char) ('0' + d)); 3669 inTrail = false; 3670 } 3671 } 3672 3673 assert(!buffer.empty() && "no characters in buffer!"); 3674 3675 // Drop down to FormatPrecision. 3676 // TODO: don't do more precise calculations above than are required. 3677 AdjustToPrecision(buffer, exp, FormatPrecision); 3678 3679 unsigned NDigits = buffer.size(); 3680 3681 // Check whether we should use scientific notation. 3682 bool FormatScientific; 3683 if (!FormatMaxPadding) 3684 FormatScientific = true; 3685 else { 3686 if (exp >= 0) { 3687 // 765e3 --> 765000 3688 // ^^^ 3689 // But we shouldn't make the number look more precise than it is. 3690 FormatScientific = ((unsigned) exp > FormatMaxPadding || 3691 NDigits + (unsigned) exp > FormatPrecision); 3692 } else { 3693 // Power of the most significant digit. 3694 int MSD = exp + (int) (NDigits - 1); 3695 if (MSD >= 0) { 3696 // 765e-2 == 7.65 3697 FormatScientific = false; 3698 } else { 3699 // 765e-5 == 0.00765 3700 // ^ ^^ 3701 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; 3702 } 3703 } 3704 } 3705 3706 // Scientific formatting is pretty straightforward. 3707 if (FormatScientific) { 3708 exp += (NDigits - 1); 3709 3710 Str.push_back(buffer[NDigits-1]); 3711 Str.push_back('.'); 3712 if (NDigits == 1) 3713 Str.push_back('0'); 3714 else 3715 for (unsigned I = 1; I != NDigits; ++I) 3716 Str.push_back(buffer[NDigits-1-I]); 3717 Str.push_back('E'); 3718 3719 Str.push_back(exp >= 0 ? '+' : '-'); 3720 if (exp < 0) exp = -exp; 3721 SmallVector<char, 6> expbuf; 3722 do { 3723 expbuf.push_back((char) ('0' + (exp % 10))); 3724 exp /= 10; 3725 } while (exp); 3726 for (unsigned I = 0, E = expbuf.size(); I != E; ++I) 3727 Str.push_back(expbuf[E-1-I]); 3728 return; 3729 } 3730 3731 // Non-scientific, positive exponents. 3732 if (exp >= 0) { 3733 for (unsigned I = 0; I != NDigits; ++I) 3734 Str.push_back(buffer[NDigits-1-I]); 3735 for (unsigned I = 0; I != (unsigned) exp; ++I) 3736 Str.push_back('0'); 3737 return; 3738 } 3739 3740 // Non-scientific, negative exponents. 3741 3742 // The number of digits to the left of the decimal point. 3743 int NWholeDigits = exp + (int) NDigits; 3744 3745 unsigned I = 0; 3746 if (NWholeDigits > 0) { 3747 for (; I != (unsigned) NWholeDigits; ++I) 3748 Str.push_back(buffer[NDigits-I-1]); 3749 Str.push_back('.'); 3750 } else { 3751 unsigned NZeros = 1 + (unsigned) -NWholeDigits; 3752 3753 Str.push_back('0'); 3754 Str.push_back('.'); 3755 for (unsigned Z = 1; Z != NZeros; ++Z) 3756 Str.push_back('0'); 3757 } 3758 3759 for (; I != NDigits; ++I) 3760 Str.push_back(buffer[NDigits-I-1]); 3761 } 3762 3763 bool APFloat::getExactInverse(APFloat *inv) const { 3764 // Special floats and denormals have no exact inverse. 3765 if (!isFiniteNonZero()) 3766 return false; 3767 3768 // Check that the number is a power of two by making sure that only the 3769 // integer bit is set in the significand. 3770 if (significandLSB() != semantics->precision - 1) 3771 return false; 3772 3773 // Get the inverse. 3774 APFloat reciprocal(*semantics, 1ULL); 3775 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) 3776 return false; 3777 3778 // Avoid multiplication with a denormal, it is not safe on all platforms and 3779 // may be slower than a normal division. 3780 if (reciprocal.isDenormal()) 3781 return false; 3782 3783 assert(reciprocal.isFiniteNonZero() && 3784 reciprocal.significandLSB() == reciprocal.semantics->precision - 1); 3785 3786 if (inv) 3787 *inv = reciprocal; 3788 3789 return true; 3790 } 3791 3792 bool APFloat::isSignaling() const { 3793 if (!isNaN()) 3794 return false; 3795 3796 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the 3797 // first bit of the trailing significand being 0. 3798 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2); 3799 } 3800 3801 /// IEEE-754R 2008 5.3.1: nextUp/nextDown. 3802 /// 3803 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with 3804 /// appropriate sign switching before/after the computation. 3805 APFloat::opStatus APFloat::next(bool nextDown) { 3806 // If we are performing nextDown, swap sign so we have -x. 3807 if (nextDown) 3808 changeSign(); 3809 3810 // Compute nextUp(x) 3811 opStatus result = opOK; 3812 3813 // Handle each float category separately. 3814 switch (category) { 3815 case fcInfinity: 3816 // nextUp(+inf) = +inf 3817 if (!isNegative()) 3818 break; 3819 // nextUp(-inf) = -getLargest() 3820 makeLargest(true); 3821 break; 3822 case fcNaN: 3823 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. 3824 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not 3825 // change the payload. 3826 if (isSignaling()) { 3827 result = opInvalidOp; 3828 // For consistency, propagate the sign of the sNaN to the qNaN. 3829 makeNaN(false, isNegative(), nullptr); 3830 } 3831 break; 3832 case fcZero: 3833 // nextUp(pm 0) = +getSmallest() 3834 makeSmallest(false); 3835 break; 3836 case fcNormal: 3837 // nextUp(-getSmallest()) = -0 3838 if (isSmallest() && isNegative()) { 3839 APInt::tcSet(significandParts(), 0, partCount()); 3840 category = fcZero; 3841 exponent = 0; 3842 break; 3843 } 3844 3845 // nextUp(getLargest()) == INFINITY 3846 if (isLargest() && !isNegative()) { 3847 APInt::tcSet(significandParts(), 0, partCount()); 3848 category = fcInfinity; 3849 exponent = semantics->maxExponent + 1; 3850 break; 3851 } 3852 3853 // nextUp(normal) == normal + inc. 3854 if (isNegative()) { 3855 // If we are negative, we need to decrement the significand. 3856 3857 // We only cross a binade boundary that requires adjusting the exponent 3858 // if: 3859 // 1. exponent != semantics->minExponent. This implies we are not in the 3860 // smallest binade or are dealing with denormals. 3861 // 2. Our significand excluding the integral bit is all zeros. 3862 bool WillCrossBinadeBoundary = 3863 exponent != semantics->minExponent && isSignificandAllZeros(); 3864 3865 // Decrement the significand. 3866 // 3867 // We always do this since: 3868 // 1. If we are dealing with a non-binade decrement, by definition we 3869 // just decrement the significand. 3870 // 2. If we are dealing with a normal -> normal binade decrement, since 3871 // we have an explicit integral bit the fact that all bits but the 3872 // integral bit are zero implies that subtracting one will yield a 3873 // significand with 0 integral bit and 1 in all other spots. Thus we 3874 // must just adjust the exponent and set the integral bit to 1. 3875 // 3. If we are dealing with a normal -> denormal binade decrement, 3876 // since we set the integral bit to 0 when we represent denormals, we 3877 // just decrement the significand. 3878 integerPart *Parts = significandParts(); 3879 APInt::tcDecrement(Parts, partCount()); 3880 3881 if (WillCrossBinadeBoundary) { 3882 // Our result is a normal number. Do the following: 3883 // 1. Set the integral bit to 1. 3884 // 2. Decrement the exponent. 3885 APInt::tcSetBit(Parts, semantics->precision - 1); 3886 exponent--; 3887 } 3888 } else { 3889 // If we are positive, we need to increment the significand. 3890 3891 // We only cross a binade boundary that requires adjusting the exponent if 3892 // the input is not a denormal and all of said input's significand bits 3893 // are set. If all of said conditions are true: clear the significand, set 3894 // the integral bit to 1, and increment the exponent. If we have a 3895 // denormal always increment since moving denormals and the numbers in the 3896 // smallest normal binade have the same exponent in our representation. 3897 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes(); 3898 3899 if (WillCrossBinadeBoundary) { 3900 integerPart *Parts = significandParts(); 3901 APInt::tcSet(Parts, 0, partCount()); 3902 APInt::tcSetBit(Parts, semantics->precision - 1); 3903 assert(exponent != semantics->maxExponent && 3904 "We can not increment an exponent beyond the maxExponent allowed" 3905 " by the given floating point semantics."); 3906 exponent++; 3907 } else { 3908 incrementSignificand(); 3909 } 3910 } 3911 break; 3912 } 3913 3914 // If we are performing nextDown, swap sign so we have -nextUp(-x) 3915 if (nextDown) 3916 changeSign(); 3917 3918 return result; 3919 } 3920 3921 void 3922 APFloat::makeInf(bool Negative) { 3923 category = fcInfinity; 3924 sign = Negative; 3925 exponent = semantics->maxExponent + 1; 3926 APInt::tcSet(significandParts(), 0, partCount()); 3927 } 3928 3929 void 3930 APFloat::makeZero(bool Negative) { 3931 category = fcZero; 3932 sign = Negative; 3933 exponent = semantics->minExponent-1; 3934 APInt::tcSet(significandParts(), 0, partCount()); 3935 } 3936 3937 APFloat llvm::scalbn(APFloat X, int Exp) { 3938 if (X.isInfinity() || X.isZero() || X.isNaN()) 3939 return X; 3940 3941 auto MaxExp = X.getSemantics().maxExponent; 3942 auto MinExp = X.getSemantics().minExponent; 3943 if (Exp > (MaxExp - X.exponent)) 3944 // Overflow saturates to infinity. 3945 return APFloat::getInf(X.getSemantics(), X.isNegative()); 3946 if (Exp < (MinExp - X.exponent)) 3947 // Underflow saturates to zero. 3948 return APFloat::getZero(X.getSemantics(), X.isNegative()); 3949 3950 X.exponent += Exp; 3951 return X; 3952 } 3953