1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a class to represent arbitrary precision floating 11 // point values and provide a variety of arithmetic operations on them. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/APFloat.h" 16 #include "llvm/ADT/APSInt.h" 17 #include "llvm/ADT/FoldingSet.h" 18 #include "llvm/ADT/Hashing.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Support/ErrorHandling.h" 22 #include "llvm/Support/MathExtras.h" 23 #include <cstring> 24 #include <limits.h> 25 26 using namespace llvm; 27 28 /// A macro used to combine two fcCategory enums into one key which can be used 29 /// in a switch statement to classify how the interaction of two APFloat's 30 /// categories affects an operation. 31 /// 32 /// TODO: If clang source code is ever allowed to use constexpr in its own 33 /// codebase, change this into a static inline function. 34 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs)) 35 36 /* Assumed in hexadecimal significand parsing, and conversion to 37 hexadecimal strings. */ 38 static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); 39 40 namespace llvm { 41 42 /* Represents floating point arithmetic semantics. */ 43 struct fltSemantics { 44 /* The largest E such that 2^E is representable; this matches the 45 definition of IEEE 754. */ 46 APFloat::ExponentType maxExponent; 47 48 /* The smallest E such that 2^E is a normalized number; this 49 matches the definition of IEEE 754. */ 50 APFloat::ExponentType minExponent; 51 52 /* Number of bits in the significand. This includes the integer 53 bit. */ 54 unsigned int precision; 55 56 /* Number of bits actually used in the semantics. */ 57 unsigned int sizeInBits; 58 }; 59 60 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, 16 }; 61 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, 32 }; 62 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, 64 }; 63 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, 128 }; 64 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, 80 }; 65 const fltSemantics APFloat::Bogus = { 0, 0, 0, 0 }; 66 67 /* The PowerPC format consists of two doubles. It does not map cleanly 68 onto the usual format above. It is approximated using twice the 69 mantissa bits. Note that for exponents near the double minimum, 70 we no longer can represent the full 106 mantissa bits, so those 71 will be treated as denormal numbers. 72 73 FIXME: While this approximation is equivalent to what GCC uses for 74 compile-time arithmetic on PPC double-double numbers, it is not able 75 to represent all possible values held by a PPC double-double number, 76 for example: (long double) 1.0 + (long double) 0x1p-106 77 Should this be replaced by a full emulation of PPC double-double? */ 78 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53, 128 }; 79 80 /* A tight upper bound on number of parts required to hold the value 81 pow(5, power) is 82 83 power * 815 / (351 * integerPartWidth) + 1 84 85 However, whilst the result may require only this many parts, 86 because we are multiplying two values to get it, the 87 multiplication may require an extra part with the excess part 88 being zero (consider the trivial case of 1 * 1, tcFullMultiply 89 requires two parts to hold the single-part result). So we add an 90 extra one to guarantee enough space whilst multiplying. */ 91 const unsigned int maxExponent = 16383; 92 const unsigned int maxPrecision = 113; 93 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 94 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) 95 / (351 * integerPartWidth)); 96 } 97 98 /* A bunch of private, handy routines. */ 99 100 static inline unsigned int 101 partCountForBits(unsigned int bits) 102 { 103 return ((bits) + integerPartWidth - 1) / integerPartWidth; 104 } 105 106 /* Returns 0U-9U. Return values >= 10U are not digits. */ 107 static inline unsigned int 108 decDigitValue(unsigned int c) 109 { 110 return c - '0'; 111 } 112 113 /* Return the value of a decimal exponent of the form 114 [+-]ddddddd. 115 116 If the exponent overflows, returns a large exponent with the 117 appropriate sign. */ 118 static int 119 readExponent(StringRef::iterator begin, StringRef::iterator end) 120 { 121 bool isNegative; 122 unsigned int absExponent; 123 const unsigned int overlargeExponent = 24000; /* FIXME. */ 124 StringRef::iterator p = begin; 125 126 assert(p != end && "Exponent has no digits"); 127 128 isNegative = (*p == '-'); 129 if (*p == '-' || *p == '+') { 130 p++; 131 assert(p != end && "Exponent has no digits"); 132 } 133 134 absExponent = decDigitValue(*p++); 135 assert(absExponent < 10U && "Invalid character in exponent"); 136 137 for (; p != end; ++p) { 138 unsigned int value; 139 140 value = decDigitValue(*p); 141 assert(value < 10U && "Invalid character in exponent"); 142 143 value += absExponent * 10; 144 if (absExponent >= overlargeExponent) { 145 absExponent = overlargeExponent; 146 p = end; /* outwit assert below */ 147 break; 148 } 149 absExponent = value; 150 } 151 152 assert(p == end && "Invalid exponent in exponent"); 153 154 if (isNegative) 155 return -(int) absExponent; 156 else 157 return (int) absExponent; 158 } 159 160 /* This is ugly and needs cleaning up, but I don't immediately see 161 how whilst remaining safe. */ 162 static int 163 totalExponent(StringRef::iterator p, StringRef::iterator end, 164 int exponentAdjustment) 165 { 166 int unsignedExponent; 167 bool negative, overflow; 168 int exponent = 0; 169 170 assert(p != end && "Exponent has no digits"); 171 172 negative = *p == '-'; 173 if (*p == '-' || *p == '+') { 174 p++; 175 assert(p != end && "Exponent has no digits"); 176 } 177 178 unsignedExponent = 0; 179 overflow = false; 180 for (; p != end; ++p) { 181 unsigned int value; 182 183 value = decDigitValue(*p); 184 assert(value < 10U && "Invalid character in exponent"); 185 186 unsignedExponent = unsignedExponent * 10 + value; 187 if (unsignedExponent > 32767) { 188 overflow = true; 189 break; 190 } 191 } 192 193 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 194 overflow = true; 195 196 if (!overflow) { 197 exponent = unsignedExponent; 198 if (negative) 199 exponent = -exponent; 200 exponent += exponentAdjustment; 201 if (exponent > 32767 || exponent < -32768) 202 overflow = true; 203 } 204 205 if (overflow) 206 exponent = negative ? -32768: 32767; 207 208 return exponent; 209 } 210 211 static StringRef::iterator 212 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 213 StringRef::iterator *dot) 214 { 215 StringRef::iterator p = begin; 216 *dot = end; 217 while (p != end && *p == '0') 218 p++; 219 220 if (p != end && *p == '.') { 221 *dot = p++; 222 223 assert(end - begin != 1 && "Significand has no digits"); 224 225 while (p != end && *p == '0') 226 p++; 227 } 228 229 return p; 230 } 231 232 /* Given a normal decimal floating point number of the form 233 234 dddd.dddd[eE][+-]ddd 235 236 where the decimal point and exponent are optional, fill out the 237 structure D. Exponent is appropriate if the significand is 238 treated as an integer, and normalizedExponent if the significand 239 is taken to have the decimal point after a single leading 240 non-zero digit. 241 242 If the value is zero, V->firstSigDigit points to a non-digit, and 243 the return exponent is zero. 244 */ 245 struct decimalInfo { 246 const char *firstSigDigit; 247 const char *lastSigDigit; 248 int exponent; 249 int normalizedExponent; 250 }; 251 252 static void 253 interpretDecimal(StringRef::iterator begin, StringRef::iterator end, 254 decimalInfo *D) 255 { 256 StringRef::iterator dot = end; 257 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot); 258 259 D->firstSigDigit = p; 260 D->exponent = 0; 261 D->normalizedExponent = 0; 262 263 for (; p != end; ++p) { 264 if (*p == '.') { 265 assert(dot == end && "String contains multiple dots"); 266 dot = p++; 267 if (p == end) 268 break; 269 } 270 if (decDigitValue(*p) >= 10U) 271 break; 272 } 273 274 if (p != end) { 275 assert((*p == 'e' || *p == 'E') && "Invalid character in significand"); 276 assert(p != begin && "Significand has no digits"); 277 assert((dot == end || p - begin != 1) && "Significand has no digits"); 278 279 /* p points to the first non-digit in the string */ 280 D->exponent = readExponent(p + 1, end); 281 282 /* Implied decimal point? */ 283 if (dot == end) 284 dot = p; 285 } 286 287 /* If number is all zeroes accept any exponent. */ 288 if (p != D->firstSigDigit) { 289 /* Drop insignificant trailing zeroes. */ 290 if (p != begin) { 291 do 292 do 293 p--; 294 while (p != begin && *p == '0'); 295 while (p != begin && *p == '.'); 296 } 297 298 /* Adjust the exponents for any decimal point. */ 299 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p)); 300 D->normalizedExponent = (D->exponent + 301 static_cast<APFloat::ExponentType>((p - D->firstSigDigit) 302 - (dot > D->firstSigDigit && dot < p))); 303 } 304 305 D->lastSigDigit = p; 306 } 307 308 /* Return the trailing fraction of a hexadecimal number. 309 DIGITVALUE is the first hex digit of the fraction, P points to 310 the next digit. */ 311 static lostFraction 312 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 313 unsigned int digitValue) 314 { 315 unsigned int hexDigit; 316 317 /* If the first trailing digit isn't 0 or 8 we can work out the 318 fraction immediately. */ 319 if (digitValue > 8) 320 return lfMoreThanHalf; 321 else if (digitValue < 8 && digitValue > 0) 322 return lfLessThanHalf; 323 324 // Otherwise we need to find the first non-zero digit. 325 while (p != end && (*p == '0' || *p == '.')) 326 p++; 327 328 assert(p != end && "Invalid trailing hexadecimal fraction!"); 329 330 hexDigit = hexDigitValue(*p); 331 332 /* If we ran off the end it is exactly zero or one-half, otherwise 333 a little more. */ 334 if (hexDigit == -1U) 335 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 336 else 337 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 338 } 339 340 /* Return the fraction lost were a bignum truncated losing the least 341 significant BITS bits. */ 342 static lostFraction 343 lostFractionThroughTruncation(const integerPart *parts, 344 unsigned int partCount, 345 unsigned int bits) 346 { 347 unsigned int lsb; 348 349 lsb = APInt::tcLSB(parts, partCount); 350 351 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 352 if (bits <= lsb) 353 return lfExactlyZero; 354 if (bits == lsb + 1) 355 return lfExactlyHalf; 356 if (bits <= partCount * integerPartWidth && 357 APInt::tcExtractBit(parts, bits - 1)) 358 return lfMoreThanHalf; 359 360 return lfLessThanHalf; 361 } 362 363 /* Shift DST right BITS bits noting lost fraction. */ 364 static lostFraction 365 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) 366 { 367 lostFraction lost_fraction; 368 369 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 370 371 APInt::tcShiftRight(dst, parts, bits); 372 373 return lost_fraction; 374 } 375 376 /* Combine the effect of two lost fractions. */ 377 static lostFraction 378 combineLostFractions(lostFraction moreSignificant, 379 lostFraction lessSignificant) 380 { 381 if (lessSignificant != lfExactlyZero) { 382 if (moreSignificant == lfExactlyZero) 383 moreSignificant = lfLessThanHalf; 384 else if (moreSignificant == lfExactlyHalf) 385 moreSignificant = lfMoreThanHalf; 386 } 387 388 return moreSignificant; 389 } 390 391 /* The error from the true value, in half-ulps, on multiplying two 392 floating point numbers, which differ from the value they 393 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 394 than the returned value. 395 396 See "How to Read Floating Point Numbers Accurately" by William D 397 Clinger. */ 398 static unsigned int 399 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 400 { 401 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 402 403 if (HUerr1 + HUerr2 == 0) 404 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 405 else 406 return inexactMultiply + 2 * (HUerr1 + HUerr2); 407 } 408 409 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 410 when the least significant BITS are truncated. BITS cannot be 411 zero. */ 412 static integerPart 413 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) 414 { 415 unsigned int count, partBits; 416 integerPart part, boundary; 417 418 assert(bits != 0); 419 420 bits--; 421 count = bits / integerPartWidth; 422 partBits = bits % integerPartWidth + 1; 423 424 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits)); 425 426 if (isNearest) 427 boundary = (integerPart) 1 << (partBits - 1); 428 else 429 boundary = 0; 430 431 if (count == 0) { 432 if (part - boundary <= boundary - part) 433 return part - boundary; 434 else 435 return boundary - part; 436 } 437 438 if (part == boundary) { 439 while (--count) 440 if (parts[count]) 441 return ~(integerPart) 0; /* A lot. */ 442 443 return parts[0]; 444 } else if (part == boundary - 1) { 445 while (--count) 446 if (~parts[count]) 447 return ~(integerPart) 0; /* A lot. */ 448 449 return -parts[0]; 450 } 451 452 return ~(integerPart) 0; /* A lot. */ 453 } 454 455 /* Place pow(5, power) in DST, and return the number of parts used. 456 DST must be at least one part larger than size of the answer. */ 457 static unsigned int 458 powerOf5(integerPart *dst, unsigned int power) 459 { 460 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 461 15625, 78125 }; 462 integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; 463 pow5s[0] = 78125 * 5; 464 465 unsigned int partsCount[16] = { 1 }; 466 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 467 unsigned int result; 468 assert(power <= maxExponent); 469 470 p1 = dst; 471 p2 = scratch; 472 473 *p1 = firstEightPowers[power & 7]; 474 power >>= 3; 475 476 result = 1; 477 pow5 = pow5s; 478 479 for (unsigned int n = 0; power; power >>= 1, n++) { 480 unsigned int pc; 481 482 pc = partsCount[n]; 483 484 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ 485 if (pc == 0) { 486 pc = partsCount[n - 1]; 487 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); 488 pc *= 2; 489 if (pow5[pc - 1] == 0) 490 pc--; 491 partsCount[n] = pc; 492 } 493 494 if (power & 1) { 495 integerPart *tmp; 496 497 APInt::tcFullMultiply(p2, p1, pow5, result, pc); 498 result += pc; 499 if (p2[result - 1] == 0) 500 result--; 501 502 /* Now result is in p1 with partsCount parts and p2 is scratch 503 space. */ 504 tmp = p1, p1 = p2, p2 = tmp; 505 } 506 507 pow5 += pc; 508 } 509 510 if (p1 != dst) 511 APInt::tcAssign(dst, p1, result); 512 513 return result; 514 } 515 516 /* Zero at the end to avoid modular arithmetic when adding one; used 517 when rounding up during hexadecimal output. */ 518 static const char hexDigitsLower[] = "0123456789abcdef0"; 519 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 520 static const char infinityL[] = "infinity"; 521 static const char infinityU[] = "INFINITY"; 522 static const char NaNL[] = "nan"; 523 static const char NaNU[] = "NAN"; 524 525 /* Write out an integerPart in hexadecimal, starting with the most 526 significant nibble. Write out exactly COUNT hexdigits, return 527 COUNT. */ 528 static unsigned int 529 partAsHex (char *dst, integerPart part, unsigned int count, 530 const char *hexDigitChars) 531 { 532 unsigned int result = count; 533 534 assert(count != 0 && count <= integerPartWidth / 4); 535 536 part >>= (integerPartWidth - 4 * count); 537 while (count--) { 538 dst[count] = hexDigitChars[part & 0xf]; 539 part >>= 4; 540 } 541 542 return result; 543 } 544 545 /* Write out an unsigned decimal integer. */ 546 static char * 547 writeUnsignedDecimal (char *dst, unsigned int n) 548 { 549 char buff[40], *p; 550 551 p = buff; 552 do 553 *p++ = '0' + n % 10; 554 while (n /= 10); 555 556 do 557 *dst++ = *--p; 558 while (p != buff); 559 560 return dst; 561 } 562 563 /* Write out a signed decimal integer. */ 564 static char * 565 writeSignedDecimal (char *dst, int value) 566 { 567 if (value < 0) { 568 *dst++ = '-'; 569 dst = writeUnsignedDecimal(dst, -(unsigned) value); 570 } else 571 dst = writeUnsignedDecimal(dst, value); 572 573 return dst; 574 } 575 576 /* Constructors. */ 577 void 578 APFloat::initialize(const fltSemantics *ourSemantics) 579 { 580 unsigned int count; 581 582 semantics = ourSemantics; 583 count = partCount(); 584 if (count > 1) 585 significand.parts = new integerPart[count]; 586 } 587 588 void 589 APFloat::freeSignificand() 590 { 591 if (needsCleanup()) 592 delete [] significand.parts; 593 } 594 595 void 596 APFloat::assign(const APFloat &rhs) 597 { 598 assert(semantics == rhs.semantics); 599 600 sign = rhs.sign; 601 category = rhs.category; 602 exponent = rhs.exponent; 603 if (isFiniteNonZero() || category == fcNaN) 604 copySignificand(rhs); 605 } 606 607 void 608 APFloat::copySignificand(const APFloat &rhs) 609 { 610 assert(isFiniteNonZero() || category == fcNaN); 611 assert(rhs.partCount() >= partCount()); 612 613 APInt::tcAssign(significandParts(), rhs.significandParts(), 614 partCount()); 615 } 616 617 /* Make this number a NaN, with an arbitrary but deterministic value 618 for the significand. If double or longer, this is a signalling NaN, 619 which may not be ideal. If float, this is QNaN(0). */ 620 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) 621 { 622 category = fcNaN; 623 sign = Negative; 624 625 integerPart *significand = significandParts(); 626 unsigned numParts = partCount(); 627 628 // Set the significand bits to the fill. 629 if (!fill || fill->getNumWords() < numParts) 630 APInt::tcSet(significand, 0, numParts); 631 if (fill) { 632 APInt::tcAssign(significand, fill->getRawData(), 633 std::min(fill->getNumWords(), numParts)); 634 635 // Zero out the excess bits of the significand. 636 unsigned bitsToPreserve = semantics->precision - 1; 637 unsigned part = bitsToPreserve / 64; 638 bitsToPreserve %= 64; 639 significand[part] &= ((1ULL << bitsToPreserve) - 1); 640 for (part++; part != numParts; ++part) 641 significand[part] = 0; 642 } 643 644 unsigned QNaNBit = semantics->precision - 2; 645 646 if (SNaN) { 647 // We always have to clear the QNaN bit to make it an SNaN. 648 APInt::tcClearBit(significand, QNaNBit); 649 650 // If there are no bits set in the payload, we have to set 651 // *something* to make it a NaN instead of an infinity; 652 // conventionally, this is the next bit down from the QNaN bit. 653 if (APInt::tcIsZero(significand, numParts)) 654 APInt::tcSetBit(significand, QNaNBit - 1); 655 } else { 656 // We always have to set the QNaN bit to make it a QNaN. 657 APInt::tcSetBit(significand, QNaNBit); 658 } 659 660 // For x87 extended precision, we want to make a NaN, not a 661 // pseudo-NaN. Maybe we should expose the ability to make 662 // pseudo-NaNs? 663 if (semantics == &APFloat::x87DoubleExtended) 664 APInt::tcSetBit(significand, QNaNBit + 1); 665 } 666 667 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative, 668 const APInt *fill) { 669 APFloat value(Sem, uninitialized); 670 value.makeNaN(SNaN, Negative, fill); 671 return value; 672 } 673 674 APFloat & 675 APFloat::operator=(const APFloat &rhs) 676 { 677 if (this != &rhs) { 678 if (semantics != rhs.semantics) { 679 freeSignificand(); 680 initialize(rhs.semantics); 681 } 682 assign(rhs); 683 } 684 685 return *this; 686 } 687 688 APFloat & 689 APFloat::operator=(APFloat &&rhs) { 690 freeSignificand(); 691 692 semantics = rhs.semantics; 693 significand = rhs.significand; 694 exponent = rhs.exponent; 695 category = rhs.category; 696 sign = rhs.sign; 697 698 rhs.semantics = &Bogus; 699 return *this; 700 } 701 702 bool 703 APFloat::isDenormal() const { 704 return isFiniteNonZero() && (exponent == semantics->minExponent) && 705 (APInt::tcExtractBit(significandParts(), 706 semantics->precision - 1) == 0); 707 } 708 709 bool 710 APFloat::isSmallest() const { 711 // The smallest number by magnitude in our format will be the smallest 712 // denormal, i.e. the floating point number with exponent being minimum 713 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 714 return isFiniteNonZero() && exponent == semantics->minExponent && 715 significandMSB() == 0; 716 } 717 718 bool APFloat::isSignificandAllOnes() const { 719 // Test if the significand excluding the integral bit is all ones. This allows 720 // us to test for binade boundaries. 721 const integerPart *Parts = significandParts(); 722 const unsigned PartCount = partCount(); 723 for (unsigned i = 0; i < PartCount - 1; i++) 724 if (~Parts[i]) 725 return false; 726 727 // Set the unused high bits to all ones when we compare. 728 const unsigned NumHighBits = 729 PartCount*integerPartWidth - semantics->precision + 1; 730 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " 731 "fill than integerPartWidth"); 732 const integerPart HighBitFill = 733 ~integerPart(0) << (integerPartWidth - NumHighBits); 734 if (~(Parts[PartCount - 1] | HighBitFill)) 735 return false; 736 737 return true; 738 } 739 740 bool APFloat::isSignificandAllZeros() const { 741 // Test if the significand excluding the integral bit is all zeros. This 742 // allows us to test for binade boundaries. 743 const integerPart *Parts = significandParts(); 744 const unsigned PartCount = partCount(); 745 746 for (unsigned i = 0; i < PartCount - 1; i++) 747 if (Parts[i]) 748 return false; 749 750 const unsigned NumHighBits = 751 PartCount*integerPartWidth - semantics->precision + 1; 752 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " 753 "clear than integerPartWidth"); 754 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 755 756 if (Parts[PartCount - 1] & HighBitMask) 757 return false; 758 759 return true; 760 } 761 762 bool 763 APFloat::isLargest() const { 764 // The largest number by magnitude in our format will be the floating point 765 // number with maximum exponent and with significand that is all ones. 766 return isFiniteNonZero() && exponent == semantics->maxExponent 767 && isSignificandAllOnes(); 768 } 769 770 bool 771 APFloat::bitwiseIsEqual(const APFloat &rhs) const { 772 if (this == &rhs) 773 return true; 774 if (semantics != rhs.semantics || 775 category != rhs.category || 776 sign != rhs.sign) 777 return false; 778 if (category==fcZero || category==fcInfinity) 779 return true; 780 781 if (isFiniteNonZero() && exponent != rhs.exponent) 782 return false; 783 784 return std::equal(significandParts(), significandParts() + partCount(), 785 rhs.significandParts()); 786 } 787 788 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) { 789 initialize(&ourSemantics); 790 sign = 0; 791 category = fcNormal; 792 zeroSignificand(); 793 exponent = ourSemantics.precision - 1; 794 significandParts()[0] = value; 795 normalize(rmNearestTiesToEven, lfExactlyZero); 796 } 797 798 APFloat::APFloat(const fltSemantics &ourSemantics) { 799 initialize(&ourSemantics); 800 category = fcZero; 801 sign = false; 802 } 803 804 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) { 805 // Allocates storage if necessary but does not initialize it. 806 initialize(&ourSemantics); 807 } 808 809 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) { 810 initialize(&ourSemantics); 811 convertFromString(text, rmNearestTiesToEven); 812 } 813 814 APFloat::APFloat(const APFloat &rhs) { 815 initialize(rhs.semantics); 816 assign(rhs); 817 } 818 819 APFloat::APFloat(APFloat &&rhs) : semantics(&Bogus) { 820 *this = std::move(rhs); 821 } 822 823 APFloat::~APFloat() 824 { 825 freeSignificand(); 826 } 827 828 // Profile - This method 'profiles' an APFloat for use with FoldingSet. 829 void APFloat::Profile(FoldingSetNodeID& ID) const { 830 ID.Add(bitcastToAPInt()); 831 } 832 833 unsigned int 834 APFloat::partCount() const 835 { 836 return partCountForBits(semantics->precision + 1); 837 } 838 839 unsigned int 840 APFloat::semanticsPrecision(const fltSemantics &semantics) 841 { 842 return semantics.precision; 843 } 844 845 const integerPart * 846 APFloat::significandParts() const 847 { 848 return const_cast<APFloat *>(this)->significandParts(); 849 } 850 851 integerPart * 852 APFloat::significandParts() 853 { 854 if (partCount() > 1) 855 return significand.parts; 856 else 857 return &significand.part; 858 } 859 860 void 861 APFloat::zeroSignificand() 862 { 863 APInt::tcSet(significandParts(), 0, partCount()); 864 } 865 866 /* Increment an fcNormal floating point number's significand. */ 867 void 868 APFloat::incrementSignificand() 869 { 870 integerPart carry; 871 872 carry = APInt::tcIncrement(significandParts(), partCount()); 873 874 /* Our callers should never cause us to overflow. */ 875 assert(carry == 0); 876 (void)carry; 877 } 878 879 /* Add the significand of the RHS. Returns the carry flag. */ 880 integerPart 881 APFloat::addSignificand(const APFloat &rhs) 882 { 883 integerPart *parts; 884 885 parts = significandParts(); 886 887 assert(semantics == rhs.semantics); 888 assert(exponent == rhs.exponent); 889 890 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 891 } 892 893 /* Subtract the significand of the RHS with a borrow flag. Returns 894 the borrow flag. */ 895 integerPart 896 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow) 897 { 898 integerPart *parts; 899 900 parts = significandParts(); 901 902 assert(semantics == rhs.semantics); 903 assert(exponent == rhs.exponent); 904 905 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 906 partCount()); 907 } 908 909 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 910 on to the full-precision result of the multiplication. Returns the 911 lost fraction. */ 912 lostFraction 913 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) 914 { 915 unsigned int omsb; // One, not zero, based MSB. 916 unsigned int partsCount, newPartsCount, precision; 917 integerPart *lhsSignificand; 918 integerPart scratch[4]; 919 integerPart *fullSignificand; 920 lostFraction lost_fraction; 921 bool ignored; 922 923 assert(semantics == rhs.semantics); 924 925 precision = semantics->precision; 926 927 // Allocate space for twice as many bits as the original significand, plus one 928 // extra bit for the addition to overflow into. 929 newPartsCount = partCountForBits(precision * 2 + 1); 930 931 if (newPartsCount > 4) 932 fullSignificand = new integerPart[newPartsCount]; 933 else 934 fullSignificand = scratch; 935 936 lhsSignificand = significandParts(); 937 partsCount = partCount(); 938 939 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 940 rhs.significandParts(), partsCount, partsCount); 941 942 lost_fraction = lfExactlyZero; 943 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 944 exponent += rhs.exponent; 945 946 // Assume the operands involved in the multiplication are single-precision 947 // FP, and the two multiplicants are: 948 // *this = a23 . a22 ... a0 * 2^e1 949 // rhs = b23 . b22 ... b0 * 2^e2 950 // the result of multiplication is: 951 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) 952 // Note that there are three significant bits at the left-hand side of the 953 // radix point: two for the multiplication, and an overflow bit for the 954 // addition (that will always be zero at this point). Move the radix point 955 // toward left by two bits, and adjust exponent accordingly. 956 exponent += 2; 957 958 if (addend && addend->isNonZero()) { 959 // The intermediate result of the multiplication has "2 * precision" 960 // signicant bit; adjust the addend to be consistent with mul result. 961 // 962 Significand savedSignificand = significand; 963 const fltSemantics *savedSemantics = semantics; 964 fltSemantics extendedSemantics; 965 opStatus status; 966 unsigned int extendedPrecision; 967 968 // Normalize our MSB to one below the top bit to allow for overflow. 969 extendedPrecision = 2 * precision + 1; 970 if (omsb != extendedPrecision - 1) { 971 assert(extendedPrecision > omsb); 972 APInt::tcShiftLeft(fullSignificand, newPartsCount, 973 (extendedPrecision - 1) - omsb); 974 exponent -= (extendedPrecision - 1) - omsb; 975 } 976 977 /* Create new semantics. */ 978 extendedSemantics = *semantics; 979 extendedSemantics.precision = extendedPrecision; 980 981 if (newPartsCount == 1) 982 significand.part = fullSignificand[0]; 983 else 984 significand.parts = fullSignificand; 985 semantics = &extendedSemantics; 986 987 APFloat extendedAddend(*addend); 988 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); 989 assert(status == opOK); 990 (void)status; 991 992 // Shift the significand of the addend right by one bit. This guarantees 993 // that the high bit of the significand is zero (same as fullSignificand), 994 // so the addition will overflow (if it does overflow at all) into the top bit. 995 lost_fraction = extendedAddend.shiftSignificandRight(1); 996 assert(lost_fraction == lfExactlyZero && 997 "Lost precision while shifting addend for fused-multiply-add."); 998 999 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 1000 1001 /* Restore our state. */ 1002 if (newPartsCount == 1) 1003 fullSignificand[0] = significand.part; 1004 significand = savedSignificand; 1005 semantics = savedSemantics; 1006 1007 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1008 } 1009 1010 // Convert the result having "2 * precision" significant-bits back to the one 1011 // having "precision" significant-bits. First, move the radix point from 1012 // poision "2*precision - 1" to "precision - 1". The exponent need to be 1013 // adjusted by "2*precision - 1" - "precision - 1" = "precision". 1014 exponent -= precision + 1; 1015 1016 // In case MSB resides at the left-hand side of radix point, shift the 1017 // mantissa right by some amount to make sure the MSB reside right before 1018 // the radix point (i.e. "MSB . rest-significant-bits"). 1019 // 1020 // Note that the result is not normalized when "omsb < precision". So, the 1021 // caller needs to call APFloat::normalize() if normalized value is expected. 1022 if (omsb > precision) { 1023 unsigned int bits, significantParts; 1024 lostFraction lf; 1025 1026 bits = omsb - precision; 1027 significantParts = partCountForBits(omsb); 1028 lf = shiftRight(fullSignificand, significantParts, bits); 1029 lost_fraction = combineLostFractions(lf, lost_fraction); 1030 exponent += bits; 1031 } 1032 1033 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 1034 1035 if (newPartsCount > 4) 1036 delete [] fullSignificand; 1037 1038 return lost_fraction; 1039 } 1040 1041 /* Multiply the significands of LHS and RHS to DST. */ 1042 lostFraction 1043 APFloat::divideSignificand(const APFloat &rhs) 1044 { 1045 unsigned int bit, i, partsCount; 1046 const integerPart *rhsSignificand; 1047 integerPart *lhsSignificand, *dividend, *divisor; 1048 integerPart scratch[4]; 1049 lostFraction lost_fraction; 1050 1051 assert(semantics == rhs.semantics); 1052 1053 lhsSignificand = significandParts(); 1054 rhsSignificand = rhs.significandParts(); 1055 partsCount = partCount(); 1056 1057 if (partsCount > 2) 1058 dividend = new integerPart[partsCount * 2]; 1059 else 1060 dividend = scratch; 1061 1062 divisor = dividend + partsCount; 1063 1064 /* Copy the dividend and divisor as they will be modified in-place. */ 1065 for (i = 0; i < partsCount; i++) { 1066 dividend[i] = lhsSignificand[i]; 1067 divisor[i] = rhsSignificand[i]; 1068 lhsSignificand[i] = 0; 1069 } 1070 1071 exponent -= rhs.exponent; 1072 1073 unsigned int precision = semantics->precision; 1074 1075 /* Normalize the divisor. */ 1076 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 1077 if (bit) { 1078 exponent += bit; 1079 APInt::tcShiftLeft(divisor, partsCount, bit); 1080 } 1081 1082 /* Normalize the dividend. */ 1083 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 1084 if (bit) { 1085 exponent -= bit; 1086 APInt::tcShiftLeft(dividend, partsCount, bit); 1087 } 1088 1089 /* Ensure the dividend >= divisor initially for the loop below. 1090 Incidentally, this means that the division loop below is 1091 guaranteed to set the integer bit to one. */ 1092 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { 1093 exponent--; 1094 APInt::tcShiftLeft(dividend, partsCount, 1); 1095 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 1096 } 1097 1098 /* Long division. */ 1099 for (bit = precision; bit; bit -= 1) { 1100 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 1101 APInt::tcSubtract(dividend, divisor, 0, partsCount); 1102 APInt::tcSetBit(lhsSignificand, bit - 1); 1103 } 1104 1105 APInt::tcShiftLeft(dividend, partsCount, 1); 1106 } 1107 1108 /* Figure out the lost fraction. */ 1109 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 1110 1111 if (cmp > 0) 1112 lost_fraction = lfMoreThanHalf; 1113 else if (cmp == 0) 1114 lost_fraction = lfExactlyHalf; 1115 else if (APInt::tcIsZero(dividend, partsCount)) 1116 lost_fraction = lfExactlyZero; 1117 else 1118 lost_fraction = lfLessThanHalf; 1119 1120 if (partsCount > 2) 1121 delete [] dividend; 1122 1123 return lost_fraction; 1124 } 1125 1126 unsigned int 1127 APFloat::significandMSB() const 1128 { 1129 return APInt::tcMSB(significandParts(), partCount()); 1130 } 1131 1132 unsigned int 1133 APFloat::significandLSB() const 1134 { 1135 return APInt::tcLSB(significandParts(), partCount()); 1136 } 1137 1138 /* Note that a zero result is NOT normalized to fcZero. */ 1139 lostFraction 1140 APFloat::shiftSignificandRight(unsigned int bits) 1141 { 1142 /* Our exponent should not overflow. */ 1143 assert((ExponentType) (exponent + bits) >= exponent); 1144 1145 exponent += bits; 1146 1147 return shiftRight(significandParts(), partCount(), bits); 1148 } 1149 1150 /* Shift the significand left BITS bits, subtract BITS from its exponent. */ 1151 void 1152 APFloat::shiftSignificandLeft(unsigned int bits) 1153 { 1154 assert(bits < semantics->precision); 1155 1156 if (bits) { 1157 unsigned int partsCount = partCount(); 1158 1159 APInt::tcShiftLeft(significandParts(), partsCount, bits); 1160 exponent -= bits; 1161 1162 assert(!APInt::tcIsZero(significandParts(), partsCount)); 1163 } 1164 } 1165 1166 APFloat::cmpResult 1167 APFloat::compareAbsoluteValue(const APFloat &rhs) const 1168 { 1169 int compare; 1170 1171 assert(semantics == rhs.semantics); 1172 assert(isFiniteNonZero()); 1173 assert(rhs.isFiniteNonZero()); 1174 1175 compare = exponent - rhs.exponent; 1176 1177 /* If exponents are equal, do an unsigned bignum comparison of the 1178 significands. */ 1179 if (compare == 0) 1180 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1181 partCount()); 1182 1183 if (compare > 0) 1184 return cmpGreaterThan; 1185 else if (compare < 0) 1186 return cmpLessThan; 1187 else 1188 return cmpEqual; 1189 } 1190 1191 /* Handle overflow. Sign is preserved. We either become infinity or 1192 the largest finite number. */ 1193 APFloat::opStatus 1194 APFloat::handleOverflow(roundingMode rounding_mode) 1195 { 1196 /* Infinity? */ 1197 if (rounding_mode == rmNearestTiesToEven || 1198 rounding_mode == rmNearestTiesToAway || 1199 (rounding_mode == rmTowardPositive && !sign) || 1200 (rounding_mode == rmTowardNegative && sign)) { 1201 category = fcInfinity; 1202 return (opStatus) (opOverflow | opInexact); 1203 } 1204 1205 /* Otherwise we become the largest finite number. */ 1206 category = fcNormal; 1207 exponent = semantics->maxExponent; 1208 APInt::tcSetLeastSignificantBits(significandParts(), partCount(), 1209 semantics->precision); 1210 1211 return opInexact; 1212 } 1213 1214 /* Returns TRUE if, when truncating the current number, with BIT the 1215 new LSB, with the given lost fraction and rounding mode, the result 1216 would need to be rounded away from zero (i.e., by increasing the 1217 signficand). This routine must work for fcZero of both signs, and 1218 fcNormal numbers. */ 1219 bool 1220 APFloat::roundAwayFromZero(roundingMode rounding_mode, 1221 lostFraction lost_fraction, 1222 unsigned int bit) const 1223 { 1224 /* NaNs and infinities should not have lost fractions. */ 1225 assert(isFiniteNonZero() || category == fcZero); 1226 1227 /* Current callers never pass this so we don't handle it. */ 1228 assert(lost_fraction != lfExactlyZero); 1229 1230 switch (rounding_mode) { 1231 case rmNearestTiesToAway: 1232 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1233 1234 case rmNearestTiesToEven: 1235 if (lost_fraction == lfMoreThanHalf) 1236 return true; 1237 1238 /* Our zeroes don't have a significand to test. */ 1239 if (lost_fraction == lfExactlyHalf && category != fcZero) 1240 return APInt::tcExtractBit(significandParts(), bit); 1241 1242 return false; 1243 1244 case rmTowardZero: 1245 return false; 1246 1247 case rmTowardPositive: 1248 return !sign; 1249 1250 case rmTowardNegative: 1251 return sign; 1252 } 1253 llvm_unreachable("Invalid rounding mode found"); 1254 } 1255 1256 APFloat::opStatus 1257 APFloat::normalize(roundingMode rounding_mode, 1258 lostFraction lost_fraction) 1259 { 1260 unsigned int omsb; /* One, not zero, based MSB. */ 1261 int exponentChange; 1262 1263 if (!isFiniteNonZero()) 1264 return opOK; 1265 1266 /* Before rounding normalize the exponent of fcNormal numbers. */ 1267 omsb = significandMSB() + 1; 1268 1269 if (omsb) { 1270 /* OMSB is numbered from 1. We want to place it in the integer 1271 bit numbered PRECISION if possible, with a compensating change in 1272 the exponent. */ 1273 exponentChange = omsb - semantics->precision; 1274 1275 /* If the resulting exponent is too high, overflow according to 1276 the rounding mode. */ 1277 if (exponent + exponentChange > semantics->maxExponent) 1278 return handleOverflow(rounding_mode); 1279 1280 /* Subnormal numbers have exponent minExponent, and their MSB 1281 is forced based on that. */ 1282 if (exponent + exponentChange < semantics->minExponent) 1283 exponentChange = semantics->minExponent - exponent; 1284 1285 /* Shifting left is easy as we don't lose precision. */ 1286 if (exponentChange < 0) { 1287 assert(lost_fraction == lfExactlyZero); 1288 1289 shiftSignificandLeft(-exponentChange); 1290 1291 return opOK; 1292 } 1293 1294 if (exponentChange > 0) { 1295 lostFraction lf; 1296 1297 /* Shift right and capture any new lost fraction. */ 1298 lf = shiftSignificandRight(exponentChange); 1299 1300 lost_fraction = combineLostFractions(lf, lost_fraction); 1301 1302 /* Keep OMSB up-to-date. */ 1303 if (omsb > (unsigned) exponentChange) 1304 omsb -= exponentChange; 1305 else 1306 omsb = 0; 1307 } 1308 } 1309 1310 /* Now round the number according to rounding_mode given the lost 1311 fraction. */ 1312 1313 /* As specified in IEEE 754, since we do not trap we do not report 1314 underflow for exact results. */ 1315 if (lost_fraction == lfExactlyZero) { 1316 /* Canonicalize zeroes. */ 1317 if (omsb == 0) 1318 category = fcZero; 1319 1320 return opOK; 1321 } 1322 1323 /* Increment the significand if we're rounding away from zero. */ 1324 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1325 if (omsb == 0) 1326 exponent = semantics->minExponent; 1327 1328 incrementSignificand(); 1329 omsb = significandMSB() + 1; 1330 1331 /* Did the significand increment overflow? */ 1332 if (omsb == (unsigned) semantics->precision + 1) { 1333 /* Renormalize by incrementing the exponent and shifting our 1334 significand right one. However if we already have the 1335 maximum exponent we overflow to infinity. */ 1336 if (exponent == semantics->maxExponent) { 1337 category = fcInfinity; 1338 1339 return (opStatus) (opOverflow | opInexact); 1340 } 1341 1342 shiftSignificandRight(1); 1343 1344 return opInexact; 1345 } 1346 } 1347 1348 /* The normal case - we were and are not denormal, and any 1349 significand increment above didn't overflow. */ 1350 if (omsb == semantics->precision) 1351 return opInexact; 1352 1353 /* We have a non-zero denormal. */ 1354 assert(omsb < semantics->precision); 1355 1356 /* Canonicalize zeroes. */ 1357 if (omsb == 0) 1358 category = fcZero; 1359 1360 /* The fcZero case is a denormal that underflowed to zero. */ 1361 return (opStatus) (opUnderflow | opInexact); 1362 } 1363 1364 APFloat::opStatus 1365 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) 1366 { 1367 switch (PackCategoriesIntoKey(category, rhs.category)) { 1368 default: 1369 llvm_unreachable(nullptr); 1370 1371 case PackCategoriesIntoKey(fcNaN, fcZero): 1372 case PackCategoriesIntoKey(fcNaN, fcNormal): 1373 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1374 case PackCategoriesIntoKey(fcNaN, fcNaN): 1375 case PackCategoriesIntoKey(fcNormal, fcZero): 1376 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1377 case PackCategoriesIntoKey(fcInfinity, fcZero): 1378 return opOK; 1379 1380 case PackCategoriesIntoKey(fcZero, fcNaN): 1381 case PackCategoriesIntoKey(fcNormal, fcNaN): 1382 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1383 // We need to be sure to flip the sign here for subtraction because we 1384 // don't have a separate negate operation so -NaN becomes 0 - NaN here. 1385 sign = rhs.sign ^ subtract; 1386 category = fcNaN; 1387 copySignificand(rhs); 1388 return opOK; 1389 1390 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1391 case PackCategoriesIntoKey(fcZero, fcInfinity): 1392 category = fcInfinity; 1393 sign = rhs.sign ^ subtract; 1394 return opOK; 1395 1396 case PackCategoriesIntoKey(fcZero, fcNormal): 1397 assign(rhs); 1398 sign = rhs.sign ^ subtract; 1399 return opOK; 1400 1401 case PackCategoriesIntoKey(fcZero, fcZero): 1402 /* Sign depends on rounding mode; handled by caller. */ 1403 return opOK; 1404 1405 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1406 /* Differently signed infinities can only be validly 1407 subtracted. */ 1408 if (((sign ^ rhs.sign)!=0) != subtract) { 1409 makeNaN(); 1410 return opInvalidOp; 1411 } 1412 1413 return opOK; 1414 1415 case PackCategoriesIntoKey(fcNormal, fcNormal): 1416 return opDivByZero; 1417 } 1418 } 1419 1420 /* Add or subtract two normal numbers. */ 1421 lostFraction 1422 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) 1423 { 1424 integerPart carry; 1425 lostFraction lost_fraction; 1426 int bits; 1427 1428 /* Determine if the operation on the absolute values is effectively 1429 an addition or subtraction. */ 1430 subtract ^= static_cast<bool>(sign ^ rhs.sign); 1431 1432 /* Are we bigger exponent-wise than the RHS? */ 1433 bits = exponent - rhs.exponent; 1434 1435 /* Subtraction is more subtle than one might naively expect. */ 1436 if (subtract) { 1437 APFloat temp_rhs(rhs); 1438 bool reverse; 1439 1440 if (bits == 0) { 1441 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan; 1442 lost_fraction = lfExactlyZero; 1443 } else if (bits > 0) { 1444 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1445 shiftSignificandLeft(1); 1446 reverse = false; 1447 } else { 1448 lost_fraction = shiftSignificandRight(-bits - 1); 1449 temp_rhs.shiftSignificandLeft(1); 1450 reverse = true; 1451 } 1452 1453 if (reverse) { 1454 carry = temp_rhs.subtractSignificand 1455 (*this, lost_fraction != lfExactlyZero); 1456 copySignificand(temp_rhs); 1457 sign = !sign; 1458 } else { 1459 carry = subtractSignificand 1460 (temp_rhs, lost_fraction != lfExactlyZero); 1461 } 1462 1463 /* Invert the lost fraction - it was on the RHS and 1464 subtracted. */ 1465 if (lost_fraction == lfLessThanHalf) 1466 lost_fraction = lfMoreThanHalf; 1467 else if (lost_fraction == lfMoreThanHalf) 1468 lost_fraction = lfLessThanHalf; 1469 1470 /* The code above is intended to ensure that no borrow is 1471 necessary. */ 1472 assert(!carry); 1473 (void)carry; 1474 } else { 1475 if (bits > 0) { 1476 APFloat temp_rhs(rhs); 1477 1478 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1479 carry = addSignificand(temp_rhs); 1480 } else { 1481 lost_fraction = shiftSignificandRight(-bits); 1482 carry = addSignificand(rhs); 1483 } 1484 1485 /* We have a guard bit; generating a carry cannot happen. */ 1486 assert(!carry); 1487 (void)carry; 1488 } 1489 1490 return lost_fraction; 1491 } 1492 1493 APFloat::opStatus 1494 APFloat::multiplySpecials(const APFloat &rhs) 1495 { 1496 switch (PackCategoriesIntoKey(category, rhs.category)) { 1497 default: 1498 llvm_unreachable(nullptr); 1499 1500 case PackCategoriesIntoKey(fcNaN, fcZero): 1501 case PackCategoriesIntoKey(fcNaN, fcNormal): 1502 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1503 case PackCategoriesIntoKey(fcNaN, fcNaN): 1504 sign = false; 1505 return opOK; 1506 1507 case PackCategoriesIntoKey(fcZero, fcNaN): 1508 case PackCategoriesIntoKey(fcNormal, fcNaN): 1509 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1510 sign = false; 1511 category = fcNaN; 1512 copySignificand(rhs); 1513 return opOK; 1514 1515 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1516 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1517 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1518 category = fcInfinity; 1519 return opOK; 1520 1521 case PackCategoriesIntoKey(fcZero, fcNormal): 1522 case PackCategoriesIntoKey(fcNormal, fcZero): 1523 case PackCategoriesIntoKey(fcZero, fcZero): 1524 category = fcZero; 1525 return opOK; 1526 1527 case PackCategoriesIntoKey(fcZero, fcInfinity): 1528 case PackCategoriesIntoKey(fcInfinity, fcZero): 1529 makeNaN(); 1530 return opInvalidOp; 1531 1532 case PackCategoriesIntoKey(fcNormal, fcNormal): 1533 return opOK; 1534 } 1535 } 1536 1537 APFloat::opStatus 1538 APFloat::divideSpecials(const APFloat &rhs) 1539 { 1540 switch (PackCategoriesIntoKey(category, rhs.category)) { 1541 default: 1542 llvm_unreachable(nullptr); 1543 1544 case PackCategoriesIntoKey(fcZero, fcNaN): 1545 case PackCategoriesIntoKey(fcNormal, fcNaN): 1546 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1547 category = fcNaN; 1548 copySignificand(rhs); 1549 case PackCategoriesIntoKey(fcNaN, fcZero): 1550 case PackCategoriesIntoKey(fcNaN, fcNormal): 1551 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1552 case PackCategoriesIntoKey(fcNaN, fcNaN): 1553 sign = false; 1554 case PackCategoriesIntoKey(fcInfinity, fcZero): 1555 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1556 case PackCategoriesIntoKey(fcZero, fcInfinity): 1557 case PackCategoriesIntoKey(fcZero, fcNormal): 1558 return opOK; 1559 1560 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1561 category = fcZero; 1562 return opOK; 1563 1564 case PackCategoriesIntoKey(fcNormal, fcZero): 1565 category = fcInfinity; 1566 return opDivByZero; 1567 1568 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1569 case PackCategoriesIntoKey(fcZero, fcZero): 1570 makeNaN(); 1571 return opInvalidOp; 1572 1573 case PackCategoriesIntoKey(fcNormal, fcNormal): 1574 return opOK; 1575 } 1576 } 1577 1578 APFloat::opStatus 1579 APFloat::modSpecials(const APFloat &rhs) 1580 { 1581 switch (PackCategoriesIntoKey(category, rhs.category)) { 1582 default: 1583 llvm_unreachable(nullptr); 1584 1585 case PackCategoriesIntoKey(fcNaN, fcZero): 1586 case PackCategoriesIntoKey(fcNaN, fcNormal): 1587 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1588 case PackCategoriesIntoKey(fcNaN, fcNaN): 1589 case PackCategoriesIntoKey(fcZero, fcInfinity): 1590 case PackCategoriesIntoKey(fcZero, fcNormal): 1591 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1592 return opOK; 1593 1594 case PackCategoriesIntoKey(fcZero, fcNaN): 1595 case PackCategoriesIntoKey(fcNormal, fcNaN): 1596 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1597 sign = false; 1598 category = fcNaN; 1599 copySignificand(rhs); 1600 return opOK; 1601 1602 case PackCategoriesIntoKey(fcNormal, fcZero): 1603 case PackCategoriesIntoKey(fcInfinity, fcZero): 1604 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1605 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1606 case PackCategoriesIntoKey(fcZero, fcZero): 1607 makeNaN(); 1608 return opInvalidOp; 1609 1610 case PackCategoriesIntoKey(fcNormal, fcNormal): 1611 return opOK; 1612 } 1613 } 1614 1615 /* Change sign. */ 1616 void 1617 APFloat::changeSign() 1618 { 1619 /* Look mummy, this one's easy. */ 1620 sign = !sign; 1621 } 1622 1623 void 1624 APFloat::clearSign() 1625 { 1626 /* So is this one. */ 1627 sign = 0; 1628 } 1629 1630 void 1631 APFloat::copySign(const APFloat &rhs) 1632 { 1633 /* And this one. */ 1634 sign = rhs.sign; 1635 } 1636 1637 /* Normalized addition or subtraction. */ 1638 APFloat::opStatus 1639 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, 1640 bool subtract) 1641 { 1642 opStatus fs; 1643 1644 fs = addOrSubtractSpecials(rhs, subtract); 1645 1646 /* This return code means it was not a simple case. */ 1647 if (fs == opDivByZero) { 1648 lostFraction lost_fraction; 1649 1650 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1651 fs = normalize(rounding_mode, lost_fraction); 1652 1653 /* Can only be zero if we lost no fraction. */ 1654 assert(category != fcZero || lost_fraction == lfExactlyZero); 1655 } 1656 1657 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1658 positive zero unless rounding to minus infinity, except that 1659 adding two like-signed zeroes gives that zero. */ 1660 if (category == fcZero) { 1661 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 1662 sign = (rounding_mode == rmTowardNegative); 1663 } 1664 1665 return fs; 1666 } 1667 1668 /* Normalized addition. */ 1669 APFloat::opStatus 1670 APFloat::add(const APFloat &rhs, roundingMode rounding_mode) 1671 { 1672 return addOrSubtract(rhs, rounding_mode, false); 1673 } 1674 1675 /* Normalized subtraction. */ 1676 APFloat::opStatus 1677 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode) 1678 { 1679 return addOrSubtract(rhs, rounding_mode, true); 1680 } 1681 1682 /* Normalized multiply. */ 1683 APFloat::opStatus 1684 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) 1685 { 1686 opStatus fs; 1687 1688 sign ^= rhs.sign; 1689 fs = multiplySpecials(rhs); 1690 1691 if (isFiniteNonZero()) { 1692 lostFraction lost_fraction = multiplySignificand(rhs, nullptr); 1693 fs = normalize(rounding_mode, lost_fraction); 1694 if (lost_fraction != lfExactlyZero) 1695 fs = (opStatus) (fs | opInexact); 1696 } 1697 1698 return fs; 1699 } 1700 1701 /* Normalized divide. */ 1702 APFloat::opStatus 1703 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) 1704 { 1705 opStatus fs; 1706 1707 sign ^= rhs.sign; 1708 fs = divideSpecials(rhs); 1709 1710 if (isFiniteNonZero()) { 1711 lostFraction lost_fraction = divideSignificand(rhs); 1712 fs = normalize(rounding_mode, lost_fraction); 1713 if (lost_fraction != lfExactlyZero) 1714 fs = (opStatus) (fs | opInexact); 1715 } 1716 1717 return fs; 1718 } 1719 1720 /* Normalized remainder. This is not currently correct in all cases. */ 1721 APFloat::opStatus 1722 APFloat::remainder(const APFloat &rhs) 1723 { 1724 opStatus fs; 1725 APFloat V = *this; 1726 unsigned int origSign = sign; 1727 1728 fs = V.divide(rhs, rmNearestTiesToEven); 1729 if (fs == opDivByZero) 1730 return fs; 1731 1732 int parts = partCount(); 1733 integerPart *x = new integerPart[parts]; 1734 bool ignored; 1735 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1736 rmNearestTiesToEven, &ignored); 1737 if (fs==opInvalidOp) 1738 return fs; 1739 1740 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1741 rmNearestTiesToEven); 1742 assert(fs==opOK); // should always work 1743 1744 fs = V.multiply(rhs, rmNearestTiesToEven); 1745 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1746 1747 fs = subtract(V, rmNearestTiesToEven); 1748 assert(fs==opOK || fs==opInexact); // likewise 1749 1750 if (isZero()) 1751 sign = origSign; // IEEE754 requires this 1752 delete[] x; 1753 return fs; 1754 } 1755 1756 /* Normalized llvm frem (C fmod). 1757 This is not currently correct in all cases. */ 1758 APFloat::opStatus 1759 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) 1760 { 1761 opStatus fs; 1762 fs = modSpecials(rhs); 1763 1764 if (isFiniteNonZero() && rhs.isFiniteNonZero()) { 1765 APFloat V = *this; 1766 unsigned int origSign = sign; 1767 1768 fs = V.divide(rhs, rmNearestTiesToEven); 1769 if (fs == opDivByZero) 1770 return fs; 1771 1772 int parts = partCount(); 1773 integerPart *x = new integerPart[parts]; 1774 bool ignored; 1775 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1776 rmTowardZero, &ignored); 1777 if (fs==opInvalidOp) 1778 return fs; 1779 1780 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1781 rmNearestTiesToEven); 1782 assert(fs==opOK); // should always work 1783 1784 fs = V.multiply(rhs, rounding_mode); 1785 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1786 1787 fs = subtract(V, rounding_mode); 1788 assert(fs==opOK || fs==opInexact); // likewise 1789 1790 if (isZero()) 1791 sign = origSign; // IEEE754 requires this 1792 delete[] x; 1793 } 1794 return fs; 1795 } 1796 1797 /* Normalized fused-multiply-add. */ 1798 APFloat::opStatus 1799 APFloat::fusedMultiplyAdd(const APFloat &multiplicand, 1800 const APFloat &addend, 1801 roundingMode rounding_mode) 1802 { 1803 opStatus fs; 1804 1805 /* Post-multiplication sign, before addition. */ 1806 sign ^= multiplicand.sign; 1807 1808 /* If and only if all arguments are normal do we need to do an 1809 extended-precision calculation. */ 1810 if (isFiniteNonZero() && 1811 multiplicand.isFiniteNonZero() && 1812 addend.isFinite()) { 1813 lostFraction lost_fraction; 1814 1815 lost_fraction = multiplySignificand(multiplicand, &addend); 1816 fs = normalize(rounding_mode, lost_fraction); 1817 if (lost_fraction != lfExactlyZero) 1818 fs = (opStatus) (fs | opInexact); 1819 1820 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1821 positive zero unless rounding to minus infinity, except that 1822 adding two like-signed zeroes gives that zero. */ 1823 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) 1824 sign = (rounding_mode == rmTowardNegative); 1825 } else { 1826 fs = multiplySpecials(multiplicand); 1827 1828 /* FS can only be opOK or opInvalidOp. There is no more work 1829 to do in the latter case. The IEEE-754R standard says it is 1830 implementation-defined in this case whether, if ADDEND is a 1831 quiet NaN, we raise invalid op; this implementation does so. 1832 1833 If we need to do the addition we can do so with normal 1834 precision. */ 1835 if (fs == opOK) 1836 fs = addOrSubtract(addend, rounding_mode, false); 1837 } 1838 1839 return fs; 1840 } 1841 1842 /* Rounding-mode corrrect round to integral value. */ 1843 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) { 1844 opStatus fs; 1845 1846 // If the exponent is large enough, we know that this value is already 1847 // integral, and the arithmetic below would potentially cause it to saturate 1848 // to +/-Inf. Bail out early instead. 1849 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics)) 1850 return opOK; 1851 1852 // The algorithm here is quite simple: we add 2^(p-1), where p is the 1853 // precision of our format, and then subtract it back off again. The choice 1854 // of rounding modes for the addition/subtraction determines the rounding mode 1855 // for our integral rounding as well. 1856 // NOTE: When the input value is negative, we do subtraction followed by 1857 // addition instead. 1858 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); 1859 IntegerConstant <<= semanticsPrecision(*semantics)-1; 1860 APFloat MagicConstant(*semantics); 1861 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 1862 rmNearestTiesToEven); 1863 MagicConstant.copySign(*this); 1864 1865 if (fs != opOK) 1866 return fs; 1867 1868 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly. 1869 bool inputSign = isNegative(); 1870 1871 fs = add(MagicConstant, rounding_mode); 1872 if (fs != opOK && fs != opInexact) 1873 return fs; 1874 1875 fs = subtract(MagicConstant, rounding_mode); 1876 1877 // Restore the input sign. 1878 if (inputSign != isNegative()) 1879 changeSign(); 1880 1881 return fs; 1882 } 1883 1884 1885 /* Comparison requires normalized numbers. */ 1886 APFloat::cmpResult 1887 APFloat::compare(const APFloat &rhs) const 1888 { 1889 cmpResult result; 1890 1891 assert(semantics == rhs.semantics); 1892 1893 switch (PackCategoriesIntoKey(category, rhs.category)) { 1894 default: 1895 llvm_unreachable(nullptr); 1896 1897 case PackCategoriesIntoKey(fcNaN, fcZero): 1898 case PackCategoriesIntoKey(fcNaN, fcNormal): 1899 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1900 case PackCategoriesIntoKey(fcNaN, fcNaN): 1901 case PackCategoriesIntoKey(fcZero, fcNaN): 1902 case PackCategoriesIntoKey(fcNormal, fcNaN): 1903 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1904 return cmpUnordered; 1905 1906 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1907 case PackCategoriesIntoKey(fcInfinity, fcZero): 1908 case PackCategoriesIntoKey(fcNormal, fcZero): 1909 if (sign) 1910 return cmpLessThan; 1911 else 1912 return cmpGreaterThan; 1913 1914 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1915 case PackCategoriesIntoKey(fcZero, fcInfinity): 1916 case PackCategoriesIntoKey(fcZero, fcNormal): 1917 if (rhs.sign) 1918 return cmpGreaterThan; 1919 else 1920 return cmpLessThan; 1921 1922 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1923 if (sign == rhs.sign) 1924 return cmpEqual; 1925 else if (sign) 1926 return cmpLessThan; 1927 else 1928 return cmpGreaterThan; 1929 1930 case PackCategoriesIntoKey(fcZero, fcZero): 1931 return cmpEqual; 1932 1933 case PackCategoriesIntoKey(fcNormal, fcNormal): 1934 break; 1935 } 1936 1937 /* Two normal numbers. Do they have the same sign? */ 1938 if (sign != rhs.sign) { 1939 if (sign) 1940 result = cmpLessThan; 1941 else 1942 result = cmpGreaterThan; 1943 } else { 1944 /* Compare absolute values; invert result if negative. */ 1945 result = compareAbsoluteValue(rhs); 1946 1947 if (sign) { 1948 if (result == cmpLessThan) 1949 result = cmpGreaterThan; 1950 else if (result == cmpGreaterThan) 1951 result = cmpLessThan; 1952 } 1953 } 1954 1955 return result; 1956 } 1957 1958 /// APFloat::convert - convert a value of one floating point type to another. 1959 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 1960 /// records whether the transformation lost information, i.e. whether 1961 /// converting the result back to the original type will produce the 1962 /// original value (this is almost the same as return value==fsOK, but there 1963 /// are edge cases where this is not so). 1964 1965 APFloat::opStatus 1966 APFloat::convert(const fltSemantics &toSemantics, 1967 roundingMode rounding_mode, bool *losesInfo) 1968 { 1969 lostFraction lostFraction; 1970 unsigned int newPartCount, oldPartCount; 1971 opStatus fs; 1972 int shift; 1973 const fltSemantics &fromSemantics = *semantics; 1974 1975 lostFraction = lfExactlyZero; 1976 newPartCount = partCountForBits(toSemantics.precision + 1); 1977 oldPartCount = partCount(); 1978 shift = toSemantics.precision - fromSemantics.precision; 1979 1980 bool X86SpecialNan = false; 1981 if (&fromSemantics == &APFloat::x87DoubleExtended && 1982 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN && 1983 (!(*significandParts() & 0x8000000000000000ULL) || 1984 !(*significandParts() & 0x4000000000000000ULL))) { 1985 // x86 has some unusual NaNs which cannot be represented in any other 1986 // format; note them here. 1987 X86SpecialNan = true; 1988 } 1989 1990 // If this is a truncation of a denormal number, and the target semantics 1991 // has larger exponent range than the source semantics (this can happen 1992 // when truncating from PowerPC double-double to double format), the 1993 // right shift could lose result mantissa bits. Adjust exponent instead 1994 // of performing excessive shift. 1995 if (shift < 0 && isFiniteNonZero()) { 1996 int exponentChange = significandMSB() + 1 - fromSemantics.precision; 1997 if (exponent + exponentChange < toSemantics.minExponent) 1998 exponentChange = toSemantics.minExponent - exponent; 1999 if (exponentChange < shift) 2000 exponentChange = shift; 2001 if (exponentChange < 0) { 2002 shift -= exponentChange; 2003 exponent += exponentChange; 2004 } 2005 } 2006 2007 // If this is a truncation, perform the shift before we narrow the storage. 2008 if (shift < 0 && (isFiniteNonZero() || category==fcNaN)) 2009 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 2010 2011 // Fix the storage so it can hold to new value. 2012 if (newPartCount > oldPartCount) { 2013 // The new type requires more storage; make it available. 2014 integerPart *newParts; 2015 newParts = new integerPart[newPartCount]; 2016 APInt::tcSet(newParts, 0, newPartCount); 2017 if (isFiniteNonZero() || category==fcNaN) 2018 APInt::tcAssign(newParts, significandParts(), oldPartCount); 2019 freeSignificand(); 2020 significand.parts = newParts; 2021 } else if (newPartCount == 1 && oldPartCount != 1) { 2022 // Switch to built-in storage for a single part. 2023 integerPart newPart = 0; 2024 if (isFiniteNonZero() || category==fcNaN) 2025 newPart = significandParts()[0]; 2026 freeSignificand(); 2027 significand.part = newPart; 2028 } 2029 2030 // Now that we have the right storage, switch the semantics. 2031 semantics = &toSemantics; 2032 2033 // If this is an extension, perform the shift now that the storage is 2034 // available. 2035 if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) 2036 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 2037 2038 if (isFiniteNonZero()) { 2039 fs = normalize(rounding_mode, lostFraction); 2040 *losesInfo = (fs != opOK); 2041 } else if (category == fcNaN) { 2042 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 2043 2044 // For x87 extended precision, we want to make a NaN, not a special NaN if 2045 // the input wasn't special either. 2046 if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended) 2047 APInt::tcSetBit(significandParts(), semantics->precision - 1); 2048 2049 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) 2050 // does not give you back the same bits. This is dubious, and we 2051 // don't currently do it. You're really supposed to get 2052 // an invalid operation signal at runtime, but nobody does that. 2053 fs = opOK; 2054 } else { 2055 *losesInfo = false; 2056 fs = opOK; 2057 } 2058 2059 return fs; 2060 } 2061 2062 /* Convert a floating point number to an integer according to the 2063 rounding mode. If the rounded integer value is out of range this 2064 returns an invalid operation exception and the contents of the 2065 destination parts are unspecified. If the rounded value is in 2066 range but the floating point number is not the exact integer, the C 2067 standard doesn't require an inexact exception to be raised. IEEE 2068 854 does require it so we do that. 2069 2070 Note that for conversions to integer type the C standard requires 2071 round-to-zero to always be used. */ 2072 APFloat::opStatus 2073 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width, 2074 bool isSigned, 2075 roundingMode rounding_mode, 2076 bool *isExact) const 2077 { 2078 lostFraction lost_fraction; 2079 const integerPart *src; 2080 unsigned int dstPartsCount, truncatedBits; 2081 2082 *isExact = false; 2083 2084 /* Handle the three special cases first. */ 2085 if (category == fcInfinity || category == fcNaN) 2086 return opInvalidOp; 2087 2088 dstPartsCount = partCountForBits(width); 2089 2090 if (category == fcZero) { 2091 APInt::tcSet(parts, 0, dstPartsCount); 2092 // Negative zero can't be represented as an int. 2093 *isExact = !sign; 2094 return opOK; 2095 } 2096 2097 src = significandParts(); 2098 2099 /* Step 1: place our absolute value, with any fraction truncated, in 2100 the destination. */ 2101 if (exponent < 0) { 2102 /* Our absolute value is less than one; truncate everything. */ 2103 APInt::tcSet(parts, 0, dstPartsCount); 2104 /* For exponent -1 the integer bit represents .5, look at that. 2105 For smaller exponents leftmost truncated bit is 0. */ 2106 truncatedBits = semantics->precision -1U - exponent; 2107 } else { 2108 /* We want the most significant (exponent + 1) bits; the rest are 2109 truncated. */ 2110 unsigned int bits = exponent + 1U; 2111 2112 /* Hopelessly large in magnitude? */ 2113 if (bits > width) 2114 return opInvalidOp; 2115 2116 if (bits < semantics->precision) { 2117 /* We truncate (semantics->precision - bits) bits. */ 2118 truncatedBits = semantics->precision - bits; 2119 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits); 2120 } else { 2121 /* We want at least as many bits as are available. */ 2122 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0); 2123 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision); 2124 truncatedBits = 0; 2125 } 2126 } 2127 2128 /* Step 2: work out any lost fraction, and increment the absolute 2129 value if we would round away from zero. */ 2130 if (truncatedBits) { 2131 lost_fraction = lostFractionThroughTruncation(src, partCount(), 2132 truncatedBits); 2133 if (lost_fraction != lfExactlyZero && 2134 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2135 if (APInt::tcIncrement(parts, dstPartsCount)) 2136 return opInvalidOp; /* Overflow. */ 2137 } 2138 } else { 2139 lost_fraction = lfExactlyZero; 2140 } 2141 2142 /* Step 3: check if we fit in the destination. */ 2143 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1; 2144 2145 if (sign) { 2146 if (!isSigned) { 2147 /* Negative numbers cannot be represented as unsigned. */ 2148 if (omsb != 0) 2149 return opInvalidOp; 2150 } else { 2151 /* It takes omsb bits to represent the unsigned integer value. 2152 We lose a bit for the sign, but care is needed as the 2153 maximally negative integer is a special case. */ 2154 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb) 2155 return opInvalidOp; 2156 2157 /* This case can happen because of rounding. */ 2158 if (omsb > width) 2159 return opInvalidOp; 2160 } 2161 2162 APInt::tcNegate (parts, dstPartsCount); 2163 } else { 2164 if (omsb >= width + !isSigned) 2165 return opInvalidOp; 2166 } 2167 2168 if (lost_fraction == lfExactlyZero) { 2169 *isExact = true; 2170 return opOK; 2171 } else 2172 return opInexact; 2173 } 2174 2175 /* Same as convertToSignExtendedInteger, except we provide 2176 deterministic values in case of an invalid operation exception, 2177 namely zero for NaNs and the minimal or maximal value respectively 2178 for underflow or overflow. 2179 The *isExact output tells whether the result is exact, in the sense 2180 that converting it back to the original floating point type produces 2181 the original value. This is almost equivalent to result==opOK, 2182 except for negative zeroes. 2183 */ 2184 APFloat::opStatus 2185 APFloat::convertToInteger(integerPart *parts, unsigned int width, 2186 bool isSigned, 2187 roundingMode rounding_mode, bool *isExact) const 2188 { 2189 opStatus fs; 2190 2191 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2192 isExact); 2193 2194 if (fs == opInvalidOp) { 2195 unsigned int bits, dstPartsCount; 2196 2197 dstPartsCount = partCountForBits(width); 2198 2199 if (category == fcNaN) 2200 bits = 0; 2201 else if (sign) 2202 bits = isSigned; 2203 else 2204 bits = width - isSigned; 2205 2206 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits); 2207 if (sign && isSigned) 2208 APInt::tcShiftLeft(parts, dstPartsCount, width - 1); 2209 } 2210 2211 return fs; 2212 } 2213 2214 /* Same as convertToInteger(integerPart*, ...), except the result is returned in 2215 an APSInt, whose initial bit-width and signed-ness are used to determine the 2216 precision of the conversion. 2217 */ 2218 APFloat::opStatus 2219 APFloat::convertToInteger(APSInt &result, 2220 roundingMode rounding_mode, bool *isExact) const 2221 { 2222 unsigned bitWidth = result.getBitWidth(); 2223 SmallVector<uint64_t, 4> parts(result.getNumWords()); 2224 opStatus status = convertToInteger( 2225 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact); 2226 // Keeps the original signed-ness. 2227 result = APInt(bitWidth, parts); 2228 return status; 2229 } 2230 2231 /* Convert an unsigned integer SRC to a floating point number, 2232 rounding according to ROUNDING_MODE. The sign of the floating 2233 point number is not modified. */ 2234 APFloat::opStatus 2235 APFloat::convertFromUnsignedParts(const integerPart *src, 2236 unsigned int srcCount, 2237 roundingMode rounding_mode) 2238 { 2239 unsigned int omsb, precision, dstCount; 2240 integerPart *dst; 2241 lostFraction lost_fraction; 2242 2243 category = fcNormal; 2244 omsb = APInt::tcMSB(src, srcCount) + 1; 2245 dst = significandParts(); 2246 dstCount = partCount(); 2247 precision = semantics->precision; 2248 2249 /* We want the most significant PRECISION bits of SRC. There may not 2250 be that many; extract what we can. */ 2251 if (precision <= omsb) { 2252 exponent = omsb - 1; 2253 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2254 omsb - precision); 2255 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2256 } else { 2257 exponent = precision - 1; 2258 lost_fraction = lfExactlyZero; 2259 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2260 } 2261 2262 return normalize(rounding_mode, lost_fraction); 2263 } 2264 2265 APFloat::opStatus 2266 APFloat::convertFromAPInt(const APInt &Val, 2267 bool isSigned, 2268 roundingMode rounding_mode) 2269 { 2270 unsigned int partCount = Val.getNumWords(); 2271 APInt api = Val; 2272 2273 sign = false; 2274 if (isSigned && api.isNegative()) { 2275 sign = true; 2276 api = -api; 2277 } 2278 2279 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2280 } 2281 2282 /* Convert a two's complement integer SRC to a floating point number, 2283 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2284 integer is signed, in which case it must be sign-extended. */ 2285 APFloat::opStatus 2286 APFloat::convertFromSignExtendedInteger(const integerPart *src, 2287 unsigned int srcCount, 2288 bool isSigned, 2289 roundingMode rounding_mode) 2290 { 2291 opStatus status; 2292 2293 if (isSigned && 2294 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2295 integerPart *copy; 2296 2297 /* If we're signed and negative negate a copy. */ 2298 sign = true; 2299 copy = new integerPart[srcCount]; 2300 APInt::tcAssign(copy, src, srcCount); 2301 APInt::tcNegate(copy, srcCount); 2302 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2303 delete [] copy; 2304 } else { 2305 sign = false; 2306 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2307 } 2308 2309 return status; 2310 } 2311 2312 /* FIXME: should this just take a const APInt reference? */ 2313 APFloat::opStatus 2314 APFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2315 unsigned int width, bool isSigned, 2316 roundingMode rounding_mode) 2317 { 2318 unsigned int partCount = partCountForBits(width); 2319 APInt api = APInt(width, makeArrayRef(parts, partCount)); 2320 2321 sign = false; 2322 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2323 sign = true; 2324 api = -api; 2325 } 2326 2327 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2328 } 2329 2330 APFloat::opStatus 2331 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode) 2332 { 2333 lostFraction lost_fraction = lfExactlyZero; 2334 2335 category = fcNormal; 2336 zeroSignificand(); 2337 exponent = 0; 2338 2339 integerPart *significand = significandParts(); 2340 unsigned partsCount = partCount(); 2341 unsigned bitPos = partsCount * integerPartWidth; 2342 bool computedTrailingFraction = false; 2343 2344 // Skip leading zeroes and any (hexa)decimal point. 2345 StringRef::iterator begin = s.begin(); 2346 StringRef::iterator end = s.end(); 2347 StringRef::iterator dot; 2348 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2349 StringRef::iterator firstSignificantDigit = p; 2350 2351 while (p != end) { 2352 integerPart hex_value; 2353 2354 if (*p == '.') { 2355 assert(dot == end && "String contains multiple dots"); 2356 dot = p++; 2357 continue; 2358 } 2359 2360 hex_value = hexDigitValue(*p); 2361 if (hex_value == -1U) 2362 break; 2363 2364 p++; 2365 2366 // Store the number while we have space. 2367 if (bitPos) { 2368 bitPos -= 4; 2369 hex_value <<= bitPos % integerPartWidth; 2370 significand[bitPos / integerPartWidth] |= hex_value; 2371 } else if (!computedTrailingFraction) { 2372 lost_fraction = trailingHexadecimalFraction(p, end, hex_value); 2373 computedTrailingFraction = true; 2374 } 2375 } 2376 2377 /* Hex floats require an exponent but not a hexadecimal point. */ 2378 assert(p != end && "Hex strings require an exponent"); 2379 assert((*p == 'p' || *p == 'P') && "Invalid character in significand"); 2380 assert(p != begin && "Significand has no digits"); 2381 assert((dot == end || p - begin != 1) && "Significand has no digits"); 2382 2383 /* Ignore the exponent if we are zero. */ 2384 if (p != firstSignificantDigit) { 2385 int expAdjustment; 2386 2387 /* Implicit hexadecimal point? */ 2388 if (dot == end) 2389 dot = p; 2390 2391 /* Calculate the exponent adjustment implicit in the number of 2392 significant digits. */ 2393 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 2394 if (expAdjustment < 0) 2395 expAdjustment++; 2396 expAdjustment = expAdjustment * 4 - 1; 2397 2398 /* Adjust for writing the significand starting at the most 2399 significant nibble. */ 2400 expAdjustment += semantics->precision; 2401 expAdjustment -= partsCount * integerPartWidth; 2402 2403 /* Adjust for the given exponent. */ 2404 exponent = totalExponent(p + 1, end, expAdjustment); 2405 } 2406 2407 return normalize(rounding_mode, lost_fraction); 2408 } 2409 2410 APFloat::opStatus 2411 APFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2412 unsigned sigPartCount, int exp, 2413 roundingMode rounding_mode) 2414 { 2415 unsigned int parts, pow5PartCount; 2416 fltSemantics calcSemantics = { 32767, -32767, 0, 0 }; 2417 integerPart pow5Parts[maxPowerOfFiveParts]; 2418 bool isNearest; 2419 2420 isNearest = (rounding_mode == rmNearestTiesToEven || 2421 rounding_mode == rmNearestTiesToAway); 2422 2423 parts = partCountForBits(semantics->precision + 11); 2424 2425 /* Calculate pow(5, abs(exp)). */ 2426 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2427 2428 for (;; parts *= 2) { 2429 opStatus sigStatus, powStatus; 2430 unsigned int excessPrecision, truncatedBits; 2431 2432 calcSemantics.precision = parts * integerPartWidth - 1; 2433 excessPrecision = calcSemantics.precision - semantics->precision; 2434 truncatedBits = excessPrecision; 2435 2436 APFloat decSig = APFloat::getZero(calcSemantics, sign); 2437 APFloat pow5(calcSemantics); 2438 2439 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2440 rmNearestTiesToEven); 2441 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2442 rmNearestTiesToEven); 2443 /* Add exp, as 10^n = 5^n * 2^n. */ 2444 decSig.exponent += exp; 2445 2446 lostFraction calcLostFraction; 2447 integerPart HUerr, HUdistance; 2448 unsigned int powHUerr; 2449 2450 if (exp >= 0) { 2451 /* multiplySignificand leaves the precision-th bit set to 1. */ 2452 calcLostFraction = decSig.multiplySignificand(pow5, nullptr); 2453 powHUerr = powStatus != opOK; 2454 } else { 2455 calcLostFraction = decSig.divideSignificand(pow5); 2456 /* Denormal numbers have less precision. */ 2457 if (decSig.exponent < semantics->minExponent) { 2458 excessPrecision += (semantics->minExponent - decSig.exponent); 2459 truncatedBits = excessPrecision; 2460 if (excessPrecision > calcSemantics.precision) 2461 excessPrecision = calcSemantics.precision; 2462 } 2463 /* Extra half-ulp lost in reciprocal of exponent. */ 2464 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 2465 } 2466 2467 /* Both multiplySignificand and divideSignificand return the 2468 result with the integer bit set. */ 2469 assert(APInt::tcExtractBit 2470 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2471 2472 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2473 powHUerr); 2474 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2475 excessPrecision, isNearest); 2476 2477 /* Are we guaranteed to round correctly if we truncate? */ 2478 if (HUdistance >= HUerr) { 2479 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2480 calcSemantics.precision - excessPrecision, 2481 excessPrecision); 2482 /* Take the exponent of decSig. If we tcExtract-ed less bits 2483 above we must adjust our exponent to compensate for the 2484 implicit right shift. */ 2485 exponent = (decSig.exponent + semantics->precision 2486 - (calcSemantics.precision - excessPrecision)); 2487 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2488 decSig.partCount(), 2489 truncatedBits); 2490 return normalize(rounding_mode, calcLostFraction); 2491 } 2492 } 2493 } 2494 2495 APFloat::opStatus 2496 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) 2497 { 2498 decimalInfo D; 2499 opStatus fs; 2500 2501 /* Scan the text. */ 2502 StringRef::iterator p = str.begin(); 2503 interpretDecimal(p, str.end(), &D); 2504 2505 /* Handle the quick cases. First the case of no significant digits, 2506 i.e. zero, and then exponents that are obviously too large or too 2507 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 2508 definitely overflows if 2509 2510 (exp - 1) * L >= maxExponent 2511 2512 and definitely underflows to zero where 2513 2514 (exp + 1) * L <= minExponent - precision 2515 2516 With integer arithmetic the tightest bounds for L are 2517 2518 93/28 < L < 196/59 [ numerator <= 256 ] 2519 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 2520 */ 2521 2522 // Test if we have a zero number allowing for strings with no null terminators 2523 // and zero decimals with non-zero exponents. 2524 // 2525 // We computed firstSigDigit by ignoring all zeros and dots. Thus if 2526 // D->firstSigDigit equals str.end(), every digit must be a zero and there can 2527 // be at most one dot. On the other hand, if we have a zero with a non-zero 2528 // exponent, then we know that D.firstSigDigit will be non-numeric. 2529 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { 2530 category = fcZero; 2531 fs = opOK; 2532 2533 /* Check whether the normalized exponent is high enough to overflow 2534 max during the log-rebasing in the max-exponent check below. */ 2535 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 2536 fs = handleOverflow(rounding_mode); 2537 2538 /* If it wasn't, then it also wasn't high enough to overflow max 2539 during the log-rebasing in the min-exponent check. Check that it 2540 won't overflow min in either check, then perform the min-exponent 2541 check. */ 2542 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 2543 (D.normalizedExponent + 1) * 28738 <= 2544 8651 * (semantics->minExponent - (int) semantics->precision)) { 2545 /* Underflow to zero and round. */ 2546 category = fcNormal; 2547 zeroSignificand(); 2548 fs = normalize(rounding_mode, lfLessThanHalf); 2549 2550 /* We can finally safely perform the max-exponent check. */ 2551 } else if ((D.normalizedExponent - 1) * 42039 2552 >= 12655 * semantics->maxExponent) { 2553 /* Overflow and round. */ 2554 fs = handleOverflow(rounding_mode); 2555 } else { 2556 integerPart *decSignificand; 2557 unsigned int partCount; 2558 2559 /* A tight upper bound on number of bits required to hold an 2560 N-digit decimal integer is N * 196 / 59. Allocate enough space 2561 to hold the full significand, and an extra part required by 2562 tcMultiplyPart. */ 2563 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 2564 partCount = partCountForBits(1 + 196 * partCount / 59); 2565 decSignificand = new integerPart[partCount + 1]; 2566 partCount = 0; 2567 2568 /* Convert to binary efficiently - we do almost all multiplication 2569 in an integerPart. When this would overflow do we do a single 2570 bignum multiplication, and then revert again to multiplication 2571 in an integerPart. */ 2572 do { 2573 integerPart decValue, val, multiplier; 2574 2575 val = 0; 2576 multiplier = 1; 2577 2578 do { 2579 if (*p == '.') { 2580 p++; 2581 if (p == str.end()) { 2582 break; 2583 } 2584 } 2585 decValue = decDigitValue(*p++); 2586 assert(decValue < 10U && "Invalid character in significand"); 2587 multiplier *= 10; 2588 val = val * 10 + decValue; 2589 /* The maximum number that can be multiplied by ten with any 2590 digit added without overflowing an integerPart. */ 2591 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 2592 2593 /* Multiply out the current part. */ 2594 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 2595 partCount, partCount + 1, false); 2596 2597 /* If we used another part (likely but not guaranteed), increase 2598 the count. */ 2599 if (decSignificand[partCount]) 2600 partCount++; 2601 } while (p <= D.lastSigDigit); 2602 2603 category = fcNormal; 2604 fs = roundSignificandWithExponent(decSignificand, partCount, 2605 D.exponent, rounding_mode); 2606 2607 delete [] decSignificand; 2608 } 2609 2610 return fs; 2611 } 2612 2613 bool 2614 APFloat::convertFromStringSpecials(StringRef str) { 2615 if (str.equals("inf") || str.equals("INFINITY")) { 2616 makeInf(false); 2617 return true; 2618 } 2619 2620 if (str.equals("-inf") || str.equals("-INFINITY")) { 2621 makeInf(true); 2622 return true; 2623 } 2624 2625 if (str.equals("nan") || str.equals("NaN")) { 2626 makeNaN(false, false); 2627 return true; 2628 } 2629 2630 if (str.equals("-nan") || str.equals("-NaN")) { 2631 makeNaN(false, true); 2632 return true; 2633 } 2634 2635 return false; 2636 } 2637 2638 APFloat::opStatus 2639 APFloat::convertFromString(StringRef str, roundingMode rounding_mode) 2640 { 2641 assert(!str.empty() && "Invalid string length"); 2642 2643 // Handle special cases. 2644 if (convertFromStringSpecials(str)) 2645 return opOK; 2646 2647 /* Handle a leading minus sign. */ 2648 StringRef::iterator p = str.begin(); 2649 size_t slen = str.size(); 2650 sign = *p == '-' ? 1 : 0; 2651 if (*p == '-' || *p == '+') { 2652 p++; 2653 slen--; 2654 assert(slen && "String has no digits"); 2655 } 2656 2657 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 2658 assert(slen - 2 && "Invalid string"); 2659 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 2660 rounding_mode); 2661 } 2662 2663 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 2664 } 2665 2666 /* Write out a hexadecimal representation of the floating point value 2667 to DST, which must be of sufficient size, in the C99 form 2668 [-]0xh.hhhhp[+-]d. Return the number of characters written, 2669 excluding the terminating NUL. 2670 2671 If UPPERCASE, the output is in upper case, otherwise in lower case. 2672 2673 HEXDIGITS digits appear altogether, rounding the value if 2674 necessary. If HEXDIGITS is 0, the minimal precision to display the 2675 number precisely is used instead. If nothing would appear after 2676 the decimal point it is suppressed. 2677 2678 The decimal exponent is always printed and has at least one digit. 2679 Zero values display an exponent of zero. Infinities and NaNs 2680 appear as "infinity" or "nan" respectively. 2681 2682 The above rules are as specified by C99. There is ambiguity about 2683 what the leading hexadecimal digit should be. This implementation 2684 uses whatever is necessary so that the exponent is displayed as 2685 stored. This implies the exponent will fall within the IEEE format 2686 range, and the leading hexadecimal digit will be 0 (for denormals), 2687 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 2688 any other digits zero). 2689 */ 2690 unsigned int 2691 APFloat::convertToHexString(char *dst, unsigned int hexDigits, 2692 bool upperCase, roundingMode rounding_mode) const 2693 { 2694 char *p; 2695 2696 p = dst; 2697 if (sign) 2698 *dst++ = '-'; 2699 2700 switch (category) { 2701 case fcInfinity: 2702 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 2703 dst += sizeof infinityL - 1; 2704 break; 2705 2706 case fcNaN: 2707 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 2708 dst += sizeof NaNU - 1; 2709 break; 2710 2711 case fcZero: 2712 *dst++ = '0'; 2713 *dst++ = upperCase ? 'X': 'x'; 2714 *dst++ = '0'; 2715 if (hexDigits > 1) { 2716 *dst++ = '.'; 2717 memset (dst, '0', hexDigits - 1); 2718 dst += hexDigits - 1; 2719 } 2720 *dst++ = upperCase ? 'P': 'p'; 2721 *dst++ = '0'; 2722 break; 2723 2724 case fcNormal: 2725 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 2726 break; 2727 } 2728 2729 *dst = 0; 2730 2731 return static_cast<unsigned int>(dst - p); 2732 } 2733 2734 /* Does the hard work of outputting the correctly rounded hexadecimal 2735 form of a normal floating point number with the specified number of 2736 hexadecimal digits. If HEXDIGITS is zero the minimum number of 2737 digits necessary to print the value precisely is output. */ 2738 char * 2739 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 2740 bool upperCase, 2741 roundingMode rounding_mode) const 2742 { 2743 unsigned int count, valueBits, shift, partsCount, outputDigits; 2744 const char *hexDigitChars; 2745 const integerPart *significand; 2746 char *p; 2747 bool roundUp; 2748 2749 *dst++ = '0'; 2750 *dst++ = upperCase ? 'X': 'x'; 2751 2752 roundUp = false; 2753 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 2754 2755 significand = significandParts(); 2756 partsCount = partCount(); 2757 2758 /* +3 because the first digit only uses the single integer bit, so 2759 we have 3 virtual zero most-significant-bits. */ 2760 valueBits = semantics->precision + 3; 2761 shift = integerPartWidth - valueBits % integerPartWidth; 2762 2763 /* The natural number of digits required ignoring trailing 2764 insignificant zeroes. */ 2765 outputDigits = (valueBits - significandLSB () + 3) / 4; 2766 2767 /* hexDigits of zero means use the required number for the 2768 precision. Otherwise, see if we are truncating. If we are, 2769 find out if we need to round away from zero. */ 2770 if (hexDigits) { 2771 if (hexDigits < outputDigits) { 2772 /* We are dropping non-zero bits, so need to check how to round. 2773 "bits" is the number of dropped bits. */ 2774 unsigned int bits; 2775 lostFraction fraction; 2776 2777 bits = valueBits - hexDigits * 4; 2778 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 2779 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 2780 } 2781 outputDigits = hexDigits; 2782 } 2783 2784 /* Write the digits consecutively, and start writing in the location 2785 of the hexadecimal point. We move the most significant digit 2786 left and add the hexadecimal point later. */ 2787 p = ++dst; 2788 2789 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 2790 2791 while (outputDigits && count) { 2792 integerPart part; 2793 2794 /* Put the most significant integerPartWidth bits in "part". */ 2795 if (--count == partsCount) 2796 part = 0; /* An imaginary higher zero part. */ 2797 else 2798 part = significand[count] << shift; 2799 2800 if (count && shift) 2801 part |= significand[count - 1] >> (integerPartWidth - shift); 2802 2803 /* Convert as much of "part" to hexdigits as we can. */ 2804 unsigned int curDigits = integerPartWidth / 4; 2805 2806 if (curDigits > outputDigits) 2807 curDigits = outputDigits; 2808 dst += partAsHex (dst, part, curDigits, hexDigitChars); 2809 outputDigits -= curDigits; 2810 } 2811 2812 if (roundUp) { 2813 char *q = dst; 2814 2815 /* Note that hexDigitChars has a trailing '0'. */ 2816 do { 2817 q--; 2818 *q = hexDigitChars[hexDigitValue (*q) + 1]; 2819 } while (*q == '0'); 2820 assert(q >= p); 2821 } else { 2822 /* Add trailing zeroes. */ 2823 memset (dst, '0', outputDigits); 2824 dst += outputDigits; 2825 } 2826 2827 /* Move the most significant digit to before the point, and if there 2828 is something after the decimal point add it. This must come 2829 after rounding above. */ 2830 p[-1] = p[0]; 2831 if (dst -1 == p) 2832 dst--; 2833 else 2834 p[0] = '.'; 2835 2836 /* Finally output the exponent. */ 2837 *dst++ = upperCase ? 'P': 'p'; 2838 2839 return writeSignedDecimal (dst, exponent); 2840 } 2841 2842 hash_code llvm::hash_value(const APFloat &Arg) { 2843 if (!Arg.isFiniteNonZero()) 2844 return hash_combine((uint8_t)Arg.category, 2845 // NaN has no sign, fix it at zero. 2846 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, 2847 Arg.semantics->precision); 2848 2849 // Normal floats need their exponent and significand hashed. 2850 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, 2851 Arg.semantics->precision, Arg.exponent, 2852 hash_combine_range( 2853 Arg.significandParts(), 2854 Arg.significandParts() + Arg.partCount())); 2855 } 2856 2857 // Conversion from APFloat to/from host float/double. It may eventually be 2858 // possible to eliminate these and have everybody deal with APFloats, but that 2859 // will take a while. This approach will not easily extend to long double. 2860 // Current implementation requires integerPartWidth==64, which is correct at 2861 // the moment but could be made more general. 2862 2863 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 2864 // the actual IEEE respresentations. We compensate for that here. 2865 2866 APInt 2867 APFloat::convertF80LongDoubleAPFloatToAPInt() const 2868 { 2869 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended); 2870 assert(partCount()==2); 2871 2872 uint64_t myexponent, mysignificand; 2873 2874 if (isFiniteNonZero()) { 2875 myexponent = exponent+16383; //bias 2876 mysignificand = significandParts()[0]; 2877 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 2878 myexponent = 0; // denormal 2879 } else if (category==fcZero) { 2880 myexponent = 0; 2881 mysignificand = 0; 2882 } else if (category==fcInfinity) { 2883 myexponent = 0x7fff; 2884 mysignificand = 0x8000000000000000ULL; 2885 } else { 2886 assert(category == fcNaN && "Unknown category"); 2887 myexponent = 0x7fff; 2888 mysignificand = significandParts()[0]; 2889 } 2890 2891 uint64_t words[2]; 2892 words[0] = mysignificand; 2893 words[1] = ((uint64_t)(sign & 1) << 15) | 2894 (myexponent & 0x7fffLL); 2895 return APInt(80, words); 2896 } 2897 2898 APInt 2899 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const 2900 { 2901 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble); 2902 assert(partCount()==2); 2903 2904 uint64_t words[2]; 2905 opStatus fs; 2906 bool losesInfo; 2907 2908 // Convert number to double. To avoid spurious underflows, we re- 2909 // normalize against the "double" minExponent first, and only *then* 2910 // truncate the mantissa. The result of that second conversion 2911 // may be inexact, but should never underflow. 2912 // Declare fltSemantics before APFloat that uses it (and 2913 // saves pointer to it) to ensure correct destruction order. 2914 fltSemantics extendedSemantics = *semantics; 2915 extendedSemantics.minExponent = IEEEdouble.minExponent; 2916 APFloat extended(*this); 2917 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2918 assert(fs == opOK && !losesInfo); 2919 (void)fs; 2920 2921 APFloat u(extended); 2922 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); 2923 assert(fs == opOK || fs == opInexact); 2924 (void)fs; 2925 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 2926 2927 // If conversion was exact or resulted in a special case, we're done; 2928 // just set the second double to zero. Otherwise, re-convert back to 2929 // the extended format and compute the difference. This now should 2930 // convert exactly to double. 2931 if (u.isFiniteNonZero() && losesInfo) { 2932 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2933 assert(fs == opOK && !losesInfo); 2934 (void)fs; 2935 2936 APFloat v(extended); 2937 v.subtract(u, rmNearestTiesToEven); 2938 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); 2939 assert(fs == opOK && !losesInfo); 2940 (void)fs; 2941 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 2942 } else { 2943 words[1] = 0; 2944 } 2945 2946 return APInt(128, words); 2947 } 2948 2949 APInt 2950 APFloat::convertQuadrupleAPFloatToAPInt() const 2951 { 2952 assert(semantics == (const llvm::fltSemantics*)&IEEEquad); 2953 assert(partCount()==2); 2954 2955 uint64_t myexponent, mysignificand, mysignificand2; 2956 2957 if (isFiniteNonZero()) { 2958 myexponent = exponent+16383; //bias 2959 mysignificand = significandParts()[0]; 2960 mysignificand2 = significandParts()[1]; 2961 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) 2962 myexponent = 0; // denormal 2963 } else if (category==fcZero) { 2964 myexponent = 0; 2965 mysignificand = mysignificand2 = 0; 2966 } else if (category==fcInfinity) { 2967 myexponent = 0x7fff; 2968 mysignificand = mysignificand2 = 0; 2969 } else { 2970 assert(category == fcNaN && "Unknown category!"); 2971 myexponent = 0x7fff; 2972 mysignificand = significandParts()[0]; 2973 mysignificand2 = significandParts()[1]; 2974 } 2975 2976 uint64_t words[2]; 2977 words[0] = mysignificand; 2978 words[1] = ((uint64_t)(sign & 1) << 63) | 2979 ((myexponent & 0x7fff) << 48) | 2980 (mysignificand2 & 0xffffffffffffLL); 2981 2982 return APInt(128, words); 2983 } 2984 2985 APInt 2986 APFloat::convertDoubleAPFloatToAPInt() const 2987 { 2988 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); 2989 assert(partCount()==1); 2990 2991 uint64_t myexponent, mysignificand; 2992 2993 if (isFiniteNonZero()) { 2994 myexponent = exponent+1023; //bias 2995 mysignificand = *significandParts(); 2996 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 2997 myexponent = 0; // denormal 2998 } else if (category==fcZero) { 2999 myexponent = 0; 3000 mysignificand = 0; 3001 } else if (category==fcInfinity) { 3002 myexponent = 0x7ff; 3003 mysignificand = 0; 3004 } else { 3005 assert(category == fcNaN && "Unknown category!"); 3006 myexponent = 0x7ff; 3007 mysignificand = *significandParts(); 3008 } 3009 3010 return APInt(64, ((((uint64_t)(sign & 1) << 63) | 3011 ((myexponent & 0x7ff) << 52) | 3012 (mysignificand & 0xfffffffffffffLL)))); 3013 } 3014 3015 APInt 3016 APFloat::convertFloatAPFloatToAPInt() const 3017 { 3018 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); 3019 assert(partCount()==1); 3020 3021 uint32_t myexponent, mysignificand; 3022 3023 if (isFiniteNonZero()) { 3024 myexponent = exponent+127; //bias 3025 mysignificand = (uint32_t)*significandParts(); 3026 if (myexponent == 1 && !(mysignificand & 0x800000)) 3027 myexponent = 0; // denormal 3028 } else if (category==fcZero) { 3029 myexponent = 0; 3030 mysignificand = 0; 3031 } else if (category==fcInfinity) { 3032 myexponent = 0xff; 3033 mysignificand = 0; 3034 } else { 3035 assert(category == fcNaN && "Unknown category!"); 3036 myexponent = 0xff; 3037 mysignificand = (uint32_t)*significandParts(); 3038 } 3039 3040 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 3041 (mysignificand & 0x7fffff))); 3042 } 3043 3044 APInt 3045 APFloat::convertHalfAPFloatToAPInt() const 3046 { 3047 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf); 3048 assert(partCount()==1); 3049 3050 uint32_t myexponent, mysignificand; 3051 3052 if (isFiniteNonZero()) { 3053 myexponent = exponent+15; //bias 3054 mysignificand = (uint32_t)*significandParts(); 3055 if (myexponent == 1 && !(mysignificand & 0x400)) 3056 myexponent = 0; // denormal 3057 } else if (category==fcZero) { 3058 myexponent = 0; 3059 mysignificand = 0; 3060 } else if (category==fcInfinity) { 3061 myexponent = 0x1f; 3062 mysignificand = 0; 3063 } else { 3064 assert(category == fcNaN && "Unknown category!"); 3065 myexponent = 0x1f; 3066 mysignificand = (uint32_t)*significandParts(); 3067 } 3068 3069 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | 3070 (mysignificand & 0x3ff))); 3071 } 3072 3073 // This function creates an APInt that is just a bit map of the floating 3074 // point constant as it would appear in memory. It is not a conversion, 3075 // and treating the result as a normal integer is unlikely to be useful. 3076 3077 APInt 3078 APFloat::bitcastToAPInt() const 3079 { 3080 if (semantics == (const llvm::fltSemantics*)&IEEEhalf) 3081 return convertHalfAPFloatToAPInt(); 3082 3083 if (semantics == (const llvm::fltSemantics*)&IEEEsingle) 3084 return convertFloatAPFloatToAPInt(); 3085 3086 if (semantics == (const llvm::fltSemantics*)&IEEEdouble) 3087 return convertDoubleAPFloatToAPInt(); 3088 3089 if (semantics == (const llvm::fltSemantics*)&IEEEquad) 3090 return convertQuadrupleAPFloatToAPInt(); 3091 3092 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble) 3093 return convertPPCDoubleDoubleAPFloatToAPInt(); 3094 3095 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended && 3096 "unknown format!"); 3097 return convertF80LongDoubleAPFloatToAPInt(); 3098 } 3099 3100 float 3101 APFloat::convertToFloat() const 3102 { 3103 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle && 3104 "Float semantics are not IEEEsingle"); 3105 APInt api = bitcastToAPInt(); 3106 return api.bitsToFloat(); 3107 } 3108 3109 double 3110 APFloat::convertToDouble() const 3111 { 3112 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble && 3113 "Float semantics are not IEEEdouble"); 3114 APInt api = bitcastToAPInt(); 3115 return api.bitsToDouble(); 3116 } 3117 3118 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3119 /// does not support these bit patterns: 3120 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3121 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3122 /// exponent = 0, integer bit 1 ("pseudodenormal") 3123 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3124 /// At the moment, the first two are treated as NaNs, the second two as Normal. 3125 void 3126 APFloat::initFromF80LongDoubleAPInt(const APInt &api) 3127 { 3128 assert(api.getBitWidth()==80); 3129 uint64_t i1 = api.getRawData()[0]; 3130 uint64_t i2 = api.getRawData()[1]; 3131 uint64_t myexponent = (i2 & 0x7fff); 3132 uint64_t mysignificand = i1; 3133 3134 initialize(&APFloat::x87DoubleExtended); 3135 assert(partCount()==2); 3136 3137 sign = static_cast<unsigned int>(i2>>15); 3138 if (myexponent==0 && mysignificand==0) { 3139 // exponent, significand meaningless 3140 category = fcZero; 3141 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3142 // exponent, significand meaningless 3143 category = fcInfinity; 3144 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) { 3145 // exponent meaningless 3146 category = fcNaN; 3147 significandParts()[0] = mysignificand; 3148 significandParts()[1] = 0; 3149 } else { 3150 category = fcNormal; 3151 exponent = myexponent - 16383; 3152 significandParts()[0] = mysignificand; 3153 significandParts()[1] = 0; 3154 if (myexponent==0) // denormal 3155 exponent = -16382; 3156 } 3157 } 3158 3159 void 3160 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) 3161 { 3162 assert(api.getBitWidth()==128); 3163 uint64_t i1 = api.getRawData()[0]; 3164 uint64_t i2 = api.getRawData()[1]; 3165 opStatus fs; 3166 bool losesInfo; 3167 3168 // Get the first double and convert to our format. 3169 initFromDoubleAPInt(APInt(64, i1)); 3170 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); 3171 assert(fs == opOK && !losesInfo); 3172 (void)fs; 3173 3174 // Unless we have a special case, add in second double. 3175 if (isFiniteNonZero()) { 3176 APFloat v(IEEEdouble, APInt(64, i2)); 3177 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); 3178 assert(fs == opOK && !losesInfo); 3179 (void)fs; 3180 3181 add(v, rmNearestTiesToEven); 3182 } 3183 } 3184 3185 void 3186 APFloat::initFromQuadrupleAPInt(const APInt &api) 3187 { 3188 assert(api.getBitWidth()==128); 3189 uint64_t i1 = api.getRawData()[0]; 3190 uint64_t i2 = api.getRawData()[1]; 3191 uint64_t myexponent = (i2 >> 48) & 0x7fff; 3192 uint64_t mysignificand = i1; 3193 uint64_t mysignificand2 = i2 & 0xffffffffffffLL; 3194 3195 initialize(&APFloat::IEEEquad); 3196 assert(partCount()==2); 3197 3198 sign = static_cast<unsigned int>(i2>>63); 3199 if (myexponent==0 && 3200 (mysignificand==0 && mysignificand2==0)) { 3201 // exponent, significand meaningless 3202 category = fcZero; 3203 } else if (myexponent==0x7fff && 3204 (mysignificand==0 && mysignificand2==0)) { 3205 // exponent, significand meaningless 3206 category = fcInfinity; 3207 } else if (myexponent==0x7fff && 3208 (mysignificand!=0 || mysignificand2 !=0)) { 3209 // exponent meaningless 3210 category = fcNaN; 3211 significandParts()[0] = mysignificand; 3212 significandParts()[1] = mysignificand2; 3213 } else { 3214 category = fcNormal; 3215 exponent = myexponent - 16383; 3216 significandParts()[0] = mysignificand; 3217 significandParts()[1] = mysignificand2; 3218 if (myexponent==0) // denormal 3219 exponent = -16382; 3220 else 3221 significandParts()[1] |= 0x1000000000000LL; // integer bit 3222 } 3223 } 3224 3225 void 3226 APFloat::initFromDoubleAPInt(const APInt &api) 3227 { 3228 assert(api.getBitWidth()==64); 3229 uint64_t i = *api.getRawData(); 3230 uint64_t myexponent = (i >> 52) & 0x7ff; 3231 uint64_t mysignificand = i & 0xfffffffffffffLL; 3232 3233 initialize(&APFloat::IEEEdouble); 3234 assert(partCount()==1); 3235 3236 sign = static_cast<unsigned int>(i>>63); 3237 if (myexponent==0 && mysignificand==0) { 3238 // exponent, significand meaningless 3239 category = fcZero; 3240 } else if (myexponent==0x7ff && mysignificand==0) { 3241 // exponent, significand meaningless 3242 category = fcInfinity; 3243 } else if (myexponent==0x7ff && mysignificand!=0) { 3244 // exponent meaningless 3245 category = fcNaN; 3246 *significandParts() = mysignificand; 3247 } else { 3248 category = fcNormal; 3249 exponent = myexponent - 1023; 3250 *significandParts() = mysignificand; 3251 if (myexponent==0) // denormal 3252 exponent = -1022; 3253 else 3254 *significandParts() |= 0x10000000000000LL; // integer bit 3255 } 3256 } 3257 3258 void 3259 APFloat::initFromFloatAPInt(const APInt & api) 3260 { 3261 assert(api.getBitWidth()==32); 3262 uint32_t i = (uint32_t)*api.getRawData(); 3263 uint32_t myexponent = (i >> 23) & 0xff; 3264 uint32_t mysignificand = i & 0x7fffff; 3265 3266 initialize(&APFloat::IEEEsingle); 3267 assert(partCount()==1); 3268 3269 sign = i >> 31; 3270 if (myexponent==0 && mysignificand==0) { 3271 // exponent, significand meaningless 3272 category = fcZero; 3273 } else if (myexponent==0xff && mysignificand==0) { 3274 // exponent, significand meaningless 3275 category = fcInfinity; 3276 } else if (myexponent==0xff && mysignificand!=0) { 3277 // sign, exponent, significand meaningless 3278 category = fcNaN; 3279 *significandParts() = mysignificand; 3280 } else { 3281 category = fcNormal; 3282 exponent = myexponent - 127; //bias 3283 *significandParts() = mysignificand; 3284 if (myexponent==0) // denormal 3285 exponent = -126; 3286 else 3287 *significandParts() |= 0x800000; // integer bit 3288 } 3289 } 3290 3291 void 3292 APFloat::initFromHalfAPInt(const APInt & api) 3293 { 3294 assert(api.getBitWidth()==16); 3295 uint32_t i = (uint32_t)*api.getRawData(); 3296 uint32_t myexponent = (i >> 10) & 0x1f; 3297 uint32_t mysignificand = i & 0x3ff; 3298 3299 initialize(&APFloat::IEEEhalf); 3300 assert(partCount()==1); 3301 3302 sign = i >> 15; 3303 if (myexponent==0 && mysignificand==0) { 3304 // exponent, significand meaningless 3305 category = fcZero; 3306 } else if (myexponent==0x1f && mysignificand==0) { 3307 // exponent, significand meaningless 3308 category = fcInfinity; 3309 } else if (myexponent==0x1f && mysignificand!=0) { 3310 // sign, exponent, significand meaningless 3311 category = fcNaN; 3312 *significandParts() = mysignificand; 3313 } else { 3314 category = fcNormal; 3315 exponent = myexponent - 15; //bias 3316 *significandParts() = mysignificand; 3317 if (myexponent==0) // denormal 3318 exponent = -14; 3319 else 3320 *significandParts() |= 0x400; // integer bit 3321 } 3322 } 3323 3324 /// Treat api as containing the bits of a floating point number. Currently 3325 /// we infer the floating point type from the size of the APInt. The 3326 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful 3327 /// when the size is anything else). 3328 void 3329 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api) 3330 { 3331 if (Sem == &IEEEhalf) 3332 return initFromHalfAPInt(api); 3333 if (Sem == &IEEEsingle) 3334 return initFromFloatAPInt(api); 3335 if (Sem == &IEEEdouble) 3336 return initFromDoubleAPInt(api); 3337 if (Sem == &x87DoubleExtended) 3338 return initFromF80LongDoubleAPInt(api); 3339 if (Sem == &IEEEquad) 3340 return initFromQuadrupleAPInt(api); 3341 if (Sem == &PPCDoubleDouble) 3342 return initFromPPCDoubleDoubleAPInt(api); 3343 3344 llvm_unreachable(nullptr); 3345 } 3346 3347 APFloat 3348 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) 3349 { 3350 switch (BitWidth) { 3351 case 16: 3352 return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth)); 3353 case 32: 3354 return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth)); 3355 case 64: 3356 return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth)); 3357 case 80: 3358 return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth)); 3359 case 128: 3360 if (isIEEE) 3361 return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth)); 3362 return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth)); 3363 default: 3364 llvm_unreachable("Unknown floating bit width"); 3365 } 3366 } 3367 3368 unsigned APFloat::getSizeInBits(const fltSemantics &Sem) { 3369 return Sem.sizeInBits; 3370 } 3371 3372 /// Make this number the largest magnitude normal number in the given 3373 /// semantics. 3374 void APFloat::makeLargest(bool Negative) { 3375 // We want (in interchange format): 3376 // sign = {Negative} 3377 // exponent = 1..10 3378 // significand = 1..1 3379 category = fcNormal; 3380 sign = Negative; 3381 exponent = semantics->maxExponent; 3382 3383 // Use memset to set all but the highest integerPart to all ones. 3384 integerPart *significand = significandParts(); 3385 unsigned PartCount = partCount(); 3386 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); 3387 3388 // Set the high integerPart especially setting all unused top bits for 3389 // internal consistency. 3390 const unsigned NumUnusedHighBits = 3391 PartCount*integerPartWidth - semantics->precision; 3392 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth) 3393 ? (~integerPart(0) >> NumUnusedHighBits) 3394 : 0; 3395 } 3396 3397 /// Make this number the smallest magnitude denormal number in the given 3398 /// semantics. 3399 void APFloat::makeSmallest(bool Negative) { 3400 // We want (in interchange format): 3401 // sign = {Negative} 3402 // exponent = 0..0 3403 // significand = 0..01 3404 category = fcNormal; 3405 sign = Negative; 3406 exponent = semantics->minExponent; 3407 APInt::tcSet(significandParts(), 1, partCount()); 3408 } 3409 3410 3411 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) { 3412 // We want (in interchange format): 3413 // sign = {Negative} 3414 // exponent = 1..10 3415 // significand = 1..1 3416 APFloat Val(Sem, uninitialized); 3417 Val.makeLargest(Negative); 3418 return Val; 3419 } 3420 3421 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) { 3422 // We want (in interchange format): 3423 // sign = {Negative} 3424 // exponent = 0..0 3425 // significand = 0..01 3426 APFloat Val(Sem, uninitialized); 3427 Val.makeSmallest(Negative); 3428 return Val; 3429 } 3430 3431 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) { 3432 APFloat Val(Sem, uninitialized); 3433 3434 // We want (in interchange format): 3435 // sign = {Negative} 3436 // exponent = 0..0 3437 // significand = 10..0 3438 3439 Val.category = fcNormal; 3440 Val.zeroSignificand(); 3441 Val.sign = Negative; 3442 Val.exponent = Sem.minExponent; 3443 Val.significandParts()[partCountForBits(Sem.precision)-1] |= 3444 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth)); 3445 3446 return Val; 3447 } 3448 3449 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) { 3450 initFromAPInt(&Sem, API); 3451 } 3452 3453 APFloat::APFloat(float f) { 3454 initFromAPInt(&IEEEsingle, APInt::floatToBits(f)); 3455 } 3456 3457 APFloat::APFloat(double d) { 3458 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d)); 3459 } 3460 3461 namespace { 3462 void append(SmallVectorImpl<char> &Buffer, StringRef Str) { 3463 Buffer.append(Str.begin(), Str.end()); 3464 } 3465 3466 /// Removes data from the given significand until it is no more 3467 /// precise than is required for the desired precision. 3468 void AdjustToPrecision(APInt &significand, 3469 int &exp, unsigned FormatPrecision) { 3470 unsigned bits = significand.getActiveBits(); 3471 3472 // 196/59 is a very slight overestimate of lg_2(10). 3473 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59; 3474 3475 if (bits <= bitsRequired) return; 3476 3477 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; 3478 if (!tensRemovable) return; 3479 3480 exp += tensRemovable; 3481 3482 APInt divisor(significand.getBitWidth(), 1); 3483 APInt powten(significand.getBitWidth(), 10); 3484 while (true) { 3485 if (tensRemovable & 1) 3486 divisor *= powten; 3487 tensRemovable >>= 1; 3488 if (!tensRemovable) break; 3489 powten *= powten; 3490 } 3491 3492 significand = significand.udiv(divisor); 3493 3494 // Truncate the significand down to its active bit count. 3495 significand = significand.trunc(significand.getActiveBits()); 3496 } 3497 3498 3499 void AdjustToPrecision(SmallVectorImpl<char> &buffer, 3500 int &exp, unsigned FormatPrecision) { 3501 unsigned N = buffer.size(); 3502 if (N <= FormatPrecision) return; 3503 3504 // The most significant figures are the last ones in the buffer. 3505 unsigned FirstSignificant = N - FormatPrecision; 3506 3507 // Round. 3508 // FIXME: this probably shouldn't use 'round half up'. 3509 3510 // Rounding down is just a truncation, except we also want to drop 3511 // trailing zeros from the new result. 3512 if (buffer[FirstSignificant - 1] < '5') { 3513 while (FirstSignificant < N && buffer[FirstSignificant] == '0') 3514 FirstSignificant++; 3515 3516 exp += FirstSignificant; 3517 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3518 return; 3519 } 3520 3521 // Rounding up requires a decimal add-with-carry. If we continue 3522 // the carry, the newly-introduced zeros will just be truncated. 3523 for (unsigned I = FirstSignificant; I != N; ++I) { 3524 if (buffer[I] == '9') { 3525 FirstSignificant++; 3526 } else { 3527 buffer[I]++; 3528 break; 3529 } 3530 } 3531 3532 // If we carried through, we have exactly one digit of precision. 3533 if (FirstSignificant == N) { 3534 exp += FirstSignificant; 3535 buffer.clear(); 3536 buffer.push_back('1'); 3537 return; 3538 } 3539 3540 exp += FirstSignificant; 3541 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3542 } 3543 } 3544 3545 void APFloat::toString(SmallVectorImpl<char> &Str, 3546 unsigned FormatPrecision, 3547 unsigned FormatMaxPadding) const { 3548 switch (category) { 3549 case fcInfinity: 3550 if (isNegative()) 3551 return append(Str, "-Inf"); 3552 else 3553 return append(Str, "+Inf"); 3554 3555 case fcNaN: return append(Str, "NaN"); 3556 3557 case fcZero: 3558 if (isNegative()) 3559 Str.push_back('-'); 3560 3561 if (!FormatMaxPadding) 3562 append(Str, "0.0E+0"); 3563 else 3564 Str.push_back('0'); 3565 return; 3566 3567 case fcNormal: 3568 break; 3569 } 3570 3571 if (isNegative()) 3572 Str.push_back('-'); 3573 3574 // Decompose the number into an APInt and an exponent. 3575 int exp = exponent - ((int) semantics->precision - 1); 3576 APInt significand(semantics->precision, 3577 makeArrayRef(significandParts(), 3578 partCountForBits(semantics->precision))); 3579 3580 // Set FormatPrecision if zero. We want to do this before we 3581 // truncate trailing zeros, as those are part of the precision. 3582 if (!FormatPrecision) { 3583 // We use enough digits so the number can be round-tripped back to an 3584 // APFloat. The formula comes from "How to Print Floating-Point Numbers 3585 // Accurately" by Steele and White. 3586 // FIXME: Using a formula based purely on the precision is conservative; 3587 // we can print fewer digits depending on the actual value being printed. 3588 3589 // FormatPrecision = 2 + floor(significandBits / lg_2(10)) 3590 FormatPrecision = 2 + semantics->precision * 59 / 196; 3591 } 3592 3593 // Ignore trailing binary zeros. 3594 int trailingZeros = significand.countTrailingZeros(); 3595 exp += trailingZeros; 3596 significand = significand.lshr(trailingZeros); 3597 3598 // Change the exponent from 2^e to 10^e. 3599 if (exp == 0) { 3600 // Nothing to do. 3601 } else if (exp > 0) { 3602 // Just shift left. 3603 significand = significand.zext(semantics->precision + exp); 3604 significand <<= exp; 3605 exp = 0; 3606 } else { /* exp < 0 */ 3607 int texp = -exp; 3608 3609 // We transform this using the identity: 3610 // (N)(2^-e) == (N)(5^e)(10^-e) 3611 // This means we have to multiply N (the significand) by 5^e. 3612 // To avoid overflow, we have to operate on numbers large 3613 // enough to store N * 5^e: 3614 // log2(N * 5^e) == log2(N) + e * log2(5) 3615 // <= semantics->precision + e * 137 / 59 3616 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) 3617 3618 unsigned precision = semantics->precision + (137 * texp + 136) / 59; 3619 3620 // Multiply significand by 5^e. 3621 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) 3622 significand = significand.zext(precision); 3623 APInt five_to_the_i(precision, 5); 3624 while (true) { 3625 if (texp & 1) significand *= five_to_the_i; 3626 3627 texp >>= 1; 3628 if (!texp) break; 3629 five_to_the_i *= five_to_the_i; 3630 } 3631 } 3632 3633 AdjustToPrecision(significand, exp, FormatPrecision); 3634 3635 SmallVector<char, 256> buffer; 3636 3637 // Fill the buffer. 3638 unsigned precision = significand.getBitWidth(); 3639 APInt ten(precision, 10); 3640 APInt digit(precision, 0); 3641 3642 bool inTrail = true; 3643 while (significand != 0) { 3644 // digit <- significand % 10 3645 // significand <- significand / 10 3646 APInt::udivrem(significand, ten, significand, digit); 3647 3648 unsigned d = digit.getZExtValue(); 3649 3650 // Drop trailing zeros. 3651 if (inTrail && !d) exp++; 3652 else { 3653 buffer.push_back((char) ('0' + d)); 3654 inTrail = false; 3655 } 3656 } 3657 3658 assert(!buffer.empty() && "no characters in buffer!"); 3659 3660 // Drop down to FormatPrecision. 3661 // TODO: don't do more precise calculations above than are required. 3662 AdjustToPrecision(buffer, exp, FormatPrecision); 3663 3664 unsigned NDigits = buffer.size(); 3665 3666 // Check whether we should use scientific notation. 3667 bool FormatScientific; 3668 if (!FormatMaxPadding) 3669 FormatScientific = true; 3670 else { 3671 if (exp >= 0) { 3672 // 765e3 --> 765000 3673 // ^^^ 3674 // But we shouldn't make the number look more precise than it is. 3675 FormatScientific = ((unsigned) exp > FormatMaxPadding || 3676 NDigits + (unsigned) exp > FormatPrecision); 3677 } else { 3678 // Power of the most significant digit. 3679 int MSD = exp + (int) (NDigits - 1); 3680 if (MSD >= 0) { 3681 // 765e-2 == 7.65 3682 FormatScientific = false; 3683 } else { 3684 // 765e-5 == 0.00765 3685 // ^ ^^ 3686 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; 3687 } 3688 } 3689 } 3690 3691 // Scientific formatting is pretty straightforward. 3692 if (FormatScientific) { 3693 exp += (NDigits - 1); 3694 3695 Str.push_back(buffer[NDigits-1]); 3696 Str.push_back('.'); 3697 if (NDigits == 1) 3698 Str.push_back('0'); 3699 else 3700 for (unsigned I = 1; I != NDigits; ++I) 3701 Str.push_back(buffer[NDigits-1-I]); 3702 Str.push_back('E'); 3703 3704 Str.push_back(exp >= 0 ? '+' : '-'); 3705 if (exp < 0) exp = -exp; 3706 SmallVector<char, 6> expbuf; 3707 do { 3708 expbuf.push_back((char) ('0' + (exp % 10))); 3709 exp /= 10; 3710 } while (exp); 3711 for (unsigned I = 0, E = expbuf.size(); I != E; ++I) 3712 Str.push_back(expbuf[E-1-I]); 3713 return; 3714 } 3715 3716 // Non-scientific, positive exponents. 3717 if (exp >= 0) { 3718 for (unsigned I = 0; I != NDigits; ++I) 3719 Str.push_back(buffer[NDigits-1-I]); 3720 for (unsigned I = 0; I != (unsigned) exp; ++I) 3721 Str.push_back('0'); 3722 return; 3723 } 3724 3725 // Non-scientific, negative exponents. 3726 3727 // The number of digits to the left of the decimal point. 3728 int NWholeDigits = exp + (int) NDigits; 3729 3730 unsigned I = 0; 3731 if (NWholeDigits > 0) { 3732 for (; I != (unsigned) NWholeDigits; ++I) 3733 Str.push_back(buffer[NDigits-I-1]); 3734 Str.push_back('.'); 3735 } else { 3736 unsigned NZeros = 1 + (unsigned) -NWholeDigits; 3737 3738 Str.push_back('0'); 3739 Str.push_back('.'); 3740 for (unsigned Z = 1; Z != NZeros; ++Z) 3741 Str.push_back('0'); 3742 } 3743 3744 for (; I != NDigits; ++I) 3745 Str.push_back(buffer[NDigits-I-1]); 3746 } 3747 3748 bool APFloat::getExactInverse(APFloat *inv) const { 3749 // Special floats and denormals have no exact inverse. 3750 if (!isFiniteNonZero()) 3751 return false; 3752 3753 // Check that the number is a power of two by making sure that only the 3754 // integer bit is set in the significand. 3755 if (significandLSB() != semantics->precision - 1) 3756 return false; 3757 3758 // Get the inverse. 3759 APFloat reciprocal(*semantics, 1ULL); 3760 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) 3761 return false; 3762 3763 // Avoid multiplication with a denormal, it is not safe on all platforms and 3764 // may be slower than a normal division. 3765 if (reciprocal.isDenormal()) 3766 return false; 3767 3768 assert(reciprocal.isFiniteNonZero() && 3769 reciprocal.significandLSB() == reciprocal.semantics->precision - 1); 3770 3771 if (inv) 3772 *inv = reciprocal; 3773 3774 return true; 3775 } 3776 3777 bool APFloat::isSignaling() const { 3778 if (!isNaN()) 3779 return false; 3780 3781 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the 3782 // first bit of the trailing significand being 0. 3783 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2); 3784 } 3785 3786 /// IEEE-754R 2008 5.3.1: nextUp/nextDown. 3787 /// 3788 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with 3789 /// appropriate sign switching before/after the computation. 3790 APFloat::opStatus APFloat::next(bool nextDown) { 3791 // If we are performing nextDown, swap sign so we have -x. 3792 if (nextDown) 3793 changeSign(); 3794 3795 // Compute nextUp(x) 3796 opStatus result = opOK; 3797 3798 // Handle each float category separately. 3799 switch (category) { 3800 case fcInfinity: 3801 // nextUp(+inf) = +inf 3802 if (!isNegative()) 3803 break; 3804 // nextUp(-inf) = -getLargest() 3805 makeLargest(true); 3806 break; 3807 case fcNaN: 3808 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. 3809 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not 3810 // change the payload. 3811 if (isSignaling()) { 3812 result = opInvalidOp; 3813 // For consistency, propagate the sign of the sNaN to the qNaN. 3814 makeNaN(false, isNegative(), nullptr); 3815 } 3816 break; 3817 case fcZero: 3818 // nextUp(pm 0) = +getSmallest() 3819 makeSmallest(false); 3820 break; 3821 case fcNormal: 3822 // nextUp(-getSmallest()) = -0 3823 if (isSmallest() && isNegative()) { 3824 APInt::tcSet(significandParts(), 0, partCount()); 3825 category = fcZero; 3826 exponent = 0; 3827 break; 3828 } 3829 3830 // nextUp(getLargest()) == INFINITY 3831 if (isLargest() && !isNegative()) { 3832 APInt::tcSet(significandParts(), 0, partCount()); 3833 category = fcInfinity; 3834 exponent = semantics->maxExponent + 1; 3835 break; 3836 } 3837 3838 // nextUp(normal) == normal + inc. 3839 if (isNegative()) { 3840 // If we are negative, we need to decrement the significand. 3841 3842 // We only cross a binade boundary that requires adjusting the exponent 3843 // if: 3844 // 1. exponent != semantics->minExponent. This implies we are not in the 3845 // smallest binade or are dealing with denormals. 3846 // 2. Our significand excluding the integral bit is all zeros. 3847 bool WillCrossBinadeBoundary = 3848 exponent != semantics->minExponent && isSignificandAllZeros(); 3849 3850 // Decrement the significand. 3851 // 3852 // We always do this since: 3853 // 1. If we are dealing with a non-binade decrement, by definition we 3854 // just decrement the significand. 3855 // 2. If we are dealing with a normal -> normal binade decrement, since 3856 // we have an explicit integral bit the fact that all bits but the 3857 // integral bit are zero implies that subtracting one will yield a 3858 // significand with 0 integral bit and 1 in all other spots. Thus we 3859 // must just adjust the exponent and set the integral bit to 1. 3860 // 3. If we are dealing with a normal -> denormal binade decrement, 3861 // since we set the integral bit to 0 when we represent denormals, we 3862 // just decrement the significand. 3863 integerPart *Parts = significandParts(); 3864 APInt::tcDecrement(Parts, partCount()); 3865 3866 if (WillCrossBinadeBoundary) { 3867 // Our result is a normal number. Do the following: 3868 // 1. Set the integral bit to 1. 3869 // 2. Decrement the exponent. 3870 APInt::tcSetBit(Parts, semantics->precision - 1); 3871 exponent--; 3872 } 3873 } else { 3874 // If we are positive, we need to increment the significand. 3875 3876 // We only cross a binade boundary that requires adjusting the exponent if 3877 // the input is not a denormal and all of said input's significand bits 3878 // are set. If all of said conditions are true: clear the significand, set 3879 // the integral bit to 1, and increment the exponent. If we have a 3880 // denormal always increment since moving denormals and the numbers in the 3881 // smallest normal binade have the same exponent in our representation. 3882 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes(); 3883 3884 if (WillCrossBinadeBoundary) { 3885 integerPart *Parts = significandParts(); 3886 APInt::tcSet(Parts, 0, partCount()); 3887 APInt::tcSetBit(Parts, semantics->precision - 1); 3888 assert(exponent != semantics->maxExponent && 3889 "We can not increment an exponent beyond the maxExponent allowed" 3890 " by the given floating point semantics."); 3891 exponent++; 3892 } else { 3893 incrementSignificand(); 3894 } 3895 } 3896 break; 3897 } 3898 3899 // If we are performing nextDown, swap sign so we have -nextUp(-x) 3900 if (nextDown) 3901 changeSign(); 3902 3903 return result; 3904 } 3905 3906 void 3907 APFloat::makeInf(bool Negative) { 3908 category = fcInfinity; 3909 sign = Negative; 3910 exponent = semantics->maxExponent + 1; 3911 APInt::tcSet(significandParts(), 0, partCount()); 3912 } 3913 3914 void 3915 APFloat::makeZero(bool Negative) { 3916 category = fcZero; 3917 sign = Negative; 3918 exponent = semantics->minExponent-1; 3919 APInt::tcSet(significandParts(), 0, partCount()); 3920 } 3921 3922 APFloat llvm::scalbn(APFloat X, int Exp) { 3923 if (X.isInfinity() || X.isZero() || X.isNaN()) 3924 return X; 3925 3926 auto MaxExp = X.getSemantics().maxExponent; 3927 auto MinExp = X.getSemantics().minExponent; 3928 if (Exp > (MaxExp - X.exponent)) 3929 // Overflow saturates to infinity. 3930 return APFloat::getInf(X.getSemantics(), X.isNegative()); 3931 if (Exp < (MinExp - X.exponent)) 3932 // Underflow saturates to zero. 3933 return APFloat::getZero(X.getSemantics(), X.isNegative()); 3934 3935 X.exponent += Exp; 3936 return X; 3937 } 3938