1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a class to represent arbitrary precision floating 11 // point values and provide a variety of arithmetic operations on them. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/APFloat.h" 16 #include "llvm/ADT/APSInt.h" 17 #include "llvm/ADT/FoldingSet.h" 18 #include "llvm/ADT/Hashing.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Support/ErrorHandling.h" 22 #include "llvm/Support/MathExtras.h" 23 #include <cstring> 24 #include <limits.h> 25 26 using namespace llvm; 27 28 /// A macro used to combine two fcCategory enums into one key which can be used 29 /// in a switch statement to classify how the interaction of two APFloat's 30 /// categories affects an operation. 31 /// 32 /// TODO: If clang source code is ever allowed to use constexpr in its own 33 /// codebase, change this into a static inline function. 34 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs)) 35 36 /* Assumed in hexadecimal significand parsing, and conversion to 37 hexadecimal strings. */ 38 static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); 39 40 namespace llvm { 41 42 /* Represents floating point arithmetic semantics. */ 43 struct fltSemantics { 44 /* The largest E such that 2^E is representable; this matches the 45 definition of IEEE 754. */ 46 APFloat::ExponentType maxExponent; 47 48 /* The smallest E such that 2^E is a normalized number; this 49 matches the definition of IEEE 754. */ 50 APFloat::ExponentType minExponent; 51 52 /* Number of bits in the significand. This includes the integer 53 bit. */ 54 unsigned int precision; 55 56 /* Number of bits actually used in the semantics. */ 57 unsigned int sizeInBits; 58 }; 59 60 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, 16 }; 61 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, 32 }; 62 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, 64 }; 63 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, 128 }; 64 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, 80 }; 65 const fltSemantics APFloat::Bogus = { 0, 0, 0, 0 }; 66 67 /* The PowerPC format consists of two doubles. It does not map cleanly 68 onto the usual format above. It is approximated using twice the 69 mantissa bits. Note that for exponents near the double minimum, 70 we no longer can represent the full 106 mantissa bits, so those 71 will be treated as denormal numbers. 72 73 FIXME: While this approximation is equivalent to what GCC uses for 74 compile-time arithmetic on PPC double-double numbers, it is not able 75 to represent all possible values held by a PPC double-double number, 76 for example: (long double) 1.0 + (long double) 0x1p-106 77 Should this be replaced by a full emulation of PPC double-double? */ 78 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53, 128 }; 79 80 /* A tight upper bound on number of parts required to hold the value 81 pow(5, power) is 82 83 power * 815 / (351 * integerPartWidth) + 1 84 85 However, whilst the result may require only this many parts, 86 because we are multiplying two values to get it, the 87 multiplication may require an extra part with the excess part 88 being zero (consider the trivial case of 1 * 1, tcFullMultiply 89 requires two parts to hold the single-part result). So we add an 90 extra one to guarantee enough space whilst multiplying. */ 91 const unsigned int maxExponent = 16383; 92 const unsigned int maxPrecision = 113; 93 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 94 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) 95 / (351 * integerPartWidth)); 96 } 97 98 /* A bunch of private, handy routines. */ 99 100 static inline unsigned int 101 partCountForBits(unsigned int bits) 102 { 103 return ((bits) + integerPartWidth - 1) / integerPartWidth; 104 } 105 106 /* Returns 0U-9U. Return values >= 10U are not digits. */ 107 static inline unsigned int 108 decDigitValue(unsigned int c) 109 { 110 return c - '0'; 111 } 112 113 /* Return the value of a decimal exponent of the form 114 [+-]ddddddd. 115 116 If the exponent overflows, returns a large exponent with the 117 appropriate sign. */ 118 static int 119 readExponent(StringRef::iterator begin, StringRef::iterator end) 120 { 121 bool isNegative; 122 unsigned int absExponent; 123 const unsigned int overlargeExponent = 24000; /* FIXME. */ 124 StringRef::iterator p = begin; 125 126 assert(p != end && "Exponent has no digits"); 127 128 isNegative = (*p == '-'); 129 if (*p == '-' || *p == '+') { 130 p++; 131 assert(p != end && "Exponent has no digits"); 132 } 133 134 absExponent = decDigitValue(*p++); 135 assert(absExponent < 10U && "Invalid character in exponent"); 136 137 for (; p != end; ++p) { 138 unsigned int value; 139 140 value = decDigitValue(*p); 141 assert(value < 10U && "Invalid character in exponent"); 142 143 value += absExponent * 10; 144 if (absExponent >= overlargeExponent) { 145 absExponent = overlargeExponent; 146 p = end; /* outwit assert below */ 147 break; 148 } 149 absExponent = value; 150 } 151 152 assert(p == end && "Invalid exponent in exponent"); 153 154 if (isNegative) 155 return -(int) absExponent; 156 else 157 return (int) absExponent; 158 } 159 160 /* This is ugly and needs cleaning up, but I don't immediately see 161 how whilst remaining safe. */ 162 static int 163 totalExponent(StringRef::iterator p, StringRef::iterator end, 164 int exponentAdjustment) 165 { 166 int unsignedExponent; 167 bool negative, overflow; 168 int exponent = 0; 169 170 assert(p != end && "Exponent has no digits"); 171 172 negative = *p == '-'; 173 if (*p == '-' || *p == '+') { 174 p++; 175 assert(p != end && "Exponent has no digits"); 176 } 177 178 unsignedExponent = 0; 179 overflow = false; 180 for (; p != end; ++p) { 181 unsigned int value; 182 183 value = decDigitValue(*p); 184 assert(value < 10U && "Invalid character in exponent"); 185 186 unsignedExponent = unsignedExponent * 10 + value; 187 if (unsignedExponent > 32767) { 188 overflow = true; 189 break; 190 } 191 } 192 193 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 194 overflow = true; 195 196 if (!overflow) { 197 exponent = unsignedExponent; 198 if (negative) 199 exponent = -exponent; 200 exponent += exponentAdjustment; 201 if (exponent > 32767 || exponent < -32768) 202 overflow = true; 203 } 204 205 if (overflow) 206 exponent = negative ? -32768: 32767; 207 208 return exponent; 209 } 210 211 static StringRef::iterator 212 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 213 StringRef::iterator *dot) 214 { 215 StringRef::iterator p = begin; 216 *dot = end; 217 while (p != end && *p == '0') 218 p++; 219 220 if (p != end && *p == '.') { 221 *dot = p++; 222 223 assert(end - begin != 1 && "Significand has no digits"); 224 225 while (p != end && *p == '0') 226 p++; 227 } 228 229 return p; 230 } 231 232 /* Given a normal decimal floating point number of the form 233 234 dddd.dddd[eE][+-]ddd 235 236 where the decimal point and exponent are optional, fill out the 237 structure D. Exponent is appropriate if the significand is 238 treated as an integer, and normalizedExponent if the significand 239 is taken to have the decimal point after a single leading 240 non-zero digit. 241 242 If the value is zero, V->firstSigDigit points to a non-digit, and 243 the return exponent is zero. 244 */ 245 struct decimalInfo { 246 const char *firstSigDigit; 247 const char *lastSigDigit; 248 int exponent; 249 int normalizedExponent; 250 }; 251 252 static void 253 interpretDecimal(StringRef::iterator begin, StringRef::iterator end, 254 decimalInfo *D) 255 { 256 StringRef::iterator dot = end; 257 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot); 258 259 D->firstSigDigit = p; 260 D->exponent = 0; 261 D->normalizedExponent = 0; 262 263 for (; p != end; ++p) { 264 if (*p == '.') { 265 assert(dot == end && "String contains multiple dots"); 266 dot = p++; 267 if (p == end) 268 break; 269 } 270 if (decDigitValue(*p) >= 10U) 271 break; 272 } 273 274 if (p != end) { 275 assert((*p == 'e' || *p == 'E') && "Invalid character in significand"); 276 assert(p != begin && "Significand has no digits"); 277 assert((dot == end || p - begin != 1) && "Significand has no digits"); 278 279 /* p points to the first non-digit in the string */ 280 D->exponent = readExponent(p + 1, end); 281 282 /* Implied decimal point? */ 283 if (dot == end) 284 dot = p; 285 } 286 287 /* If number is all zeroes accept any exponent. */ 288 if (p != D->firstSigDigit) { 289 /* Drop insignificant trailing zeroes. */ 290 if (p != begin) { 291 do 292 do 293 p--; 294 while (p != begin && *p == '0'); 295 while (p != begin && *p == '.'); 296 } 297 298 /* Adjust the exponents for any decimal point. */ 299 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p)); 300 D->normalizedExponent = (D->exponent + 301 static_cast<APFloat::ExponentType>((p - D->firstSigDigit) 302 - (dot > D->firstSigDigit && dot < p))); 303 } 304 305 D->lastSigDigit = p; 306 } 307 308 /* Return the trailing fraction of a hexadecimal number. 309 DIGITVALUE is the first hex digit of the fraction, P points to 310 the next digit. */ 311 static lostFraction 312 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 313 unsigned int digitValue) 314 { 315 unsigned int hexDigit; 316 317 /* If the first trailing digit isn't 0 or 8 we can work out the 318 fraction immediately. */ 319 if (digitValue > 8) 320 return lfMoreThanHalf; 321 else if (digitValue < 8 && digitValue > 0) 322 return lfLessThanHalf; 323 324 // Otherwise we need to find the first non-zero digit. 325 while (p != end && (*p == '0' || *p == '.')) 326 p++; 327 328 assert(p != end && "Invalid trailing hexadecimal fraction!"); 329 330 hexDigit = hexDigitValue(*p); 331 332 /* If we ran off the end it is exactly zero or one-half, otherwise 333 a little more. */ 334 if (hexDigit == -1U) 335 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 336 else 337 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 338 } 339 340 /* Return the fraction lost were a bignum truncated losing the least 341 significant BITS bits. */ 342 static lostFraction 343 lostFractionThroughTruncation(const integerPart *parts, 344 unsigned int partCount, 345 unsigned int bits) 346 { 347 unsigned int lsb; 348 349 lsb = APInt::tcLSB(parts, partCount); 350 351 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 352 if (bits <= lsb) 353 return lfExactlyZero; 354 if (bits == lsb + 1) 355 return lfExactlyHalf; 356 if (bits <= partCount * integerPartWidth && 357 APInt::tcExtractBit(parts, bits - 1)) 358 return lfMoreThanHalf; 359 360 return lfLessThanHalf; 361 } 362 363 /* Shift DST right BITS bits noting lost fraction. */ 364 static lostFraction 365 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) 366 { 367 lostFraction lost_fraction; 368 369 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 370 371 APInt::tcShiftRight(dst, parts, bits); 372 373 return lost_fraction; 374 } 375 376 /* Combine the effect of two lost fractions. */ 377 static lostFraction 378 combineLostFractions(lostFraction moreSignificant, 379 lostFraction lessSignificant) 380 { 381 if (lessSignificant != lfExactlyZero) { 382 if (moreSignificant == lfExactlyZero) 383 moreSignificant = lfLessThanHalf; 384 else if (moreSignificant == lfExactlyHalf) 385 moreSignificant = lfMoreThanHalf; 386 } 387 388 return moreSignificant; 389 } 390 391 /* The error from the true value, in half-ulps, on multiplying two 392 floating point numbers, which differ from the value they 393 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 394 than the returned value. 395 396 See "How to Read Floating Point Numbers Accurately" by William D 397 Clinger. */ 398 static unsigned int 399 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 400 { 401 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 402 403 if (HUerr1 + HUerr2 == 0) 404 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 405 else 406 return inexactMultiply + 2 * (HUerr1 + HUerr2); 407 } 408 409 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 410 when the least significant BITS are truncated. BITS cannot be 411 zero. */ 412 static integerPart 413 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) 414 { 415 unsigned int count, partBits; 416 integerPart part, boundary; 417 418 assert(bits != 0); 419 420 bits--; 421 count = bits / integerPartWidth; 422 partBits = bits % integerPartWidth + 1; 423 424 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits)); 425 426 if (isNearest) 427 boundary = (integerPart) 1 << (partBits - 1); 428 else 429 boundary = 0; 430 431 if (count == 0) { 432 if (part - boundary <= boundary - part) 433 return part - boundary; 434 else 435 return boundary - part; 436 } 437 438 if (part == boundary) { 439 while (--count) 440 if (parts[count]) 441 return ~(integerPart) 0; /* A lot. */ 442 443 return parts[0]; 444 } else if (part == boundary - 1) { 445 while (--count) 446 if (~parts[count]) 447 return ~(integerPart) 0; /* A lot. */ 448 449 return -parts[0]; 450 } 451 452 return ~(integerPart) 0; /* A lot. */ 453 } 454 455 /* Place pow(5, power) in DST, and return the number of parts used. 456 DST must be at least one part larger than size of the answer. */ 457 static unsigned int 458 powerOf5(integerPart *dst, unsigned int power) 459 { 460 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 461 15625, 78125 }; 462 integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; 463 pow5s[0] = 78125 * 5; 464 465 unsigned int partsCount[16] = { 1 }; 466 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 467 unsigned int result; 468 assert(power <= maxExponent); 469 470 p1 = dst; 471 p2 = scratch; 472 473 *p1 = firstEightPowers[power & 7]; 474 power >>= 3; 475 476 result = 1; 477 pow5 = pow5s; 478 479 for (unsigned int n = 0; power; power >>= 1, n++) { 480 unsigned int pc; 481 482 pc = partsCount[n]; 483 484 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ 485 if (pc == 0) { 486 pc = partsCount[n - 1]; 487 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); 488 pc *= 2; 489 if (pow5[pc - 1] == 0) 490 pc--; 491 partsCount[n] = pc; 492 } 493 494 if (power & 1) { 495 integerPart *tmp; 496 497 APInt::tcFullMultiply(p2, p1, pow5, result, pc); 498 result += pc; 499 if (p2[result - 1] == 0) 500 result--; 501 502 /* Now result is in p1 with partsCount parts and p2 is scratch 503 space. */ 504 tmp = p1, p1 = p2, p2 = tmp; 505 } 506 507 pow5 += pc; 508 } 509 510 if (p1 != dst) 511 APInt::tcAssign(dst, p1, result); 512 513 return result; 514 } 515 516 /* Zero at the end to avoid modular arithmetic when adding one; used 517 when rounding up during hexadecimal output. */ 518 static const char hexDigitsLower[] = "0123456789abcdef0"; 519 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 520 static const char infinityL[] = "infinity"; 521 static const char infinityU[] = "INFINITY"; 522 static const char NaNL[] = "nan"; 523 static const char NaNU[] = "NAN"; 524 525 /* Write out an integerPart in hexadecimal, starting with the most 526 significant nibble. Write out exactly COUNT hexdigits, return 527 COUNT. */ 528 static unsigned int 529 partAsHex (char *dst, integerPart part, unsigned int count, 530 const char *hexDigitChars) 531 { 532 unsigned int result = count; 533 534 assert(count != 0 && count <= integerPartWidth / 4); 535 536 part >>= (integerPartWidth - 4 * count); 537 while (count--) { 538 dst[count] = hexDigitChars[part & 0xf]; 539 part >>= 4; 540 } 541 542 return result; 543 } 544 545 /* Write out an unsigned decimal integer. */ 546 static char * 547 writeUnsignedDecimal (char *dst, unsigned int n) 548 { 549 char buff[40], *p; 550 551 p = buff; 552 do 553 *p++ = '0' + n % 10; 554 while (n /= 10); 555 556 do 557 *dst++ = *--p; 558 while (p != buff); 559 560 return dst; 561 } 562 563 /* Write out a signed decimal integer. */ 564 static char * 565 writeSignedDecimal (char *dst, int value) 566 { 567 if (value < 0) { 568 *dst++ = '-'; 569 dst = writeUnsignedDecimal(dst, -(unsigned) value); 570 } else 571 dst = writeUnsignedDecimal(dst, value); 572 573 return dst; 574 } 575 576 /* Constructors. */ 577 void 578 APFloat::initialize(const fltSemantics *ourSemantics) 579 { 580 unsigned int count; 581 582 semantics = ourSemantics; 583 count = partCount(); 584 if (count > 1) 585 significand.parts = new integerPart[count]; 586 } 587 588 void 589 APFloat::freeSignificand() 590 { 591 if (needsCleanup()) 592 delete [] significand.parts; 593 } 594 595 void 596 APFloat::assign(const APFloat &rhs) 597 { 598 assert(semantics == rhs.semantics); 599 600 sign = rhs.sign; 601 category = rhs.category; 602 exponent = rhs.exponent; 603 if (isFiniteNonZero() || category == fcNaN) 604 copySignificand(rhs); 605 } 606 607 void 608 APFloat::copySignificand(const APFloat &rhs) 609 { 610 assert(isFiniteNonZero() || category == fcNaN); 611 assert(rhs.partCount() >= partCount()); 612 613 APInt::tcAssign(significandParts(), rhs.significandParts(), 614 partCount()); 615 } 616 617 /* Make this number a NaN, with an arbitrary but deterministic value 618 for the significand. If double or longer, this is a signalling NaN, 619 which may not be ideal. If float, this is QNaN(0). */ 620 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) 621 { 622 category = fcNaN; 623 sign = Negative; 624 625 integerPart *significand = significandParts(); 626 unsigned numParts = partCount(); 627 628 // Set the significand bits to the fill. 629 if (!fill || fill->getNumWords() < numParts) 630 APInt::tcSet(significand, 0, numParts); 631 if (fill) { 632 APInt::tcAssign(significand, fill->getRawData(), 633 std::min(fill->getNumWords(), numParts)); 634 635 // Zero out the excess bits of the significand. 636 unsigned bitsToPreserve = semantics->precision - 1; 637 unsigned part = bitsToPreserve / 64; 638 bitsToPreserve %= 64; 639 significand[part] &= ((1ULL << bitsToPreserve) - 1); 640 for (part++; part != numParts; ++part) 641 significand[part] = 0; 642 } 643 644 unsigned QNaNBit = semantics->precision - 2; 645 646 if (SNaN) { 647 // We always have to clear the QNaN bit to make it an SNaN. 648 APInt::tcClearBit(significand, QNaNBit); 649 650 // If there are no bits set in the payload, we have to set 651 // *something* to make it a NaN instead of an infinity; 652 // conventionally, this is the next bit down from the QNaN bit. 653 if (APInt::tcIsZero(significand, numParts)) 654 APInt::tcSetBit(significand, QNaNBit - 1); 655 } else { 656 // We always have to set the QNaN bit to make it a QNaN. 657 APInt::tcSetBit(significand, QNaNBit); 658 } 659 660 // For x87 extended precision, we want to make a NaN, not a 661 // pseudo-NaN. Maybe we should expose the ability to make 662 // pseudo-NaNs? 663 if (semantics == &APFloat::x87DoubleExtended) 664 APInt::tcSetBit(significand, QNaNBit + 1); 665 } 666 667 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative, 668 const APInt *fill) { 669 APFloat value(Sem, uninitialized); 670 value.makeNaN(SNaN, Negative, fill); 671 return value; 672 } 673 674 APFloat & 675 APFloat::operator=(const APFloat &rhs) 676 { 677 if (this != &rhs) { 678 if (semantics != rhs.semantics) { 679 freeSignificand(); 680 initialize(rhs.semantics); 681 } 682 assign(rhs); 683 } 684 685 return *this; 686 } 687 688 APFloat & 689 APFloat::operator=(APFloat &&rhs) { 690 freeSignificand(); 691 692 semantics = rhs.semantics; 693 significand = rhs.significand; 694 exponent = rhs.exponent; 695 category = rhs.category; 696 sign = rhs.sign; 697 698 rhs.semantics = &Bogus; 699 return *this; 700 } 701 702 bool 703 APFloat::isDenormal() const { 704 return isFiniteNonZero() && (exponent == semantics->minExponent) && 705 (APInt::tcExtractBit(significandParts(), 706 semantics->precision - 1) == 0); 707 } 708 709 bool 710 APFloat::isSmallest() const { 711 // The smallest number by magnitude in our format will be the smallest 712 // denormal, i.e. the floating point number with exponent being minimum 713 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 714 return isFiniteNonZero() && exponent == semantics->minExponent && 715 significandMSB() == 0; 716 } 717 718 bool APFloat::isSignificandAllOnes() const { 719 // Test if the significand excluding the integral bit is all ones. This allows 720 // us to test for binade boundaries. 721 const integerPart *Parts = significandParts(); 722 const unsigned PartCount = partCount(); 723 for (unsigned i = 0; i < PartCount - 1; i++) 724 if (~Parts[i]) 725 return false; 726 727 // Set the unused high bits to all ones when we compare. 728 const unsigned NumHighBits = 729 PartCount*integerPartWidth - semantics->precision + 1; 730 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " 731 "fill than integerPartWidth"); 732 const integerPart HighBitFill = 733 ~integerPart(0) << (integerPartWidth - NumHighBits); 734 if (~(Parts[PartCount - 1] | HighBitFill)) 735 return false; 736 737 return true; 738 } 739 740 bool APFloat::isSignificandAllZeros() const { 741 // Test if the significand excluding the integral bit is all zeros. This 742 // allows us to test for binade boundaries. 743 const integerPart *Parts = significandParts(); 744 const unsigned PartCount = partCount(); 745 746 for (unsigned i = 0; i < PartCount - 1; i++) 747 if (Parts[i]) 748 return false; 749 750 const unsigned NumHighBits = 751 PartCount*integerPartWidth - semantics->precision + 1; 752 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " 753 "clear than integerPartWidth"); 754 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 755 756 if (Parts[PartCount - 1] & HighBitMask) 757 return false; 758 759 return true; 760 } 761 762 bool 763 APFloat::isLargest() const { 764 // The largest number by magnitude in our format will be the floating point 765 // number with maximum exponent and with significand that is all ones. 766 return isFiniteNonZero() && exponent == semantics->maxExponent 767 && isSignificandAllOnes(); 768 } 769 770 bool 771 APFloat::bitwiseIsEqual(const APFloat &rhs) const { 772 if (this == &rhs) 773 return true; 774 if (semantics != rhs.semantics || 775 category != rhs.category || 776 sign != rhs.sign) 777 return false; 778 if (category==fcZero || category==fcInfinity) 779 return true; 780 else if (isFiniteNonZero() && exponent!=rhs.exponent) 781 return false; 782 else { 783 int i= partCount(); 784 const integerPart* p=significandParts(); 785 const integerPart* q=rhs.significandParts(); 786 for (; i>0; i--, p++, q++) { 787 if (*p != *q) 788 return false; 789 } 790 return true; 791 } 792 } 793 794 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) { 795 initialize(&ourSemantics); 796 sign = 0; 797 category = fcNormal; 798 zeroSignificand(); 799 exponent = ourSemantics.precision - 1; 800 significandParts()[0] = value; 801 normalize(rmNearestTiesToEven, lfExactlyZero); 802 } 803 804 APFloat::APFloat(const fltSemantics &ourSemantics) { 805 initialize(&ourSemantics); 806 category = fcZero; 807 sign = false; 808 } 809 810 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) { 811 // Allocates storage if necessary but does not initialize it. 812 initialize(&ourSemantics); 813 } 814 815 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) { 816 initialize(&ourSemantics); 817 convertFromString(text, rmNearestTiesToEven); 818 } 819 820 APFloat::APFloat(const APFloat &rhs) { 821 initialize(rhs.semantics); 822 assign(rhs); 823 } 824 825 APFloat::APFloat(APFloat &&rhs) : semantics(&Bogus) { 826 *this = std::move(rhs); 827 } 828 829 APFloat::~APFloat() 830 { 831 freeSignificand(); 832 } 833 834 // Profile - This method 'profiles' an APFloat for use with FoldingSet. 835 void APFloat::Profile(FoldingSetNodeID& ID) const { 836 ID.Add(bitcastToAPInt()); 837 } 838 839 unsigned int 840 APFloat::partCount() const 841 { 842 return partCountForBits(semantics->precision + 1); 843 } 844 845 unsigned int 846 APFloat::semanticsPrecision(const fltSemantics &semantics) 847 { 848 return semantics.precision; 849 } 850 851 const integerPart * 852 APFloat::significandParts() const 853 { 854 return const_cast<APFloat *>(this)->significandParts(); 855 } 856 857 integerPart * 858 APFloat::significandParts() 859 { 860 if (partCount() > 1) 861 return significand.parts; 862 else 863 return &significand.part; 864 } 865 866 void 867 APFloat::zeroSignificand() 868 { 869 APInt::tcSet(significandParts(), 0, partCount()); 870 } 871 872 /* Increment an fcNormal floating point number's significand. */ 873 void 874 APFloat::incrementSignificand() 875 { 876 integerPart carry; 877 878 carry = APInt::tcIncrement(significandParts(), partCount()); 879 880 /* Our callers should never cause us to overflow. */ 881 assert(carry == 0); 882 (void)carry; 883 } 884 885 /* Add the significand of the RHS. Returns the carry flag. */ 886 integerPart 887 APFloat::addSignificand(const APFloat &rhs) 888 { 889 integerPart *parts; 890 891 parts = significandParts(); 892 893 assert(semantics == rhs.semantics); 894 assert(exponent == rhs.exponent); 895 896 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 897 } 898 899 /* Subtract the significand of the RHS with a borrow flag. Returns 900 the borrow flag. */ 901 integerPart 902 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow) 903 { 904 integerPart *parts; 905 906 parts = significandParts(); 907 908 assert(semantics == rhs.semantics); 909 assert(exponent == rhs.exponent); 910 911 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 912 partCount()); 913 } 914 915 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 916 on to the full-precision result of the multiplication. Returns the 917 lost fraction. */ 918 lostFraction 919 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) 920 { 921 unsigned int omsb; // One, not zero, based MSB. 922 unsigned int partsCount, newPartsCount, precision; 923 integerPart *lhsSignificand; 924 integerPart scratch[4]; 925 integerPart *fullSignificand; 926 lostFraction lost_fraction; 927 bool ignored; 928 929 assert(semantics == rhs.semantics); 930 931 precision = semantics->precision; 932 933 // Allocate space for twice as many bits as the original significand, plus one 934 // extra bit for the addition to overflow into. 935 newPartsCount = partCountForBits(precision * 2 + 1); 936 937 if (newPartsCount > 4) 938 fullSignificand = new integerPart[newPartsCount]; 939 else 940 fullSignificand = scratch; 941 942 lhsSignificand = significandParts(); 943 partsCount = partCount(); 944 945 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 946 rhs.significandParts(), partsCount, partsCount); 947 948 lost_fraction = lfExactlyZero; 949 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 950 exponent += rhs.exponent; 951 952 // Assume the operands involved in the multiplication are single-precision 953 // FP, and the two multiplicants are: 954 // *this = a23 . a22 ... a0 * 2^e1 955 // rhs = b23 . b22 ... b0 * 2^e2 956 // the result of multiplication is: 957 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) 958 // Note that there are three significant bits at the left-hand side of the 959 // radix point: two for the multiplication, and an overflow bit for the 960 // addition (that will always be zero at this point). Move the radix point 961 // toward left by two bits, and adjust exponent accordingly. 962 exponent += 2; 963 964 if (addend && addend->isNonZero()) { 965 // The intermediate result of the multiplication has "2 * precision" 966 // signicant bit; adjust the addend to be consistent with mul result. 967 // 968 Significand savedSignificand = significand; 969 const fltSemantics *savedSemantics = semantics; 970 fltSemantics extendedSemantics; 971 opStatus status; 972 unsigned int extendedPrecision; 973 974 // Normalize our MSB to one below the top bit to allow for overflow. 975 extendedPrecision = 2 * precision + 1; 976 if (omsb != extendedPrecision - 1) { 977 assert(extendedPrecision > omsb); 978 APInt::tcShiftLeft(fullSignificand, newPartsCount, 979 (extendedPrecision - 1) - omsb); 980 exponent -= (extendedPrecision - 1) - omsb; 981 } 982 983 /* Create new semantics. */ 984 extendedSemantics = *semantics; 985 extendedSemantics.precision = extendedPrecision; 986 987 if (newPartsCount == 1) 988 significand.part = fullSignificand[0]; 989 else 990 significand.parts = fullSignificand; 991 semantics = &extendedSemantics; 992 993 APFloat extendedAddend(*addend); 994 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); 995 assert(status == opOK); 996 (void)status; 997 998 // Shift the significand of the addend right by one bit. This guarantees 999 // that the high bit of the significand is zero (same as fullSignificand), 1000 // so the addition will overflow (if it does overflow at all) into the top bit. 1001 lost_fraction = extendedAddend.shiftSignificandRight(1); 1002 assert(lost_fraction == lfExactlyZero && 1003 "Lost precision while shifting addend for fused-multiply-add."); 1004 1005 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 1006 1007 /* Restore our state. */ 1008 if (newPartsCount == 1) 1009 fullSignificand[0] = significand.part; 1010 significand = savedSignificand; 1011 semantics = savedSemantics; 1012 1013 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1014 } 1015 1016 // Convert the result having "2 * precision" significant-bits back to the one 1017 // having "precision" significant-bits. First, move the radix point from 1018 // poision "2*precision - 1" to "precision - 1". The exponent need to be 1019 // adjusted by "2*precision - 1" - "precision - 1" = "precision". 1020 exponent -= precision + 1; 1021 1022 // In case MSB resides at the left-hand side of radix point, shift the 1023 // mantissa right by some amount to make sure the MSB reside right before 1024 // the radix point (i.e. "MSB . rest-significant-bits"). 1025 // 1026 // Note that the result is not normalized when "omsb < precision". So, the 1027 // caller needs to call APFloat::normalize() if normalized value is expected. 1028 if (omsb > precision) { 1029 unsigned int bits, significantParts; 1030 lostFraction lf; 1031 1032 bits = omsb - precision; 1033 significantParts = partCountForBits(omsb); 1034 lf = shiftRight(fullSignificand, significantParts, bits); 1035 lost_fraction = combineLostFractions(lf, lost_fraction); 1036 exponent += bits; 1037 } 1038 1039 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 1040 1041 if (newPartsCount > 4) 1042 delete [] fullSignificand; 1043 1044 return lost_fraction; 1045 } 1046 1047 /* Multiply the significands of LHS and RHS to DST. */ 1048 lostFraction 1049 APFloat::divideSignificand(const APFloat &rhs) 1050 { 1051 unsigned int bit, i, partsCount; 1052 const integerPart *rhsSignificand; 1053 integerPart *lhsSignificand, *dividend, *divisor; 1054 integerPart scratch[4]; 1055 lostFraction lost_fraction; 1056 1057 assert(semantics == rhs.semantics); 1058 1059 lhsSignificand = significandParts(); 1060 rhsSignificand = rhs.significandParts(); 1061 partsCount = partCount(); 1062 1063 if (partsCount > 2) 1064 dividend = new integerPart[partsCount * 2]; 1065 else 1066 dividend = scratch; 1067 1068 divisor = dividend + partsCount; 1069 1070 /* Copy the dividend and divisor as they will be modified in-place. */ 1071 for (i = 0; i < partsCount; i++) { 1072 dividend[i] = lhsSignificand[i]; 1073 divisor[i] = rhsSignificand[i]; 1074 lhsSignificand[i] = 0; 1075 } 1076 1077 exponent -= rhs.exponent; 1078 1079 unsigned int precision = semantics->precision; 1080 1081 /* Normalize the divisor. */ 1082 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 1083 if (bit) { 1084 exponent += bit; 1085 APInt::tcShiftLeft(divisor, partsCount, bit); 1086 } 1087 1088 /* Normalize the dividend. */ 1089 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 1090 if (bit) { 1091 exponent -= bit; 1092 APInt::tcShiftLeft(dividend, partsCount, bit); 1093 } 1094 1095 /* Ensure the dividend >= divisor initially for the loop below. 1096 Incidentally, this means that the division loop below is 1097 guaranteed to set the integer bit to one. */ 1098 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { 1099 exponent--; 1100 APInt::tcShiftLeft(dividend, partsCount, 1); 1101 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 1102 } 1103 1104 /* Long division. */ 1105 for (bit = precision; bit; bit -= 1) { 1106 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 1107 APInt::tcSubtract(dividend, divisor, 0, partsCount); 1108 APInt::tcSetBit(lhsSignificand, bit - 1); 1109 } 1110 1111 APInt::tcShiftLeft(dividend, partsCount, 1); 1112 } 1113 1114 /* Figure out the lost fraction. */ 1115 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 1116 1117 if (cmp > 0) 1118 lost_fraction = lfMoreThanHalf; 1119 else if (cmp == 0) 1120 lost_fraction = lfExactlyHalf; 1121 else if (APInt::tcIsZero(dividend, partsCount)) 1122 lost_fraction = lfExactlyZero; 1123 else 1124 lost_fraction = lfLessThanHalf; 1125 1126 if (partsCount > 2) 1127 delete [] dividend; 1128 1129 return lost_fraction; 1130 } 1131 1132 unsigned int 1133 APFloat::significandMSB() const 1134 { 1135 return APInt::tcMSB(significandParts(), partCount()); 1136 } 1137 1138 unsigned int 1139 APFloat::significandLSB() const 1140 { 1141 return APInt::tcLSB(significandParts(), partCount()); 1142 } 1143 1144 /* Note that a zero result is NOT normalized to fcZero. */ 1145 lostFraction 1146 APFloat::shiftSignificandRight(unsigned int bits) 1147 { 1148 /* Our exponent should not overflow. */ 1149 assert((ExponentType) (exponent + bits) >= exponent); 1150 1151 exponent += bits; 1152 1153 return shiftRight(significandParts(), partCount(), bits); 1154 } 1155 1156 /* Shift the significand left BITS bits, subtract BITS from its exponent. */ 1157 void 1158 APFloat::shiftSignificandLeft(unsigned int bits) 1159 { 1160 assert(bits < semantics->precision); 1161 1162 if (bits) { 1163 unsigned int partsCount = partCount(); 1164 1165 APInt::tcShiftLeft(significandParts(), partsCount, bits); 1166 exponent -= bits; 1167 1168 assert(!APInt::tcIsZero(significandParts(), partsCount)); 1169 } 1170 } 1171 1172 APFloat::cmpResult 1173 APFloat::compareAbsoluteValue(const APFloat &rhs) const 1174 { 1175 int compare; 1176 1177 assert(semantics == rhs.semantics); 1178 assert(isFiniteNonZero()); 1179 assert(rhs.isFiniteNonZero()); 1180 1181 compare = exponent - rhs.exponent; 1182 1183 /* If exponents are equal, do an unsigned bignum comparison of the 1184 significands. */ 1185 if (compare == 0) 1186 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1187 partCount()); 1188 1189 if (compare > 0) 1190 return cmpGreaterThan; 1191 else if (compare < 0) 1192 return cmpLessThan; 1193 else 1194 return cmpEqual; 1195 } 1196 1197 /* Handle overflow. Sign is preserved. We either become infinity or 1198 the largest finite number. */ 1199 APFloat::opStatus 1200 APFloat::handleOverflow(roundingMode rounding_mode) 1201 { 1202 /* Infinity? */ 1203 if (rounding_mode == rmNearestTiesToEven || 1204 rounding_mode == rmNearestTiesToAway || 1205 (rounding_mode == rmTowardPositive && !sign) || 1206 (rounding_mode == rmTowardNegative && sign)) { 1207 category = fcInfinity; 1208 return (opStatus) (opOverflow | opInexact); 1209 } 1210 1211 /* Otherwise we become the largest finite number. */ 1212 category = fcNormal; 1213 exponent = semantics->maxExponent; 1214 APInt::tcSetLeastSignificantBits(significandParts(), partCount(), 1215 semantics->precision); 1216 1217 return opInexact; 1218 } 1219 1220 /* Returns TRUE if, when truncating the current number, with BIT the 1221 new LSB, with the given lost fraction and rounding mode, the result 1222 would need to be rounded away from zero (i.e., by increasing the 1223 signficand). This routine must work for fcZero of both signs, and 1224 fcNormal numbers. */ 1225 bool 1226 APFloat::roundAwayFromZero(roundingMode rounding_mode, 1227 lostFraction lost_fraction, 1228 unsigned int bit) const 1229 { 1230 /* NaNs and infinities should not have lost fractions. */ 1231 assert(isFiniteNonZero() || category == fcZero); 1232 1233 /* Current callers never pass this so we don't handle it. */ 1234 assert(lost_fraction != lfExactlyZero); 1235 1236 switch (rounding_mode) { 1237 case rmNearestTiesToAway: 1238 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1239 1240 case rmNearestTiesToEven: 1241 if (lost_fraction == lfMoreThanHalf) 1242 return true; 1243 1244 /* Our zeroes don't have a significand to test. */ 1245 if (lost_fraction == lfExactlyHalf && category != fcZero) 1246 return APInt::tcExtractBit(significandParts(), bit); 1247 1248 return false; 1249 1250 case rmTowardZero: 1251 return false; 1252 1253 case rmTowardPositive: 1254 return !sign; 1255 1256 case rmTowardNegative: 1257 return sign; 1258 } 1259 llvm_unreachable("Invalid rounding mode found"); 1260 } 1261 1262 APFloat::opStatus 1263 APFloat::normalize(roundingMode rounding_mode, 1264 lostFraction lost_fraction) 1265 { 1266 unsigned int omsb; /* One, not zero, based MSB. */ 1267 int exponentChange; 1268 1269 if (!isFiniteNonZero()) 1270 return opOK; 1271 1272 /* Before rounding normalize the exponent of fcNormal numbers. */ 1273 omsb = significandMSB() + 1; 1274 1275 if (omsb) { 1276 /* OMSB is numbered from 1. We want to place it in the integer 1277 bit numbered PRECISION if possible, with a compensating change in 1278 the exponent. */ 1279 exponentChange = omsb - semantics->precision; 1280 1281 /* If the resulting exponent is too high, overflow according to 1282 the rounding mode. */ 1283 if (exponent + exponentChange > semantics->maxExponent) 1284 return handleOverflow(rounding_mode); 1285 1286 /* Subnormal numbers have exponent minExponent, and their MSB 1287 is forced based on that. */ 1288 if (exponent + exponentChange < semantics->minExponent) 1289 exponentChange = semantics->minExponent - exponent; 1290 1291 /* Shifting left is easy as we don't lose precision. */ 1292 if (exponentChange < 0) { 1293 assert(lost_fraction == lfExactlyZero); 1294 1295 shiftSignificandLeft(-exponentChange); 1296 1297 return opOK; 1298 } 1299 1300 if (exponentChange > 0) { 1301 lostFraction lf; 1302 1303 /* Shift right and capture any new lost fraction. */ 1304 lf = shiftSignificandRight(exponentChange); 1305 1306 lost_fraction = combineLostFractions(lf, lost_fraction); 1307 1308 /* Keep OMSB up-to-date. */ 1309 if (omsb > (unsigned) exponentChange) 1310 omsb -= exponentChange; 1311 else 1312 omsb = 0; 1313 } 1314 } 1315 1316 /* Now round the number according to rounding_mode given the lost 1317 fraction. */ 1318 1319 /* As specified in IEEE 754, since we do not trap we do not report 1320 underflow for exact results. */ 1321 if (lost_fraction == lfExactlyZero) { 1322 /* Canonicalize zeroes. */ 1323 if (omsb == 0) 1324 category = fcZero; 1325 1326 return opOK; 1327 } 1328 1329 /* Increment the significand if we're rounding away from zero. */ 1330 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1331 if (omsb == 0) 1332 exponent = semantics->minExponent; 1333 1334 incrementSignificand(); 1335 omsb = significandMSB() + 1; 1336 1337 /* Did the significand increment overflow? */ 1338 if (omsb == (unsigned) semantics->precision + 1) { 1339 /* Renormalize by incrementing the exponent and shifting our 1340 significand right one. However if we already have the 1341 maximum exponent we overflow to infinity. */ 1342 if (exponent == semantics->maxExponent) { 1343 category = fcInfinity; 1344 1345 return (opStatus) (opOverflow | opInexact); 1346 } 1347 1348 shiftSignificandRight(1); 1349 1350 return opInexact; 1351 } 1352 } 1353 1354 /* The normal case - we were and are not denormal, and any 1355 significand increment above didn't overflow. */ 1356 if (omsb == semantics->precision) 1357 return opInexact; 1358 1359 /* We have a non-zero denormal. */ 1360 assert(omsb < semantics->precision); 1361 1362 /* Canonicalize zeroes. */ 1363 if (omsb == 0) 1364 category = fcZero; 1365 1366 /* The fcZero case is a denormal that underflowed to zero. */ 1367 return (opStatus) (opUnderflow | opInexact); 1368 } 1369 1370 APFloat::opStatus 1371 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) 1372 { 1373 switch (PackCategoriesIntoKey(category, rhs.category)) { 1374 default: 1375 llvm_unreachable(nullptr); 1376 1377 case PackCategoriesIntoKey(fcNaN, fcZero): 1378 case PackCategoriesIntoKey(fcNaN, fcNormal): 1379 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1380 case PackCategoriesIntoKey(fcNaN, fcNaN): 1381 case PackCategoriesIntoKey(fcNormal, fcZero): 1382 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1383 case PackCategoriesIntoKey(fcInfinity, fcZero): 1384 return opOK; 1385 1386 case PackCategoriesIntoKey(fcZero, fcNaN): 1387 case PackCategoriesIntoKey(fcNormal, fcNaN): 1388 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1389 // We need to be sure to flip the sign here for subtraction because we 1390 // don't have a separate negate operation so -NaN becomes 0 - NaN here. 1391 sign = rhs.sign ^ subtract; 1392 category = fcNaN; 1393 copySignificand(rhs); 1394 return opOK; 1395 1396 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1397 case PackCategoriesIntoKey(fcZero, fcInfinity): 1398 category = fcInfinity; 1399 sign = rhs.sign ^ subtract; 1400 return opOK; 1401 1402 case PackCategoriesIntoKey(fcZero, fcNormal): 1403 assign(rhs); 1404 sign = rhs.sign ^ subtract; 1405 return opOK; 1406 1407 case PackCategoriesIntoKey(fcZero, fcZero): 1408 /* Sign depends on rounding mode; handled by caller. */ 1409 return opOK; 1410 1411 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1412 /* Differently signed infinities can only be validly 1413 subtracted. */ 1414 if (((sign ^ rhs.sign)!=0) != subtract) { 1415 makeNaN(); 1416 return opInvalidOp; 1417 } 1418 1419 return opOK; 1420 1421 case PackCategoriesIntoKey(fcNormal, fcNormal): 1422 return opDivByZero; 1423 } 1424 } 1425 1426 /* Add or subtract two normal numbers. */ 1427 lostFraction 1428 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) 1429 { 1430 integerPart carry; 1431 lostFraction lost_fraction; 1432 int bits; 1433 1434 /* Determine if the operation on the absolute values is effectively 1435 an addition or subtraction. */ 1436 subtract ^= static_cast<bool>(sign ^ rhs.sign); 1437 1438 /* Are we bigger exponent-wise than the RHS? */ 1439 bits = exponent - rhs.exponent; 1440 1441 /* Subtraction is more subtle than one might naively expect. */ 1442 if (subtract) { 1443 APFloat temp_rhs(rhs); 1444 bool reverse; 1445 1446 if (bits == 0) { 1447 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan; 1448 lost_fraction = lfExactlyZero; 1449 } else if (bits > 0) { 1450 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1451 shiftSignificandLeft(1); 1452 reverse = false; 1453 } else { 1454 lost_fraction = shiftSignificandRight(-bits - 1); 1455 temp_rhs.shiftSignificandLeft(1); 1456 reverse = true; 1457 } 1458 1459 if (reverse) { 1460 carry = temp_rhs.subtractSignificand 1461 (*this, lost_fraction != lfExactlyZero); 1462 copySignificand(temp_rhs); 1463 sign = !sign; 1464 } else { 1465 carry = subtractSignificand 1466 (temp_rhs, lost_fraction != lfExactlyZero); 1467 } 1468 1469 /* Invert the lost fraction - it was on the RHS and 1470 subtracted. */ 1471 if (lost_fraction == lfLessThanHalf) 1472 lost_fraction = lfMoreThanHalf; 1473 else if (lost_fraction == lfMoreThanHalf) 1474 lost_fraction = lfLessThanHalf; 1475 1476 /* The code above is intended to ensure that no borrow is 1477 necessary. */ 1478 assert(!carry); 1479 (void)carry; 1480 } else { 1481 if (bits > 0) { 1482 APFloat temp_rhs(rhs); 1483 1484 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1485 carry = addSignificand(temp_rhs); 1486 } else { 1487 lost_fraction = shiftSignificandRight(-bits); 1488 carry = addSignificand(rhs); 1489 } 1490 1491 /* We have a guard bit; generating a carry cannot happen. */ 1492 assert(!carry); 1493 (void)carry; 1494 } 1495 1496 return lost_fraction; 1497 } 1498 1499 APFloat::opStatus 1500 APFloat::multiplySpecials(const APFloat &rhs) 1501 { 1502 switch (PackCategoriesIntoKey(category, rhs.category)) { 1503 default: 1504 llvm_unreachable(nullptr); 1505 1506 case PackCategoriesIntoKey(fcNaN, fcZero): 1507 case PackCategoriesIntoKey(fcNaN, fcNormal): 1508 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1509 case PackCategoriesIntoKey(fcNaN, fcNaN): 1510 sign = false; 1511 return opOK; 1512 1513 case PackCategoriesIntoKey(fcZero, fcNaN): 1514 case PackCategoriesIntoKey(fcNormal, fcNaN): 1515 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1516 sign = false; 1517 category = fcNaN; 1518 copySignificand(rhs); 1519 return opOK; 1520 1521 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1522 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1523 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1524 category = fcInfinity; 1525 return opOK; 1526 1527 case PackCategoriesIntoKey(fcZero, fcNormal): 1528 case PackCategoriesIntoKey(fcNormal, fcZero): 1529 case PackCategoriesIntoKey(fcZero, fcZero): 1530 category = fcZero; 1531 return opOK; 1532 1533 case PackCategoriesIntoKey(fcZero, fcInfinity): 1534 case PackCategoriesIntoKey(fcInfinity, fcZero): 1535 makeNaN(); 1536 return opInvalidOp; 1537 1538 case PackCategoriesIntoKey(fcNormal, fcNormal): 1539 return opOK; 1540 } 1541 } 1542 1543 APFloat::opStatus 1544 APFloat::divideSpecials(const APFloat &rhs) 1545 { 1546 switch (PackCategoriesIntoKey(category, rhs.category)) { 1547 default: 1548 llvm_unreachable(nullptr); 1549 1550 case PackCategoriesIntoKey(fcZero, fcNaN): 1551 case PackCategoriesIntoKey(fcNormal, fcNaN): 1552 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1553 category = fcNaN; 1554 copySignificand(rhs); 1555 case PackCategoriesIntoKey(fcNaN, fcZero): 1556 case PackCategoriesIntoKey(fcNaN, fcNormal): 1557 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1558 case PackCategoriesIntoKey(fcNaN, fcNaN): 1559 sign = false; 1560 case PackCategoriesIntoKey(fcInfinity, fcZero): 1561 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1562 case PackCategoriesIntoKey(fcZero, fcInfinity): 1563 case PackCategoriesIntoKey(fcZero, fcNormal): 1564 return opOK; 1565 1566 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1567 category = fcZero; 1568 return opOK; 1569 1570 case PackCategoriesIntoKey(fcNormal, fcZero): 1571 category = fcInfinity; 1572 return opDivByZero; 1573 1574 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1575 case PackCategoriesIntoKey(fcZero, fcZero): 1576 makeNaN(); 1577 return opInvalidOp; 1578 1579 case PackCategoriesIntoKey(fcNormal, fcNormal): 1580 return opOK; 1581 } 1582 } 1583 1584 APFloat::opStatus 1585 APFloat::modSpecials(const APFloat &rhs) 1586 { 1587 switch (PackCategoriesIntoKey(category, rhs.category)) { 1588 default: 1589 llvm_unreachable(nullptr); 1590 1591 case PackCategoriesIntoKey(fcNaN, fcZero): 1592 case PackCategoriesIntoKey(fcNaN, fcNormal): 1593 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1594 case PackCategoriesIntoKey(fcNaN, fcNaN): 1595 case PackCategoriesIntoKey(fcZero, fcInfinity): 1596 case PackCategoriesIntoKey(fcZero, fcNormal): 1597 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1598 return opOK; 1599 1600 case PackCategoriesIntoKey(fcZero, fcNaN): 1601 case PackCategoriesIntoKey(fcNormal, fcNaN): 1602 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1603 sign = false; 1604 category = fcNaN; 1605 copySignificand(rhs); 1606 return opOK; 1607 1608 case PackCategoriesIntoKey(fcNormal, fcZero): 1609 case PackCategoriesIntoKey(fcInfinity, fcZero): 1610 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1611 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1612 case PackCategoriesIntoKey(fcZero, fcZero): 1613 makeNaN(); 1614 return opInvalidOp; 1615 1616 case PackCategoriesIntoKey(fcNormal, fcNormal): 1617 return opOK; 1618 } 1619 } 1620 1621 /* Change sign. */ 1622 void 1623 APFloat::changeSign() 1624 { 1625 /* Look mummy, this one's easy. */ 1626 sign = !sign; 1627 } 1628 1629 void 1630 APFloat::clearSign() 1631 { 1632 /* So is this one. */ 1633 sign = 0; 1634 } 1635 1636 void 1637 APFloat::copySign(const APFloat &rhs) 1638 { 1639 /* And this one. */ 1640 sign = rhs.sign; 1641 } 1642 1643 /* Normalized addition or subtraction. */ 1644 APFloat::opStatus 1645 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, 1646 bool subtract) 1647 { 1648 opStatus fs; 1649 1650 fs = addOrSubtractSpecials(rhs, subtract); 1651 1652 /* This return code means it was not a simple case. */ 1653 if (fs == opDivByZero) { 1654 lostFraction lost_fraction; 1655 1656 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1657 fs = normalize(rounding_mode, lost_fraction); 1658 1659 /* Can only be zero if we lost no fraction. */ 1660 assert(category != fcZero || lost_fraction == lfExactlyZero); 1661 } 1662 1663 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1664 positive zero unless rounding to minus infinity, except that 1665 adding two like-signed zeroes gives that zero. */ 1666 if (category == fcZero) { 1667 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 1668 sign = (rounding_mode == rmTowardNegative); 1669 } 1670 1671 return fs; 1672 } 1673 1674 /* Normalized addition. */ 1675 APFloat::opStatus 1676 APFloat::add(const APFloat &rhs, roundingMode rounding_mode) 1677 { 1678 return addOrSubtract(rhs, rounding_mode, false); 1679 } 1680 1681 /* Normalized subtraction. */ 1682 APFloat::opStatus 1683 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode) 1684 { 1685 return addOrSubtract(rhs, rounding_mode, true); 1686 } 1687 1688 /* Normalized multiply. */ 1689 APFloat::opStatus 1690 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) 1691 { 1692 opStatus fs; 1693 1694 sign ^= rhs.sign; 1695 fs = multiplySpecials(rhs); 1696 1697 if (isFiniteNonZero()) { 1698 lostFraction lost_fraction = multiplySignificand(rhs, nullptr); 1699 fs = normalize(rounding_mode, lost_fraction); 1700 if (lost_fraction != lfExactlyZero) 1701 fs = (opStatus) (fs | opInexact); 1702 } 1703 1704 return fs; 1705 } 1706 1707 /* Normalized divide. */ 1708 APFloat::opStatus 1709 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) 1710 { 1711 opStatus fs; 1712 1713 sign ^= rhs.sign; 1714 fs = divideSpecials(rhs); 1715 1716 if (isFiniteNonZero()) { 1717 lostFraction lost_fraction = divideSignificand(rhs); 1718 fs = normalize(rounding_mode, lost_fraction); 1719 if (lost_fraction != lfExactlyZero) 1720 fs = (opStatus) (fs | opInexact); 1721 } 1722 1723 return fs; 1724 } 1725 1726 /* Normalized remainder. This is not currently correct in all cases. */ 1727 APFloat::opStatus 1728 APFloat::remainder(const APFloat &rhs) 1729 { 1730 opStatus fs; 1731 APFloat V = *this; 1732 unsigned int origSign = sign; 1733 1734 fs = V.divide(rhs, rmNearestTiesToEven); 1735 if (fs == opDivByZero) 1736 return fs; 1737 1738 int parts = partCount(); 1739 integerPart *x = new integerPart[parts]; 1740 bool ignored; 1741 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1742 rmNearestTiesToEven, &ignored); 1743 if (fs==opInvalidOp) 1744 return fs; 1745 1746 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1747 rmNearestTiesToEven); 1748 assert(fs==opOK); // should always work 1749 1750 fs = V.multiply(rhs, rmNearestTiesToEven); 1751 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1752 1753 fs = subtract(V, rmNearestTiesToEven); 1754 assert(fs==opOK || fs==opInexact); // likewise 1755 1756 if (isZero()) 1757 sign = origSign; // IEEE754 requires this 1758 delete[] x; 1759 return fs; 1760 } 1761 1762 /* Normalized llvm frem (C fmod). 1763 This is not currently correct in all cases. */ 1764 APFloat::opStatus 1765 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) 1766 { 1767 opStatus fs; 1768 fs = modSpecials(rhs); 1769 1770 if (isFiniteNonZero() && rhs.isFiniteNonZero()) { 1771 APFloat V = *this; 1772 unsigned int origSign = sign; 1773 1774 fs = V.divide(rhs, rmNearestTiesToEven); 1775 if (fs == opDivByZero) 1776 return fs; 1777 1778 int parts = partCount(); 1779 integerPart *x = new integerPart[parts]; 1780 bool ignored; 1781 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1782 rmTowardZero, &ignored); 1783 if (fs==opInvalidOp) 1784 return fs; 1785 1786 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1787 rmNearestTiesToEven); 1788 assert(fs==opOK); // should always work 1789 1790 fs = V.multiply(rhs, rounding_mode); 1791 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1792 1793 fs = subtract(V, rounding_mode); 1794 assert(fs==opOK || fs==opInexact); // likewise 1795 1796 if (isZero()) 1797 sign = origSign; // IEEE754 requires this 1798 delete[] x; 1799 } 1800 return fs; 1801 } 1802 1803 /* Normalized fused-multiply-add. */ 1804 APFloat::opStatus 1805 APFloat::fusedMultiplyAdd(const APFloat &multiplicand, 1806 const APFloat &addend, 1807 roundingMode rounding_mode) 1808 { 1809 opStatus fs; 1810 1811 /* Post-multiplication sign, before addition. */ 1812 sign ^= multiplicand.sign; 1813 1814 /* If and only if all arguments are normal do we need to do an 1815 extended-precision calculation. */ 1816 if (isFiniteNonZero() && 1817 multiplicand.isFiniteNonZero() && 1818 addend.isFinite()) { 1819 lostFraction lost_fraction; 1820 1821 lost_fraction = multiplySignificand(multiplicand, &addend); 1822 fs = normalize(rounding_mode, lost_fraction); 1823 if (lost_fraction != lfExactlyZero) 1824 fs = (opStatus) (fs | opInexact); 1825 1826 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1827 positive zero unless rounding to minus infinity, except that 1828 adding two like-signed zeroes gives that zero. */ 1829 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) 1830 sign = (rounding_mode == rmTowardNegative); 1831 } else { 1832 fs = multiplySpecials(multiplicand); 1833 1834 /* FS can only be opOK or opInvalidOp. There is no more work 1835 to do in the latter case. The IEEE-754R standard says it is 1836 implementation-defined in this case whether, if ADDEND is a 1837 quiet NaN, we raise invalid op; this implementation does so. 1838 1839 If we need to do the addition we can do so with normal 1840 precision. */ 1841 if (fs == opOK) 1842 fs = addOrSubtract(addend, rounding_mode, false); 1843 } 1844 1845 return fs; 1846 } 1847 1848 /* Rounding-mode corrrect round to integral value. */ 1849 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) { 1850 opStatus fs; 1851 1852 // If the exponent is large enough, we know that this value is already 1853 // integral, and the arithmetic below would potentially cause it to saturate 1854 // to +/-Inf. Bail out early instead. 1855 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics)) 1856 return opOK; 1857 1858 // The algorithm here is quite simple: we add 2^(p-1), where p is the 1859 // precision of our format, and then subtract it back off again. The choice 1860 // of rounding modes for the addition/subtraction determines the rounding mode 1861 // for our integral rounding as well. 1862 // NOTE: When the input value is negative, we do subtraction followed by 1863 // addition instead. 1864 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); 1865 IntegerConstant <<= semanticsPrecision(*semantics)-1; 1866 APFloat MagicConstant(*semantics); 1867 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 1868 rmNearestTiesToEven); 1869 MagicConstant.copySign(*this); 1870 1871 if (fs != opOK) 1872 return fs; 1873 1874 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly. 1875 bool inputSign = isNegative(); 1876 1877 fs = add(MagicConstant, rounding_mode); 1878 if (fs != opOK && fs != opInexact) 1879 return fs; 1880 1881 fs = subtract(MagicConstant, rounding_mode); 1882 1883 // Restore the input sign. 1884 if (inputSign != isNegative()) 1885 changeSign(); 1886 1887 return fs; 1888 } 1889 1890 1891 /* Comparison requires normalized numbers. */ 1892 APFloat::cmpResult 1893 APFloat::compare(const APFloat &rhs) const 1894 { 1895 cmpResult result; 1896 1897 assert(semantics == rhs.semantics); 1898 1899 switch (PackCategoriesIntoKey(category, rhs.category)) { 1900 default: 1901 llvm_unreachable(nullptr); 1902 1903 case PackCategoriesIntoKey(fcNaN, fcZero): 1904 case PackCategoriesIntoKey(fcNaN, fcNormal): 1905 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1906 case PackCategoriesIntoKey(fcNaN, fcNaN): 1907 case PackCategoriesIntoKey(fcZero, fcNaN): 1908 case PackCategoriesIntoKey(fcNormal, fcNaN): 1909 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1910 return cmpUnordered; 1911 1912 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1913 case PackCategoriesIntoKey(fcInfinity, fcZero): 1914 case PackCategoriesIntoKey(fcNormal, fcZero): 1915 if (sign) 1916 return cmpLessThan; 1917 else 1918 return cmpGreaterThan; 1919 1920 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1921 case PackCategoriesIntoKey(fcZero, fcInfinity): 1922 case PackCategoriesIntoKey(fcZero, fcNormal): 1923 if (rhs.sign) 1924 return cmpGreaterThan; 1925 else 1926 return cmpLessThan; 1927 1928 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1929 if (sign == rhs.sign) 1930 return cmpEqual; 1931 else if (sign) 1932 return cmpLessThan; 1933 else 1934 return cmpGreaterThan; 1935 1936 case PackCategoriesIntoKey(fcZero, fcZero): 1937 return cmpEqual; 1938 1939 case PackCategoriesIntoKey(fcNormal, fcNormal): 1940 break; 1941 } 1942 1943 /* Two normal numbers. Do they have the same sign? */ 1944 if (sign != rhs.sign) { 1945 if (sign) 1946 result = cmpLessThan; 1947 else 1948 result = cmpGreaterThan; 1949 } else { 1950 /* Compare absolute values; invert result if negative. */ 1951 result = compareAbsoluteValue(rhs); 1952 1953 if (sign) { 1954 if (result == cmpLessThan) 1955 result = cmpGreaterThan; 1956 else if (result == cmpGreaterThan) 1957 result = cmpLessThan; 1958 } 1959 } 1960 1961 return result; 1962 } 1963 1964 /// APFloat::convert - convert a value of one floating point type to another. 1965 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 1966 /// records whether the transformation lost information, i.e. whether 1967 /// converting the result back to the original type will produce the 1968 /// original value (this is almost the same as return value==fsOK, but there 1969 /// are edge cases where this is not so). 1970 1971 APFloat::opStatus 1972 APFloat::convert(const fltSemantics &toSemantics, 1973 roundingMode rounding_mode, bool *losesInfo) 1974 { 1975 lostFraction lostFraction; 1976 unsigned int newPartCount, oldPartCount; 1977 opStatus fs; 1978 int shift; 1979 const fltSemantics &fromSemantics = *semantics; 1980 1981 lostFraction = lfExactlyZero; 1982 newPartCount = partCountForBits(toSemantics.precision + 1); 1983 oldPartCount = partCount(); 1984 shift = toSemantics.precision - fromSemantics.precision; 1985 1986 bool X86SpecialNan = false; 1987 if (&fromSemantics == &APFloat::x87DoubleExtended && 1988 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN && 1989 (!(*significandParts() & 0x8000000000000000ULL) || 1990 !(*significandParts() & 0x4000000000000000ULL))) { 1991 // x86 has some unusual NaNs which cannot be represented in any other 1992 // format; note them here. 1993 X86SpecialNan = true; 1994 } 1995 1996 // If this is a truncation of a denormal number, and the target semantics 1997 // has larger exponent range than the source semantics (this can happen 1998 // when truncating from PowerPC double-double to double format), the 1999 // right shift could lose result mantissa bits. Adjust exponent instead 2000 // of performing excessive shift. 2001 if (shift < 0 && isFiniteNonZero()) { 2002 int exponentChange = significandMSB() + 1 - fromSemantics.precision; 2003 if (exponent + exponentChange < toSemantics.minExponent) 2004 exponentChange = toSemantics.minExponent - exponent; 2005 if (exponentChange < shift) 2006 exponentChange = shift; 2007 if (exponentChange < 0) { 2008 shift -= exponentChange; 2009 exponent += exponentChange; 2010 } 2011 } 2012 2013 // If this is a truncation, perform the shift before we narrow the storage. 2014 if (shift < 0 && (isFiniteNonZero() || category==fcNaN)) 2015 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 2016 2017 // Fix the storage so it can hold to new value. 2018 if (newPartCount > oldPartCount) { 2019 // The new type requires more storage; make it available. 2020 integerPart *newParts; 2021 newParts = new integerPart[newPartCount]; 2022 APInt::tcSet(newParts, 0, newPartCount); 2023 if (isFiniteNonZero() || category==fcNaN) 2024 APInt::tcAssign(newParts, significandParts(), oldPartCount); 2025 freeSignificand(); 2026 significand.parts = newParts; 2027 } else if (newPartCount == 1 && oldPartCount != 1) { 2028 // Switch to built-in storage for a single part. 2029 integerPart newPart = 0; 2030 if (isFiniteNonZero() || category==fcNaN) 2031 newPart = significandParts()[0]; 2032 freeSignificand(); 2033 significand.part = newPart; 2034 } 2035 2036 // Now that we have the right storage, switch the semantics. 2037 semantics = &toSemantics; 2038 2039 // If this is an extension, perform the shift now that the storage is 2040 // available. 2041 if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) 2042 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 2043 2044 if (isFiniteNonZero()) { 2045 fs = normalize(rounding_mode, lostFraction); 2046 *losesInfo = (fs != opOK); 2047 } else if (category == fcNaN) { 2048 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 2049 2050 // For x87 extended precision, we want to make a NaN, not a special NaN if 2051 // the input wasn't special either. 2052 if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended) 2053 APInt::tcSetBit(significandParts(), semantics->precision - 1); 2054 2055 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) 2056 // does not give you back the same bits. This is dubious, and we 2057 // don't currently do it. You're really supposed to get 2058 // an invalid operation signal at runtime, but nobody does that. 2059 fs = opOK; 2060 } else { 2061 *losesInfo = false; 2062 fs = opOK; 2063 } 2064 2065 return fs; 2066 } 2067 2068 /* Convert a floating point number to an integer according to the 2069 rounding mode. If the rounded integer value is out of range this 2070 returns an invalid operation exception and the contents of the 2071 destination parts are unspecified. If the rounded value is in 2072 range but the floating point number is not the exact integer, the C 2073 standard doesn't require an inexact exception to be raised. IEEE 2074 854 does require it so we do that. 2075 2076 Note that for conversions to integer type the C standard requires 2077 round-to-zero to always be used. */ 2078 APFloat::opStatus 2079 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width, 2080 bool isSigned, 2081 roundingMode rounding_mode, 2082 bool *isExact) const 2083 { 2084 lostFraction lost_fraction; 2085 const integerPart *src; 2086 unsigned int dstPartsCount, truncatedBits; 2087 2088 *isExact = false; 2089 2090 /* Handle the three special cases first. */ 2091 if (category == fcInfinity || category == fcNaN) 2092 return opInvalidOp; 2093 2094 dstPartsCount = partCountForBits(width); 2095 2096 if (category == fcZero) { 2097 APInt::tcSet(parts, 0, dstPartsCount); 2098 // Negative zero can't be represented as an int. 2099 *isExact = !sign; 2100 return opOK; 2101 } 2102 2103 src = significandParts(); 2104 2105 /* Step 1: place our absolute value, with any fraction truncated, in 2106 the destination. */ 2107 if (exponent < 0) { 2108 /* Our absolute value is less than one; truncate everything. */ 2109 APInt::tcSet(parts, 0, dstPartsCount); 2110 /* For exponent -1 the integer bit represents .5, look at that. 2111 For smaller exponents leftmost truncated bit is 0. */ 2112 truncatedBits = semantics->precision -1U - exponent; 2113 } else { 2114 /* We want the most significant (exponent + 1) bits; the rest are 2115 truncated. */ 2116 unsigned int bits = exponent + 1U; 2117 2118 /* Hopelessly large in magnitude? */ 2119 if (bits > width) 2120 return opInvalidOp; 2121 2122 if (bits < semantics->precision) { 2123 /* We truncate (semantics->precision - bits) bits. */ 2124 truncatedBits = semantics->precision - bits; 2125 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits); 2126 } else { 2127 /* We want at least as many bits as are available. */ 2128 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0); 2129 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision); 2130 truncatedBits = 0; 2131 } 2132 } 2133 2134 /* Step 2: work out any lost fraction, and increment the absolute 2135 value if we would round away from zero. */ 2136 if (truncatedBits) { 2137 lost_fraction = lostFractionThroughTruncation(src, partCount(), 2138 truncatedBits); 2139 if (lost_fraction != lfExactlyZero && 2140 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2141 if (APInt::tcIncrement(parts, dstPartsCount)) 2142 return opInvalidOp; /* Overflow. */ 2143 } 2144 } else { 2145 lost_fraction = lfExactlyZero; 2146 } 2147 2148 /* Step 3: check if we fit in the destination. */ 2149 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1; 2150 2151 if (sign) { 2152 if (!isSigned) { 2153 /* Negative numbers cannot be represented as unsigned. */ 2154 if (omsb != 0) 2155 return opInvalidOp; 2156 } else { 2157 /* It takes omsb bits to represent the unsigned integer value. 2158 We lose a bit for the sign, but care is needed as the 2159 maximally negative integer is a special case. */ 2160 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb) 2161 return opInvalidOp; 2162 2163 /* This case can happen because of rounding. */ 2164 if (omsb > width) 2165 return opInvalidOp; 2166 } 2167 2168 APInt::tcNegate (parts, dstPartsCount); 2169 } else { 2170 if (omsb >= width + !isSigned) 2171 return opInvalidOp; 2172 } 2173 2174 if (lost_fraction == lfExactlyZero) { 2175 *isExact = true; 2176 return opOK; 2177 } else 2178 return opInexact; 2179 } 2180 2181 /* Same as convertToSignExtendedInteger, except we provide 2182 deterministic values in case of an invalid operation exception, 2183 namely zero for NaNs and the minimal or maximal value respectively 2184 for underflow or overflow. 2185 The *isExact output tells whether the result is exact, in the sense 2186 that converting it back to the original floating point type produces 2187 the original value. This is almost equivalent to result==opOK, 2188 except for negative zeroes. 2189 */ 2190 APFloat::opStatus 2191 APFloat::convertToInteger(integerPart *parts, unsigned int width, 2192 bool isSigned, 2193 roundingMode rounding_mode, bool *isExact) const 2194 { 2195 opStatus fs; 2196 2197 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2198 isExact); 2199 2200 if (fs == opInvalidOp) { 2201 unsigned int bits, dstPartsCount; 2202 2203 dstPartsCount = partCountForBits(width); 2204 2205 if (category == fcNaN) 2206 bits = 0; 2207 else if (sign) 2208 bits = isSigned; 2209 else 2210 bits = width - isSigned; 2211 2212 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits); 2213 if (sign && isSigned) 2214 APInt::tcShiftLeft(parts, dstPartsCount, width - 1); 2215 } 2216 2217 return fs; 2218 } 2219 2220 /* Same as convertToInteger(integerPart*, ...), except the result is returned in 2221 an APSInt, whose initial bit-width and signed-ness are used to determine the 2222 precision of the conversion. 2223 */ 2224 APFloat::opStatus 2225 APFloat::convertToInteger(APSInt &result, 2226 roundingMode rounding_mode, bool *isExact) const 2227 { 2228 unsigned bitWidth = result.getBitWidth(); 2229 SmallVector<uint64_t, 4> parts(result.getNumWords()); 2230 opStatus status = convertToInteger( 2231 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact); 2232 // Keeps the original signed-ness. 2233 result = APInt(bitWidth, parts); 2234 return status; 2235 } 2236 2237 /* Convert an unsigned integer SRC to a floating point number, 2238 rounding according to ROUNDING_MODE. The sign of the floating 2239 point number is not modified. */ 2240 APFloat::opStatus 2241 APFloat::convertFromUnsignedParts(const integerPart *src, 2242 unsigned int srcCount, 2243 roundingMode rounding_mode) 2244 { 2245 unsigned int omsb, precision, dstCount; 2246 integerPart *dst; 2247 lostFraction lost_fraction; 2248 2249 category = fcNormal; 2250 omsb = APInt::tcMSB(src, srcCount) + 1; 2251 dst = significandParts(); 2252 dstCount = partCount(); 2253 precision = semantics->precision; 2254 2255 /* We want the most significant PRECISION bits of SRC. There may not 2256 be that many; extract what we can. */ 2257 if (precision <= omsb) { 2258 exponent = omsb - 1; 2259 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2260 omsb - precision); 2261 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2262 } else { 2263 exponent = precision - 1; 2264 lost_fraction = lfExactlyZero; 2265 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2266 } 2267 2268 return normalize(rounding_mode, lost_fraction); 2269 } 2270 2271 APFloat::opStatus 2272 APFloat::convertFromAPInt(const APInt &Val, 2273 bool isSigned, 2274 roundingMode rounding_mode) 2275 { 2276 unsigned int partCount = Val.getNumWords(); 2277 APInt api = Val; 2278 2279 sign = false; 2280 if (isSigned && api.isNegative()) { 2281 sign = true; 2282 api = -api; 2283 } 2284 2285 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2286 } 2287 2288 /* Convert a two's complement integer SRC to a floating point number, 2289 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2290 integer is signed, in which case it must be sign-extended. */ 2291 APFloat::opStatus 2292 APFloat::convertFromSignExtendedInteger(const integerPart *src, 2293 unsigned int srcCount, 2294 bool isSigned, 2295 roundingMode rounding_mode) 2296 { 2297 opStatus status; 2298 2299 if (isSigned && 2300 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2301 integerPart *copy; 2302 2303 /* If we're signed and negative negate a copy. */ 2304 sign = true; 2305 copy = new integerPart[srcCount]; 2306 APInt::tcAssign(copy, src, srcCount); 2307 APInt::tcNegate(copy, srcCount); 2308 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2309 delete [] copy; 2310 } else { 2311 sign = false; 2312 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2313 } 2314 2315 return status; 2316 } 2317 2318 /* FIXME: should this just take a const APInt reference? */ 2319 APFloat::opStatus 2320 APFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2321 unsigned int width, bool isSigned, 2322 roundingMode rounding_mode) 2323 { 2324 unsigned int partCount = partCountForBits(width); 2325 APInt api = APInt(width, makeArrayRef(parts, partCount)); 2326 2327 sign = false; 2328 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2329 sign = true; 2330 api = -api; 2331 } 2332 2333 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2334 } 2335 2336 APFloat::opStatus 2337 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode) 2338 { 2339 lostFraction lost_fraction = lfExactlyZero; 2340 2341 category = fcNormal; 2342 zeroSignificand(); 2343 exponent = 0; 2344 2345 integerPart *significand = significandParts(); 2346 unsigned partsCount = partCount(); 2347 unsigned bitPos = partsCount * integerPartWidth; 2348 bool computedTrailingFraction = false; 2349 2350 // Skip leading zeroes and any (hexa)decimal point. 2351 StringRef::iterator begin = s.begin(); 2352 StringRef::iterator end = s.end(); 2353 StringRef::iterator dot; 2354 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2355 StringRef::iterator firstSignificantDigit = p; 2356 2357 while (p != end) { 2358 integerPart hex_value; 2359 2360 if (*p == '.') { 2361 assert(dot == end && "String contains multiple dots"); 2362 dot = p++; 2363 continue; 2364 } 2365 2366 hex_value = hexDigitValue(*p); 2367 if (hex_value == -1U) 2368 break; 2369 2370 p++; 2371 2372 // Store the number while we have space. 2373 if (bitPos) { 2374 bitPos -= 4; 2375 hex_value <<= bitPos % integerPartWidth; 2376 significand[bitPos / integerPartWidth] |= hex_value; 2377 } else if (!computedTrailingFraction) { 2378 lost_fraction = trailingHexadecimalFraction(p, end, hex_value); 2379 computedTrailingFraction = true; 2380 } 2381 } 2382 2383 /* Hex floats require an exponent but not a hexadecimal point. */ 2384 assert(p != end && "Hex strings require an exponent"); 2385 assert((*p == 'p' || *p == 'P') && "Invalid character in significand"); 2386 assert(p != begin && "Significand has no digits"); 2387 assert((dot == end || p - begin != 1) && "Significand has no digits"); 2388 2389 /* Ignore the exponent if we are zero. */ 2390 if (p != firstSignificantDigit) { 2391 int expAdjustment; 2392 2393 /* Implicit hexadecimal point? */ 2394 if (dot == end) 2395 dot = p; 2396 2397 /* Calculate the exponent adjustment implicit in the number of 2398 significant digits. */ 2399 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 2400 if (expAdjustment < 0) 2401 expAdjustment++; 2402 expAdjustment = expAdjustment * 4 - 1; 2403 2404 /* Adjust for writing the significand starting at the most 2405 significant nibble. */ 2406 expAdjustment += semantics->precision; 2407 expAdjustment -= partsCount * integerPartWidth; 2408 2409 /* Adjust for the given exponent. */ 2410 exponent = totalExponent(p + 1, end, expAdjustment); 2411 } 2412 2413 return normalize(rounding_mode, lost_fraction); 2414 } 2415 2416 APFloat::opStatus 2417 APFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2418 unsigned sigPartCount, int exp, 2419 roundingMode rounding_mode) 2420 { 2421 unsigned int parts, pow5PartCount; 2422 fltSemantics calcSemantics = { 32767, -32767, 0, 0 }; 2423 integerPart pow5Parts[maxPowerOfFiveParts]; 2424 bool isNearest; 2425 2426 isNearest = (rounding_mode == rmNearestTiesToEven || 2427 rounding_mode == rmNearestTiesToAway); 2428 2429 parts = partCountForBits(semantics->precision + 11); 2430 2431 /* Calculate pow(5, abs(exp)). */ 2432 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2433 2434 for (;; parts *= 2) { 2435 opStatus sigStatus, powStatus; 2436 unsigned int excessPrecision, truncatedBits; 2437 2438 calcSemantics.precision = parts * integerPartWidth - 1; 2439 excessPrecision = calcSemantics.precision - semantics->precision; 2440 truncatedBits = excessPrecision; 2441 2442 APFloat decSig = APFloat::getZero(calcSemantics, sign); 2443 APFloat pow5(calcSemantics); 2444 2445 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2446 rmNearestTiesToEven); 2447 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2448 rmNearestTiesToEven); 2449 /* Add exp, as 10^n = 5^n * 2^n. */ 2450 decSig.exponent += exp; 2451 2452 lostFraction calcLostFraction; 2453 integerPart HUerr, HUdistance; 2454 unsigned int powHUerr; 2455 2456 if (exp >= 0) { 2457 /* multiplySignificand leaves the precision-th bit set to 1. */ 2458 calcLostFraction = decSig.multiplySignificand(pow5, nullptr); 2459 powHUerr = powStatus != opOK; 2460 } else { 2461 calcLostFraction = decSig.divideSignificand(pow5); 2462 /* Denormal numbers have less precision. */ 2463 if (decSig.exponent < semantics->minExponent) { 2464 excessPrecision += (semantics->minExponent - decSig.exponent); 2465 truncatedBits = excessPrecision; 2466 if (excessPrecision > calcSemantics.precision) 2467 excessPrecision = calcSemantics.precision; 2468 } 2469 /* Extra half-ulp lost in reciprocal of exponent. */ 2470 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 2471 } 2472 2473 /* Both multiplySignificand and divideSignificand return the 2474 result with the integer bit set. */ 2475 assert(APInt::tcExtractBit 2476 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2477 2478 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2479 powHUerr); 2480 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2481 excessPrecision, isNearest); 2482 2483 /* Are we guaranteed to round correctly if we truncate? */ 2484 if (HUdistance >= HUerr) { 2485 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2486 calcSemantics.precision - excessPrecision, 2487 excessPrecision); 2488 /* Take the exponent of decSig. If we tcExtract-ed less bits 2489 above we must adjust our exponent to compensate for the 2490 implicit right shift. */ 2491 exponent = (decSig.exponent + semantics->precision 2492 - (calcSemantics.precision - excessPrecision)); 2493 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2494 decSig.partCount(), 2495 truncatedBits); 2496 return normalize(rounding_mode, calcLostFraction); 2497 } 2498 } 2499 } 2500 2501 APFloat::opStatus 2502 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) 2503 { 2504 decimalInfo D; 2505 opStatus fs; 2506 2507 /* Scan the text. */ 2508 StringRef::iterator p = str.begin(); 2509 interpretDecimal(p, str.end(), &D); 2510 2511 /* Handle the quick cases. First the case of no significant digits, 2512 i.e. zero, and then exponents that are obviously too large or too 2513 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 2514 definitely overflows if 2515 2516 (exp - 1) * L >= maxExponent 2517 2518 and definitely underflows to zero where 2519 2520 (exp + 1) * L <= minExponent - precision 2521 2522 With integer arithmetic the tightest bounds for L are 2523 2524 93/28 < L < 196/59 [ numerator <= 256 ] 2525 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 2526 */ 2527 2528 // Test if we have a zero number allowing for strings with no null terminators 2529 // and zero decimals with non-zero exponents. 2530 // 2531 // We computed firstSigDigit by ignoring all zeros and dots. Thus if 2532 // D->firstSigDigit equals str.end(), every digit must be a zero and there can 2533 // be at most one dot. On the other hand, if we have a zero with a non-zero 2534 // exponent, then we know that D.firstSigDigit will be non-numeric. 2535 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { 2536 category = fcZero; 2537 fs = opOK; 2538 2539 /* Check whether the normalized exponent is high enough to overflow 2540 max during the log-rebasing in the max-exponent check below. */ 2541 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 2542 fs = handleOverflow(rounding_mode); 2543 2544 /* If it wasn't, then it also wasn't high enough to overflow max 2545 during the log-rebasing in the min-exponent check. Check that it 2546 won't overflow min in either check, then perform the min-exponent 2547 check. */ 2548 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 2549 (D.normalizedExponent + 1) * 28738 <= 2550 8651 * (semantics->minExponent - (int) semantics->precision)) { 2551 /* Underflow to zero and round. */ 2552 category = fcNormal; 2553 zeroSignificand(); 2554 fs = normalize(rounding_mode, lfLessThanHalf); 2555 2556 /* We can finally safely perform the max-exponent check. */ 2557 } else if ((D.normalizedExponent - 1) * 42039 2558 >= 12655 * semantics->maxExponent) { 2559 /* Overflow and round. */ 2560 fs = handleOverflow(rounding_mode); 2561 } else { 2562 integerPart *decSignificand; 2563 unsigned int partCount; 2564 2565 /* A tight upper bound on number of bits required to hold an 2566 N-digit decimal integer is N * 196 / 59. Allocate enough space 2567 to hold the full significand, and an extra part required by 2568 tcMultiplyPart. */ 2569 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 2570 partCount = partCountForBits(1 + 196 * partCount / 59); 2571 decSignificand = new integerPart[partCount + 1]; 2572 partCount = 0; 2573 2574 /* Convert to binary efficiently - we do almost all multiplication 2575 in an integerPart. When this would overflow do we do a single 2576 bignum multiplication, and then revert again to multiplication 2577 in an integerPart. */ 2578 do { 2579 integerPart decValue, val, multiplier; 2580 2581 val = 0; 2582 multiplier = 1; 2583 2584 do { 2585 if (*p == '.') { 2586 p++; 2587 if (p == str.end()) { 2588 break; 2589 } 2590 } 2591 decValue = decDigitValue(*p++); 2592 assert(decValue < 10U && "Invalid character in significand"); 2593 multiplier *= 10; 2594 val = val * 10 + decValue; 2595 /* The maximum number that can be multiplied by ten with any 2596 digit added without overflowing an integerPart. */ 2597 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 2598 2599 /* Multiply out the current part. */ 2600 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 2601 partCount, partCount + 1, false); 2602 2603 /* If we used another part (likely but not guaranteed), increase 2604 the count. */ 2605 if (decSignificand[partCount]) 2606 partCount++; 2607 } while (p <= D.lastSigDigit); 2608 2609 category = fcNormal; 2610 fs = roundSignificandWithExponent(decSignificand, partCount, 2611 D.exponent, rounding_mode); 2612 2613 delete [] decSignificand; 2614 } 2615 2616 return fs; 2617 } 2618 2619 bool 2620 APFloat::convertFromStringSpecials(StringRef str) { 2621 if (str.equals("inf") || str.equals("INFINITY")) { 2622 makeInf(false); 2623 return true; 2624 } 2625 2626 if (str.equals("-inf") || str.equals("-INFINITY")) { 2627 makeInf(true); 2628 return true; 2629 } 2630 2631 if (str.equals("nan") || str.equals("NaN")) { 2632 makeNaN(false, false); 2633 return true; 2634 } 2635 2636 if (str.equals("-nan") || str.equals("-NaN")) { 2637 makeNaN(false, true); 2638 return true; 2639 } 2640 2641 return false; 2642 } 2643 2644 APFloat::opStatus 2645 APFloat::convertFromString(StringRef str, roundingMode rounding_mode) 2646 { 2647 assert(!str.empty() && "Invalid string length"); 2648 2649 // Handle special cases. 2650 if (convertFromStringSpecials(str)) 2651 return opOK; 2652 2653 /* Handle a leading minus sign. */ 2654 StringRef::iterator p = str.begin(); 2655 size_t slen = str.size(); 2656 sign = *p == '-' ? 1 : 0; 2657 if (*p == '-' || *p == '+') { 2658 p++; 2659 slen--; 2660 assert(slen && "String has no digits"); 2661 } 2662 2663 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 2664 assert(slen - 2 && "Invalid string"); 2665 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 2666 rounding_mode); 2667 } 2668 2669 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 2670 } 2671 2672 /* Write out a hexadecimal representation of the floating point value 2673 to DST, which must be of sufficient size, in the C99 form 2674 [-]0xh.hhhhp[+-]d. Return the number of characters written, 2675 excluding the terminating NUL. 2676 2677 If UPPERCASE, the output is in upper case, otherwise in lower case. 2678 2679 HEXDIGITS digits appear altogether, rounding the value if 2680 necessary. If HEXDIGITS is 0, the minimal precision to display the 2681 number precisely is used instead. If nothing would appear after 2682 the decimal point it is suppressed. 2683 2684 The decimal exponent is always printed and has at least one digit. 2685 Zero values display an exponent of zero. Infinities and NaNs 2686 appear as "infinity" or "nan" respectively. 2687 2688 The above rules are as specified by C99. There is ambiguity about 2689 what the leading hexadecimal digit should be. This implementation 2690 uses whatever is necessary so that the exponent is displayed as 2691 stored. This implies the exponent will fall within the IEEE format 2692 range, and the leading hexadecimal digit will be 0 (for denormals), 2693 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 2694 any other digits zero). 2695 */ 2696 unsigned int 2697 APFloat::convertToHexString(char *dst, unsigned int hexDigits, 2698 bool upperCase, roundingMode rounding_mode) const 2699 { 2700 char *p; 2701 2702 p = dst; 2703 if (sign) 2704 *dst++ = '-'; 2705 2706 switch (category) { 2707 case fcInfinity: 2708 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 2709 dst += sizeof infinityL - 1; 2710 break; 2711 2712 case fcNaN: 2713 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 2714 dst += sizeof NaNU - 1; 2715 break; 2716 2717 case fcZero: 2718 *dst++ = '0'; 2719 *dst++ = upperCase ? 'X': 'x'; 2720 *dst++ = '0'; 2721 if (hexDigits > 1) { 2722 *dst++ = '.'; 2723 memset (dst, '0', hexDigits - 1); 2724 dst += hexDigits - 1; 2725 } 2726 *dst++ = upperCase ? 'P': 'p'; 2727 *dst++ = '0'; 2728 break; 2729 2730 case fcNormal: 2731 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 2732 break; 2733 } 2734 2735 *dst = 0; 2736 2737 return static_cast<unsigned int>(dst - p); 2738 } 2739 2740 /* Does the hard work of outputting the correctly rounded hexadecimal 2741 form of a normal floating point number with the specified number of 2742 hexadecimal digits. If HEXDIGITS is zero the minimum number of 2743 digits necessary to print the value precisely is output. */ 2744 char * 2745 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 2746 bool upperCase, 2747 roundingMode rounding_mode) const 2748 { 2749 unsigned int count, valueBits, shift, partsCount, outputDigits; 2750 const char *hexDigitChars; 2751 const integerPart *significand; 2752 char *p; 2753 bool roundUp; 2754 2755 *dst++ = '0'; 2756 *dst++ = upperCase ? 'X': 'x'; 2757 2758 roundUp = false; 2759 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 2760 2761 significand = significandParts(); 2762 partsCount = partCount(); 2763 2764 /* +3 because the first digit only uses the single integer bit, so 2765 we have 3 virtual zero most-significant-bits. */ 2766 valueBits = semantics->precision + 3; 2767 shift = integerPartWidth - valueBits % integerPartWidth; 2768 2769 /* The natural number of digits required ignoring trailing 2770 insignificant zeroes. */ 2771 outputDigits = (valueBits - significandLSB () + 3) / 4; 2772 2773 /* hexDigits of zero means use the required number for the 2774 precision. Otherwise, see if we are truncating. If we are, 2775 find out if we need to round away from zero. */ 2776 if (hexDigits) { 2777 if (hexDigits < outputDigits) { 2778 /* We are dropping non-zero bits, so need to check how to round. 2779 "bits" is the number of dropped bits. */ 2780 unsigned int bits; 2781 lostFraction fraction; 2782 2783 bits = valueBits - hexDigits * 4; 2784 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 2785 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 2786 } 2787 outputDigits = hexDigits; 2788 } 2789 2790 /* Write the digits consecutively, and start writing in the location 2791 of the hexadecimal point. We move the most significant digit 2792 left and add the hexadecimal point later. */ 2793 p = ++dst; 2794 2795 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 2796 2797 while (outputDigits && count) { 2798 integerPart part; 2799 2800 /* Put the most significant integerPartWidth bits in "part". */ 2801 if (--count == partsCount) 2802 part = 0; /* An imaginary higher zero part. */ 2803 else 2804 part = significand[count] << shift; 2805 2806 if (count && shift) 2807 part |= significand[count - 1] >> (integerPartWidth - shift); 2808 2809 /* Convert as much of "part" to hexdigits as we can. */ 2810 unsigned int curDigits = integerPartWidth / 4; 2811 2812 if (curDigits > outputDigits) 2813 curDigits = outputDigits; 2814 dst += partAsHex (dst, part, curDigits, hexDigitChars); 2815 outputDigits -= curDigits; 2816 } 2817 2818 if (roundUp) { 2819 char *q = dst; 2820 2821 /* Note that hexDigitChars has a trailing '0'. */ 2822 do { 2823 q--; 2824 *q = hexDigitChars[hexDigitValue (*q) + 1]; 2825 } while (*q == '0'); 2826 assert(q >= p); 2827 } else { 2828 /* Add trailing zeroes. */ 2829 memset (dst, '0', outputDigits); 2830 dst += outputDigits; 2831 } 2832 2833 /* Move the most significant digit to before the point, and if there 2834 is something after the decimal point add it. This must come 2835 after rounding above. */ 2836 p[-1] = p[0]; 2837 if (dst -1 == p) 2838 dst--; 2839 else 2840 p[0] = '.'; 2841 2842 /* Finally output the exponent. */ 2843 *dst++ = upperCase ? 'P': 'p'; 2844 2845 return writeSignedDecimal (dst, exponent); 2846 } 2847 2848 hash_code llvm::hash_value(const APFloat &Arg) { 2849 if (!Arg.isFiniteNonZero()) 2850 return hash_combine((uint8_t)Arg.category, 2851 // NaN has no sign, fix it at zero. 2852 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, 2853 Arg.semantics->precision); 2854 2855 // Normal floats need their exponent and significand hashed. 2856 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, 2857 Arg.semantics->precision, Arg.exponent, 2858 hash_combine_range( 2859 Arg.significandParts(), 2860 Arg.significandParts() + Arg.partCount())); 2861 } 2862 2863 // Conversion from APFloat to/from host float/double. It may eventually be 2864 // possible to eliminate these and have everybody deal with APFloats, but that 2865 // will take a while. This approach will not easily extend to long double. 2866 // Current implementation requires integerPartWidth==64, which is correct at 2867 // the moment but could be made more general. 2868 2869 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 2870 // the actual IEEE respresentations. We compensate for that here. 2871 2872 APInt 2873 APFloat::convertF80LongDoubleAPFloatToAPInt() const 2874 { 2875 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended); 2876 assert(partCount()==2); 2877 2878 uint64_t myexponent, mysignificand; 2879 2880 if (isFiniteNonZero()) { 2881 myexponent = exponent+16383; //bias 2882 mysignificand = significandParts()[0]; 2883 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 2884 myexponent = 0; // denormal 2885 } else if (category==fcZero) { 2886 myexponent = 0; 2887 mysignificand = 0; 2888 } else if (category==fcInfinity) { 2889 myexponent = 0x7fff; 2890 mysignificand = 0x8000000000000000ULL; 2891 } else { 2892 assert(category == fcNaN && "Unknown category"); 2893 myexponent = 0x7fff; 2894 mysignificand = significandParts()[0]; 2895 } 2896 2897 uint64_t words[2]; 2898 words[0] = mysignificand; 2899 words[1] = ((uint64_t)(sign & 1) << 15) | 2900 (myexponent & 0x7fffLL); 2901 return APInt(80, words); 2902 } 2903 2904 APInt 2905 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const 2906 { 2907 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble); 2908 assert(partCount()==2); 2909 2910 uint64_t words[2]; 2911 opStatus fs; 2912 bool losesInfo; 2913 2914 // Convert number to double. To avoid spurious underflows, we re- 2915 // normalize against the "double" minExponent first, and only *then* 2916 // truncate the mantissa. The result of that second conversion 2917 // may be inexact, but should never underflow. 2918 // Declare fltSemantics before APFloat that uses it (and 2919 // saves pointer to it) to ensure correct destruction order. 2920 fltSemantics extendedSemantics = *semantics; 2921 extendedSemantics.minExponent = IEEEdouble.minExponent; 2922 APFloat extended(*this); 2923 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2924 assert(fs == opOK && !losesInfo); 2925 (void)fs; 2926 2927 APFloat u(extended); 2928 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); 2929 assert(fs == opOK || fs == opInexact); 2930 (void)fs; 2931 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 2932 2933 // If conversion was exact or resulted in a special case, we're done; 2934 // just set the second double to zero. Otherwise, re-convert back to 2935 // the extended format and compute the difference. This now should 2936 // convert exactly to double. 2937 if (u.isFiniteNonZero() && losesInfo) { 2938 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2939 assert(fs == opOK && !losesInfo); 2940 (void)fs; 2941 2942 APFloat v(extended); 2943 v.subtract(u, rmNearestTiesToEven); 2944 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); 2945 assert(fs == opOK && !losesInfo); 2946 (void)fs; 2947 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 2948 } else { 2949 words[1] = 0; 2950 } 2951 2952 return APInt(128, words); 2953 } 2954 2955 APInt 2956 APFloat::convertQuadrupleAPFloatToAPInt() const 2957 { 2958 assert(semantics == (const llvm::fltSemantics*)&IEEEquad); 2959 assert(partCount()==2); 2960 2961 uint64_t myexponent, mysignificand, mysignificand2; 2962 2963 if (isFiniteNonZero()) { 2964 myexponent = exponent+16383; //bias 2965 mysignificand = significandParts()[0]; 2966 mysignificand2 = significandParts()[1]; 2967 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) 2968 myexponent = 0; // denormal 2969 } else if (category==fcZero) { 2970 myexponent = 0; 2971 mysignificand = mysignificand2 = 0; 2972 } else if (category==fcInfinity) { 2973 myexponent = 0x7fff; 2974 mysignificand = mysignificand2 = 0; 2975 } else { 2976 assert(category == fcNaN && "Unknown category!"); 2977 myexponent = 0x7fff; 2978 mysignificand = significandParts()[0]; 2979 mysignificand2 = significandParts()[1]; 2980 } 2981 2982 uint64_t words[2]; 2983 words[0] = mysignificand; 2984 words[1] = ((uint64_t)(sign & 1) << 63) | 2985 ((myexponent & 0x7fff) << 48) | 2986 (mysignificand2 & 0xffffffffffffLL); 2987 2988 return APInt(128, words); 2989 } 2990 2991 APInt 2992 APFloat::convertDoubleAPFloatToAPInt() const 2993 { 2994 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); 2995 assert(partCount()==1); 2996 2997 uint64_t myexponent, mysignificand; 2998 2999 if (isFiniteNonZero()) { 3000 myexponent = exponent+1023; //bias 3001 mysignificand = *significandParts(); 3002 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 3003 myexponent = 0; // denormal 3004 } else if (category==fcZero) { 3005 myexponent = 0; 3006 mysignificand = 0; 3007 } else if (category==fcInfinity) { 3008 myexponent = 0x7ff; 3009 mysignificand = 0; 3010 } else { 3011 assert(category == fcNaN && "Unknown category!"); 3012 myexponent = 0x7ff; 3013 mysignificand = *significandParts(); 3014 } 3015 3016 return APInt(64, ((((uint64_t)(sign & 1) << 63) | 3017 ((myexponent & 0x7ff) << 52) | 3018 (mysignificand & 0xfffffffffffffLL)))); 3019 } 3020 3021 APInt 3022 APFloat::convertFloatAPFloatToAPInt() const 3023 { 3024 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); 3025 assert(partCount()==1); 3026 3027 uint32_t myexponent, mysignificand; 3028 3029 if (isFiniteNonZero()) { 3030 myexponent = exponent+127; //bias 3031 mysignificand = (uint32_t)*significandParts(); 3032 if (myexponent == 1 && !(mysignificand & 0x800000)) 3033 myexponent = 0; // denormal 3034 } else if (category==fcZero) { 3035 myexponent = 0; 3036 mysignificand = 0; 3037 } else if (category==fcInfinity) { 3038 myexponent = 0xff; 3039 mysignificand = 0; 3040 } else { 3041 assert(category == fcNaN && "Unknown category!"); 3042 myexponent = 0xff; 3043 mysignificand = (uint32_t)*significandParts(); 3044 } 3045 3046 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 3047 (mysignificand & 0x7fffff))); 3048 } 3049 3050 APInt 3051 APFloat::convertHalfAPFloatToAPInt() const 3052 { 3053 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf); 3054 assert(partCount()==1); 3055 3056 uint32_t myexponent, mysignificand; 3057 3058 if (isFiniteNonZero()) { 3059 myexponent = exponent+15; //bias 3060 mysignificand = (uint32_t)*significandParts(); 3061 if (myexponent == 1 && !(mysignificand & 0x400)) 3062 myexponent = 0; // denormal 3063 } else if (category==fcZero) { 3064 myexponent = 0; 3065 mysignificand = 0; 3066 } else if (category==fcInfinity) { 3067 myexponent = 0x1f; 3068 mysignificand = 0; 3069 } else { 3070 assert(category == fcNaN && "Unknown category!"); 3071 myexponent = 0x1f; 3072 mysignificand = (uint32_t)*significandParts(); 3073 } 3074 3075 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | 3076 (mysignificand & 0x3ff))); 3077 } 3078 3079 // This function creates an APInt that is just a bit map of the floating 3080 // point constant as it would appear in memory. It is not a conversion, 3081 // and treating the result as a normal integer is unlikely to be useful. 3082 3083 APInt 3084 APFloat::bitcastToAPInt() const 3085 { 3086 if (semantics == (const llvm::fltSemantics*)&IEEEhalf) 3087 return convertHalfAPFloatToAPInt(); 3088 3089 if (semantics == (const llvm::fltSemantics*)&IEEEsingle) 3090 return convertFloatAPFloatToAPInt(); 3091 3092 if (semantics == (const llvm::fltSemantics*)&IEEEdouble) 3093 return convertDoubleAPFloatToAPInt(); 3094 3095 if (semantics == (const llvm::fltSemantics*)&IEEEquad) 3096 return convertQuadrupleAPFloatToAPInt(); 3097 3098 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble) 3099 return convertPPCDoubleDoubleAPFloatToAPInt(); 3100 3101 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended && 3102 "unknown format!"); 3103 return convertF80LongDoubleAPFloatToAPInt(); 3104 } 3105 3106 float 3107 APFloat::convertToFloat() const 3108 { 3109 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle && 3110 "Float semantics are not IEEEsingle"); 3111 APInt api = bitcastToAPInt(); 3112 return api.bitsToFloat(); 3113 } 3114 3115 double 3116 APFloat::convertToDouble() const 3117 { 3118 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble && 3119 "Float semantics are not IEEEdouble"); 3120 APInt api = bitcastToAPInt(); 3121 return api.bitsToDouble(); 3122 } 3123 3124 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3125 /// does not support these bit patterns: 3126 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3127 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3128 /// exponent = 0, integer bit 1 ("pseudodenormal") 3129 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3130 /// At the moment, the first two are treated as NaNs, the second two as Normal. 3131 void 3132 APFloat::initFromF80LongDoubleAPInt(const APInt &api) 3133 { 3134 assert(api.getBitWidth()==80); 3135 uint64_t i1 = api.getRawData()[0]; 3136 uint64_t i2 = api.getRawData()[1]; 3137 uint64_t myexponent = (i2 & 0x7fff); 3138 uint64_t mysignificand = i1; 3139 3140 initialize(&APFloat::x87DoubleExtended); 3141 assert(partCount()==2); 3142 3143 sign = static_cast<unsigned int>(i2>>15); 3144 if (myexponent==0 && mysignificand==0) { 3145 // exponent, significand meaningless 3146 category = fcZero; 3147 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3148 // exponent, significand meaningless 3149 category = fcInfinity; 3150 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) { 3151 // exponent meaningless 3152 category = fcNaN; 3153 significandParts()[0] = mysignificand; 3154 significandParts()[1] = 0; 3155 } else { 3156 category = fcNormal; 3157 exponent = myexponent - 16383; 3158 significandParts()[0] = mysignificand; 3159 significandParts()[1] = 0; 3160 if (myexponent==0) // denormal 3161 exponent = -16382; 3162 } 3163 } 3164 3165 void 3166 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) 3167 { 3168 assert(api.getBitWidth()==128); 3169 uint64_t i1 = api.getRawData()[0]; 3170 uint64_t i2 = api.getRawData()[1]; 3171 opStatus fs; 3172 bool losesInfo; 3173 3174 // Get the first double and convert to our format. 3175 initFromDoubleAPInt(APInt(64, i1)); 3176 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); 3177 assert(fs == opOK && !losesInfo); 3178 (void)fs; 3179 3180 // Unless we have a special case, add in second double. 3181 if (isFiniteNonZero()) { 3182 APFloat v(IEEEdouble, APInt(64, i2)); 3183 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); 3184 assert(fs == opOK && !losesInfo); 3185 (void)fs; 3186 3187 add(v, rmNearestTiesToEven); 3188 } 3189 } 3190 3191 void 3192 APFloat::initFromQuadrupleAPInt(const APInt &api) 3193 { 3194 assert(api.getBitWidth()==128); 3195 uint64_t i1 = api.getRawData()[0]; 3196 uint64_t i2 = api.getRawData()[1]; 3197 uint64_t myexponent = (i2 >> 48) & 0x7fff; 3198 uint64_t mysignificand = i1; 3199 uint64_t mysignificand2 = i2 & 0xffffffffffffLL; 3200 3201 initialize(&APFloat::IEEEquad); 3202 assert(partCount()==2); 3203 3204 sign = static_cast<unsigned int>(i2>>63); 3205 if (myexponent==0 && 3206 (mysignificand==0 && mysignificand2==0)) { 3207 // exponent, significand meaningless 3208 category = fcZero; 3209 } else if (myexponent==0x7fff && 3210 (mysignificand==0 && mysignificand2==0)) { 3211 // exponent, significand meaningless 3212 category = fcInfinity; 3213 } else if (myexponent==0x7fff && 3214 (mysignificand!=0 || mysignificand2 !=0)) { 3215 // exponent meaningless 3216 category = fcNaN; 3217 significandParts()[0] = mysignificand; 3218 significandParts()[1] = mysignificand2; 3219 } else { 3220 category = fcNormal; 3221 exponent = myexponent - 16383; 3222 significandParts()[0] = mysignificand; 3223 significandParts()[1] = mysignificand2; 3224 if (myexponent==0) // denormal 3225 exponent = -16382; 3226 else 3227 significandParts()[1] |= 0x1000000000000LL; // integer bit 3228 } 3229 } 3230 3231 void 3232 APFloat::initFromDoubleAPInt(const APInt &api) 3233 { 3234 assert(api.getBitWidth()==64); 3235 uint64_t i = *api.getRawData(); 3236 uint64_t myexponent = (i >> 52) & 0x7ff; 3237 uint64_t mysignificand = i & 0xfffffffffffffLL; 3238 3239 initialize(&APFloat::IEEEdouble); 3240 assert(partCount()==1); 3241 3242 sign = static_cast<unsigned int>(i>>63); 3243 if (myexponent==0 && mysignificand==0) { 3244 // exponent, significand meaningless 3245 category = fcZero; 3246 } else if (myexponent==0x7ff && mysignificand==0) { 3247 // exponent, significand meaningless 3248 category = fcInfinity; 3249 } else if (myexponent==0x7ff && mysignificand!=0) { 3250 // exponent meaningless 3251 category = fcNaN; 3252 *significandParts() = mysignificand; 3253 } else { 3254 category = fcNormal; 3255 exponent = myexponent - 1023; 3256 *significandParts() = mysignificand; 3257 if (myexponent==0) // denormal 3258 exponent = -1022; 3259 else 3260 *significandParts() |= 0x10000000000000LL; // integer bit 3261 } 3262 } 3263 3264 void 3265 APFloat::initFromFloatAPInt(const APInt & api) 3266 { 3267 assert(api.getBitWidth()==32); 3268 uint32_t i = (uint32_t)*api.getRawData(); 3269 uint32_t myexponent = (i >> 23) & 0xff; 3270 uint32_t mysignificand = i & 0x7fffff; 3271 3272 initialize(&APFloat::IEEEsingle); 3273 assert(partCount()==1); 3274 3275 sign = i >> 31; 3276 if (myexponent==0 && mysignificand==0) { 3277 // exponent, significand meaningless 3278 category = fcZero; 3279 } else if (myexponent==0xff && mysignificand==0) { 3280 // exponent, significand meaningless 3281 category = fcInfinity; 3282 } else if (myexponent==0xff && mysignificand!=0) { 3283 // sign, exponent, significand meaningless 3284 category = fcNaN; 3285 *significandParts() = mysignificand; 3286 } else { 3287 category = fcNormal; 3288 exponent = myexponent - 127; //bias 3289 *significandParts() = mysignificand; 3290 if (myexponent==0) // denormal 3291 exponent = -126; 3292 else 3293 *significandParts() |= 0x800000; // integer bit 3294 } 3295 } 3296 3297 void 3298 APFloat::initFromHalfAPInt(const APInt & api) 3299 { 3300 assert(api.getBitWidth()==16); 3301 uint32_t i = (uint32_t)*api.getRawData(); 3302 uint32_t myexponent = (i >> 10) & 0x1f; 3303 uint32_t mysignificand = i & 0x3ff; 3304 3305 initialize(&APFloat::IEEEhalf); 3306 assert(partCount()==1); 3307 3308 sign = i >> 15; 3309 if (myexponent==0 && mysignificand==0) { 3310 // exponent, significand meaningless 3311 category = fcZero; 3312 } else if (myexponent==0x1f && mysignificand==0) { 3313 // exponent, significand meaningless 3314 category = fcInfinity; 3315 } else if (myexponent==0x1f && mysignificand!=0) { 3316 // sign, exponent, significand meaningless 3317 category = fcNaN; 3318 *significandParts() = mysignificand; 3319 } else { 3320 category = fcNormal; 3321 exponent = myexponent - 15; //bias 3322 *significandParts() = mysignificand; 3323 if (myexponent==0) // denormal 3324 exponent = -14; 3325 else 3326 *significandParts() |= 0x400; // integer bit 3327 } 3328 } 3329 3330 /// Treat api as containing the bits of a floating point number. Currently 3331 /// we infer the floating point type from the size of the APInt. The 3332 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful 3333 /// when the size is anything else). 3334 void 3335 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api) 3336 { 3337 if (Sem == &IEEEhalf) 3338 return initFromHalfAPInt(api); 3339 if (Sem == &IEEEsingle) 3340 return initFromFloatAPInt(api); 3341 if (Sem == &IEEEdouble) 3342 return initFromDoubleAPInt(api); 3343 if (Sem == &x87DoubleExtended) 3344 return initFromF80LongDoubleAPInt(api); 3345 if (Sem == &IEEEquad) 3346 return initFromQuadrupleAPInt(api); 3347 if (Sem == &PPCDoubleDouble) 3348 return initFromPPCDoubleDoubleAPInt(api); 3349 3350 llvm_unreachable(nullptr); 3351 } 3352 3353 APFloat 3354 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) 3355 { 3356 switch (BitWidth) { 3357 case 16: 3358 return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth)); 3359 case 32: 3360 return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth)); 3361 case 64: 3362 return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth)); 3363 case 80: 3364 return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth)); 3365 case 128: 3366 if (isIEEE) 3367 return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth)); 3368 return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth)); 3369 default: 3370 llvm_unreachable("Unknown floating bit width"); 3371 } 3372 } 3373 3374 unsigned APFloat::getSizeInBits(const fltSemantics &Sem) { 3375 return Sem.sizeInBits; 3376 } 3377 3378 /// Make this number the largest magnitude normal number in the given 3379 /// semantics. 3380 void APFloat::makeLargest(bool Negative) { 3381 // We want (in interchange format): 3382 // sign = {Negative} 3383 // exponent = 1..10 3384 // significand = 1..1 3385 category = fcNormal; 3386 sign = Negative; 3387 exponent = semantics->maxExponent; 3388 3389 // Use memset to set all but the highest integerPart to all ones. 3390 integerPart *significand = significandParts(); 3391 unsigned PartCount = partCount(); 3392 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); 3393 3394 // Set the high integerPart especially setting all unused top bits for 3395 // internal consistency. 3396 const unsigned NumUnusedHighBits = 3397 PartCount*integerPartWidth - semantics->precision; 3398 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth) 3399 ? (~integerPart(0) >> NumUnusedHighBits) 3400 : 0; 3401 } 3402 3403 /// Make this number the smallest magnitude denormal number in the given 3404 /// semantics. 3405 void APFloat::makeSmallest(bool Negative) { 3406 // We want (in interchange format): 3407 // sign = {Negative} 3408 // exponent = 0..0 3409 // significand = 0..01 3410 category = fcNormal; 3411 sign = Negative; 3412 exponent = semantics->minExponent; 3413 APInt::tcSet(significandParts(), 1, partCount()); 3414 } 3415 3416 3417 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) { 3418 // We want (in interchange format): 3419 // sign = {Negative} 3420 // exponent = 1..10 3421 // significand = 1..1 3422 APFloat Val(Sem, uninitialized); 3423 Val.makeLargest(Negative); 3424 return Val; 3425 } 3426 3427 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) { 3428 // We want (in interchange format): 3429 // sign = {Negative} 3430 // exponent = 0..0 3431 // significand = 0..01 3432 APFloat Val(Sem, uninitialized); 3433 Val.makeSmallest(Negative); 3434 return Val; 3435 } 3436 3437 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) { 3438 APFloat Val(Sem, uninitialized); 3439 3440 // We want (in interchange format): 3441 // sign = {Negative} 3442 // exponent = 0..0 3443 // significand = 10..0 3444 3445 Val.category = fcNormal; 3446 Val.zeroSignificand(); 3447 Val.sign = Negative; 3448 Val.exponent = Sem.minExponent; 3449 Val.significandParts()[partCountForBits(Sem.precision)-1] |= 3450 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth)); 3451 3452 return Val; 3453 } 3454 3455 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) { 3456 initFromAPInt(&Sem, API); 3457 } 3458 3459 APFloat::APFloat(float f) { 3460 initFromAPInt(&IEEEsingle, APInt::floatToBits(f)); 3461 } 3462 3463 APFloat::APFloat(double d) { 3464 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d)); 3465 } 3466 3467 namespace { 3468 void append(SmallVectorImpl<char> &Buffer, StringRef Str) { 3469 Buffer.append(Str.begin(), Str.end()); 3470 } 3471 3472 /// Removes data from the given significand until it is no more 3473 /// precise than is required for the desired precision. 3474 void AdjustToPrecision(APInt &significand, 3475 int &exp, unsigned FormatPrecision) { 3476 unsigned bits = significand.getActiveBits(); 3477 3478 // 196/59 is a very slight overestimate of lg_2(10). 3479 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59; 3480 3481 if (bits <= bitsRequired) return; 3482 3483 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; 3484 if (!tensRemovable) return; 3485 3486 exp += tensRemovable; 3487 3488 APInt divisor(significand.getBitWidth(), 1); 3489 APInt powten(significand.getBitWidth(), 10); 3490 while (true) { 3491 if (tensRemovable & 1) 3492 divisor *= powten; 3493 tensRemovable >>= 1; 3494 if (!tensRemovable) break; 3495 powten *= powten; 3496 } 3497 3498 significand = significand.udiv(divisor); 3499 3500 // Truncate the significand down to its active bit count. 3501 significand = significand.trunc(significand.getActiveBits()); 3502 } 3503 3504 3505 void AdjustToPrecision(SmallVectorImpl<char> &buffer, 3506 int &exp, unsigned FormatPrecision) { 3507 unsigned N = buffer.size(); 3508 if (N <= FormatPrecision) return; 3509 3510 // The most significant figures are the last ones in the buffer. 3511 unsigned FirstSignificant = N - FormatPrecision; 3512 3513 // Round. 3514 // FIXME: this probably shouldn't use 'round half up'. 3515 3516 // Rounding down is just a truncation, except we also want to drop 3517 // trailing zeros from the new result. 3518 if (buffer[FirstSignificant - 1] < '5') { 3519 while (FirstSignificant < N && buffer[FirstSignificant] == '0') 3520 FirstSignificant++; 3521 3522 exp += FirstSignificant; 3523 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3524 return; 3525 } 3526 3527 // Rounding up requires a decimal add-with-carry. If we continue 3528 // the carry, the newly-introduced zeros will just be truncated. 3529 for (unsigned I = FirstSignificant; I != N; ++I) { 3530 if (buffer[I] == '9') { 3531 FirstSignificant++; 3532 } else { 3533 buffer[I]++; 3534 break; 3535 } 3536 } 3537 3538 // If we carried through, we have exactly one digit of precision. 3539 if (FirstSignificant == N) { 3540 exp += FirstSignificant; 3541 buffer.clear(); 3542 buffer.push_back('1'); 3543 return; 3544 } 3545 3546 exp += FirstSignificant; 3547 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3548 } 3549 } 3550 3551 void APFloat::toString(SmallVectorImpl<char> &Str, 3552 unsigned FormatPrecision, 3553 unsigned FormatMaxPadding) const { 3554 switch (category) { 3555 case fcInfinity: 3556 if (isNegative()) 3557 return append(Str, "-Inf"); 3558 else 3559 return append(Str, "+Inf"); 3560 3561 case fcNaN: return append(Str, "NaN"); 3562 3563 case fcZero: 3564 if (isNegative()) 3565 Str.push_back('-'); 3566 3567 if (!FormatMaxPadding) 3568 append(Str, "0.0E+0"); 3569 else 3570 Str.push_back('0'); 3571 return; 3572 3573 case fcNormal: 3574 break; 3575 } 3576 3577 if (isNegative()) 3578 Str.push_back('-'); 3579 3580 // Decompose the number into an APInt and an exponent. 3581 int exp = exponent - ((int) semantics->precision - 1); 3582 APInt significand(semantics->precision, 3583 makeArrayRef(significandParts(), 3584 partCountForBits(semantics->precision))); 3585 3586 // Set FormatPrecision if zero. We want to do this before we 3587 // truncate trailing zeros, as those are part of the precision. 3588 if (!FormatPrecision) { 3589 // We use enough digits so the number can be round-tripped back to an 3590 // APFloat. The formula comes from "How to Print Floating-Point Numbers 3591 // Accurately" by Steele and White. 3592 // FIXME: Using a formula based purely on the precision is conservative; 3593 // we can print fewer digits depending on the actual value being printed. 3594 3595 // FormatPrecision = 2 + floor(significandBits / lg_2(10)) 3596 FormatPrecision = 2 + semantics->precision * 59 / 196; 3597 } 3598 3599 // Ignore trailing binary zeros. 3600 int trailingZeros = significand.countTrailingZeros(); 3601 exp += trailingZeros; 3602 significand = significand.lshr(trailingZeros); 3603 3604 // Change the exponent from 2^e to 10^e. 3605 if (exp == 0) { 3606 // Nothing to do. 3607 } else if (exp > 0) { 3608 // Just shift left. 3609 significand = significand.zext(semantics->precision + exp); 3610 significand <<= exp; 3611 exp = 0; 3612 } else { /* exp < 0 */ 3613 int texp = -exp; 3614 3615 // We transform this using the identity: 3616 // (N)(2^-e) == (N)(5^e)(10^-e) 3617 // This means we have to multiply N (the significand) by 5^e. 3618 // To avoid overflow, we have to operate on numbers large 3619 // enough to store N * 5^e: 3620 // log2(N * 5^e) == log2(N) + e * log2(5) 3621 // <= semantics->precision + e * 137 / 59 3622 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) 3623 3624 unsigned precision = semantics->precision + (137 * texp + 136) / 59; 3625 3626 // Multiply significand by 5^e. 3627 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) 3628 significand = significand.zext(precision); 3629 APInt five_to_the_i(precision, 5); 3630 while (true) { 3631 if (texp & 1) significand *= five_to_the_i; 3632 3633 texp >>= 1; 3634 if (!texp) break; 3635 five_to_the_i *= five_to_the_i; 3636 } 3637 } 3638 3639 AdjustToPrecision(significand, exp, FormatPrecision); 3640 3641 SmallVector<char, 256> buffer; 3642 3643 // Fill the buffer. 3644 unsigned precision = significand.getBitWidth(); 3645 APInt ten(precision, 10); 3646 APInt digit(precision, 0); 3647 3648 bool inTrail = true; 3649 while (significand != 0) { 3650 // digit <- significand % 10 3651 // significand <- significand / 10 3652 APInt::udivrem(significand, ten, significand, digit); 3653 3654 unsigned d = digit.getZExtValue(); 3655 3656 // Drop trailing zeros. 3657 if (inTrail && !d) exp++; 3658 else { 3659 buffer.push_back((char) ('0' + d)); 3660 inTrail = false; 3661 } 3662 } 3663 3664 assert(!buffer.empty() && "no characters in buffer!"); 3665 3666 // Drop down to FormatPrecision. 3667 // TODO: don't do more precise calculations above than are required. 3668 AdjustToPrecision(buffer, exp, FormatPrecision); 3669 3670 unsigned NDigits = buffer.size(); 3671 3672 // Check whether we should use scientific notation. 3673 bool FormatScientific; 3674 if (!FormatMaxPadding) 3675 FormatScientific = true; 3676 else { 3677 if (exp >= 0) { 3678 // 765e3 --> 765000 3679 // ^^^ 3680 // But we shouldn't make the number look more precise than it is. 3681 FormatScientific = ((unsigned) exp > FormatMaxPadding || 3682 NDigits + (unsigned) exp > FormatPrecision); 3683 } else { 3684 // Power of the most significant digit. 3685 int MSD = exp + (int) (NDigits - 1); 3686 if (MSD >= 0) { 3687 // 765e-2 == 7.65 3688 FormatScientific = false; 3689 } else { 3690 // 765e-5 == 0.00765 3691 // ^ ^^ 3692 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; 3693 } 3694 } 3695 } 3696 3697 // Scientific formatting is pretty straightforward. 3698 if (FormatScientific) { 3699 exp += (NDigits - 1); 3700 3701 Str.push_back(buffer[NDigits-1]); 3702 Str.push_back('.'); 3703 if (NDigits == 1) 3704 Str.push_back('0'); 3705 else 3706 for (unsigned I = 1; I != NDigits; ++I) 3707 Str.push_back(buffer[NDigits-1-I]); 3708 Str.push_back('E'); 3709 3710 Str.push_back(exp >= 0 ? '+' : '-'); 3711 if (exp < 0) exp = -exp; 3712 SmallVector<char, 6> expbuf; 3713 do { 3714 expbuf.push_back((char) ('0' + (exp % 10))); 3715 exp /= 10; 3716 } while (exp); 3717 for (unsigned I = 0, E = expbuf.size(); I != E; ++I) 3718 Str.push_back(expbuf[E-1-I]); 3719 return; 3720 } 3721 3722 // Non-scientific, positive exponents. 3723 if (exp >= 0) { 3724 for (unsigned I = 0; I != NDigits; ++I) 3725 Str.push_back(buffer[NDigits-1-I]); 3726 for (unsigned I = 0; I != (unsigned) exp; ++I) 3727 Str.push_back('0'); 3728 return; 3729 } 3730 3731 // Non-scientific, negative exponents. 3732 3733 // The number of digits to the left of the decimal point. 3734 int NWholeDigits = exp + (int) NDigits; 3735 3736 unsigned I = 0; 3737 if (NWholeDigits > 0) { 3738 for (; I != (unsigned) NWholeDigits; ++I) 3739 Str.push_back(buffer[NDigits-I-1]); 3740 Str.push_back('.'); 3741 } else { 3742 unsigned NZeros = 1 + (unsigned) -NWholeDigits; 3743 3744 Str.push_back('0'); 3745 Str.push_back('.'); 3746 for (unsigned Z = 1; Z != NZeros; ++Z) 3747 Str.push_back('0'); 3748 } 3749 3750 for (; I != NDigits; ++I) 3751 Str.push_back(buffer[NDigits-I-1]); 3752 } 3753 3754 bool APFloat::getExactInverse(APFloat *inv) const { 3755 // Special floats and denormals have no exact inverse. 3756 if (!isFiniteNonZero()) 3757 return false; 3758 3759 // Check that the number is a power of two by making sure that only the 3760 // integer bit is set in the significand. 3761 if (significandLSB() != semantics->precision - 1) 3762 return false; 3763 3764 // Get the inverse. 3765 APFloat reciprocal(*semantics, 1ULL); 3766 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) 3767 return false; 3768 3769 // Avoid multiplication with a denormal, it is not safe on all platforms and 3770 // may be slower than a normal division. 3771 if (reciprocal.isDenormal()) 3772 return false; 3773 3774 assert(reciprocal.isFiniteNonZero() && 3775 reciprocal.significandLSB() == reciprocal.semantics->precision - 1); 3776 3777 if (inv) 3778 *inv = reciprocal; 3779 3780 return true; 3781 } 3782 3783 bool APFloat::isSignaling() const { 3784 if (!isNaN()) 3785 return false; 3786 3787 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the 3788 // first bit of the trailing significand being 0. 3789 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2); 3790 } 3791 3792 /// IEEE-754R 2008 5.3.1: nextUp/nextDown. 3793 /// 3794 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with 3795 /// appropriate sign switching before/after the computation. 3796 APFloat::opStatus APFloat::next(bool nextDown) { 3797 // If we are performing nextDown, swap sign so we have -x. 3798 if (nextDown) 3799 changeSign(); 3800 3801 // Compute nextUp(x) 3802 opStatus result = opOK; 3803 3804 // Handle each float category separately. 3805 switch (category) { 3806 case fcInfinity: 3807 // nextUp(+inf) = +inf 3808 if (!isNegative()) 3809 break; 3810 // nextUp(-inf) = -getLargest() 3811 makeLargest(true); 3812 break; 3813 case fcNaN: 3814 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. 3815 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not 3816 // change the payload. 3817 if (isSignaling()) { 3818 result = opInvalidOp; 3819 // For consistency, propagate the sign of the sNaN to the qNaN. 3820 makeNaN(false, isNegative(), nullptr); 3821 } 3822 break; 3823 case fcZero: 3824 // nextUp(pm 0) = +getSmallest() 3825 makeSmallest(false); 3826 break; 3827 case fcNormal: 3828 // nextUp(-getSmallest()) = -0 3829 if (isSmallest() && isNegative()) { 3830 APInt::tcSet(significandParts(), 0, partCount()); 3831 category = fcZero; 3832 exponent = 0; 3833 break; 3834 } 3835 3836 // nextUp(getLargest()) == INFINITY 3837 if (isLargest() && !isNegative()) { 3838 APInt::tcSet(significandParts(), 0, partCount()); 3839 category = fcInfinity; 3840 exponent = semantics->maxExponent + 1; 3841 break; 3842 } 3843 3844 // nextUp(normal) == normal + inc. 3845 if (isNegative()) { 3846 // If we are negative, we need to decrement the significand. 3847 3848 // We only cross a binade boundary that requires adjusting the exponent 3849 // if: 3850 // 1. exponent != semantics->minExponent. This implies we are not in the 3851 // smallest binade or are dealing with denormals. 3852 // 2. Our significand excluding the integral bit is all zeros. 3853 bool WillCrossBinadeBoundary = 3854 exponent != semantics->minExponent && isSignificandAllZeros(); 3855 3856 // Decrement the significand. 3857 // 3858 // We always do this since: 3859 // 1. If we are dealing with a non-binade decrement, by definition we 3860 // just decrement the significand. 3861 // 2. If we are dealing with a normal -> normal binade decrement, since 3862 // we have an explicit integral bit the fact that all bits but the 3863 // integral bit are zero implies that subtracting one will yield a 3864 // significand with 0 integral bit and 1 in all other spots. Thus we 3865 // must just adjust the exponent and set the integral bit to 1. 3866 // 3. If we are dealing with a normal -> denormal binade decrement, 3867 // since we set the integral bit to 0 when we represent denormals, we 3868 // just decrement the significand. 3869 integerPart *Parts = significandParts(); 3870 APInt::tcDecrement(Parts, partCount()); 3871 3872 if (WillCrossBinadeBoundary) { 3873 // Our result is a normal number. Do the following: 3874 // 1. Set the integral bit to 1. 3875 // 2. Decrement the exponent. 3876 APInt::tcSetBit(Parts, semantics->precision - 1); 3877 exponent--; 3878 } 3879 } else { 3880 // If we are positive, we need to increment the significand. 3881 3882 // We only cross a binade boundary that requires adjusting the exponent if 3883 // the input is not a denormal and all of said input's significand bits 3884 // are set. If all of said conditions are true: clear the significand, set 3885 // the integral bit to 1, and increment the exponent. If we have a 3886 // denormal always increment since moving denormals and the numbers in the 3887 // smallest normal binade have the same exponent in our representation. 3888 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes(); 3889 3890 if (WillCrossBinadeBoundary) { 3891 integerPart *Parts = significandParts(); 3892 APInt::tcSet(Parts, 0, partCount()); 3893 APInt::tcSetBit(Parts, semantics->precision - 1); 3894 assert(exponent != semantics->maxExponent && 3895 "We can not increment an exponent beyond the maxExponent allowed" 3896 " by the given floating point semantics."); 3897 exponent++; 3898 } else { 3899 incrementSignificand(); 3900 } 3901 } 3902 break; 3903 } 3904 3905 // If we are performing nextDown, swap sign so we have -nextUp(-x) 3906 if (nextDown) 3907 changeSign(); 3908 3909 return result; 3910 } 3911 3912 void 3913 APFloat::makeInf(bool Negative) { 3914 category = fcInfinity; 3915 sign = Negative; 3916 exponent = semantics->maxExponent + 1; 3917 APInt::tcSet(significandParts(), 0, partCount()); 3918 } 3919 3920 void 3921 APFloat::makeZero(bool Negative) { 3922 category = fcZero; 3923 sign = Negative; 3924 exponent = semantics->minExponent-1; 3925 APInt::tcSet(significandParts(), 0, partCount()); 3926 } 3927 3928 APFloat llvm::scalbn(APFloat X, int Exp) { 3929 if (X.isInfinity() || X.isZero() || X.isNaN()) 3930 return X; 3931 3932 auto MaxExp = X.getSemantics().maxExponent; 3933 auto MinExp = X.getSemantics().minExponent; 3934 if (Exp > (MaxExp - X.exponent)) 3935 // Overflow saturates to infinity. 3936 return APFloat::getInf(X.getSemantics(), X.isNegative()); 3937 if (Exp < (MinExp - X.exponent)) 3938 // Underflow saturates to zero. 3939 return APFloat::getZero(X.getSemantics(), X.isNegative()); 3940 3941 X.exponent += Exp; 3942 return X; 3943 } 3944