1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a class to represent arbitrary precision floating 10 // point values and provide a variety of arithmetic operations on them. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/APFloat.h" 15 #include "llvm/ADT/APSInt.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/FoldingSet.h" 18 #include "llvm/ADT/Hashing.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Config/llvm-config.h" 22 #include "llvm/Support/Debug.h" 23 #include "llvm/Support/Error.h" 24 #include "llvm/Support/MathExtras.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include <cstring> 27 #include <limits.h> 28 29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \ 30 do { \ 31 if (usesLayout<IEEEFloat>(getSemantics())) \ 32 return U.IEEE.METHOD_CALL; \ 33 if (usesLayout<DoubleAPFloat>(getSemantics())) \ 34 return U.Double.METHOD_CALL; \ 35 llvm_unreachable("Unexpected semantics"); \ 36 } while (false) 37 38 using namespace llvm; 39 40 /// A macro used to combine two fcCategory enums into one key which can be used 41 /// in a switch statement to classify how the interaction of two APFloat's 42 /// categories affects an operation. 43 /// 44 /// TODO: If clang source code is ever allowed to use constexpr in its own 45 /// codebase, change this into a static inline function. 46 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs)) 47 48 /* Assumed in hexadecimal significand parsing, and conversion to 49 hexadecimal strings. */ 50 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); 51 52 namespace llvm { 53 /* Represents floating point arithmetic semantics. */ 54 struct fltSemantics { 55 /* The largest E such that 2^E is representable; this matches the 56 definition of IEEE 754. */ 57 APFloatBase::ExponentType maxExponent; 58 59 /* The smallest E such that 2^E is a normalized number; this 60 matches the definition of IEEE 754. */ 61 APFloatBase::ExponentType minExponent; 62 63 /* Number of bits in the significand. This includes the integer 64 bit. */ 65 unsigned int precision; 66 67 /* Number of bits actually used in the semantics. */ 68 unsigned int sizeInBits; 69 }; 70 71 static const fltSemantics semIEEEhalf = {15, -14, 11, 16}; 72 static const fltSemantics semIEEEsingle = {127, -126, 24, 32}; 73 static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64}; 74 static const fltSemantics semIEEEquad = {16383, -16382, 113, 128}; 75 static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80}; 76 static const fltSemantics semBogus = {0, 0, 0, 0}; 77 78 /* The IBM double-double semantics. Such a number consists of a pair of IEEE 79 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal, 80 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo. 81 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent 82 to each other, and two 11-bit exponents. 83 84 Note: we need to make the value different from semBogus as otherwise 85 an unsafe optimization may collapse both values to a single address, 86 and we heavily rely on them having distinct addresses. */ 87 static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0}; 88 89 /* These are legacy semantics for the fallback, inaccrurate implementation of 90 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the 91 operation. It's equivalent to having an IEEE number with consecutive 106 92 bits of mantissa and 11 bits of exponent. 93 94 It's not equivalent to IBM double-double. For example, a legit IBM 95 double-double, 1 + epsilon: 96 97 1 + epsilon = 1 + (1 >> 1076) 98 99 is not representable by a consecutive 106 bits of mantissa. 100 101 Currently, these semantics are used in the following way: 102 103 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) -> 104 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) -> 105 semPPCDoubleDoubleLegacy -> IEEE operations 106 107 We use bitcastToAPInt() to get the bit representation (in APInt) of the 108 underlying IEEEdouble, then use the APInt constructor to construct the 109 legacy IEEE float. 110 111 TODO: Implement all operations in semPPCDoubleDouble, and delete these 112 semantics. */ 113 static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, 114 53 + 53, 128}; 115 116 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { 117 switch (S) { 118 case S_IEEEhalf: 119 return IEEEhalf(); 120 case S_IEEEsingle: 121 return IEEEsingle(); 122 case S_IEEEdouble: 123 return IEEEdouble(); 124 case S_x87DoubleExtended: 125 return x87DoubleExtended(); 126 case S_IEEEquad: 127 return IEEEquad(); 128 case S_PPCDoubleDouble: 129 return PPCDoubleDouble(); 130 } 131 llvm_unreachable("Unrecognised floating semantics"); 132 } 133 134 APFloatBase::Semantics 135 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { 136 if (&Sem == &llvm::APFloat::IEEEhalf()) 137 return S_IEEEhalf; 138 else if (&Sem == &llvm::APFloat::IEEEsingle()) 139 return S_IEEEsingle; 140 else if (&Sem == &llvm::APFloat::IEEEdouble()) 141 return S_IEEEdouble; 142 else if (&Sem == &llvm::APFloat::x87DoubleExtended()) 143 return S_x87DoubleExtended; 144 else if (&Sem == &llvm::APFloat::IEEEquad()) 145 return S_IEEEquad; 146 else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) 147 return S_PPCDoubleDouble; 148 else 149 llvm_unreachable("Unknown floating semantics"); 150 } 151 152 const fltSemantics &APFloatBase::IEEEhalf() { 153 return semIEEEhalf; 154 } 155 const fltSemantics &APFloatBase::IEEEsingle() { 156 return semIEEEsingle; 157 } 158 const fltSemantics &APFloatBase::IEEEdouble() { 159 return semIEEEdouble; 160 } 161 const fltSemantics &APFloatBase::IEEEquad() { 162 return semIEEEquad; 163 } 164 const fltSemantics &APFloatBase::x87DoubleExtended() { 165 return semX87DoubleExtended; 166 } 167 const fltSemantics &APFloatBase::Bogus() { 168 return semBogus; 169 } 170 const fltSemantics &APFloatBase::PPCDoubleDouble() { 171 return semPPCDoubleDouble; 172 } 173 174 /* A tight upper bound on number of parts required to hold the value 175 pow(5, power) is 176 177 power * 815 / (351 * integerPartWidth) + 1 178 179 However, whilst the result may require only this many parts, 180 because we are multiplying two values to get it, the 181 multiplication may require an extra part with the excess part 182 being zero (consider the trivial case of 1 * 1, tcFullMultiply 183 requires two parts to hold the single-part result). So we add an 184 extra one to guarantee enough space whilst multiplying. */ 185 const unsigned int maxExponent = 16383; 186 const unsigned int maxPrecision = 113; 187 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 188 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); 189 190 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { 191 return semantics.precision; 192 } 193 APFloatBase::ExponentType 194 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { 195 return semantics.maxExponent; 196 } 197 APFloatBase::ExponentType 198 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { 199 return semantics.minExponent; 200 } 201 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { 202 return semantics.sizeInBits; 203 } 204 205 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { 206 return Sem.sizeInBits; 207 } 208 209 /* A bunch of private, handy routines. */ 210 211 static inline Error createError(const Twine &Err) { 212 return make_error<StringError>(Err, inconvertibleErrorCode()); 213 } 214 215 static inline unsigned int 216 partCountForBits(unsigned int bits) 217 { 218 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth; 219 } 220 221 /* Returns 0U-9U. Return values >= 10U are not digits. */ 222 static inline unsigned int 223 decDigitValue(unsigned int c) 224 { 225 return c - '0'; 226 } 227 228 /* Return the value of a decimal exponent of the form 229 [+-]ddddddd. 230 231 If the exponent overflows, returns a large exponent with the 232 appropriate sign. */ 233 static Expected<int> readExponent(StringRef::iterator begin, 234 StringRef::iterator end) { 235 bool isNegative; 236 unsigned int absExponent; 237 const unsigned int overlargeExponent = 24000; /* FIXME. */ 238 StringRef::iterator p = begin; 239 240 // Treat no exponent as 0 to match binutils 241 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) { 242 return 0; 243 } 244 245 isNegative = (*p == '-'); 246 if (*p == '-' || *p == '+') { 247 p++; 248 if (p == end) 249 return createError("Exponent has no digits"); 250 } 251 252 absExponent = decDigitValue(*p++); 253 if (absExponent >= 10U) 254 return createError("Invalid character in exponent"); 255 256 for (; p != end; ++p) { 257 unsigned int value; 258 259 value = decDigitValue(*p); 260 if (value >= 10U) 261 return createError("Invalid character in exponent"); 262 263 absExponent = absExponent * 10U + value; 264 if (absExponent >= overlargeExponent) { 265 absExponent = overlargeExponent; 266 break; 267 } 268 } 269 270 if (isNegative) 271 return -(int) absExponent; 272 else 273 return (int) absExponent; 274 } 275 276 /* This is ugly and needs cleaning up, but I don't immediately see 277 how whilst remaining safe. */ 278 static Expected<int> totalExponent(StringRef::iterator p, 279 StringRef::iterator end, 280 int exponentAdjustment) { 281 int unsignedExponent; 282 bool negative, overflow; 283 int exponent = 0; 284 285 if (p == end) 286 return createError("Exponent has no digits"); 287 288 negative = *p == '-'; 289 if (*p == '-' || *p == '+') { 290 p++; 291 if (p == end) 292 return createError("Exponent has no digits"); 293 } 294 295 unsignedExponent = 0; 296 overflow = false; 297 for (; p != end; ++p) { 298 unsigned int value; 299 300 value = decDigitValue(*p); 301 if (value >= 10U) 302 return createError("Invalid character in exponent"); 303 304 unsignedExponent = unsignedExponent * 10 + value; 305 if (unsignedExponent > 32767) { 306 overflow = true; 307 break; 308 } 309 } 310 311 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 312 overflow = true; 313 314 if (!overflow) { 315 exponent = unsignedExponent; 316 if (negative) 317 exponent = -exponent; 318 exponent += exponentAdjustment; 319 if (exponent > 32767 || exponent < -32768) 320 overflow = true; 321 } 322 323 if (overflow) 324 exponent = negative ? -32768: 32767; 325 326 return exponent; 327 } 328 329 static Expected<StringRef::iterator> 330 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 331 StringRef::iterator *dot) { 332 StringRef::iterator p = begin; 333 *dot = end; 334 while (p != end && *p == '0') 335 p++; 336 337 if (p != end && *p == '.') { 338 *dot = p++; 339 340 if (end - begin == 1) 341 return createError("Significand has no digits"); 342 343 while (p != end && *p == '0') 344 p++; 345 } 346 347 return p; 348 } 349 350 /* Given a normal decimal floating point number of the form 351 352 dddd.dddd[eE][+-]ddd 353 354 where the decimal point and exponent are optional, fill out the 355 structure D. Exponent is appropriate if the significand is 356 treated as an integer, and normalizedExponent if the significand 357 is taken to have the decimal point after a single leading 358 non-zero digit. 359 360 If the value is zero, V->firstSigDigit points to a non-digit, and 361 the return exponent is zero. 362 */ 363 struct decimalInfo { 364 const char *firstSigDigit; 365 const char *lastSigDigit; 366 int exponent; 367 int normalizedExponent; 368 }; 369 370 static Error interpretDecimal(StringRef::iterator begin, 371 StringRef::iterator end, decimalInfo *D) { 372 StringRef::iterator dot = end; 373 374 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); 375 if (!PtrOrErr) 376 return PtrOrErr.takeError(); 377 StringRef::iterator p = *PtrOrErr; 378 379 D->firstSigDigit = p; 380 D->exponent = 0; 381 D->normalizedExponent = 0; 382 383 for (; p != end; ++p) { 384 if (*p == '.') { 385 if (dot != end) 386 return createError("String contains multiple dots"); 387 dot = p++; 388 if (p == end) 389 break; 390 } 391 if (decDigitValue(*p) >= 10U) 392 break; 393 } 394 395 if (p != end) { 396 if (*p != 'e' && *p != 'E') 397 return createError("Invalid character in significand"); 398 if (p == begin) 399 return createError("Significand has no digits"); 400 if (dot != end && p - begin == 1) 401 return createError("Significand has no digits"); 402 403 /* p points to the first non-digit in the string */ 404 auto ExpOrErr = readExponent(p + 1, end); 405 if (!ExpOrErr) 406 return ExpOrErr.takeError(); 407 D->exponent = *ExpOrErr; 408 409 /* Implied decimal point? */ 410 if (dot == end) 411 dot = p; 412 } 413 414 /* If number is all zeroes accept any exponent. */ 415 if (p != D->firstSigDigit) { 416 /* Drop insignificant trailing zeroes. */ 417 if (p != begin) { 418 do 419 do 420 p--; 421 while (p != begin && *p == '0'); 422 while (p != begin && *p == '.'); 423 } 424 425 /* Adjust the exponents for any decimal point. */ 426 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p)); 427 D->normalizedExponent = (D->exponent + 428 static_cast<APFloat::ExponentType>((p - D->firstSigDigit) 429 - (dot > D->firstSigDigit && dot < p))); 430 } 431 432 D->lastSigDigit = p; 433 return Error::success(); 434 } 435 436 /* Return the trailing fraction of a hexadecimal number. 437 DIGITVALUE is the first hex digit of the fraction, P points to 438 the next digit. */ 439 static Expected<lostFraction> 440 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 441 unsigned int digitValue) { 442 unsigned int hexDigit; 443 444 /* If the first trailing digit isn't 0 or 8 we can work out the 445 fraction immediately. */ 446 if (digitValue > 8) 447 return lfMoreThanHalf; 448 else if (digitValue < 8 && digitValue > 0) 449 return lfLessThanHalf; 450 451 // Otherwise we need to find the first non-zero digit. 452 while (p != end && (*p == '0' || *p == '.')) 453 p++; 454 455 if (p == end) 456 return createError("Invalid trailing hexadecimal fraction!"); 457 458 hexDigit = hexDigitValue(*p); 459 460 /* If we ran off the end it is exactly zero or one-half, otherwise 461 a little more. */ 462 if (hexDigit == -1U) 463 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 464 else 465 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 466 } 467 468 /* Return the fraction lost were a bignum truncated losing the least 469 significant BITS bits. */ 470 static lostFraction 471 lostFractionThroughTruncation(const APFloatBase::integerPart *parts, 472 unsigned int partCount, 473 unsigned int bits) 474 { 475 unsigned int lsb; 476 477 lsb = APInt::tcLSB(parts, partCount); 478 479 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 480 if (bits <= lsb) 481 return lfExactlyZero; 482 if (bits == lsb + 1) 483 return lfExactlyHalf; 484 if (bits <= partCount * APFloatBase::integerPartWidth && 485 APInt::tcExtractBit(parts, bits - 1)) 486 return lfMoreThanHalf; 487 488 return lfLessThanHalf; 489 } 490 491 /* Shift DST right BITS bits noting lost fraction. */ 492 static lostFraction 493 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits) 494 { 495 lostFraction lost_fraction; 496 497 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 498 499 APInt::tcShiftRight(dst, parts, bits); 500 501 return lost_fraction; 502 } 503 504 /* Combine the effect of two lost fractions. */ 505 static lostFraction 506 combineLostFractions(lostFraction moreSignificant, 507 lostFraction lessSignificant) 508 { 509 if (lessSignificant != lfExactlyZero) { 510 if (moreSignificant == lfExactlyZero) 511 moreSignificant = lfLessThanHalf; 512 else if (moreSignificant == lfExactlyHalf) 513 moreSignificant = lfMoreThanHalf; 514 } 515 516 return moreSignificant; 517 } 518 519 /* The error from the true value, in half-ulps, on multiplying two 520 floating point numbers, which differ from the value they 521 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 522 than the returned value. 523 524 See "How to Read Floating Point Numbers Accurately" by William D 525 Clinger. */ 526 static unsigned int 527 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 528 { 529 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 530 531 if (HUerr1 + HUerr2 == 0) 532 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 533 else 534 return inexactMultiply + 2 * (HUerr1 + HUerr2); 535 } 536 537 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 538 when the least significant BITS are truncated. BITS cannot be 539 zero. */ 540 static APFloatBase::integerPart 541 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, 542 bool isNearest) { 543 unsigned int count, partBits; 544 APFloatBase::integerPart part, boundary; 545 546 assert(bits != 0); 547 548 bits--; 549 count = bits / APFloatBase::integerPartWidth; 550 partBits = bits % APFloatBase::integerPartWidth + 1; 551 552 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits)); 553 554 if (isNearest) 555 boundary = (APFloatBase::integerPart) 1 << (partBits - 1); 556 else 557 boundary = 0; 558 559 if (count == 0) { 560 if (part - boundary <= boundary - part) 561 return part - boundary; 562 else 563 return boundary - part; 564 } 565 566 if (part == boundary) { 567 while (--count) 568 if (parts[count]) 569 return ~(APFloatBase::integerPart) 0; /* A lot. */ 570 571 return parts[0]; 572 } else if (part == boundary - 1) { 573 while (--count) 574 if (~parts[count]) 575 return ~(APFloatBase::integerPart) 0; /* A lot. */ 576 577 return -parts[0]; 578 } 579 580 return ~(APFloatBase::integerPart) 0; /* A lot. */ 581 } 582 583 /* Place pow(5, power) in DST, and return the number of parts used. 584 DST must be at least one part larger than size of the answer. */ 585 static unsigned int 586 powerOf5(APFloatBase::integerPart *dst, unsigned int power) { 587 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 }; 588 APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; 589 pow5s[0] = 78125 * 5; 590 591 unsigned int partsCount[16] = { 1 }; 592 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 593 unsigned int result; 594 assert(power <= maxExponent); 595 596 p1 = dst; 597 p2 = scratch; 598 599 *p1 = firstEightPowers[power & 7]; 600 power >>= 3; 601 602 result = 1; 603 pow5 = pow5s; 604 605 for (unsigned int n = 0; power; power >>= 1, n++) { 606 unsigned int pc; 607 608 pc = partsCount[n]; 609 610 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ 611 if (pc == 0) { 612 pc = partsCount[n - 1]; 613 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); 614 pc *= 2; 615 if (pow5[pc - 1] == 0) 616 pc--; 617 partsCount[n] = pc; 618 } 619 620 if (power & 1) { 621 APFloatBase::integerPart *tmp; 622 623 APInt::tcFullMultiply(p2, p1, pow5, result, pc); 624 result += pc; 625 if (p2[result - 1] == 0) 626 result--; 627 628 /* Now result is in p1 with partsCount parts and p2 is scratch 629 space. */ 630 tmp = p1; 631 p1 = p2; 632 p2 = tmp; 633 } 634 635 pow5 += pc; 636 } 637 638 if (p1 != dst) 639 APInt::tcAssign(dst, p1, result); 640 641 return result; 642 } 643 644 /* Zero at the end to avoid modular arithmetic when adding one; used 645 when rounding up during hexadecimal output. */ 646 static const char hexDigitsLower[] = "0123456789abcdef0"; 647 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 648 static const char infinityL[] = "infinity"; 649 static const char infinityU[] = "INFINITY"; 650 static const char NaNL[] = "nan"; 651 static const char NaNU[] = "NAN"; 652 653 /* Write out an integerPart in hexadecimal, starting with the most 654 significant nibble. Write out exactly COUNT hexdigits, return 655 COUNT. */ 656 static unsigned int 657 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count, 658 const char *hexDigitChars) 659 { 660 unsigned int result = count; 661 662 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4); 663 664 part >>= (APFloatBase::integerPartWidth - 4 * count); 665 while (count--) { 666 dst[count] = hexDigitChars[part & 0xf]; 667 part >>= 4; 668 } 669 670 return result; 671 } 672 673 /* Write out an unsigned decimal integer. */ 674 static char * 675 writeUnsignedDecimal (char *dst, unsigned int n) 676 { 677 char buff[40], *p; 678 679 p = buff; 680 do 681 *p++ = '0' + n % 10; 682 while (n /= 10); 683 684 do 685 *dst++ = *--p; 686 while (p != buff); 687 688 return dst; 689 } 690 691 /* Write out a signed decimal integer. */ 692 static char * 693 writeSignedDecimal (char *dst, int value) 694 { 695 if (value < 0) { 696 *dst++ = '-'; 697 dst = writeUnsignedDecimal(dst, -(unsigned) value); 698 } else 699 dst = writeUnsignedDecimal(dst, value); 700 701 return dst; 702 } 703 704 namespace detail { 705 /* Constructors. */ 706 void IEEEFloat::initialize(const fltSemantics *ourSemantics) { 707 unsigned int count; 708 709 semantics = ourSemantics; 710 count = partCount(); 711 if (count > 1) 712 significand.parts = new integerPart[count]; 713 } 714 715 void IEEEFloat::freeSignificand() { 716 if (needsCleanup()) 717 delete [] significand.parts; 718 } 719 720 void IEEEFloat::assign(const IEEEFloat &rhs) { 721 assert(semantics == rhs.semantics); 722 723 sign = rhs.sign; 724 category = rhs.category; 725 exponent = rhs.exponent; 726 if (isFiniteNonZero() || category == fcNaN) 727 copySignificand(rhs); 728 } 729 730 void IEEEFloat::copySignificand(const IEEEFloat &rhs) { 731 assert(isFiniteNonZero() || category == fcNaN); 732 assert(rhs.partCount() >= partCount()); 733 734 APInt::tcAssign(significandParts(), rhs.significandParts(), 735 partCount()); 736 } 737 738 /* Make this number a NaN, with an arbitrary but deterministic value 739 for the significand. If double or longer, this is a signalling NaN, 740 which may not be ideal. If float, this is QNaN(0). */ 741 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { 742 category = fcNaN; 743 sign = Negative; 744 745 integerPart *significand = significandParts(); 746 unsigned numParts = partCount(); 747 748 // Set the significand bits to the fill. 749 if (!fill || fill->getNumWords() < numParts) 750 APInt::tcSet(significand, 0, numParts); 751 if (fill) { 752 APInt::tcAssign(significand, fill->getRawData(), 753 std::min(fill->getNumWords(), numParts)); 754 755 // Zero out the excess bits of the significand. 756 unsigned bitsToPreserve = semantics->precision - 1; 757 unsigned part = bitsToPreserve / 64; 758 bitsToPreserve %= 64; 759 significand[part] &= ((1ULL << bitsToPreserve) - 1); 760 for (part++; part != numParts; ++part) 761 significand[part] = 0; 762 } 763 764 unsigned QNaNBit = semantics->precision - 2; 765 766 if (SNaN) { 767 // We always have to clear the QNaN bit to make it an SNaN. 768 APInt::tcClearBit(significand, QNaNBit); 769 770 // If there are no bits set in the payload, we have to set 771 // *something* to make it a NaN instead of an infinity; 772 // conventionally, this is the next bit down from the QNaN bit. 773 if (APInt::tcIsZero(significand, numParts)) 774 APInt::tcSetBit(significand, QNaNBit - 1); 775 } else { 776 // We always have to set the QNaN bit to make it a QNaN. 777 APInt::tcSetBit(significand, QNaNBit); 778 } 779 780 // For x87 extended precision, we want to make a NaN, not a 781 // pseudo-NaN. Maybe we should expose the ability to make 782 // pseudo-NaNs? 783 if (semantics == &semX87DoubleExtended) 784 APInt::tcSetBit(significand, QNaNBit + 1); 785 } 786 787 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) { 788 if (this != &rhs) { 789 if (semantics != rhs.semantics) { 790 freeSignificand(); 791 initialize(rhs.semantics); 792 } 793 assign(rhs); 794 } 795 796 return *this; 797 } 798 799 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) { 800 freeSignificand(); 801 802 semantics = rhs.semantics; 803 significand = rhs.significand; 804 exponent = rhs.exponent; 805 category = rhs.category; 806 sign = rhs.sign; 807 808 rhs.semantics = &semBogus; 809 return *this; 810 } 811 812 bool IEEEFloat::isDenormal() const { 813 return isFiniteNonZero() && (exponent == semantics->minExponent) && 814 (APInt::tcExtractBit(significandParts(), 815 semantics->precision - 1) == 0); 816 } 817 818 bool IEEEFloat::isSmallest() const { 819 // The smallest number by magnitude in our format will be the smallest 820 // denormal, i.e. the floating point number with exponent being minimum 821 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 822 return isFiniteNonZero() && exponent == semantics->minExponent && 823 significandMSB() == 0; 824 } 825 826 bool IEEEFloat::isSignificandAllOnes() const { 827 // Test if the significand excluding the integral bit is all ones. This allows 828 // us to test for binade boundaries. 829 const integerPart *Parts = significandParts(); 830 const unsigned PartCount = partCount(); 831 for (unsigned i = 0; i < PartCount - 1; i++) 832 if (~Parts[i]) 833 return false; 834 835 // Set the unused high bits to all ones when we compare. 836 const unsigned NumHighBits = 837 PartCount*integerPartWidth - semantics->precision + 1; 838 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " 839 "fill than integerPartWidth"); 840 const integerPart HighBitFill = 841 ~integerPart(0) << (integerPartWidth - NumHighBits); 842 if (~(Parts[PartCount - 1] | HighBitFill)) 843 return false; 844 845 return true; 846 } 847 848 bool IEEEFloat::isSignificandAllZeros() const { 849 // Test if the significand excluding the integral bit is all zeros. This 850 // allows us to test for binade boundaries. 851 const integerPart *Parts = significandParts(); 852 const unsigned PartCount = partCount(); 853 854 for (unsigned i = 0; i < PartCount - 1; i++) 855 if (Parts[i]) 856 return false; 857 858 const unsigned NumHighBits = 859 PartCount*integerPartWidth - semantics->precision + 1; 860 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " 861 "clear than integerPartWidth"); 862 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 863 864 if (Parts[PartCount - 1] & HighBitMask) 865 return false; 866 867 return true; 868 } 869 870 bool IEEEFloat::isLargest() const { 871 // The largest number by magnitude in our format will be the floating point 872 // number with maximum exponent and with significand that is all ones. 873 return isFiniteNonZero() && exponent == semantics->maxExponent 874 && isSignificandAllOnes(); 875 } 876 877 bool IEEEFloat::isInteger() const { 878 // This could be made more efficient; I'm going for obviously correct. 879 if (!isFinite()) return false; 880 IEEEFloat truncated = *this; 881 truncated.roundToIntegral(rmTowardZero); 882 return compare(truncated) == cmpEqual; 883 } 884 885 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const { 886 if (this == &rhs) 887 return true; 888 if (semantics != rhs.semantics || 889 category != rhs.category || 890 sign != rhs.sign) 891 return false; 892 if (category==fcZero || category==fcInfinity) 893 return true; 894 895 if (isFiniteNonZero() && exponent != rhs.exponent) 896 return false; 897 898 return std::equal(significandParts(), significandParts() + partCount(), 899 rhs.significandParts()); 900 } 901 902 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) { 903 initialize(&ourSemantics); 904 sign = 0; 905 category = fcNormal; 906 zeroSignificand(); 907 exponent = ourSemantics.precision - 1; 908 significandParts()[0] = value; 909 normalize(rmNearestTiesToEven, lfExactlyZero); 910 } 911 912 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) { 913 initialize(&ourSemantics); 914 category = fcZero; 915 sign = false; 916 } 917 918 // Delegate to the previous constructor, because later copy constructor may 919 // actually inspects category, which can't be garbage. 920 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag) 921 : IEEEFloat(ourSemantics) {} 922 923 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) { 924 initialize(rhs.semantics); 925 assign(rhs); 926 } 927 928 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) { 929 *this = std::move(rhs); 930 } 931 932 IEEEFloat::~IEEEFloat() { freeSignificand(); } 933 934 unsigned int IEEEFloat::partCount() const { 935 return partCountForBits(semantics->precision + 1); 936 } 937 938 const IEEEFloat::integerPart *IEEEFloat::significandParts() const { 939 return const_cast<IEEEFloat *>(this)->significandParts(); 940 } 941 942 IEEEFloat::integerPart *IEEEFloat::significandParts() { 943 if (partCount() > 1) 944 return significand.parts; 945 else 946 return &significand.part; 947 } 948 949 void IEEEFloat::zeroSignificand() { 950 APInt::tcSet(significandParts(), 0, partCount()); 951 } 952 953 /* Increment an fcNormal floating point number's significand. */ 954 void IEEEFloat::incrementSignificand() { 955 integerPart carry; 956 957 carry = APInt::tcIncrement(significandParts(), partCount()); 958 959 /* Our callers should never cause us to overflow. */ 960 assert(carry == 0); 961 (void)carry; 962 } 963 964 /* Add the significand of the RHS. Returns the carry flag. */ 965 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { 966 integerPart *parts; 967 968 parts = significandParts(); 969 970 assert(semantics == rhs.semantics); 971 assert(exponent == rhs.exponent); 972 973 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 974 } 975 976 /* Subtract the significand of the RHS with a borrow flag. Returns 977 the borrow flag. */ 978 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs, 979 integerPart borrow) { 980 integerPart *parts; 981 982 parts = significandParts(); 983 984 assert(semantics == rhs.semantics); 985 assert(exponent == rhs.exponent); 986 987 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 988 partCount()); 989 } 990 991 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 992 on to the full-precision result of the multiplication. Returns the 993 lost fraction. */ 994 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs, 995 IEEEFloat addend) { 996 unsigned int omsb; // One, not zero, based MSB. 997 unsigned int partsCount, newPartsCount, precision; 998 integerPart *lhsSignificand; 999 integerPart scratch[4]; 1000 integerPart *fullSignificand; 1001 lostFraction lost_fraction; 1002 bool ignored; 1003 1004 assert(semantics == rhs.semantics); 1005 1006 precision = semantics->precision; 1007 1008 // Allocate space for twice as many bits as the original significand, plus one 1009 // extra bit for the addition to overflow into. 1010 newPartsCount = partCountForBits(precision * 2 + 1); 1011 1012 if (newPartsCount > 4) 1013 fullSignificand = new integerPart[newPartsCount]; 1014 else 1015 fullSignificand = scratch; 1016 1017 lhsSignificand = significandParts(); 1018 partsCount = partCount(); 1019 1020 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 1021 rhs.significandParts(), partsCount, partsCount); 1022 1023 lost_fraction = lfExactlyZero; 1024 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1025 exponent += rhs.exponent; 1026 1027 // Assume the operands involved in the multiplication are single-precision 1028 // FP, and the two multiplicants are: 1029 // *this = a23 . a22 ... a0 * 2^e1 1030 // rhs = b23 . b22 ... b0 * 2^e2 1031 // the result of multiplication is: 1032 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) 1033 // Note that there are three significant bits at the left-hand side of the 1034 // radix point: two for the multiplication, and an overflow bit for the 1035 // addition (that will always be zero at this point). Move the radix point 1036 // toward left by two bits, and adjust exponent accordingly. 1037 exponent += 2; 1038 1039 if (addend.isNonZero()) { 1040 // The intermediate result of the multiplication has "2 * precision" 1041 // signicant bit; adjust the addend to be consistent with mul result. 1042 // 1043 Significand savedSignificand = significand; 1044 const fltSemantics *savedSemantics = semantics; 1045 fltSemantics extendedSemantics; 1046 opStatus status; 1047 unsigned int extendedPrecision; 1048 1049 // Normalize our MSB to one below the top bit to allow for overflow. 1050 extendedPrecision = 2 * precision + 1; 1051 if (omsb != extendedPrecision - 1) { 1052 assert(extendedPrecision > omsb); 1053 APInt::tcShiftLeft(fullSignificand, newPartsCount, 1054 (extendedPrecision - 1) - omsb); 1055 exponent -= (extendedPrecision - 1) - omsb; 1056 } 1057 1058 /* Create new semantics. */ 1059 extendedSemantics = *semantics; 1060 extendedSemantics.precision = extendedPrecision; 1061 1062 if (newPartsCount == 1) 1063 significand.part = fullSignificand[0]; 1064 else 1065 significand.parts = fullSignificand; 1066 semantics = &extendedSemantics; 1067 1068 // Make a copy so we can convert it to the extended semantics. 1069 // Note that we cannot convert the addend directly, as the extendedSemantics 1070 // is a local variable (which we take a reference to). 1071 IEEEFloat extendedAddend(addend); 1072 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); 1073 assert(status == opOK); 1074 (void)status; 1075 1076 // Shift the significand of the addend right by one bit. This guarantees 1077 // that the high bit of the significand is zero (same as fullSignificand), 1078 // so the addition will overflow (if it does overflow at all) into the top bit. 1079 lost_fraction = extendedAddend.shiftSignificandRight(1); 1080 assert(lost_fraction == lfExactlyZero && 1081 "Lost precision while shifting addend for fused-multiply-add."); 1082 1083 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 1084 1085 /* Restore our state. */ 1086 if (newPartsCount == 1) 1087 fullSignificand[0] = significand.part; 1088 significand = savedSignificand; 1089 semantics = savedSemantics; 1090 1091 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1092 } 1093 1094 // Convert the result having "2 * precision" significant-bits back to the one 1095 // having "precision" significant-bits. First, move the radix point from 1096 // poision "2*precision - 1" to "precision - 1". The exponent need to be 1097 // adjusted by "2*precision - 1" - "precision - 1" = "precision". 1098 exponent -= precision + 1; 1099 1100 // In case MSB resides at the left-hand side of radix point, shift the 1101 // mantissa right by some amount to make sure the MSB reside right before 1102 // the radix point (i.e. "MSB . rest-significant-bits"). 1103 // 1104 // Note that the result is not normalized when "omsb < precision". So, the 1105 // caller needs to call IEEEFloat::normalize() if normalized value is 1106 // expected. 1107 if (omsb > precision) { 1108 unsigned int bits, significantParts; 1109 lostFraction lf; 1110 1111 bits = omsb - precision; 1112 significantParts = partCountForBits(omsb); 1113 lf = shiftRight(fullSignificand, significantParts, bits); 1114 lost_fraction = combineLostFractions(lf, lost_fraction); 1115 exponent += bits; 1116 } 1117 1118 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 1119 1120 if (newPartsCount > 4) 1121 delete [] fullSignificand; 1122 1123 return lost_fraction; 1124 } 1125 1126 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) { 1127 return multiplySignificand(rhs, IEEEFloat(*semantics)); 1128 } 1129 1130 /* Multiply the significands of LHS and RHS to DST. */ 1131 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) { 1132 unsigned int bit, i, partsCount; 1133 const integerPart *rhsSignificand; 1134 integerPart *lhsSignificand, *dividend, *divisor; 1135 integerPart scratch[4]; 1136 lostFraction lost_fraction; 1137 1138 assert(semantics == rhs.semantics); 1139 1140 lhsSignificand = significandParts(); 1141 rhsSignificand = rhs.significandParts(); 1142 partsCount = partCount(); 1143 1144 if (partsCount > 2) 1145 dividend = new integerPart[partsCount * 2]; 1146 else 1147 dividend = scratch; 1148 1149 divisor = dividend + partsCount; 1150 1151 /* Copy the dividend and divisor as they will be modified in-place. */ 1152 for (i = 0; i < partsCount; i++) { 1153 dividend[i] = lhsSignificand[i]; 1154 divisor[i] = rhsSignificand[i]; 1155 lhsSignificand[i] = 0; 1156 } 1157 1158 exponent -= rhs.exponent; 1159 1160 unsigned int precision = semantics->precision; 1161 1162 /* Normalize the divisor. */ 1163 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 1164 if (bit) { 1165 exponent += bit; 1166 APInt::tcShiftLeft(divisor, partsCount, bit); 1167 } 1168 1169 /* Normalize the dividend. */ 1170 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 1171 if (bit) { 1172 exponent -= bit; 1173 APInt::tcShiftLeft(dividend, partsCount, bit); 1174 } 1175 1176 /* Ensure the dividend >= divisor initially for the loop below. 1177 Incidentally, this means that the division loop below is 1178 guaranteed to set the integer bit to one. */ 1179 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { 1180 exponent--; 1181 APInt::tcShiftLeft(dividend, partsCount, 1); 1182 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 1183 } 1184 1185 /* Long division. */ 1186 for (bit = precision; bit; bit -= 1) { 1187 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 1188 APInt::tcSubtract(dividend, divisor, 0, partsCount); 1189 APInt::tcSetBit(lhsSignificand, bit - 1); 1190 } 1191 1192 APInt::tcShiftLeft(dividend, partsCount, 1); 1193 } 1194 1195 /* Figure out the lost fraction. */ 1196 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 1197 1198 if (cmp > 0) 1199 lost_fraction = lfMoreThanHalf; 1200 else if (cmp == 0) 1201 lost_fraction = lfExactlyHalf; 1202 else if (APInt::tcIsZero(dividend, partsCount)) 1203 lost_fraction = lfExactlyZero; 1204 else 1205 lost_fraction = lfLessThanHalf; 1206 1207 if (partsCount > 2) 1208 delete [] dividend; 1209 1210 return lost_fraction; 1211 } 1212 1213 unsigned int IEEEFloat::significandMSB() const { 1214 return APInt::tcMSB(significandParts(), partCount()); 1215 } 1216 1217 unsigned int IEEEFloat::significandLSB() const { 1218 return APInt::tcLSB(significandParts(), partCount()); 1219 } 1220 1221 /* Note that a zero result is NOT normalized to fcZero. */ 1222 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) { 1223 /* Our exponent should not overflow. */ 1224 assert((ExponentType) (exponent + bits) >= exponent); 1225 1226 exponent += bits; 1227 1228 return shiftRight(significandParts(), partCount(), bits); 1229 } 1230 1231 /* Shift the significand left BITS bits, subtract BITS from its exponent. */ 1232 void IEEEFloat::shiftSignificandLeft(unsigned int bits) { 1233 assert(bits < semantics->precision); 1234 1235 if (bits) { 1236 unsigned int partsCount = partCount(); 1237 1238 APInt::tcShiftLeft(significandParts(), partsCount, bits); 1239 exponent -= bits; 1240 1241 assert(!APInt::tcIsZero(significandParts(), partsCount)); 1242 } 1243 } 1244 1245 IEEEFloat::cmpResult 1246 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const { 1247 int compare; 1248 1249 assert(semantics == rhs.semantics); 1250 assert(isFiniteNonZero()); 1251 assert(rhs.isFiniteNonZero()); 1252 1253 compare = exponent - rhs.exponent; 1254 1255 /* If exponents are equal, do an unsigned bignum comparison of the 1256 significands. */ 1257 if (compare == 0) 1258 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1259 partCount()); 1260 1261 if (compare > 0) 1262 return cmpGreaterThan; 1263 else if (compare < 0) 1264 return cmpLessThan; 1265 else 1266 return cmpEqual; 1267 } 1268 1269 /* Handle overflow. Sign is preserved. We either become infinity or 1270 the largest finite number. */ 1271 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) { 1272 /* Infinity? */ 1273 if (rounding_mode == rmNearestTiesToEven || 1274 rounding_mode == rmNearestTiesToAway || 1275 (rounding_mode == rmTowardPositive && !sign) || 1276 (rounding_mode == rmTowardNegative && sign)) { 1277 category = fcInfinity; 1278 return (opStatus) (opOverflow | opInexact); 1279 } 1280 1281 /* Otherwise we become the largest finite number. */ 1282 category = fcNormal; 1283 exponent = semantics->maxExponent; 1284 APInt::tcSetLeastSignificantBits(significandParts(), partCount(), 1285 semantics->precision); 1286 1287 return opInexact; 1288 } 1289 1290 /* Returns TRUE if, when truncating the current number, with BIT the 1291 new LSB, with the given lost fraction and rounding mode, the result 1292 would need to be rounded away from zero (i.e., by increasing the 1293 signficand). This routine must work for fcZero of both signs, and 1294 fcNormal numbers. */ 1295 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode, 1296 lostFraction lost_fraction, 1297 unsigned int bit) const { 1298 /* NaNs and infinities should not have lost fractions. */ 1299 assert(isFiniteNonZero() || category == fcZero); 1300 1301 /* Current callers never pass this so we don't handle it. */ 1302 assert(lost_fraction != lfExactlyZero); 1303 1304 switch (rounding_mode) { 1305 case rmNearestTiesToAway: 1306 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1307 1308 case rmNearestTiesToEven: 1309 if (lost_fraction == lfMoreThanHalf) 1310 return true; 1311 1312 /* Our zeroes don't have a significand to test. */ 1313 if (lost_fraction == lfExactlyHalf && category != fcZero) 1314 return APInt::tcExtractBit(significandParts(), bit); 1315 1316 return false; 1317 1318 case rmTowardZero: 1319 return false; 1320 1321 case rmTowardPositive: 1322 return !sign; 1323 1324 case rmTowardNegative: 1325 return sign; 1326 } 1327 llvm_unreachable("Invalid rounding mode found"); 1328 } 1329 1330 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, 1331 lostFraction lost_fraction) { 1332 unsigned int omsb; /* One, not zero, based MSB. */ 1333 int exponentChange; 1334 1335 if (!isFiniteNonZero()) 1336 return opOK; 1337 1338 /* Before rounding normalize the exponent of fcNormal numbers. */ 1339 omsb = significandMSB() + 1; 1340 1341 if (omsb) { 1342 /* OMSB is numbered from 1. We want to place it in the integer 1343 bit numbered PRECISION if possible, with a compensating change in 1344 the exponent. */ 1345 exponentChange = omsb - semantics->precision; 1346 1347 /* If the resulting exponent is too high, overflow according to 1348 the rounding mode. */ 1349 if (exponent + exponentChange > semantics->maxExponent) 1350 return handleOverflow(rounding_mode); 1351 1352 /* Subnormal numbers have exponent minExponent, and their MSB 1353 is forced based on that. */ 1354 if (exponent + exponentChange < semantics->minExponent) 1355 exponentChange = semantics->minExponent - exponent; 1356 1357 /* Shifting left is easy as we don't lose precision. */ 1358 if (exponentChange < 0) { 1359 assert(lost_fraction == lfExactlyZero); 1360 1361 shiftSignificandLeft(-exponentChange); 1362 1363 return opOK; 1364 } 1365 1366 if (exponentChange > 0) { 1367 lostFraction lf; 1368 1369 /* Shift right and capture any new lost fraction. */ 1370 lf = shiftSignificandRight(exponentChange); 1371 1372 lost_fraction = combineLostFractions(lf, lost_fraction); 1373 1374 /* Keep OMSB up-to-date. */ 1375 if (omsb > (unsigned) exponentChange) 1376 omsb -= exponentChange; 1377 else 1378 omsb = 0; 1379 } 1380 } 1381 1382 /* Now round the number according to rounding_mode given the lost 1383 fraction. */ 1384 1385 /* As specified in IEEE 754, since we do not trap we do not report 1386 underflow for exact results. */ 1387 if (lost_fraction == lfExactlyZero) { 1388 /* Canonicalize zeroes. */ 1389 if (omsb == 0) 1390 category = fcZero; 1391 1392 return opOK; 1393 } 1394 1395 /* Increment the significand if we're rounding away from zero. */ 1396 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1397 if (omsb == 0) 1398 exponent = semantics->minExponent; 1399 1400 incrementSignificand(); 1401 omsb = significandMSB() + 1; 1402 1403 /* Did the significand increment overflow? */ 1404 if (omsb == (unsigned) semantics->precision + 1) { 1405 /* Renormalize by incrementing the exponent and shifting our 1406 significand right one. However if we already have the 1407 maximum exponent we overflow to infinity. */ 1408 if (exponent == semantics->maxExponent) { 1409 category = fcInfinity; 1410 1411 return (opStatus) (opOverflow | opInexact); 1412 } 1413 1414 shiftSignificandRight(1); 1415 1416 return opInexact; 1417 } 1418 } 1419 1420 /* The normal case - we were and are not denormal, and any 1421 significand increment above didn't overflow. */ 1422 if (omsb == semantics->precision) 1423 return opInexact; 1424 1425 /* We have a non-zero denormal. */ 1426 assert(omsb < semantics->precision); 1427 1428 /* Canonicalize zeroes. */ 1429 if (omsb == 0) 1430 category = fcZero; 1431 1432 /* The fcZero case is a denormal that underflowed to zero. */ 1433 return (opStatus) (opUnderflow | opInexact); 1434 } 1435 1436 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs, 1437 bool subtract) { 1438 switch (PackCategoriesIntoKey(category, rhs.category)) { 1439 default: 1440 llvm_unreachable(nullptr); 1441 1442 case PackCategoriesIntoKey(fcZero, fcNaN): 1443 case PackCategoriesIntoKey(fcNormal, fcNaN): 1444 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1445 assign(rhs); 1446 LLVM_FALLTHROUGH; 1447 case PackCategoriesIntoKey(fcNaN, fcZero): 1448 case PackCategoriesIntoKey(fcNaN, fcNormal): 1449 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1450 case PackCategoriesIntoKey(fcNaN, fcNaN): 1451 if (isSignaling()) { 1452 makeQuiet(); 1453 return opInvalidOp; 1454 } 1455 return rhs.isSignaling() ? opInvalidOp : opOK; 1456 1457 case PackCategoriesIntoKey(fcNormal, fcZero): 1458 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1459 case PackCategoriesIntoKey(fcInfinity, fcZero): 1460 return opOK; 1461 1462 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1463 case PackCategoriesIntoKey(fcZero, fcInfinity): 1464 category = fcInfinity; 1465 sign = rhs.sign ^ subtract; 1466 return opOK; 1467 1468 case PackCategoriesIntoKey(fcZero, fcNormal): 1469 assign(rhs); 1470 sign = rhs.sign ^ subtract; 1471 return opOK; 1472 1473 case PackCategoriesIntoKey(fcZero, fcZero): 1474 /* Sign depends on rounding mode; handled by caller. */ 1475 return opOK; 1476 1477 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1478 /* Differently signed infinities can only be validly 1479 subtracted. */ 1480 if (((sign ^ rhs.sign)!=0) != subtract) { 1481 makeNaN(); 1482 return opInvalidOp; 1483 } 1484 1485 return opOK; 1486 1487 case PackCategoriesIntoKey(fcNormal, fcNormal): 1488 return opDivByZero; 1489 } 1490 } 1491 1492 /* Add or subtract two normal numbers. */ 1493 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs, 1494 bool subtract) { 1495 integerPart carry; 1496 lostFraction lost_fraction; 1497 int bits; 1498 1499 /* Determine if the operation on the absolute values is effectively 1500 an addition or subtraction. */ 1501 subtract ^= static_cast<bool>(sign ^ rhs.sign); 1502 1503 /* Are we bigger exponent-wise than the RHS? */ 1504 bits = exponent - rhs.exponent; 1505 1506 /* Subtraction is more subtle than one might naively expect. */ 1507 if (subtract) { 1508 IEEEFloat temp_rhs(rhs); 1509 1510 if (bits == 0) 1511 lost_fraction = lfExactlyZero; 1512 else if (bits > 0) { 1513 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1514 shiftSignificandLeft(1); 1515 } else { 1516 lost_fraction = shiftSignificandRight(-bits - 1); 1517 temp_rhs.shiftSignificandLeft(1); 1518 } 1519 1520 // Should we reverse the subtraction. 1521 if (compareAbsoluteValue(temp_rhs) == cmpLessThan) { 1522 carry = temp_rhs.subtractSignificand 1523 (*this, lost_fraction != lfExactlyZero); 1524 copySignificand(temp_rhs); 1525 sign = !sign; 1526 } else { 1527 carry = subtractSignificand 1528 (temp_rhs, lost_fraction != lfExactlyZero); 1529 } 1530 1531 /* Invert the lost fraction - it was on the RHS and 1532 subtracted. */ 1533 if (lost_fraction == lfLessThanHalf) 1534 lost_fraction = lfMoreThanHalf; 1535 else if (lost_fraction == lfMoreThanHalf) 1536 lost_fraction = lfLessThanHalf; 1537 1538 /* The code above is intended to ensure that no borrow is 1539 necessary. */ 1540 assert(!carry); 1541 (void)carry; 1542 } else { 1543 if (bits > 0) { 1544 IEEEFloat temp_rhs(rhs); 1545 1546 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1547 carry = addSignificand(temp_rhs); 1548 } else { 1549 lost_fraction = shiftSignificandRight(-bits); 1550 carry = addSignificand(rhs); 1551 } 1552 1553 /* We have a guard bit; generating a carry cannot happen. */ 1554 assert(!carry); 1555 (void)carry; 1556 } 1557 1558 return lost_fraction; 1559 } 1560 1561 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) { 1562 switch (PackCategoriesIntoKey(category, rhs.category)) { 1563 default: 1564 llvm_unreachable(nullptr); 1565 1566 case PackCategoriesIntoKey(fcZero, fcNaN): 1567 case PackCategoriesIntoKey(fcNormal, fcNaN): 1568 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1569 assign(rhs); 1570 sign = false; 1571 LLVM_FALLTHROUGH; 1572 case PackCategoriesIntoKey(fcNaN, fcZero): 1573 case PackCategoriesIntoKey(fcNaN, fcNormal): 1574 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1575 case PackCategoriesIntoKey(fcNaN, fcNaN): 1576 sign ^= rhs.sign; // restore the original sign 1577 if (isSignaling()) { 1578 makeQuiet(); 1579 return opInvalidOp; 1580 } 1581 return rhs.isSignaling() ? opInvalidOp : opOK; 1582 1583 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1584 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1585 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1586 category = fcInfinity; 1587 return opOK; 1588 1589 case PackCategoriesIntoKey(fcZero, fcNormal): 1590 case PackCategoriesIntoKey(fcNormal, fcZero): 1591 case PackCategoriesIntoKey(fcZero, fcZero): 1592 category = fcZero; 1593 return opOK; 1594 1595 case PackCategoriesIntoKey(fcZero, fcInfinity): 1596 case PackCategoriesIntoKey(fcInfinity, fcZero): 1597 makeNaN(); 1598 return opInvalidOp; 1599 1600 case PackCategoriesIntoKey(fcNormal, fcNormal): 1601 return opOK; 1602 } 1603 } 1604 1605 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) { 1606 switch (PackCategoriesIntoKey(category, rhs.category)) { 1607 default: 1608 llvm_unreachable(nullptr); 1609 1610 case PackCategoriesIntoKey(fcZero, fcNaN): 1611 case PackCategoriesIntoKey(fcNormal, fcNaN): 1612 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1613 assign(rhs); 1614 sign = false; 1615 LLVM_FALLTHROUGH; 1616 case PackCategoriesIntoKey(fcNaN, fcZero): 1617 case PackCategoriesIntoKey(fcNaN, fcNormal): 1618 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1619 case PackCategoriesIntoKey(fcNaN, fcNaN): 1620 sign ^= rhs.sign; // restore the original sign 1621 if (isSignaling()) { 1622 makeQuiet(); 1623 return opInvalidOp; 1624 } 1625 return rhs.isSignaling() ? opInvalidOp : opOK; 1626 1627 case PackCategoriesIntoKey(fcInfinity, fcZero): 1628 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1629 case PackCategoriesIntoKey(fcZero, fcInfinity): 1630 case PackCategoriesIntoKey(fcZero, fcNormal): 1631 return opOK; 1632 1633 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1634 category = fcZero; 1635 return opOK; 1636 1637 case PackCategoriesIntoKey(fcNormal, fcZero): 1638 category = fcInfinity; 1639 return opDivByZero; 1640 1641 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1642 case PackCategoriesIntoKey(fcZero, fcZero): 1643 makeNaN(); 1644 return opInvalidOp; 1645 1646 case PackCategoriesIntoKey(fcNormal, fcNormal): 1647 return opOK; 1648 } 1649 } 1650 1651 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) { 1652 switch (PackCategoriesIntoKey(category, rhs.category)) { 1653 default: 1654 llvm_unreachable(nullptr); 1655 1656 case PackCategoriesIntoKey(fcZero, fcNaN): 1657 case PackCategoriesIntoKey(fcNormal, fcNaN): 1658 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1659 assign(rhs); 1660 LLVM_FALLTHROUGH; 1661 case PackCategoriesIntoKey(fcNaN, fcZero): 1662 case PackCategoriesIntoKey(fcNaN, fcNormal): 1663 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1664 case PackCategoriesIntoKey(fcNaN, fcNaN): 1665 if (isSignaling()) { 1666 makeQuiet(); 1667 return opInvalidOp; 1668 } 1669 return rhs.isSignaling() ? opInvalidOp : opOK; 1670 1671 case PackCategoriesIntoKey(fcZero, fcInfinity): 1672 case PackCategoriesIntoKey(fcZero, fcNormal): 1673 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1674 return opOK; 1675 1676 case PackCategoriesIntoKey(fcNormal, fcZero): 1677 case PackCategoriesIntoKey(fcInfinity, fcZero): 1678 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1679 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1680 case PackCategoriesIntoKey(fcZero, fcZero): 1681 makeNaN(); 1682 return opInvalidOp; 1683 1684 case PackCategoriesIntoKey(fcNormal, fcNormal): 1685 return opOK; 1686 } 1687 } 1688 1689 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) { 1690 switch (PackCategoriesIntoKey(category, rhs.category)) { 1691 default: 1692 llvm_unreachable(nullptr); 1693 1694 case PackCategoriesIntoKey(fcZero, fcNaN): 1695 case PackCategoriesIntoKey(fcNormal, fcNaN): 1696 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1697 assign(rhs); 1698 LLVM_FALLTHROUGH; 1699 case PackCategoriesIntoKey(fcNaN, fcZero): 1700 case PackCategoriesIntoKey(fcNaN, fcNormal): 1701 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1702 case PackCategoriesIntoKey(fcNaN, fcNaN): 1703 if (isSignaling()) { 1704 makeQuiet(); 1705 return opInvalidOp; 1706 } 1707 return rhs.isSignaling() ? opInvalidOp : opOK; 1708 1709 case PackCategoriesIntoKey(fcZero, fcInfinity): 1710 case PackCategoriesIntoKey(fcZero, fcNormal): 1711 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1712 return opOK; 1713 1714 case PackCategoriesIntoKey(fcNormal, fcZero): 1715 case PackCategoriesIntoKey(fcInfinity, fcZero): 1716 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1717 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1718 case PackCategoriesIntoKey(fcZero, fcZero): 1719 makeNaN(); 1720 return opInvalidOp; 1721 1722 case PackCategoriesIntoKey(fcNormal, fcNormal): 1723 return opDivByZero; // fake status, indicating this is not a special case 1724 } 1725 } 1726 1727 /* Change sign. */ 1728 void IEEEFloat::changeSign() { 1729 /* Look mummy, this one's easy. */ 1730 sign = !sign; 1731 } 1732 1733 /* Normalized addition or subtraction. */ 1734 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs, 1735 roundingMode rounding_mode, 1736 bool subtract) { 1737 opStatus fs; 1738 1739 fs = addOrSubtractSpecials(rhs, subtract); 1740 1741 /* This return code means it was not a simple case. */ 1742 if (fs == opDivByZero) { 1743 lostFraction lost_fraction; 1744 1745 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1746 fs = normalize(rounding_mode, lost_fraction); 1747 1748 /* Can only be zero if we lost no fraction. */ 1749 assert(category != fcZero || lost_fraction == lfExactlyZero); 1750 } 1751 1752 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1753 positive zero unless rounding to minus infinity, except that 1754 adding two like-signed zeroes gives that zero. */ 1755 if (category == fcZero) { 1756 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 1757 sign = (rounding_mode == rmTowardNegative); 1758 } 1759 1760 return fs; 1761 } 1762 1763 /* Normalized addition. */ 1764 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs, 1765 roundingMode rounding_mode) { 1766 return addOrSubtract(rhs, rounding_mode, false); 1767 } 1768 1769 /* Normalized subtraction. */ 1770 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs, 1771 roundingMode rounding_mode) { 1772 return addOrSubtract(rhs, rounding_mode, true); 1773 } 1774 1775 /* Normalized multiply. */ 1776 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs, 1777 roundingMode rounding_mode) { 1778 opStatus fs; 1779 1780 sign ^= rhs.sign; 1781 fs = multiplySpecials(rhs); 1782 1783 if (isFiniteNonZero()) { 1784 lostFraction lost_fraction = multiplySignificand(rhs); 1785 fs = normalize(rounding_mode, lost_fraction); 1786 if (lost_fraction != lfExactlyZero) 1787 fs = (opStatus) (fs | opInexact); 1788 } 1789 1790 return fs; 1791 } 1792 1793 /* Normalized divide. */ 1794 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs, 1795 roundingMode rounding_mode) { 1796 opStatus fs; 1797 1798 sign ^= rhs.sign; 1799 fs = divideSpecials(rhs); 1800 1801 if (isFiniteNonZero()) { 1802 lostFraction lost_fraction = divideSignificand(rhs); 1803 fs = normalize(rounding_mode, lost_fraction); 1804 if (lost_fraction != lfExactlyZero) 1805 fs = (opStatus) (fs | opInexact); 1806 } 1807 1808 return fs; 1809 } 1810 1811 /* Normalized remainder. */ 1812 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) { 1813 opStatus fs; 1814 unsigned int origSign = sign; 1815 1816 // First handle the special cases. 1817 fs = remainderSpecials(rhs); 1818 if (fs != opDivByZero) 1819 return fs; 1820 1821 fs = opOK; 1822 1823 // Make sure the current value is less than twice the denom. If the addition 1824 // did not succeed (an overflow has happened), which means that the finite 1825 // value we currently posses must be less than twice the denom (as we are 1826 // using the same semantics). 1827 IEEEFloat P2 = rhs; 1828 if (P2.add(rhs, rmNearestTiesToEven) == opOK) { 1829 fs = mod(P2); 1830 assert(fs == opOK); 1831 } 1832 1833 // Lets work with absolute numbers. 1834 IEEEFloat P = rhs; 1835 P.sign = false; 1836 sign = false; 1837 1838 // 1839 // To calculate the remainder we use the following scheme. 1840 // 1841 // The remainder is defained as follows: 1842 // 1843 // remainder = numer - rquot * denom = x - r * p 1844 // 1845 // Where r is the result of: x/p, rounded toward the nearest integral value 1846 // (with halfway cases rounded toward the even number). 1847 // 1848 // Currently, (after x mod 2p): 1849 // r is the number of 2p's present inside x, which is inherently, an even 1850 // number of p's. 1851 // 1852 // We may split the remaining calculation into 4 options: 1853 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 1854 // - if x == 0.5p then we round to the nearest even number which is 0, and we 1855 // are done as well. 1856 // - if 0.5p < x < p then we round to nearest number which is 1, and we have 1857 // to subtract 1p at least once. 1858 // - if x >= p then we must subtract p at least once, as x must be a 1859 // remainder. 1860 // 1861 // By now, we were done, or we added 1 to r, which in turn, now an odd number. 1862 // 1863 // We can now split the remaining calculation to the following 3 options: 1864 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 1865 // - if x == 0.5p then we round to the nearest even number. As r is odd, we 1866 // must round up to the next even number. so we must subtract p once more. 1867 // - if x > 0.5p (and inherently x < p) then we must round r up to the next 1868 // integral, and subtract p once more. 1869 // 1870 1871 // Extend the semantics to prevent an overflow/underflow or inexact result. 1872 bool losesInfo; 1873 fltSemantics extendedSemantics = *semantics; 1874 extendedSemantics.maxExponent++; 1875 extendedSemantics.minExponent--; 1876 extendedSemantics.precision += 2; 1877 1878 IEEEFloat VEx = *this; 1879 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 1880 assert(fs == opOK && !losesInfo); 1881 IEEEFloat PEx = P; 1882 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 1883 assert(fs == opOK && !losesInfo); 1884 1885 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose 1886 // any fraction. 1887 fs = VEx.add(VEx, rmNearestTiesToEven); 1888 assert(fs == opOK); 1889 1890 if (VEx.compare(PEx) == cmpGreaterThan) { 1891 fs = subtract(P, rmNearestTiesToEven); 1892 assert(fs == opOK); 1893 1894 // Make VEx = this.add(this), but because we have different semantics, we do 1895 // not want to `convert` again, so we just subtract PEx twice (which equals 1896 // to the desired value). 1897 fs = VEx.subtract(PEx, rmNearestTiesToEven); 1898 assert(fs == opOK); 1899 fs = VEx.subtract(PEx, rmNearestTiesToEven); 1900 assert(fs == opOK); 1901 1902 cmpResult result = VEx.compare(PEx); 1903 if (result == cmpGreaterThan || result == cmpEqual) { 1904 fs = subtract(P, rmNearestTiesToEven); 1905 assert(fs == opOK); 1906 } 1907 } 1908 1909 if (isZero()) 1910 sign = origSign; // IEEE754 requires this 1911 else 1912 sign ^= origSign; 1913 return fs; 1914 } 1915 1916 /* Normalized llvm frem (C fmod). */ 1917 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) { 1918 opStatus fs; 1919 fs = modSpecials(rhs); 1920 unsigned int origSign = sign; 1921 1922 while (isFiniteNonZero() && rhs.isFiniteNonZero() && 1923 compareAbsoluteValue(rhs) != cmpLessThan) { 1924 IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven); 1925 if (compareAbsoluteValue(V) == cmpLessThan) 1926 V = scalbn(V, -1, rmNearestTiesToEven); 1927 V.sign = sign; 1928 1929 fs = subtract(V, rmNearestTiesToEven); 1930 assert(fs==opOK); 1931 } 1932 if (isZero()) 1933 sign = origSign; // fmod requires this 1934 return fs; 1935 } 1936 1937 /* Normalized fused-multiply-add. */ 1938 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand, 1939 const IEEEFloat &addend, 1940 roundingMode rounding_mode) { 1941 opStatus fs; 1942 1943 /* Post-multiplication sign, before addition. */ 1944 sign ^= multiplicand.sign; 1945 1946 /* If and only if all arguments are normal do we need to do an 1947 extended-precision calculation. */ 1948 if (isFiniteNonZero() && 1949 multiplicand.isFiniteNonZero() && 1950 addend.isFinite()) { 1951 lostFraction lost_fraction; 1952 1953 lost_fraction = multiplySignificand(multiplicand, addend); 1954 fs = normalize(rounding_mode, lost_fraction); 1955 if (lost_fraction != lfExactlyZero) 1956 fs = (opStatus) (fs | opInexact); 1957 1958 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1959 positive zero unless rounding to minus infinity, except that 1960 adding two like-signed zeroes gives that zero. */ 1961 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) 1962 sign = (rounding_mode == rmTowardNegative); 1963 } else { 1964 fs = multiplySpecials(multiplicand); 1965 1966 /* FS can only be opOK or opInvalidOp. There is no more work 1967 to do in the latter case. The IEEE-754R standard says it is 1968 implementation-defined in this case whether, if ADDEND is a 1969 quiet NaN, we raise invalid op; this implementation does so. 1970 1971 If we need to do the addition we can do so with normal 1972 precision. */ 1973 if (fs == opOK) 1974 fs = addOrSubtract(addend, rounding_mode, false); 1975 } 1976 1977 return fs; 1978 } 1979 1980 /* Rounding-mode correct round to integral value. */ 1981 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) { 1982 opStatus fs; 1983 1984 if (isInfinity()) 1985 // [IEEE Std 754-2008 6.1]: 1986 // The behavior of infinity in floating-point arithmetic is derived from the 1987 // limiting cases of real arithmetic with operands of arbitrarily 1988 // large magnitude, when such a limit exists. 1989 // ... 1990 // Operations on infinite operands are usually exact and therefore signal no 1991 // exceptions ... 1992 return opOK; 1993 1994 if (isNaN()) { 1995 if (isSignaling()) { 1996 // [IEEE Std 754-2008 6.2]: 1997 // Under default exception handling, any operation signaling an invalid 1998 // operation exception and for which a floating-point result is to be 1999 // delivered shall deliver a quiet NaN. 2000 makeQuiet(); 2001 // [IEEE Std 754-2008 6.2]: 2002 // Signaling NaNs shall be reserved operands that, under default exception 2003 // handling, signal the invalid operation exception(see 7.2) for every 2004 // general-computational and signaling-computational operation except for 2005 // the conversions described in 5.12. 2006 return opInvalidOp; 2007 } else { 2008 // [IEEE Std 754-2008 6.2]: 2009 // For an operation with quiet NaN inputs, other than maximum and minimum 2010 // operations, if a floating-point result is to be delivered the result 2011 // shall be a quiet NaN which should be one of the input NaNs. 2012 // ... 2013 // Every general-computational and quiet-computational operation involving 2014 // one or more input NaNs, none of them signaling, shall signal no 2015 // exception, except fusedMultiplyAdd might signal the invalid operation 2016 // exception(see 7.2). 2017 return opOK; 2018 } 2019 } 2020 2021 if (isZero()) { 2022 // [IEEE Std 754-2008 6.3]: 2023 // ... the sign of the result of conversions, the quantize operation, the 2024 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is 2025 // the sign of the first or only operand. 2026 return opOK; 2027 } 2028 2029 // If the exponent is large enough, we know that this value is already 2030 // integral, and the arithmetic below would potentially cause it to saturate 2031 // to +/-Inf. Bail out early instead. 2032 if (exponent+1 >= (int)semanticsPrecision(*semantics)) 2033 return opOK; 2034 2035 // The algorithm here is quite simple: we add 2^(p-1), where p is the 2036 // precision of our format, and then subtract it back off again. The choice 2037 // of rounding modes for the addition/subtraction determines the rounding mode 2038 // for our integral rounding as well. 2039 // NOTE: When the input value is negative, we do subtraction followed by 2040 // addition instead. 2041 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); 2042 IntegerConstant <<= semanticsPrecision(*semantics)-1; 2043 IEEEFloat MagicConstant(*semantics); 2044 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 2045 rmNearestTiesToEven); 2046 assert(fs == opOK); 2047 MagicConstant.sign = sign; 2048 2049 // Preserve the input sign so that we can handle the case of zero result 2050 // correctly. 2051 bool inputSign = isNegative(); 2052 2053 fs = add(MagicConstant, rounding_mode); 2054 2055 // Current value and 'MagicConstant' are both integers, so the result of the 2056 // subtraction is always exact according to Sterbenz' lemma. 2057 subtract(MagicConstant, rounding_mode); 2058 2059 // Restore the input sign. 2060 if (inputSign != isNegative()) 2061 changeSign(); 2062 2063 return fs; 2064 } 2065 2066 2067 /* Comparison requires normalized numbers. */ 2068 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const { 2069 cmpResult result; 2070 2071 assert(semantics == rhs.semantics); 2072 2073 switch (PackCategoriesIntoKey(category, rhs.category)) { 2074 default: 2075 llvm_unreachable(nullptr); 2076 2077 case PackCategoriesIntoKey(fcNaN, fcZero): 2078 case PackCategoriesIntoKey(fcNaN, fcNormal): 2079 case PackCategoriesIntoKey(fcNaN, fcInfinity): 2080 case PackCategoriesIntoKey(fcNaN, fcNaN): 2081 case PackCategoriesIntoKey(fcZero, fcNaN): 2082 case PackCategoriesIntoKey(fcNormal, fcNaN): 2083 case PackCategoriesIntoKey(fcInfinity, fcNaN): 2084 return cmpUnordered; 2085 2086 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2087 case PackCategoriesIntoKey(fcInfinity, fcZero): 2088 case PackCategoriesIntoKey(fcNormal, fcZero): 2089 if (sign) 2090 return cmpLessThan; 2091 else 2092 return cmpGreaterThan; 2093 2094 case PackCategoriesIntoKey(fcNormal, fcInfinity): 2095 case PackCategoriesIntoKey(fcZero, fcInfinity): 2096 case PackCategoriesIntoKey(fcZero, fcNormal): 2097 if (rhs.sign) 2098 return cmpGreaterThan; 2099 else 2100 return cmpLessThan; 2101 2102 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2103 if (sign == rhs.sign) 2104 return cmpEqual; 2105 else if (sign) 2106 return cmpLessThan; 2107 else 2108 return cmpGreaterThan; 2109 2110 case PackCategoriesIntoKey(fcZero, fcZero): 2111 return cmpEqual; 2112 2113 case PackCategoriesIntoKey(fcNormal, fcNormal): 2114 break; 2115 } 2116 2117 /* Two normal numbers. Do they have the same sign? */ 2118 if (sign != rhs.sign) { 2119 if (sign) 2120 result = cmpLessThan; 2121 else 2122 result = cmpGreaterThan; 2123 } else { 2124 /* Compare absolute values; invert result if negative. */ 2125 result = compareAbsoluteValue(rhs); 2126 2127 if (sign) { 2128 if (result == cmpLessThan) 2129 result = cmpGreaterThan; 2130 else if (result == cmpGreaterThan) 2131 result = cmpLessThan; 2132 } 2133 } 2134 2135 return result; 2136 } 2137 2138 /// IEEEFloat::convert - convert a value of one floating point type to another. 2139 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 2140 /// records whether the transformation lost information, i.e. whether 2141 /// converting the result back to the original type will produce the 2142 /// original value (this is almost the same as return value==fsOK, but there 2143 /// are edge cases where this is not so). 2144 2145 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, 2146 roundingMode rounding_mode, 2147 bool *losesInfo) { 2148 lostFraction lostFraction; 2149 unsigned int newPartCount, oldPartCount; 2150 opStatus fs; 2151 int shift; 2152 const fltSemantics &fromSemantics = *semantics; 2153 2154 lostFraction = lfExactlyZero; 2155 newPartCount = partCountForBits(toSemantics.precision + 1); 2156 oldPartCount = partCount(); 2157 shift = toSemantics.precision - fromSemantics.precision; 2158 2159 bool X86SpecialNan = false; 2160 if (&fromSemantics == &semX87DoubleExtended && 2161 &toSemantics != &semX87DoubleExtended && category == fcNaN && 2162 (!(*significandParts() & 0x8000000000000000ULL) || 2163 !(*significandParts() & 0x4000000000000000ULL))) { 2164 // x86 has some unusual NaNs which cannot be represented in any other 2165 // format; note them here. 2166 X86SpecialNan = true; 2167 } 2168 2169 // If this is a truncation of a denormal number, and the target semantics 2170 // has larger exponent range than the source semantics (this can happen 2171 // when truncating from PowerPC double-double to double format), the 2172 // right shift could lose result mantissa bits. Adjust exponent instead 2173 // of performing excessive shift. 2174 if (shift < 0 && isFiniteNonZero()) { 2175 int exponentChange = significandMSB() + 1 - fromSemantics.precision; 2176 if (exponent + exponentChange < toSemantics.minExponent) 2177 exponentChange = toSemantics.minExponent - exponent; 2178 if (exponentChange < shift) 2179 exponentChange = shift; 2180 if (exponentChange < 0) { 2181 shift -= exponentChange; 2182 exponent += exponentChange; 2183 } 2184 } 2185 2186 // If this is a truncation, perform the shift before we narrow the storage. 2187 if (shift < 0 && (isFiniteNonZero() || category==fcNaN)) 2188 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 2189 2190 // Fix the storage so it can hold to new value. 2191 if (newPartCount > oldPartCount) { 2192 // The new type requires more storage; make it available. 2193 integerPart *newParts; 2194 newParts = new integerPart[newPartCount]; 2195 APInt::tcSet(newParts, 0, newPartCount); 2196 if (isFiniteNonZero() || category==fcNaN) 2197 APInt::tcAssign(newParts, significandParts(), oldPartCount); 2198 freeSignificand(); 2199 significand.parts = newParts; 2200 } else if (newPartCount == 1 && oldPartCount != 1) { 2201 // Switch to built-in storage for a single part. 2202 integerPart newPart = 0; 2203 if (isFiniteNonZero() || category==fcNaN) 2204 newPart = significandParts()[0]; 2205 freeSignificand(); 2206 significand.part = newPart; 2207 } 2208 2209 // Now that we have the right storage, switch the semantics. 2210 semantics = &toSemantics; 2211 2212 // If this is an extension, perform the shift now that the storage is 2213 // available. 2214 if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) 2215 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 2216 2217 if (isFiniteNonZero()) { 2218 fs = normalize(rounding_mode, lostFraction); 2219 *losesInfo = (fs != opOK); 2220 } else if (category == fcNaN) { 2221 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 2222 2223 // For x87 extended precision, we want to make a NaN, not a special NaN if 2224 // the input wasn't special either. 2225 if (!X86SpecialNan && semantics == &semX87DoubleExtended) 2226 APInt::tcSetBit(significandParts(), semantics->precision - 1); 2227 2228 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) 2229 // does not give you back the same bits. This is dubious, and we 2230 // don't currently do it. You're really supposed to get 2231 // an invalid operation signal at runtime, but nobody does that. 2232 fs = opOK; 2233 } else { 2234 *losesInfo = false; 2235 fs = opOK; 2236 } 2237 2238 return fs; 2239 } 2240 2241 /* Convert a floating point number to an integer according to the 2242 rounding mode. If the rounded integer value is out of range this 2243 returns an invalid operation exception and the contents of the 2244 destination parts are unspecified. If the rounded value is in 2245 range but the floating point number is not the exact integer, the C 2246 standard doesn't require an inexact exception to be raised. IEEE 2247 854 does require it so we do that. 2248 2249 Note that for conversions to integer type the C standard requires 2250 round-to-zero to always be used. */ 2251 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger( 2252 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned, 2253 roundingMode rounding_mode, bool *isExact) const { 2254 lostFraction lost_fraction; 2255 const integerPart *src; 2256 unsigned int dstPartsCount, truncatedBits; 2257 2258 *isExact = false; 2259 2260 /* Handle the three special cases first. */ 2261 if (category == fcInfinity || category == fcNaN) 2262 return opInvalidOp; 2263 2264 dstPartsCount = partCountForBits(width); 2265 assert(dstPartsCount <= parts.size() && "Integer too big"); 2266 2267 if (category == fcZero) { 2268 APInt::tcSet(parts.data(), 0, dstPartsCount); 2269 // Negative zero can't be represented as an int. 2270 *isExact = !sign; 2271 return opOK; 2272 } 2273 2274 src = significandParts(); 2275 2276 /* Step 1: place our absolute value, with any fraction truncated, in 2277 the destination. */ 2278 if (exponent < 0) { 2279 /* Our absolute value is less than one; truncate everything. */ 2280 APInt::tcSet(parts.data(), 0, dstPartsCount); 2281 /* For exponent -1 the integer bit represents .5, look at that. 2282 For smaller exponents leftmost truncated bit is 0. */ 2283 truncatedBits = semantics->precision -1U - exponent; 2284 } else { 2285 /* We want the most significant (exponent + 1) bits; the rest are 2286 truncated. */ 2287 unsigned int bits = exponent + 1U; 2288 2289 /* Hopelessly large in magnitude? */ 2290 if (bits > width) 2291 return opInvalidOp; 2292 2293 if (bits < semantics->precision) { 2294 /* We truncate (semantics->precision - bits) bits. */ 2295 truncatedBits = semantics->precision - bits; 2296 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits); 2297 } else { 2298 /* We want at least as many bits as are available. */ 2299 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision, 2300 0); 2301 APInt::tcShiftLeft(parts.data(), dstPartsCount, 2302 bits - semantics->precision); 2303 truncatedBits = 0; 2304 } 2305 } 2306 2307 /* Step 2: work out any lost fraction, and increment the absolute 2308 value if we would round away from zero. */ 2309 if (truncatedBits) { 2310 lost_fraction = lostFractionThroughTruncation(src, partCount(), 2311 truncatedBits); 2312 if (lost_fraction != lfExactlyZero && 2313 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2314 if (APInt::tcIncrement(parts.data(), dstPartsCount)) 2315 return opInvalidOp; /* Overflow. */ 2316 } 2317 } else { 2318 lost_fraction = lfExactlyZero; 2319 } 2320 2321 /* Step 3: check if we fit in the destination. */ 2322 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1; 2323 2324 if (sign) { 2325 if (!isSigned) { 2326 /* Negative numbers cannot be represented as unsigned. */ 2327 if (omsb != 0) 2328 return opInvalidOp; 2329 } else { 2330 /* It takes omsb bits to represent the unsigned integer value. 2331 We lose a bit for the sign, but care is needed as the 2332 maximally negative integer is a special case. */ 2333 if (omsb == width && 2334 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb) 2335 return opInvalidOp; 2336 2337 /* This case can happen because of rounding. */ 2338 if (omsb > width) 2339 return opInvalidOp; 2340 } 2341 2342 APInt::tcNegate (parts.data(), dstPartsCount); 2343 } else { 2344 if (omsb >= width + !isSigned) 2345 return opInvalidOp; 2346 } 2347 2348 if (lost_fraction == lfExactlyZero) { 2349 *isExact = true; 2350 return opOK; 2351 } else 2352 return opInexact; 2353 } 2354 2355 /* Same as convertToSignExtendedInteger, except we provide 2356 deterministic values in case of an invalid operation exception, 2357 namely zero for NaNs and the minimal or maximal value respectively 2358 for underflow or overflow. 2359 The *isExact output tells whether the result is exact, in the sense 2360 that converting it back to the original floating point type produces 2361 the original value. This is almost equivalent to result==opOK, 2362 except for negative zeroes. 2363 */ 2364 IEEEFloat::opStatus 2365 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts, 2366 unsigned int width, bool isSigned, 2367 roundingMode rounding_mode, bool *isExact) const { 2368 opStatus fs; 2369 2370 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2371 isExact); 2372 2373 if (fs == opInvalidOp) { 2374 unsigned int bits, dstPartsCount; 2375 2376 dstPartsCount = partCountForBits(width); 2377 assert(dstPartsCount <= parts.size() && "Integer too big"); 2378 2379 if (category == fcNaN) 2380 bits = 0; 2381 else if (sign) 2382 bits = isSigned; 2383 else 2384 bits = width - isSigned; 2385 2386 APInt::tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits); 2387 if (sign && isSigned) 2388 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1); 2389 } 2390 2391 return fs; 2392 } 2393 2394 /* Convert an unsigned integer SRC to a floating point number, 2395 rounding according to ROUNDING_MODE. The sign of the floating 2396 point number is not modified. */ 2397 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts( 2398 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) { 2399 unsigned int omsb, precision, dstCount; 2400 integerPart *dst; 2401 lostFraction lost_fraction; 2402 2403 category = fcNormal; 2404 omsb = APInt::tcMSB(src, srcCount) + 1; 2405 dst = significandParts(); 2406 dstCount = partCount(); 2407 precision = semantics->precision; 2408 2409 /* We want the most significant PRECISION bits of SRC. There may not 2410 be that many; extract what we can. */ 2411 if (precision <= omsb) { 2412 exponent = omsb - 1; 2413 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2414 omsb - precision); 2415 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2416 } else { 2417 exponent = precision - 1; 2418 lost_fraction = lfExactlyZero; 2419 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2420 } 2421 2422 return normalize(rounding_mode, lost_fraction); 2423 } 2424 2425 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned, 2426 roundingMode rounding_mode) { 2427 unsigned int partCount = Val.getNumWords(); 2428 APInt api = Val; 2429 2430 sign = false; 2431 if (isSigned && api.isNegative()) { 2432 sign = true; 2433 api = -api; 2434 } 2435 2436 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2437 } 2438 2439 /* Convert a two's complement integer SRC to a floating point number, 2440 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2441 integer is signed, in which case it must be sign-extended. */ 2442 IEEEFloat::opStatus 2443 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src, 2444 unsigned int srcCount, bool isSigned, 2445 roundingMode rounding_mode) { 2446 opStatus status; 2447 2448 if (isSigned && 2449 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2450 integerPart *copy; 2451 2452 /* If we're signed and negative negate a copy. */ 2453 sign = true; 2454 copy = new integerPart[srcCount]; 2455 APInt::tcAssign(copy, src, srcCount); 2456 APInt::tcNegate(copy, srcCount); 2457 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2458 delete [] copy; 2459 } else { 2460 sign = false; 2461 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2462 } 2463 2464 return status; 2465 } 2466 2467 /* FIXME: should this just take a const APInt reference? */ 2468 IEEEFloat::opStatus 2469 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2470 unsigned int width, bool isSigned, 2471 roundingMode rounding_mode) { 2472 unsigned int partCount = partCountForBits(width); 2473 APInt api = APInt(width, makeArrayRef(parts, partCount)); 2474 2475 sign = false; 2476 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2477 sign = true; 2478 api = -api; 2479 } 2480 2481 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2482 } 2483 2484 Expected<IEEEFloat::opStatus> 2485 IEEEFloat::convertFromHexadecimalString(StringRef s, 2486 roundingMode rounding_mode) { 2487 lostFraction lost_fraction = lfExactlyZero; 2488 2489 category = fcNormal; 2490 zeroSignificand(); 2491 exponent = 0; 2492 2493 integerPart *significand = significandParts(); 2494 unsigned partsCount = partCount(); 2495 unsigned bitPos = partsCount * integerPartWidth; 2496 bool computedTrailingFraction = false; 2497 2498 // Skip leading zeroes and any (hexa)decimal point. 2499 StringRef::iterator begin = s.begin(); 2500 StringRef::iterator end = s.end(); 2501 StringRef::iterator dot; 2502 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2503 if (!PtrOrErr) 2504 return PtrOrErr.takeError(); 2505 StringRef::iterator p = *PtrOrErr; 2506 StringRef::iterator firstSignificantDigit = p; 2507 2508 while (p != end) { 2509 integerPart hex_value; 2510 2511 if (*p == '.') { 2512 if (dot != end) 2513 return createError("String contains multiple dots"); 2514 dot = p++; 2515 continue; 2516 } 2517 2518 hex_value = hexDigitValue(*p); 2519 if (hex_value == -1U) 2520 break; 2521 2522 p++; 2523 2524 // Store the number while we have space. 2525 if (bitPos) { 2526 bitPos -= 4; 2527 hex_value <<= bitPos % integerPartWidth; 2528 significand[bitPos / integerPartWidth] |= hex_value; 2529 } else if (!computedTrailingFraction) { 2530 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value); 2531 if (!FractOrErr) 2532 return FractOrErr.takeError(); 2533 lost_fraction = *FractOrErr; 2534 computedTrailingFraction = true; 2535 } 2536 } 2537 2538 /* Hex floats require an exponent but not a hexadecimal point. */ 2539 if (p == end) 2540 return createError("Hex strings require an exponent"); 2541 if (*p != 'p' && *p != 'P') 2542 return createError("Invalid character in significand"); 2543 if (p == begin) 2544 return createError("Significand has no digits"); 2545 if (dot != end && p - begin == 1) 2546 return createError("Significand has no digits"); 2547 2548 /* Ignore the exponent if we are zero. */ 2549 if (p != firstSignificantDigit) { 2550 int expAdjustment; 2551 2552 /* Implicit hexadecimal point? */ 2553 if (dot == end) 2554 dot = p; 2555 2556 /* Calculate the exponent adjustment implicit in the number of 2557 significant digits. */ 2558 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 2559 if (expAdjustment < 0) 2560 expAdjustment++; 2561 expAdjustment = expAdjustment * 4 - 1; 2562 2563 /* Adjust for writing the significand starting at the most 2564 significant nibble. */ 2565 expAdjustment += semantics->precision; 2566 expAdjustment -= partsCount * integerPartWidth; 2567 2568 /* Adjust for the given exponent. */ 2569 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment); 2570 if (!ExpOrErr) 2571 return ExpOrErr.takeError(); 2572 exponent = *ExpOrErr; 2573 } 2574 2575 return normalize(rounding_mode, lost_fraction); 2576 } 2577 2578 IEEEFloat::opStatus 2579 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2580 unsigned sigPartCount, int exp, 2581 roundingMode rounding_mode) { 2582 unsigned int parts, pow5PartCount; 2583 fltSemantics calcSemantics = { 32767, -32767, 0, 0 }; 2584 integerPart pow5Parts[maxPowerOfFiveParts]; 2585 bool isNearest; 2586 2587 isNearest = (rounding_mode == rmNearestTiesToEven || 2588 rounding_mode == rmNearestTiesToAway); 2589 2590 parts = partCountForBits(semantics->precision + 11); 2591 2592 /* Calculate pow(5, abs(exp)). */ 2593 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2594 2595 for (;; parts *= 2) { 2596 opStatus sigStatus, powStatus; 2597 unsigned int excessPrecision, truncatedBits; 2598 2599 calcSemantics.precision = parts * integerPartWidth - 1; 2600 excessPrecision = calcSemantics.precision - semantics->precision; 2601 truncatedBits = excessPrecision; 2602 2603 IEEEFloat decSig(calcSemantics, uninitialized); 2604 decSig.makeZero(sign); 2605 IEEEFloat pow5(calcSemantics); 2606 2607 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2608 rmNearestTiesToEven); 2609 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2610 rmNearestTiesToEven); 2611 /* Add exp, as 10^n = 5^n * 2^n. */ 2612 decSig.exponent += exp; 2613 2614 lostFraction calcLostFraction; 2615 integerPart HUerr, HUdistance; 2616 unsigned int powHUerr; 2617 2618 if (exp >= 0) { 2619 /* multiplySignificand leaves the precision-th bit set to 1. */ 2620 calcLostFraction = decSig.multiplySignificand(pow5); 2621 powHUerr = powStatus != opOK; 2622 } else { 2623 calcLostFraction = decSig.divideSignificand(pow5); 2624 /* Denormal numbers have less precision. */ 2625 if (decSig.exponent < semantics->minExponent) { 2626 excessPrecision += (semantics->minExponent - decSig.exponent); 2627 truncatedBits = excessPrecision; 2628 if (excessPrecision > calcSemantics.precision) 2629 excessPrecision = calcSemantics.precision; 2630 } 2631 /* Extra half-ulp lost in reciprocal of exponent. */ 2632 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 2633 } 2634 2635 /* Both multiplySignificand and divideSignificand return the 2636 result with the integer bit set. */ 2637 assert(APInt::tcExtractBit 2638 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2639 2640 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2641 powHUerr); 2642 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2643 excessPrecision, isNearest); 2644 2645 /* Are we guaranteed to round correctly if we truncate? */ 2646 if (HUdistance >= HUerr) { 2647 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2648 calcSemantics.precision - excessPrecision, 2649 excessPrecision); 2650 /* Take the exponent of decSig. If we tcExtract-ed less bits 2651 above we must adjust our exponent to compensate for the 2652 implicit right shift. */ 2653 exponent = (decSig.exponent + semantics->precision 2654 - (calcSemantics.precision - excessPrecision)); 2655 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2656 decSig.partCount(), 2657 truncatedBits); 2658 return normalize(rounding_mode, calcLostFraction); 2659 } 2660 } 2661 } 2662 2663 Expected<IEEEFloat::opStatus> 2664 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) { 2665 decimalInfo D; 2666 opStatus fs; 2667 2668 /* Scan the text. */ 2669 StringRef::iterator p = str.begin(); 2670 if (Error Err = interpretDecimal(p, str.end(), &D)) 2671 return std::move(Err); 2672 2673 /* Handle the quick cases. First the case of no significant digits, 2674 i.e. zero, and then exponents that are obviously too large or too 2675 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 2676 definitely overflows if 2677 2678 (exp - 1) * L >= maxExponent 2679 2680 and definitely underflows to zero where 2681 2682 (exp + 1) * L <= minExponent - precision 2683 2684 With integer arithmetic the tightest bounds for L are 2685 2686 93/28 < L < 196/59 [ numerator <= 256 ] 2687 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 2688 */ 2689 2690 // Test if we have a zero number allowing for strings with no null terminators 2691 // and zero decimals with non-zero exponents. 2692 // 2693 // We computed firstSigDigit by ignoring all zeros and dots. Thus if 2694 // D->firstSigDigit equals str.end(), every digit must be a zero and there can 2695 // be at most one dot. On the other hand, if we have a zero with a non-zero 2696 // exponent, then we know that D.firstSigDigit will be non-numeric. 2697 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { 2698 category = fcZero; 2699 fs = opOK; 2700 2701 /* Check whether the normalized exponent is high enough to overflow 2702 max during the log-rebasing in the max-exponent check below. */ 2703 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 2704 fs = handleOverflow(rounding_mode); 2705 2706 /* If it wasn't, then it also wasn't high enough to overflow max 2707 during the log-rebasing in the min-exponent check. Check that it 2708 won't overflow min in either check, then perform the min-exponent 2709 check. */ 2710 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 2711 (D.normalizedExponent + 1) * 28738 <= 2712 8651 * (semantics->minExponent - (int) semantics->precision)) { 2713 /* Underflow to zero and round. */ 2714 category = fcNormal; 2715 zeroSignificand(); 2716 fs = normalize(rounding_mode, lfLessThanHalf); 2717 2718 /* We can finally safely perform the max-exponent check. */ 2719 } else if ((D.normalizedExponent - 1) * 42039 2720 >= 12655 * semantics->maxExponent) { 2721 /* Overflow and round. */ 2722 fs = handleOverflow(rounding_mode); 2723 } else { 2724 integerPart *decSignificand; 2725 unsigned int partCount; 2726 2727 /* A tight upper bound on number of bits required to hold an 2728 N-digit decimal integer is N * 196 / 59. Allocate enough space 2729 to hold the full significand, and an extra part required by 2730 tcMultiplyPart. */ 2731 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 2732 partCount = partCountForBits(1 + 196 * partCount / 59); 2733 decSignificand = new integerPart[partCount + 1]; 2734 partCount = 0; 2735 2736 /* Convert to binary efficiently - we do almost all multiplication 2737 in an integerPart. When this would overflow do we do a single 2738 bignum multiplication, and then revert again to multiplication 2739 in an integerPart. */ 2740 do { 2741 integerPart decValue, val, multiplier; 2742 2743 val = 0; 2744 multiplier = 1; 2745 2746 do { 2747 if (*p == '.') { 2748 p++; 2749 if (p == str.end()) { 2750 break; 2751 } 2752 } 2753 decValue = decDigitValue(*p++); 2754 if (decValue >= 10U) { 2755 delete[] decSignificand; 2756 return createError("Invalid character in significand"); 2757 } 2758 multiplier *= 10; 2759 val = val * 10 + decValue; 2760 /* The maximum number that can be multiplied by ten with any 2761 digit added without overflowing an integerPart. */ 2762 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 2763 2764 /* Multiply out the current part. */ 2765 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 2766 partCount, partCount + 1, false); 2767 2768 /* If we used another part (likely but not guaranteed), increase 2769 the count. */ 2770 if (decSignificand[partCount]) 2771 partCount++; 2772 } while (p <= D.lastSigDigit); 2773 2774 category = fcNormal; 2775 fs = roundSignificandWithExponent(decSignificand, partCount, 2776 D.exponent, rounding_mode); 2777 2778 delete [] decSignificand; 2779 } 2780 2781 return fs; 2782 } 2783 2784 bool IEEEFloat::convertFromStringSpecials(StringRef str) { 2785 const size_t MIN_NAME_SIZE = 3; 2786 2787 if (str.size() < MIN_NAME_SIZE) 2788 return false; 2789 2790 if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) { 2791 makeInf(false); 2792 return true; 2793 } 2794 2795 bool IsNegative = str.front() == '-'; 2796 if (IsNegative) { 2797 str = str.drop_front(); 2798 if (str.size() < MIN_NAME_SIZE) 2799 return false; 2800 2801 if (str.equals("inf") || str.equals("INFINITY") || str.equals("Inf")) { 2802 makeInf(true); 2803 return true; 2804 } 2805 } 2806 2807 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN. 2808 bool IsSignaling = str.front() == 's' || str.front() == 'S'; 2809 if (IsSignaling) { 2810 str = str.drop_front(); 2811 if (str.size() < MIN_NAME_SIZE) 2812 return false; 2813 } 2814 2815 if (str.startswith("nan") || str.startswith("NaN")) { 2816 str = str.drop_front(3); 2817 2818 // A NaN without payload. 2819 if (str.empty()) { 2820 makeNaN(IsSignaling, IsNegative); 2821 return true; 2822 } 2823 2824 // Allow the payload to be inside parentheses. 2825 if (str.front() == '(') { 2826 // Parentheses should be balanced (and not empty). 2827 if (str.size() <= 2 || str.back() != ')') 2828 return false; 2829 2830 str = str.slice(1, str.size() - 1); 2831 } 2832 2833 // Determine the payload number's radix. 2834 unsigned Radix = 10; 2835 if (str[0] == '0') { 2836 if (str.size() > 1 && tolower(str[1]) == 'x') { 2837 str = str.drop_front(2); 2838 Radix = 16; 2839 } else 2840 Radix = 8; 2841 } 2842 2843 // Parse the payload and make the NaN. 2844 APInt Payload; 2845 if (!str.getAsInteger(Radix, Payload)) { 2846 makeNaN(IsSignaling, IsNegative, &Payload); 2847 return true; 2848 } 2849 } 2850 2851 return false; 2852 } 2853 2854 Expected<IEEEFloat::opStatus> 2855 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) { 2856 if (str.empty()) 2857 return createError("Invalid string length"); 2858 2859 // Handle special cases. 2860 if (convertFromStringSpecials(str)) 2861 return opOK; 2862 2863 /* Handle a leading minus sign. */ 2864 StringRef::iterator p = str.begin(); 2865 size_t slen = str.size(); 2866 sign = *p == '-' ? 1 : 0; 2867 if (*p == '-' || *p == '+') { 2868 p++; 2869 slen--; 2870 if (!slen) 2871 return createError("String has no digits"); 2872 } 2873 2874 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 2875 if (slen == 2) 2876 return createError("Invalid string"); 2877 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 2878 rounding_mode); 2879 } 2880 2881 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 2882 } 2883 2884 /* Write out a hexadecimal representation of the floating point value 2885 to DST, which must be of sufficient size, in the C99 form 2886 [-]0xh.hhhhp[+-]d. Return the number of characters written, 2887 excluding the terminating NUL. 2888 2889 If UPPERCASE, the output is in upper case, otherwise in lower case. 2890 2891 HEXDIGITS digits appear altogether, rounding the value if 2892 necessary. If HEXDIGITS is 0, the minimal precision to display the 2893 number precisely is used instead. If nothing would appear after 2894 the decimal point it is suppressed. 2895 2896 The decimal exponent is always printed and has at least one digit. 2897 Zero values display an exponent of zero. Infinities and NaNs 2898 appear as "infinity" or "nan" respectively. 2899 2900 The above rules are as specified by C99. There is ambiguity about 2901 what the leading hexadecimal digit should be. This implementation 2902 uses whatever is necessary so that the exponent is displayed as 2903 stored. This implies the exponent will fall within the IEEE format 2904 range, and the leading hexadecimal digit will be 0 (for denormals), 2905 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 2906 any other digits zero). 2907 */ 2908 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits, 2909 bool upperCase, 2910 roundingMode rounding_mode) const { 2911 char *p; 2912 2913 p = dst; 2914 if (sign) 2915 *dst++ = '-'; 2916 2917 switch (category) { 2918 case fcInfinity: 2919 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 2920 dst += sizeof infinityL - 1; 2921 break; 2922 2923 case fcNaN: 2924 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 2925 dst += sizeof NaNU - 1; 2926 break; 2927 2928 case fcZero: 2929 *dst++ = '0'; 2930 *dst++ = upperCase ? 'X': 'x'; 2931 *dst++ = '0'; 2932 if (hexDigits > 1) { 2933 *dst++ = '.'; 2934 memset (dst, '0', hexDigits - 1); 2935 dst += hexDigits - 1; 2936 } 2937 *dst++ = upperCase ? 'P': 'p'; 2938 *dst++ = '0'; 2939 break; 2940 2941 case fcNormal: 2942 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 2943 break; 2944 } 2945 2946 *dst = 0; 2947 2948 return static_cast<unsigned int>(dst - p); 2949 } 2950 2951 /* Does the hard work of outputting the correctly rounded hexadecimal 2952 form of a normal floating point number with the specified number of 2953 hexadecimal digits. If HEXDIGITS is zero the minimum number of 2954 digits necessary to print the value precisely is output. */ 2955 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 2956 bool upperCase, 2957 roundingMode rounding_mode) const { 2958 unsigned int count, valueBits, shift, partsCount, outputDigits; 2959 const char *hexDigitChars; 2960 const integerPart *significand; 2961 char *p; 2962 bool roundUp; 2963 2964 *dst++ = '0'; 2965 *dst++ = upperCase ? 'X': 'x'; 2966 2967 roundUp = false; 2968 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 2969 2970 significand = significandParts(); 2971 partsCount = partCount(); 2972 2973 /* +3 because the first digit only uses the single integer bit, so 2974 we have 3 virtual zero most-significant-bits. */ 2975 valueBits = semantics->precision + 3; 2976 shift = integerPartWidth - valueBits % integerPartWidth; 2977 2978 /* The natural number of digits required ignoring trailing 2979 insignificant zeroes. */ 2980 outputDigits = (valueBits - significandLSB () + 3) / 4; 2981 2982 /* hexDigits of zero means use the required number for the 2983 precision. Otherwise, see if we are truncating. If we are, 2984 find out if we need to round away from zero. */ 2985 if (hexDigits) { 2986 if (hexDigits < outputDigits) { 2987 /* We are dropping non-zero bits, so need to check how to round. 2988 "bits" is the number of dropped bits. */ 2989 unsigned int bits; 2990 lostFraction fraction; 2991 2992 bits = valueBits - hexDigits * 4; 2993 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 2994 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 2995 } 2996 outputDigits = hexDigits; 2997 } 2998 2999 /* Write the digits consecutively, and start writing in the location 3000 of the hexadecimal point. We move the most significant digit 3001 left and add the hexadecimal point later. */ 3002 p = ++dst; 3003 3004 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 3005 3006 while (outputDigits && count) { 3007 integerPart part; 3008 3009 /* Put the most significant integerPartWidth bits in "part". */ 3010 if (--count == partsCount) 3011 part = 0; /* An imaginary higher zero part. */ 3012 else 3013 part = significand[count] << shift; 3014 3015 if (count && shift) 3016 part |= significand[count - 1] >> (integerPartWidth - shift); 3017 3018 /* Convert as much of "part" to hexdigits as we can. */ 3019 unsigned int curDigits = integerPartWidth / 4; 3020 3021 if (curDigits > outputDigits) 3022 curDigits = outputDigits; 3023 dst += partAsHex (dst, part, curDigits, hexDigitChars); 3024 outputDigits -= curDigits; 3025 } 3026 3027 if (roundUp) { 3028 char *q = dst; 3029 3030 /* Note that hexDigitChars has a trailing '0'. */ 3031 do { 3032 q--; 3033 *q = hexDigitChars[hexDigitValue (*q) + 1]; 3034 } while (*q == '0'); 3035 assert(q >= p); 3036 } else { 3037 /* Add trailing zeroes. */ 3038 memset (dst, '0', outputDigits); 3039 dst += outputDigits; 3040 } 3041 3042 /* Move the most significant digit to before the point, and if there 3043 is something after the decimal point add it. This must come 3044 after rounding above. */ 3045 p[-1] = p[0]; 3046 if (dst -1 == p) 3047 dst--; 3048 else 3049 p[0] = '.'; 3050 3051 /* Finally output the exponent. */ 3052 *dst++ = upperCase ? 'P': 'p'; 3053 3054 return writeSignedDecimal (dst, exponent); 3055 } 3056 3057 hash_code hash_value(const IEEEFloat &Arg) { 3058 if (!Arg.isFiniteNonZero()) 3059 return hash_combine((uint8_t)Arg.category, 3060 // NaN has no sign, fix it at zero. 3061 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, 3062 Arg.semantics->precision); 3063 3064 // Normal floats need their exponent and significand hashed. 3065 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, 3066 Arg.semantics->precision, Arg.exponent, 3067 hash_combine_range( 3068 Arg.significandParts(), 3069 Arg.significandParts() + Arg.partCount())); 3070 } 3071 3072 // Conversion from APFloat to/from host float/double. It may eventually be 3073 // possible to eliminate these and have everybody deal with APFloats, but that 3074 // will take a while. This approach will not easily extend to long double. 3075 // Current implementation requires integerPartWidth==64, which is correct at 3076 // the moment but could be made more general. 3077 3078 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 3079 // the actual IEEE respresentations. We compensate for that here. 3080 3081 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const { 3082 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended); 3083 assert(partCount()==2); 3084 3085 uint64_t myexponent, mysignificand; 3086 3087 if (isFiniteNonZero()) { 3088 myexponent = exponent+16383; //bias 3089 mysignificand = significandParts()[0]; 3090 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 3091 myexponent = 0; // denormal 3092 } else if (category==fcZero) { 3093 myexponent = 0; 3094 mysignificand = 0; 3095 } else if (category==fcInfinity) { 3096 myexponent = 0x7fff; 3097 mysignificand = 0x8000000000000000ULL; 3098 } else { 3099 assert(category == fcNaN && "Unknown category"); 3100 myexponent = 0x7fff; 3101 mysignificand = significandParts()[0]; 3102 } 3103 3104 uint64_t words[2]; 3105 words[0] = mysignificand; 3106 words[1] = ((uint64_t)(sign & 1) << 15) | 3107 (myexponent & 0x7fffLL); 3108 return APInt(80, words); 3109 } 3110 3111 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const { 3112 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy); 3113 assert(partCount()==2); 3114 3115 uint64_t words[2]; 3116 opStatus fs; 3117 bool losesInfo; 3118 3119 // Convert number to double. To avoid spurious underflows, we re- 3120 // normalize against the "double" minExponent first, and only *then* 3121 // truncate the mantissa. The result of that second conversion 3122 // may be inexact, but should never underflow. 3123 // Declare fltSemantics before APFloat that uses it (and 3124 // saves pointer to it) to ensure correct destruction order. 3125 fltSemantics extendedSemantics = *semantics; 3126 extendedSemantics.minExponent = semIEEEdouble.minExponent; 3127 IEEEFloat extended(*this); 3128 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3129 assert(fs == opOK && !losesInfo); 3130 (void)fs; 3131 3132 IEEEFloat u(extended); 3133 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3134 assert(fs == opOK || fs == opInexact); 3135 (void)fs; 3136 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 3137 3138 // If conversion was exact or resulted in a special case, we're done; 3139 // just set the second double to zero. Otherwise, re-convert back to 3140 // the extended format and compute the difference. This now should 3141 // convert exactly to double. 3142 if (u.isFiniteNonZero() && losesInfo) { 3143 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3144 assert(fs == opOK && !losesInfo); 3145 (void)fs; 3146 3147 IEEEFloat v(extended); 3148 v.subtract(u, rmNearestTiesToEven); 3149 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3150 assert(fs == opOK && !losesInfo); 3151 (void)fs; 3152 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 3153 } else { 3154 words[1] = 0; 3155 } 3156 3157 return APInt(128, words); 3158 } 3159 3160 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const { 3161 assert(semantics == (const llvm::fltSemantics*)&semIEEEquad); 3162 assert(partCount()==2); 3163 3164 uint64_t myexponent, mysignificand, mysignificand2; 3165 3166 if (isFiniteNonZero()) { 3167 myexponent = exponent+16383; //bias 3168 mysignificand = significandParts()[0]; 3169 mysignificand2 = significandParts()[1]; 3170 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) 3171 myexponent = 0; // denormal 3172 } else if (category==fcZero) { 3173 myexponent = 0; 3174 mysignificand = mysignificand2 = 0; 3175 } else if (category==fcInfinity) { 3176 myexponent = 0x7fff; 3177 mysignificand = mysignificand2 = 0; 3178 } else { 3179 assert(category == fcNaN && "Unknown category!"); 3180 myexponent = 0x7fff; 3181 mysignificand = significandParts()[0]; 3182 mysignificand2 = significandParts()[1]; 3183 } 3184 3185 uint64_t words[2]; 3186 words[0] = mysignificand; 3187 words[1] = ((uint64_t)(sign & 1) << 63) | 3188 ((myexponent & 0x7fff) << 48) | 3189 (mysignificand2 & 0xffffffffffffLL); 3190 3191 return APInt(128, words); 3192 } 3193 3194 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const { 3195 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble); 3196 assert(partCount()==1); 3197 3198 uint64_t myexponent, mysignificand; 3199 3200 if (isFiniteNonZero()) { 3201 myexponent = exponent+1023; //bias 3202 mysignificand = *significandParts(); 3203 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 3204 myexponent = 0; // denormal 3205 } else if (category==fcZero) { 3206 myexponent = 0; 3207 mysignificand = 0; 3208 } else if (category==fcInfinity) { 3209 myexponent = 0x7ff; 3210 mysignificand = 0; 3211 } else { 3212 assert(category == fcNaN && "Unknown category!"); 3213 myexponent = 0x7ff; 3214 mysignificand = *significandParts(); 3215 } 3216 3217 return APInt(64, ((((uint64_t)(sign & 1) << 63) | 3218 ((myexponent & 0x7ff) << 52) | 3219 (mysignificand & 0xfffffffffffffLL)))); 3220 } 3221 3222 APInt IEEEFloat::convertFloatAPFloatToAPInt() const { 3223 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle); 3224 assert(partCount()==1); 3225 3226 uint32_t myexponent, mysignificand; 3227 3228 if (isFiniteNonZero()) { 3229 myexponent = exponent+127; //bias 3230 mysignificand = (uint32_t)*significandParts(); 3231 if (myexponent == 1 && !(mysignificand & 0x800000)) 3232 myexponent = 0; // denormal 3233 } else if (category==fcZero) { 3234 myexponent = 0; 3235 mysignificand = 0; 3236 } else if (category==fcInfinity) { 3237 myexponent = 0xff; 3238 mysignificand = 0; 3239 } else { 3240 assert(category == fcNaN && "Unknown category!"); 3241 myexponent = 0xff; 3242 mysignificand = (uint32_t)*significandParts(); 3243 } 3244 3245 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 3246 (mysignificand & 0x7fffff))); 3247 } 3248 3249 APInt IEEEFloat::convertHalfAPFloatToAPInt() const { 3250 assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf); 3251 assert(partCount()==1); 3252 3253 uint32_t myexponent, mysignificand; 3254 3255 if (isFiniteNonZero()) { 3256 myexponent = exponent+15; //bias 3257 mysignificand = (uint32_t)*significandParts(); 3258 if (myexponent == 1 && !(mysignificand & 0x400)) 3259 myexponent = 0; // denormal 3260 } else if (category==fcZero) { 3261 myexponent = 0; 3262 mysignificand = 0; 3263 } else if (category==fcInfinity) { 3264 myexponent = 0x1f; 3265 mysignificand = 0; 3266 } else { 3267 assert(category == fcNaN && "Unknown category!"); 3268 myexponent = 0x1f; 3269 mysignificand = (uint32_t)*significandParts(); 3270 } 3271 3272 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | 3273 (mysignificand & 0x3ff))); 3274 } 3275 3276 // This function creates an APInt that is just a bit map of the floating 3277 // point constant as it would appear in memory. It is not a conversion, 3278 // and treating the result as a normal integer is unlikely to be useful. 3279 3280 APInt IEEEFloat::bitcastToAPInt() const { 3281 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf) 3282 return convertHalfAPFloatToAPInt(); 3283 3284 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle) 3285 return convertFloatAPFloatToAPInt(); 3286 3287 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble) 3288 return convertDoubleAPFloatToAPInt(); 3289 3290 if (semantics == (const llvm::fltSemantics*)&semIEEEquad) 3291 return convertQuadrupleAPFloatToAPInt(); 3292 3293 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy) 3294 return convertPPCDoubleDoubleAPFloatToAPInt(); 3295 3296 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended && 3297 "unknown format!"); 3298 return convertF80LongDoubleAPFloatToAPInt(); 3299 } 3300 3301 float IEEEFloat::convertToFloat() const { 3302 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle && 3303 "Float semantics are not IEEEsingle"); 3304 APInt api = bitcastToAPInt(); 3305 return api.bitsToFloat(); 3306 } 3307 3308 double IEEEFloat::convertToDouble() const { 3309 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble && 3310 "Float semantics are not IEEEdouble"); 3311 APInt api = bitcastToAPInt(); 3312 return api.bitsToDouble(); 3313 } 3314 3315 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3316 /// does not support these bit patterns: 3317 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3318 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3319 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3320 /// exponent = 0, integer bit 1 ("pseudodenormal") 3321 /// At the moment, the first three are treated as NaNs, the last one as Normal. 3322 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) { 3323 assert(api.getBitWidth()==80); 3324 uint64_t i1 = api.getRawData()[0]; 3325 uint64_t i2 = api.getRawData()[1]; 3326 uint64_t myexponent = (i2 & 0x7fff); 3327 uint64_t mysignificand = i1; 3328 uint8_t myintegerbit = mysignificand >> 63; 3329 3330 initialize(&semX87DoubleExtended); 3331 assert(partCount()==2); 3332 3333 sign = static_cast<unsigned int>(i2>>15); 3334 if (myexponent == 0 && mysignificand == 0) { 3335 // exponent, significand meaningless 3336 category = fcZero; 3337 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3338 // exponent, significand meaningless 3339 category = fcInfinity; 3340 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) || 3341 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) { 3342 // exponent meaningless 3343 category = fcNaN; 3344 significandParts()[0] = mysignificand; 3345 significandParts()[1] = 0; 3346 } else { 3347 category = fcNormal; 3348 exponent = myexponent - 16383; 3349 significandParts()[0] = mysignificand; 3350 significandParts()[1] = 0; 3351 if (myexponent==0) // denormal 3352 exponent = -16382; 3353 } 3354 } 3355 3356 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) { 3357 assert(api.getBitWidth()==128); 3358 uint64_t i1 = api.getRawData()[0]; 3359 uint64_t i2 = api.getRawData()[1]; 3360 opStatus fs; 3361 bool losesInfo; 3362 3363 // Get the first double and convert to our format. 3364 initFromDoubleAPInt(APInt(64, i1)); 3365 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3366 assert(fs == opOK && !losesInfo); 3367 (void)fs; 3368 3369 // Unless we have a special case, add in second double. 3370 if (isFiniteNonZero()) { 3371 IEEEFloat v(semIEEEdouble, APInt(64, i2)); 3372 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3373 assert(fs == opOK && !losesInfo); 3374 (void)fs; 3375 3376 add(v, rmNearestTiesToEven); 3377 } 3378 } 3379 3380 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) { 3381 assert(api.getBitWidth()==128); 3382 uint64_t i1 = api.getRawData()[0]; 3383 uint64_t i2 = api.getRawData()[1]; 3384 uint64_t myexponent = (i2 >> 48) & 0x7fff; 3385 uint64_t mysignificand = i1; 3386 uint64_t mysignificand2 = i2 & 0xffffffffffffLL; 3387 3388 initialize(&semIEEEquad); 3389 assert(partCount()==2); 3390 3391 sign = static_cast<unsigned int>(i2>>63); 3392 if (myexponent==0 && 3393 (mysignificand==0 && mysignificand2==0)) { 3394 // exponent, significand meaningless 3395 category = fcZero; 3396 } else if (myexponent==0x7fff && 3397 (mysignificand==0 && mysignificand2==0)) { 3398 // exponent, significand meaningless 3399 category = fcInfinity; 3400 } else if (myexponent==0x7fff && 3401 (mysignificand!=0 || mysignificand2 !=0)) { 3402 // exponent meaningless 3403 category = fcNaN; 3404 significandParts()[0] = mysignificand; 3405 significandParts()[1] = mysignificand2; 3406 } else { 3407 category = fcNormal; 3408 exponent = myexponent - 16383; 3409 significandParts()[0] = mysignificand; 3410 significandParts()[1] = mysignificand2; 3411 if (myexponent==0) // denormal 3412 exponent = -16382; 3413 else 3414 significandParts()[1] |= 0x1000000000000LL; // integer bit 3415 } 3416 } 3417 3418 void IEEEFloat::initFromDoubleAPInt(const APInt &api) { 3419 assert(api.getBitWidth()==64); 3420 uint64_t i = *api.getRawData(); 3421 uint64_t myexponent = (i >> 52) & 0x7ff; 3422 uint64_t mysignificand = i & 0xfffffffffffffLL; 3423 3424 initialize(&semIEEEdouble); 3425 assert(partCount()==1); 3426 3427 sign = static_cast<unsigned int>(i>>63); 3428 if (myexponent==0 && mysignificand==0) { 3429 // exponent, significand meaningless 3430 category = fcZero; 3431 } else if (myexponent==0x7ff && mysignificand==0) { 3432 // exponent, significand meaningless 3433 category = fcInfinity; 3434 } else if (myexponent==0x7ff && mysignificand!=0) { 3435 // exponent meaningless 3436 category = fcNaN; 3437 *significandParts() = mysignificand; 3438 } else { 3439 category = fcNormal; 3440 exponent = myexponent - 1023; 3441 *significandParts() = mysignificand; 3442 if (myexponent==0) // denormal 3443 exponent = -1022; 3444 else 3445 *significandParts() |= 0x10000000000000LL; // integer bit 3446 } 3447 } 3448 3449 void IEEEFloat::initFromFloatAPInt(const APInt &api) { 3450 assert(api.getBitWidth()==32); 3451 uint32_t i = (uint32_t)*api.getRawData(); 3452 uint32_t myexponent = (i >> 23) & 0xff; 3453 uint32_t mysignificand = i & 0x7fffff; 3454 3455 initialize(&semIEEEsingle); 3456 assert(partCount()==1); 3457 3458 sign = i >> 31; 3459 if (myexponent==0 && mysignificand==0) { 3460 // exponent, significand meaningless 3461 category = fcZero; 3462 } else if (myexponent==0xff && mysignificand==0) { 3463 // exponent, significand meaningless 3464 category = fcInfinity; 3465 } else if (myexponent==0xff && mysignificand!=0) { 3466 // sign, exponent, significand meaningless 3467 category = fcNaN; 3468 *significandParts() = mysignificand; 3469 } else { 3470 category = fcNormal; 3471 exponent = myexponent - 127; //bias 3472 *significandParts() = mysignificand; 3473 if (myexponent==0) // denormal 3474 exponent = -126; 3475 else 3476 *significandParts() |= 0x800000; // integer bit 3477 } 3478 } 3479 3480 void IEEEFloat::initFromHalfAPInt(const APInt &api) { 3481 assert(api.getBitWidth()==16); 3482 uint32_t i = (uint32_t)*api.getRawData(); 3483 uint32_t myexponent = (i >> 10) & 0x1f; 3484 uint32_t mysignificand = i & 0x3ff; 3485 3486 initialize(&semIEEEhalf); 3487 assert(partCount()==1); 3488 3489 sign = i >> 15; 3490 if (myexponent==0 && mysignificand==0) { 3491 // exponent, significand meaningless 3492 category = fcZero; 3493 } else if (myexponent==0x1f && mysignificand==0) { 3494 // exponent, significand meaningless 3495 category = fcInfinity; 3496 } else if (myexponent==0x1f && mysignificand!=0) { 3497 // sign, exponent, significand meaningless 3498 category = fcNaN; 3499 *significandParts() = mysignificand; 3500 } else { 3501 category = fcNormal; 3502 exponent = myexponent - 15; //bias 3503 *significandParts() = mysignificand; 3504 if (myexponent==0) // denormal 3505 exponent = -14; 3506 else 3507 *significandParts() |= 0x400; // integer bit 3508 } 3509 } 3510 3511 /// Treat api as containing the bits of a floating point number. Currently 3512 /// we infer the floating point type from the size of the APInt. The 3513 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful 3514 /// when the size is anything else). 3515 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { 3516 if (Sem == &semIEEEhalf) 3517 return initFromHalfAPInt(api); 3518 if (Sem == &semIEEEsingle) 3519 return initFromFloatAPInt(api); 3520 if (Sem == &semIEEEdouble) 3521 return initFromDoubleAPInt(api); 3522 if (Sem == &semX87DoubleExtended) 3523 return initFromF80LongDoubleAPInt(api); 3524 if (Sem == &semIEEEquad) 3525 return initFromQuadrupleAPInt(api); 3526 if (Sem == &semPPCDoubleDoubleLegacy) 3527 return initFromPPCDoubleDoubleAPInt(api); 3528 3529 llvm_unreachable(nullptr); 3530 } 3531 3532 /// Make this number the largest magnitude normal number in the given 3533 /// semantics. 3534 void IEEEFloat::makeLargest(bool Negative) { 3535 // We want (in interchange format): 3536 // sign = {Negative} 3537 // exponent = 1..10 3538 // significand = 1..1 3539 category = fcNormal; 3540 sign = Negative; 3541 exponent = semantics->maxExponent; 3542 3543 // Use memset to set all but the highest integerPart to all ones. 3544 integerPart *significand = significandParts(); 3545 unsigned PartCount = partCount(); 3546 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); 3547 3548 // Set the high integerPart especially setting all unused top bits for 3549 // internal consistency. 3550 const unsigned NumUnusedHighBits = 3551 PartCount*integerPartWidth - semantics->precision; 3552 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth) 3553 ? (~integerPart(0) >> NumUnusedHighBits) 3554 : 0; 3555 } 3556 3557 /// Make this number the smallest magnitude denormal number in the given 3558 /// semantics. 3559 void IEEEFloat::makeSmallest(bool Negative) { 3560 // We want (in interchange format): 3561 // sign = {Negative} 3562 // exponent = 0..0 3563 // significand = 0..01 3564 category = fcNormal; 3565 sign = Negative; 3566 exponent = semantics->minExponent; 3567 APInt::tcSet(significandParts(), 1, partCount()); 3568 } 3569 3570 void IEEEFloat::makeSmallestNormalized(bool Negative) { 3571 // We want (in interchange format): 3572 // sign = {Negative} 3573 // exponent = 0..0 3574 // significand = 10..0 3575 3576 category = fcNormal; 3577 zeroSignificand(); 3578 sign = Negative; 3579 exponent = semantics->minExponent; 3580 significandParts()[partCountForBits(semantics->precision) - 1] |= 3581 (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth)); 3582 } 3583 3584 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) { 3585 initFromAPInt(&Sem, API); 3586 } 3587 3588 IEEEFloat::IEEEFloat(float f) { 3589 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f)); 3590 } 3591 3592 IEEEFloat::IEEEFloat(double d) { 3593 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d)); 3594 } 3595 3596 namespace { 3597 void append(SmallVectorImpl<char> &Buffer, StringRef Str) { 3598 Buffer.append(Str.begin(), Str.end()); 3599 } 3600 3601 /// Removes data from the given significand until it is no more 3602 /// precise than is required for the desired precision. 3603 void AdjustToPrecision(APInt &significand, 3604 int &exp, unsigned FormatPrecision) { 3605 unsigned bits = significand.getActiveBits(); 3606 3607 // 196/59 is a very slight overestimate of lg_2(10). 3608 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59; 3609 3610 if (bits <= bitsRequired) return; 3611 3612 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; 3613 if (!tensRemovable) return; 3614 3615 exp += tensRemovable; 3616 3617 APInt divisor(significand.getBitWidth(), 1); 3618 APInt powten(significand.getBitWidth(), 10); 3619 while (true) { 3620 if (tensRemovable & 1) 3621 divisor *= powten; 3622 tensRemovable >>= 1; 3623 if (!tensRemovable) break; 3624 powten *= powten; 3625 } 3626 3627 significand = significand.udiv(divisor); 3628 3629 // Truncate the significand down to its active bit count. 3630 significand = significand.trunc(significand.getActiveBits()); 3631 } 3632 3633 3634 void AdjustToPrecision(SmallVectorImpl<char> &buffer, 3635 int &exp, unsigned FormatPrecision) { 3636 unsigned N = buffer.size(); 3637 if (N <= FormatPrecision) return; 3638 3639 // The most significant figures are the last ones in the buffer. 3640 unsigned FirstSignificant = N - FormatPrecision; 3641 3642 // Round. 3643 // FIXME: this probably shouldn't use 'round half up'. 3644 3645 // Rounding down is just a truncation, except we also want to drop 3646 // trailing zeros from the new result. 3647 if (buffer[FirstSignificant - 1] < '5') { 3648 while (FirstSignificant < N && buffer[FirstSignificant] == '0') 3649 FirstSignificant++; 3650 3651 exp += FirstSignificant; 3652 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3653 return; 3654 } 3655 3656 // Rounding up requires a decimal add-with-carry. If we continue 3657 // the carry, the newly-introduced zeros will just be truncated. 3658 for (unsigned I = FirstSignificant; I != N; ++I) { 3659 if (buffer[I] == '9') { 3660 FirstSignificant++; 3661 } else { 3662 buffer[I]++; 3663 break; 3664 } 3665 } 3666 3667 // If we carried through, we have exactly one digit of precision. 3668 if (FirstSignificant == N) { 3669 exp += FirstSignificant; 3670 buffer.clear(); 3671 buffer.push_back('1'); 3672 return; 3673 } 3674 3675 exp += FirstSignificant; 3676 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3677 } 3678 } 3679 3680 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, 3681 unsigned FormatMaxPadding, bool TruncateZero) const { 3682 switch (category) { 3683 case fcInfinity: 3684 if (isNegative()) 3685 return append(Str, "-Inf"); 3686 else 3687 return append(Str, "+Inf"); 3688 3689 case fcNaN: return append(Str, "NaN"); 3690 3691 case fcZero: 3692 if (isNegative()) 3693 Str.push_back('-'); 3694 3695 if (!FormatMaxPadding) { 3696 if (TruncateZero) 3697 append(Str, "0.0E+0"); 3698 else { 3699 append(Str, "0.0"); 3700 if (FormatPrecision > 1) 3701 Str.append(FormatPrecision - 1, '0'); 3702 append(Str, "e+00"); 3703 } 3704 } else 3705 Str.push_back('0'); 3706 return; 3707 3708 case fcNormal: 3709 break; 3710 } 3711 3712 if (isNegative()) 3713 Str.push_back('-'); 3714 3715 // Decompose the number into an APInt and an exponent. 3716 int exp = exponent - ((int) semantics->precision - 1); 3717 APInt significand(semantics->precision, 3718 makeArrayRef(significandParts(), 3719 partCountForBits(semantics->precision))); 3720 3721 // Set FormatPrecision if zero. We want to do this before we 3722 // truncate trailing zeros, as those are part of the precision. 3723 if (!FormatPrecision) { 3724 // We use enough digits so the number can be round-tripped back to an 3725 // APFloat. The formula comes from "How to Print Floating-Point Numbers 3726 // Accurately" by Steele and White. 3727 // FIXME: Using a formula based purely on the precision is conservative; 3728 // we can print fewer digits depending on the actual value being printed. 3729 3730 // FormatPrecision = 2 + floor(significandBits / lg_2(10)) 3731 FormatPrecision = 2 + semantics->precision * 59 / 196; 3732 } 3733 3734 // Ignore trailing binary zeros. 3735 int trailingZeros = significand.countTrailingZeros(); 3736 exp += trailingZeros; 3737 significand.lshrInPlace(trailingZeros); 3738 3739 // Change the exponent from 2^e to 10^e. 3740 if (exp == 0) { 3741 // Nothing to do. 3742 } else if (exp > 0) { 3743 // Just shift left. 3744 significand = significand.zext(semantics->precision + exp); 3745 significand <<= exp; 3746 exp = 0; 3747 } else { /* exp < 0 */ 3748 int texp = -exp; 3749 3750 // We transform this using the identity: 3751 // (N)(2^-e) == (N)(5^e)(10^-e) 3752 // This means we have to multiply N (the significand) by 5^e. 3753 // To avoid overflow, we have to operate on numbers large 3754 // enough to store N * 5^e: 3755 // log2(N * 5^e) == log2(N) + e * log2(5) 3756 // <= semantics->precision + e * 137 / 59 3757 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) 3758 3759 unsigned precision = semantics->precision + (137 * texp + 136) / 59; 3760 3761 // Multiply significand by 5^e. 3762 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) 3763 significand = significand.zext(precision); 3764 APInt five_to_the_i(precision, 5); 3765 while (true) { 3766 if (texp & 1) significand *= five_to_the_i; 3767 3768 texp >>= 1; 3769 if (!texp) break; 3770 five_to_the_i *= five_to_the_i; 3771 } 3772 } 3773 3774 AdjustToPrecision(significand, exp, FormatPrecision); 3775 3776 SmallVector<char, 256> buffer; 3777 3778 // Fill the buffer. 3779 unsigned precision = significand.getBitWidth(); 3780 APInt ten(precision, 10); 3781 APInt digit(precision, 0); 3782 3783 bool inTrail = true; 3784 while (significand != 0) { 3785 // digit <- significand % 10 3786 // significand <- significand / 10 3787 APInt::udivrem(significand, ten, significand, digit); 3788 3789 unsigned d = digit.getZExtValue(); 3790 3791 // Drop trailing zeros. 3792 if (inTrail && !d) exp++; 3793 else { 3794 buffer.push_back((char) ('0' + d)); 3795 inTrail = false; 3796 } 3797 } 3798 3799 assert(!buffer.empty() && "no characters in buffer!"); 3800 3801 // Drop down to FormatPrecision. 3802 // TODO: don't do more precise calculations above than are required. 3803 AdjustToPrecision(buffer, exp, FormatPrecision); 3804 3805 unsigned NDigits = buffer.size(); 3806 3807 // Check whether we should use scientific notation. 3808 bool FormatScientific; 3809 if (!FormatMaxPadding) 3810 FormatScientific = true; 3811 else { 3812 if (exp >= 0) { 3813 // 765e3 --> 765000 3814 // ^^^ 3815 // But we shouldn't make the number look more precise than it is. 3816 FormatScientific = ((unsigned) exp > FormatMaxPadding || 3817 NDigits + (unsigned) exp > FormatPrecision); 3818 } else { 3819 // Power of the most significant digit. 3820 int MSD = exp + (int) (NDigits - 1); 3821 if (MSD >= 0) { 3822 // 765e-2 == 7.65 3823 FormatScientific = false; 3824 } else { 3825 // 765e-5 == 0.00765 3826 // ^ ^^ 3827 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; 3828 } 3829 } 3830 } 3831 3832 // Scientific formatting is pretty straightforward. 3833 if (FormatScientific) { 3834 exp += (NDigits - 1); 3835 3836 Str.push_back(buffer[NDigits-1]); 3837 Str.push_back('.'); 3838 if (NDigits == 1 && TruncateZero) 3839 Str.push_back('0'); 3840 else 3841 for (unsigned I = 1; I != NDigits; ++I) 3842 Str.push_back(buffer[NDigits-1-I]); 3843 // Fill with zeros up to FormatPrecision. 3844 if (!TruncateZero && FormatPrecision > NDigits - 1) 3845 Str.append(FormatPrecision - NDigits + 1, '0'); 3846 // For !TruncateZero we use lower 'e'. 3847 Str.push_back(TruncateZero ? 'E' : 'e'); 3848 3849 Str.push_back(exp >= 0 ? '+' : '-'); 3850 if (exp < 0) exp = -exp; 3851 SmallVector<char, 6> expbuf; 3852 do { 3853 expbuf.push_back((char) ('0' + (exp % 10))); 3854 exp /= 10; 3855 } while (exp); 3856 // Exponent always at least two digits if we do not truncate zeros. 3857 if (!TruncateZero && expbuf.size() < 2) 3858 expbuf.push_back('0'); 3859 for (unsigned I = 0, E = expbuf.size(); I != E; ++I) 3860 Str.push_back(expbuf[E-1-I]); 3861 return; 3862 } 3863 3864 // Non-scientific, positive exponents. 3865 if (exp >= 0) { 3866 for (unsigned I = 0; I != NDigits; ++I) 3867 Str.push_back(buffer[NDigits-1-I]); 3868 for (unsigned I = 0; I != (unsigned) exp; ++I) 3869 Str.push_back('0'); 3870 return; 3871 } 3872 3873 // Non-scientific, negative exponents. 3874 3875 // The number of digits to the left of the decimal point. 3876 int NWholeDigits = exp + (int) NDigits; 3877 3878 unsigned I = 0; 3879 if (NWholeDigits > 0) { 3880 for (; I != (unsigned) NWholeDigits; ++I) 3881 Str.push_back(buffer[NDigits-I-1]); 3882 Str.push_back('.'); 3883 } else { 3884 unsigned NZeros = 1 + (unsigned) -NWholeDigits; 3885 3886 Str.push_back('0'); 3887 Str.push_back('.'); 3888 for (unsigned Z = 1; Z != NZeros; ++Z) 3889 Str.push_back('0'); 3890 } 3891 3892 for (; I != NDigits; ++I) 3893 Str.push_back(buffer[NDigits-I-1]); 3894 } 3895 3896 bool IEEEFloat::getExactInverse(APFloat *inv) const { 3897 // Special floats and denormals have no exact inverse. 3898 if (!isFiniteNonZero()) 3899 return false; 3900 3901 // Check that the number is a power of two by making sure that only the 3902 // integer bit is set in the significand. 3903 if (significandLSB() != semantics->precision - 1) 3904 return false; 3905 3906 // Get the inverse. 3907 IEEEFloat reciprocal(*semantics, 1ULL); 3908 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) 3909 return false; 3910 3911 // Avoid multiplication with a denormal, it is not safe on all platforms and 3912 // may be slower than a normal division. 3913 if (reciprocal.isDenormal()) 3914 return false; 3915 3916 assert(reciprocal.isFiniteNonZero() && 3917 reciprocal.significandLSB() == reciprocal.semantics->precision - 1); 3918 3919 if (inv) 3920 *inv = APFloat(reciprocal, *semantics); 3921 3922 return true; 3923 } 3924 3925 bool IEEEFloat::isSignaling() const { 3926 if (!isNaN()) 3927 return false; 3928 3929 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the 3930 // first bit of the trailing significand being 0. 3931 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2); 3932 } 3933 3934 /// IEEE-754R 2008 5.3.1: nextUp/nextDown. 3935 /// 3936 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with 3937 /// appropriate sign switching before/after the computation. 3938 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) { 3939 // If we are performing nextDown, swap sign so we have -x. 3940 if (nextDown) 3941 changeSign(); 3942 3943 // Compute nextUp(x) 3944 opStatus result = opOK; 3945 3946 // Handle each float category separately. 3947 switch (category) { 3948 case fcInfinity: 3949 // nextUp(+inf) = +inf 3950 if (!isNegative()) 3951 break; 3952 // nextUp(-inf) = -getLargest() 3953 makeLargest(true); 3954 break; 3955 case fcNaN: 3956 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. 3957 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not 3958 // change the payload. 3959 if (isSignaling()) { 3960 result = opInvalidOp; 3961 // For consistency, propagate the sign of the sNaN to the qNaN. 3962 makeNaN(false, isNegative(), nullptr); 3963 } 3964 break; 3965 case fcZero: 3966 // nextUp(pm 0) = +getSmallest() 3967 makeSmallest(false); 3968 break; 3969 case fcNormal: 3970 // nextUp(-getSmallest()) = -0 3971 if (isSmallest() && isNegative()) { 3972 APInt::tcSet(significandParts(), 0, partCount()); 3973 category = fcZero; 3974 exponent = 0; 3975 break; 3976 } 3977 3978 // nextUp(getLargest()) == INFINITY 3979 if (isLargest() && !isNegative()) { 3980 APInt::tcSet(significandParts(), 0, partCount()); 3981 category = fcInfinity; 3982 exponent = semantics->maxExponent + 1; 3983 break; 3984 } 3985 3986 // nextUp(normal) == normal + inc. 3987 if (isNegative()) { 3988 // If we are negative, we need to decrement the significand. 3989 3990 // We only cross a binade boundary that requires adjusting the exponent 3991 // if: 3992 // 1. exponent != semantics->minExponent. This implies we are not in the 3993 // smallest binade or are dealing with denormals. 3994 // 2. Our significand excluding the integral bit is all zeros. 3995 bool WillCrossBinadeBoundary = 3996 exponent != semantics->minExponent && isSignificandAllZeros(); 3997 3998 // Decrement the significand. 3999 // 4000 // We always do this since: 4001 // 1. If we are dealing with a non-binade decrement, by definition we 4002 // just decrement the significand. 4003 // 2. If we are dealing with a normal -> normal binade decrement, since 4004 // we have an explicit integral bit the fact that all bits but the 4005 // integral bit are zero implies that subtracting one will yield a 4006 // significand with 0 integral bit and 1 in all other spots. Thus we 4007 // must just adjust the exponent and set the integral bit to 1. 4008 // 3. If we are dealing with a normal -> denormal binade decrement, 4009 // since we set the integral bit to 0 when we represent denormals, we 4010 // just decrement the significand. 4011 integerPart *Parts = significandParts(); 4012 APInt::tcDecrement(Parts, partCount()); 4013 4014 if (WillCrossBinadeBoundary) { 4015 // Our result is a normal number. Do the following: 4016 // 1. Set the integral bit to 1. 4017 // 2. Decrement the exponent. 4018 APInt::tcSetBit(Parts, semantics->precision - 1); 4019 exponent--; 4020 } 4021 } else { 4022 // If we are positive, we need to increment the significand. 4023 4024 // We only cross a binade boundary that requires adjusting the exponent if 4025 // the input is not a denormal and all of said input's significand bits 4026 // are set. If all of said conditions are true: clear the significand, set 4027 // the integral bit to 1, and increment the exponent. If we have a 4028 // denormal always increment since moving denormals and the numbers in the 4029 // smallest normal binade have the same exponent in our representation. 4030 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes(); 4031 4032 if (WillCrossBinadeBoundary) { 4033 integerPart *Parts = significandParts(); 4034 APInt::tcSet(Parts, 0, partCount()); 4035 APInt::tcSetBit(Parts, semantics->precision - 1); 4036 assert(exponent != semantics->maxExponent && 4037 "We can not increment an exponent beyond the maxExponent allowed" 4038 " by the given floating point semantics."); 4039 exponent++; 4040 } else { 4041 incrementSignificand(); 4042 } 4043 } 4044 break; 4045 } 4046 4047 // If we are performing nextDown, swap sign so we have -nextUp(-x) 4048 if (nextDown) 4049 changeSign(); 4050 4051 return result; 4052 } 4053 4054 void IEEEFloat::makeInf(bool Negative) { 4055 category = fcInfinity; 4056 sign = Negative; 4057 exponent = semantics->maxExponent + 1; 4058 APInt::tcSet(significandParts(), 0, partCount()); 4059 } 4060 4061 void IEEEFloat::makeZero(bool Negative) { 4062 category = fcZero; 4063 sign = Negative; 4064 exponent = semantics->minExponent-1; 4065 APInt::tcSet(significandParts(), 0, partCount()); 4066 } 4067 4068 void IEEEFloat::makeQuiet() { 4069 assert(isNaN()); 4070 APInt::tcSetBit(significandParts(), semantics->precision - 2); 4071 } 4072 4073 int ilogb(const IEEEFloat &Arg) { 4074 if (Arg.isNaN()) 4075 return IEEEFloat::IEK_NaN; 4076 if (Arg.isZero()) 4077 return IEEEFloat::IEK_Zero; 4078 if (Arg.isInfinity()) 4079 return IEEEFloat::IEK_Inf; 4080 if (!Arg.isDenormal()) 4081 return Arg.exponent; 4082 4083 IEEEFloat Normalized(Arg); 4084 int SignificandBits = Arg.getSemantics().precision - 1; 4085 4086 Normalized.exponent += SignificandBits; 4087 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero); 4088 return Normalized.exponent - SignificandBits; 4089 } 4090 4091 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) { 4092 auto MaxExp = X.getSemantics().maxExponent; 4093 auto MinExp = X.getSemantics().minExponent; 4094 4095 // If Exp is wildly out-of-scale, simply adding it to X.exponent will 4096 // overflow; clamp it to a safe range before adding, but ensure that the range 4097 // is large enough that the clamp does not change the result. The range we 4098 // need to support is the difference between the largest possible exponent and 4099 // the normalized exponent of half the smallest denormal. 4100 4101 int SignificandBits = X.getSemantics().precision - 1; 4102 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1; 4103 4104 // Clamp to one past the range ends to let normalize handle overlflow. 4105 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement); 4106 X.normalize(RoundingMode, lfExactlyZero); 4107 if (X.isNaN()) 4108 X.makeQuiet(); 4109 return X; 4110 } 4111 4112 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) { 4113 Exp = ilogb(Val); 4114 4115 // Quiet signalling nans. 4116 if (Exp == IEEEFloat::IEK_NaN) { 4117 IEEEFloat Quiet(Val); 4118 Quiet.makeQuiet(); 4119 return Quiet; 4120 } 4121 4122 if (Exp == IEEEFloat::IEK_Inf) 4123 return Val; 4124 4125 // 1 is added because frexp is defined to return a normalized fraction in 4126 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0). 4127 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1; 4128 return scalbn(Val, -Exp, RM); 4129 } 4130 4131 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S) 4132 : Semantics(&S), 4133 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) { 4134 assert(Semantics == &semPPCDoubleDouble); 4135 } 4136 4137 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag) 4138 : Semantics(&S), 4139 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized), 4140 APFloat(semIEEEdouble, uninitialized)}) { 4141 assert(Semantics == &semPPCDoubleDouble); 4142 } 4143 4144 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I) 4145 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I), 4146 APFloat(semIEEEdouble)}) { 4147 assert(Semantics == &semPPCDoubleDouble); 4148 } 4149 4150 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I) 4151 : Semantics(&S), 4152 Floats(new APFloat[2]{ 4153 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])), 4154 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) { 4155 assert(Semantics == &semPPCDoubleDouble); 4156 } 4157 4158 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First, 4159 APFloat &&Second) 4160 : Semantics(&S), 4161 Floats(new APFloat[2]{std::move(First), std::move(Second)}) { 4162 assert(Semantics == &semPPCDoubleDouble); 4163 assert(&Floats[0].getSemantics() == &semIEEEdouble); 4164 assert(&Floats[1].getSemantics() == &semIEEEdouble); 4165 } 4166 4167 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS) 4168 : Semantics(RHS.Semantics), 4169 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]), 4170 APFloat(RHS.Floats[1])} 4171 : nullptr) { 4172 assert(Semantics == &semPPCDoubleDouble); 4173 } 4174 4175 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS) 4176 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) { 4177 RHS.Semantics = &semBogus; 4178 assert(Semantics == &semPPCDoubleDouble); 4179 } 4180 4181 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) { 4182 if (Semantics == RHS.Semantics && RHS.Floats) { 4183 Floats[0] = RHS.Floats[0]; 4184 Floats[1] = RHS.Floats[1]; 4185 } else if (this != &RHS) { 4186 this->~DoubleAPFloat(); 4187 new (this) DoubleAPFloat(RHS); 4188 } 4189 return *this; 4190 } 4191 4192 // Implement addition, subtraction, multiplication and division based on: 4193 // "Software for Doubled-Precision Floating-Point Computations", 4194 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. 4195 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa, 4196 const APFloat &c, const APFloat &cc, 4197 roundingMode RM) { 4198 int Status = opOK; 4199 APFloat z = a; 4200 Status |= z.add(c, RM); 4201 if (!z.isFinite()) { 4202 if (!z.isInfinity()) { 4203 Floats[0] = std::move(z); 4204 Floats[1].makeZero(/* Neg = */ false); 4205 return (opStatus)Status; 4206 } 4207 Status = opOK; 4208 auto AComparedToC = a.compareAbsoluteValue(c); 4209 z = cc; 4210 Status |= z.add(aa, RM); 4211 if (AComparedToC == APFloat::cmpGreaterThan) { 4212 // z = cc + aa + c + a; 4213 Status |= z.add(c, RM); 4214 Status |= z.add(a, RM); 4215 } else { 4216 // z = cc + aa + a + c; 4217 Status |= z.add(a, RM); 4218 Status |= z.add(c, RM); 4219 } 4220 if (!z.isFinite()) { 4221 Floats[0] = std::move(z); 4222 Floats[1].makeZero(/* Neg = */ false); 4223 return (opStatus)Status; 4224 } 4225 Floats[0] = z; 4226 APFloat zz = aa; 4227 Status |= zz.add(cc, RM); 4228 if (AComparedToC == APFloat::cmpGreaterThan) { 4229 // Floats[1] = a - z + c + zz; 4230 Floats[1] = a; 4231 Status |= Floats[1].subtract(z, RM); 4232 Status |= Floats[1].add(c, RM); 4233 Status |= Floats[1].add(zz, RM); 4234 } else { 4235 // Floats[1] = c - z + a + zz; 4236 Floats[1] = c; 4237 Status |= Floats[1].subtract(z, RM); 4238 Status |= Floats[1].add(a, RM); 4239 Status |= Floats[1].add(zz, RM); 4240 } 4241 } else { 4242 // q = a - z; 4243 APFloat q = a; 4244 Status |= q.subtract(z, RM); 4245 4246 // zz = q + c + (a - (q + z)) + aa + cc; 4247 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. 4248 auto zz = q; 4249 Status |= zz.add(c, RM); 4250 Status |= q.add(z, RM); 4251 Status |= q.subtract(a, RM); 4252 q.changeSign(); 4253 Status |= zz.add(q, RM); 4254 Status |= zz.add(aa, RM); 4255 Status |= zz.add(cc, RM); 4256 if (zz.isZero() && !zz.isNegative()) { 4257 Floats[0] = std::move(z); 4258 Floats[1].makeZero(/* Neg = */ false); 4259 return opOK; 4260 } 4261 Floats[0] = z; 4262 Status |= Floats[0].add(zz, RM); 4263 if (!Floats[0].isFinite()) { 4264 Floats[1].makeZero(/* Neg = */ false); 4265 return (opStatus)Status; 4266 } 4267 Floats[1] = std::move(z); 4268 Status |= Floats[1].subtract(Floats[0], RM); 4269 Status |= Floats[1].add(zz, RM); 4270 } 4271 return (opStatus)Status; 4272 } 4273 4274 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS, 4275 const DoubleAPFloat &RHS, 4276 DoubleAPFloat &Out, 4277 roundingMode RM) { 4278 if (LHS.getCategory() == fcNaN) { 4279 Out = LHS; 4280 return opOK; 4281 } 4282 if (RHS.getCategory() == fcNaN) { 4283 Out = RHS; 4284 return opOK; 4285 } 4286 if (LHS.getCategory() == fcZero) { 4287 Out = RHS; 4288 return opOK; 4289 } 4290 if (RHS.getCategory() == fcZero) { 4291 Out = LHS; 4292 return opOK; 4293 } 4294 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity && 4295 LHS.isNegative() != RHS.isNegative()) { 4296 Out.makeNaN(false, Out.isNegative(), nullptr); 4297 return opInvalidOp; 4298 } 4299 if (LHS.getCategory() == fcInfinity) { 4300 Out = LHS; 4301 return opOK; 4302 } 4303 if (RHS.getCategory() == fcInfinity) { 4304 Out = RHS; 4305 return opOK; 4306 } 4307 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal); 4308 4309 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]), 4310 CC(RHS.Floats[1]); 4311 assert(&A.getSemantics() == &semIEEEdouble); 4312 assert(&AA.getSemantics() == &semIEEEdouble); 4313 assert(&C.getSemantics() == &semIEEEdouble); 4314 assert(&CC.getSemantics() == &semIEEEdouble); 4315 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble); 4316 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble); 4317 return Out.addImpl(A, AA, C, CC, RM); 4318 } 4319 4320 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS, 4321 roundingMode RM) { 4322 return addWithSpecial(*this, RHS, *this, RM); 4323 } 4324 4325 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS, 4326 roundingMode RM) { 4327 changeSign(); 4328 auto Ret = add(RHS, RM); 4329 changeSign(); 4330 return Ret; 4331 } 4332 4333 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS, 4334 APFloat::roundingMode RM) { 4335 const auto &LHS = *this; 4336 auto &Out = *this; 4337 /* Interesting observation: For special categories, finding the lowest 4338 common ancestor of the following layered graph gives the correct 4339 return category: 4340 4341 NaN 4342 / \ 4343 Zero Inf 4344 \ / 4345 Normal 4346 4347 e.g. NaN * NaN = NaN 4348 Zero * Inf = NaN 4349 Normal * Zero = Zero 4350 Normal * Inf = Inf 4351 */ 4352 if (LHS.getCategory() == fcNaN) { 4353 Out = LHS; 4354 return opOK; 4355 } 4356 if (RHS.getCategory() == fcNaN) { 4357 Out = RHS; 4358 return opOK; 4359 } 4360 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) || 4361 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) { 4362 Out.makeNaN(false, false, nullptr); 4363 return opOK; 4364 } 4365 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) { 4366 Out = LHS; 4367 return opOK; 4368 } 4369 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) { 4370 Out = RHS; 4371 return opOK; 4372 } 4373 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal && 4374 "Special cases not handled exhaustively"); 4375 4376 int Status = opOK; 4377 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1]; 4378 // t = a * c 4379 APFloat T = A; 4380 Status |= T.multiply(C, RM); 4381 if (!T.isFiniteNonZero()) { 4382 Floats[0] = T; 4383 Floats[1].makeZero(/* Neg = */ false); 4384 return (opStatus)Status; 4385 } 4386 4387 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t). 4388 APFloat Tau = A; 4389 T.changeSign(); 4390 Status |= Tau.fusedMultiplyAdd(C, T, RM); 4391 T.changeSign(); 4392 { 4393 // v = a * d 4394 APFloat V = A; 4395 Status |= V.multiply(D, RM); 4396 // w = b * c 4397 APFloat W = B; 4398 Status |= W.multiply(C, RM); 4399 Status |= V.add(W, RM); 4400 // tau += v + w 4401 Status |= Tau.add(V, RM); 4402 } 4403 // u = t + tau 4404 APFloat U = T; 4405 Status |= U.add(Tau, RM); 4406 4407 Floats[0] = U; 4408 if (!U.isFinite()) { 4409 Floats[1].makeZero(/* Neg = */ false); 4410 } else { 4411 // Floats[1] = (t - u) + tau 4412 Status |= T.subtract(U, RM); 4413 Status |= T.add(Tau, RM); 4414 Floats[1] = T; 4415 } 4416 return (opStatus)Status; 4417 } 4418 4419 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS, 4420 APFloat::roundingMode RM) { 4421 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4422 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4423 auto Ret = 4424 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM); 4425 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4426 return Ret; 4427 } 4428 4429 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) { 4430 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4431 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4432 auto Ret = 4433 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 4434 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4435 return Ret; 4436 } 4437 4438 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) { 4439 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4440 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4441 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 4442 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4443 return Ret; 4444 } 4445 4446 APFloat::opStatus 4447 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, 4448 const DoubleAPFloat &Addend, 4449 APFloat::roundingMode RM) { 4450 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4451 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4452 auto Ret = Tmp.fusedMultiplyAdd( 4453 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()), 4454 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM); 4455 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4456 return Ret; 4457 } 4458 4459 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) { 4460 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4461 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4462 auto Ret = Tmp.roundToIntegral(RM); 4463 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4464 return Ret; 4465 } 4466 4467 void DoubleAPFloat::changeSign() { 4468 Floats[0].changeSign(); 4469 Floats[1].changeSign(); 4470 } 4471 4472 APFloat::cmpResult 4473 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const { 4474 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]); 4475 if (Result != cmpEqual) 4476 return Result; 4477 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]); 4478 if (Result == cmpLessThan || Result == cmpGreaterThan) { 4479 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative(); 4480 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative(); 4481 if (Against && !RHSAgainst) 4482 return cmpLessThan; 4483 if (!Against && RHSAgainst) 4484 return cmpGreaterThan; 4485 if (!Against && !RHSAgainst) 4486 return Result; 4487 if (Against && RHSAgainst) 4488 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result); 4489 } 4490 return Result; 4491 } 4492 4493 APFloat::fltCategory DoubleAPFloat::getCategory() const { 4494 return Floats[0].getCategory(); 4495 } 4496 4497 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); } 4498 4499 void DoubleAPFloat::makeInf(bool Neg) { 4500 Floats[0].makeInf(Neg); 4501 Floats[1].makeZero(/* Neg = */ false); 4502 } 4503 4504 void DoubleAPFloat::makeZero(bool Neg) { 4505 Floats[0].makeZero(Neg); 4506 Floats[1].makeZero(/* Neg = */ false); 4507 } 4508 4509 void DoubleAPFloat::makeLargest(bool Neg) { 4510 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4511 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull)); 4512 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull)); 4513 if (Neg) 4514 changeSign(); 4515 } 4516 4517 void DoubleAPFloat::makeSmallest(bool Neg) { 4518 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4519 Floats[0].makeSmallest(Neg); 4520 Floats[1].makeZero(/* Neg = */ false); 4521 } 4522 4523 void DoubleAPFloat::makeSmallestNormalized(bool Neg) { 4524 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4525 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull)); 4526 if (Neg) 4527 Floats[0].changeSign(); 4528 Floats[1].makeZero(/* Neg = */ false); 4529 } 4530 4531 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) { 4532 Floats[0].makeNaN(SNaN, Neg, fill); 4533 Floats[1].makeZero(/* Neg = */ false); 4534 } 4535 4536 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const { 4537 auto Result = Floats[0].compare(RHS.Floats[0]); 4538 // |Float[0]| > |Float[1]| 4539 if (Result == APFloat::cmpEqual) 4540 return Floats[1].compare(RHS.Floats[1]); 4541 return Result; 4542 } 4543 4544 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const { 4545 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) && 4546 Floats[1].bitwiseIsEqual(RHS.Floats[1]); 4547 } 4548 4549 hash_code hash_value(const DoubleAPFloat &Arg) { 4550 if (Arg.Floats) 4551 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1])); 4552 return hash_combine(Arg.Semantics); 4553 } 4554 4555 APInt DoubleAPFloat::bitcastToAPInt() const { 4556 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4557 uint64_t Data[] = { 4558 Floats[0].bitcastToAPInt().getRawData()[0], 4559 Floats[1].bitcastToAPInt().getRawData()[0], 4560 }; 4561 return APInt(128, 2, Data); 4562 } 4563 4564 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S, 4565 roundingMode RM) { 4566 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4567 APFloat Tmp(semPPCDoubleDoubleLegacy); 4568 auto Ret = Tmp.convertFromString(S, RM); 4569 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4570 return Ret; 4571 } 4572 4573 APFloat::opStatus DoubleAPFloat::next(bool nextDown) { 4574 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4575 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4576 auto Ret = Tmp.next(nextDown); 4577 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4578 return Ret; 4579 } 4580 4581 APFloat::opStatus 4582 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input, 4583 unsigned int Width, bool IsSigned, 4584 roundingMode RM, bool *IsExact) const { 4585 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4586 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 4587 .convertToInteger(Input, Width, IsSigned, RM, IsExact); 4588 } 4589 4590 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input, 4591 bool IsSigned, 4592 roundingMode RM) { 4593 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4594 APFloat Tmp(semPPCDoubleDoubleLegacy); 4595 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM); 4596 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4597 return Ret; 4598 } 4599 4600 APFloat::opStatus 4601 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input, 4602 unsigned int InputSize, 4603 bool IsSigned, roundingMode RM) { 4604 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4605 APFloat Tmp(semPPCDoubleDoubleLegacy); 4606 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM); 4607 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4608 return Ret; 4609 } 4610 4611 APFloat::opStatus 4612 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input, 4613 unsigned int InputSize, 4614 bool IsSigned, roundingMode RM) { 4615 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4616 APFloat Tmp(semPPCDoubleDoubleLegacy); 4617 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM); 4618 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4619 return Ret; 4620 } 4621 4622 unsigned int DoubleAPFloat::convertToHexString(char *DST, 4623 unsigned int HexDigits, 4624 bool UpperCase, 4625 roundingMode RM) const { 4626 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4627 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 4628 .convertToHexString(DST, HexDigits, UpperCase, RM); 4629 } 4630 4631 bool DoubleAPFloat::isDenormal() const { 4632 return getCategory() == fcNormal && 4633 (Floats[0].isDenormal() || Floats[1].isDenormal() || 4634 // (double)(Hi + Lo) == Hi defines a normal number. 4635 Floats[0] != Floats[0] + Floats[1]); 4636 } 4637 4638 bool DoubleAPFloat::isSmallest() const { 4639 if (getCategory() != fcNormal) 4640 return false; 4641 DoubleAPFloat Tmp(*this); 4642 Tmp.makeSmallest(this->isNegative()); 4643 return Tmp.compare(*this) == cmpEqual; 4644 } 4645 4646 bool DoubleAPFloat::isLargest() const { 4647 if (getCategory() != fcNormal) 4648 return false; 4649 DoubleAPFloat Tmp(*this); 4650 Tmp.makeLargest(this->isNegative()); 4651 return Tmp.compare(*this) == cmpEqual; 4652 } 4653 4654 bool DoubleAPFloat::isInteger() const { 4655 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4656 return Floats[0].isInteger() && Floats[1].isInteger(); 4657 } 4658 4659 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str, 4660 unsigned FormatPrecision, 4661 unsigned FormatMaxPadding, 4662 bool TruncateZero) const { 4663 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4664 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 4665 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero); 4666 } 4667 4668 bool DoubleAPFloat::getExactInverse(APFloat *inv) const { 4669 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4670 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4671 if (!inv) 4672 return Tmp.getExactInverse(nullptr); 4673 APFloat Inv(semPPCDoubleDoubleLegacy); 4674 auto Ret = Tmp.getExactInverse(&Inv); 4675 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt()); 4676 return Ret; 4677 } 4678 4679 DoubleAPFloat scalbn(DoubleAPFloat Arg, int Exp, APFloat::roundingMode RM) { 4680 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4681 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM), 4682 scalbn(Arg.Floats[1], Exp, RM)); 4683 } 4684 4685 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp, 4686 APFloat::roundingMode RM) { 4687 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4688 APFloat First = frexp(Arg.Floats[0], Exp, RM); 4689 APFloat Second = Arg.Floats[1]; 4690 if (Arg.getCategory() == APFloat::fcNormal) 4691 Second = scalbn(Second, -Exp, RM); 4692 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second)); 4693 } 4694 4695 } // End detail namespace 4696 4697 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) { 4698 if (usesLayout<IEEEFloat>(Semantics)) { 4699 new (&IEEE) IEEEFloat(std::move(F)); 4700 return; 4701 } 4702 if (usesLayout<DoubleAPFloat>(Semantics)) { 4703 const fltSemantics& S = F.getSemantics(); 4704 new (&Double) 4705 DoubleAPFloat(Semantics, APFloat(std::move(F), S), 4706 APFloat(semIEEEdouble)); 4707 return; 4708 } 4709 llvm_unreachable("Unexpected semantics"); 4710 } 4711 4712 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str, 4713 roundingMode RM) { 4714 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM)); 4715 } 4716 4717 hash_code hash_value(const APFloat &Arg) { 4718 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics())) 4719 return hash_value(Arg.U.IEEE); 4720 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics())) 4721 return hash_value(Arg.U.Double); 4722 llvm_unreachable("Unexpected semantics"); 4723 } 4724 4725 APFloat::APFloat(const fltSemantics &Semantics, StringRef S) 4726 : APFloat(Semantics) { 4727 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven); 4728 assert(StatusOrErr && "Invalid floating point representation"); 4729 consumeError(StatusOrErr.takeError()); 4730 } 4731 4732 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, 4733 roundingMode RM, bool *losesInfo) { 4734 if (&getSemantics() == &ToSemantics) { 4735 *losesInfo = false; 4736 return opOK; 4737 } 4738 if (usesLayout<IEEEFloat>(getSemantics()) && 4739 usesLayout<IEEEFloat>(ToSemantics)) 4740 return U.IEEE.convert(ToSemantics, RM, losesInfo); 4741 if (usesLayout<IEEEFloat>(getSemantics()) && 4742 usesLayout<DoubleAPFloat>(ToSemantics)) { 4743 assert(&ToSemantics == &semPPCDoubleDouble); 4744 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo); 4745 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt()); 4746 return Ret; 4747 } 4748 if (usesLayout<DoubleAPFloat>(getSemantics()) && 4749 usesLayout<IEEEFloat>(ToSemantics)) { 4750 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo); 4751 *this = APFloat(std::move(getIEEE()), ToSemantics); 4752 return Ret; 4753 } 4754 llvm_unreachable("Unexpected semantics"); 4755 } 4756 4757 APFloat APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) { 4758 if (isIEEE) { 4759 switch (BitWidth) { 4760 case 16: 4761 return APFloat(semIEEEhalf, APInt::getAllOnesValue(BitWidth)); 4762 case 32: 4763 return APFloat(semIEEEsingle, APInt::getAllOnesValue(BitWidth)); 4764 case 64: 4765 return APFloat(semIEEEdouble, APInt::getAllOnesValue(BitWidth)); 4766 case 80: 4767 return APFloat(semX87DoubleExtended, APInt::getAllOnesValue(BitWidth)); 4768 case 128: 4769 return APFloat(semIEEEquad, APInt::getAllOnesValue(BitWidth)); 4770 default: 4771 llvm_unreachable("Unknown floating bit width"); 4772 } 4773 } else { 4774 assert(BitWidth == 128); 4775 return APFloat(semPPCDoubleDouble, APInt::getAllOnesValue(BitWidth)); 4776 } 4777 } 4778 4779 void APFloat::print(raw_ostream &OS) const { 4780 SmallVector<char, 16> Buffer; 4781 toString(Buffer); 4782 OS << Buffer << "\n"; 4783 } 4784 4785 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 4786 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); } 4787 #endif 4788 4789 void APFloat::Profile(FoldingSetNodeID &NID) const { 4790 NID.Add(bitcastToAPInt()); 4791 } 4792 4793 /* Same as convertToInteger(integerPart*, ...), except the result is returned in 4794 an APSInt, whose initial bit-width and signed-ness are used to determine the 4795 precision of the conversion. 4796 */ 4797 APFloat::opStatus APFloat::convertToInteger(APSInt &result, 4798 roundingMode rounding_mode, 4799 bool *isExact) const { 4800 unsigned bitWidth = result.getBitWidth(); 4801 SmallVector<uint64_t, 4> parts(result.getNumWords()); 4802 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(), 4803 rounding_mode, isExact); 4804 // Keeps the original signed-ness. 4805 result = APInt(bitWidth, parts); 4806 return status; 4807 } 4808 4809 } // End llvm namespace 4810 4811 #undef APFLOAT_DISPATCH_ON_SEMANTICS 4812