1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a class to represent arbitrary precision floating 10 // point values and provide a variety of arithmetic operations on them. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/APFloat.h" 15 #include "llvm/ADT/APSInt.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/FoldingSet.h" 18 #include "llvm/ADT/Hashing.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Config/llvm-config.h" 22 #include "llvm/Support/Debug.h" 23 #include "llvm/Support/Error.h" 24 #include "llvm/Support/MathExtras.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include <cstring> 27 #include <limits.h> 28 29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \ 30 do { \ 31 if (usesLayout<IEEEFloat>(getSemantics())) \ 32 return U.IEEE.METHOD_CALL; \ 33 if (usesLayout<DoubleAPFloat>(getSemantics())) \ 34 return U.Double.METHOD_CALL; \ 35 llvm_unreachable("Unexpected semantics"); \ 36 } while (false) 37 38 using namespace llvm; 39 40 /// A macro used to combine two fcCategory enums into one key which can be used 41 /// in a switch statement to classify how the interaction of two APFloat's 42 /// categories affects an operation. 43 /// 44 /// TODO: If clang source code is ever allowed to use constexpr in its own 45 /// codebase, change this into a static inline function. 46 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs)) 47 48 /* Assumed in hexadecimal significand parsing, and conversion to 49 hexadecimal strings. */ 50 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); 51 52 namespace llvm { 53 /* Represents floating point arithmetic semantics. */ 54 struct fltSemantics { 55 /* The largest E such that 2^E is representable; this matches the 56 definition of IEEE 754. */ 57 APFloatBase::ExponentType maxExponent; 58 59 /* The smallest E such that 2^E is a normalized number; this 60 matches the definition of IEEE 754. */ 61 APFloatBase::ExponentType minExponent; 62 63 /* Number of bits in the significand. This includes the integer 64 bit. */ 65 unsigned int precision; 66 67 /* Number of bits actually used in the semantics. */ 68 unsigned int sizeInBits; 69 }; 70 71 static const fltSemantics semIEEEhalf = {15, -14, 11, 16}; 72 static const fltSemantics semBFloat = {127, -126, 8, 16}; 73 static const fltSemantics semIEEEsingle = {127, -126, 24, 32}; 74 static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64}; 75 static const fltSemantics semIEEEquad = {16383, -16382, 113, 128}; 76 static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80}; 77 static const fltSemantics semBogus = {0, 0, 0, 0}; 78 79 /* The IBM double-double semantics. Such a number consists of a pair of IEEE 80 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal, 81 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo. 82 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent 83 to each other, and two 11-bit exponents. 84 85 Note: we need to make the value different from semBogus as otherwise 86 an unsafe optimization may collapse both values to a single address, 87 and we heavily rely on them having distinct addresses. */ 88 static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0}; 89 90 /* These are legacy semantics for the fallback, inaccrurate implementation of 91 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the 92 operation. It's equivalent to having an IEEE number with consecutive 106 93 bits of mantissa and 11 bits of exponent. 94 95 It's not equivalent to IBM double-double. For example, a legit IBM 96 double-double, 1 + epsilon: 97 98 1 + epsilon = 1 + (1 >> 1076) 99 100 is not representable by a consecutive 106 bits of mantissa. 101 102 Currently, these semantics are used in the following way: 103 104 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) -> 105 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) -> 106 semPPCDoubleDoubleLegacy -> IEEE operations 107 108 We use bitcastToAPInt() to get the bit representation (in APInt) of the 109 underlying IEEEdouble, then use the APInt constructor to construct the 110 legacy IEEE float. 111 112 TODO: Implement all operations in semPPCDoubleDouble, and delete these 113 semantics. */ 114 static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, 115 53 + 53, 128}; 116 117 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { 118 switch (S) { 119 case S_IEEEhalf: 120 return IEEEhalf(); 121 case S_BFloat: 122 return BFloat(); 123 case S_IEEEsingle: 124 return IEEEsingle(); 125 case S_IEEEdouble: 126 return IEEEdouble(); 127 case S_x87DoubleExtended: 128 return x87DoubleExtended(); 129 case S_IEEEquad: 130 return IEEEquad(); 131 case S_PPCDoubleDouble: 132 return PPCDoubleDouble(); 133 } 134 llvm_unreachable("Unrecognised floating semantics"); 135 } 136 137 APFloatBase::Semantics 138 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { 139 if (&Sem == &llvm::APFloat::IEEEhalf()) 140 return S_IEEEhalf; 141 else if (&Sem == &llvm::APFloat::BFloat()) 142 return S_BFloat; 143 else if (&Sem == &llvm::APFloat::IEEEsingle()) 144 return S_IEEEsingle; 145 else if (&Sem == &llvm::APFloat::IEEEdouble()) 146 return S_IEEEdouble; 147 else if (&Sem == &llvm::APFloat::x87DoubleExtended()) 148 return S_x87DoubleExtended; 149 else if (&Sem == &llvm::APFloat::IEEEquad()) 150 return S_IEEEquad; 151 else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) 152 return S_PPCDoubleDouble; 153 else 154 llvm_unreachable("Unknown floating semantics"); 155 } 156 157 const fltSemantics &APFloatBase::IEEEhalf() { 158 return semIEEEhalf; 159 } 160 const fltSemantics &APFloatBase::BFloat() { 161 return semBFloat; 162 } 163 const fltSemantics &APFloatBase::IEEEsingle() { 164 return semIEEEsingle; 165 } 166 const fltSemantics &APFloatBase::IEEEdouble() { 167 return semIEEEdouble; 168 } 169 const fltSemantics &APFloatBase::IEEEquad() { 170 return semIEEEquad; 171 } 172 const fltSemantics &APFloatBase::x87DoubleExtended() { 173 return semX87DoubleExtended; 174 } 175 const fltSemantics &APFloatBase::Bogus() { 176 return semBogus; 177 } 178 const fltSemantics &APFloatBase::PPCDoubleDouble() { 179 return semPPCDoubleDouble; 180 } 181 182 constexpr RoundingMode APFloatBase::rmNearestTiesToEven; 183 constexpr RoundingMode APFloatBase::rmTowardPositive; 184 constexpr RoundingMode APFloatBase::rmTowardNegative; 185 constexpr RoundingMode APFloatBase::rmTowardZero; 186 constexpr RoundingMode APFloatBase::rmNearestTiesToAway; 187 188 /* A tight upper bound on number of parts required to hold the value 189 pow(5, power) is 190 191 power * 815 / (351 * integerPartWidth) + 1 192 193 However, whilst the result may require only this many parts, 194 because we are multiplying two values to get it, the 195 multiplication may require an extra part with the excess part 196 being zero (consider the trivial case of 1 * 1, tcFullMultiply 197 requires two parts to hold the single-part result). So we add an 198 extra one to guarantee enough space whilst multiplying. */ 199 const unsigned int maxExponent = 16383; 200 const unsigned int maxPrecision = 113; 201 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 202 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); 203 204 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { 205 return semantics.precision; 206 } 207 APFloatBase::ExponentType 208 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { 209 return semantics.maxExponent; 210 } 211 APFloatBase::ExponentType 212 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { 213 return semantics.minExponent; 214 } 215 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { 216 return semantics.sizeInBits; 217 } 218 219 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { 220 return Sem.sizeInBits; 221 } 222 223 /* A bunch of private, handy routines. */ 224 225 static inline Error createError(const Twine &Err) { 226 return make_error<StringError>(Err, inconvertibleErrorCode()); 227 } 228 229 static inline unsigned int 230 partCountForBits(unsigned int bits) 231 { 232 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth; 233 } 234 235 /* Returns 0U-9U. Return values >= 10U are not digits. */ 236 static inline unsigned int 237 decDigitValue(unsigned int c) 238 { 239 return c - '0'; 240 } 241 242 /* Return the value of a decimal exponent of the form 243 [+-]ddddddd. 244 245 If the exponent overflows, returns a large exponent with the 246 appropriate sign. */ 247 static Expected<int> readExponent(StringRef::iterator begin, 248 StringRef::iterator end) { 249 bool isNegative; 250 unsigned int absExponent; 251 const unsigned int overlargeExponent = 24000; /* FIXME. */ 252 StringRef::iterator p = begin; 253 254 // Treat no exponent as 0 to match binutils 255 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) { 256 return 0; 257 } 258 259 isNegative = (*p == '-'); 260 if (*p == '-' || *p == '+') { 261 p++; 262 if (p == end) 263 return createError("Exponent has no digits"); 264 } 265 266 absExponent = decDigitValue(*p++); 267 if (absExponent >= 10U) 268 return createError("Invalid character in exponent"); 269 270 for (; p != end; ++p) { 271 unsigned int value; 272 273 value = decDigitValue(*p); 274 if (value >= 10U) 275 return createError("Invalid character in exponent"); 276 277 absExponent = absExponent * 10U + value; 278 if (absExponent >= overlargeExponent) { 279 absExponent = overlargeExponent; 280 break; 281 } 282 } 283 284 if (isNegative) 285 return -(int) absExponent; 286 else 287 return (int) absExponent; 288 } 289 290 /* This is ugly and needs cleaning up, but I don't immediately see 291 how whilst remaining safe. */ 292 static Expected<int> totalExponent(StringRef::iterator p, 293 StringRef::iterator end, 294 int exponentAdjustment) { 295 int unsignedExponent; 296 bool negative, overflow; 297 int exponent = 0; 298 299 if (p == end) 300 return createError("Exponent has no digits"); 301 302 negative = *p == '-'; 303 if (*p == '-' || *p == '+') { 304 p++; 305 if (p == end) 306 return createError("Exponent has no digits"); 307 } 308 309 unsignedExponent = 0; 310 overflow = false; 311 for (; p != end; ++p) { 312 unsigned int value; 313 314 value = decDigitValue(*p); 315 if (value >= 10U) 316 return createError("Invalid character in exponent"); 317 318 unsignedExponent = unsignedExponent * 10 + value; 319 if (unsignedExponent > 32767) { 320 overflow = true; 321 break; 322 } 323 } 324 325 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 326 overflow = true; 327 328 if (!overflow) { 329 exponent = unsignedExponent; 330 if (negative) 331 exponent = -exponent; 332 exponent += exponentAdjustment; 333 if (exponent > 32767 || exponent < -32768) 334 overflow = true; 335 } 336 337 if (overflow) 338 exponent = negative ? -32768: 32767; 339 340 return exponent; 341 } 342 343 static Expected<StringRef::iterator> 344 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 345 StringRef::iterator *dot) { 346 StringRef::iterator p = begin; 347 *dot = end; 348 while (p != end && *p == '0') 349 p++; 350 351 if (p != end && *p == '.') { 352 *dot = p++; 353 354 if (end - begin == 1) 355 return createError("Significand has no digits"); 356 357 while (p != end && *p == '0') 358 p++; 359 } 360 361 return p; 362 } 363 364 /* Given a normal decimal floating point number of the form 365 366 dddd.dddd[eE][+-]ddd 367 368 where the decimal point and exponent are optional, fill out the 369 structure D. Exponent is appropriate if the significand is 370 treated as an integer, and normalizedExponent if the significand 371 is taken to have the decimal point after a single leading 372 non-zero digit. 373 374 If the value is zero, V->firstSigDigit points to a non-digit, and 375 the return exponent is zero. 376 */ 377 struct decimalInfo { 378 const char *firstSigDigit; 379 const char *lastSigDigit; 380 int exponent; 381 int normalizedExponent; 382 }; 383 384 static Error interpretDecimal(StringRef::iterator begin, 385 StringRef::iterator end, decimalInfo *D) { 386 StringRef::iterator dot = end; 387 388 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); 389 if (!PtrOrErr) 390 return PtrOrErr.takeError(); 391 StringRef::iterator p = *PtrOrErr; 392 393 D->firstSigDigit = p; 394 D->exponent = 0; 395 D->normalizedExponent = 0; 396 397 for (; p != end; ++p) { 398 if (*p == '.') { 399 if (dot != end) 400 return createError("String contains multiple dots"); 401 dot = p++; 402 if (p == end) 403 break; 404 } 405 if (decDigitValue(*p) >= 10U) 406 break; 407 } 408 409 if (p != end) { 410 if (*p != 'e' && *p != 'E') 411 return createError("Invalid character in significand"); 412 if (p == begin) 413 return createError("Significand has no digits"); 414 if (dot != end && p - begin == 1) 415 return createError("Significand has no digits"); 416 417 /* p points to the first non-digit in the string */ 418 auto ExpOrErr = readExponent(p + 1, end); 419 if (!ExpOrErr) 420 return ExpOrErr.takeError(); 421 D->exponent = *ExpOrErr; 422 423 /* Implied decimal point? */ 424 if (dot == end) 425 dot = p; 426 } 427 428 /* If number is all zeroes accept any exponent. */ 429 if (p != D->firstSigDigit) { 430 /* Drop insignificant trailing zeroes. */ 431 if (p != begin) { 432 do 433 do 434 p--; 435 while (p != begin && *p == '0'); 436 while (p != begin && *p == '.'); 437 } 438 439 /* Adjust the exponents for any decimal point. */ 440 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p)); 441 D->normalizedExponent = (D->exponent + 442 static_cast<APFloat::ExponentType>((p - D->firstSigDigit) 443 - (dot > D->firstSigDigit && dot < p))); 444 } 445 446 D->lastSigDigit = p; 447 return Error::success(); 448 } 449 450 /* Return the trailing fraction of a hexadecimal number. 451 DIGITVALUE is the first hex digit of the fraction, P points to 452 the next digit. */ 453 static Expected<lostFraction> 454 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 455 unsigned int digitValue) { 456 unsigned int hexDigit; 457 458 /* If the first trailing digit isn't 0 or 8 we can work out the 459 fraction immediately. */ 460 if (digitValue > 8) 461 return lfMoreThanHalf; 462 else if (digitValue < 8 && digitValue > 0) 463 return lfLessThanHalf; 464 465 // Otherwise we need to find the first non-zero digit. 466 while (p != end && (*p == '0' || *p == '.')) 467 p++; 468 469 if (p == end) 470 return createError("Invalid trailing hexadecimal fraction!"); 471 472 hexDigit = hexDigitValue(*p); 473 474 /* If we ran off the end it is exactly zero or one-half, otherwise 475 a little more. */ 476 if (hexDigit == -1U) 477 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 478 else 479 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 480 } 481 482 /* Return the fraction lost were a bignum truncated losing the least 483 significant BITS bits. */ 484 static lostFraction 485 lostFractionThroughTruncation(const APFloatBase::integerPart *parts, 486 unsigned int partCount, 487 unsigned int bits) 488 { 489 unsigned int lsb; 490 491 lsb = APInt::tcLSB(parts, partCount); 492 493 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 494 if (bits <= lsb) 495 return lfExactlyZero; 496 if (bits == lsb + 1) 497 return lfExactlyHalf; 498 if (bits <= partCount * APFloatBase::integerPartWidth && 499 APInt::tcExtractBit(parts, bits - 1)) 500 return lfMoreThanHalf; 501 502 return lfLessThanHalf; 503 } 504 505 /* Shift DST right BITS bits noting lost fraction. */ 506 static lostFraction 507 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits) 508 { 509 lostFraction lost_fraction; 510 511 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 512 513 APInt::tcShiftRight(dst, parts, bits); 514 515 return lost_fraction; 516 } 517 518 /* Combine the effect of two lost fractions. */ 519 static lostFraction 520 combineLostFractions(lostFraction moreSignificant, 521 lostFraction lessSignificant) 522 { 523 if (lessSignificant != lfExactlyZero) { 524 if (moreSignificant == lfExactlyZero) 525 moreSignificant = lfLessThanHalf; 526 else if (moreSignificant == lfExactlyHalf) 527 moreSignificant = lfMoreThanHalf; 528 } 529 530 return moreSignificant; 531 } 532 533 /* The error from the true value, in half-ulps, on multiplying two 534 floating point numbers, which differ from the value they 535 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 536 than the returned value. 537 538 See "How to Read Floating Point Numbers Accurately" by William D 539 Clinger. */ 540 static unsigned int 541 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 542 { 543 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 544 545 if (HUerr1 + HUerr2 == 0) 546 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 547 else 548 return inexactMultiply + 2 * (HUerr1 + HUerr2); 549 } 550 551 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 552 when the least significant BITS are truncated. BITS cannot be 553 zero. */ 554 static APFloatBase::integerPart 555 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, 556 bool isNearest) { 557 unsigned int count, partBits; 558 APFloatBase::integerPart part, boundary; 559 560 assert(bits != 0); 561 562 bits--; 563 count = bits / APFloatBase::integerPartWidth; 564 partBits = bits % APFloatBase::integerPartWidth + 1; 565 566 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits)); 567 568 if (isNearest) 569 boundary = (APFloatBase::integerPart) 1 << (partBits - 1); 570 else 571 boundary = 0; 572 573 if (count == 0) { 574 if (part - boundary <= boundary - part) 575 return part - boundary; 576 else 577 return boundary - part; 578 } 579 580 if (part == boundary) { 581 while (--count) 582 if (parts[count]) 583 return ~(APFloatBase::integerPart) 0; /* A lot. */ 584 585 return parts[0]; 586 } else if (part == boundary - 1) { 587 while (--count) 588 if (~parts[count]) 589 return ~(APFloatBase::integerPart) 0; /* A lot. */ 590 591 return -parts[0]; 592 } 593 594 return ~(APFloatBase::integerPart) 0; /* A lot. */ 595 } 596 597 /* Place pow(5, power) in DST, and return the number of parts used. 598 DST must be at least one part larger than size of the answer. */ 599 static unsigned int 600 powerOf5(APFloatBase::integerPart *dst, unsigned int power) { 601 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 }; 602 APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; 603 pow5s[0] = 78125 * 5; 604 605 unsigned int partsCount[16] = { 1 }; 606 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 607 unsigned int result; 608 assert(power <= maxExponent); 609 610 p1 = dst; 611 p2 = scratch; 612 613 *p1 = firstEightPowers[power & 7]; 614 power >>= 3; 615 616 result = 1; 617 pow5 = pow5s; 618 619 for (unsigned int n = 0; power; power >>= 1, n++) { 620 unsigned int pc; 621 622 pc = partsCount[n]; 623 624 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ 625 if (pc == 0) { 626 pc = partsCount[n - 1]; 627 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); 628 pc *= 2; 629 if (pow5[pc - 1] == 0) 630 pc--; 631 partsCount[n] = pc; 632 } 633 634 if (power & 1) { 635 APFloatBase::integerPart *tmp; 636 637 APInt::tcFullMultiply(p2, p1, pow5, result, pc); 638 result += pc; 639 if (p2[result - 1] == 0) 640 result--; 641 642 /* Now result is in p1 with partsCount parts and p2 is scratch 643 space. */ 644 tmp = p1; 645 p1 = p2; 646 p2 = tmp; 647 } 648 649 pow5 += pc; 650 } 651 652 if (p1 != dst) 653 APInt::tcAssign(dst, p1, result); 654 655 return result; 656 } 657 658 /* Zero at the end to avoid modular arithmetic when adding one; used 659 when rounding up during hexadecimal output. */ 660 static const char hexDigitsLower[] = "0123456789abcdef0"; 661 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 662 static const char infinityL[] = "infinity"; 663 static const char infinityU[] = "INFINITY"; 664 static const char NaNL[] = "nan"; 665 static const char NaNU[] = "NAN"; 666 667 /* Write out an integerPart in hexadecimal, starting with the most 668 significant nibble. Write out exactly COUNT hexdigits, return 669 COUNT. */ 670 static unsigned int 671 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count, 672 const char *hexDigitChars) 673 { 674 unsigned int result = count; 675 676 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4); 677 678 part >>= (APFloatBase::integerPartWidth - 4 * count); 679 while (count--) { 680 dst[count] = hexDigitChars[part & 0xf]; 681 part >>= 4; 682 } 683 684 return result; 685 } 686 687 /* Write out an unsigned decimal integer. */ 688 static char * 689 writeUnsignedDecimal (char *dst, unsigned int n) 690 { 691 char buff[40], *p; 692 693 p = buff; 694 do 695 *p++ = '0' + n % 10; 696 while (n /= 10); 697 698 do 699 *dst++ = *--p; 700 while (p != buff); 701 702 return dst; 703 } 704 705 /* Write out a signed decimal integer. */ 706 static char * 707 writeSignedDecimal (char *dst, int value) 708 { 709 if (value < 0) { 710 *dst++ = '-'; 711 dst = writeUnsignedDecimal(dst, -(unsigned) value); 712 } else 713 dst = writeUnsignedDecimal(dst, value); 714 715 return dst; 716 } 717 718 namespace detail { 719 /* Constructors. */ 720 void IEEEFloat::initialize(const fltSemantics *ourSemantics) { 721 unsigned int count; 722 723 semantics = ourSemantics; 724 count = partCount(); 725 if (count > 1) 726 significand.parts = new integerPart[count]; 727 } 728 729 void IEEEFloat::freeSignificand() { 730 if (needsCleanup()) 731 delete [] significand.parts; 732 } 733 734 void IEEEFloat::assign(const IEEEFloat &rhs) { 735 assert(semantics == rhs.semantics); 736 737 sign = rhs.sign; 738 category = rhs.category; 739 exponent = rhs.exponent; 740 if (isFiniteNonZero() || category == fcNaN) 741 copySignificand(rhs); 742 } 743 744 void IEEEFloat::copySignificand(const IEEEFloat &rhs) { 745 assert(isFiniteNonZero() || category == fcNaN); 746 assert(rhs.partCount() >= partCount()); 747 748 APInt::tcAssign(significandParts(), rhs.significandParts(), 749 partCount()); 750 } 751 752 /* Make this number a NaN, with an arbitrary but deterministic value 753 for the significand. If double or longer, this is a signalling NaN, 754 which may not be ideal. If float, this is QNaN(0). */ 755 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { 756 category = fcNaN; 757 sign = Negative; 758 exponent = exponentNaN(); 759 760 integerPart *significand = significandParts(); 761 unsigned numParts = partCount(); 762 763 // Set the significand bits to the fill. 764 if (!fill || fill->getNumWords() < numParts) 765 APInt::tcSet(significand, 0, numParts); 766 if (fill) { 767 APInt::tcAssign(significand, fill->getRawData(), 768 std::min(fill->getNumWords(), numParts)); 769 770 // Zero out the excess bits of the significand. 771 unsigned bitsToPreserve = semantics->precision - 1; 772 unsigned part = bitsToPreserve / 64; 773 bitsToPreserve %= 64; 774 significand[part] &= ((1ULL << bitsToPreserve) - 1); 775 for (part++; part != numParts; ++part) 776 significand[part] = 0; 777 } 778 779 unsigned QNaNBit = semantics->precision - 2; 780 781 if (SNaN) { 782 // We always have to clear the QNaN bit to make it an SNaN. 783 APInt::tcClearBit(significand, QNaNBit); 784 785 // If there are no bits set in the payload, we have to set 786 // *something* to make it a NaN instead of an infinity; 787 // conventionally, this is the next bit down from the QNaN bit. 788 if (APInt::tcIsZero(significand, numParts)) 789 APInt::tcSetBit(significand, QNaNBit - 1); 790 } else { 791 // We always have to set the QNaN bit to make it a QNaN. 792 APInt::tcSetBit(significand, QNaNBit); 793 } 794 795 // For x87 extended precision, we want to make a NaN, not a 796 // pseudo-NaN. Maybe we should expose the ability to make 797 // pseudo-NaNs? 798 if (semantics == &semX87DoubleExtended) 799 APInt::tcSetBit(significand, QNaNBit + 1); 800 } 801 802 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) { 803 if (this != &rhs) { 804 if (semantics != rhs.semantics) { 805 freeSignificand(); 806 initialize(rhs.semantics); 807 } 808 assign(rhs); 809 } 810 811 return *this; 812 } 813 814 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) { 815 freeSignificand(); 816 817 semantics = rhs.semantics; 818 significand = rhs.significand; 819 exponent = rhs.exponent; 820 category = rhs.category; 821 sign = rhs.sign; 822 823 rhs.semantics = &semBogus; 824 return *this; 825 } 826 827 bool IEEEFloat::isDenormal() const { 828 return isFiniteNonZero() && (exponent == semantics->minExponent) && 829 (APInt::tcExtractBit(significandParts(), 830 semantics->precision - 1) == 0); 831 } 832 833 bool IEEEFloat::isSmallest() const { 834 // The smallest number by magnitude in our format will be the smallest 835 // denormal, i.e. the floating point number with exponent being minimum 836 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 837 return isFiniteNonZero() && exponent == semantics->minExponent && 838 significandMSB() == 0; 839 } 840 841 bool IEEEFloat::isSignificandAllOnes() const { 842 // Test if the significand excluding the integral bit is all ones. This allows 843 // us to test for binade boundaries. 844 const integerPart *Parts = significandParts(); 845 const unsigned PartCount = partCountForBits(semantics->precision); 846 for (unsigned i = 0; i < PartCount - 1; i++) 847 if (~Parts[i]) 848 return false; 849 850 // Set the unused high bits to all ones when we compare. 851 const unsigned NumHighBits = 852 PartCount*integerPartWidth - semantics->precision + 1; 853 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 854 "Can not have more high bits to fill than integerPartWidth"); 855 const integerPart HighBitFill = 856 ~integerPart(0) << (integerPartWidth - NumHighBits); 857 if (~(Parts[PartCount - 1] | HighBitFill)) 858 return false; 859 860 return true; 861 } 862 863 bool IEEEFloat::isSignificandAllZeros() const { 864 // Test if the significand excluding the integral bit is all zeros. This 865 // allows us to test for binade boundaries. 866 const integerPart *Parts = significandParts(); 867 const unsigned PartCount = partCountForBits(semantics->precision); 868 869 for (unsigned i = 0; i < PartCount - 1; i++) 870 if (Parts[i]) 871 return false; 872 873 // Compute how many bits are used in the final word. 874 const unsigned NumHighBits = 875 PartCount*integerPartWidth - semantics->precision + 1; 876 assert(NumHighBits < integerPartWidth && "Can not have more high bits to " 877 "clear than integerPartWidth"); 878 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 879 880 if (Parts[PartCount - 1] & HighBitMask) 881 return false; 882 883 return true; 884 } 885 886 bool IEEEFloat::isLargest() const { 887 // The largest number by magnitude in our format will be the floating point 888 // number with maximum exponent and with significand that is all ones. 889 return isFiniteNonZero() && exponent == semantics->maxExponent 890 && isSignificandAllOnes(); 891 } 892 893 bool IEEEFloat::isInteger() const { 894 // This could be made more efficient; I'm going for obviously correct. 895 if (!isFinite()) return false; 896 IEEEFloat truncated = *this; 897 truncated.roundToIntegral(rmTowardZero); 898 return compare(truncated) == cmpEqual; 899 } 900 901 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const { 902 if (this == &rhs) 903 return true; 904 if (semantics != rhs.semantics || 905 category != rhs.category || 906 sign != rhs.sign) 907 return false; 908 if (category==fcZero || category==fcInfinity) 909 return true; 910 911 if (isFiniteNonZero() && exponent != rhs.exponent) 912 return false; 913 914 return std::equal(significandParts(), significandParts() + partCount(), 915 rhs.significandParts()); 916 } 917 918 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) { 919 initialize(&ourSemantics); 920 sign = 0; 921 category = fcNormal; 922 zeroSignificand(); 923 exponent = ourSemantics.precision - 1; 924 significandParts()[0] = value; 925 normalize(rmNearestTiesToEven, lfExactlyZero); 926 } 927 928 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) { 929 initialize(&ourSemantics); 930 makeZero(false); 931 } 932 933 // Delegate to the previous constructor, because later copy constructor may 934 // actually inspects category, which can't be garbage. 935 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag) 936 : IEEEFloat(ourSemantics) {} 937 938 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) { 939 initialize(rhs.semantics); 940 assign(rhs); 941 } 942 943 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) { 944 *this = std::move(rhs); 945 } 946 947 IEEEFloat::~IEEEFloat() { freeSignificand(); } 948 949 unsigned int IEEEFloat::partCount() const { 950 return partCountForBits(semantics->precision + 1); 951 } 952 953 const IEEEFloat::integerPart *IEEEFloat::significandParts() const { 954 return const_cast<IEEEFloat *>(this)->significandParts(); 955 } 956 957 IEEEFloat::integerPart *IEEEFloat::significandParts() { 958 if (partCount() > 1) 959 return significand.parts; 960 else 961 return &significand.part; 962 } 963 964 void IEEEFloat::zeroSignificand() { 965 APInt::tcSet(significandParts(), 0, partCount()); 966 } 967 968 /* Increment an fcNormal floating point number's significand. */ 969 void IEEEFloat::incrementSignificand() { 970 integerPart carry; 971 972 carry = APInt::tcIncrement(significandParts(), partCount()); 973 974 /* Our callers should never cause us to overflow. */ 975 assert(carry == 0); 976 (void)carry; 977 } 978 979 /* Add the significand of the RHS. Returns the carry flag. */ 980 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { 981 integerPart *parts; 982 983 parts = significandParts(); 984 985 assert(semantics == rhs.semantics); 986 assert(exponent == rhs.exponent); 987 988 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 989 } 990 991 /* Subtract the significand of the RHS with a borrow flag. Returns 992 the borrow flag. */ 993 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs, 994 integerPart borrow) { 995 integerPart *parts; 996 997 parts = significandParts(); 998 999 assert(semantics == rhs.semantics); 1000 assert(exponent == rhs.exponent); 1001 1002 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 1003 partCount()); 1004 } 1005 1006 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 1007 on to the full-precision result of the multiplication. Returns the 1008 lost fraction. */ 1009 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs, 1010 IEEEFloat addend) { 1011 unsigned int omsb; // One, not zero, based MSB. 1012 unsigned int partsCount, newPartsCount, precision; 1013 integerPart *lhsSignificand; 1014 integerPart scratch[4]; 1015 integerPart *fullSignificand; 1016 lostFraction lost_fraction; 1017 bool ignored; 1018 1019 assert(semantics == rhs.semantics); 1020 1021 precision = semantics->precision; 1022 1023 // Allocate space for twice as many bits as the original significand, plus one 1024 // extra bit for the addition to overflow into. 1025 newPartsCount = partCountForBits(precision * 2 + 1); 1026 1027 if (newPartsCount > 4) 1028 fullSignificand = new integerPart[newPartsCount]; 1029 else 1030 fullSignificand = scratch; 1031 1032 lhsSignificand = significandParts(); 1033 partsCount = partCount(); 1034 1035 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 1036 rhs.significandParts(), partsCount, partsCount); 1037 1038 lost_fraction = lfExactlyZero; 1039 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1040 exponent += rhs.exponent; 1041 1042 // Assume the operands involved in the multiplication are single-precision 1043 // FP, and the two multiplicants are: 1044 // *this = a23 . a22 ... a0 * 2^e1 1045 // rhs = b23 . b22 ... b0 * 2^e2 1046 // the result of multiplication is: 1047 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) 1048 // Note that there are three significant bits at the left-hand side of the 1049 // radix point: two for the multiplication, and an overflow bit for the 1050 // addition (that will always be zero at this point). Move the radix point 1051 // toward left by two bits, and adjust exponent accordingly. 1052 exponent += 2; 1053 1054 if (addend.isNonZero()) { 1055 // The intermediate result of the multiplication has "2 * precision" 1056 // signicant bit; adjust the addend to be consistent with mul result. 1057 // 1058 Significand savedSignificand = significand; 1059 const fltSemantics *savedSemantics = semantics; 1060 fltSemantics extendedSemantics; 1061 opStatus status; 1062 unsigned int extendedPrecision; 1063 1064 // Normalize our MSB to one below the top bit to allow for overflow. 1065 extendedPrecision = 2 * precision + 1; 1066 if (omsb != extendedPrecision - 1) { 1067 assert(extendedPrecision > omsb); 1068 APInt::tcShiftLeft(fullSignificand, newPartsCount, 1069 (extendedPrecision - 1) - omsb); 1070 exponent -= (extendedPrecision - 1) - omsb; 1071 } 1072 1073 /* Create new semantics. */ 1074 extendedSemantics = *semantics; 1075 extendedSemantics.precision = extendedPrecision; 1076 1077 if (newPartsCount == 1) 1078 significand.part = fullSignificand[0]; 1079 else 1080 significand.parts = fullSignificand; 1081 semantics = &extendedSemantics; 1082 1083 // Make a copy so we can convert it to the extended semantics. 1084 // Note that we cannot convert the addend directly, as the extendedSemantics 1085 // is a local variable (which we take a reference to). 1086 IEEEFloat extendedAddend(addend); 1087 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); 1088 assert(status == opOK); 1089 (void)status; 1090 1091 // Shift the significand of the addend right by one bit. This guarantees 1092 // that the high bit of the significand is zero (same as fullSignificand), 1093 // so the addition will overflow (if it does overflow at all) into the top bit. 1094 lost_fraction = extendedAddend.shiftSignificandRight(1); 1095 assert(lost_fraction == lfExactlyZero && 1096 "Lost precision while shifting addend for fused-multiply-add."); 1097 1098 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 1099 1100 /* Restore our state. */ 1101 if (newPartsCount == 1) 1102 fullSignificand[0] = significand.part; 1103 significand = savedSignificand; 1104 semantics = savedSemantics; 1105 1106 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1107 } 1108 1109 // Convert the result having "2 * precision" significant-bits back to the one 1110 // having "precision" significant-bits. First, move the radix point from 1111 // poision "2*precision - 1" to "precision - 1". The exponent need to be 1112 // adjusted by "2*precision - 1" - "precision - 1" = "precision". 1113 exponent -= precision + 1; 1114 1115 // In case MSB resides at the left-hand side of radix point, shift the 1116 // mantissa right by some amount to make sure the MSB reside right before 1117 // the radix point (i.e. "MSB . rest-significant-bits"). 1118 // 1119 // Note that the result is not normalized when "omsb < precision". So, the 1120 // caller needs to call IEEEFloat::normalize() if normalized value is 1121 // expected. 1122 if (omsb > precision) { 1123 unsigned int bits, significantParts; 1124 lostFraction lf; 1125 1126 bits = omsb - precision; 1127 significantParts = partCountForBits(omsb); 1128 lf = shiftRight(fullSignificand, significantParts, bits); 1129 lost_fraction = combineLostFractions(lf, lost_fraction); 1130 exponent += bits; 1131 } 1132 1133 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 1134 1135 if (newPartsCount > 4) 1136 delete [] fullSignificand; 1137 1138 return lost_fraction; 1139 } 1140 1141 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) { 1142 return multiplySignificand(rhs, IEEEFloat(*semantics)); 1143 } 1144 1145 /* Multiply the significands of LHS and RHS to DST. */ 1146 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) { 1147 unsigned int bit, i, partsCount; 1148 const integerPart *rhsSignificand; 1149 integerPart *lhsSignificand, *dividend, *divisor; 1150 integerPart scratch[4]; 1151 lostFraction lost_fraction; 1152 1153 assert(semantics == rhs.semantics); 1154 1155 lhsSignificand = significandParts(); 1156 rhsSignificand = rhs.significandParts(); 1157 partsCount = partCount(); 1158 1159 if (partsCount > 2) 1160 dividend = new integerPart[partsCount * 2]; 1161 else 1162 dividend = scratch; 1163 1164 divisor = dividend + partsCount; 1165 1166 /* Copy the dividend and divisor as they will be modified in-place. */ 1167 for (i = 0; i < partsCount; i++) { 1168 dividend[i] = lhsSignificand[i]; 1169 divisor[i] = rhsSignificand[i]; 1170 lhsSignificand[i] = 0; 1171 } 1172 1173 exponent -= rhs.exponent; 1174 1175 unsigned int precision = semantics->precision; 1176 1177 /* Normalize the divisor. */ 1178 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 1179 if (bit) { 1180 exponent += bit; 1181 APInt::tcShiftLeft(divisor, partsCount, bit); 1182 } 1183 1184 /* Normalize the dividend. */ 1185 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 1186 if (bit) { 1187 exponent -= bit; 1188 APInt::tcShiftLeft(dividend, partsCount, bit); 1189 } 1190 1191 /* Ensure the dividend >= divisor initially for the loop below. 1192 Incidentally, this means that the division loop below is 1193 guaranteed to set the integer bit to one. */ 1194 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { 1195 exponent--; 1196 APInt::tcShiftLeft(dividend, partsCount, 1); 1197 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 1198 } 1199 1200 /* Long division. */ 1201 for (bit = precision; bit; bit -= 1) { 1202 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 1203 APInt::tcSubtract(dividend, divisor, 0, partsCount); 1204 APInt::tcSetBit(lhsSignificand, bit - 1); 1205 } 1206 1207 APInt::tcShiftLeft(dividend, partsCount, 1); 1208 } 1209 1210 /* Figure out the lost fraction. */ 1211 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 1212 1213 if (cmp > 0) 1214 lost_fraction = lfMoreThanHalf; 1215 else if (cmp == 0) 1216 lost_fraction = lfExactlyHalf; 1217 else if (APInt::tcIsZero(dividend, partsCount)) 1218 lost_fraction = lfExactlyZero; 1219 else 1220 lost_fraction = lfLessThanHalf; 1221 1222 if (partsCount > 2) 1223 delete [] dividend; 1224 1225 return lost_fraction; 1226 } 1227 1228 unsigned int IEEEFloat::significandMSB() const { 1229 return APInt::tcMSB(significandParts(), partCount()); 1230 } 1231 1232 unsigned int IEEEFloat::significandLSB() const { 1233 return APInt::tcLSB(significandParts(), partCount()); 1234 } 1235 1236 /* Note that a zero result is NOT normalized to fcZero. */ 1237 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) { 1238 /* Our exponent should not overflow. */ 1239 assert((ExponentType) (exponent + bits) >= exponent); 1240 1241 exponent += bits; 1242 1243 return shiftRight(significandParts(), partCount(), bits); 1244 } 1245 1246 /* Shift the significand left BITS bits, subtract BITS from its exponent. */ 1247 void IEEEFloat::shiftSignificandLeft(unsigned int bits) { 1248 assert(bits < semantics->precision); 1249 1250 if (bits) { 1251 unsigned int partsCount = partCount(); 1252 1253 APInt::tcShiftLeft(significandParts(), partsCount, bits); 1254 exponent -= bits; 1255 1256 assert(!APInt::tcIsZero(significandParts(), partsCount)); 1257 } 1258 } 1259 1260 IEEEFloat::cmpResult 1261 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const { 1262 int compare; 1263 1264 assert(semantics == rhs.semantics); 1265 assert(isFiniteNonZero()); 1266 assert(rhs.isFiniteNonZero()); 1267 1268 compare = exponent - rhs.exponent; 1269 1270 /* If exponents are equal, do an unsigned bignum comparison of the 1271 significands. */ 1272 if (compare == 0) 1273 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1274 partCount()); 1275 1276 if (compare > 0) 1277 return cmpGreaterThan; 1278 else if (compare < 0) 1279 return cmpLessThan; 1280 else 1281 return cmpEqual; 1282 } 1283 1284 /* Handle overflow. Sign is preserved. We either become infinity or 1285 the largest finite number. */ 1286 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) { 1287 /* Infinity? */ 1288 if (rounding_mode == rmNearestTiesToEven || 1289 rounding_mode == rmNearestTiesToAway || 1290 (rounding_mode == rmTowardPositive && !sign) || 1291 (rounding_mode == rmTowardNegative && sign)) { 1292 category = fcInfinity; 1293 return (opStatus) (opOverflow | opInexact); 1294 } 1295 1296 /* Otherwise we become the largest finite number. */ 1297 category = fcNormal; 1298 exponent = semantics->maxExponent; 1299 APInt::tcSetLeastSignificantBits(significandParts(), partCount(), 1300 semantics->precision); 1301 1302 return opInexact; 1303 } 1304 1305 /* Returns TRUE if, when truncating the current number, with BIT the 1306 new LSB, with the given lost fraction and rounding mode, the result 1307 would need to be rounded away from zero (i.e., by increasing the 1308 signficand). This routine must work for fcZero of both signs, and 1309 fcNormal numbers. */ 1310 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode, 1311 lostFraction lost_fraction, 1312 unsigned int bit) const { 1313 /* NaNs and infinities should not have lost fractions. */ 1314 assert(isFiniteNonZero() || category == fcZero); 1315 1316 /* Current callers never pass this so we don't handle it. */ 1317 assert(lost_fraction != lfExactlyZero); 1318 1319 switch (rounding_mode) { 1320 case rmNearestTiesToAway: 1321 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1322 1323 case rmNearestTiesToEven: 1324 if (lost_fraction == lfMoreThanHalf) 1325 return true; 1326 1327 /* Our zeroes don't have a significand to test. */ 1328 if (lost_fraction == lfExactlyHalf && category != fcZero) 1329 return APInt::tcExtractBit(significandParts(), bit); 1330 1331 return false; 1332 1333 case rmTowardZero: 1334 return false; 1335 1336 case rmTowardPositive: 1337 return !sign; 1338 1339 case rmTowardNegative: 1340 return sign; 1341 1342 default: 1343 break; 1344 } 1345 llvm_unreachable("Invalid rounding mode found"); 1346 } 1347 1348 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, 1349 lostFraction lost_fraction) { 1350 unsigned int omsb; /* One, not zero, based MSB. */ 1351 int exponentChange; 1352 1353 if (!isFiniteNonZero()) 1354 return opOK; 1355 1356 /* Before rounding normalize the exponent of fcNormal numbers. */ 1357 omsb = significandMSB() + 1; 1358 1359 if (omsb) { 1360 /* OMSB is numbered from 1. We want to place it in the integer 1361 bit numbered PRECISION if possible, with a compensating change in 1362 the exponent. */ 1363 exponentChange = omsb - semantics->precision; 1364 1365 /* If the resulting exponent is too high, overflow according to 1366 the rounding mode. */ 1367 if (exponent + exponentChange > semantics->maxExponent) 1368 return handleOverflow(rounding_mode); 1369 1370 /* Subnormal numbers have exponent minExponent, and their MSB 1371 is forced based on that. */ 1372 if (exponent + exponentChange < semantics->minExponent) 1373 exponentChange = semantics->minExponent - exponent; 1374 1375 /* Shifting left is easy as we don't lose precision. */ 1376 if (exponentChange < 0) { 1377 assert(lost_fraction == lfExactlyZero); 1378 1379 shiftSignificandLeft(-exponentChange); 1380 1381 return opOK; 1382 } 1383 1384 if (exponentChange > 0) { 1385 lostFraction lf; 1386 1387 /* Shift right and capture any new lost fraction. */ 1388 lf = shiftSignificandRight(exponentChange); 1389 1390 lost_fraction = combineLostFractions(lf, lost_fraction); 1391 1392 /* Keep OMSB up-to-date. */ 1393 if (omsb > (unsigned) exponentChange) 1394 omsb -= exponentChange; 1395 else 1396 omsb = 0; 1397 } 1398 } 1399 1400 /* Now round the number according to rounding_mode given the lost 1401 fraction. */ 1402 1403 /* As specified in IEEE 754, since we do not trap we do not report 1404 underflow for exact results. */ 1405 if (lost_fraction == lfExactlyZero) { 1406 /* Canonicalize zeroes. */ 1407 if (omsb == 0) 1408 category = fcZero; 1409 1410 return opOK; 1411 } 1412 1413 /* Increment the significand if we're rounding away from zero. */ 1414 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1415 if (omsb == 0) 1416 exponent = semantics->minExponent; 1417 1418 incrementSignificand(); 1419 omsb = significandMSB() + 1; 1420 1421 /* Did the significand increment overflow? */ 1422 if (omsb == (unsigned) semantics->precision + 1) { 1423 /* Renormalize by incrementing the exponent and shifting our 1424 significand right one. However if we already have the 1425 maximum exponent we overflow to infinity. */ 1426 if (exponent == semantics->maxExponent) { 1427 category = fcInfinity; 1428 1429 return (opStatus) (opOverflow | opInexact); 1430 } 1431 1432 shiftSignificandRight(1); 1433 1434 return opInexact; 1435 } 1436 } 1437 1438 /* The normal case - we were and are not denormal, and any 1439 significand increment above didn't overflow. */ 1440 if (omsb == semantics->precision) 1441 return opInexact; 1442 1443 /* We have a non-zero denormal. */ 1444 assert(omsb < semantics->precision); 1445 1446 /* Canonicalize zeroes. */ 1447 if (omsb == 0) 1448 category = fcZero; 1449 1450 /* The fcZero case is a denormal that underflowed to zero. */ 1451 return (opStatus) (opUnderflow | opInexact); 1452 } 1453 1454 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs, 1455 bool subtract) { 1456 switch (PackCategoriesIntoKey(category, rhs.category)) { 1457 default: 1458 llvm_unreachable(nullptr); 1459 1460 case PackCategoriesIntoKey(fcZero, fcNaN): 1461 case PackCategoriesIntoKey(fcNormal, fcNaN): 1462 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1463 assign(rhs); 1464 LLVM_FALLTHROUGH; 1465 case PackCategoriesIntoKey(fcNaN, fcZero): 1466 case PackCategoriesIntoKey(fcNaN, fcNormal): 1467 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1468 case PackCategoriesIntoKey(fcNaN, fcNaN): 1469 if (isSignaling()) { 1470 makeQuiet(); 1471 return opInvalidOp; 1472 } 1473 return rhs.isSignaling() ? opInvalidOp : opOK; 1474 1475 case PackCategoriesIntoKey(fcNormal, fcZero): 1476 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1477 case PackCategoriesIntoKey(fcInfinity, fcZero): 1478 return opOK; 1479 1480 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1481 case PackCategoriesIntoKey(fcZero, fcInfinity): 1482 category = fcInfinity; 1483 sign = rhs.sign ^ subtract; 1484 return opOK; 1485 1486 case PackCategoriesIntoKey(fcZero, fcNormal): 1487 assign(rhs); 1488 sign = rhs.sign ^ subtract; 1489 return opOK; 1490 1491 case PackCategoriesIntoKey(fcZero, fcZero): 1492 /* Sign depends on rounding mode; handled by caller. */ 1493 return opOK; 1494 1495 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1496 /* Differently signed infinities can only be validly 1497 subtracted. */ 1498 if (((sign ^ rhs.sign)!=0) != subtract) { 1499 makeNaN(); 1500 return opInvalidOp; 1501 } 1502 1503 return opOK; 1504 1505 case PackCategoriesIntoKey(fcNormal, fcNormal): 1506 return opDivByZero; 1507 } 1508 } 1509 1510 /* Add or subtract two normal numbers. */ 1511 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs, 1512 bool subtract) { 1513 integerPart carry; 1514 lostFraction lost_fraction; 1515 int bits; 1516 1517 /* Determine if the operation on the absolute values is effectively 1518 an addition or subtraction. */ 1519 subtract ^= static_cast<bool>(sign ^ rhs.sign); 1520 1521 /* Are we bigger exponent-wise than the RHS? */ 1522 bits = exponent - rhs.exponent; 1523 1524 /* Subtraction is more subtle than one might naively expect. */ 1525 if (subtract) { 1526 IEEEFloat temp_rhs(rhs); 1527 1528 if (bits == 0) 1529 lost_fraction = lfExactlyZero; 1530 else if (bits > 0) { 1531 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1532 shiftSignificandLeft(1); 1533 } else { 1534 lost_fraction = shiftSignificandRight(-bits - 1); 1535 temp_rhs.shiftSignificandLeft(1); 1536 } 1537 1538 // Should we reverse the subtraction. 1539 if (compareAbsoluteValue(temp_rhs) == cmpLessThan) { 1540 carry = temp_rhs.subtractSignificand 1541 (*this, lost_fraction != lfExactlyZero); 1542 copySignificand(temp_rhs); 1543 sign = !sign; 1544 } else { 1545 carry = subtractSignificand 1546 (temp_rhs, lost_fraction != lfExactlyZero); 1547 } 1548 1549 /* Invert the lost fraction - it was on the RHS and 1550 subtracted. */ 1551 if (lost_fraction == lfLessThanHalf) 1552 lost_fraction = lfMoreThanHalf; 1553 else if (lost_fraction == lfMoreThanHalf) 1554 lost_fraction = lfLessThanHalf; 1555 1556 /* The code above is intended to ensure that no borrow is 1557 necessary. */ 1558 assert(!carry); 1559 (void)carry; 1560 } else { 1561 if (bits > 0) { 1562 IEEEFloat temp_rhs(rhs); 1563 1564 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1565 carry = addSignificand(temp_rhs); 1566 } else { 1567 lost_fraction = shiftSignificandRight(-bits); 1568 carry = addSignificand(rhs); 1569 } 1570 1571 /* We have a guard bit; generating a carry cannot happen. */ 1572 assert(!carry); 1573 (void)carry; 1574 } 1575 1576 return lost_fraction; 1577 } 1578 1579 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) { 1580 switch (PackCategoriesIntoKey(category, rhs.category)) { 1581 default: 1582 llvm_unreachable(nullptr); 1583 1584 case PackCategoriesIntoKey(fcZero, fcNaN): 1585 case PackCategoriesIntoKey(fcNormal, fcNaN): 1586 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1587 assign(rhs); 1588 sign = false; 1589 LLVM_FALLTHROUGH; 1590 case PackCategoriesIntoKey(fcNaN, fcZero): 1591 case PackCategoriesIntoKey(fcNaN, fcNormal): 1592 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1593 case PackCategoriesIntoKey(fcNaN, fcNaN): 1594 sign ^= rhs.sign; // restore the original sign 1595 if (isSignaling()) { 1596 makeQuiet(); 1597 return opInvalidOp; 1598 } 1599 return rhs.isSignaling() ? opInvalidOp : opOK; 1600 1601 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1602 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1603 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1604 category = fcInfinity; 1605 return opOK; 1606 1607 case PackCategoriesIntoKey(fcZero, fcNormal): 1608 case PackCategoriesIntoKey(fcNormal, fcZero): 1609 case PackCategoriesIntoKey(fcZero, fcZero): 1610 category = fcZero; 1611 return opOK; 1612 1613 case PackCategoriesIntoKey(fcZero, fcInfinity): 1614 case PackCategoriesIntoKey(fcInfinity, fcZero): 1615 makeNaN(); 1616 return opInvalidOp; 1617 1618 case PackCategoriesIntoKey(fcNormal, fcNormal): 1619 return opOK; 1620 } 1621 } 1622 1623 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) { 1624 switch (PackCategoriesIntoKey(category, rhs.category)) { 1625 default: 1626 llvm_unreachable(nullptr); 1627 1628 case PackCategoriesIntoKey(fcZero, fcNaN): 1629 case PackCategoriesIntoKey(fcNormal, fcNaN): 1630 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1631 assign(rhs); 1632 sign = false; 1633 LLVM_FALLTHROUGH; 1634 case PackCategoriesIntoKey(fcNaN, fcZero): 1635 case PackCategoriesIntoKey(fcNaN, fcNormal): 1636 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1637 case PackCategoriesIntoKey(fcNaN, fcNaN): 1638 sign ^= rhs.sign; // restore the original sign 1639 if (isSignaling()) { 1640 makeQuiet(); 1641 return opInvalidOp; 1642 } 1643 return rhs.isSignaling() ? opInvalidOp : opOK; 1644 1645 case PackCategoriesIntoKey(fcInfinity, fcZero): 1646 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1647 case PackCategoriesIntoKey(fcZero, fcInfinity): 1648 case PackCategoriesIntoKey(fcZero, fcNormal): 1649 return opOK; 1650 1651 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1652 category = fcZero; 1653 return opOK; 1654 1655 case PackCategoriesIntoKey(fcNormal, fcZero): 1656 category = fcInfinity; 1657 return opDivByZero; 1658 1659 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1660 case PackCategoriesIntoKey(fcZero, fcZero): 1661 makeNaN(); 1662 return opInvalidOp; 1663 1664 case PackCategoriesIntoKey(fcNormal, fcNormal): 1665 return opOK; 1666 } 1667 } 1668 1669 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) { 1670 switch (PackCategoriesIntoKey(category, rhs.category)) { 1671 default: 1672 llvm_unreachable(nullptr); 1673 1674 case PackCategoriesIntoKey(fcZero, fcNaN): 1675 case PackCategoriesIntoKey(fcNormal, fcNaN): 1676 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1677 assign(rhs); 1678 LLVM_FALLTHROUGH; 1679 case PackCategoriesIntoKey(fcNaN, fcZero): 1680 case PackCategoriesIntoKey(fcNaN, fcNormal): 1681 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1682 case PackCategoriesIntoKey(fcNaN, fcNaN): 1683 if (isSignaling()) { 1684 makeQuiet(); 1685 return opInvalidOp; 1686 } 1687 return rhs.isSignaling() ? opInvalidOp : opOK; 1688 1689 case PackCategoriesIntoKey(fcZero, fcInfinity): 1690 case PackCategoriesIntoKey(fcZero, fcNormal): 1691 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1692 return opOK; 1693 1694 case PackCategoriesIntoKey(fcNormal, fcZero): 1695 case PackCategoriesIntoKey(fcInfinity, fcZero): 1696 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1697 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1698 case PackCategoriesIntoKey(fcZero, fcZero): 1699 makeNaN(); 1700 return opInvalidOp; 1701 1702 case PackCategoriesIntoKey(fcNormal, fcNormal): 1703 return opOK; 1704 } 1705 } 1706 1707 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) { 1708 switch (PackCategoriesIntoKey(category, rhs.category)) { 1709 default: 1710 llvm_unreachable(nullptr); 1711 1712 case PackCategoriesIntoKey(fcZero, fcNaN): 1713 case PackCategoriesIntoKey(fcNormal, fcNaN): 1714 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1715 assign(rhs); 1716 LLVM_FALLTHROUGH; 1717 case PackCategoriesIntoKey(fcNaN, fcZero): 1718 case PackCategoriesIntoKey(fcNaN, fcNormal): 1719 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1720 case PackCategoriesIntoKey(fcNaN, fcNaN): 1721 if (isSignaling()) { 1722 makeQuiet(); 1723 return opInvalidOp; 1724 } 1725 return rhs.isSignaling() ? opInvalidOp : opOK; 1726 1727 case PackCategoriesIntoKey(fcZero, fcInfinity): 1728 case PackCategoriesIntoKey(fcZero, fcNormal): 1729 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1730 return opOK; 1731 1732 case PackCategoriesIntoKey(fcNormal, fcZero): 1733 case PackCategoriesIntoKey(fcInfinity, fcZero): 1734 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1735 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1736 case PackCategoriesIntoKey(fcZero, fcZero): 1737 makeNaN(); 1738 return opInvalidOp; 1739 1740 case PackCategoriesIntoKey(fcNormal, fcNormal): 1741 return opDivByZero; // fake status, indicating this is not a special case 1742 } 1743 } 1744 1745 /* Change sign. */ 1746 void IEEEFloat::changeSign() { 1747 /* Look mummy, this one's easy. */ 1748 sign = !sign; 1749 } 1750 1751 /* Normalized addition or subtraction. */ 1752 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs, 1753 roundingMode rounding_mode, 1754 bool subtract) { 1755 opStatus fs; 1756 1757 fs = addOrSubtractSpecials(rhs, subtract); 1758 1759 /* This return code means it was not a simple case. */ 1760 if (fs == opDivByZero) { 1761 lostFraction lost_fraction; 1762 1763 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1764 fs = normalize(rounding_mode, lost_fraction); 1765 1766 /* Can only be zero if we lost no fraction. */ 1767 assert(category != fcZero || lost_fraction == lfExactlyZero); 1768 } 1769 1770 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1771 positive zero unless rounding to minus infinity, except that 1772 adding two like-signed zeroes gives that zero. */ 1773 if (category == fcZero) { 1774 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 1775 sign = (rounding_mode == rmTowardNegative); 1776 } 1777 1778 return fs; 1779 } 1780 1781 /* Normalized addition. */ 1782 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs, 1783 roundingMode rounding_mode) { 1784 return addOrSubtract(rhs, rounding_mode, false); 1785 } 1786 1787 /* Normalized subtraction. */ 1788 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs, 1789 roundingMode rounding_mode) { 1790 return addOrSubtract(rhs, rounding_mode, true); 1791 } 1792 1793 /* Normalized multiply. */ 1794 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs, 1795 roundingMode rounding_mode) { 1796 opStatus fs; 1797 1798 sign ^= rhs.sign; 1799 fs = multiplySpecials(rhs); 1800 1801 if (isFiniteNonZero()) { 1802 lostFraction lost_fraction = multiplySignificand(rhs); 1803 fs = normalize(rounding_mode, lost_fraction); 1804 if (lost_fraction != lfExactlyZero) 1805 fs = (opStatus) (fs | opInexact); 1806 } 1807 1808 return fs; 1809 } 1810 1811 /* Normalized divide. */ 1812 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs, 1813 roundingMode rounding_mode) { 1814 opStatus fs; 1815 1816 sign ^= rhs.sign; 1817 fs = divideSpecials(rhs); 1818 1819 if (isFiniteNonZero()) { 1820 lostFraction lost_fraction = divideSignificand(rhs); 1821 fs = normalize(rounding_mode, lost_fraction); 1822 if (lost_fraction != lfExactlyZero) 1823 fs = (opStatus) (fs | opInexact); 1824 } 1825 1826 return fs; 1827 } 1828 1829 /* Normalized remainder. */ 1830 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) { 1831 opStatus fs; 1832 unsigned int origSign = sign; 1833 1834 // First handle the special cases. 1835 fs = remainderSpecials(rhs); 1836 if (fs != opDivByZero) 1837 return fs; 1838 1839 fs = opOK; 1840 1841 // Make sure the current value is less than twice the denom. If the addition 1842 // did not succeed (an overflow has happened), which means that the finite 1843 // value we currently posses must be less than twice the denom (as we are 1844 // using the same semantics). 1845 IEEEFloat P2 = rhs; 1846 if (P2.add(rhs, rmNearestTiesToEven) == opOK) { 1847 fs = mod(P2); 1848 assert(fs == opOK); 1849 } 1850 1851 // Lets work with absolute numbers. 1852 IEEEFloat P = rhs; 1853 P.sign = false; 1854 sign = false; 1855 1856 // 1857 // To calculate the remainder we use the following scheme. 1858 // 1859 // The remainder is defained as follows: 1860 // 1861 // remainder = numer - rquot * denom = x - r * p 1862 // 1863 // Where r is the result of: x/p, rounded toward the nearest integral value 1864 // (with halfway cases rounded toward the even number). 1865 // 1866 // Currently, (after x mod 2p): 1867 // r is the number of 2p's present inside x, which is inherently, an even 1868 // number of p's. 1869 // 1870 // We may split the remaining calculation into 4 options: 1871 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 1872 // - if x == 0.5p then we round to the nearest even number which is 0, and we 1873 // are done as well. 1874 // - if 0.5p < x < p then we round to nearest number which is 1, and we have 1875 // to subtract 1p at least once. 1876 // - if x >= p then we must subtract p at least once, as x must be a 1877 // remainder. 1878 // 1879 // By now, we were done, or we added 1 to r, which in turn, now an odd number. 1880 // 1881 // We can now split the remaining calculation to the following 3 options: 1882 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 1883 // - if x == 0.5p then we round to the nearest even number. As r is odd, we 1884 // must round up to the next even number. so we must subtract p once more. 1885 // - if x > 0.5p (and inherently x < p) then we must round r up to the next 1886 // integral, and subtract p once more. 1887 // 1888 1889 // Extend the semantics to prevent an overflow/underflow or inexact result. 1890 bool losesInfo; 1891 fltSemantics extendedSemantics = *semantics; 1892 extendedSemantics.maxExponent++; 1893 extendedSemantics.minExponent--; 1894 extendedSemantics.precision += 2; 1895 1896 IEEEFloat VEx = *this; 1897 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 1898 assert(fs == opOK && !losesInfo); 1899 IEEEFloat PEx = P; 1900 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 1901 assert(fs == opOK && !losesInfo); 1902 1903 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose 1904 // any fraction. 1905 fs = VEx.add(VEx, rmNearestTiesToEven); 1906 assert(fs == opOK); 1907 1908 if (VEx.compare(PEx) == cmpGreaterThan) { 1909 fs = subtract(P, rmNearestTiesToEven); 1910 assert(fs == opOK); 1911 1912 // Make VEx = this.add(this), but because we have different semantics, we do 1913 // not want to `convert` again, so we just subtract PEx twice (which equals 1914 // to the desired value). 1915 fs = VEx.subtract(PEx, rmNearestTiesToEven); 1916 assert(fs == opOK); 1917 fs = VEx.subtract(PEx, rmNearestTiesToEven); 1918 assert(fs == opOK); 1919 1920 cmpResult result = VEx.compare(PEx); 1921 if (result == cmpGreaterThan || result == cmpEqual) { 1922 fs = subtract(P, rmNearestTiesToEven); 1923 assert(fs == opOK); 1924 } 1925 } 1926 1927 if (isZero()) 1928 sign = origSign; // IEEE754 requires this 1929 else 1930 sign ^= origSign; 1931 return fs; 1932 } 1933 1934 /* Normalized llvm frem (C fmod). */ 1935 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) { 1936 opStatus fs; 1937 fs = modSpecials(rhs); 1938 unsigned int origSign = sign; 1939 1940 while (isFiniteNonZero() && rhs.isFiniteNonZero() && 1941 compareAbsoluteValue(rhs) != cmpLessThan) { 1942 IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven); 1943 if (compareAbsoluteValue(V) == cmpLessThan) 1944 V = scalbn(V, -1, rmNearestTiesToEven); 1945 V.sign = sign; 1946 1947 fs = subtract(V, rmNearestTiesToEven); 1948 assert(fs==opOK); 1949 } 1950 if (isZero()) 1951 sign = origSign; // fmod requires this 1952 return fs; 1953 } 1954 1955 /* Normalized fused-multiply-add. */ 1956 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand, 1957 const IEEEFloat &addend, 1958 roundingMode rounding_mode) { 1959 opStatus fs; 1960 1961 /* Post-multiplication sign, before addition. */ 1962 sign ^= multiplicand.sign; 1963 1964 /* If and only if all arguments are normal do we need to do an 1965 extended-precision calculation. */ 1966 if (isFiniteNonZero() && 1967 multiplicand.isFiniteNonZero() && 1968 addend.isFinite()) { 1969 lostFraction lost_fraction; 1970 1971 lost_fraction = multiplySignificand(multiplicand, addend); 1972 fs = normalize(rounding_mode, lost_fraction); 1973 if (lost_fraction != lfExactlyZero) 1974 fs = (opStatus) (fs | opInexact); 1975 1976 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1977 positive zero unless rounding to minus infinity, except that 1978 adding two like-signed zeroes gives that zero. */ 1979 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) 1980 sign = (rounding_mode == rmTowardNegative); 1981 } else { 1982 fs = multiplySpecials(multiplicand); 1983 1984 /* FS can only be opOK or opInvalidOp. There is no more work 1985 to do in the latter case. The IEEE-754R standard says it is 1986 implementation-defined in this case whether, if ADDEND is a 1987 quiet NaN, we raise invalid op; this implementation does so. 1988 1989 If we need to do the addition we can do so with normal 1990 precision. */ 1991 if (fs == opOK) 1992 fs = addOrSubtract(addend, rounding_mode, false); 1993 } 1994 1995 return fs; 1996 } 1997 1998 /* Rounding-mode correct round to integral value. */ 1999 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) { 2000 opStatus fs; 2001 2002 if (isInfinity()) 2003 // [IEEE Std 754-2008 6.1]: 2004 // The behavior of infinity in floating-point arithmetic is derived from the 2005 // limiting cases of real arithmetic with operands of arbitrarily 2006 // large magnitude, when such a limit exists. 2007 // ... 2008 // Operations on infinite operands are usually exact and therefore signal no 2009 // exceptions ... 2010 return opOK; 2011 2012 if (isNaN()) { 2013 if (isSignaling()) { 2014 // [IEEE Std 754-2008 6.2]: 2015 // Under default exception handling, any operation signaling an invalid 2016 // operation exception and for which a floating-point result is to be 2017 // delivered shall deliver a quiet NaN. 2018 makeQuiet(); 2019 // [IEEE Std 754-2008 6.2]: 2020 // Signaling NaNs shall be reserved operands that, under default exception 2021 // handling, signal the invalid operation exception(see 7.2) for every 2022 // general-computational and signaling-computational operation except for 2023 // the conversions described in 5.12. 2024 return opInvalidOp; 2025 } else { 2026 // [IEEE Std 754-2008 6.2]: 2027 // For an operation with quiet NaN inputs, other than maximum and minimum 2028 // operations, if a floating-point result is to be delivered the result 2029 // shall be a quiet NaN which should be one of the input NaNs. 2030 // ... 2031 // Every general-computational and quiet-computational operation involving 2032 // one or more input NaNs, none of them signaling, shall signal no 2033 // exception, except fusedMultiplyAdd might signal the invalid operation 2034 // exception(see 7.2). 2035 return opOK; 2036 } 2037 } 2038 2039 if (isZero()) { 2040 // [IEEE Std 754-2008 6.3]: 2041 // ... the sign of the result of conversions, the quantize operation, the 2042 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is 2043 // the sign of the first or only operand. 2044 return opOK; 2045 } 2046 2047 // If the exponent is large enough, we know that this value is already 2048 // integral, and the arithmetic below would potentially cause it to saturate 2049 // to +/-Inf. Bail out early instead. 2050 if (exponent+1 >= (int)semanticsPrecision(*semantics)) 2051 return opOK; 2052 2053 // The algorithm here is quite simple: we add 2^(p-1), where p is the 2054 // precision of our format, and then subtract it back off again. The choice 2055 // of rounding modes for the addition/subtraction determines the rounding mode 2056 // for our integral rounding as well. 2057 // NOTE: When the input value is negative, we do subtraction followed by 2058 // addition instead. 2059 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); 2060 IntegerConstant <<= semanticsPrecision(*semantics)-1; 2061 IEEEFloat MagicConstant(*semantics); 2062 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 2063 rmNearestTiesToEven); 2064 assert(fs == opOK); 2065 MagicConstant.sign = sign; 2066 2067 // Preserve the input sign so that we can handle the case of zero result 2068 // correctly. 2069 bool inputSign = isNegative(); 2070 2071 fs = add(MagicConstant, rounding_mode); 2072 2073 // Current value and 'MagicConstant' are both integers, so the result of the 2074 // subtraction is always exact according to Sterbenz' lemma. 2075 subtract(MagicConstant, rounding_mode); 2076 2077 // Restore the input sign. 2078 if (inputSign != isNegative()) 2079 changeSign(); 2080 2081 return fs; 2082 } 2083 2084 2085 /* Comparison requires normalized numbers. */ 2086 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const { 2087 cmpResult result; 2088 2089 assert(semantics == rhs.semantics); 2090 2091 switch (PackCategoriesIntoKey(category, rhs.category)) { 2092 default: 2093 llvm_unreachable(nullptr); 2094 2095 case PackCategoriesIntoKey(fcNaN, fcZero): 2096 case PackCategoriesIntoKey(fcNaN, fcNormal): 2097 case PackCategoriesIntoKey(fcNaN, fcInfinity): 2098 case PackCategoriesIntoKey(fcNaN, fcNaN): 2099 case PackCategoriesIntoKey(fcZero, fcNaN): 2100 case PackCategoriesIntoKey(fcNormal, fcNaN): 2101 case PackCategoriesIntoKey(fcInfinity, fcNaN): 2102 return cmpUnordered; 2103 2104 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2105 case PackCategoriesIntoKey(fcInfinity, fcZero): 2106 case PackCategoriesIntoKey(fcNormal, fcZero): 2107 if (sign) 2108 return cmpLessThan; 2109 else 2110 return cmpGreaterThan; 2111 2112 case PackCategoriesIntoKey(fcNormal, fcInfinity): 2113 case PackCategoriesIntoKey(fcZero, fcInfinity): 2114 case PackCategoriesIntoKey(fcZero, fcNormal): 2115 if (rhs.sign) 2116 return cmpGreaterThan; 2117 else 2118 return cmpLessThan; 2119 2120 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2121 if (sign == rhs.sign) 2122 return cmpEqual; 2123 else if (sign) 2124 return cmpLessThan; 2125 else 2126 return cmpGreaterThan; 2127 2128 case PackCategoriesIntoKey(fcZero, fcZero): 2129 return cmpEqual; 2130 2131 case PackCategoriesIntoKey(fcNormal, fcNormal): 2132 break; 2133 } 2134 2135 /* Two normal numbers. Do they have the same sign? */ 2136 if (sign != rhs.sign) { 2137 if (sign) 2138 result = cmpLessThan; 2139 else 2140 result = cmpGreaterThan; 2141 } else { 2142 /* Compare absolute values; invert result if negative. */ 2143 result = compareAbsoluteValue(rhs); 2144 2145 if (sign) { 2146 if (result == cmpLessThan) 2147 result = cmpGreaterThan; 2148 else if (result == cmpGreaterThan) 2149 result = cmpLessThan; 2150 } 2151 } 2152 2153 return result; 2154 } 2155 2156 /// IEEEFloat::convert - convert a value of one floating point type to another. 2157 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 2158 /// records whether the transformation lost information, i.e. whether 2159 /// converting the result back to the original type will produce the 2160 /// original value (this is almost the same as return value==fsOK, but there 2161 /// are edge cases where this is not so). 2162 2163 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, 2164 roundingMode rounding_mode, 2165 bool *losesInfo) { 2166 lostFraction lostFraction; 2167 unsigned int newPartCount, oldPartCount; 2168 opStatus fs; 2169 int shift; 2170 const fltSemantics &fromSemantics = *semantics; 2171 2172 lostFraction = lfExactlyZero; 2173 newPartCount = partCountForBits(toSemantics.precision + 1); 2174 oldPartCount = partCount(); 2175 shift = toSemantics.precision - fromSemantics.precision; 2176 2177 bool X86SpecialNan = false; 2178 if (&fromSemantics == &semX87DoubleExtended && 2179 &toSemantics != &semX87DoubleExtended && category == fcNaN && 2180 (!(*significandParts() & 0x8000000000000000ULL) || 2181 !(*significandParts() & 0x4000000000000000ULL))) { 2182 // x86 has some unusual NaNs which cannot be represented in any other 2183 // format; note them here. 2184 X86SpecialNan = true; 2185 } 2186 2187 // If this is a truncation of a denormal number, and the target semantics 2188 // has larger exponent range than the source semantics (this can happen 2189 // when truncating from PowerPC double-double to double format), the 2190 // right shift could lose result mantissa bits. Adjust exponent instead 2191 // of performing excessive shift. 2192 if (shift < 0 && isFiniteNonZero()) { 2193 int exponentChange = significandMSB() + 1 - fromSemantics.precision; 2194 if (exponent + exponentChange < toSemantics.minExponent) 2195 exponentChange = toSemantics.minExponent - exponent; 2196 if (exponentChange < shift) 2197 exponentChange = shift; 2198 if (exponentChange < 0) { 2199 shift -= exponentChange; 2200 exponent += exponentChange; 2201 } 2202 } 2203 2204 // If this is a truncation, perform the shift before we narrow the storage. 2205 if (shift < 0 && (isFiniteNonZero() || category==fcNaN)) 2206 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 2207 2208 // Fix the storage so it can hold to new value. 2209 if (newPartCount > oldPartCount) { 2210 // The new type requires more storage; make it available. 2211 integerPart *newParts; 2212 newParts = new integerPart[newPartCount]; 2213 APInt::tcSet(newParts, 0, newPartCount); 2214 if (isFiniteNonZero() || category==fcNaN) 2215 APInt::tcAssign(newParts, significandParts(), oldPartCount); 2216 freeSignificand(); 2217 significand.parts = newParts; 2218 } else if (newPartCount == 1 && oldPartCount != 1) { 2219 // Switch to built-in storage for a single part. 2220 integerPart newPart = 0; 2221 if (isFiniteNonZero() || category==fcNaN) 2222 newPart = significandParts()[0]; 2223 freeSignificand(); 2224 significand.part = newPart; 2225 } 2226 2227 // Now that we have the right storage, switch the semantics. 2228 semantics = &toSemantics; 2229 2230 // If this is an extension, perform the shift now that the storage is 2231 // available. 2232 if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) 2233 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 2234 2235 if (isFiniteNonZero()) { 2236 fs = normalize(rounding_mode, lostFraction); 2237 *losesInfo = (fs != opOK); 2238 } else if (category == fcNaN) { 2239 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 2240 2241 // For x87 extended precision, we want to make a NaN, not a special NaN if 2242 // the input wasn't special either. 2243 if (!X86SpecialNan && semantics == &semX87DoubleExtended) 2244 APInt::tcSetBit(significandParts(), semantics->precision - 1); 2245 2246 // If we are truncating NaN, it is possible that we shifted out all of the 2247 // set bits in a signalling NaN payload. But NaN must remain NaN, so some 2248 // bit in the significand must be set (otherwise it is Inf). 2249 // This can only happen with sNaN. Set the 1st bit after the quiet bit, 2250 // so that we still have an sNaN. 2251 // FIXME: Set quiet and return opInvalidOp (on convert of any sNaN). 2252 // But this requires fixing LLVM to parse 32-bit hex FP or ignoring 2253 // conversions while parsing IR. 2254 if (APInt::tcIsZero(significandParts(), newPartCount)) { 2255 assert(shift < 0 && "Should not lose NaN payload on extend"); 2256 assert(semantics->precision >= 3 && "Unexpectedly narrow significand"); 2257 assert(*losesInfo && "Missing payload should have set lost info"); 2258 APInt::tcSetBit(significandParts(), semantics->precision - 3); 2259 } 2260 2261 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) 2262 // does not give you back the same bits. This is dubious, and we 2263 // don't currently do it. You're really supposed to get 2264 // an invalid operation signal at runtime, but nobody does that. 2265 fs = opOK; 2266 } else { 2267 *losesInfo = false; 2268 fs = opOK; 2269 } 2270 2271 return fs; 2272 } 2273 2274 /* Convert a floating point number to an integer according to the 2275 rounding mode. If the rounded integer value is out of range this 2276 returns an invalid operation exception and the contents of the 2277 destination parts are unspecified. If the rounded value is in 2278 range but the floating point number is not the exact integer, the C 2279 standard doesn't require an inexact exception to be raised. IEEE 2280 854 does require it so we do that. 2281 2282 Note that for conversions to integer type the C standard requires 2283 round-to-zero to always be used. */ 2284 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger( 2285 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned, 2286 roundingMode rounding_mode, bool *isExact) const { 2287 lostFraction lost_fraction; 2288 const integerPart *src; 2289 unsigned int dstPartsCount, truncatedBits; 2290 2291 *isExact = false; 2292 2293 /* Handle the three special cases first. */ 2294 if (category == fcInfinity || category == fcNaN) 2295 return opInvalidOp; 2296 2297 dstPartsCount = partCountForBits(width); 2298 assert(dstPartsCount <= parts.size() && "Integer too big"); 2299 2300 if (category == fcZero) { 2301 APInt::tcSet(parts.data(), 0, dstPartsCount); 2302 // Negative zero can't be represented as an int. 2303 *isExact = !sign; 2304 return opOK; 2305 } 2306 2307 src = significandParts(); 2308 2309 /* Step 1: place our absolute value, with any fraction truncated, in 2310 the destination. */ 2311 if (exponent < 0) { 2312 /* Our absolute value is less than one; truncate everything. */ 2313 APInt::tcSet(parts.data(), 0, dstPartsCount); 2314 /* For exponent -1 the integer bit represents .5, look at that. 2315 For smaller exponents leftmost truncated bit is 0. */ 2316 truncatedBits = semantics->precision -1U - exponent; 2317 } else { 2318 /* We want the most significant (exponent + 1) bits; the rest are 2319 truncated. */ 2320 unsigned int bits = exponent + 1U; 2321 2322 /* Hopelessly large in magnitude? */ 2323 if (bits > width) 2324 return opInvalidOp; 2325 2326 if (bits < semantics->precision) { 2327 /* We truncate (semantics->precision - bits) bits. */ 2328 truncatedBits = semantics->precision - bits; 2329 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits); 2330 } else { 2331 /* We want at least as many bits as are available. */ 2332 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision, 2333 0); 2334 APInt::tcShiftLeft(parts.data(), dstPartsCount, 2335 bits - semantics->precision); 2336 truncatedBits = 0; 2337 } 2338 } 2339 2340 /* Step 2: work out any lost fraction, and increment the absolute 2341 value if we would round away from zero. */ 2342 if (truncatedBits) { 2343 lost_fraction = lostFractionThroughTruncation(src, partCount(), 2344 truncatedBits); 2345 if (lost_fraction != lfExactlyZero && 2346 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2347 if (APInt::tcIncrement(parts.data(), dstPartsCount)) 2348 return opInvalidOp; /* Overflow. */ 2349 } 2350 } else { 2351 lost_fraction = lfExactlyZero; 2352 } 2353 2354 /* Step 3: check if we fit in the destination. */ 2355 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1; 2356 2357 if (sign) { 2358 if (!isSigned) { 2359 /* Negative numbers cannot be represented as unsigned. */ 2360 if (omsb != 0) 2361 return opInvalidOp; 2362 } else { 2363 /* It takes omsb bits to represent the unsigned integer value. 2364 We lose a bit for the sign, but care is needed as the 2365 maximally negative integer is a special case. */ 2366 if (omsb == width && 2367 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb) 2368 return opInvalidOp; 2369 2370 /* This case can happen because of rounding. */ 2371 if (omsb > width) 2372 return opInvalidOp; 2373 } 2374 2375 APInt::tcNegate (parts.data(), dstPartsCount); 2376 } else { 2377 if (omsb >= width + !isSigned) 2378 return opInvalidOp; 2379 } 2380 2381 if (lost_fraction == lfExactlyZero) { 2382 *isExact = true; 2383 return opOK; 2384 } else 2385 return opInexact; 2386 } 2387 2388 /* Same as convertToSignExtendedInteger, except we provide 2389 deterministic values in case of an invalid operation exception, 2390 namely zero for NaNs and the minimal or maximal value respectively 2391 for underflow or overflow. 2392 The *isExact output tells whether the result is exact, in the sense 2393 that converting it back to the original floating point type produces 2394 the original value. This is almost equivalent to result==opOK, 2395 except for negative zeroes. 2396 */ 2397 IEEEFloat::opStatus 2398 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts, 2399 unsigned int width, bool isSigned, 2400 roundingMode rounding_mode, bool *isExact) const { 2401 opStatus fs; 2402 2403 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2404 isExact); 2405 2406 if (fs == opInvalidOp) { 2407 unsigned int bits, dstPartsCount; 2408 2409 dstPartsCount = partCountForBits(width); 2410 assert(dstPartsCount <= parts.size() && "Integer too big"); 2411 2412 if (category == fcNaN) 2413 bits = 0; 2414 else if (sign) 2415 bits = isSigned; 2416 else 2417 bits = width - isSigned; 2418 2419 APInt::tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits); 2420 if (sign && isSigned) 2421 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1); 2422 } 2423 2424 return fs; 2425 } 2426 2427 /* Convert an unsigned integer SRC to a floating point number, 2428 rounding according to ROUNDING_MODE. The sign of the floating 2429 point number is not modified. */ 2430 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts( 2431 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) { 2432 unsigned int omsb, precision, dstCount; 2433 integerPart *dst; 2434 lostFraction lost_fraction; 2435 2436 category = fcNormal; 2437 omsb = APInt::tcMSB(src, srcCount) + 1; 2438 dst = significandParts(); 2439 dstCount = partCount(); 2440 precision = semantics->precision; 2441 2442 /* We want the most significant PRECISION bits of SRC. There may not 2443 be that many; extract what we can. */ 2444 if (precision <= omsb) { 2445 exponent = omsb - 1; 2446 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2447 omsb - precision); 2448 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2449 } else { 2450 exponent = precision - 1; 2451 lost_fraction = lfExactlyZero; 2452 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2453 } 2454 2455 return normalize(rounding_mode, lost_fraction); 2456 } 2457 2458 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned, 2459 roundingMode rounding_mode) { 2460 unsigned int partCount = Val.getNumWords(); 2461 APInt api = Val; 2462 2463 sign = false; 2464 if (isSigned && api.isNegative()) { 2465 sign = true; 2466 api = -api; 2467 } 2468 2469 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2470 } 2471 2472 /* Convert a two's complement integer SRC to a floating point number, 2473 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2474 integer is signed, in which case it must be sign-extended. */ 2475 IEEEFloat::opStatus 2476 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src, 2477 unsigned int srcCount, bool isSigned, 2478 roundingMode rounding_mode) { 2479 opStatus status; 2480 2481 if (isSigned && 2482 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2483 integerPart *copy; 2484 2485 /* If we're signed and negative negate a copy. */ 2486 sign = true; 2487 copy = new integerPart[srcCount]; 2488 APInt::tcAssign(copy, src, srcCount); 2489 APInt::tcNegate(copy, srcCount); 2490 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2491 delete [] copy; 2492 } else { 2493 sign = false; 2494 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2495 } 2496 2497 return status; 2498 } 2499 2500 /* FIXME: should this just take a const APInt reference? */ 2501 IEEEFloat::opStatus 2502 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2503 unsigned int width, bool isSigned, 2504 roundingMode rounding_mode) { 2505 unsigned int partCount = partCountForBits(width); 2506 APInt api = APInt(width, makeArrayRef(parts, partCount)); 2507 2508 sign = false; 2509 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2510 sign = true; 2511 api = -api; 2512 } 2513 2514 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2515 } 2516 2517 Expected<IEEEFloat::opStatus> 2518 IEEEFloat::convertFromHexadecimalString(StringRef s, 2519 roundingMode rounding_mode) { 2520 lostFraction lost_fraction = lfExactlyZero; 2521 2522 category = fcNormal; 2523 zeroSignificand(); 2524 exponent = 0; 2525 2526 integerPart *significand = significandParts(); 2527 unsigned partsCount = partCount(); 2528 unsigned bitPos = partsCount * integerPartWidth; 2529 bool computedTrailingFraction = false; 2530 2531 // Skip leading zeroes and any (hexa)decimal point. 2532 StringRef::iterator begin = s.begin(); 2533 StringRef::iterator end = s.end(); 2534 StringRef::iterator dot; 2535 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2536 if (!PtrOrErr) 2537 return PtrOrErr.takeError(); 2538 StringRef::iterator p = *PtrOrErr; 2539 StringRef::iterator firstSignificantDigit = p; 2540 2541 while (p != end) { 2542 integerPart hex_value; 2543 2544 if (*p == '.') { 2545 if (dot != end) 2546 return createError("String contains multiple dots"); 2547 dot = p++; 2548 continue; 2549 } 2550 2551 hex_value = hexDigitValue(*p); 2552 if (hex_value == -1U) 2553 break; 2554 2555 p++; 2556 2557 // Store the number while we have space. 2558 if (bitPos) { 2559 bitPos -= 4; 2560 hex_value <<= bitPos % integerPartWidth; 2561 significand[bitPos / integerPartWidth] |= hex_value; 2562 } else if (!computedTrailingFraction) { 2563 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value); 2564 if (!FractOrErr) 2565 return FractOrErr.takeError(); 2566 lost_fraction = *FractOrErr; 2567 computedTrailingFraction = true; 2568 } 2569 } 2570 2571 /* Hex floats require an exponent but not a hexadecimal point. */ 2572 if (p == end) 2573 return createError("Hex strings require an exponent"); 2574 if (*p != 'p' && *p != 'P') 2575 return createError("Invalid character in significand"); 2576 if (p == begin) 2577 return createError("Significand has no digits"); 2578 if (dot != end && p - begin == 1) 2579 return createError("Significand has no digits"); 2580 2581 /* Ignore the exponent if we are zero. */ 2582 if (p != firstSignificantDigit) { 2583 int expAdjustment; 2584 2585 /* Implicit hexadecimal point? */ 2586 if (dot == end) 2587 dot = p; 2588 2589 /* Calculate the exponent adjustment implicit in the number of 2590 significant digits. */ 2591 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 2592 if (expAdjustment < 0) 2593 expAdjustment++; 2594 expAdjustment = expAdjustment * 4 - 1; 2595 2596 /* Adjust for writing the significand starting at the most 2597 significant nibble. */ 2598 expAdjustment += semantics->precision; 2599 expAdjustment -= partsCount * integerPartWidth; 2600 2601 /* Adjust for the given exponent. */ 2602 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment); 2603 if (!ExpOrErr) 2604 return ExpOrErr.takeError(); 2605 exponent = *ExpOrErr; 2606 } 2607 2608 return normalize(rounding_mode, lost_fraction); 2609 } 2610 2611 IEEEFloat::opStatus 2612 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2613 unsigned sigPartCount, int exp, 2614 roundingMode rounding_mode) { 2615 unsigned int parts, pow5PartCount; 2616 fltSemantics calcSemantics = { 32767, -32767, 0, 0 }; 2617 integerPart pow5Parts[maxPowerOfFiveParts]; 2618 bool isNearest; 2619 2620 isNearest = (rounding_mode == rmNearestTiesToEven || 2621 rounding_mode == rmNearestTiesToAway); 2622 2623 parts = partCountForBits(semantics->precision + 11); 2624 2625 /* Calculate pow(5, abs(exp)). */ 2626 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2627 2628 for (;; parts *= 2) { 2629 opStatus sigStatus, powStatus; 2630 unsigned int excessPrecision, truncatedBits; 2631 2632 calcSemantics.precision = parts * integerPartWidth - 1; 2633 excessPrecision = calcSemantics.precision - semantics->precision; 2634 truncatedBits = excessPrecision; 2635 2636 IEEEFloat decSig(calcSemantics, uninitialized); 2637 decSig.makeZero(sign); 2638 IEEEFloat pow5(calcSemantics); 2639 2640 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2641 rmNearestTiesToEven); 2642 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2643 rmNearestTiesToEven); 2644 /* Add exp, as 10^n = 5^n * 2^n. */ 2645 decSig.exponent += exp; 2646 2647 lostFraction calcLostFraction; 2648 integerPart HUerr, HUdistance; 2649 unsigned int powHUerr; 2650 2651 if (exp >= 0) { 2652 /* multiplySignificand leaves the precision-th bit set to 1. */ 2653 calcLostFraction = decSig.multiplySignificand(pow5); 2654 powHUerr = powStatus != opOK; 2655 } else { 2656 calcLostFraction = decSig.divideSignificand(pow5); 2657 /* Denormal numbers have less precision. */ 2658 if (decSig.exponent < semantics->minExponent) { 2659 excessPrecision += (semantics->minExponent - decSig.exponent); 2660 truncatedBits = excessPrecision; 2661 if (excessPrecision > calcSemantics.precision) 2662 excessPrecision = calcSemantics.precision; 2663 } 2664 /* Extra half-ulp lost in reciprocal of exponent. */ 2665 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 2666 } 2667 2668 /* Both multiplySignificand and divideSignificand return the 2669 result with the integer bit set. */ 2670 assert(APInt::tcExtractBit 2671 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2672 2673 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2674 powHUerr); 2675 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2676 excessPrecision, isNearest); 2677 2678 /* Are we guaranteed to round correctly if we truncate? */ 2679 if (HUdistance >= HUerr) { 2680 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2681 calcSemantics.precision - excessPrecision, 2682 excessPrecision); 2683 /* Take the exponent of decSig. If we tcExtract-ed less bits 2684 above we must adjust our exponent to compensate for the 2685 implicit right shift. */ 2686 exponent = (decSig.exponent + semantics->precision 2687 - (calcSemantics.precision - excessPrecision)); 2688 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2689 decSig.partCount(), 2690 truncatedBits); 2691 return normalize(rounding_mode, calcLostFraction); 2692 } 2693 } 2694 } 2695 2696 Expected<IEEEFloat::opStatus> 2697 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) { 2698 decimalInfo D; 2699 opStatus fs; 2700 2701 /* Scan the text. */ 2702 StringRef::iterator p = str.begin(); 2703 if (Error Err = interpretDecimal(p, str.end(), &D)) 2704 return std::move(Err); 2705 2706 /* Handle the quick cases. First the case of no significant digits, 2707 i.e. zero, and then exponents that are obviously too large or too 2708 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 2709 definitely overflows if 2710 2711 (exp - 1) * L >= maxExponent 2712 2713 and definitely underflows to zero where 2714 2715 (exp + 1) * L <= minExponent - precision 2716 2717 With integer arithmetic the tightest bounds for L are 2718 2719 93/28 < L < 196/59 [ numerator <= 256 ] 2720 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 2721 */ 2722 2723 // Test if we have a zero number allowing for strings with no null terminators 2724 // and zero decimals with non-zero exponents. 2725 // 2726 // We computed firstSigDigit by ignoring all zeros and dots. Thus if 2727 // D->firstSigDigit equals str.end(), every digit must be a zero and there can 2728 // be at most one dot. On the other hand, if we have a zero with a non-zero 2729 // exponent, then we know that D.firstSigDigit will be non-numeric. 2730 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { 2731 category = fcZero; 2732 fs = opOK; 2733 2734 /* Check whether the normalized exponent is high enough to overflow 2735 max during the log-rebasing in the max-exponent check below. */ 2736 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 2737 fs = handleOverflow(rounding_mode); 2738 2739 /* If it wasn't, then it also wasn't high enough to overflow max 2740 during the log-rebasing in the min-exponent check. Check that it 2741 won't overflow min in either check, then perform the min-exponent 2742 check. */ 2743 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 2744 (D.normalizedExponent + 1) * 28738 <= 2745 8651 * (semantics->minExponent - (int) semantics->precision)) { 2746 /* Underflow to zero and round. */ 2747 category = fcNormal; 2748 zeroSignificand(); 2749 fs = normalize(rounding_mode, lfLessThanHalf); 2750 2751 /* We can finally safely perform the max-exponent check. */ 2752 } else if ((D.normalizedExponent - 1) * 42039 2753 >= 12655 * semantics->maxExponent) { 2754 /* Overflow and round. */ 2755 fs = handleOverflow(rounding_mode); 2756 } else { 2757 integerPart *decSignificand; 2758 unsigned int partCount; 2759 2760 /* A tight upper bound on number of bits required to hold an 2761 N-digit decimal integer is N * 196 / 59. Allocate enough space 2762 to hold the full significand, and an extra part required by 2763 tcMultiplyPart. */ 2764 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 2765 partCount = partCountForBits(1 + 196 * partCount / 59); 2766 decSignificand = new integerPart[partCount + 1]; 2767 partCount = 0; 2768 2769 /* Convert to binary efficiently - we do almost all multiplication 2770 in an integerPart. When this would overflow do we do a single 2771 bignum multiplication, and then revert again to multiplication 2772 in an integerPart. */ 2773 do { 2774 integerPart decValue, val, multiplier; 2775 2776 val = 0; 2777 multiplier = 1; 2778 2779 do { 2780 if (*p == '.') { 2781 p++; 2782 if (p == str.end()) { 2783 break; 2784 } 2785 } 2786 decValue = decDigitValue(*p++); 2787 if (decValue >= 10U) { 2788 delete[] decSignificand; 2789 return createError("Invalid character in significand"); 2790 } 2791 multiplier *= 10; 2792 val = val * 10 + decValue; 2793 /* The maximum number that can be multiplied by ten with any 2794 digit added without overflowing an integerPart. */ 2795 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 2796 2797 /* Multiply out the current part. */ 2798 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 2799 partCount, partCount + 1, false); 2800 2801 /* If we used another part (likely but not guaranteed), increase 2802 the count. */ 2803 if (decSignificand[partCount]) 2804 partCount++; 2805 } while (p <= D.lastSigDigit); 2806 2807 category = fcNormal; 2808 fs = roundSignificandWithExponent(decSignificand, partCount, 2809 D.exponent, rounding_mode); 2810 2811 delete [] decSignificand; 2812 } 2813 2814 return fs; 2815 } 2816 2817 bool IEEEFloat::convertFromStringSpecials(StringRef str) { 2818 const size_t MIN_NAME_SIZE = 3; 2819 2820 if (str.size() < MIN_NAME_SIZE) 2821 return false; 2822 2823 if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) { 2824 makeInf(false); 2825 return true; 2826 } 2827 2828 bool IsNegative = str.front() == '-'; 2829 if (IsNegative) { 2830 str = str.drop_front(); 2831 if (str.size() < MIN_NAME_SIZE) 2832 return false; 2833 2834 if (str.equals("inf") || str.equals("INFINITY") || str.equals("Inf")) { 2835 makeInf(true); 2836 return true; 2837 } 2838 } 2839 2840 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN. 2841 bool IsSignaling = str.front() == 's' || str.front() == 'S'; 2842 if (IsSignaling) { 2843 str = str.drop_front(); 2844 if (str.size() < MIN_NAME_SIZE) 2845 return false; 2846 } 2847 2848 if (str.startswith("nan") || str.startswith("NaN")) { 2849 str = str.drop_front(3); 2850 2851 // A NaN without payload. 2852 if (str.empty()) { 2853 makeNaN(IsSignaling, IsNegative); 2854 return true; 2855 } 2856 2857 // Allow the payload to be inside parentheses. 2858 if (str.front() == '(') { 2859 // Parentheses should be balanced (and not empty). 2860 if (str.size() <= 2 || str.back() != ')') 2861 return false; 2862 2863 str = str.slice(1, str.size() - 1); 2864 } 2865 2866 // Determine the payload number's radix. 2867 unsigned Radix = 10; 2868 if (str[0] == '0') { 2869 if (str.size() > 1 && tolower(str[1]) == 'x') { 2870 str = str.drop_front(2); 2871 Radix = 16; 2872 } else 2873 Radix = 8; 2874 } 2875 2876 // Parse the payload and make the NaN. 2877 APInt Payload; 2878 if (!str.getAsInteger(Radix, Payload)) { 2879 makeNaN(IsSignaling, IsNegative, &Payload); 2880 return true; 2881 } 2882 } 2883 2884 return false; 2885 } 2886 2887 Expected<IEEEFloat::opStatus> 2888 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) { 2889 if (str.empty()) 2890 return createError("Invalid string length"); 2891 2892 // Handle special cases. 2893 if (convertFromStringSpecials(str)) 2894 return opOK; 2895 2896 /* Handle a leading minus sign. */ 2897 StringRef::iterator p = str.begin(); 2898 size_t slen = str.size(); 2899 sign = *p == '-' ? 1 : 0; 2900 if (*p == '-' || *p == '+') { 2901 p++; 2902 slen--; 2903 if (!slen) 2904 return createError("String has no digits"); 2905 } 2906 2907 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 2908 if (slen == 2) 2909 return createError("Invalid string"); 2910 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 2911 rounding_mode); 2912 } 2913 2914 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 2915 } 2916 2917 /* Write out a hexadecimal representation of the floating point value 2918 to DST, which must be of sufficient size, in the C99 form 2919 [-]0xh.hhhhp[+-]d. Return the number of characters written, 2920 excluding the terminating NUL. 2921 2922 If UPPERCASE, the output is in upper case, otherwise in lower case. 2923 2924 HEXDIGITS digits appear altogether, rounding the value if 2925 necessary. If HEXDIGITS is 0, the minimal precision to display the 2926 number precisely is used instead. If nothing would appear after 2927 the decimal point it is suppressed. 2928 2929 The decimal exponent is always printed and has at least one digit. 2930 Zero values display an exponent of zero. Infinities and NaNs 2931 appear as "infinity" or "nan" respectively. 2932 2933 The above rules are as specified by C99. There is ambiguity about 2934 what the leading hexadecimal digit should be. This implementation 2935 uses whatever is necessary so that the exponent is displayed as 2936 stored. This implies the exponent will fall within the IEEE format 2937 range, and the leading hexadecimal digit will be 0 (for denormals), 2938 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 2939 any other digits zero). 2940 */ 2941 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits, 2942 bool upperCase, 2943 roundingMode rounding_mode) const { 2944 char *p; 2945 2946 p = dst; 2947 if (sign) 2948 *dst++ = '-'; 2949 2950 switch (category) { 2951 case fcInfinity: 2952 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 2953 dst += sizeof infinityL - 1; 2954 break; 2955 2956 case fcNaN: 2957 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 2958 dst += sizeof NaNU - 1; 2959 break; 2960 2961 case fcZero: 2962 *dst++ = '0'; 2963 *dst++ = upperCase ? 'X': 'x'; 2964 *dst++ = '0'; 2965 if (hexDigits > 1) { 2966 *dst++ = '.'; 2967 memset (dst, '0', hexDigits - 1); 2968 dst += hexDigits - 1; 2969 } 2970 *dst++ = upperCase ? 'P': 'p'; 2971 *dst++ = '0'; 2972 break; 2973 2974 case fcNormal: 2975 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 2976 break; 2977 } 2978 2979 *dst = 0; 2980 2981 return static_cast<unsigned int>(dst - p); 2982 } 2983 2984 /* Does the hard work of outputting the correctly rounded hexadecimal 2985 form of a normal floating point number with the specified number of 2986 hexadecimal digits. If HEXDIGITS is zero the minimum number of 2987 digits necessary to print the value precisely is output. */ 2988 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 2989 bool upperCase, 2990 roundingMode rounding_mode) const { 2991 unsigned int count, valueBits, shift, partsCount, outputDigits; 2992 const char *hexDigitChars; 2993 const integerPart *significand; 2994 char *p; 2995 bool roundUp; 2996 2997 *dst++ = '0'; 2998 *dst++ = upperCase ? 'X': 'x'; 2999 3000 roundUp = false; 3001 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 3002 3003 significand = significandParts(); 3004 partsCount = partCount(); 3005 3006 /* +3 because the first digit only uses the single integer bit, so 3007 we have 3 virtual zero most-significant-bits. */ 3008 valueBits = semantics->precision + 3; 3009 shift = integerPartWidth - valueBits % integerPartWidth; 3010 3011 /* The natural number of digits required ignoring trailing 3012 insignificant zeroes. */ 3013 outputDigits = (valueBits - significandLSB () + 3) / 4; 3014 3015 /* hexDigits of zero means use the required number for the 3016 precision. Otherwise, see if we are truncating. If we are, 3017 find out if we need to round away from zero. */ 3018 if (hexDigits) { 3019 if (hexDigits < outputDigits) { 3020 /* We are dropping non-zero bits, so need to check how to round. 3021 "bits" is the number of dropped bits. */ 3022 unsigned int bits; 3023 lostFraction fraction; 3024 3025 bits = valueBits - hexDigits * 4; 3026 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 3027 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 3028 } 3029 outputDigits = hexDigits; 3030 } 3031 3032 /* Write the digits consecutively, and start writing in the location 3033 of the hexadecimal point. We move the most significant digit 3034 left and add the hexadecimal point later. */ 3035 p = ++dst; 3036 3037 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 3038 3039 while (outputDigits && count) { 3040 integerPart part; 3041 3042 /* Put the most significant integerPartWidth bits in "part". */ 3043 if (--count == partsCount) 3044 part = 0; /* An imaginary higher zero part. */ 3045 else 3046 part = significand[count] << shift; 3047 3048 if (count && shift) 3049 part |= significand[count - 1] >> (integerPartWidth - shift); 3050 3051 /* Convert as much of "part" to hexdigits as we can. */ 3052 unsigned int curDigits = integerPartWidth / 4; 3053 3054 if (curDigits > outputDigits) 3055 curDigits = outputDigits; 3056 dst += partAsHex (dst, part, curDigits, hexDigitChars); 3057 outputDigits -= curDigits; 3058 } 3059 3060 if (roundUp) { 3061 char *q = dst; 3062 3063 /* Note that hexDigitChars has a trailing '0'. */ 3064 do { 3065 q--; 3066 *q = hexDigitChars[hexDigitValue (*q) + 1]; 3067 } while (*q == '0'); 3068 assert(q >= p); 3069 } else { 3070 /* Add trailing zeroes. */ 3071 memset (dst, '0', outputDigits); 3072 dst += outputDigits; 3073 } 3074 3075 /* Move the most significant digit to before the point, and if there 3076 is something after the decimal point add it. This must come 3077 after rounding above. */ 3078 p[-1] = p[0]; 3079 if (dst -1 == p) 3080 dst--; 3081 else 3082 p[0] = '.'; 3083 3084 /* Finally output the exponent. */ 3085 *dst++ = upperCase ? 'P': 'p'; 3086 3087 return writeSignedDecimal (dst, exponent); 3088 } 3089 3090 hash_code hash_value(const IEEEFloat &Arg) { 3091 if (!Arg.isFiniteNonZero()) 3092 return hash_combine((uint8_t)Arg.category, 3093 // NaN has no sign, fix it at zero. 3094 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, 3095 Arg.semantics->precision); 3096 3097 // Normal floats need their exponent and significand hashed. 3098 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, 3099 Arg.semantics->precision, Arg.exponent, 3100 hash_combine_range( 3101 Arg.significandParts(), 3102 Arg.significandParts() + Arg.partCount())); 3103 } 3104 3105 // Conversion from APFloat to/from host float/double. It may eventually be 3106 // possible to eliminate these and have everybody deal with APFloats, but that 3107 // will take a while. This approach will not easily extend to long double. 3108 // Current implementation requires integerPartWidth==64, which is correct at 3109 // the moment but could be made more general. 3110 3111 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 3112 // the actual IEEE respresentations. We compensate for that here. 3113 3114 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const { 3115 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended); 3116 assert(partCount()==2); 3117 3118 uint64_t myexponent, mysignificand; 3119 3120 if (isFiniteNonZero()) { 3121 myexponent = exponent+16383; //bias 3122 mysignificand = significandParts()[0]; 3123 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 3124 myexponent = 0; // denormal 3125 } else if (category==fcZero) { 3126 myexponent = 0; 3127 mysignificand = 0; 3128 } else if (category==fcInfinity) { 3129 myexponent = 0x7fff; 3130 mysignificand = 0x8000000000000000ULL; 3131 } else { 3132 assert(category == fcNaN && "Unknown category"); 3133 myexponent = 0x7fff; 3134 mysignificand = significandParts()[0]; 3135 } 3136 3137 uint64_t words[2]; 3138 words[0] = mysignificand; 3139 words[1] = ((uint64_t)(sign & 1) << 15) | 3140 (myexponent & 0x7fffLL); 3141 return APInt(80, words); 3142 } 3143 3144 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const { 3145 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy); 3146 assert(partCount()==2); 3147 3148 uint64_t words[2]; 3149 opStatus fs; 3150 bool losesInfo; 3151 3152 // Convert number to double. To avoid spurious underflows, we re- 3153 // normalize against the "double" minExponent first, and only *then* 3154 // truncate the mantissa. The result of that second conversion 3155 // may be inexact, but should never underflow. 3156 // Declare fltSemantics before APFloat that uses it (and 3157 // saves pointer to it) to ensure correct destruction order. 3158 fltSemantics extendedSemantics = *semantics; 3159 extendedSemantics.minExponent = semIEEEdouble.minExponent; 3160 IEEEFloat extended(*this); 3161 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3162 assert(fs == opOK && !losesInfo); 3163 (void)fs; 3164 3165 IEEEFloat u(extended); 3166 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3167 assert(fs == opOK || fs == opInexact); 3168 (void)fs; 3169 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 3170 3171 // If conversion was exact or resulted in a special case, we're done; 3172 // just set the second double to zero. Otherwise, re-convert back to 3173 // the extended format and compute the difference. This now should 3174 // convert exactly to double. 3175 if (u.isFiniteNonZero() && losesInfo) { 3176 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3177 assert(fs == opOK && !losesInfo); 3178 (void)fs; 3179 3180 IEEEFloat v(extended); 3181 v.subtract(u, rmNearestTiesToEven); 3182 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3183 assert(fs == opOK && !losesInfo); 3184 (void)fs; 3185 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 3186 } else { 3187 words[1] = 0; 3188 } 3189 3190 return APInt(128, words); 3191 } 3192 3193 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const { 3194 assert(semantics == (const llvm::fltSemantics*)&semIEEEquad); 3195 assert(partCount()==2); 3196 3197 uint64_t myexponent, mysignificand, mysignificand2; 3198 3199 if (isFiniteNonZero()) { 3200 myexponent = exponent+16383; //bias 3201 mysignificand = significandParts()[0]; 3202 mysignificand2 = significandParts()[1]; 3203 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) 3204 myexponent = 0; // denormal 3205 } else if (category==fcZero) { 3206 myexponent = 0; 3207 mysignificand = mysignificand2 = 0; 3208 } else if (category==fcInfinity) { 3209 myexponent = 0x7fff; 3210 mysignificand = mysignificand2 = 0; 3211 } else { 3212 assert(category == fcNaN && "Unknown category!"); 3213 myexponent = 0x7fff; 3214 mysignificand = significandParts()[0]; 3215 mysignificand2 = significandParts()[1]; 3216 } 3217 3218 uint64_t words[2]; 3219 words[0] = mysignificand; 3220 words[1] = ((uint64_t)(sign & 1) << 63) | 3221 ((myexponent & 0x7fff) << 48) | 3222 (mysignificand2 & 0xffffffffffffLL); 3223 3224 return APInt(128, words); 3225 } 3226 3227 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const { 3228 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble); 3229 assert(partCount()==1); 3230 3231 uint64_t myexponent, mysignificand; 3232 3233 if (isFiniteNonZero()) { 3234 myexponent = exponent+1023; //bias 3235 mysignificand = *significandParts(); 3236 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 3237 myexponent = 0; // denormal 3238 } else if (category==fcZero) { 3239 myexponent = 0; 3240 mysignificand = 0; 3241 } else if (category==fcInfinity) { 3242 myexponent = 0x7ff; 3243 mysignificand = 0; 3244 } else { 3245 assert(category == fcNaN && "Unknown category!"); 3246 myexponent = 0x7ff; 3247 mysignificand = *significandParts(); 3248 } 3249 3250 return APInt(64, ((((uint64_t)(sign & 1) << 63) | 3251 ((myexponent & 0x7ff) << 52) | 3252 (mysignificand & 0xfffffffffffffLL)))); 3253 } 3254 3255 APInt IEEEFloat::convertFloatAPFloatToAPInt() const { 3256 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle); 3257 assert(partCount()==1); 3258 3259 uint32_t myexponent, mysignificand; 3260 3261 if (isFiniteNonZero()) { 3262 myexponent = exponent+127; //bias 3263 mysignificand = (uint32_t)*significandParts(); 3264 if (myexponent == 1 && !(mysignificand & 0x800000)) 3265 myexponent = 0; // denormal 3266 } else if (category==fcZero) { 3267 myexponent = 0; 3268 mysignificand = 0; 3269 } else if (category==fcInfinity) { 3270 myexponent = 0xff; 3271 mysignificand = 0; 3272 } else { 3273 assert(category == fcNaN && "Unknown category!"); 3274 myexponent = 0xff; 3275 mysignificand = (uint32_t)*significandParts(); 3276 } 3277 3278 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 3279 (mysignificand & 0x7fffff))); 3280 } 3281 3282 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const { 3283 assert(semantics == (const llvm::fltSemantics *)&semBFloat); 3284 assert(partCount() == 1); 3285 3286 uint32_t myexponent, mysignificand; 3287 3288 if (isFiniteNonZero()) { 3289 myexponent = exponent + 127; // bias 3290 mysignificand = (uint32_t)*significandParts(); 3291 if (myexponent == 1 && !(mysignificand & 0x80)) 3292 myexponent = 0; // denormal 3293 } else if (category == fcZero) { 3294 myexponent = 0; 3295 mysignificand = 0; 3296 } else if (category == fcInfinity) { 3297 myexponent = 0xff; 3298 mysignificand = 0; 3299 } else { 3300 assert(category == fcNaN && "Unknown category!"); 3301 myexponent = 0xff; 3302 mysignificand = (uint32_t)*significandParts(); 3303 } 3304 3305 return APInt(16, (((sign & 1) << 15) | ((myexponent & 0xff) << 7) | 3306 (mysignificand & 0x7f))); 3307 } 3308 3309 APInt IEEEFloat::convertHalfAPFloatToAPInt() const { 3310 assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf); 3311 assert(partCount()==1); 3312 3313 uint32_t myexponent, mysignificand; 3314 3315 if (isFiniteNonZero()) { 3316 myexponent = exponent+15; //bias 3317 mysignificand = (uint32_t)*significandParts(); 3318 if (myexponent == 1 && !(mysignificand & 0x400)) 3319 myexponent = 0; // denormal 3320 } else if (category==fcZero) { 3321 myexponent = 0; 3322 mysignificand = 0; 3323 } else if (category==fcInfinity) { 3324 myexponent = 0x1f; 3325 mysignificand = 0; 3326 } else { 3327 assert(category == fcNaN && "Unknown category!"); 3328 myexponent = 0x1f; 3329 mysignificand = (uint32_t)*significandParts(); 3330 } 3331 3332 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | 3333 (mysignificand & 0x3ff))); 3334 } 3335 3336 // This function creates an APInt that is just a bit map of the floating 3337 // point constant as it would appear in memory. It is not a conversion, 3338 // and treating the result as a normal integer is unlikely to be useful. 3339 3340 APInt IEEEFloat::bitcastToAPInt() const { 3341 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf) 3342 return convertHalfAPFloatToAPInt(); 3343 3344 if (semantics == (const llvm::fltSemantics *)&semBFloat) 3345 return convertBFloatAPFloatToAPInt(); 3346 3347 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle) 3348 return convertFloatAPFloatToAPInt(); 3349 3350 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble) 3351 return convertDoubleAPFloatToAPInt(); 3352 3353 if (semantics == (const llvm::fltSemantics*)&semIEEEquad) 3354 return convertQuadrupleAPFloatToAPInt(); 3355 3356 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy) 3357 return convertPPCDoubleDoubleAPFloatToAPInt(); 3358 3359 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended && 3360 "unknown format!"); 3361 return convertF80LongDoubleAPFloatToAPInt(); 3362 } 3363 3364 float IEEEFloat::convertToFloat() const { 3365 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle && 3366 "Float semantics are not IEEEsingle"); 3367 APInt api = bitcastToAPInt(); 3368 return api.bitsToFloat(); 3369 } 3370 3371 double IEEEFloat::convertToDouble() const { 3372 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble && 3373 "Float semantics are not IEEEdouble"); 3374 APInt api = bitcastToAPInt(); 3375 return api.bitsToDouble(); 3376 } 3377 3378 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3379 /// does not support these bit patterns: 3380 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3381 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3382 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3383 /// exponent = 0, integer bit 1 ("pseudodenormal") 3384 /// At the moment, the first three are treated as NaNs, the last one as Normal. 3385 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) { 3386 assert(api.getBitWidth()==80); 3387 uint64_t i1 = api.getRawData()[0]; 3388 uint64_t i2 = api.getRawData()[1]; 3389 uint64_t myexponent = (i2 & 0x7fff); 3390 uint64_t mysignificand = i1; 3391 uint8_t myintegerbit = mysignificand >> 63; 3392 3393 initialize(&semX87DoubleExtended); 3394 assert(partCount()==2); 3395 3396 sign = static_cast<unsigned int>(i2>>15); 3397 if (myexponent == 0 && mysignificand == 0) { 3398 makeZero(sign); 3399 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3400 makeInf(sign); 3401 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) || 3402 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) { 3403 category = fcNaN; 3404 exponent = exponentNaN(); 3405 significandParts()[0] = mysignificand; 3406 significandParts()[1] = 0; 3407 } else { 3408 category = fcNormal; 3409 exponent = myexponent - 16383; 3410 significandParts()[0] = mysignificand; 3411 significandParts()[1] = 0; 3412 if (myexponent==0) // denormal 3413 exponent = -16382; 3414 } 3415 } 3416 3417 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) { 3418 assert(api.getBitWidth()==128); 3419 uint64_t i1 = api.getRawData()[0]; 3420 uint64_t i2 = api.getRawData()[1]; 3421 opStatus fs; 3422 bool losesInfo; 3423 3424 // Get the first double and convert to our format. 3425 initFromDoubleAPInt(APInt(64, i1)); 3426 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3427 assert(fs == opOK && !losesInfo); 3428 (void)fs; 3429 3430 // Unless we have a special case, add in second double. 3431 if (isFiniteNonZero()) { 3432 IEEEFloat v(semIEEEdouble, APInt(64, i2)); 3433 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3434 assert(fs == opOK && !losesInfo); 3435 (void)fs; 3436 3437 add(v, rmNearestTiesToEven); 3438 } 3439 } 3440 3441 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) { 3442 assert(api.getBitWidth()==128); 3443 uint64_t i1 = api.getRawData()[0]; 3444 uint64_t i2 = api.getRawData()[1]; 3445 uint64_t myexponent = (i2 >> 48) & 0x7fff; 3446 uint64_t mysignificand = i1; 3447 uint64_t mysignificand2 = i2 & 0xffffffffffffLL; 3448 3449 initialize(&semIEEEquad); 3450 assert(partCount()==2); 3451 3452 sign = static_cast<unsigned int>(i2>>63); 3453 if (myexponent==0 && 3454 (mysignificand==0 && mysignificand2==0)) { 3455 makeZero(sign); 3456 } else if (myexponent==0x7fff && 3457 (mysignificand==0 && mysignificand2==0)) { 3458 makeInf(sign); 3459 } else if (myexponent==0x7fff && 3460 (mysignificand!=0 || mysignificand2 !=0)) { 3461 category = fcNaN; 3462 exponent = exponentNaN(); 3463 significandParts()[0] = mysignificand; 3464 significandParts()[1] = mysignificand2; 3465 } else { 3466 category = fcNormal; 3467 exponent = myexponent - 16383; 3468 significandParts()[0] = mysignificand; 3469 significandParts()[1] = mysignificand2; 3470 if (myexponent==0) // denormal 3471 exponent = -16382; 3472 else 3473 significandParts()[1] |= 0x1000000000000LL; // integer bit 3474 } 3475 } 3476 3477 void IEEEFloat::initFromDoubleAPInt(const APInt &api) { 3478 assert(api.getBitWidth()==64); 3479 uint64_t i = *api.getRawData(); 3480 uint64_t myexponent = (i >> 52) & 0x7ff; 3481 uint64_t mysignificand = i & 0xfffffffffffffLL; 3482 3483 initialize(&semIEEEdouble); 3484 assert(partCount()==1); 3485 3486 sign = static_cast<unsigned int>(i>>63); 3487 if (myexponent==0 && mysignificand==0) { 3488 makeZero(sign); 3489 } else if (myexponent==0x7ff && mysignificand==0) { 3490 makeInf(sign); 3491 } else if (myexponent==0x7ff && mysignificand!=0) { 3492 category = fcNaN; 3493 exponent = exponentNaN(); 3494 *significandParts() = mysignificand; 3495 } else { 3496 category = fcNormal; 3497 exponent = myexponent - 1023; 3498 *significandParts() = mysignificand; 3499 if (myexponent==0) // denormal 3500 exponent = -1022; 3501 else 3502 *significandParts() |= 0x10000000000000LL; // integer bit 3503 } 3504 } 3505 3506 void IEEEFloat::initFromFloatAPInt(const APInt &api) { 3507 assert(api.getBitWidth()==32); 3508 uint32_t i = (uint32_t)*api.getRawData(); 3509 uint32_t myexponent = (i >> 23) & 0xff; 3510 uint32_t mysignificand = i & 0x7fffff; 3511 3512 initialize(&semIEEEsingle); 3513 assert(partCount()==1); 3514 3515 sign = i >> 31; 3516 if (myexponent==0 && mysignificand==0) { 3517 makeZero(sign); 3518 } else if (myexponent==0xff && mysignificand==0) { 3519 makeInf(sign); 3520 } else if (myexponent==0xff && mysignificand!=0) { 3521 category = fcNaN; 3522 exponent = exponentNaN(); 3523 *significandParts() = mysignificand; 3524 } else { 3525 category = fcNormal; 3526 exponent = myexponent - 127; //bias 3527 *significandParts() = mysignificand; 3528 if (myexponent==0) // denormal 3529 exponent = -126; 3530 else 3531 *significandParts() |= 0x800000; // integer bit 3532 } 3533 } 3534 3535 void IEEEFloat::initFromBFloatAPInt(const APInt &api) { 3536 assert(api.getBitWidth() == 16); 3537 uint32_t i = (uint32_t)*api.getRawData(); 3538 uint32_t myexponent = (i >> 7) & 0xff; 3539 uint32_t mysignificand = i & 0x7f; 3540 3541 initialize(&semBFloat); 3542 assert(partCount() == 1); 3543 3544 sign = i >> 15; 3545 if (myexponent == 0 && mysignificand == 0) { 3546 makeZero(sign); 3547 } else if (myexponent == 0xff && mysignificand == 0) { 3548 makeInf(sign); 3549 } else if (myexponent == 0xff && mysignificand != 0) { 3550 category = fcNaN; 3551 exponent = exponentNaN(); 3552 *significandParts() = mysignificand; 3553 } else { 3554 category = fcNormal; 3555 exponent = myexponent - 127; // bias 3556 *significandParts() = mysignificand; 3557 if (myexponent == 0) // denormal 3558 exponent = -126; 3559 else 3560 *significandParts() |= 0x80; // integer bit 3561 } 3562 } 3563 3564 void IEEEFloat::initFromHalfAPInt(const APInt &api) { 3565 assert(api.getBitWidth()==16); 3566 uint32_t i = (uint32_t)*api.getRawData(); 3567 uint32_t myexponent = (i >> 10) & 0x1f; 3568 uint32_t mysignificand = i & 0x3ff; 3569 3570 initialize(&semIEEEhalf); 3571 assert(partCount()==1); 3572 3573 sign = i >> 15; 3574 if (myexponent==0 && mysignificand==0) { 3575 makeZero(sign); 3576 } else if (myexponent==0x1f && mysignificand==0) { 3577 makeInf(sign); 3578 } else if (myexponent==0x1f && mysignificand!=0) { 3579 category = fcNaN; 3580 exponent = exponentNaN(); 3581 *significandParts() = mysignificand; 3582 } else { 3583 category = fcNormal; 3584 exponent = myexponent - 15; //bias 3585 *significandParts() = mysignificand; 3586 if (myexponent==0) // denormal 3587 exponent = -14; 3588 else 3589 *significandParts() |= 0x400; // integer bit 3590 } 3591 } 3592 3593 /// Treat api as containing the bits of a floating point number. Currently 3594 /// we infer the floating point type from the size of the APInt. The 3595 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful 3596 /// when the size is anything else). 3597 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { 3598 if (Sem == &semIEEEhalf) 3599 return initFromHalfAPInt(api); 3600 if (Sem == &semBFloat) 3601 return initFromBFloatAPInt(api); 3602 if (Sem == &semIEEEsingle) 3603 return initFromFloatAPInt(api); 3604 if (Sem == &semIEEEdouble) 3605 return initFromDoubleAPInt(api); 3606 if (Sem == &semX87DoubleExtended) 3607 return initFromF80LongDoubleAPInt(api); 3608 if (Sem == &semIEEEquad) 3609 return initFromQuadrupleAPInt(api); 3610 if (Sem == &semPPCDoubleDoubleLegacy) 3611 return initFromPPCDoubleDoubleAPInt(api); 3612 3613 llvm_unreachable(nullptr); 3614 } 3615 3616 /// Make this number the largest magnitude normal number in the given 3617 /// semantics. 3618 void IEEEFloat::makeLargest(bool Negative) { 3619 // We want (in interchange format): 3620 // sign = {Negative} 3621 // exponent = 1..10 3622 // significand = 1..1 3623 category = fcNormal; 3624 sign = Negative; 3625 exponent = semantics->maxExponent; 3626 3627 // Use memset to set all but the highest integerPart to all ones. 3628 integerPart *significand = significandParts(); 3629 unsigned PartCount = partCount(); 3630 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); 3631 3632 // Set the high integerPart especially setting all unused top bits for 3633 // internal consistency. 3634 const unsigned NumUnusedHighBits = 3635 PartCount*integerPartWidth - semantics->precision; 3636 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth) 3637 ? (~integerPart(0) >> NumUnusedHighBits) 3638 : 0; 3639 } 3640 3641 /// Make this number the smallest magnitude denormal number in the given 3642 /// semantics. 3643 void IEEEFloat::makeSmallest(bool Negative) { 3644 // We want (in interchange format): 3645 // sign = {Negative} 3646 // exponent = 0..0 3647 // significand = 0..01 3648 category = fcNormal; 3649 sign = Negative; 3650 exponent = semantics->minExponent; 3651 APInt::tcSet(significandParts(), 1, partCount()); 3652 } 3653 3654 void IEEEFloat::makeSmallestNormalized(bool Negative) { 3655 // We want (in interchange format): 3656 // sign = {Negative} 3657 // exponent = 0..0 3658 // significand = 10..0 3659 3660 category = fcNormal; 3661 zeroSignificand(); 3662 sign = Negative; 3663 exponent = semantics->minExponent; 3664 significandParts()[partCountForBits(semantics->precision) - 1] |= 3665 (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth)); 3666 } 3667 3668 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) { 3669 initFromAPInt(&Sem, API); 3670 } 3671 3672 IEEEFloat::IEEEFloat(float f) { 3673 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f)); 3674 } 3675 3676 IEEEFloat::IEEEFloat(double d) { 3677 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d)); 3678 } 3679 3680 namespace { 3681 void append(SmallVectorImpl<char> &Buffer, StringRef Str) { 3682 Buffer.append(Str.begin(), Str.end()); 3683 } 3684 3685 /// Removes data from the given significand until it is no more 3686 /// precise than is required for the desired precision. 3687 void AdjustToPrecision(APInt &significand, 3688 int &exp, unsigned FormatPrecision) { 3689 unsigned bits = significand.getActiveBits(); 3690 3691 // 196/59 is a very slight overestimate of lg_2(10). 3692 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59; 3693 3694 if (bits <= bitsRequired) return; 3695 3696 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; 3697 if (!tensRemovable) return; 3698 3699 exp += tensRemovable; 3700 3701 APInt divisor(significand.getBitWidth(), 1); 3702 APInt powten(significand.getBitWidth(), 10); 3703 while (true) { 3704 if (tensRemovable & 1) 3705 divisor *= powten; 3706 tensRemovable >>= 1; 3707 if (!tensRemovable) break; 3708 powten *= powten; 3709 } 3710 3711 significand = significand.udiv(divisor); 3712 3713 // Truncate the significand down to its active bit count. 3714 significand = significand.trunc(significand.getActiveBits()); 3715 } 3716 3717 3718 void AdjustToPrecision(SmallVectorImpl<char> &buffer, 3719 int &exp, unsigned FormatPrecision) { 3720 unsigned N = buffer.size(); 3721 if (N <= FormatPrecision) return; 3722 3723 // The most significant figures are the last ones in the buffer. 3724 unsigned FirstSignificant = N - FormatPrecision; 3725 3726 // Round. 3727 // FIXME: this probably shouldn't use 'round half up'. 3728 3729 // Rounding down is just a truncation, except we also want to drop 3730 // trailing zeros from the new result. 3731 if (buffer[FirstSignificant - 1] < '5') { 3732 while (FirstSignificant < N && buffer[FirstSignificant] == '0') 3733 FirstSignificant++; 3734 3735 exp += FirstSignificant; 3736 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3737 return; 3738 } 3739 3740 // Rounding up requires a decimal add-with-carry. If we continue 3741 // the carry, the newly-introduced zeros will just be truncated. 3742 for (unsigned I = FirstSignificant; I != N; ++I) { 3743 if (buffer[I] == '9') { 3744 FirstSignificant++; 3745 } else { 3746 buffer[I]++; 3747 break; 3748 } 3749 } 3750 3751 // If we carried through, we have exactly one digit of precision. 3752 if (FirstSignificant == N) { 3753 exp += FirstSignificant; 3754 buffer.clear(); 3755 buffer.push_back('1'); 3756 return; 3757 } 3758 3759 exp += FirstSignificant; 3760 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3761 } 3762 } 3763 3764 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, 3765 unsigned FormatMaxPadding, bool TruncateZero) const { 3766 switch (category) { 3767 case fcInfinity: 3768 if (isNegative()) 3769 return append(Str, "-Inf"); 3770 else 3771 return append(Str, "+Inf"); 3772 3773 case fcNaN: return append(Str, "NaN"); 3774 3775 case fcZero: 3776 if (isNegative()) 3777 Str.push_back('-'); 3778 3779 if (!FormatMaxPadding) { 3780 if (TruncateZero) 3781 append(Str, "0.0E+0"); 3782 else { 3783 append(Str, "0.0"); 3784 if (FormatPrecision > 1) 3785 Str.append(FormatPrecision - 1, '0'); 3786 append(Str, "e+00"); 3787 } 3788 } else 3789 Str.push_back('0'); 3790 return; 3791 3792 case fcNormal: 3793 break; 3794 } 3795 3796 if (isNegative()) 3797 Str.push_back('-'); 3798 3799 // Decompose the number into an APInt and an exponent. 3800 int exp = exponent - ((int) semantics->precision - 1); 3801 APInt significand(semantics->precision, 3802 makeArrayRef(significandParts(), 3803 partCountForBits(semantics->precision))); 3804 3805 // Set FormatPrecision if zero. We want to do this before we 3806 // truncate trailing zeros, as those are part of the precision. 3807 if (!FormatPrecision) { 3808 // We use enough digits so the number can be round-tripped back to an 3809 // APFloat. The formula comes from "How to Print Floating-Point Numbers 3810 // Accurately" by Steele and White. 3811 // FIXME: Using a formula based purely on the precision is conservative; 3812 // we can print fewer digits depending on the actual value being printed. 3813 3814 // FormatPrecision = 2 + floor(significandBits / lg_2(10)) 3815 FormatPrecision = 2 + semantics->precision * 59 / 196; 3816 } 3817 3818 // Ignore trailing binary zeros. 3819 int trailingZeros = significand.countTrailingZeros(); 3820 exp += trailingZeros; 3821 significand.lshrInPlace(trailingZeros); 3822 3823 // Change the exponent from 2^e to 10^e. 3824 if (exp == 0) { 3825 // Nothing to do. 3826 } else if (exp > 0) { 3827 // Just shift left. 3828 significand = significand.zext(semantics->precision + exp); 3829 significand <<= exp; 3830 exp = 0; 3831 } else { /* exp < 0 */ 3832 int texp = -exp; 3833 3834 // We transform this using the identity: 3835 // (N)(2^-e) == (N)(5^e)(10^-e) 3836 // This means we have to multiply N (the significand) by 5^e. 3837 // To avoid overflow, we have to operate on numbers large 3838 // enough to store N * 5^e: 3839 // log2(N * 5^e) == log2(N) + e * log2(5) 3840 // <= semantics->precision + e * 137 / 59 3841 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) 3842 3843 unsigned precision = semantics->precision + (137 * texp + 136) / 59; 3844 3845 // Multiply significand by 5^e. 3846 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) 3847 significand = significand.zext(precision); 3848 APInt five_to_the_i(precision, 5); 3849 while (true) { 3850 if (texp & 1) significand *= five_to_the_i; 3851 3852 texp >>= 1; 3853 if (!texp) break; 3854 five_to_the_i *= five_to_the_i; 3855 } 3856 } 3857 3858 AdjustToPrecision(significand, exp, FormatPrecision); 3859 3860 SmallVector<char, 256> buffer; 3861 3862 // Fill the buffer. 3863 unsigned precision = significand.getBitWidth(); 3864 APInt ten(precision, 10); 3865 APInt digit(precision, 0); 3866 3867 bool inTrail = true; 3868 while (significand != 0) { 3869 // digit <- significand % 10 3870 // significand <- significand / 10 3871 APInt::udivrem(significand, ten, significand, digit); 3872 3873 unsigned d = digit.getZExtValue(); 3874 3875 // Drop trailing zeros. 3876 if (inTrail && !d) exp++; 3877 else { 3878 buffer.push_back((char) ('0' + d)); 3879 inTrail = false; 3880 } 3881 } 3882 3883 assert(!buffer.empty() && "no characters in buffer!"); 3884 3885 // Drop down to FormatPrecision. 3886 // TODO: don't do more precise calculations above than are required. 3887 AdjustToPrecision(buffer, exp, FormatPrecision); 3888 3889 unsigned NDigits = buffer.size(); 3890 3891 // Check whether we should use scientific notation. 3892 bool FormatScientific; 3893 if (!FormatMaxPadding) 3894 FormatScientific = true; 3895 else { 3896 if (exp >= 0) { 3897 // 765e3 --> 765000 3898 // ^^^ 3899 // But we shouldn't make the number look more precise than it is. 3900 FormatScientific = ((unsigned) exp > FormatMaxPadding || 3901 NDigits + (unsigned) exp > FormatPrecision); 3902 } else { 3903 // Power of the most significant digit. 3904 int MSD = exp + (int) (NDigits - 1); 3905 if (MSD >= 0) { 3906 // 765e-2 == 7.65 3907 FormatScientific = false; 3908 } else { 3909 // 765e-5 == 0.00765 3910 // ^ ^^ 3911 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; 3912 } 3913 } 3914 } 3915 3916 // Scientific formatting is pretty straightforward. 3917 if (FormatScientific) { 3918 exp += (NDigits - 1); 3919 3920 Str.push_back(buffer[NDigits-1]); 3921 Str.push_back('.'); 3922 if (NDigits == 1 && TruncateZero) 3923 Str.push_back('0'); 3924 else 3925 for (unsigned I = 1; I != NDigits; ++I) 3926 Str.push_back(buffer[NDigits-1-I]); 3927 // Fill with zeros up to FormatPrecision. 3928 if (!TruncateZero && FormatPrecision > NDigits - 1) 3929 Str.append(FormatPrecision - NDigits + 1, '0'); 3930 // For !TruncateZero we use lower 'e'. 3931 Str.push_back(TruncateZero ? 'E' : 'e'); 3932 3933 Str.push_back(exp >= 0 ? '+' : '-'); 3934 if (exp < 0) exp = -exp; 3935 SmallVector<char, 6> expbuf; 3936 do { 3937 expbuf.push_back((char) ('0' + (exp % 10))); 3938 exp /= 10; 3939 } while (exp); 3940 // Exponent always at least two digits if we do not truncate zeros. 3941 if (!TruncateZero && expbuf.size() < 2) 3942 expbuf.push_back('0'); 3943 for (unsigned I = 0, E = expbuf.size(); I != E; ++I) 3944 Str.push_back(expbuf[E-1-I]); 3945 return; 3946 } 3947 3948 // Non-scientific, positive exponents. 3949 if (exp >= 0) { 3950 for (unsigned I = 0; I != NDigits; ++I) 3951 Str.push_back(buffer[NDigits-1-I]); 3952 for (unsigned I = 0; I != (unsigned) exp; ++I) 3953 Str.push_back('0'); 3954 return; 3955 } 3956 3957 // Non-scientific, negative exponents. 3958 3959 // The number of digits to the left of the decimal point. 3960 int NWholeDigits = exp + (int) NDigits; 3961 3962 unsigned I = 0; 3963 if (NWholeDigits > 0) { 3964 for (; I != (unsigned) NWholeDigits; ++I) 3965 Str.push_back(buffer[NDigits-I-1]); 3966 Str.push_back('.'); 3967 } else { 3968 unsigned NZeros = 1 + (unsigned) -NWholeDigits; 3969 3970 Str.push_back('0'); 3971 Str.push_back('.'); 3972 for (unsigned Z = 1; Z != NZeros; ++Z) 3973 Str.push_back('0'); 3974 } 3975 3976 for (; I != NDigits; ++I) 3977 Str.push_back(buffer[NDigits-I-1]); 3978 } 3979 3980 bool IEEEFloat::getExactInverse(APFloat *inv) const { 3981 // Special floats and denormals have no exact inverse. 3982 if (!isFiniteNonZero()) 3983 return false; 3984 3985 // Check that the number is a power of two by making sure that only the 3986 // integer bit is set in the significand. 3987 if (significandLSB() != semantics->precision - 1) 3988 return false; 3989 3990 // Get the inverse. 3991 IEEEFloat reciprocal(*semantics, 1ULL); 3992 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) 3993 return false; 3994 3995 // Avoid multiplication with a denormal, it is not safe on all platforms and 3996 // may be slower than a normal division. 3997 if (reciprocal.isDenormal()) 3998 return false; 3999 4000 assert(reciprocal.isFiniteNonZero() && 4001 reciprocal.significandLSB() == reciprocal.semantics->precision - 1); 4002 4003 if (inv) 4004 *inv = APFloat(reciprocal, *semantics); 4005 4006 return true; 4007 } 4008 4009 bool IEEEFloat::isSignaling() const { 4010 if (!isNaN()) 4011 return false; 4012 4013 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the 4014 // first bit of the trailing significand being 0. 4015 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2); 4016 } 4017 4018 /// IEEE-754R 2008 5.3.1: nextUp/nextDown. 4019 /// 4020 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with 4021 /// appropriate sign switching before/after the computation. 4022 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) { 4023 // If we are performing nextDown, swap sign so we have -x. 4024 if (nextDown) 4025 changeSign(); 4026 4027 // Compute nextUp(x) 4028 opStatus result = opOK; 4029 4030 // Handle each float category separately. 4031 switch (category) { 4032 case fcInfinity: 4033 // nextUp(+inf) = +inf 4034 if (!isNegative()) 4035 break; 4036 // nextUp(-inf) = -getLargest() 4037 makeLargest(true); 4038 break; 4039 case fcNaN: 4040 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. 4041 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not 4042 // change the payload. 4043 if (isSignaling()) { 4044 result = opInvalidOp; 4045 // For consistency, propagate the sign of the sNaN to the qNaN. 4046 makeNaN(false, isNegative(), nullptr); 4047 } 4048 break; 4049 case fcZero: 4050 // nextUp(pm 0) = +getSmallest() 4051 makeSmallest(false); 4052 break; 4053 case fcNormal: 4054 // nextUp(-getSmallest()) = -0 4055 if (isSmallest() && isNegative()) { 4056 APInt::tcSet(significandParts(), 0, partCount()); 4057 category = fcZero; 4058 exponent = 0; 4059 break; 4060 } 4061 4062 // nextUp(getLargest()) == INFINITY 4063 if (isLargest() && !isNegative()) { 4064 APInt::tcSet(significandParts(), 0, partCount()); 4065 category = fcInfinity; 4066 exponent = semantics->maxExponent + 1; 4067 break; 4068 } 4069 4070 // nextUp(normal) == normal + inc. 4071 if (isNegative()) { 4072 // If we are negative, we need to decrement the significand. 4073 4074 // We only cross a binade boundary that requires adjusting the exponent 4075 // if: 4076 // 1. exponent != semantics->minExponent. This implies we are not in the 4077 // smallest binade or are dealing with denormals. 4078 // 2. Our significand excluding the integral bit is all zeros. 4079 bool WillCrossBinadeBoundary = 4080 exponent != semantics->minExponent && isSignificandAllZeros(); 4081 4082 // Decrement the significand. 4083 // 4084 // We always do this since: 4085 // 1. If we are dealing with a non-binade decrement, by definition we 4086 // just decrement the significand. 4087 // 2. If we are dealing with a normal -> normal binade decrement, since 4088 // we have an explicit integral bit the fact that all bits but the 4089 // integral bit are zero implies that subtracting one will yield a 4090 // significand with 0 integral bit and 1 in all other spots. Thus we 4091 // must just adjust the exponent and set the integral bit to 1. 4092 // 3. If we are dealing with a normal -> denormal binade decrement, 4093 // since we set the integral bit to 0 when we represent denormals, we 4094 // just decrement the significand. 4095 integerPart *Parts = significandParts(); 4096 APInt::tcDecrement(Parts, partCount()); 4097 4098 if (WillCrossBinadeBoundary) { 4099 // Our result is a normal number. Do the following: 4100 // 1. Set the integral bit to 1. 4101 // 2. Decrement the exponent. 4102 APInt::tcSetBit(Parts, semantics->precision - 1); 4103 exponent--; 4104 } 4105 } else { 4106 // If we are positive, we need to increment the significand. 4107 4108 // We only cross a binade boundary that requires adjusting the exponent if 4109 // the input is not a denormal and all of said input's significand bits 4110 // are set. If all of said conditions are true: clear the significand, set 4111 // the integral bit to 1, and increment the exponent. If we have a 4112 // denormal always increment since moving denormals and the numbers in the 4113 // smallest normal binade have the same exponent in our representation. 4114 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes(); 4115 4116 if (WillCrossBinadeBoundary) { 4117 integerPart *Parts = significandParts(); 4118 APInt::tcSet(Parts, 0, partCount()); 4119 APInt::tcSetBit(Parts, semantics->precision - 1); 4120 assert(exponent != semantics->maxExponent && 4121 "We can not increment an exponent beyond the maxExponent allowed" 4122 " by the given floating point semantics."); 4123 exponent++; 4124 } else { 4125 incrementSignificand(); 4126 } 4127 } 4128 break; 4129 } 4130 4131 // If we are performing nextDown, swap sign so we have -nextUp(-x) 4132 if (nextDown) 4133 changeSign(); 4134 4135 return result; 4136 } 4137 4138 APFloatBase::ExponentType IEEEFloat::exponentNaN() const { 4139 return semantics->maxExponent + 1; 4140 } 4141 4142 APFloatBase::ExponentType IEEEFloat::exponentInf() const { 4143 return semantics->maxExponent + 1; 4144 } 4145 4146 APFloatBase::ExponentType IEEEFloat::exponentZero() const { 4147 return semantics->minExponent - 1; 4148 } 4149 4150 void IEEEFloat::makeInf(bool Negative) { 4151 category = fcInfinity; 4152 sign = Negative; 4153 exponent = exponentInf(); 4154 APInt::tcSet(significandParts(), 0, partCount()); 4155 } 4156 4157 void IEEEFloat::makeZero(bool Negative) { 4158 category = fcZero; 4159 sign = Negative; 4160 exponent = exponentZero(); 4161 APInt::tcSet(significandParts(), 0, partCount()); 4162 } 4163 4164 void IEEEFloat::makeQuiet() { 4165 assert(isNaN()); 4166 APInt::tcSetBit(significandParts(), semantics->precision - 2); 4167 } 4168 4169 int ilogb(const IEEEFloat &Arg) { 4170 if (Arg.isNaN()) 4171 return IEEEFloat::IEK_NaN; 4172 if (Arg.isZero()) 4173 return IEEEFloat::IEK_Zero; 4174 if (Arg.isInfinity()) 4175 return IEEEFloat::IEK_Inf; 4176 if (!Arg.isDenormal()) 4177 return Arg.exponent; 4178 4179 IEEEFloat Normalized(Arg); 4180 int SignificandBits = Arg.getSemantics().precision - 1; 4181 4182 Normalized.exponent += SignificandBits; 4183 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero); 4184 return Normalized.exponent - SignificandBits; 4185 } 4186 4187 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) { 4188 auto MaxExp = X.getSemantics().maxExponent; 4189 auto MinExp = X.getSemantics().minExponent; 4190 4191 // If Exp is wildly out-of-scale, simply adding it to X.exponent will 4192 // overflow; clamp it to a safe range before adding, but ensure that the range 4193 // is large enough that the clamp does not change the result. The range we 4194 // need to support is the difference between the largest possible exponent and 4195 // the normalized exponent of half the smallest denormal. 4196 4197 int SignificandBits = X.getSemantics().precision - 1; 4198 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1; 4199 4200 // Clamp to one past the range ends to let normalize handle overlflow. 4201 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement); 4202 X.normalize(RoundingMode, lfExactlyZero); 4203 if (X.isNaN()) 4204 X.makeQuiet(); 4205 return X; 4206 } 4207 4208 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) { 4209 Exp = ilogb(Val); 4210 4211 // Quiet signalling nans. 4212 if (Exp == IEEEFloat::IEK_NaN) { 4213 IEEEFloat Quiet(Val); 4214 Quiet.makeQuiet(); 4215 return Quiet; 4216 } 4217 4218 if (Exp == IEEEFloat::IEK_Inf) 4219 return Val; 4220 4221 // 1 is added because frexp is defined to return a normalized fraction in 4222 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0). 4223 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1; 4224 return scalbn(Val, -Exp, RM); 4225 } 4226 4227 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S) 4228 : Semantics(&S), 4229 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) { 4230 assert(Semantics == &semPPCDoubleDouble); 4231 } 4232 4233 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag) 4234 : Semantics(&S), 4235 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized), 4236 APFloat(semIEEEdouble, uninitialized)}) { 4237 assert(Semantics == &semPPCDoubleDouble); 4238 } 4239 4240 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I) 4241 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I), 4242 APFloat(semIEEEdouble)}) { 4243 assert(Semantics == &semPPCDoubleDouble); 4244 } 4245 4246 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I) 4247 : Semantics(&S), 4248 Floats(new APFloat[2]{ 4249 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])), 4250 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) { 4251 assert(Semantics == &semPPCDoubleDouble); 4252 } 4253 4254 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First, 4255 APFloat &&Second) 4256 : Semantics(&S), 4257 Floats(new APFloat[2]{std::move(First), std::move(Second)}) { 4258 assert(Semantics == &semPPCDoubleDouble); 4259 assert(&Floats[0].getSemantics() == &semIEEEdouble); 4260 assert(&Floats[1].getSemantics() == &semIEEEdouble); 4261 } 4262 4263 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS) 4264 : Semantics(RHS.Semantics), 4265 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]), 4266 APFloat(RHS.Floats[1])} 4267 : nullptr) { 4268 assert(Semantics == &semPPCDoubleDouble); 4269 } 4270 4271 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS) 4272 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) { 4273 RHS.Semantics = &semBogus; 4274 assert(Semantics == &semPPCDoubleDouble); 4275 } 4276 4277 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) { 4278 if (Semantics == RHS.Semantics && RHS.Floats) { 4279 Floats[0] = RHS.Floats[0]; 4280 Floats[1] = RHS.Floats[1]; 4281 } else if (this != &RHS) { 4282 this->~DoubleAPFloat(); 4283 new (this) DoubleAPFloat(RHS); 4284 } 4285 return *this; 4286 } 4287 4288 // Implement addition, subtraction, multiplication and division based on: 4289 // "Software for Doubled-Precision Floating-Point Computations", 4290 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. 4291 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa, 4292 const APFloat &c, const APFloat &cc, 4293 roundingMode RM) { 4294 int Status = opOK; 4295 APFloat z = a; 4296 Status |= z.add(c, RM); 4297 if (!z.isFinite()) { 4298 if (!z.isInfinity()) { 4299 Floats[0] = std::move(z); 4300 Floats[1].makeZero(/* Neg = */ false); 4301 return (opStatus)Status; 4302 } 4303 Status = opOK; 4304 auto AComparedToC = a.compareAbsoluteValue(c); 4305 z = cc; 4306 Status |= z.add(aa, RM); 4307 if (AComparedToC == APFloat::cmpGreaterThan) { 4308 // z = cc + aa + c + a; 4309 Status |= z.add(c, RM); 4310 Status |= z.add(a, RM); 4311 } else { 4312 // z = cc + aa + a + c; 4313 Status |= z.add(a, RM); 4314 Status |= z.add(c, RM); 4315 } 4316 if (!z.isFinite()) { 4317 Floats[0] = std::move(z); 4318 Floats[1].makeZero(/* Neg = */ false); 4319 return (opStatus)Status; 4320 } 4321 Floats[0] = z; 4322 APFloat zz = aa; 4323 Status |= zz.add(cc, RM); 4324 if (AComparedToC == APFloat::cmpGreaterThan) { 4325 // Floats[1] = a - z + c + zz; 4326 Floats[1] = a; 4327 Status |= Floats[1].subtract(z, RM); 4328 Status |= Floats[1].add(c, RM); 4329 Status |= Floats[1].add(zz, RM); 4330 } else { 4331 // Floats[1] = c - z + a + zz; 4332 Floats[1] = c; 4333 Status |= Floats[1].subtract(z, RM); 4334 Status |= Floats[1].add(a, RM); 4335 Status |= Floats[1].add(zz, RM); 4336 } 4337 } else { 4338 // q = a - z; 4339 APFloat q = a; 4340 Status |= q.subtract(z, RM); 4341 4342 // zz = q + c + (a - (q + z)) + aa + cc; 4343 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. 4344 auto zz = q; 4345 Status |= zz.add(c, RM); 4346 Status |= q.add(z, RM); 4347 Status |= q.subtract(a, RM); 4348 q.changeSign(); 4349 Status |= zz.add(q, RM); 4350 Status |= zz.add(aa, RM); 4351 Status |= zz.add(cc, RM); 4352 if (zz.isZero() && !zz.isNegative()) { 4353 Floats[0] = std::move(z); 4354 Floats[1].makeZero(/* Neg = */ false); 4355 return opOK; 4356 } 4357 Floats[0] = z; 4358 Status |= Floats[0].add(zz, RM); 4359 if (!Floats[0].isFinite()) { 4360 Floats[1].makeZero(/* Neg = */ false); 4361 return (opStatus)Status; 4362 } 4363 Floats[1] = std::move(z); 4364 Status |= Floats[1].subtract(Floats[0], RM); 4365 Status |= Floats[1].add(zz, RM); 4366 } 4367 return (opStatus)Status; 4368 } 4369 4370 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS, 4371 const DoubleAPFloat &RHS, 4372 DoubleAPFloat &Out, 4373 roundingMode RM) { 4374 if (LHS.getCategory() == fcNaN) { 4375 Out = LHS; 4376 return opOK; 4377 } 4378 if (RHS.getCategory() == fcNaN) { 4379 Out = RHS; 4380 return opOK; 4381 } 4382 if (LHS.getCategory() == fcZero) { 4383 Out = RHS; 4384 return opOK; 4385 } 4386 if (RHS.getCategory() == fcZero) { 4387 Out = LHS; 4388 return opOK; 4389 } 4390 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity && 4391 LHS.isNegative() != RHS.isNegative()) { 4392 Out.makeNaN(false, Out.isNegative(), nullptr); 4393 return opInvalidOp; 4394 } 4395 if (LHS.getCategory() == fcInfinity) { 4396 Out = LHS; 4397 return opOK; 4398 } 4399 if (RHS.getCategory() == fcInfinity) { 4400 Out = RHS; 4401 return opOK; 4402 } 4403 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal); 4404 4405 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]), 4406 CC(RHS.Floats[1]); 4407 assert(&A.getSemantics() == &semIEEEdouble); 4408 assert(&AA.getSemantics() == &semIEEEdouble); 4409 assert(&C.getSemantics() == &semIEEEdouble); 4410 assert(&CC.getSemantics() == &semIEEEdouble); 4411 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble); 4412 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble); 4413 return Out.addImpl(A, AA, C, CC, RM); 4414 } 4415 4416 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS, 4417 roundingMode RM) { 4418 return addWithSpecial(*this, RHS, *this, RM); 4419 } 4420 4421 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS, 4422 roundingMode RM) { 4423 changeSign(); 4424 auto Ret = add(RHS, RM); 4425 changeSign(); 4426 return Ret; 4427 } 4428 4429 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS, 4430 APFloat::roundingMode RM) { 4431 const auto &LHS = *this; 4432 auto &Out = *this; 4433 /* Interesting observation: For special categories, finding the lowest 4434 common ancestor of the following layered graph gives the correct 4435 return category: 4436 4437 NaN 4438 / \ 4439 Zero Inf 4440 \ / 4441 Normal 4442 4443 e.g. NaN * NaN = NaN 4444 Zero * Inf = NaN 4445 Normal * Zero = Zero 4446 Normal * Inf = Inf 4447 */ 4448 if (LHS.getCategory() == fcNaN) { 4449 Out = LHS; 4450 return opOK; 4451 } 4452 if (RHS.getCategory() == fcNaN) { 4453 Out = RHS; 4454 return opOK; 4455 } 4456 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) || 4457 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) { 4458 Out.makeNaN(false, false, nullptr); 4459 return opOK; 4460 } 4461 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) { 4462 Out = LHS; 4463 return opOK; 4464 } 4465 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) { 4466 Out = RHS; 4467 return opOK; 4468 } 4469 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal && 4470 "Special cases not handled exhaustively"); 4471 4472 int Status = opOK; 4473 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1]; 4474 // t = a * c 4475 APFloat T = A; 4476 Status |= T.multiply(C, RM); 4477 if (!T.isFiniteNonZero()) { 4478 Floats[0] = T; 4479 Floats[1].makeZero(/* Neg = */ false); 4480 return (opStatus)Status; 4481 } 4482 4483 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t). 4484 APFloat Tau = A; 4485 T.changeSign(); 4486 Status |= Tau.fusedMultiplyAdd(C, T, RM); 4487 T.changeSign(); 4488 { 4489 // v = a * d 4490 APFloat V = A; 4491 Status |= V.multiply(D, RM); 4492 // w = b * c 4493 APFloat W = B; 4494 Status |= W.multiply(C, RM); 4495 Status |= V.add(W, RM); 4496 // tau += v + w 4497 Status |= Tau.add(V, RM); 4498 } 4499 // u = t + tau 4500 APFloat U = T; 4501 Status |= U.add(Tau, RM); 4502 4503 Floats[0] = U; 4504 if (!U.isFinite()) { 4505 Floats[1].makeZero(/* Neg = */ false); 4506 } else { 4507 // Floats[1] = (t - u) + tau 4508 Status |= T.subtract(U, RM); 4509 Status |= T.add(Tau, RM); 4510 Floats[1] = T; 4511 } 4512 return (opStatus)Status; 4513 } 4514 4515 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS, 4516 APFloat::roundingMode RM) { 4517 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4518 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4519 auto Ret = 4520 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM); 4521 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4522 return Ret; 4523 } 4524 4525 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) { 4526 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4527 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4528 auto Ret = 4529 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 4530 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4531 return Ret; 4532 } 4533 4534 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) { 4535 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4536 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4537 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 4538 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4539 return Ret; 4540 } 4541 4542 APFloat::opStatus 4543 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, 4544 const DoubleAPFloat &Addend, 4545 APFloat::roundingMode RM) { 4546 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4547 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4548 auto Ret = Tmp.fusedMultiplyAdd( 4549 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()), 4550 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM); 4551 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4552 return Ret; 4553 } 4554 4555 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) { 4556 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4557 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4558 auto Ret = Tmp.roundToIntegral(RM); 4559 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4560 return Ret; 4561 } 4562 4563 void DoubleAPFloat::changeSign() { 4564 Floats[0].changeSign(); 4565 Floats[1].changeSign(); 4566 } 4567 4568 APFloat::cmpResult 4569 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const { 4570 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]); 4571 if (Result != cmpEqual) 4572 return Result; 4573 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]); 4574 if (Result == cmpLessThan || Result == cmpGreaterThan) { 4575 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative(); 4576 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative(); 4577 if (Against && !RHSAgainst) 4578 return cmpLessThan; 4579 if (!Against && RHSAgainst) 4580 return cmpGreaterThan; 4581 if (!Against && !RHSAgainst) 4582 return Result; 4583 if (Against && RHSAgainst) 4584 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result); 4585 } 4586 return Result; 4587 } 4588 4589 APFloat::fltCategory DoubleAPFloat::getCategory() const { 4590 return Floats[0].getCategory(); 4591 } 4592 4593 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); } 4594 4595 void DoubleAPFloat::makeInf(bool Neg) { 4596 Floats[0].makeInf(Neg); 4597 Floats[1].makeZero(/* Neg = */ false); 4598 } 4599 4600 void DoubleAPFloat::makeZero(bool Neg) { 4601 Floats[0].makeZero(Neg); 4602 Floats[1].makeZero(/* Neg = */ false); 4603 } 4604 4605 void DoubleAPFloat::makeLargest(bool Neg) { 4606 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4607 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull)); 4608 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull)); 4609 if (Neg) 4610 changeSign(); 4611 } 4612 4613 void DoubleAPFloat::makeSmallest(bool Neg) { 4614 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4615 Floats[0].makeSmallest(Neg); 4616 Floats[1].makeZero(/* Neg = */ false); 4617 } 4618 4619 void DoubleAPFloat::makeSmallestNormalized(bool Neg) { 4620 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4621 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull)); 4622 if (Neg) 4623 Floats[0].changeSign(); 4624 Floats[1].makeZero(/* Neg = */ false); 4625 } 4626 4627 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) { 4628 Floats[0].makeNaN(SNaN, Neg, fill); 4629 Floats[1].makeZero(/* Neg = */ false); 4630 } 4631 4632 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const { 4633 auto Result = Floats[0].compare(RHS.Floats[0]); 4634 // |Float[0]| > |Float[1]| 4635 if (Result == APFloat::cmpEqual) 4636 return Floats[1].compare(RHS.Floats[1]); 4637 return Result; 4638 } 4639 4640 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const { 4641 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) && 4642 Floats[1].bitwiseIsEqual(RHS.Floats[1]); 4643 } 4644 4645 hash_code hash_value(const DoubleAPFloat &Arg) { 4646 if (Arg.Floats) 4647 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1])); 4648 return hash_combine(Arg.Semantics); 4649 } 4650 4651 APInt DoubleAPFloat::bitcastToAPInt() const { 4652 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4653 uint64_t Data[] = { 4654 Floats[0].bitcastToAPInt().getRawData()[0], 4655 Floats[1].bitcastToAPInt().getRawData()[0], 4656 }; 4657 return APInt(128, 2, Data); 4658 } 4659 4660 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S, 4661 roundingMode RM) { 4662 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4663 APFloat Tmp(semPPCDoubleDoubleLegacy); 4664 auto Ret = Tmp.convertFromString(S, RM); 4665 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4666 return Ret; 4667 } 4668 4669 APFloat::opStatus DoubleAPFloat::next(bool nextDown) { 4670 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4671 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4672 auto Ret = Tmp.next(nextDown); 4673 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4674 return Ret; 4675 } 4676 4677 APFloat::opStatus 4678 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input, 4679 unsigned int Width, bool IsSigned, 4680 roundingMode RM, bool *IsExact) const { 4681 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4682 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 4683 .convertToInteger(Input, Width, IsSigned, RM, IsExact); 4684 } 4685 4686 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input, 4687 bool IsSigned, 4688 roundingMode RM) { 4689 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4690 APFloat Tmp(semPPCDoubleDoubleLegacy); 4691 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM); 4692 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4693 return Ret; 4694 } 4695 4696 APFloat::opStatus 4697 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input, 4698 unsigned int InputSize, 4699 bool IsSigned, roundingMode RM) { 4700 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4701 APFloat Tmp(semPPCDoubleDoubleLegacy); 4702 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM); 4703 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4704 return Ret; 4705 } 4706 4707 APFloat::opStatus 4708 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input, 4709 unsigned int InputSize, 4710 bool IsSigned, roundingMode RM) { 4711 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4712 APFloat Tmp(semPPCDoubleDoubleLegacy); 4713 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM); 4714 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4715 return Ret; 4716 } 4717 4718 unsigned int DoubleAPFloat::convertToHexString(char *DST, 4719 unsigned int HexDigits, 4720 bool UpperCase, 4721 roundingMode RM) const { 4722 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4723 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 4724 .convertToHexString(DST, HexDigits, UpperCase, RM); 4725 } 4726 4727 bool DoubleAPFloat::isDenormal() const { 4728 return getCategory() == fcNormal && 4729 (Floats[0].isDenormal() || Floats[1].isDenormal() || 4730 // (double)(Hi + Lo) == Hi defines a normal number. 4731 Floats[0] != Floats[0] + Floats[1]); 4732 } 4733 4734 bool DoubleAPFloat::isSmallest() const { 4735 if (getCategory() != fcNormal) 4736 return false; 4737 DoubleAPFloat Tmp(*this); 4738 Tmp.makeSmallest(this->isNegative()); 4739 return Tmp.compare(*this) == cmpEqual; 4740 } 4741 4742 bool DoubleAPFloat::isLargest() const { 4743 if (getCategory() != fcNormal) 4744 return false; 4745 DoubleAPFloat Tmp(*this); 4746 Tmp.makeLargest(this->isNegative()); 4747 return Tmp.compare(*this) == cmpEqual; 4748 } 4749 4750 bool DoubleAPFloat::isInteger() const { 4751 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4752 return Floats[0].isInteger() && Floats[1].isInteger(); 4753 } 4754 4755 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str, 4756 unsigned FormatPrecision, 4757 unsigned FormatMaxPadding, 4758 bool TruncateZero) const { 4759 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4760 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 4761 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero); 4762 } 4763 4764 bool DoubleAPFloat::getExactInverse(APFloat *inv) const { 4765 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4766 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4767 if (!inv) 4768 return Tmp.getExactInverse(nullptr); 4769 APFloat Inv(semPPCDoubleDoubleLegacy); 4770 auto Ret = Tmp.getExactInverse(&Inv); 4771 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt()); 4772 return Ret; 4773 } 4774 4775 DoubleAPFloat scalbn(DoubleAPFloat Arg, int Exp, APFloat::roundingMode RM) { 4776 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4777 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM), 4778 scalbn(Arg.Floats[1], Exp, RM)); 4779 } 4780 4781 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp, 4782 APFloat::roundingMode RM) { 4783 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4784 APFloat First = frexp(Arg.Floats[0], Exp, RM); 4785 APFloat Second = Arg.Floats[1]; 4786 if (Arg.getCategory() == APFloat::fcNormal) 4787 Second = scalbn(Second, -Exp, RM); 4788 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second)); 4789 } 4790 4791 } // End detail namespace 4792 4793 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) { 4794 if (usesLayout<IEEEFloat>(Semantics)) { 4795 new (&IEEE) IEEEFloat(std::move(F)); 4796 return; 4797 } 4798 if (usesLayout<DoubleAPFloat>(Semantics)) { 4799 const fltSemantics& S = F.getSemantics(); 4800 new (&Double) 4801 DoubleAPFloat(Semantics, APFloat(std::move(F), S), 4802 APFloat(semIEEEdouble)); 4803 return; 4804 } 4805 llvm_unreachable("Unexpected semantics"); 4806 } 4807 4808 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str, 4809 roundingMode RM) { 4810 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM)); 4811 } 4812 4813 hash_code hash_value(const APFloat &Arg) { 4814 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics())) 4815 return hash_value(Arg.U.IEEE); 4816 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics())) 4817 return hash_value(Arg.U.Double); 4818 llvm_unreachable("Unexpected semantics"); 4819 } 4820 4821 APFloat::APFloat(const fltSemantics &Semantics, StringRef S) 4822 : APFloat(Semantics) { 4823 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven); 4824 assert(StatusOrErr && "Invalid floating point representation"); 4825 consumeError(StatusOrErr.takeError()); 4826 } 4827 4828 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, 4829 roundingMode RM, bool *losesInfo) { 4830 if (&getSemantics() == &ToSemantics) { 4831 *losesInfo = false; 4832 return opOK; 4833 } 4834 if (usesLayout<IEEEFloat>(getSemantics()) && 4835 usesLayout<IEEEFloat>(ToSemantics)) 4836 return U.IEEE.convert(ToSemantics, RM, losesInfo); 4837 if (usesLayout<IEEEFloat>(getSemantics()) && 4838 usesLayout<DoubleAPFloat>(ToSemantics)) { 4839 assert(&ToSemantics == &semPPCDoubleDouble); 4840 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo); 4841 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt()); 4842 return Ret; 4843 } 4844 if (usesLayout<DoubleAPFloat>(getSemantics()) && 4845 usesLayout<IEEEFloat>(ToSemantics)) { 4846 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo); 4847 *this = APFloat(std::move(getIEEE()), ToSemantics); 4848 return Ret; 4849 } 4850 llvm_unreachable("Unexpected semantics"); 4851 } 4852 4853 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics, 4854 unsigned BitWidth) { 4855 return APFloat(Semantics, APInt::getAllOnesValue(BitWidth)); 4856 } 4857 4858 void APFloat::print(raw_ostream &OS) const { 4859 SmallVector<char, 16> Buffer; 4860 toString(Buffer); 4861 OS << Buffer << "\n"; 4862 } 4863 4864 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 4865 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); } 4866 #endif 4867 4868 void APFloat::Profile(FoldingSetNodeID &NID) const { 4869 NID.Add(bitcastToAPInt()); 4870 } 4871 4872 /* Same as convertToInteger(integerPart*, ...), except the result is returned in 4873 an APSInt, whose initial bit-width and signed-ness are used to determine the 4874 precision of the conversion. 4875 */ 4876 APFloat::opStatus APFloat::convertToInteger(APSInt &result, 4877 roundingMode rounding_mode, 4878 bool *isExact) const { 4879 unsigned bitWidth = result.getBitWidth(); 4880 SmallVector<uint64_t, 4> parts(result.getNumWords()); 4881 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(), 4882 rounding_mode, isExact); 4883 // Keeps the original signed-ness. 4884 result = APInt(bitWidth, parts); 4885 return status; 4886 } 4887 4888 } // End llvm namespace 4889 4890 #undef APFLOAT_DISPATCH_ON_SEMANTICS 4891