1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <cstring>
28 #include <limits.h>
29 
30 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
31   do {                                                                         \
32     if (usesLayout<IEEEFloat>(getSemantics()))                                 \
33       return U.IEEE.METHOD_CALL;                                               \
34     if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
35       return U.Double.METHOD_CALL;                                             \
36     llvm_unreachable("Unexpected semantics");                                  \
37   } while (false)
38 
39 using namespace llvm;
40 
41 /// A macro used to combine two fcCategory enums into one key which can be used
42 /// in a switch statement to classify how the interaction of two APFloat's
43 /// categories affects an operation.
44 ///
45 /// TODO: If clang source code is ever allowed to use constexpr in its own
46 /// codebase, change this into a static inline function.
47 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
48 
49 /* Assumed in hexadecimal significand parsing, and conversion to
50    hexadecimal strings.  */
51 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
52 
53 namespace llvm {
54   /* Represents floating point arithmetic semantics.  */
55   struct fltSemantics {
56     /* The largest E such that 2^E is representable; this matches the
57        definition of IEEE 754.  */
58     APFloatBase::ExponentType maxExponent;
59 
60     /* The smallest E such that 2^E is a normalized number; this
61        matches the definition of IEEE 754.  */
62     APFloatBase::ExponentType minExponent;
63 
64     /* Number of bits in the significand.  This includes the integer
65        bit.  */
66     unsigned int precision;
67 
68     /* Number of bits actually used in the semantics. */
69     unsigned int sizeInBits;
70   };
71 
72   static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
73   static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
74   static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
75   static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
76   static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
77   static const fltSemantics semBogus = {0, 0, 0, 0};
78 
79   /* The IBM double-double semantics. Such a number consists of a pair of IEEE
80      64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
81      (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
82      Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
83      to each other, and two 11-bit exponents.
84 
85      Note: we need to make the value different from semBogus as otherwise
86      an unsafe optimization may collapse both values to a single address,
87      and we heavily rely on them having distinct addresses.             */
88   static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0};
89 
90   /* These are legacy semantics for the fallback, inaccrurate implementation of
91      IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
92      operation. It's equivalent to having an IEEE number with consecutive 106
93      bits of mantissa and 11 bits of exponent.
94 
95      It's not equivalent to IBM double-double. For example, a legit IBM
96      double-double, 1 + epsilon:
97 
98        1 + epsilon = 1 + (1 >> 1076)
99 
100      is not representable by a consecutive 106 bits of mantissa.
101 
102      Currently, these semantics are used in the following way:
103 
104        semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
105        (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
106        semPPCDoubleDoubleLegacy -> IEEE operations
107 
108      We use bitcastToAPInt() to get the bit representation (in APInt) of the
109      underlying IEEEdouble, then use the APInt constructor to construct the
110      legacy IEEE float.
111 
112      TODO: Implement all operations in semPPCDoubleDouble, and delete these
113      semantics.  */
114   static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
115                                                         53 + 53, 128};
116 
IEEEhalf()117   const fltSemantics &APFloatBase::IEEEhalf() {
118     return semIEEEhalf;
119   }
IEEEsingle()120   const fltSemantics &APFloatBase::IEEEsingle() {
121     return semIEEEsingle;
122   }
IEEEdouble()123   const fltSemantics &APFloatBase::IEEEdouble() {
124     return semIEEEdouble;
125   }
IEEEquad()126   const fltSemantics &APFloatBase::IEEEquad() {
127     return semIEEEquad;
128   }
x87DoubleExtended()129   const fltSemantics &APFloatBase::x87DoubleExtended() {
130     return semX87DoubleExtended;
131   }
Bogus()132   const fltSemantics &APFloatBase::Bogus() {
133     return semBogus;
134   }
PPCDoubleDouble()135   const fltSemantics &APFloatBase::PPCDoubleDouble() {
136     return semPPCDoubleDouble;
137   }
138 
139   /* A tight upper bound on number of parts required to hold the value
140      pow(5, power) is
141 
142        power * 815 / (351 * integerPartWidth) + 1
143 
144      However, whilst the result may require only this many parts,
145      because we are multiplying two values to get it, the
146      multiplication may require an extra part with the excess part
147      being zero (consider the trivial case of 1 * 1, tcFullMultiply
148      requires two parts to hold the single-part result).  So we add an
149      extra one to guarantee enough space whilst multiplying.  */
150   const unsigned int maxExponent = 16383;
151   const unsigned int maxPrecision = 113;
152   const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
153   const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
154 
semanticsPrecision(const fltSemantics & semantics)155   unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
156     return semantics.precision;
157   }
158   APFloatBase::ExponentType
semanticsMaxExponent(const fltSemantics & semantics)159   APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
160     return semantics.maxExponent;
161   }
162   APFloatBase::ExponentType
semanticsMinExponent(const fltSemantics & semantics)163   APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
164     return semantics.minExponent;
165   }
semanticsSizeInBits(const fltSemantics & semantics)166   unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
167     return semantics.sizeInBits;
168   }
169 
getSizeInBits(const fltSemantics & Sem)170   unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
171     return Sem.sizeInBits;
172 }
173 
174 /* A bunch of private, handy routines.  */
175 
176 static inline unsigned int
partCountForBits(unsigned int bits)177 partCountForBits(unsigned int bits)
178 {
179   return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
180 }
181 
182 /* Returns 0U-9U.  Return values >= 10U are not digits.  */
183 static inline unsigned int
decDigitValue(unsigned int c)184 decDigitValue(unsigned int c)
185 {
186   return c - '0';
187 }
188 
189 /* Return the value of a decimal exponent of the form
190    [+-]ddddddd.
191 
192    If the exponent overflows, returns a large exponent with the
193    appropriate sign.  */
194 static int
readExponent(StringRef::iterator begin,StringRef::iterator end)195 readExponent(StringRef::iterator begin, StringRef::iterator end)
196 {
197   bool isNegative;
198   unsigned int absExponent;
199   const unsigned int overlargeExponent = 24000;  /* FIXME.  */
200   StringRef::iterator p = begin;
201 
202   assert(p != end && "Exponent has no digits");
203 
204   isNegative = (*p == '-');
205   if (*p == '-' || *p == '+') {
206     p++;
207     assert(p != end && "Exponent has no digits");
208   }
209 
210   absExponent = decDigitValue(*p++);
211   assert(absExponent < 10U && "Invalid character in exponent");
212 
213   for (; p != end; ++p) {
214     unsigned int value;
215 
216     value = decDigitValue(*p);
217     assert(value < 10U && "Invalid character in exponent");
218 
219     value += absExponent * 10;
220     if (absExponent >= overlargeExponent) {
221       absExponent = overlargeExponent;
222       p = end;  /* outwit assert below */
223       break;
224     }
225     absExponent = value;
226   }
227 
228   assert(p == end && "Invalid exponent in exponent");
229 
230   if (isNegative)
231     return -(int) absExponent;
232   else
233     return (int) absExponent;
234 }
235 
236 /* This is ugly and needs cleaning up, but I don't immediately see
237    how whilst remaining safe.  */
238 static int
totalExponent(StringRef::iterator p,StringRef::iterator end,int exponentAdjustment)239 totalExponent(StringRef::iterator p, StringRef::iterator end,
240               int exponentAdjustment)
241 {
242   int unsignedExponent;
243   bool negative, overflow;
244   int exponent = 0;
245 
246   assert(p != end && "Exponent has no digits");
247 
248   negative = *p == '-';
249   if (*p == '-' || *p == '+') {
250     p++;
251     assert(p != end && "Exponent has no digits");
252   }
253 
254   unsignedExponent = 0;
255   overflow = false;
256   for (; p != end; ++p) {
257     unsigned int value;
258 
259     value = decDigitValue(*p);
260     assert(value < 10U && "Invalid character in exponent");
261 
262     unsignedExponent = unsignedExponent * 10 + value;
263     if (unsignedExponent > 32767) {
264       overflow = true;
265       break;
266     }
267   }
268 
269   if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
270     overflow = true;
271 
272   if (!overflow) {
273     exponent = unsignedExponent;
274     if (negative)
275       exponent = -exponent;
276     exponent += exponentAdjustment;
277     if (exponent > 32767 || exponent < -32768)
278       overflow = true;
279   }
280 
281   if (overflow)
282     exponent = negative ? -32768: 32767;
283 
284   return exponent;
285 }
286 
287 static StringRef::iterator
skipLeadingZeroesAndAnyDot(StringRef::iterator begin,StringRef::iterator end,StringRef::iterator * dot)288 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
289                            StringRef::iterator *dot)
290 {
291   StringRef::iterator p = begin;
292   *dot = end;
293   while (p != end && *p == '0')
294     p++;
295 
296   if (p != end && *p == '.') {
297     *dot = p++;
298 
299     assert(end - begin != 1 && "Significand has no digits");
300 
301     while (p != end && *p == '0')
302       p++;
303   }
304 
305   return p;
306 }
307 
308 /* Given a normal decimal floating point number of the form
309 
310      dddd.dddd[eE][+-]ddd
311 
312    where the decimal point and exponent are optional, fill out the
313    structure D.  Exponent is appropriate if the significand is
314    treated as an integer, and normalizedExponent if the significand
315    is taken to have the decimal point after a single leading
316    non-zero digit.
317 
318    If the value is zero, V->firstSigDigit points to a non-digit, and
319    the return exponent is zero.
320 */
321 struct decimalInfo {
322   const char *firstSigDigit;
323   const char *lastSigDigit;
324   int exponent;
325   int normalizedExponent;
326 };
327 
328 static void
interpretDecimal(StringRef::iterator begin,StringRef::iterator end,decimalInfo * D)329 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
330                  decimalInfo *D)
331 {
332   StringRef::iterator dot = end;
333   StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
334 
335   D->firstSigDigit = p;
336   D->exponent = 0;
337   D->normalizedExponent = 0;
338 
339   for (; p != end; ++p) {
340     if (*p == '.') {
341       assert(dot == end && "String contains multiple dots");
342       dot = p++;
343       if (p == end)
344         break;
345     }
346     if (decDigitValue(*p) >= 10U)
347       break;
348   }
349 
350   if (p != end) {
351     assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
352     assert(p != begin && "Significand has no digits");
353     assert((dot == end || p - begin != 1) && "Significand has no digits");
354 
355     /* p points to the first non-digit in the string */
356     D->exponent = readExponent(p + 1, end);
357 
358     /* Implied decimal point?  */
359     if (dot == end)
360       dot = p;
361   }
362 
363   /* If number is all zeroes accept any exponent.  */
364   if (p != D->firstSigDigit) {
365     /* Drop insignificant trailing zeroes.  */
366     if (p != begin) {
367       do
368         do
369           p--;
370         while (p != begin && *p == '0');
371       while (p != begin && *p == '.');
372     }
373 
374     /* Adjust the exponents for any decimal point.  */
375     D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
376     D->normalizedExponent = (D->exponent +
377               static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
378                                       - (dot > D->firstSigDigit && dot < p)));
379   }
380 
381   D->lastSigDigit = p;
382 }
383 
384 /* Return the trailing fraction of a hexadecimal number.
385    DIGITVALUE is the first hex digit of the fraction, P points to
386    the next digit.  */
387 static lostFraction
trailingHexadecimalFraction(StringRef::iterator p,StringRef::iterator end,unsigned int digitValue)388 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
389                             unsigned int digitValue)
390 {
391   unsigned int hexDigit;
392 
393   /* If the first trailing digit isn't 0 or 8 we can work out the
394      fraction immediately.  */
395   if (digitValue > 8)
396     return lfMoreThanHalf;
397   else if (digitValue < 8 && digitValue > 0)
398     return lfLessThanHalf;
399 
400   // Otherwise we need to find the first non-zero digit.
401   while (p != end && (*p == '0' || *p == '.'))
402     p++;
403 
404   assert(p != end && "Invalid trailing hexadecimal fraction!");
405 
406   hexDigit = hexDigitValue(*p);
407 
408   /* If we ran off the end it is exactly zero or one-half, otherwise
409      a little more.  */
410   if (hexDigit == -1U)
411     return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
412   else
413     return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
414 }
415 
416 /* Return the fraction lost were a bignum truncated losing the least
417    significant BITS bits.  */
418 static lostFraction
lostFractionThroughTruncation(const APFloatBase::integerPart * parts,unsigned int partCount,unsigned int bits)419 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
420                               unsigned int partCount,
421                               unsigned int bits)
422 {
423   unsigned int lsb;
424 
425   lsb = APInt::tcLSB(parts, partCount);
426 
427   /* Note this is guaranteed true if bits == 0, or LSB == -1U.  */
428   if (bits <= lsb)
429     return lfExactlyZero;
430   if (bits == lsb + 1)
431     return lfExactlyHalf;
432   if (bits <= partCount * APFloatBase::integerPartWidth &&
433       APInt::tcExtractBit(parts, bits - 1))
434     return lfMoreThanHalf;
435 
436   return lfLessThanHalf;
437 }
438 
439 /* Shift DST right BITS bits noting lost fraction.  */
440 static lostFraction
shiftRight(APFloatBase::integerPart * dst,unsigned int parts,unsigned int bits)441 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
442 {
443   lostFraction lost_fraction;
444 
445   lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
446 
447   APInt::tcShiftRight(dst, parts, bits);
448 
449   return lost_fraction;
450 }
451 
452 /* Combine the effect of two lost fractions.  */
453 static lostFraction
combineLostFractions(lostFraction moreSignificant,lostFraction lessSignificant)454 combineLostFractions(lostFraction moreSignificant,
455                      lostFraction lessSignificant)
456 {
457   if (lessSignificant != lfExactlyZero) {
458     if (moreSignificant == lfExactlyZero)
459       moreSignificant = lfLessThanHalf;
460     else if (moreSignificant == lfExactlyHalf)
461       moreSignificant = lfMoreThanHalf;
462   }
463 
464   return moreSignificant;
465 }
466 
467 /* The error from the true value, in half-ulps, on multiplying two
468    floating point numbers, which differ from the value they
469    approximate by at most HUE1 and HUE2 half-ulps, is strictly less
470    than the returned value.
471 
472    See "How to Read Floating Point Numbers Accurately" by William D
473    Clinger.  */
474 static unsigned int
HUerrBound(bool inexactMultiply,unsigned int HUerr1,unsigned int HUerr2)475 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
476 {
477   assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
478 
479   if (HUerr1 + HUerr2 == 0)
480     return inexactMultiply * 2;  /* <= inexactMultiply half-ulps.  */
481   else
482     return inexactMultiply + 2 * (HUerr1 + HUerr2);
483 }
484 
485 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
486    when the least significant BITS are truncated.  BITS cannot be
487    zero.  */
488 static APFloatBase::integerPart
ulpsFromBoundary(const APFloatBase::integerPart * parts,unsigned int bits,bool isNearest)489 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
490                  bool isNearest) {
491   unsigned int count, partBits;
492   APFloatBase::integerPart part, boundary;
493 
494   assert(bits != 0);
495 
496   bits--;
497   count = bits / APFloatBase::integerPartWidth;
498   partBits = bits % APFloatBase::integerPartWidth + 1;
499 
500   part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
501 
502   if (isNearest)
503     boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
504   else
505     boundary = 0;
506 
507   if (count == 0) {
508     if (part - boundary <= boundary - part)
509       return part - boundary;
510     else
511       return boundary - part;
512   }
513 
514   if (part == boundary) {
515     while (--count)
516       if (parts[count])
517         return ~(APFloatBase::integerPart) 0; /* A lot.  */
518 
519     return parts[0];
520   } else if (part == boundary - 1) {
521     while (--count)
522       if (~parts[count])
523         return ~(APFloatBase::integerPart) 0; /* A lot.  */
524 
525     return -parts[0];
526   }
527 
528   return ~(APFloatBase::integerPart) 0; /* A lot.  */
529 }
530 
531 /* Place pow(5, power) in DST, and return the number of parts used.
532    DST must be at least one part larger than size of the answer.  */
533 static unsigned int
powerOf5(APFloatBase::integerPart * dst,unsigned int power)534 powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
535   static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
536   APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
537   pow5s[0] = 78125 * 5;
538 
539   unsigned int partsCount[16] = { 1 };
540   APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
541   unsigned int result;
542   assert(power <= maxExponent);
543 
544   p1 = dst;
545   p2 = scratch;
546 
547   *p1 = firstEightPowers[power & 7];
548   power >>= 3;
549 
550   result = 1;
551   pow5 = pow5s;
552 
553   for (unsigned int n = 0; power; power >>= 1, n++) {
554     unsigned int pc;
555 
556     pc = partsCount[n];
557 
558     /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
559     if (pc == 0) {
560       pc = partsCount[n - 1];
561       APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
562       pc *= 2;
563       if (pow5[pc - 1] == 0)
564         pc--;
565       partsCount[n] = pc;
566     }
567 
568     if (power & 1) {
569       APFloatBase::integerPart *tmp;
570 
571       APInt::tcFullMultiply(p2, p1, pow5, result, pc);
572       result += pc;
573       if (p2[result - 1] == 0)
574         result--;
575 
576       /* Now result is in p1 with partsCount parts and p2 is scratch
577          space.  */
578       tmp = p1;
579       p1 = p2;
580       p2 = tmp;
581     }
582 
583     pow5 += pc;
584   }
585 
586   if (p1 != dst)
587     APInt::tcAssign(dst, p1, result);
588 
589   return result;
590 }
591 
592 /* Zero at the end to avoid modular arithmetic when adding one; used
593    when rounding up during hexadecimal output.  */
594 static const char hexDigitsLower[] = "0123456789abcdef0";
595 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
596 static const char infinityL[] = "infinity";
597 static const char infinityU[] = "INFINITY";
598 static const char NaNL[] = "nan";
599 static const char NaNU[] = "NAN";
600 
601 /* Write out an integerPart in hexadecimal, starting with the most
602    significant nibble.  Write out exactly COUNT hexdigits, return
603    COUNT.  */
604 static unsigned int
partAsHex(char * dst,APFloatBase::integerPart part,unsigned int count,const char * hexDigitChars)605 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
606            const char *hexDigitChars)
607 {
608   unsigned int result = count;
609 
610   assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
611 
612   part >>= (APFloatBase::integerPartWidth - 4 * count);
613   while (count--) {
614     dst[count] = hexDigitChars[part & 0xf];
615     part >>= 4;
616   }
617 
618   return result;
619 }
620 
621 /* Write out an unsigned decimal integer.  */
622 static char *
writeUnsignedDecimal(char * dst,unsigned int n)623 writeUnsignedDecimal (char *dst, unsigned int n)
624 {
625   char buff[40], *p;
626 
627   p = buff;
628   do
629     *p++ = '0' + n % 10;
630   while (n /= 10);
631 
632   do
633     *dst++ = *--p;
634   while (p != buff);
635 
636   return dst;
637 }
638 
639 /* Write out a signed decimal integer.  */
640 static char *
writeSignedDecimal(char * dst,int value)641 writeSignedDecimal (char *dst, int value)
642 {
643   if (value < 0) {
644     *dst++ = '-';
645     dst = writeUnsignedDecimal(dst, -(unsigned) value);
646   } else
647     dst = writeUnsignedDecimal(dst, value);
648 
649   return dst;
650 }
651 
652 namespace detail {
653 /* Constructors.  */
initialize(const fltSemantics * ourSemantics)654 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
655   unsigned int count;
656 
657   semantics = ourSemantics;
658   count = partCount();
659   if (count > 1)
660     significand.parts = new integerPart[count];
661 }
662 
freeSignificand()663 void IEEEFloat::freeSignificand() {
664   if (needsCleanup())
665     delete [] significand.parts;
666 }
667 
assign(const IEEEFloat & rhs)668 void IEEEFloat::assign(const IEEEFloat &rhs) {
669   assert(semantics == rhs.semantics);
670 
671   sign = rhs.sign;
672   category = rhs.category;
673   exponent = rhs.exponent;
674   if (isFiniteNonZero() || category == fcNaN)
675     copySignificand(rhs);
676 }
677 
copySignificand(const IEEEFloat & rhs)678 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
679   assert(isFiniteNonZero() || category == fcNaN);
680   assert(rhs.partCount() >= partCount());
681 
682   APInt::tcAssign(significandParts(), rhs.significandParts(),
683                   partCount());
684 }
685 
686 /* Make this number a NaN, with an arbitrary but deterministic value
687    for the significand.  If double or longer, this is a signalling NaN,
688    which may not be ideal.  If float, this is QNaN(0).  */
makeNaN(bool SNaN,bool Negative,const APInt * fill)689 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
690   category = fcNaN;
691   sign = Negative;
692 
693   integerPart *significand = significandParts();
694   unsigned numParts = partCount();
695 
696   // Set the significand bits to the fill.
697   if (!fill || fill->getNumWords() < numParts)
698     APInt::tcSet(significand, 0, numParts);
699   if (fill) {
700     APInt::tcAssign(significand, fill->getRawData(),
701                     std::min(fill->getNumWords(), numParts));
702 
703     // Zero out the excess bits of the significand.
704     unsigned bitsToPreserve = semantics->precision - 1;
705     unsigned part = bitsToPreserve / 64;
706     bitsToPreserve %= 64;
707     significand[part] &= ((1ULL << bitsToPreserve) - 1);
708     for (part++; part != numParts; ++part)
709       significand[part] = 0;
710   }
711 
712   unsigned QNaNBit = semantics->precision - 2;
713 
714   if (SNaN) {
715     // We always have to clear the QNaN bit to make it an SNaN.
716     APInt::tcClearBit(significand, QNaNBit);
717 
718     // If there are no bits set in the payload, we have to set
719     // *something* to make it a NaN instead of an infinity;
720     // conventionally, this is the next bit down from the QNaN bit.
721     if (APInt::tcIsZero(significand, numParts))
722       APInt::tcSetBit(significand, QNaNBit - 1);
723   } else {
724     // We always have to set the QNaN bit to make it a QNaN.
725     APInt::tcSetBit(significand, QNaNBit);
726   }
727 
728   // For x87 extended precision, we want to make a NaN, not a
729   // pseudo-NaN.  Maybe we should expose the ability to make
730   // pseudo-NaNs?
731   if (semantics == &semX87DoubleExtended)
732     APInt::tcSetBit(significand, QNaNBit + 1);
733 }
734 
operator =(const IEEEFloat & rhs)735 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
736   if (this != &rhs) {
737     if (semantics != rhs.semantics) {
738       freeSignificand();
739       initialize(rhs.semantics);
740     }
741     assign(rhs);
742   }
743 
744   return *this;
745 }
746 
operator =(IEEEFloat && rhs)747 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
748   freeSignificand();
749 
750   semantics = rhs.semantics;
751   significand = rhs.significand;
752   exponent = rhs.exponent;
753   category = rhs.category;
754   sign = rhs.sign;
755 
756   rhs.semantics = &semBogus;
757   return *this;
758 }
759 
isDenormal() const760 bool IEEEFloat::isDenormal() const {
761   return isFiniteNonZero() && (exponent == semantics->minExponent) &&
762          (APInt::tcExtractBit(significandParts(),
763                               semantics->precision - 1) == 0);
764 }
765 
isSmallest() const766 bool IEEEFloat::isSmallest() const {
767   // The smallest number by magnitude in our format will be the smallest
768   // denormal, i.e. the floating point number with exponent being minimum
769   // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
770   return isFiniteNonZero() && exponent == semantics->minExponent &&
771     significandMSB() == 0;
772 }
773 
isSignificandAllOnes() const774 bool IEEEFloat::isSignificandAllOnes() const {
775   // Test if the significand excluding the integral bit is all ones. This allows
776   // us to test for binade boundaries.
777   const integerPart *Parts = significandParts();
778   const unsigned PartCount = partCount();
779   for (unsigned i = 0; i < PartCount - 1; i++)
780     if (~Parts[i])
781       return false;
782 
783   // Set the unused high bits to all ones when we compare.
784   const unsigned NumHighBits =
785     PartCount*integerPartWidth - semantics->precision + 1;
786   assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
787          "fill than integerPartWidth");
788   const integerPart HighBitFill =
789     ~integerPart(0) << (integerPartWidth - NumHighBits);
790   if (~(Parts[PartCount - 1] | HighBitFill))
791     return false;
792 
793   return true;
794 }
795 
isSignificandAllZeros() const796 bool IEEEFloat::isSignificandAllZeros() const {
797   // Test if the significand excluding the integral bit is all zeros. This
798   // allows us to test for binade boundaries.
799   const integerPart *Parts = significandParts();
800   const unsigned PartCount = partCount();
801 
802   for (unsigned i = 0; i < PartCount - 1; i++)
803     if (Parts[i])
804       return false;
805 
806   const unsigned NumHighBits =
807     PartCount*integerPartWidth - semantics->precision + 1;
808   assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
809          "clear than integerPartWidth");
810   const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
811 
812   if (Parts[PartCount - 1] & HighBitMask)
813     return false;
814 
815   return true;
816 }
817 
isLargest() const818 bool IEEEFloat::isLargest() const {
819   // The largest number by magnitude in our format will be the floating point
820   // number with maximum exponent and with significand that is all ones.
821   return isFiniteNonZero() && exponent == semantics->maxExponent
822     && isSignificandAllOnes();
823 }
824 
isInteger() const825 bool IEEEFloat::isInteger() const {
826   // This could be made more efficient; I'm going for obviously correct.
827   if (!isFinite()) return false;
828   IEEEFloat truncated = *this;
829   truncated.roundToIntegral(rmTowardZero);
830   return compare(truncated) == cmpEqual;
831 }
832 
bitwiseIsEqual(const IEEEFloat & rhs) const833 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
834   if (this == &rhs)
835     return true;
836   if (semantics != rhs.semantics ||
837       category != rhs.category ||
838       sign != rhs.sign)
839     return false;
840   if (category==fcZero || category==fcInfinity)
841     return true;
842 
843   if (isFiniteNonZero() && exponent != rhs.exponent)
844     return false;
845 
846   return std::equal(significandParts(), significandParts() + partCount(),
847                     rhs.significandParts());
848 }
849 
IEEEFloat(const fltSemantics & ourSemantics,integerPart value)850 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
851   initialize(&ourSemantics);
852   sign = 0;
853   category = fcNormal;
854   zeroSignificand();
855   exponent = ourSemantics.precision - 1;
856   significandParts()[0] = value;
857   normalize(rmNearestTiesToEven, lfExactlyZero);
858 }
859 
IEEEFloat(const fltSemantics & ourSemantics)860 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
861   initialize(&ourSemantics);
862   category = fcZero;
863   sign = false;
864 }
865 
866 // Delegate to the previous constructor, because later copy constructor may
867 // actually inspects category, which can't be garbage.
IEEEFloat(const fltSemantics & ourSemantics,uninitializedTag tag)868 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
869     : IEEEFloat(ourSemantics) {}
870 
IEEEFloat(const IEEEFloat & rhs)871 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
872   initialize(rhs.semantics);
873   assign(rhs);
874 }
875 
IEEEFloat(IEEEFloat && rhs)876 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
877   *this = std::move(rhs);
878 }
879 
~IEEEFloat()880 IEEEFloat::~IEEEFloat() { freeSignificand(); }
881 
partCount() const882 unsigned int IEEEFloat::partCount() const {
883   return partCountForBits(semantics->precision + 1);
884 }
885 
significandParts() const886 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
887   return const_cast<IEEEFloat *>(this)->significandParts();
888 }
889 
significandParts()890 IEEEFloat::integerPart *IEEEFloat::significandParts() {
891   if (partCount() > 1)
892     return significand.parts;
893   else
894     return &significand.part;
895 }
896 
zeroSignificand()897 void IEEEFloat::zeroSignificand() {
898   APInt::tcSet(significandParts(), 0, partCount());
899 }
900 
901 /* Increment an fcNormal floating point number's significand.  */
incrementSignificand()902 void IEEEFloat::incrementSignificand() {
903   integerPart carry;
904 
905   carry = APInt::tcIncrement(significandParts(), partCount());
906 
907   /* Our callers should never cause us to overflow.  */
908   assert(carry == 0);
909   (void)carry;
910 }
911 
912 /* Add the significand of the RHS.  Returns the carry flag.  */
addSignificand(const IEEEFloat & rhs)913 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
914   integerPart *parts;
915 
916   parts = significandParts();
917 
918   assert(semantics == rhs.semantics);
919   assert(exponent == rhs.exponent);
920 
921   return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
922 }
923 
924 /* Subtract the significand of the RHS with a borrow flag.  Returns
925    the borrow flag.  */
subtractSignificand(const IEEEFloat & rhs,integerPart borrow)926 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
927                                                       integerPart borrow) {
928   integerPart *parts;
929 
930   parts = significandParts();
931 
932   assert(semantics == rhs.semantics);
933   assert(exponent == rhs.exponent);
934 
935   return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
936                            partCount());
937 }
938 
939 /* Multiply the significand of the RHS.  If ADDEND is non-NULL, add it
940    on to the full-precision result of the multiplication.  Returns the
941    lost fraction.  */
multiplySignificand(const IEEEFloat & rhs,const IEEEFloat * addend)942 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
943                                             const IEEEFloat *addend) {
944   unsigned int omsb;        // One, not zero, based MSB.
945   unsigned int partsCount, newPartsCount, precision;
946   integerPart *lhsSignificand;
947   integerPart scratch[4];
948   integerPart *fullSignificand;
949   lostFraction lost_fraction;
950   bool ignored;
951 
952   assert(semantics == rhs.semantics);
953 
954   precision = semantics->precision;
955 
956   // Allocate space for twice as many bits as the original significand, plus one
957   // extra bit for the addition to overflow into.
958   newPartsCount = partCountForBits(precision * 2 + 1);
959 
960   if (newPartsCount > 4)
961     fullSignificand = new integerPart[newPartsCount];
962   else
963     fullSignificand = scratch;
964 
965   lhsSignificand = significandParts();
966   partsCount = partCount();
967 
968   APInt::tcFullMultiply(fullSignificand, lhsSignificand,
969                         rhs.significandParts(), partsCount, partsCount);
970 
971   lost_fraction = lfExactlyZero;
972   omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
973   exponent += rhs.exponent;
974 
975   // Assume the operands involved in the multiplication are single-precision
976   // FP, and the two multiplicants are:
977   //   *this = a23 . a22 ... a0 * 2^e1
978   //     rhs = b23 . b22 ... b0 * 2^e2
979   // the result of multiplication is:
980   //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
981   // Note that there are three significant bits at the left-hand side of the
982   // radix point: two for the multiplication, and an overflow bit for the
983   // addition (that will always be zero at this point). Move the radix point
984   // toward left by two bits, and adjust exponent accordingly.
985   exponent += 2;
986 
987   if (addend && addend->isNonZero()) {
988     // The intermediate result of the multiplication has "2 * precision"
989     // signicant bit; adjust the addend to be consistent with mul result.
990     //
991     Significand savedSignificand = significand;
992     const fltSemantics *savedSemantics = semantics;
993     fltSemantics extendedSemantics;
994     opStatus status;
995     unsigned int extendedPrecision;
996 
997     // Normalize our MSB to one below the top bit to allow for overflow.
998     extendedPrecision = 2 * precision + 1;
999     if (omsb != extendedPrecision - 1) {
1000       assert(extendedPrecision > omsb);
1001       APInt::tcShiftLeft(fullSignificand, newPartsCount,
1002                          (extendedPrecision - 1) - omsb);
1003       exponent -= (extendedPrecision - 1) - omsb;
1004     }
1005 
1006     /* Create new semantics.  */
1007     extendedSemantics = *semantics;
1008     extendedSemantics.precision = extendedPrecision;
1009 
1010     if (newPartsCount == 1)
1011       significand.part = fullSignificand[0];
1012     else
1013       significand.parts = fullSignificand;
1014     semantics = &extendedSemantics;
1015 
1016     IEEEFloat extendedAddend(*addend);
1017     status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1018     assert(status == opOK);
1019     (void)status;
1020 
1021     // Shift the significand of the addend right by one bit. This guarantees
1022     // that the high bit of the significand is zero (same as fullSignificand),
1023     // so the addition will overflow (if it does overflow at all) into the top bit.
1024     lost_fraction = extendedAddend.shiftSignificandRight(1);
1025     assert(lost_fraction == lfExactlyZero &&
1026            "Lost precision while shifting addend for fused-multiply-add.");
1027 
1028     lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1029 
1030     /* Restore our state.  */
1031     if (newPartsCount == 1)
1032       fullSignificand[0] = significand.part;
1033     significand = savedSignificand;
1034     semantics = savedSemantics;
1035 
1036     omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1037   }
1038 
1039   // Convert the result having "2 * precision" significant-bits back to the one
1040   // having "precision" significant-bits. First, move the radix point from
1041   // poision "2*precision - 1" to "precision - 1". The exponent need to be
1042   // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1043   exponent -= precision + 1;
1044 
1045   // In case MSB resides at the left-hand side of radix point, shift the
1046   // mantissa right by some amount to make sure the MSB reside right before
1047   // the radix point (i.e. "MSB . rest-significant-bits").
1048   //
1049   // Note that the result is not normalized when "omsb < precision". So, the
1050   // caller needs to call IEEEFloat::normalize() if normalized value is
1051   // expected.
1052   if (omsb > precision) {
1053     unsigned int bits, significantParts;
1054     lostFraction lf;
1055 
1056     bits = omsb - precision;
1057     significantParts = partCountForBits(omsb);
1058     lf = shiftRight(fullSignificand, significantParts, bits);
1059     lost_fraction = combineLostFractions(lf, lost_fraction);
1060     exponent += bits;
1061   }
1062 
1063   APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1064 
1065   if (newPartsCount > 4)
1066     delete [] fullSignificand;
1067 
1068   return lost_fraction;
1069 }
1070 
1071 /* Multiply the significands of LHS and RHS to DST.  */
divideSignificand(const IEEEFloat & rhs)1072 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1073   unsigned int bit, i, partsCount;
1074   const integerPart *rhsSignificand;
1075   integerPart *lhsSignificand, *dividend, *divisor;
1076   integerPart scratch[4];
1077   lostFraction lost_fraction;
1078 
1079   assert(semantics == rhs.semantics);
1080 
1081   lhsSignificand = significandParts();
1082   rhsSignificand = rhs.significandParts();
1083   partsCount = partCount();
1084 
1085   if (partsCount > 2)
1086     dividend = new integerPart[partsCount * 2];
1087   else
1088     dividend = scratch;
1089 
1090   divisor = dividend + partsCount;
1091 
1092   /* Copy the dividend and divisor as they will be modified in-place.  */
1093   for (i = 0; i < partsCount; i++) {
1094     dividend[i] = lhsSignificand[i];
1095     divisor[i] = rhsSignificand[i];
1096     lhsSignificand[i] = 0;
1097   }
1098 
1099   exponent -= rhs.exponent;
1100 
1101   unsigned int precision = semantics->precision;
1102 
1103   /* Normalize the divisor.  */
1104   bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1105   if (bit) {
1106     exponent += bit;
1107     APInt::tcShiftLeft(divisor, partsCount, bit);
1108   }
1109 
1110   /* Normalize the dividend.  */
1111   bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1112   if (bit) {
1113     exponent -= bit;
1114     APInt::tcShiftLeft(dividend, partsCount, bit);
1115   }
1116 
1117   /* Ensure the dividend >= divisor initially for the loop below.
1118      Incidentally, this means that the division loop below is
1119      guaranteed to set the integer bit to one.  */
1120   if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1121     exponent--;
1122     APInt::tcShiftLeft(dividend, partsCount, 1);
1123     assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1124   }
1125 
1126   /* Long division.  */
1127   for (bit = precision; bit; bit -= 1) {
1128     if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1129       APInt::tcSubtract(dividend, divisor, 0, partsCount);
1130       APInt::tcSetBit(lhsSignificand, bit - 1);
1131     }
1132 
1133     APInt::tcShiftLeft(dividend, partsCount, 1);
1134   }
1135 
1136   /* Figure out the lost fraction.  */
1137   int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1138 
1139   if (cmp > 0)
1140     lost_fraction = lfMoreThanHalf;
1141   else if (cmp == 0)
1142     lost_fraction = lfExactlyHalf;
1143   else if (APInt::tcIsZero(dividend, partsCount))
1144     lost_fraction = lfExactlyZero;
1145   else
1146     lost_fraction = lfLessThanHalf;
1147 
1148   if (partsCount > 2)
1149     delete [] dividend;
1150 
1151   return lost_fraction;
1152 }
1153 
significandMSB() const1154 unsigned int IEEEFloat::significandMSB() const {
1155   return APInt::tcMSB(significandParts(), partCount());
1156 }
1157 
significandLSB() const1158 unsigned int IEEEFloat::significandLSB() const {
1159   return APInt::tcLSB(significandParts(), partCount());
1160 }
1161 
1162 /* Note that a zero result is NOT normalized to fcZero.  */
shiftSignificandRight(unsigned int bits)1163 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1164   /* Our exponent should not overflow.  */
1165   assert((ExponentType) (exponent + bits) >= exponent);
1166 
1167   exponent += bits;
1168 
1169   return shiftRight(significandParts(), partCount(), bits);
1170 }
1171 
1172 /* Shift the significand left BITS bits, subtract BITS from its exponent.  */
shiftSignificandLeft(unsigned int bits)1173 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1174   assert(bits < semantics->precision);
1175 
1176   if (bits) {
1177     unsigned int partsCount = partCount();
1178 
1179     APInt::tcShiftLeft(significandParts(), partsCount, bits);
1180     exponent -= bits;
1181 
1182     assert(!APInt::tcIsZero(significandParts(), partsCount));
1183   }
1184 }
1185 
1186 IEEEFloat::cmpResult
compareAbsoluteValue(const IEEEFloat & rhs) const1187 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1188   int compare;
1189 
1190   assert(semantics == rhs.semantics);
1191   assert(isFiniteNonZero());
1192   assert(rhs.isFiniteNonZero());
1193 
1194   compare = exponent - rhs.exponent;
1195 
1196   /* If exponents are equal, do an unsigned bignum comparison of the
1197      significands.  */
1198   if (compare == 0)
1199     compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1200                                partCount());
1201 
1202   if (compare > 0)
1203     return cmpGreaterThan;
1204   else if (compare < 0)
1205     return cmpLessThan;
1206   else
1207     return cmpEqual;
1208 }
1209 
1210 /* Handle overflow.  Sign is preserved.  We either become infinity or
1211    the largest finite number.  */
handleOverflow(roundingMode rounding_mode)1212 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1213   /* Infinity?  */
1214   if (rounding_mode == rmNearestTiesToEven ||
1215       rounding_mode == rmNearestTiesToAway ||
1216       (rounding_mode == rmTowardPositive && !sign) ||
1217       (rounding_mode == rmTowardNegative && sign)) {
1218     category = fcInfinity;
1219     return (opStatus) (opOverflow | opInexact);
1220   }
1221 
1222   /* Otherwise we become the largest finite number.  */
1223   category = fcNormal;
1224   exponent = semantics->maxExponent;
1225   APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1226                                    semantics->precision);
1227 
1228   return opInexact;
1229 }
1230 
1231 /* Returns TRUE if, when truncating the current number, with BIT the
1232    new LSB, with the given lost fraction and rounding mode, the result
1233    would need to be rounded away from zero (i.e., by increasing the
1234    signficand).  This routine must work for fcZero of both signs, and
1235    fcNormal numbers.  */
roundAwayFromZero(roundingMode rounding_mode,lostFraction lost_fraction,unsigned int bit) const1236 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1237                                   lostFraction lost_fraction,
1238                                   unsigned int bit) const {
1239   /* NaNs and infinities should not have lost fractions.  */
1240   assert(isFiniteNonZero() || category == fcZero);
1241 
1242   /* Current callers never pass this so we don't handle it.  */
1243   assert(lost_fraction != lfExactlyZero);
1244 
1245   switch (rounding_mode) {
1246   case rmNearestTiesToAway:
1247     return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1248 
1249   case rmNearestTiesToEven:
1250     if (lost_fraction == lfMoreThanHalf)
1251       return true;
1252 
1253     /* Our zeroes don't have a significand to test.  */
1254     if (lost_fraction == lfExactlyHalf && category != fcZero)
1255       return APInt::tcExtractBit(significandParts(), bit);
1256 
1257     return false;
1258 
1259   case rmTowardZero:
1260     return false;
1261 
1262   case rmTowardPositive:
1263     return !sign;
1264 
1265   case rmTowardNegative:
1266     return sign;
1267   }
1268   llvm_unreachable("Invalid rounding mode found");
1269 }
1270 
normalize(roundingMode rounding_mode,lostFraction lost_fraction)1271 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1272                                          lostFraction lost_fraction) {
1273   unsigned int omsb;                /* One, not zero, based MSB.  */
1274   int exponentChange;
1275 
1276   if (!isFiniteNonZero())
1277     return opOK;
1278 
1279   /* Before rounding normalize the exponent of fcNormal numbers.  */
1280   omsb = significandMSB() + 1;
1281 
1282   if (omsb) {
1283     /* OMSB is numbered from 1.  We want to place it in the integer
1284        bit numbered PRECISION if possible, with a compensating change in
1285        the exponent.  */
1286     exponentChange = omsb - semantics->precision;
1287 
1288     /* If the resulting exponent is too high, overflow according to
1289        the rounding mode.  */
1290     if (exponent + exponentChange > semantics->maxExponent)
1291       return handleOverflow(rounding_mode);
1292 
1293     /* Subnormal numbers have exponent minExponent, and their MSB
1294        is forced based on that.  */
1295     if (exponent + exponentChange < semantics->minExponent)
1296       exponentChange = semantics->minExponent - exponent;
1297 
1298     /* Shifting left is easy as we don't lose precision.  */
1299     if (exponentChange < 0) {
1300       assert(lost_fraction == lfExactlyZero);
1301 
1302       shiftSignificandLeft(-exponentChange);
1303 
1304       return opOK;
1305     }
1306 
1307     if (exponentChange > 0) {
1308       lostFraction lf;
1309 
1310       /* Shift right and capture any new lost fraction.  */
1311       lf = shiftSignificandRight(exponentChange);
1312 
1313       lost_fraction = combineLostFractions(lf, lost_fraction);
1314 
1315       /* Keep OMSB up-to-date.  */
1316       if (omsb > (unsigned) exponentChange)
1317         omsb -= exponentChange;
1318       else
1319         omsb = 0;
1320     }
1321   }
1322 
1323   /* Now round the number according to rounding_mode given the lost
1324      fraction.  */
1325 
1326   /* As specified in IEEE 754, since we do not trap we do not report
1327      underflow for exact results.  */
1328   if (lost_fraction == lfExactlyZero) {
1329     /* Canonicalize zeroes.  */
1330     if (omsb == 0)
1331       category = fcZero;
1332 
1333     return opOK;
1334   }
1335 
1336   /* Increment the significand if we're rounding away from zero.  */
1337   if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1338     if (omsb == 0)
1339       exponent = semantics->minExponent;
1340 
1341     incrementSignificand();
1342     omsb = significandMSB() + 1;
1343 
1344     /* Did the significand increment overflow?  */
1345     if (omsb == (unsigned) semantics->precision + 1) {
1346       /* Renormalize by incrementing the exponent and shifting our
1347          significand right one.  However if we already have the
1348          maximum exponent we overflow to infinity.  */
1349       if (exponent == semantics->maxExponent) {
1350         category = fcInfinity;
1351 
1352         return (opStatus) (opOverflow | opInexact);
1353       }
1354 
1355       shiftSignificandRight(1);
1356 
1357       return opInexact;
1358     }
1359   }
1360 
1361   /* The normal case - we were and are not denormal, and any
1362      significand increment above didn't overflow.  */
1363   if (omsb == semantics->precision)
1364     return opInexact;
1365 
1366   /* We have a non-zero denormal.  */
1367   assert(omsb < semantics->precision);
1368 
1369   /* Canonicalize zeroes.  */
1370   if (omsb == 0)
1371     category = fcZero;
1372 
1373   /* The fcZero case is a denormal that underflowed to zero.  */
1374   return (opStatus) (opUnderflow | opInexact);
1375 }
1376 
addOrSubtractSpecials(const IEEEFloat & rhs,bool subtract)1377 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1378                                                      bool subtract) {
1379   switch (PackCategoriesIntoKey(category, rhs.category)) {
1380   default:
1381     llvm_unreachable(nullptr);
1382 
1383   case PackCategoriesIntoKey(fcNaN, fcZero):
1384   case PackCategoriesIntoKey(fcNaN, fcNormal):
1385   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1386   case PackCategoriesIntoKey(fcNaN, fcNaN):
1387   case PackCategoriesIntoKey(fcNormal, fcZero):
1388   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1389   case PackCategoriesIntoKey(fcInfinity, fcZero):
1390     return opOK;
1391 
1392   case PackCategoriesIntoKey(fcZero, fcNaN):
1393   case PackCategoriesIntoKey(fcNormal, fcNaN):
1394   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1395     // We need to be sure to flip the sign here for subtraction because we
1396     // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1397     sign = rhs.sign ^ subtract;
1398     category = fcNaN;
1399     copySignificand(rhs);
1400     return opOK;
1401 
1402   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1403   case PackCategoriesIntoKey(fcZero, fcInfinity):
1404     category = fcInfinity;
1405     sign = rhs.sign ^ subtract;
1406     return opOK;
1407 
1408   case PackCategoriesIntoKey(fcZero, fcNormal):
1409     assign(rhs);
1410     sign = rhs.sign ^ subtract;
1411     return opOK;
1412 
1413   case PackCategoriesIntoKey(fcZero, fcZero):
1414     /* Sign depends on rounding mode; handled by caller.  */
1415     return opOK;
1416 
1417   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1418     /* Differently signed infinities can only be validly
1419        subtracted.  */
1420     if (((sign ^ rhs.sign)!=0) != subtract) {
1421       makeNaN();
1422       return opInvalidOp;
1423     }
1424 
1425     return opOK;
1426 
1427   case PackCategoriesIntoKey(fcNormal, fcNormal):
1428     return opDivByZero;
1429   }
1430 }
1431 
1432 /* Add or subtract two normal numbers.  */
addOrSubtractSignificand(const IEEEFloat & rhs,bool subtract)1433 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1434                                                  bool subtract) {
1435   integerPart carry;
1436   lostFraction lost_fraction;
1437   int bits;
1438 
1439   /* Determine if the operation on the absolute values is effectively
1440      an addition or subtraction.  */
1441   subtract ^= static_cast<bool>(sign ^ rhs.sign);
1442 
1443   /* Are we bigger exponent-wise than the RHS?  */
1444   bits = exponent - rhs.exponent;
1445 
1446   /* Subtraction is more subtle than one might naively expect.  */
1447   if (subtract) {
1448     IEEEFloat temp_rhs(rhs);
1449     bool reverse;
1450 
1451     if (bits == 0) {
1452       reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1453       lost_fraction = lfExactlyZero;
1454     } else if (bits > 0) {
1455       lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1456       shiftSignificandLeft(1);
1457       reverse = false;
1458     } else {
1459       lost_fraction = shiftSignificandRight(-bits - 1);
1460       temp_rhs.shiftSignificandLeft(1);
1461       reverse = true;
1462     }
1463 
1464     if (reverse) {
1465       carry = temp_rhs.subtractSignificand
1466         (*this, lost_fraction != lfExactlyZero);
1467       copySignificand(temp_rhs);
1468       sign = !sign;
1469     } else {
1470       carry = subtractSignificand
1471         (temp_rhs, lost_fraction != lfExactlyZero);
1472     }
1473 
1474     /* Invert the lost fraction - it was on the RHS and
1475        subtracted.  */
1476     if (lost_fraction == lfLessThanHalf)
1477       lost_fraction = lfMoreThanHalf;
1478     else if (lost_fraction == lfMoreThanHalf)
1479       lost_fraction = lfLessThanHalf;
1480 
1481     /* The code above is intended to ensure that no borrow is
1482        necessary.  */
1483     assert(!carry);
1484     (void)carry;
1485   } else {
1486     if (bits > 0) {
1487       IEEEFloat temp_rhs(rhs);
1488 
1489       lost_fraction = temp_rhs.shiftSignificandRight(bits);
1490       carry = addSignificand(temp_rhs);
1491     } else {
1492       lost_fraction = shiftSignificandRight(-bits);
1493       carry = addSignificand(rhs);
1494     }
1495 
1496     /* We have a guard bit; generating a carry cannot happen.  */
1497     assert(!carry);
1498     (void)carry;
1499   }
1500 
1501   return lost_fraction;
1502 }
1503 
multiplySpecials(const IEEEFloat & rhs)1504 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1505   switch (PackCategoriesIntoKey(category, rhs.category)) {
1506   default:
1507     llvm_unreachable(nullptr);
1508 
1509   case PackCategoriesIntoKey(fcNaN, fcZero):
1510   case PackCategoriesIntoKey(fcNaN, fcNormal):
1511   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1512   case PackCategoriesIntoKey(fcNaN, fcNaN):
1513     sign = false;
1514     return opOK;
1515 
1516   case PackCategoriesIntoKey(fcZero, fcNaN):
1517   case PackCategoriesIntoKey(fcNormal, fcNaN):
1518   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1519     sign = false;
1520     category = fcNaN;
1521     copySignificand(rhs);
1522     return opOK;
1523 
1524   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1525   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1526   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1527     category = fcInfinity;
1528     return opOK;
1529 
1530   case PackCategoriesIntoKey(fcZero, fcNormal):
1531   case PackCategoriesIntoKey(fcNormal, fcZero):
1532   case PackCategoriesIntoKey(fcZero, fcZero):
1533     category = fcZero;
1534     return opOK;
1535 
1536   case PackCategoriesIntoKey(fcZero, fcInfinity):
1537   case PackCategoriesIntoKey(fcInfinity, fcZero):
1538     makeNaN();
1539     return opInvalidOp;
1540 
1541   case PackCategoriesIntoKey(fcNormal, fcNormal):
1542     return opOK;
1543   }
1544 }
1545 
divideSpecials(const IEEEFloat & rhs)1546 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1547   switch (PackCategoriesIntoKey(category, rhs.category)) {
1548   default:
1549     llvm_unreachable(nullptr);
1550 
1551   case PackCategoriesIntoKey(fcZero, fcNaN):
1552   case PackCategoriesIntoKey(fcNormal, fcNaN):
1553   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1554     category = fcNaN;
1555     copySignificand(rhs);
1556     LLVM_FALLTHROUGH;
1557   case PackCategoriesIntoKey(fcNaN, fcZero):
1558   case PackCategoriesIntoKey(fcNaN, fcNormal):
1559   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1560   case PackCategoriesIntoKey(fcNaN, fcNaN):
1561     sign = false;
1562     LLVM_FALLTHROUGH;
1563   case PackCategoriesIntoKey(fcInfinity, fcZero):
1564   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1565   case PackCategoriesIntoKey(fcZero, fcInfinity):
1566   case PackCategoriesIntoKey(fcZero, fcNormal):
1567     return opOK;
1568 
1569   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1570     category = fcZero;
1571     return opOK;
1572 
1573   case PackCategoriesIntoKey(fcNormal, fcZero):
1574     category = fcInfinity;
1575     return opDivByZero;
1576 
1577   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1578   case PackCategoriesIntoKey(fcZero, fcZero):
1579     makeNaN();
1580     return opInvalidOp;
1581 
1582   case PackCategoriesIntoKey(fcNormal, fcNormal):
1583     return opOK;
1584   }
1585 }
1586 
modSpecials(const IEEEFloat & rhs)1587 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1588   switch (PackCategoriesIntoKey(category, rhs.category)) {
1589   default:
1590     llvm_unreachable(nullptr);
1591 
1592   case PackCategoriesIntoKey(fcNaN, fcZero):
1593   case PackCategoriesIntoKey(fcNaN, fcNormal):
1594   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1595   case PackCategoriesIntoKey(fcNaN, fcNaN):
1596   case PackCategoriesIntoKey(fcZero, fcInfinity):
1597   case PackCategoriesIntoKey(fcZero, fcNormal):
1598   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1599     return opOK;
1600 
1601   case PackCategoriesIntoKey(fcZero, fcNaN):
1602   case PackCategoriesIntoKey(fcNormal, fcNaN):
1603   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1604     sign = false;
1605     category = fcNaN;
1606     copySignificand(rhs);
1607     return opOK;
1608 
1609   case PackCategoriesIntoKey(fcNormal, fcZero):
1610   case PackCategoriesIntoKey(fcInfinity, fcZero):
1611   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1612   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1613   case PackCategoriesIntoKey(fcZero, fcZero):
1614     makeNaN();
1615     return opInvalidOp;
1616 
1617   case PackCategoriesIntoKey(fcNormal, fcNormal):
1618     return opOK;
1619   }
1620 }
1621 
1622 /* Change sign.  */
changeSign()1623 void IEEEFloat::changeSign() {
1624   /* Look mummy, this one's easy.  */
1625   sign = !sign;
1626 }
1627 
1628 /* Normalized addition or subtraction.  */
addOrSubtract(const IEEEFloat & rhs,roundingMode rounding_mode,bool subtract)1629 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
1630                                              roundingMode rounding_mode,
1631                                              bool subtract) {
1632   opStatus fs;
1633 
1634   fs = addOrSubtractSpecials(rhs, subtract);
1635 
1636   /* This return code means it was not a simple case.  */
1637   if (fs == opDivByZero) {
1638     lostFraction lost_fraction;
1639 
1640     lost_fraction = addOrSubtractSignificand(rhs, subtract);
1641     fs = normalize(rounding_mode, lost_fraction);
1642 
1643     /* Can only be zero if we lost no fraction.  */
1644     assert(category != fcZero || lost_fraction == lfExactlyZero);
1645   }
1646 
1647   /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1648      positive zero unless rounding to minus infinity, except that
1649      adding two like-signed zeroes gives that zero.  */
1650   if (category == fcZero) {
1651     if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1652       sign = (rounding_mode == rmTowardNegative);
1653   }
1654 
1655   return fs;
1656 }
1657 
1658 /* Normalized addition.  */
add(const IEEEFloat & rhs,roundingMode rounding_mode)1659 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
1660                                    roundingMode rounding_mode) {
1661   return addOrSubtract(rhs, rounding_mode, false);
1662 }
1663 
1664 /* Normalized subtraction.  */
subtract(const IEEEFloat & rhs,roundingMode rounding_mode)1665 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
1666                                         roundingMode rounding_mode) {
1667   return addOrSubtract(rhs, rounding_mode, true);
1668 }
1669 
1670 /* Normalized multiply.  */
multiply(const IEEEFloat & rhs,roundingMode rounding_mode)1671 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
1672                                         roundingMode rounding_mode) {
1673   opStatus fs;
1674 
1675   sign ^= rhs.sign;
1676   fs = multiplySpecials(rhs);
1677 
1678   if (isFiniteNonZero()) {
1679     lostFraction lost_fraction = multiplySignificand(rhs, nullptr);
1680     fs = normalize(rounding_mode, lost_fraction);
1681     if (lost_fraction != lfExactlyZero)
1682       fs = (opStatus) (fs | opInexact);
1683   }
1684 
1685   return fs;
1686 }
1687 
1688 /* Normalized divide.  */
divide(const IEEEFloat & rhs,roundingMode rounding_mode)1689 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
1690                                       roundingMode rounding_mode) {
1691   opStatus fs;
1692 
1693   sign ^= rhs.sign;
1694   fs = divideSpecials(rhs);
1695 
1696   if (isFiniteNonZero()) {
1697     lostFraction lost_fraction = divideSignificand(rhs);
1698     fs = normalize(rounding_mode, lost_fraction);
1699     if (lost_fraction != lfExactlyZero)
1700       fs = (opStatus) (fs | opInexact);
1701   }
1702 
1703   return fs;
1704 }
1705 
1706 /* Normalized remainder.  This is not currently correct in all cases.  */
remainder(const IEEEFloat & rhs)1707 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
1708   opStatus fs;
1709   IEEEFloat V = *this;
1710   unsigned int origSign = sign;
1711 
1712   fs = V.divide(rhs, rmNearestTiesToEven);
1713   if (fs == opDivByZero)
1714     return fs;
1715 
1716   int parts = partCount();
1717   integerPart *x = new integerPart[parts];
1718   bool ignored;
1719   fs = V.convertToInteger(makeMutableArrayRef(x, parts),
1720                           parts * integerPartWidth, true, rmNearestTiesToEven,
1721                           &ignored);
1722   if (fs == opInvalidOp) {
1723     delete[] x;
1724     return fs;
1725   }
1726 
1727   fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1728                                         rmNearestTiesToEven);
1729   assert(fs==opOK);   // should always work
1730 
1731   fs = V.multiply(rhs, rmNearestTiesToEven);
1732   assert(fs==opOK || fs==opInexact);   // should not overflow or underflow
1733 
1734   fs = subtract(V, rmNearestTiesToEven);
1735   assert(fs==opOK || fs==opInexact);   // likewise
1736 
1737   if (isZero())
1738     sign = origSign;    // IEEE754 requires this
1739   delete[] x;
1740   return fs;
1741 }
1742 
1743 /* Normalized llvm frem (C fmod). */
mod(const IEEEFloat & rhs)1744 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
1745   opStatus fs;
1746   fs = modSpecials(rhs);
1747   unsigned int origSign = sign;
1748 
1749   while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
1750          compareAbsoluteValue(rhs) != cmpLessThan) {
1751     IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven);
1752     if (compareAbsoluteValue(V) == cmpLessThan)
1753       V = scalbn(V, -1, rmNearestTiesToEven);
1754     V.sign = sign;
1755 
1756     fs = subtract(V, rmNearestTiesToEven);
1757     assert(fs==opOK);
1758   }
1759   if (isZero())
1760     sign = origSign; // fmod requires this
1761   return fs;
1762 }
1763 
1764 /* Normalized fused-multiply-add.  */
fusedMultiplyAdd(const IEEEFloat & multiplicand,const IEEEFloat & addend,roundingMode rounding_mode)1765 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
1766                                                 const IEEEFloat &addend,
1767                                                 roundingMode rounding_mode) {
1768   opStatus fs;
1769 
1770   /* Post-multiplication sign, before addition.  */
1771   sign ^= multiplicand.sign;
1772 
1773   /* If and only if all arguments are normal do we need to do an
1774      extended-precision calculation.  */
1775   if (isFiniteNonZero() &&
1776       multiplicand.isFiniteNonZero() &&
1777       addend.isFinite()) {
1778     lostFraction lost_fraction;
1779 
1780     lost_fraction = multiplySignificand(multiplicand, &addend);
1781     fs = normalize(rounding_mode, lost_fraction);
1782     if (lost_fraction != lfExactlyZero)
1783       fs = (opStatus) (fs | opInexact);
1784 
1785     /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1786        positive zero unless rounding to minus infinity, except that
1787        adding two like-signed zeroes gives that zero.  */
1788     if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1789       sign = (rounding_mode == rmTowardNegative);
1790   } else {
1791     fs = multiplySpecials(multiplicand);
1792 
1793     /* FS can only be opOK or opInvalidOp.  There is no more work
1794        to do in the latter case.  The IEEE-754R standard says it is
1795        implementation-defined in this case whether, if ADDEND is a
1796        quiet NaN, we raise invalid op; this implementation does so.
1797 
1798        If we need to do the addition we can do so with normal
1799        precision.  */
1800     if (fs == opOK)
1801       fs = addOrSubtract(addend, rounding_mode, false);
1802   }
1803 
1804   return fs;
1805 }
1806 
1807 /* Rounding-mode corrrect round to integral value.  */
roundToIntegral(roundingMode rounding_mode)1808 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
1809   opStatus fs;
1810 
1811   // If the exponent is large enough, we know that this value is already
1812   // integral, and the arithmetic below would potentially cause it to saturate
1813   // to +/-Inf.  Bail out early instead.
1814   if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
1815     return opOK;
1816 
1817   // The algorithm here is quite simple: we add 2^(p-1), where p is the
1818   // precision of our format, and then subtract it back off again.  The choice
1819   // of rounding modes for the addition/subtraction determines the rounding mode
1820   // for our integral rounding as well.
1821   // NOTE: When the input value is negative, we do subtraction followed by
1822   // addition instead.
1823   APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1824   IntegerConstant <<= semanticsPrecision(*semantics)-1;
1825   IEEEFloat MagicConstant(*semantics);
1826   fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1827                                       rmNearestTiesToEven);
1828   MagicConstant.sign = sign;
1829 
1830   if (fs != opOK)
1831     return fs;
1832 
1833   // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1834   bool inputSign = isNegative();
1835 
1836   fs = add(MagicConstant, rounding_mode);
1837   if (fs != opOK && fs != opInexact)
1838     return fs;
1839 
1840   fs = subtract(MagicConstant, rounding_mode);
1841 
1842   // Restore the input sign.
1843   if (inputSign != isNegative())
1844     changeSign();
1845 
1846   return fs;
1847 }
1848 
1849 
1850 /* Comparison requires normalized numbers.  */
compare(const IEEEFloat & rhs) const1851 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
1852   cmpResult result;
1853 
1854   assert(semantics == rhs.semantics);
1855 
1856   switch (PackCategoriesIntoKey(category, rhs.category)) {
1857   default:
1858     llvm_unreachable(nullptr);
1859 
1860   case PackCategoriesIntoKey(fcNaN, fcZero):
1861   case PackCategoriesIntoKey(fcNaN, fcNormal):
1862   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1863   case PackCategoriesIntoKey(fcNaN, fcNaN):
1864   case PackCategoriesIntoKey(fcZero, fcNaN):
1865   case PackCategoriesIntoKey(fcNormal, fcNaN):
1866   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1867     return cmpUnordered;
1868 
1869   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1870   case PackCategoriesIntoKey(fcInfinity, fcZero):
1871   case PackCategoriesIntoKey(fcNormal, fcZero):
1872     if (sign)
1873       return cmpLessThan;
1874     else
1875       return cmpGreaterThan;
1876 
1877   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1878   case PackCategoriesIntoKey(fcZero, fcInfinity):
1879   case PackCategoriesIntoKey(fcZero, fcNormal):
1880     if (rhs.sign)
1881       return cmpGreaterThan;
1882     else
1883       return cmpLessThan;
1884 
1885   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1886     if (sign == rhs.sign)
1887       return cmpEqual;
1888     else if (sign)
1889       return cmpLessThan;
1890     else
1891       return cmpGreaterThan;
1892 
1893   case PackCategoriesIntoKey(fcZero, fcZero):
1894     return cmpEqual;
1895 
1896   case PackCategoriesIntoKey(fcNormal, fcNormal):
1897     break;
1898   }
1899 
1900   /* Two normal numbers.  Do they have the same sign?  */
1901   if (sign != rhs.sign) {
1902     if (sign)
1903       result = cmpLessThan;
1904     else
1905       result = cmpGreaterThan;
1906   } else {
1907     /* Compare absolute values; invert result if negative.  */
1908     result = compareAbsoluteValue(rhs);
1909 
1910     if (sign) {
1911       if (result == cmpLessThan)
1912         result = cmpGreaterThan;
1913       else if (result == cmpGreaterThan)
1914         result = cmpLessThan;
1915     }
1916   }
1917 
1918   return result;
1919 }
1920 
1921 /// IEEEFloat::convert - convert a value of one floating point type to another.
1922 /// The return value corresponds to the IEEE754 exceptions.  *losesInfo
1923 /// records whether the transformation lost information, i.e. whether
1924 /// converting the result back to the original type will produce the
1925 /// original value (this is almost the same as return value==fsOK, but there
1926 /// are edge cases where this is not so).
1927 
convert(const fltSemantics & toSemantics,roundingMode rounding_mode,bool * losesInfo)1928 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
1929                                        roundingMode rounding_mode,
1930                                        bool *losesInfo) {
1931   lostFraction lostFraction;
1932   unsigned int newPartCount, oldPartCount;
1933   opStatus fs;
1934   int shift;
1935   const fltSemantics &fromSemantics = *semantics;
1936 
1937   lostFraction = lfExactlyZero;
1938   newPartCount = partCountForBits(toSemantics.precision + 1);
1939   oldPartCount = partCount();
1940   shift = toSemantics.precision - fromSemantics.precision;
1941 
1942   bool X86SpecialNan = false;
1943   if (&fromSemantics == &semX87DoubleExtended &&
1944       &toSemantics != &semX87DoubleExtended && category == fcNaN &&
1945       (!(*significandParts() & 0x8000000000000000ULL) ||
1946        !(*significandParts() & 0x4000000000000000ULL))) {
1947     // x86 has some unusual NaNs which cannot be represented in any other
1948     // format; note them here.
1949     X86SpecialNan = true;
1950   }
1951 
1952   // If this is a truncation of a denormal number, and the target semantics
1953   // has larger exponent range than the source semantics (this can happen
1954   // when truncating from PowerPC double-double to double format), the
1955   // right shift could lose result mantissa bits.  Adjust exponent instead
1956   // of performing excessive shift.
1957   if (shift < 0 && isFiniteNonZero()) {
1958     int exponentChange = significandMSB() + 1 - fromSemantics.precision;
1959     if (exponent + exponentChange < toSemantics.minExponent)
1960       exponentChange = toSemantics.minExponent - exponent;
1961     if (exponentChange < shift)
1962       exponentChange = shift;
1963     if (exponentChange < 0) {
1964       shift -= exponentChange;
1965       exponent += exponentChange;
1966     }
1967   }
1968 
1969   // If this is a truncation, perform the shift before we narrow the storage.
1970   if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
1971     lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1972 
1973   // Fix the storage so it can hold to new value.
1974   if (newPartCount > oldPartCount) {
1975     // The new type requires more storage; make it available.
1976     integerPart *newParts;
1977     newParts = new integerPart[newPartCount];
1978     APInt::tcSet(newParts, 0, newPartCount);
1979     if (isFiniteNonZero() || category==fcNaN)
1980       APInt::tcAssign(newParts, significandParts(), oldPartCount);
1981     freeSignificand();
1982     significand.parts = newParts;
1983   } else if (newPartCount == 1 && oldPartCount != 1) {
1984     // Switch to built-in storage for a single part.
1985     integerPart newPart = 0;
1986     if (isFiniteNonZero() || category==fcNaN)
1987       newPart = significandParts()[0];
1988     freeSignificand();
1989     significand.part = newPart;
1990   }
1991 
1992   // Now that we have the right storage, switch the semantics.
1993   semantics = &toSemantics;
1994 
1995   // If this is an extension, perform the shift now that the storage is
1996   // available.
1997   if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
1998     APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1999 
2000   if (isFiniteNonZero()) {
2001     fs = normalize(rounding_mode, lostFraction);
2002     *losesInfo = (fs != opOK);
2003   } else if (category == fcNaN) {
2004     *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2005 
2006     // For x87 extended precision, we want to make a NaN, not a special NaN if
2007     // the input wasn't special either.
2008     if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2009       APInt::tcSetBit(significandParts(), semantics->precision - 1);
2010 
2011     // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2012     // does not give you back the same bits.  This is dubious, and we
2013     // don't currently do it.  You're really supposed to get
2014     // an invalid operation signal at runtime, but nobody does that.
2015     fs = opOK;
2016   } else {
2017     *losesInfo = false;
2018     fs = opOK;
2019   }
2020 
2021   return fs;
2022 }
2023 
2024 /* Convert a floating point number to an integer according to the
2025    rounding mode.  If the rounded integer value is out of range this
2026    returns an invalid operation exception and the contents of the
2027    destination parts are unspecified.  If the rounded value is in
2028    range but the floating point number is not the exact integer, the C
2029    standard doesn't require an inexact exception to be raised.  IEEE
2030    854 does require it so we do that.
2031 
2032    Note that for conversions to integer type the C standard requires
2033    round-to-zero to always be used.  */
convertToSignExtendedInteger(MutableArrayRef<integerPart> parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2034 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2035     MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2036     roundingMode rounding_mode, bool *isExact) const {
2037   lostFraction lost_fraction;
2038   const integerPart *src;
2039   unsigned int dstPartsCount, truncatedBits;
2040 
2041   *isExact = false;
2042 
2043   /* Handle the three special cases first.  */
2044   if (category == fcInfinity || category == fcNaN)
2045     return opInvalidOp;
2046 
2047   dstPartsCount = partCountForBits(width);
2048   assert(dstPartsCount <= parts.size() && "Integer too big");
2049 
2050   if (category == fcZero) {
2051     APInt::tcSet(parts.data(), 0, dstPartsCount);
2052     // Negative zero can't be represented as an int.
2053     *isExact = !sign;
2054     return opOK;
2055   }
2056 
2057   src = significandParts();
2058 
2059   /* Step 1: place our absolute value, with any fraction truncated, in
2060      the destination.  */
2061   if (exponent < 0) {
2062     /* Our absolute value is less than one; truncate everything.  */
2063     APInt::tcSet(parts.data(), 0, dstPartsCount);
2064     /* For exponent -1 the integer bit represents .5, look at that.
2065        For smaller exponents leftmost truncated bit is 0. */
2066     truncatedBits = semantics->precision -1U - exponent;
2067   } else {
2068     /* We want the most significant (exponent + 1) bits; the rest are
2069        truncated.  */
2070     unsigned int bits = exponent + 1U;
2071 
2072     /* Hopelessly large in magnitude?  */
2073     if (bits > width)
2074       return opInvalidOp;
2075 
2076     if (bits < semantics->precision) {
2077       /* We truncate (semantics->precision - bits) bits.  */
2078       truncatedBits = semantics->precision - bits;
2079       APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2080     } else {
2081       /* We want at least as many bits as are available.  */
2082       APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2083                        0);
2084       APInt::tcShiftLeft(parts.data(), dstPartsCount,
2085                          bits - semantics->precision);
2086       truncatedBits = 0;
2087     }
2088   }
2089 
2090   /* Step 2: work out any lost fraction, and increment the absolute
2091      value if we would round away from zero.  */
2092   if (truncatedBits) {
2093     lost_fraction = lostFractionThroughTruncation(src, partCount(),
2094                                                   truncatedBits);
2095     if (lost_fraction != lfExactlyZero &&
2096         roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2097       if (APInt::tcIncrement(parts.data(), dstPartsCount))
2098         return opInvalidOp;     /* Overflow.  */
2099     }
2100   } else {
2101     lost_fraction = lfExactlyZero;
2102   }
2103 
2104   /* Step 3: check if we fit in the destination.  */
2105   unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2106 
2107   if (sign) {
2108     if (!isSigned) {
2109       /* Negative numbers cannot be represented as unsigned.  */
2110       if (omsb != 0)
2111         return opInvalidOp;
2112     } else {
2113       /* It takes omsb bits to represent the unsigned integer value.
2114          We lose a bit for the sign, but care is needed as the
2115          maximally negative integer is a special case.  */
2116       if (omsb == width &&
2117           APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2118         return opInvalidOp;
2119 
2120       /* This case can happen because of rounding.  */
2121       if (omsb > width)
2122         return opInvalidOp;
2123     }
2124 
2125     APInt::tcNegate (parts.data(), dstPartsCount);
2126   } else {
2127     if (omsb >= width + !isSigned)
2128       return opInvalidOp;
2129   }
2130 
2131   if (lost_fraction == lfExactlyZero) {
2132     *isExact = true;
2133     return opOK;
2134   } else
2135     return opInexact;
2136 }
2137 
2138 /* Same as convertToSignExtendedInteger, except we provide
2139    deterministic values in case of an invalid operation exception,
2140    namely zero for NaNs and the minimal or maximal value respectively
2141    for underflow or overflow.
2142    The *isExact output tells whether the result is exact, in the sense
2143    that converting it back to the original floating point type produces
2144    the original value.  This is almost equivalent to result==opOK,
2145    except for negative zeroes.
2146 */
2147 IEEEFloat::opStatus
convertToInteger(MutableArrayRef<integerPart> parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2148 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2149                             unsigned int width, bool isSigned,
2150                             roundingMode rounding_mode, bool *isExact) const {
2151   opStatus fs;
2152 
2153   fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2154                                     isExact);
2155 
2156   if (fs == opInvalidOp) {
2157     unsigned int bits, dstPartsCount;
2158 
2159     dstPartsCount = partCountForBits(width);
2160     assert(dstPartsCount <= parts.size() && "Integer too big");
2161 
2162     if (category == fcNaN)
2163       bits = 0;
2164     else if (sign)
2165       bits = isSigned;
2166     else
2167       bits = width - isSigned;
2168 
2169     APInt::tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2170     if (sign && isSigned)
2171       APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2172   }
2173 
2174   return fs;
2175 }
2176 
2177 /* Convert an unsigned integer SRC to a floating point number,
2178    rounding according to ROUNDING_MODE.  The sign of the floating
2179    point number is not modified.  */
convertFromUnsignedParts(const integerPart * src,unsigned int srcCount,roundingMode rounding_mode)2180 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2181     const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2182   unsigned int omsb, precision, dstCount;
2183   integerPart *dst;
2184   lostFraction lost_fraction;
2185 
2186   category = fcNormal;
2187   omsb = APInt::tcMSB(src, srcCount) + 1;
2188   dst = significandParts();
2189   dstCount = partCount();
2190   precision = semantics->precision;
2191 
2192   /* We want the most significant PRECISION bits of SRC.  There may not
2193      be that many; extract what we can.  */
2194   if (precision <= omsb) {
2195     exponent = omsb - 1;
2196     lost_fraction = lostFractionThroughTruncation(src, srcCount,
2197                                                   omsb - precision);
2198     APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2199   } else {
2200     exponent = precision - 1;
2201     lost_fraction = lfExactlyZero;
2202     APInt::tcExtract(dst, dstCount, src, omsb, 0);
2203   }
2204 
2205   return normalize(rounding_mode, lost_fraction);
2206 }
2207 
convertFromAPInt(const APInt & Val,bool isSigned,roundingMode rounding_mode)2208 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2209                                                 roundingMode rounding_mode) {
2210   unsigned int partCount = Val.getNumWords();
2211   APInt api = Val;
2212 
2213   sign = false;
2214   if (isSigned && api.isNegative()) {
2215     sign = true;
2216     api = -api;
2217   }
2218 
2219   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2220 }
2221 
2222 /* Convert a two's complement integer SRC to a floating point number,
2223    rounding according to ROUNDING_MODE.  ISSIGNED is true if the
2224    integer is signed, in which case it must be sign-extended.  */
2225 IEEEFloat::opStatus
convertFromSignExtendedInteger(const integerPart * src,unsigned int srcCount,bool isSigned,roundingMode rounding_mode)2226 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2227                                           unsigned int srcCount, bool isSigned,
2228                                           roundingMode rounding_mode) {
2229   opStatus status;
2230 
2231   if (isSigned &&
2232       APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2233     integerPart *copy;
2234 
2235     /* If we're signed and negative negate a copy.  */
2236     sign = true;
2237     copy = new integerPart[srcCount];
2238     APInt::tcAssign(copy, src, srcCount);
2239     APInt::tcNegate(copy, srcCount);
2240     status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2241     delete [] copy;
2242   } else {
2243     sign = false;
2244     status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2245   }
2246 
2247   return status;
2248 }
2249 
2250 /* FIXME: should this just take a const APInt reference?  */
2251 IEEEFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode)2252 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2253                                           unsigned int width, bool isSigned,
2254                                           roundingMode rounding_mode) {
2255   unsigned int partCount = partCountForBits(width);
2256   APInt api = APInt(width, makeArrayRef(parts, partCount));
2257 
2258   sign = false;
2259   if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2260     sign = true;
2261     api = -api;
2262   }
2263 
2264   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2265 }
2266 
2267 IEEEFloat::opStatus
convertFromHexadecimalString(StringRef s,roundingMode rounding_mode)2268 IEEEFloat::convertFromHexadecimalString(StringRef s,
2269                                         roundingMode rounding_mode) {
2270   lostFraction lost_fraction = lfExactlyZero;
2271 
2272   category = fcNormal;
2273   zeroSignificand();
2274   exponent = 0;
2275 
2276   integerPart *significand = significandParts();
2277   unsigned partsCount = partCount();
2278   unsigned bitPos = partsCount * integerPartWidth;
2279   bool computedTrailingFraction = false;
2280 
2281   // Skip leading zeroes and any (hexa)decimal point.
2282   StringRef::iterator begin = s.begin();
2283   StringRef::iterator end = s.end();
2284   StringRef::iterator dot;
2285   StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2286   StringRef::iterator firstSignificantDigit = p;
2287 
2288   while (p != end) {
2289     integerPart hex_value;
2290 
2291     if (*p == '.') {
2292       assert(dot == end && "String contains multiple dots");
2293       dot = p++;
2294       continue;
2295     }
2296 
2297     hex_value = hexDigitValue(*p);
2298     if (hex_value == -1U)
2299       break;
2300 
2301     p++;
2302 
2303     // Store the number while we have space.
2304     if (bitPos) {
2305       bitPos -= 4;
2306       hex_value <<= bitPos % integerPartWidth;
2307       significand[bitPos / integerPartWidth] |= hex_value;
2308     } else if (!computedTrailingFraction) {
2309       lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2310       computedTrailingFraction = true;
2311     }
2312   }
2313 
2314   /* Hex floats require an exponent but not a hexadecimal point.  */
2315   assert(p != end && "Hex strings require an exponent");
2316   assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2317   assert(p != begin && "Significand has no digits");
2318   assert((dot == end || p - begin != 1) && "Significand has no digits");
2319 
2320   /* Ignore the exponent if we are zero.  */
2321   if (p != firstSignificantDigit) {
2322     int expAdjustment;
2323 
2324     /* Implicit hexadecimal point?  */
2325     if (dot == end)
2326       dot = p;
2327 
2328     /* Calculate the exponent adjustment implicit in the number of
2329        significant digits.  */
2330     expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2331     if (expAdjustment < 0)
2332       expAdjustment++;
2333     expAdjustment = expAdjustment * 4 - 1;
2334 
2335     /* Adjust for writing the significand starting at the most
2336        significant nibble.  */
2337     expAdjustment += semantics->precision;
2338     expAdjustment -= partsCount * integerPartWidth;
2339 
2340     /* Adjust for the given exponent.  */
2341     exponent = totalExponent(p + 1, end, expAdjustment);
2342   }
2343 
2344   return normalize(rounding_mode, lost_fraction);
2345 }
2346 
2347 IEEEFloat::opStatus
roundSignificandWithExponent(const integerPart * decSigParts,unsigned sigPartCount,int exp,roundingMode rounding_mode)2348 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2349                                         unsigned sigPartCount, int exp,
2350                                         roundingMode rounding_mode) {
2351   unsigned int parts, pow5PartCount;
2352   fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2353   integerPart pow5Parts[maxPowerOfFiveParts];
2354   bool isNearest;
2355 
2356   isNearest = (rounding_mode == rmNearestTiesToEven ||
2357                rounding_mode == rmNearestTiesToAway);
2358 
2359   parts = partCountForBits(semantics->precision + 11);
2360 
2361   /* Calculate pow(5, abs(exp)).  */
2362   pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2363 
2364   for (;; parts *= 2) {
2365     opStatus sigStatus, powStatus;
2366     unsigned int excessPrecision, truncatedBits;
2367 
2368     calcSemantics.precision = parts * integerPartWidth - 1;
2369     excessPrecision = calcSemantics.precision - semantics->precision;
2370     truncatedBits = excessPrecision;
2371 
2372     IEEEFloat decSig(calcSemantics, uninitialized);
2373     decSig.makeZero(sign);
2374     IEEEFloat pow5(calcSemantics);
2375 
2376     sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2377                                                 rmNearestTiesToEven);
2378     powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2379                                               rmNearestTiesToEven);
2380     /* Add exp, as 10^n = 5^n * 2^n.  */
2381     decSig.exponent += exp;
2382 
2383     lostFraction calcLostFraction;
2384     integerPart HUerr, HUdistance;
2385     unsigned int powHUerr;
2386 
2387     if (exp >= 0) {
2388       /* multiplySignificand leaves the precision-th bit set to 1.  */
2389       calcLostFraction = decSig.multiplySignificand(pow5, nullptr);
2390       powHUerr = powStatus != opOK;
2391     } else {
2392       calcLostFraction = decSig.divideSignificand(pow5);
2393       /* Denormal numbers have less precision.  */
2394       if (decSig.exponent < semantics->minExponent) {
2395         excessPrecision += (semantics->minExponent - decSig.exponent);
2396         truncatedBits = excessPrecision;
2397         if (excessPrecision > calcSemantics.precision)
2398           excessPrecision = calcSemantics.precision;
2399       }
2400       /* Extra half-ulp lost in reciprocal of exponent.  */
2401       powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2402     }
2403 
2404     /* Both multiplySignificand and divideSignificand return the
2405        result with the integer bit set.  */
2406     assert(APInt::tcExtractBit
2407            (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2408 
2409     HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2410                        powHUerr);
2411     HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2412                                       excessPrecision, isNearest);
2413 
2414     /* Are we guaranteed to round correctly if we truncate?  */
2415     if (HUdistance >= HUerr) {
2416       APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2417                        calcSemantics.precision - excessPrecision,
2418                        excessPrecision);
2419       /* Take the exponent of decSig.  If we tcExtract-ed less bits
2420          above we must adjust our exponent to compensate for the
2421          implicit right shift.  */
2422       exponent = (decSig.exponent + semantics->precision
2423                   - (calcSemantics.precision - excessPrecision));
2424       calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2425                                                        decSig.partCount(),
2426                                                        truncatedBits);
2427       return normalize(rounding_mode, calcLostFraction);
2428     }
2429   }
2430 }
2431 
2432 IEEEFloat::opStatus
convertFromDecimalString(StringRef str,roundingMode rounding_mode)2433 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2434   decimalInfo D;
2435   opStatus fs;
2436 
2437   /* Scan the text.  */
2438   StringRef::iterator p = str.begin();
2439   interpretDecimal(p, str.end(), &D);
2440 
2441   /* Handle the quick cases.  First the case of no significant digits,
2442      i.e. zero, and then exponents that are obviously too large or too
2443      small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
2444      definitely overflows if
2445 
2446            (exp - 1) * L >= maxExponent
2447 
2448      and definitely underflows to zero where
2449 
2450            (exp + 1) * L <= minExponent - precision
2451 
2452      With integer arithmetic the tightest bounds for L are
2453 
2454            93/28 < L < 196/59            [ numerator <= 256 ]
2455            42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
2456   */
2457 
2458   // Test if we have a zero number allowing for strings with no null terminators
2459   // and zero decimals with non-zero exponents.
2460   //
2461   // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2462   // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2463   // be at most one dot. On the other hand, if we have a zero with a non-zero
2464   // exponent, then we know that D.firstSigDigit will be non-numeric.
2465   if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2466     category = fcZero;
2467     fs = opOK;
2468 
2469   /* Check whether the normalized exponent is high enough to overflow
2470      max during the log-rebasing in the max-exponent check below. */
2471   } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2472     fs = handleOverflow(rounding_mode);
2473 
2474   /* If it wasn't, then it also wasn't high enough to overflow max
2475      during the log-rebasing in the min-exponent check.  Check that it
2476      won't overflow min in either check, then perform the min-exponent
2477      check. */
2478   } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2479              (D.normalizedExponent + 1) * 28738 <=
2480                8651 * (semantics->minExponent - (int) semantics->precision)) {
2481     /* Underflow to zero and round.  */
2482     category = fcNormal;
2483     zeroSignificand();
2484     fs = normalize(rounding_mode, lfLessThanHalf);
2485 
2486   /* We can finally safely perform the max-exponent check. */
2487   } else if ((D.normalizedExponent - 1) * 42039
2488              >= 12655 * semantics->maxExponent) {
2489     /* Overflow and round.  */
2490     fs = handleOverflow(rounding_mode);
2491   } else {
2492     integerPart *decSignificand;
2493     unsigned int partCount;
2494 
2495     /* A tight upper bound on number of bits required to hold an
2496        N-digit decimal integer is N * 196 / 59.  Allocate enough space
2497        to hold the full significand, and an extra part required by
2498        tcMultiplyPart.  */
2499     partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2500     partCount = partCountForBits(1 + 196 * partCount / 59);
2501     decSignificand = new integerPart[partCount + 1];
2502     partCount = 0;
2503 
2504     /* Convert to binary efficiently - we do almost all multiplication
2505        in an integerPart.  When this would overflow do we do a single
2506        bignum multiplication, and then revert again to multiplication
2507        in an integerPart.  */
2508     do {
2509       integerPart decValue, val, multiplier;
2510 
2511       val = 0;
2512       multiplier = 1;
2513 
2514       do {
2515         if (*p == '.') {
2516           p++;
2517           if (p == str.end()) {
2518             break;
2519           }
2520         }
2521         decValue = decDigitValue(*p++);
2522         assert(decValue < 10U && "Invalid character in significand");
2523         multiplier *= 10;
2524         val = val * 10 + decValue;
2525         /* The maximum number that can be multiplied by ten with any
2526            digit added without overflowing an integerPart.  */
2527       } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2528 
2529       /* Multiply out the current part.  */
2530       APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2531                             partCount, partCount + 1, false);
2532 
2533       /* If we used another part (likely but not guaranteed), increase
2534          the count.  */
2535       if (decSignificand[partCount])
2536         partCount++;
2537     } while (p <= D.lastSigDigit);
2538 
2539     category = fcNormal;
2540     fs = roundSignificandWithExponent(decSignificand, partCount,
2541                                       D.exponent, rounding_mode);
2542 
2543     delete [] decSignificand;
2544   }
2545 
2546   return fs;
2547 }
2548 
convertFromStringSpecials(StringRef str)2549 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
2550   if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) {
2551     makeInf(false);
2552     return true;
2553   }
2554 
2555   if (str.equals("-inf") || str.equals("-INFINITY") || str.equals("-Inf")) {
2556     makeInf(true);
2557     return true;
2558   }
2559 
2560   if (str.equals("nan") || str.equals("NaN")) {
2561     makeNaN(false, false);
2562     return true;
2563   }
2564 
2565   if (str.equals("-nan") || str.equals("-NaN")) {
2566     makeNaN(false, true);
2567     return true;
2568   }
2569 
2570   return false;
2571 }
2572 
convertFromString(StringRef str,roundingMode rounding_mode)2573 IEEEFloat::opStatus IEEEFloat::convertFromString(StringRef str,
2574                                                  roundingMode rounding_mode) {
2575   assert(!str.empty() && "Invalid string length");
2576 
2577   // Handle special cases.
2578   if (convertFromStringSpecials(str))
2579     return opOK;
2580 
2581   /* Handle a leading minus sign.  */
2582   StringRef::iterator p = str.begin();
2583   size_t slen = str.size();
2584   sign = *p == '-' ? 1 : 0;
2585   if (*p == '-' || *p == '+') {
2586     p++;
2587     slen--;
2588     assert(slen && "String has no digits");
2589   }
2590 
2591   if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2592     assert(slen - 2 && "Invalid string");
2593     return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2594                                         rounding_mode);
2595   }
2596 
2597   return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2598 }
2599 
2600 /* Write out a hexadecimal representation of the floating point value
2601    to DST, which must be of sufficient size, in the C99 form
2602    [-]0xh.hhhhp[+-]d.  Return the number of characters written,
2603    excluding the terminating NUL.
2604 
2605    If UPPERCASE, the output is in upper case, otherwise in lower case.
2606 
2607    HEXDIGITS digits appear altogether, rounding the value if
2608    necessary.  If HEXDIGITS is 0, the minimal precision to display the
2609    number precisely is used instead.  If nothing would appear after
2610    the decimal point it is suppressed.
2611 
2612    The decimal exponent is always printed and has at least one digit.
2613    Zero values display an exponent of zero.  Infinities and NaNs
2614    appear as "infinity" or "nan" respectively.
2615 
2616    The above rules are as specified by C99.  There is ambiguity about
2617    what the leading hexadecimal digit should be.  This implementation
2618    uses whatever is necessary so that the exponent is displayed as
2619    stored.  This implies the exponent will fall within the IEEE format
2620    range, and the leading hexadecimal digit will be 0 (for denormals),
2621    1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2622    any other digits zero).
2623 */
convertToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2624 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
2625                                            bool upperCase,
2626                                            roundingMode rounding_mode) const {
2627   char *p;
2628 
2629   p = dst;
2630   if (sign)
2631     *dst++ = '-';
2632 
2633   switch (category) {
2634   case fcInfinity:
2635     memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2636     dst += sizeof infinityL - 1;
2637     break;
2638 
2639   case fcNaN:
2640     memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2641     dst += sizeof NaNU - 1;
2642     break;
2643 
2644   case fcZero:
2645     *dst++ = '0';
2646     *dst++ = upperCase ? 'X': 'x';
2647     *dst++ = '0';
2648     if (hexDigits > 1) {
2649       *dst++ = '.';
2650       memset (dst, '0', hexDigits - 1);
2651       dst += hexDigits - 1;
2652     }
2653     *dst++ = upperCase ? 'P': 'p';
2654     *dst++ = '0';
2655     break;
2656 
2657   case fcNormal:
2658     dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2659     break;
2660   }
2661 
2662   *dst = 0;
2663 
2664   return static_cast<unsigned int>(dst - p);
2665 }
2666 
2667 /* Does the hard work of outputting the correctly rounded hexadecimal
2668    form of a normal floating point number with the specified number of
2669    hexadecimal digits.  If HEXDIGITS is zero the minimum number of
2670    digits necessary to print the value precisely is output.  */
convertNormalToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2671 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2672                                           bool upperCase,
2673                                           roundingMode rounding_mode) const {
2674   unsigned int count, valueBits, shift, partsCount, outputDigits;
2675   const char *hexDigitChars;
2676   const integerPart *significand;
2677   char *p;
2678   bool roundUp;
2679 
2680   *dst++ = '0';
2681   *dst++ = upperCase ? 'X': 'x';
2682 
2683   roundUp = false;
2684   hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2685 
2686   significand = significandParts();
2687   partsCount = partCount();
2688 
2689   /* +3 because the first digit only uses the single integer bit, so
2690      we have 3 virtual zero most-significant-bits.  */
2691   valueBits = semantics->precision + 3;
2692   shift = integerPartWidth - valueBits % integerPartWidth;
2693 
2694   /* The natural number of digits required ignoring trailing
2695      insignificant zeroes.  */
2696   outputDigits = (valueBits - significandLSB () + 3) / 4;
2697 
2698   /* hexDigits of zero means use the required number for the
2699      precision.  Otherwise, see if we are truncating.  If we are,
2700      find out if we need to round away from zero.  */
2701   if (hexDigits) {
2702     if (hexDigits < outputDigits) {
2703       /* We are dropping non-zero bits, so need to check how to round.
2704          "bits" is the number of dropped bits.  */
2705       unsigned int bits;
2706       lostFraction fraction;
2707 
2708       bits = valueBits - hexDigits * 4;
2709       fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2710       roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2711     }
2712     outputDigits = hexDigits;
2713   }
2714 
2715   /* Write the digits consecutively, and start writing in the location
2716      of the hexadecimal point.  We move the most significant digit
2717      left and add the hexadecimal point later.  */
2718   p = ++dst;
2719 
2720   count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2721 
2722   while (outputDigits && count) {
2723     integerPart part;
2724 
2725     /* Put the most significant integerPartWidth bits in "part".  */
2726     if (--count == partsCount)
2727       part = 0;  /* An imaginary higher zero part.  */
2728     else
2729       part = significand[count] << shift;
2730 
2731     if (count && shift)
2732       part |= significand[count - 1] >> (integerPartWidth - shift);
2733 
2734     /* Convert as much of "part" to hexdigits as we can.  */
2735     unsigned int curDigits = integerPartWidth / 4;
2736 
2737     if (curDigits > outputDigits)
2738       curDigits = outputDigits;
2739     dst += partAsHex (dst, part, curDigits, hexDigitChars);
2740     outputDigits -= curDigits;
2741   }
2742 
2743   if (roundUp) {
2744     char *q = dst;
2745 
2746     /* Note that hexDigitChars has a trailing '0'.  */
2747     do {
2748       q--;
2749       *q = hexDigitChars[hexDigitValue (*q) + 1];
2750     } while (*q == '0');
2751     assert(q >= p);
2752   } else {
2753     /* Add trailing zeroes.  */
2754     memset (dst, '0', outputDigits);
2755     dst += outputDigits;
2756   }
2757 
2758   /* Move the most significant digit to before the point, and if there
2759      is something after the decimal point add it.  This must come
2760      after rounding above.  */
2761   p[-1] = p[0];
2762   if (dst -1 == p)
2763     dst--;
2764   else
2765     p[0] = '.';
2766 
2767   /* Finally output the exponent.  */
2768   *dst++ = upperCase ? 'P': 'p';
2769 
2770   return writeSignedDecimal (dst, exponent);
2771 }
2772 
hash_value(const IEEEFloat & Arg)2773 hash_code hash_value(const IEEEFloat &Arg) {
2774   if (!Arg.isFiniteNonZero())
2775     return hash_combine((uint8_t)Arg.category,
2776                         // NaN has no sign, fix it at zero.
2777                         Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2778                         Arg.semantics->precision);
2779 
2780   // Normal floats need their exponent and significand hashed.
2781   return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2782                       Arg.semantics->precision, Arg.exponent,
2783                       hash_combine_range(
2784                         Arg.significandParts(),
2785                         Arg.significandParts() + Arg.partCount()));
2786 }
2787 
2788 // Conversion from APFloat to/from host float/double.  It may eventually be
2789 // possible to eliminate these and have everybody deal with APFloats, but that
2790 // will take a while.  This approach will not easily extend to long double.
2791 // Current implementation requires integerPartWidth==64, which is correct at
2792 // the moment but could be made more general.
2793 
2794 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2795 // the actual IEEE respresentations.  We compensate for that here.
2796 
convertF80LongDoubleAPFloatToAPInt() const2797 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
2798   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
2799   assert(partCount()==2);
2800 
2801   uint64_t myexponent, mysignificand;
2802 
2803   if (isFiniteNonZero()) {
2804     myexponent = exponent+16383; //bias
2805     mysignificand = significandParts()[0];
2806     if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2807       myexponent = 0;   // denormal
2808   } else if (category==fcZero) {
2809     myexponent = 0;
2810     mysignificand = 0;
2811   } else if (category==fcInfinity) {
2812     myexponent = 0x7fff;
2813     mysignificand = 0x8000000000000000ULL;
2814   } else {
2815     assert(category == fcNaN && "Unknown category");
2816     myexponent = 0x7fff;
2817     mysignificand = significandParts()[0];
2818   }
2819 
2820   uint64_t words[2];
2821   words[0] = mysignificand;
2822   words[1] =  ((uint64_t)(sign & 1) << 15) |
2823               (myexponent & 0x7fffLL);
2824   return APInt(80, words);
2825 }
2826 
convertPPCDoubleDoubleAPFloatToAPInt() const2827 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
2828   assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
2829   assert(partCount()==2);
2830 
2831   uint64_t words[2];
2832   opStatus fs;
2833   bool losesInfo;
2834 
2835   // Convert number to double.  To avoid spurious underflows, we re-
2836   // normalize against the "double" minExponent first, and only *then*
2837   // truncate the mantissa.  The result of that second conversion
2838   // may be inexact, but should never underflow.
2839   // Declare fltSemantics before APFloat that uses it (and
2840   // saves pointer to it) to ensure correct destruction order.
2841   fltSemantics extendedSemantics = *semantics;
2842   extendedSemantics.minExponent = semIEEEdouble.minExponent;
2843   IEEEFloat extended(*this);
2844   fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2845   assert(fs == opOK && !losesInfo);
2846   (void)fs;
2847 
2848   IEEEFloat u(extended);
2849   fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2850   assert(fs == opOK || fs == opInexact);
2851   (void)fs;
2852   words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2853 
2854   // If conversion was exact or resulted in a special case, we're done;
2855   // just set the second double to zero.  Otherwise, re-convert back to
2856   // the extended format and compute the difference.  This now should
2857   // convert exactly to double.
2858   if (u.isFiniteNonZero() && losesInfo) {
2859     fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2860     assert(fs == opOK && !losesInfo);
2861     (void)fs;
2862 
2863     IEEEFloat v(extended);
2864     v.subtract(u, rmNearestTiesToEven);
2865     fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2866     assert(fs == opOK && !losesInfo);
2867     (void)fs;
2868     words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2869   } else {
2870     words[1] = 0;
2871   }
2872 
2873   return APInt(128, words);
2874 }
2875 
convertQuadrupleAPFloatToAPInt() const2876 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
2877   assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
2878   assert(partCount()==2);
2879 
2880   uint64_t myexponent, mysignificand, mysignificand2;
2881 
2882   if (isFiniteNonZero()) {
2883     myexponent = exponent+16383; //bias
2884     mysignificand = significandParts()[0];
2885     mysignificand2 = significandParts()[1];
2886     if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2887       myexponent = 0;   // denormal
2888   } else if (category==fcZero) {
2889     myexponent = 0;
2890     mysignificand = mysignificand2 = 0;
2891   } else if (category==fcInfinity) {
2892     myexponent = 0x7fff;
2893     mysignificand = mysignificand2 = 0;
2894   } else {
2895     assert(category == fcNaN && "Unknown category!");
2896     myexponent = 0x7fff;
2897     mysignificand = significandParts()[0];
2898     mysignificand2 = significandParts()[1];
2899   }
2900 
2901   uint64_t words[2];
2902   words[0] = mysignificand;
2903   words[1] = ((uint64_t)(sign & 1) << 63) |
2904              ((myexponent & 0x7fff) << 48) |
2905              (mysignificand2 & 0xffffffffffffLL);
2906 
2907   return APInt(128, words);
2908 }
2909 
convertDoubleAPFloatToAPInt() const2910 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
2911   assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
2912   assert(partCount()==1);
2913 
2914   uint64_t myexponent, mysignificand;
2915 
2916   if (isFiniteNonZero()) {
2917     myexponent = exponent+1023; //bias
2918     mysignificand = *significandParts();
2919     if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2920       myexponent = 0;   // denormal
2921   } else if (category==fcZero) {
2922     myexponent = 0;
2923     mysignificand = 0;
2924   } else if (category==fcInfinity) {
2925     myexponent = 0x7ff;
2926     mysignificand = 0;
2927   } else {
2928     assert(category == fcNaN && "Unknown category!");
2929     myexponent = 0x7ff;
2930     mysignificand = *significandParts();
2931   }
2932 
2933   return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2934                      ((myexponent & 0x7ff) <<  52) |
2935                      (mysignificand & 0xfffffffffffffLL))));
2936 }
2937 
convertFloatAPFloatToAPInt() const2938 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
2939   assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
2940   assert(partCount()==1);
2941 
2942   uint32_t myexponent, mysignificand;
2943 
2944   if (isFiniteNonZero()) {
2945     myexponent = exponent+127; //bias
2946     mysignificand = (uint32_t)*significandParts();
2947     if (myexponent == 1 && !(mysignificand & 0x800000))
2948       myexponent = 0;   // denormal
2949   } else if (category==fcZero) {
2950     myexponent = 0;
2951     mysignificand = 0;
2952   } else if (category==fcInfinity) {
2953     myexponent = 0xff;
2954     mysignificand = 0;
2955   } else {
2956     assert(category == fcNaN && "Unknown category!");
2957     myexponent = 0xff;
2958     mysignificand = (uint32_t)*significandParts();
2959   }
2960 
2961   return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2962                     (mysignificand & 0x7fffff)));
2963 }
2964 
convertHalfAPFloatToAPInt() const2965 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
2966   assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
2967   assert(partCount()==1);
2968 
2969   uint32_t myexponent, mysignificand;
2970 
2971   if (isFiniteNonZero()) {
2972     myexponent = exponent+15; //bias
2973     mysignificand = (uint32_t)*significandParts();
2974     if (myexponent == 1 && !(mysignificand & 0x400))
2975       myexponent = 0;   // denormal
2976   } else if (category==fcZero) {
2977     myexponent = 0;
2978     mysignificand = 0;
2979   } else if (category==fcInfinity) {
2980     myexponent = 0x1f;
2981     mysignificand = 0;
2982   } else {
2983     assert(category == fcNaN && "Unknown category!");
2984     myexponent = 0x1f;
2985     mysignificand = (uint32_t)*significandParts();
2986   }
2987 
2988   return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2989                     (mysignificand & 0x3ff)));
2990 }
2991 
2992 // This function creates an APInt that is just a bit map of the floating
2993 // point constant as it would appear in memory.  It is not a conversion,
2994 // and treating the result as a normal integer is unlikely to be useful.
2995 
bitcastToAPInt() const2996 APInt IEEEFloat::bitcastToAPInt() const {
2997   if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
2998     return convertHalfAPFloatToAPInt();
2999 
3000   if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3001     return convertFloatAPFloatToAPInt();
3002 
3003   if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3004     return convertDoubleAPFloatToAPInt();
3005 
3006   if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3007     return convertQuadrupleAPFloatToAPInt();
3008 
3009   if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3010     return convertPPCDoubleDoubleAPFloatToAPInt();
3011 
3012   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3013          "unknown format!");
3014   return convertF80LongDoubleAPFloatToAPInt();
3015 }
3016 
convertToFloat() const3017 float IEEEFloat::convertToFloat() const {
3018   assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3019          "Float semantics are not IEEEsingle");
3020   APInt api = bitcastToAPInt();
3021   return api.bitsToFloat();
3022 }
3023 
convertToDouble() const3024 double IEEEFloat::convertToDouble() const {
3025   assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3026          "Float semantics are not IEEEdouble");
3027   APInt api = bitcastToAPInt();
3028   return api.bitsToDouble();
3029 }
3030 
3031 /// Integer bit is explicit in this format.  Intel hardware (387 and later)
3032 /// does not support these bit patterns:
3033 ///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3034 ///  exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3035 ///  exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3036 ///  exponent = 0, integer bit 1 ("pseudodenormal")
3037 /// At the moment, the first three are treated as NaNs, the last one as Normal.
initFromF80LongDoubleAPInt(const APInt & api)3038 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3039   assert(api.getBitWidth()==80);
3040   uint64_t i1 = api.getRawData()[0];
3041   uint64_t i2 = api.getRawData()[1];
3042   uint64_t myexponent = (i2 & 0x7fff);
3043   uint64_t mysignificand = i1;
3044   uint8_t myintegerbit = mysignificand >> 63;
3045 
3046   initialize(&semX87DoubleExtended);
3047   assert(partCount()==2);
3048 
3049   sign = static_cast<unsigned int>(i2>>15);
3050   if (myexponent == 0 && mysignificand == 0) {
3051     // exponent, significand meaningless
3052     category = fcZero;
3053   } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3054     // exponent, significand meaningless
3055     category = fcInfinity;
3056   } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3057              (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3058     // exponent meaningless
3059     category = fcNaN;
3060     significandParts()[0] = mysignificand;
3061     significandParts()[1] = 0;
3062   } else {
3063     category = fcNormal;
3064     exponent = myexponent - 16383;
3065     significandParts()[0] = mysignificand;
3066     significandParts()[1] = 0;
3067     if (myexponent==0)          // denormal
3068       exponent = -16382;
3069   }
3070 }
3071 
initFromPPCDoubleDoubleAPInt(const APInt & api)3072 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3073   assert(api.getBitWidth()==128);
3074   uint64_t i1 = api.getRawData()[0];
3075   uint64_t i2 = api.getRawData()[1];
3076   opStatus fs;
3077   bool losesInfo;
3078 
3079   // Get the first double and convert to our format.
3080   initFromDoubleAPInt(APInt(64, i1));
3081   fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3082   assert(fs == opOK && !losesInfo);
3083   (void)fs;
3084 
3085   // Unless we have a special case, add in second double.
3086   if (isFiniteNonZero()) {
3087     IEEEFloat v(semIEEEdouble, APInt(64, i2));
3088     fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3089     assert(fs == opOK && !losesInfo);
3090     (void)fs;
3091 
3092     add(v, rmNearestTiesToEven);
3093   }
3094 }
3095 
initFromQuadrupleAPInt(const APInt & api)3096 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3097   assert(api.getBitWidth()==128);
3098   uint64_t i1 = api.getRawData()[0];
3099   uint64_t i2 = api.getRawData()[1];
3100   uint64_t myexponent = (i2 >> 48) & 0x7fff;
3101   uint64_t mysignificand  = i1;
3102   uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3103 
3104   initialize(&semIEEEquad);
3105   assert(partCount()==2);
3106 
3107   sign = static_cast<unsigned int>(i2>>63);
3108   if (myexponent==0 &&
3109       (mysignificand==0 && mysignificand2==0)) {
3110     // exponent, significand meaningless
3111     category = fcZero;
3112   } else if (myexponent==0x7fff &&
3113              (mysignificand==0 && mysignificand2==0)) {
3114     // exponent, significand meaningless
3115     category = fcInfinity;
3116   } else if (myexponent==0x7fff &&
3117              (mysignificand!=0 || mysignificand2 !=0)) {
3118     // exponent meaningless
3119     category = fcNaN;
3120     significandParts()[0] = mysignificand;
3121     significandParts()[1] = mysignificand2;
3122   } else {
3123     category = fcNormal;
3124     exponent = myexponent - 16383;
3125     significandParts()[0] = mysignificand;
3126     significandParts()[1] = mysignificand2;
3127     if (myexponent==0)          // denormal
3128       exponent = -16382;
3129     else
3130       significandParts()[1] |= 0x1000000000000LL;  // integer bit
3131   }
3132 }
3133 
initFromDoubleAPInt(const APInt & api)3134 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3135   assert(api.getBitWidth()==64);
3136   uint64_t i = *api.getRawData();
3137   uint64_t myexponent = (i >> 52) & 0x7ff;
3138   uint64_t mysignificand = i & 0xfffffffffffffLL;
3139 
3140   initialize(&semIEEEdouble);
3141   assert(partCount()==1);
3142 
3143   sign = static_cast<unsigned int>(i>>63);
3144   if (myexponent==0 && mysignificand==0) {
3145     // exponent, significand meaningless
3146     category = fcZero;
3147   } else if (myexponent==0x7ff && mysignificand==0) {
3148     // exponent, significand meaningless
3149     category = fcInfinity;
3150   } else if (myexponent==0x7ff && mysignificand!=0) {
3151     // exponent meaningless
3152     category = fcNaN;
3153     *significandParts() = mysignificand;
3154   } else {
3155     category = fcNormal;
3156     exponent = myexponent - 1023;
3157     *significandParts() = mysignificand;
3158     if (myexponent==0)          // denormal
3159       exponent = -1022;
3160     else
3161       *significandParts() |= 0x10000000000000LL;  // integer bit
3162   }
3163 }
3164 
initFromFloatAPInt(const APInt & api)3165 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3166   assert(api.getBitWidth()==32);
3167   uint32_t i = (uint32_t)*api.getRawData();
3168   uint32_t myexponent = (i >> 23) & 0xff;
3169   uint32_t mysignificand = i & 0x7fffff;
3170 
3171   initialize(&semIEEEsingle);
3172   assert(partCount()==1);
3173 
3174   sign = i >> 31;
3175   if (myexponent==0 && mysignificand==0) {
3176     // exponent, significand meaningless
3177     category = fcZero;
3178   } else if (myexponent==0xff && mysignificand==0) {
3179     // exponent, significand meaningless
3180     category = fcInfinity;
3181   } else if (myexponent==0xff && mysignificand!=0) {
3182     // sign, exponent, significand meaningless
3183     category = fcNaN;
3184     *significandParts() = mysignificand;
3185   } else {
3186     category = fcNormal;
3187     exponent = myexponent - 127;  //bias
3188     *significandParts() = mysignificand;
3189     if (myexponent==0)    // denormal
3190       exponent = -126;
3191     else
3192       *significandParts() |= 0x800000; // integer bit
3193   }
3194 }
3195 
initFromHalfAPInt(const APInt & api)3196 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3197   assert(api.getBitWidth()==16);
3198   uint32_t i = (uint32_t)*api.getRawData();
3199   uint32_t myexponent = (i >> 10) & 0x1f;
3200   uint32_t mysignificand = i & 0x3ff;
3201 
3202   initialize(&semIEEEhalf);
3203   assert(partCount()==1);
3204 
3205   sign = i >> 15;
3206   if (myexponent==0 && mysignificand==0) {
3207     // exponent, significand meaningless
3208     category = fcZero;
3209   } else if (myexponent==0x1f && mysignificand==0) {
3210     // exponent, significand meaningless
3211     category = fcInfinity;
3212   } else if (myexponent==0x1f && mysignificand!=0) {
3213     // sign, exponent, significand meaningless
3214     category = fcNaN;
3215     *significandParts() = mysignificand;
3216   } else {
3217     category = fcNormal;
3218     exponent = myexponent - 15;  //bias
3219     *significandParts() = mysignificand;
3220     if (myexponent==0)    // denormal
3221       exponent = -14;
3222     else
3223       *significandParts() |= 0x400; // integer bit
3224   }
3225 }
3226 
3227 /// Treat api as containing the bits of a floating point number.  Currently
3228 /// we infer the floating point type from the size of the APInt.  The
3229 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3230 /// when the size is anything else).
initFromAPInt(const fltSemantics * Sem,const APInt & api)3231 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3232   if (Sem == &semIEEEhalf)
3233     return initFromHalfAPInt(api);
3234   if (Sem == &semIEEEsingle)
3235     return initFromFloatAPInt(api);
3236   if (Sem == &semIEEEdouble)
3237     return initFromDoubleAPInt(api);
3238   if (Sem == &semX87DoubleExtended)
3239     return initFromF80LongDoubleAPInt(api);
3240   if (Sem == &semIEEEquad)
3241     return initFromQuadrupleAPInt(api);
3242   if (Sem == &semPPCDoubleDoubleLegacy)
3243     return initFromPPCDoubleDoubleAPInt(api);
3244 
3245   llvm_unreachable(nullptr);
3246 }
3247 
3248 /// Make this number the largest magnitude normal number in the given
3249 /// semantics.
makeLargest(bool Negative)3250 void IEEEFloat::makeLargest(bool Negative) {
3251   // We want (in interchange format):
3252   //   sign = {Negative}
3253   //   exponent = 1..10
3254   //   significand = 1..1
3255   category = fcNormal;
3256   sign = Negative;
3257   exponent = semantics->maxExponent;
3258 
3259   // Use memset to set all but the highest integerPart to all ones.
3260   integerPart *significand = significandParts();
3261   unsigned PartCount = partCount();
3262   memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3263 
3264   // Set the high integerPart especially setting all unused top bits for
3265   // internal consistency.
3266   const unsigned NumUnusedHighBits =
3267     PartCount*integerPartWidth - semantics->precision;
3268   significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3269                                    ? (~integerPart(0) >> NumUnusedHighBits)
3270                                    : 0;
3271 }
3272 
3273 /// Make this number the smallest magnitude denormal number in the given
3274 /// semantics.
makeSmallest(bool Negative)3275 void IEEEFloat::makeSmallest(bool Negative) {
3276   // We want (in interchange format):
3277   //   sign = {Negative}
3278   //   exponent = 0..0
3279   //   significand = 0..01
3280   category = fcNormal;
3281   sign = Negative;
3282   exponent = semantics->minExponent;
3283   APInt::tcSet(significandParts(), 1, partCount());
3284 }
3285 
makeSmallestNormalized(bool Negative)3286 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3287   // We want (in interchange format):
3288   //   sign = {Negative}
3289   //   exponent = 0..0
3290   //   significand = 10..0
3291 
3292   category = fcNormal;
3293   zeroSignificand();
3294   sign = Negative;
3295   exponent = semantics->minExponent;
3296   significandParts()[partCountForBits(semantics->precision) - 1] |=
3297       (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth));
3298 }
3299 
IEEEFloat(const fltSemantics & Sem,const APInt & API)3300 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3301   initFromAPInt(&Sem, API);
3302 }
3303 
IEEEFloat(float f)3304 IEEEFloat::IEEEFloat(float f) {
3305   initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
3306 }
3307 
IEEEFloat(double d)3308 IEEEFloat::IEEEFloat(double d) {
3309   initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
3310 }
3311 
3312 namespace {
append(SmallVectorImpl<char> & Buffer,StringRef Str)3313   void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3314     Buffer.append(Str.begin(), Str.end());
3315   }
3316 
3317   /// Removes data from the given significand until it is no more
3318   /// precise than is required for the desired precision.
AdjustToPrecision(APInt & significand,int & exp,unsigned FormatPrecision)3319   void AdjustToPrecision(APInt &significand,
3320                          int &exp, unsigned FormatPrecision) {
3321     unsigned bits = significand.getActiveBits();
3322 
3323     // 196/59 is a very slight overestimate of lg_2(10).
3324     unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3325 
3326     if (bits <= bitsRequired) return;
3327 
3328     unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3329     if (!tensRemovable) return;
3330 
3331     exp += tensRemovable;
3332 
3333     APInt divisor(significand.getBitWidth(), 1);
3334     APInt powten(significand.getBitWidth(), 10);
3335     while (true) {
3336       if (tensRemovable & 1)
3337         divisor *= powten;
3338       tensRemovable >>= 1;
3339       if (!tensRemovable) break;
3340       powten *= powten;
3341     }
3342 
3343     significand = significand.udiv(divisor);
3344 
3345     // Truncate the significand down to its active bit count.
3346     significand = significand.trunc(significand.getActiveBits());
3347   }
3348 
3349 
AdjustToPrecision(SmallVectorImpl<char> & buffer,int & exp,unsigned FormatPrecision)3350   void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3351                          int &exp, unsigned FormatPrecision) {
3352     unsigned N = buffer.size();
3353     if (N <= FormatPrecision) return;
3354 
3355     // The most significant figures are the last ones in the buffer.
3356     unsigned FirstSignificant = N - FormatPrecision;
3357 
3358     // Round.
3359     // FIXME: this probably shouldn't use 'round half up'.
3360 
3361     // Rounding down is just a truncation, except we also want to drop
3362     // trailing zeros from the new result.
3363     if (buffer[FirstSignificant - 1] < '5') {
3364       while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3365         FirstSignificant++;
3366 
3367       exp += FirstSignificant;
3368       buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3369       return;
3370     }
3371 
3372     // Rounding up requires a decimal add-with-carry.  If we continue
3373     // the carry, the newly-introduced zeros will just be truncated.
3374     for (unsigned I = FirstSignificant; I != N; ++I) {
3375       if (buffer[I] == '9') {
3376         FirstSignificant++;
3377       } else {
3378         buffer[I]++;
3379         break;
3380       }
3381     }
3382 
3383     // If we carried through, we have exactly one digit of precision.
3384     if (FirstSignificant == N) {
3385       exp += FirstSignificant;
3386       buffer.clear();
3387       buffer.push_back('1');
3388       return;
3389     }
3390 
3391     exp += FirstSignificant;
3392     buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3393   }
3394 }
3395 
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding,bool TruncateZero) const3396 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
3397                          unsigned FormatMaxPadding, bool TruncateZero) const {
3398   switch (category) {
3399   case fcInfinity:
3400     if (isNegative())
3401       return append(Str, "-Inf");
3402     else
3403       return append(Str, "+Inf");
3404 
3405   case fcNaN: return append(Str, "NaN");
3406 
3407   case fcZero:
3408     if (isNegative())
3409       Str.push_back('-');
3410 
3411     if (!FormatMaxPadding) {
3412       if (TruncateZero)
3413         append(Str, "0.0E+0");
3414       else {
3415         append(Str, "0.0");
3416         if (FormatPrecision > 1)
3417           Str.append(FormatPrecision - 1, '0');
3418         append(Str, "e+00");
3419       }
3420     } else
3421       Str.push_back('0');
3422     return;
3423 
3424   case fcNormal:
3425     break;
3426   }
3427 
3428   if (isNegative())
3429     Str.push_back('-');
3430 
3431   // Decompose the number into an APInt and an exponent.
3432   int exp = exponent - ((int) semantics->precision - 1);
3433   APInt significand(semantics->precision,
3434                     makeArrayRef(significandParts(),
3435                                  partCountForBits(semantics->precision)));
3436 
3437   // Set FormatPrecision if zero.  We want to do this before we
3438   // truncate trailing zeros, as those are part of the precision.
3439   if (!FormatPrecision) {
3440     // We use enough digits so the number can be round-tripped back to an
3441     // APFloat. The formula comes from "How to Print Floating-Point Numbers
3442     // Accurately" by Steele and White.
3443     // FIXME: Using a formula based purely on the precision is conservative;
3444     // we can print fewer digits depending on the actual value being printed.
3445 
3446     // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3447     FormatPrecision = 2 + semantics->precision * 59 / 196;
3448   }
3449 
3450   // Ignore trailing binary zeros.
3451   int trailingZeros = significand.countTrailingZeros();
3452   exp += trailingZeros;
3453   significand.lshrInPlace(trailingZeros);
3454 
3455   // Change the exponent from 2^e to 10^e.
3456   if (exp == 0) {
3457     // Nothing to do.
3458   } else if (exp > 0) {
3459     // Just shift left.
3460     significand = significand.zext(semantics->precision + exp);
3461     significand <<= exp;
3462     exp = 0;
3463   } else { /* exp < 0 */
3464     int texp = -exp;
3465 
3466     // We transform this using the identity:
3467     //   (N)(2^-e) == (N)(5^e)(10^-e)
3468     // This means we have to multiply N (the significand) by 5^e.
3469     // To avoid overflow, we have to operate on numbers large
3470     // enough to store N * 5^e:
3471     //   log2(N * 5^e) == log2(N) + e * log2(5)
3472     //                 <= semantics->precision + e * 137 / 59
3473     //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3474 
3475     unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3476 
3477     // Multiply significand by 5^e.
3478     //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3479     significand = significand.zext(precision);
3480     APInt five_to_the_i(precision, 5);
3481     while (true) {
3482       if (texp & 1) significand *= five_to_the_i;
3483 
3484       texp >>= 1;
3485       if (!texp) break;
3486       five_to_the_i *= five_to_the_i;
3487     }
3488   }
3489 
3490   AdjustToPrecision(significand, exp, FormatPrecision);
3491 
3492   SmallVector<char, 256> buffer;
3493 
3494   // Fill the buffer.
3495   unsigned precision = significand.getBitWidth();
3496   APInt ten(precision, 10);
3497   APInt digit(precision, 0);
3498 
3499   bool inTrail = true;
3500   while (significand != 0) {
3501     // digit <- significand % 10
3502     // significand <- significand / 10
3503     APInt::udivrem(significand, ten, significand, digit);
3504 
3505     unsigned d = digit.getZExtValue();
3506 
3507     // Drop trailing zeros.
3508     if (inTrail && !d) exp++;
3509     else {
3510       buffer.push_back((char) ('0' + d));
3511       inTrail = false;
3512     }
3513   }
3514 
3515   assert(!buffer.empty() && "no characters in buffer!");
3516 
3517   // Drop down to FormatPrecision.
3518   // TODO: don't do more precise calculations above than are required.
3519   AdjustToPrecision(buffer, exp, FormatPrecision);
3520 
3521   unsigned NDigits = buffer.size();
3522 
3523   // Check whether we should use scientific notation.
3524   bool FormatScientific;
3525   if (!FormatMaxPadding)
3526     FormatScientific = true;
3527   else {
3528     if (exp >= 0) {
3529       // 765e3 --> 765000
3530       //              ^^^
3531       // But we shouldn't make the number look more precise than it is.
3532       FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3533                           NDigits + (unsigned) exp > FormatPrecision);
3534     } else {
3535       // Power of the most significant digit.
3536       int MSD = exp + (int) (NDigits - 1);
3537       if (MSD >= 0) {
3538         // 765e-2 == 7.65
3539         FormatScientific = false;
3540       } else {
3541         // 765e-5 == 0.00765
3542         //           ^ ^^
3543         FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3544       }
3545     }
3546   }
3547 
3548   // Scientific formatting is pretty straightforward.
3549   if (FormatScientific) {
3550     exp += (NDigits - 1);
3551 
3552     Str.push_back(buffer[NDigits-1]);
3553     Str.push_back('.');
3554     if (NDigits == 1 && TruncateZero)
3555       Str.push_back('0');
3556     else
3557       for (unsigned I = 1; I != NDigits; ++I)
3558         Str.push_back(buffer[NDigits-1-I]);
3559     // Fill with zeros up to FormatPrecision.
3560     if (!TruncateZero && FormatPrecision > NDigits - 1)
3561       Str.append(FormatPrecision - NDigits + 1, '0');
3562     // For !TruncateZero we use lower 'e'.
3563     Str.push_back(TruncateZero ? 'E' : 'e');
3564 
3565     Str.push_back(exp >= 0 ? '+' : '-');
3566     if (exp < 0) exp = -exp;
3567     SmallVector<char, 6> expbuf;
3568     do {
3569       expbuf.push_back((char) ('0' + (exp % 10)));
3570       exp /= 10;
3571     } while (exp);
3572     // Exponent always at least two digits if we do not truncate zeros.
3573     if (!TruncateZero && expbuf.size() < 2)
3574       expbuf.push_back('0');
3575     for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3576       Str.push_back(expbuf[E-1-I]);
3577     return;
3578   }
3579 
3580   // Non-scientific, positive exponents.
3581   if (exp >= 0) {
3582     for (unsigned I = 0; I != NDigits; ++I)
3583       Str.push_back(buffer[NDigits-1-I]);
3584     for (unsigned I = 0; I != (unsigned) exp; ++I)
3585       Str.push_back('0');
3586     return;
3587   }
3588 
3589   // Non-scientific, negative exponents.
3590 
3591   // The number of digits to the left of the decimal point.
3592   int NWholeDigits = exp + (int) NDigits;
3593 
3594   unsigned I = 0;
3595   if (NWholeDigits > 0) {
3596     for (; I != (unsigned) NWholeDigits; ++I)
3597       Str.push_back(buffer[NDigits-I-1]);
3598     Str.push_back('.');
3599   } else {
3600     unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3601 
3602     Str.push_back('0');
3603     Str.push_back('.');
3604     for (unsigned Z = 1; Z != NZeros; ++Z)
3605       Str.push_back('0');
3606   }
3607 
3608   for (; I != NDigits; ++I)
3609     Str.push_back(buffer[NDigits-I-1]);
3610 }
3611 
getExactInverse(APFloat * inv) const3612 bool IEEEFloat::getExactInverse(APFloat *inv) const {
3613   // Special floats and denormals have no exact inverse.
3614   if (!isFiniteNonZero())
3615     return false;
3616 
3617   // Check that the number is a power of two by making sure that only the
3618   // integer bit is set in the significand.
3619   if (significandLSB() != semantics->precision - 1)
3620     return false;
3621 
3622   // Get the inverse.
3623   IEEEFloat reciprocal(*semantics, 1ULL);
3624   if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3625     return false;
3626 
3627   // Avoid multiplication with a denormal, it is not safe on all platforms and
3628   // may be slower than a normal division.
3629   if (reciprocal.isDenormal())
3630     return false;
3631 
3632   assert(reciprocal.isFiniteNonZero() &&
3633          reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3634 
3635   if (inv)
3636     *inv = APFloat(reciprocal, *semantics);
3637 
3638   return true;
3639 }
3640 
isSignaling() const3641 bool IEEEFloat::isSignaling() const {
3642   if (!isNaN())
3643     return false;
3644 
3645   // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3646   // first bit of the trailing significand being 0.
3647   return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3648 }
3649 
3650 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3651 ///
3652 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3653 /// appropriate sign switching before/after the computation.
next(bool nextDown)3654 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
3655   // If we are performing nextDown, swap sign so we have -x.
3656   if (nextDown)
3657     changeSign();
3658 
3659   // Compute nextUp(x)
3660   opStatus result = opOK;
3661 
3662   // Handle each float category separately.
3663   switch (category) {
3664   case fcInfinity:
3665     // nextUp(+inf) = +inf
3666     if (!isNegative())
3667       break;
3668     // nextUp(-inf) = -getLargest()
3669     makeLargest(true);
3670     break;
3671   case fcNaN:
3672     // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3673     // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3674     //                     change the payload.
3675     if (isSignaling()) {
3676       result = opInvalidOp;
3677       // For consistency, propagate the sign of the sNaN to the qNaN.
3678       makeNaN(false, isNegative(), nullptr);
3679     }
3680     break;
3681   case fcZero:
3682     // nextUp(pm 0) = +getSmallest()
3683     makeSmallest(false);
3684     break;
3685   case fcNormal:
3686     // nextUp(-getSmallest()) = -0
3687     if (isSmallest() && isNegative()) {
3688       APInt::tcSet(significandParts(), 0, partCount());
3689       category = fcZero;
3690       exponent = 0;
3691       break;
3692     }
3693 
3694     // nextUp(getLargest()) == INFINITY
3695     if (isLargest() && !isNegative()) {
3696       APInt::tcSet(significandParts(), 0, partCount());
3697       category = fcInfinity;
3698       exponent = semantics->maxExponent + 1;
3699       break;
3700     }
3701 
3702     // nextUp(normal) == normal + inc.
3703     if (isNegative()) {
3704       // If we are negative, we need to decrement the significand.
3705 
3706       // We only cross a binade boundary that requires adjusting the exponent
3707       // if:
3708       //   1. exponent != semantics->minExponent. This implies we are not in the
3709       //   smallest binade or are dealing with denormals.
3710       //   2. Our significand excluding the integral bit is all zeros.
3711       bool WillCrossBinadeBoundary =
3712         exponent != semantics->minExponent && isSignificandAllZeros();
3713 
3714       // Decrement the significand.
3715       //
3716       // We always do this since:
3717       //   1. If we are dealing with a non-binade decrement, by definition we
3718       //   just decrement the significand.
3719       //   2. If we are dealing with a normal -> normal binade decrement, since
3720       //   we have an explicit integral bit the fact that all bits but the
3721       //   integral bit are zero implies that subtracting one will yield a
3722       //   significand with 0 integral bit and 1 in all other spots. Thus we
3723       //   must just adjust the exponent and set the integral bit to 1.
3724       //   3. If we are dealing with a normal -> denormal binade decrement,
3725       //   since we set the integral bit to 0 when we represent denormals, we
3726       //   just decrement the significand.
3727       integerPart *Parts = significandParts();
3728       APInt::tcDecrement(Parts, partCount());
3729 
3730       if (WillCrossBinadeBoundary) {
3731         // Our result is a normal number. Do the following:
3732         // 1. Set the integral bit to 1.
3733         // 2. Decrement the exponent.
3734         APInt::tcSetBit(Parts, semantics->precision - 1);
3735         exponent--;
3736       }
3737     } else {
3738       // If we are positive, we need to increment the significand.
3739 
3740       // We only cross a binade boundary that requires adjusting the exponent if
3741       // the input is not a denormal and all of said input's significand bits
3742       // are set. If all of said conditions are true: clear the significand, set
3743       // the integral bit to 1, and increment the exponent. If we have a
3744       // denormal always increment since moving denormals and the numbers in the
3745       // smallest normal binade have the same exponent in our representation.
3746       bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3747 
3748       if (WillCrossBinadeBoundary) {
3749         integerPart *Parts = significandParts();
3750         APInt::tcSet(Parts, 0, partCount());
3751         APInt::tcSetBit(Parts, semantics->precision - 1);
3752         assert(exponent != semantics->maxExponent &&
3753                "We can not increment an exponent beyond the maxExponent allowed"
3754                " by the given floating point semantics.");
3755         exponent++;
3756       } else {
3757         incrementSignificand();
3758       }
3759     }
3760     break;
3761   }
3762 
3763   // If we are performing nextDown, swap sign so we have -nextUp(-x)
3764   if (nextDown)
3765     changeSign();
3766 
3767   return result;
3768 }
3769 
makeInf(bool Negative)3770 void IEEEFloat::makeInf(bool Negative) {
3771   category = fcInfinity;
3772   sign = Negative;
3773   exponent = semantics->maxExponent + 1;
3774   APInt::tcSet(significandParts(), 0, partCount());
3775 }
3776 
makeZero(bool Negative)3777 void IEEEFloat::makeZero(bool Negative) {
3778   category = fcZero;
3779   sign = Negative;
3780   exponent = semantics->minExponent-1;
3781   APInt::tcSet(significandParts(), 0, partCount());
3782 }
3783 
makeQuiet()3784 void IEEEFloat::makeQuiet() {
3785   assert(isNaN());
3786   APInt::tcSetBit(significandParts(), semantics->precision - 2);
3787 }
3788 
ilogb(const IEEEFloat & Arg)3789 int ilogb(const IEEEFloat &Arg) {
3790   if (Arg.isNaN())
3791     return IEEEFloat::IEK_NaN;
3792   if (Arg.isZero())
3793     return IEEEFloat::IEK_Zero;
3794   if (Arg.isInfinity())
3795     return IEEEFloat::IEK_Inf;
3796   if (!Arg.isDenormal())
3797     return Arg.exponent;
3798 
3799   IEEEFloat Normalized(Arg);
3800   int SignificandBits = Arg.getSemantics().precision - 1;
3801 
3802   Normalized.exponent += SignificandBits;
3803   Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
3804   return Normalized.exponent - SignificandBits;
3805 }
3806 
scalbn(IEEEFloat X,int Exp,IEEEFloat::roundingMode RoundingMode)3807 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
3808   auto MaxExp = X.getSemantics().maxExponent;
3809   auto MinExp = X.getSemantics().minExponent;
3810 
3811   // If Exp is wildly out-of-scale, simply adding it to X.exponent will
3812   // overflow; clamp it to a safe range before adding, but ensure that the range
3813   // is large enough that the clamp does not change the result. The range we
3814   // need to support is the difference between the largest possible exponent and
3815   // the normalized exponent of half the smallest denormal.
3816 
3817   int SignificandBits = X.getSemantics().precision - 1;
3818   int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
3819 
3820   // Clamp to one past the range ends to let normalize handle overlflow.
3821   X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
3822   X.normalize(RoundingMode, lfExactlyZero);
3823   if (X.isNaN())
3824     X.makeQuiet();
3825   return X;
3826 }
3827 
frexp(const IEEEFloat & Val,int & Exp,IEEEFloat::roundingMode RM)3828 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
3829   Exp = ilogb(Val);
3830 
3831   // Quiet signalling nans.
3832   if (Exp == IEEEFloat::IEK_NaN) {
3833     IEEEFloat Quiet(Val);
3834     Quiet.makeQuiet();
3835     return Quiet;
3836   }
3837 
3838   if (Exp == IEEEFloat::IEK_Inf)
3839     return Val;
3840 
3841   // 1 is added because frexp is defined to return a normalized fraction in
3842   // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
3843   Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
3844   return scalbn(Val, -Exp, RM);
3845 }
3846 
DoubleAPFloat(const fltSemantics & S)3847 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
3848     : Semantics(&S),
3849       Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
3850   assert(Semantics == &semPPCDoubleDouble);
3851 }
3852 
DoubleAPFloat(const fltSemantics & S,uninitializedTag)3853 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
3854     : Semantics(&S),
3855       Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
3856                             APFloat(semIEEEdouble, uninitialized)}) {
3857   assert(Semantics == &semPPCDoubleDouble);
3858 }
3859 
DoubleAPFloat(const fltSemantics & S,integerPart I)3860 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
3861     : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
3862                                            APFloat(semIEEEdouble)}) {
3863   assert(Semantics == &semPPCDoubleDouble);
3864 }
3865 
DoubleAPFloat(const fltSemantics & S,const APInt & I)3866 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
3867     : Semantics(&S),
3868       Floats(new APFloat[2]{
3869           APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
3870           APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
3871   assert(Semantics == &semPPCDoubleDouble);
3872 }
3873 
DoubleAPFloat(const fltSemantics & S,APFloat && First,APFloat && Second)3874 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
3875                              APFloat &&Second)
3876     : Semantics(&S),
3877       Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
3878   assert(Semantics == &semPPCDoubleDouble);
3879   assert(&Floats[0].getSemantics() == &semIEEEdouble);
3880   assert(&Floats[1].getSemantics() == &semIEEEdouble);
3881 }
3882 
DoubleAPFloat(const DoubleAPFloat & RHS)3883 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
3884     : Semantics(RHS.Semantics),
3885       Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
3886                                          APFloat(RHS.Floats[1])}
3887                         : nullptr) {
3888   assert(Semantics == &semPPCDoubleDouble);
3889 }
3890 
DoubleAPFloat(DoubleAPFloat && RHS)3891 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
3892     : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
3893   RHS.Semantics = &semBogus;
3894   assert(Semantics == &semPPCDoubleDouble);
3895 }
3896 
operator =(const DoubleAPFloat & RHS)3897 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
3898   if (Semantics == RHS.Semantics && RHS.Floats) {
3899     Floats[0] = RHS.Floats[0];
3900     Floats[1] = RHS.Floats[1];
3901   } else if (this != &RHS) {
3902     this->~DoubleAPFloat();
3903     new (this) DoubleAPFloat(RHS);
3904   }
3905   return *this;
3906 }
3907 
3908 // Implement addition, subtraction, multiplication and division based on:
3909 // "Software for Doubled-Precision Floating-Point Computations",
3910 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
addImpl(const APFloat & a,const APFloat & aa,const APFloat & c,const APFloat & cc,roundingMode RM)3911 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
3912                                          const APFloat &c, const APFloat &cc,
3913                                          roundingMode RM) {
3914   int Status = opOK;
3915   APFloat z = a;
3916   Status |= z.add(c, RM);
3917   if (!z.isFinite()) {
3918     if (!z.isInfinity()) {
3919       Floats[0] = std::move(z);
3920       Floats[1].makeZero(/* Neg = */ false);
3921       return (opStatus)Status;
3922     }
3923     Status = opOK;
3924     auto AComparedToC = a.compareAbsoluteValue(c);
3925     z = cc;
3926     Status |= z.add(aa, RM);
3927     if (AComparedToC == APFloat::cmpGreaterThan) {
3928       // z = cc + aa + c + a;
3929       Status |= z.add(c, RM);
3930       Status |= z.add(a, RM);
3931     } else {
3932       // z = cc + aa + a + c;
3933       Status |= z.add(a, RM);
3934       Status |= z.add(c, RM);
3935     }
3936     if (!z.isFinite()) {
3937       Floats[0] = std::move(z);
3938       Floats[1].makeZero(/* Neg = */ false);
3939       return (opStatus)Status;
3940     }
3941     Floats[0] = z;
3942     APFloat zz = aa;
3943     Status |= zz.add(cc, RM);
3944     if (AComparedToC == APFloat::cmpGreaterThan) {
3945       // Floats[1] = a - z + c + zz;
3946       Floats[1] = a;
3947       Status |= Floats[1].subtract(z, RM);
3948       Status |= Floats[1].add(c, RM);
3949       Status |= Floats[1].add(zz, RM);
3950     } else {
3951       // Floats[1] = c - z + a + zz;
3952       Floats[1] = c;
3953       Status |= Floats[1].subtract(z, RM);
3954       Status |= Floats[1].add(a, RM);
3955       Status |= Floats[1].add(zz, RM);
3956     }
3957   } else {
3958     // q = a - z;
3959     APFloat q = a;
3960     Status |= q.subtract(z, RM);
3961 
3962     // zz = q + c + (a - (q + z)) + aa + cc;
3963     // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
3964     auto zz = q;
3965     Status |= zz.add(c, RM);
3966     Status |= q.add(z, RM);
3967     Status |= q.subtract(a, RM);
3968     q.changeSign();
3969     Status |= zz.add(q, RM);
3970     Status |= zz.add(aa, RM);
3971     Status |= zz.add(cc, RM);
3972     if (zz.isZero() && !zz.isNegative()) {
3973       Floats[0] = std::move(z);
3974       Floats[1].makeZero(/* Neg = */ false);
3975       return opOK;
3976     }
3977     Floats[0] = z;
3978     Status |= Floats[0].add(zz, RM);
3979     if (!Floats[0].isFinite()) {
3980       Floats[1].makeZero(/* Neg = */ false);
3981       return (opStatus)Status;
3982     }
3983     Floats[1] = std::move(z);
3984     Status |= Floats[1].subtract(Floats[0], RM);
3985     Status |= Floats[1].add(zz, RM);
3986   }
3987   return (opStatus)Status;
3988 }
3989 
addWithSpecial(const DoubleAPFloat & LHS,const DoubleAPFloat & RHS,DoubleAPFloat & Out,roundingMode RM)3990 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
3991                                                 const DoubleAPFloat &RHS,
3992                                                 DoubleAPFloat &Out,
3993                                                 roundingMode RM) {
3994   if (LHS.getCategory() == fcNaN) {
3995     Out = LHS;
3996     return opOK;
3997   }
3998   if (RHS.getCategory() == fcNaN) {
3999     Out = RHS;
4000     return opOK;
4001   }
4002   if (LHS.getCategory() == fcZero) {
4003     Out = RHS;
4004     return opOK;
4005   }
4006   if (RHS.getCategory() == fcZero) {
4007     Out = LHS;
4008     return opOK;
4009   }
4010   if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4011       LHS.isNegative() != RHS.isNegative()) {
4012     Out.makeNaN(false, Out.isNegative(), nullptr);
4013     return opInvalidOp;
4014   }
4015   if (LHS.getCategory() == fcInfinity) {
4016     Out = LHS;
4017     return opOK;
4018   }
4019   if (RHS.getCategory() == fcInfinity) {
4020     Out = RHS;
4021     return opOK;
4022   }
4023   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4024 
4025   APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4026       CC(RHS.Floats[1]);
4027   assert(&A.getSemantics() == &semIEEEdouble);
4028   assert(&AA.getSemantics() == &semIEEEdouble);
4029   assert(&C.getSemantics() == &semIEEEdouble);
4030   assert(&CC.getSemantics() == &semIEEEdouble);
4031   assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4032   assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4033   return Out.addImpl(A, AA, C, CC, RM);
4034 }
4035 
add(const DoubleAPFloat & RHS,roundingMode RM)4036 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4037                                      roundingMode RM) {
4038   return addWithSpecial(*this, RHS, *this, RM);
4039 }
4040 
subtract(const DoubleAPFloat & RHS,roundingMode RM)4041 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4042                                           roundingMode RM) {
4043   changeSign();
4044   auto Ret = add(RHS, RM);
4045   changeSign();
4046   return Ret;
4047 }
4048 
multiply(const DoubleAPFloat & RHS,APFloat::roundingMode RM)4049 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4050                                           APFloat::roundingMode RM) {
4051   const auto &LHS = *this;
4052   auto &Out = *this;
4053   /* Interesting observation: For special categories, finding the lowest
4054      common ancestor of the following layered graph gives the correct
4055      return category:
4056 
4057         NaN
4058        /   \
4059      Zero  Inf
4060        \   /
4061        Normal
4062 
4063      e.g. NaN * NaN = NaN
4064           Zero * Inf = NaN
4065           Normal * Zero = Zero
4066           Normal * Inf = Inf
4067   */
4068   if (LHS.getCategory() == fcNaN) {
4069     Out = LHS;
4070     return opOK;
4071   }
4072   if (RHS.getCategory() == fcNaN) {
4073     Out = RHS;
4074     return opOK;
4075   }
4076   if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4077       (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4078     Out.makeNaN(false, false, nullptr);
4079     return opOK;
4080   }
4081   if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4082     Out = LHS;
4083     return opOK;
4084   }
4085   if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4086     Out = RHS;
4087     return opOK;
4088   }
4089   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4090          "Special cases not handled exhaustively");
4091 
4092   int Status = opOK;
4093   APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4094   // t = a * c
4095   APFloat T = A;
4096   Status |= T.multiply(C, RM);
4097   if (!T.isFiniteNonZero()) {
4098     Floats[0] = T;
4099     Floats[1].makeZero(/* Neg = */ false);
4100     return (opStatus)Status;
4101   }
4102 
4103   // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4104   APFloat Tau = A;
4105   T.changeSign();
4106   Status |= Tau.fusedMultiplyAdd(C, T, RM);
4107   T.changeSign();
4108   {
4109     // v = a * d
4110     APFloat V = A;
4111     Status |= V.multiply(D, RM);
4112     // w = b * c
4113     APFloat W = B;
4114     Status |= W.multiply(C, RM);
4115     Status |= V.add(W, RM);
4116     // tau += v + w
4117     Status |= Tau.add(V, RM);
4118   }
4119   // u = t + tau
4120   APFloat U = T;
4121   Status |= U.add(Tau, RM);
4122 
4123   Floats[0] = U;
4124   if (!U.isFinite()) {
4125     Floats[1].makeZero(/* Neg = */ false);
4126   } else {
4127     // Floats[1] = (t - u) + tau
4128     Status |= T.subtract(U, RM);
4129     Status |= T.add(Tau, RM);
4130     Floats[1] = T;
4131   }
4132   return (opStatus)Status;
4133 }
4134 
divide(const DoubleAPFloat & RHS,APFloat::roundingMode RM)4135 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4136                                         APFloat::roundingMode RM) {
4137   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4138   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4139   auto Ret =
4140       Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4141   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4142   return Ret;
4143 }
4144 
remainder(const DoubleAPFloat & RHS)4145 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4146   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4147   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4148   auto Ret =
4149       Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4150   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4151   return Ret;
4152 }
4153 
mod(const DoubleAPFloat & RHS)4154 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4155   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4156   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4157   auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4158   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4159   return Ret;
4160 }
4161 
4162 APFloat::opStatus
fusedMultiplyAdd(const DoubleAPFloat & Multiplicand,const DoubleAPFloat & Addend,APFloat::roundingMode RM)4163 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4164                                 const DoubleAPFloat &Addend,
4165                                 APFloat::roundingMode RM) {
4166   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4167   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4168   auto Ret = Tmp.fusedMultiplyAdd(
4169       APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4170       APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4171   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4172   return Ret;
4173 }
4174 
roundToIntegral(APFloat::roundingMode RM)4175 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4176   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4177   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4178   auto Ret = Tmp.roundToIntegral(RM);
4179   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4180   return Ret;
4181 }
4182 
changeSign()4183 void DoubleAPFloat::changeSign() {
4184   Floats[0].changeSign();
4185   Floats[1].changeSign();
4186 }
4187 
4188 APFloat::cmpResult
compareAbsoluteValue(const DoubleAPFloat & RHS) const4189 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4190   auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4191   if (Result != cmpEqual)
4192     return Result;
4193   Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4194   if (Result == cmpLessThan || Result == cmpGreaterThan) {
4195     auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4196     auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4197     if (Against && !RHSAgainst)
4198       return cmpLessThan;
4199     if (!Against && RHSAgainst)
4200       return cmpGreaterThan;
4201     if (!Against && !RHSAgainst)
4202       return Result;
4203     if (Against && RHSAgainst)
4204       return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4205   }
4206   return Result;
4207 }
4208 
getCategory() const4209 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4210   return Floats[0].getCategory();
4211 }
4212 
isNegative() const4213 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4214 
makeInf(bool Neg)4215 void DoubleAPFloat::makeInf(bool Neg) {
4216   Floats[0].makeInf(Neg);
4217   Floats[1].makeZero(/* Neg = */ false);
4218 }
4219 
makeZero(bool Neg)4220 void DoubleAPFloat::makeZero(bool Neg) {
4221   Floats[0].makeZero(Neg);
4222   Floats[1].makeZero(/* Neg = */ false);
4223 }
4224 
makeLargest(bool Neg)4225 void DoubleAPFloat::makeLargest(bool Neg) {
4226   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4227   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4228   Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4229   if (Neg)
4230     changeSign();
4231 }
4232 
makeSmallest(bool Neg)4233 void DoubleAPFloat::makeSmallest(bool Neg) {
4234   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4235   Floats[0].makeSmallest(Neg);
4236   Floats[1].makeZero(/* Neg = */ false);
4237 }
4238 
makeSmallestNormalized(bool Neg)4239 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4240   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4241   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4242   if (Neg)
4243     Floats[0].changeSign();
4244   Floats[1].makeZero(/* Neg = */ false);
4245 }
4246 
makeNaN(bool SNaN,bool Neg,const APInt * fill)4247 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4248   Floats[0].makeNaN(SNaN, Neg, fill);
4249   Floats[1].makeZero(/* Neg = */ false);
4250 }
4251 
compare(const DoubleAPFloat & RHS) const4252 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4253   auto Result = Floats[0].compare(RHS.Floats[0]);
4254   // |Float[0]| > |Float[1]|
4255   if (Result == APFloat::cmpEqual)
4256     return Floats[1].compare(RHS.Floats[1]);
4257   return Result;
4258 }
4259 
bitwiseIsEqual(const DoubleAPFloat & RHS) const4260 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4261   return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4262          Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4263 }
4264 
hash_value(const DoubleAPFloat & Arg)4265 hash_code hash_value(const DoubleAPFloat &Arg) {
4266   if (Arg.Floats)
4267     return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4268   return hash_combine(Arg.Semantics);
4269 }
4270 
bitcastToAPInt() const4271 APInt DoubleAPFloat::bitcastToAPInt() const {
4272   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4273   uint64_t Data[] = {
4274       Floats[0].bitcastToAPInt().getRawData()[0],
4275       Floats[1].bitcastToAPInt().getRawData()[0],
4276   };
4277   return APInt(128, 2, Data);
4278 }
4279 
convertFromString(StringRef S,roundingMode RM)4280 APFloat::opStatus DoubleAPFloat::convertFromString(StringRef S,
4281                                                    roundingMode RM) {
4282   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4283   APFloat Tmp(semPPCDoubleDoubleLegacy);
4284   auto Ret = Tmp.convertFromString(S, RM);
4285   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4286   return Ret;
4287 }
4288 
next(bool nextDown)4289 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
4290   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4291   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4292   auto Ret = Tmp.next(nextDown);
4293   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4294   return Ret;
4295 }
4296 
4297 APFloat::opStatus
convertToInteger(MutableArrayRef<integerPart> Input,unsigned int Width,bool IsSigned,roundingMode RM,bool * IsExact) const4298 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
4299                                 unsigned int Width, bool IsSigned,
4300                                 roundingMode RM, bool *IsExact) const {
4301   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4302   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4303       .convertToInteger(Input, Width, IsSigned, RM, IsExact);
4304 }
4305 
convertFromAPInt(const APInt & Input,bool IsSigned,roundingMode RM)4306 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
4307                                                   bool IsSigned,
4308                                                   roundingMode RM) {
4309   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4310   APFloat Tmp(semPPCDoubleDoubleLegacy);
4311   auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
4312   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4313   return Ret;
4314 }
4315 
4316 APFloat::opStatus
convertFromSignExtendedInteger(const integerPart * Input,unsigned int InputSize,bool IsSigned,roundingMode RM)4317 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
4318                                               unsigned int InputSize,
4319                                               bool IsSigned, roundingMode RM) {
4320   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4321   APFloat Tmp(semPPCDoubleDoubleLegacy);
4322   auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
4323   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4324   return Ret;
4325 }
4326 
4327 APFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * Input,unsigned int InputSize,bool IsSigned,roundingMode RM)4328 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
4329                                               unsigned int InputSize,
4330                                               bool IsSigned, roundingMode RM) {
4331   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4332   APFloat Tmp(semPPCDoubleDoubleLegacy);
4333   auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
4334   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4335   return Ret;
4336 }
4337 
convertToHexString(char * DST,unsigned int HexDigits,bool UpperCase,roundingMode RM) const4338 unsigned int DoubleAPFloat::convertToHexString(char *DST,
4339                                                unsigned int HexDigits,
4340                                                bool UpperCase,
4341                                                roundingMode RM) const {
4342   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4343   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4344       .convertToHexString(DST, HexDigits, UpperCase, RM);
4345 }
4346 
isDenormal() const4347 bool DoubleAPFloat::isDenormal() const {
4348   return getCategory() == fcNormal &&
4349          (Floats[0].isDenormal() || Floats[1].isDenormal() ||
4350           // (double)(Hi + Lo) == Hi defines a normal number.
4351           Floats[0].compare(Floats[0] + Floats[1]) != cmpEqual);
4352 }
4353 
isSmallest() const4354 bool DoubleAPFloat::isSmallest() const {
4355   if (getCategory() != fcNormal)
4356     return false;
4357   DoubleAPFloat Tmp(*this);
4358   Tmp.makeSmallest(this->isNegative());
4359   return Tmp.compare(*this) == cmpEqual;
4360 }
4361 
isLargest() const4362 bool DoubleAPFloat::isLargest() const {
4363   if (getCategory() != fcNormal)
4364     return false;
4365   DoubleAPFloat Tmp(*this);
4366   Tmp.makeLargest(this->isNegative());
4367   return Tmp.compare(*this) == cmpEqual;
4368 }
4369 
isInteger() const4370 bool DoubleAPFloat::isInteger() const {
4371   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4372   return Floats[0].isInteger() && Floats[1].isInteger();
4373 }
4374 
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding,bool TruncateZero) const4375 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
4376                              unsigned FormatPrecision,
4377                              unsigned FormatMaxPadding,
4378                              bool TruncateZero) const {
4379   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4380   APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4381       .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
4382 }
4383 
getExactInverse(APFloat * inv) const4384 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
4385   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4386   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4387   if (!inv)
4388     return Tmp.getExactInverse(nullptr);
4389   APFloat Inv(semPPCDoubleDoubleLegacy);
4390   auto Ret = Tmp.getExactInverse(&Inv);
4391   *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
4392   return Ret;
4393 }
4394 
scalbn(DoubleAPFloat Arg,int Exp,APFloat::roundingMode RM)4395 DoubleAPFloat scalbn(DoubleAPFloat Arg, int Exp, APFloat::roundingMode RM) {
4396   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4397   return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
4398                        scalbn(Arg.Floats[1], Exp, RM));
4399 }
4400 
frexp(const DoubleAPFloat & Arg,int & Exp,APFloat::roundingMode RM)4401 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
4402                     APFloat::roundingMode RM) {
4403   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4404   APFloat First = frexp(Arg.Floats[0], Exp, RM);
4405   APFloat Second = Arg.Floats[1];
4406   if (Arg.getCategory() == APFloat::fcNormal)
4407     Second = scalbn(Second, -Exp, RM);
4408   return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
4409 }
4410 
4411 } // End detail namespace
4412 
Storage(IEEEFloat F,const fltSemantics & Semantics)4413 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
4414   if (usesLayout<IEEEFloat>(Semantics)) {
4415     new (&IEEE) IEEEFloat(std::move(F));
4416     return;
4417   }
4418   if (usesLayout<DoubleAPFloat>(Semantics)) {
4419     new (&Double)
4420         DoubleAPFloat(Semantics, APFloat(std::move(F), F.getSemantics()),
4421                       APFloat(semIEEEdouble));
4422     return;
4423   }
4424   llvm_unreachable("Unexpected semantics");
4425 }
4426 
convertFromString(StringRef Str,roundingMode RM)4427 APFloat::opStatus APFloat::convertFromString(StringRef Str, roundingMode RM) {
4428   APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
4429 }
4430 
hash_value(const APFloat & Arg)4431 hash_code hash_value(const APFloat &Arg) {
4432   if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
4433     return hash_value(Arg.U.IEEE);
4434   if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
4435     return hash_value(Arg.U.Double);
4436   llvm_unreachable("Unexpected semantics");
4437 }
4438 
APFloat(const fltSemantics & Semantics,StringRef S)4439 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
4440     : APFloat(Semantics) {
4441   convertFromString(S, rmNearestTiesToEven);
4442 }
4443 
convert(const fltSemantics & ToSemantics,roundingMode RM,bool * losesInfo)4444 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
4445                                    roundingMode RM, bool *losesInfo) {
4446   if (&getSemantics() == &ToSemantics) {
4447     *losesInfo = false;
4448     return opOK;
4449   }
4450   if (usesLayout<IEEEFloat>(getSemantics()) &&
4451       usesLayout<IEEEFloat>(ToSemantics))
4452     return U.IEEE.convert(ToSemantics, RM, losesInfo);
4453   if (usesLayout<IEEEFloat>(getSemantics()) &&
4454       usesLayout<DoubleAPFloat>(ToSemantics)) {
4455     assert(&ToSemantics == &semPPCDoubleDouble);
4456     auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
4457     *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
4458     return Ret;
4459   }
4460   if (usesLayout<DoubleAPFloat>(getSemantics()) &&
4461       usesLayout<IEEEFloat>(ToSemantics)) {
4462     auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
4463     *this = APFloat(std::move(getIEEE()), ToSemantics);
4464     return Ret;
4465   }
4466   llvm_unreachable("Unexpected semantics");
4467 }
4468 
getAllOnesValue(unsigned BitWidth,bool isIEEE)4469 APFloat APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) {
4470   if (isIEEE) {
4471     switch (BitWidth) {
4472     case 16:
4473       return APFloat(semIEEEhalf, APInt::getAllOnesValue(BitWidth));
4474     case 32:
4475       return APFloat(semIEEEsingle, APInt::getAllOnesValue(BitWidth));
4476     case 64:
4477       return APFloat(semIEEEdouble, APInt::getAllOnesValue(BitWidth));
4478     case 80:
4479       return APFloat(semX87DoubleExtended, APInt::getAllOnesValue(BitWidth));
4480     case 128:
4481       return APFloat(semIEEEquad, APInt::getAllOnesValue(BitWidth));
4482     default:
4483       llvm_unreachable("Unknown floating bit width");
4484     }
4485   } else {
4486     assert(BitWidth == 128);
4487     return APFloat(semPPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
4488   }
4489 }
4490 
print(raw_ostream & OS) const4491 void APFloat::print(raw_ostream &OS) const {
4492   SmallVector<char, 16> Buffer;
4493   toString(Buffer);
4494   OS << Buffer << "\n";
4495 }
4496 
4497 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump() const4498 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
4499 #endif
4500 
Profile(FoldingSetNodeID & NID) const4501 void APFloat::Profile(FoldingSetNodeID &NID) const {
4502   NID.Add(bitcastToAPInt());
4503 }
4504 
4505 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
4506    an APSInt, whose initial bit-width and signed-ness are used to determine the
4507    precision of the conversion.
4508  */
convertToInteger(APSInt & result,roundingMode rounding_mode,bool * isExact) const4509 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
4510                                             roundingMode rounding_mode,
4511                                             bool *isExact) const {
4512   unsigned bitWidth = result.getBitWidth();
4513   SmallVector<uint64_t, 4> parts(result.getNumWords());
4514   opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
4515                                      rounding_mode, isExact);
4516   // Keeps the original signed-ness.
4517   result = APInt(bitWidth, parts);
4518   return status;
4519 }
4520 
4521 } // End llvm namespace
4522 
4523 #undef APFLOAT_DISPATCH_ON_SEMANTICS
4524