1c36abe0dSDavid Schultz /* $NetBSD: softfloat.c,v 1.8 2011/07/10 04:52:23 matt Exp $ */
215144b0fSOlivier Houchard
/*
 * This version hacked for use with gcc -msoft-float by bjh21.
 * (Mostly a case of #ifdefing out things GCC doesn't need or provides
 * itself).
 */
815144b0fSOlivier Houchard
/*
 * Things you may want to define:
 *
 * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
 *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
 *   properly renamed.
 */
1615144b0fSOlivier Houchard
1715144b0fSOlivier Houchard /*
1815144b0fSOlivier Houchard ===============================================================================
1915144b0fSOlivier Houchard
2015144b0fSOlivier Houchard This C source file is part of the SoftFloat IEC/IEEE Floating-point
2115144b0fSOlivier Houchard Arithmetic Package, Release 2a.
2215144b0fSOlivier Houchard
2315144b0fSOlivier Houchard Written by John R. Hauser. This work was made possible in part by the
2415144b0fSOlivier Houchard International Computer Science Institute, located at Suite 600, 1947 Center
2515144b0fSOlivier Houchard Street, Berkeley, California 94704. Funding was partially provided by the
2615144b0fSOlivier Houchard National Science Foundation under grant MIP-9311980. The original version
2715144b0fSOlivier Houchard of this code was written as part of a project to build a fixed-point vector
2815144b0fSOlivier Houchard processor in collaboration with the University of California at Berkeley,
2915144b0fSOlivier Houchard overseen by Profs. Nelson Morgan and John Wawrzynek. More information
3015144b0fSOlivier Houchard is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
3115144b0fSOlivier Houchard arithmetic/SoftFloat.html'.
3215144b0fSOlivier Houchard
3315144b0fSOlivier Houchard THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
3415144b0fSOlivier Houchard has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
3515144b0fSOlivier Houchard TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
3615144b0fSOlivier Houchard PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
3715144b0fSOlivier Houchard AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
3815144b0fSOlivier Houchard
3915144b0fSOlivier Houchard Derivative works are acceptable, even for commercial purposes, so long as
4015144b0fSOlivier Houchard (1) they include prominent notice that the work is derivative, and (2) they
4115144b0fSOlivier Houchard include prominent notice akin to these four paragraphs for those parts of
4215144b0fSOlivier Houchard this code that are retained.
4315144b0fSOlivier Houchard
4415144b0fSOlivier Houchard ===============================================================================
4515144b0fSOlivier Houchard */
4615144b0fSOlivier Houchard
4715144b0fSOlivier Houchard #include <sys/cdefs.h>
4815144b0fSOlivier Houchard __FBSDID("$FreeBSD$");
4915144b0fSOlivier Houchard
5015144b0fSOlivier Houchard #ifdef SOFTFLOAT_FOR_GCC
5115144b0fSOlivier Houchard #include "softfloat-for-gcc.h"
5215144b0fSOlivier Houchard #endif
5315144b0fSOlivier Houchard
5415144b0fSOlivier Houchard #include "milieu.h"
5515144b0fSOlivier Houchard #include "softfloat.h"
5615144b0fSOlivier Houchard
/*
 * Conversions between a float64 as it is stored in memory and a float64
 * as SoftFloat uses it.  Either macro may be defined before this point;
 * when it is not, it defaults to the identity.
 */
#if !defined(FLOAT64_DEMANGLE)
#define	FLOAT64_DEMANGLE(a)	(a)
#endif
#if !defined(FLOAT64_MANGLE)
#define	FLOAT64_MANGLE(a)	(a)
#endif
6715144b0fSOlivier Houchard
6815144b0fSOlivier Houchard /*
6915144b0fSOlivier Houchard -------------------------------------------------------------------------------
7015144b0fSOlivier Houchard Floating-point rounding mode, extended double-precision rounding precision,
7115144b0fSOlivier Houchard and exception flags.
7215144b0fSOlivier Houchard -------------------------------------------------------------------------------
7315144b0fSOlivier Houchard */
74b1d04644SDavid Schultz int float_rounding_mode = float_round_nearest_even;
75b1d04644SDavid Schultz int float_exception_flags = 0;
7615144b0fSOlivier Houchard #ifdef FLOATX80
7715144b0fSOlivier Houchard int8 floatx80_rounding_precision = 80;
7815144b0fSOlivier Houchard #endif
7915144b0fSOlivier Houchard
/*
-------------------------------------------------------------------------------
Primitive arithmetic functions, including multi-word arithmetic, and
division and square root approximations.  (Can be specialized to target if
desired.)
-------------------------------------------------------------------------------
*/
8715144b0fSOlivier Houchard #include "softfloat-macros"
8815144b0fSOlivier Houchard
/*
-------------------------------------------------------------------------------
Functions and definitions to determine:  (1) whether tininess for underflow
is detected before or after rounding by default, (2) what (if anything)
happens when exceptions are raised, (3) how signaling NaNs are distinguished
from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
are propagated from function inputs to output.  These details are target-
specific.
-------------------------------------------------------------------------------
*/
9915144b0fSOlivier Houchard #include "softfloat-specialize"
10015144b0fSOlivier Houchard
10115144b0fSOlivier Houchard #if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
10215144b0fSOlivier Houchard /*
10315144b0fSOlivier Houchard -------------------------------------------------------------------------------
10415144b0fSOlivier Houchard Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
10515144b0fSOlivier Houchard and 7, and returns the properly rounded 32-bit integer corresponding to the
10615144b0fSOlivier Houchard input. If `zSign' is 1, the input is negated before being converted to an
10715144b0fSOlivier Houchard integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
10815144b0fSOlivier Houchard is simply rounded to an integer, with the inexact exception raised if the
10915144b0fSOlivier Houchard input cannot be represented exactly as an integer. However, if the fixed-
11015144b0fSOlivier Houchard point input is too large, the invalid exception is raised and the largest
11115144b0fSOlivier Houchard positive or negative integer is returned.
11215144b0fSOlivier Houchard -------------------------------------------------------------------------------
11315144b0fSOlivier Houchard */
roundAndPackInt32(flag zSign,bits64 absZ)11415144b0fSOlivier Houchard static int32 roundAndPackInt32( flag zSign, bits64 absZ )
11515144b0fSOlivier Houchard {
11615144b0fSOlivier Houchard int8 roundingMode;
11715144b0fSOlivier Houchard flag roundNearestEven;
11815144b0fSOlivier Houchard int8 roundIncrement, roundBits;
11915144b0fSOlivier Houchard int32 z;
12015144b0fSOlivier Houchard
12115144b0fSOlivier Houchard roundingMode = float_rounding_mode;
12215144b0fSOlivier Houchard roundNearestEven = ( roundingMode == float_round_nearest_even );
12315144b0fSOlivier Houchard roundIncrement = 0x40;
12415144b0fSOlivier Houchard if ( ! roundNearestEven ) {
12515144b0fSOlivier Houchard if ( roundingMode == float_round_to_zero ) {
12615144b0fSOlivier Houchard roundIncrement = 0;
12715144b0fSOlivier Houchard }
12815144b0fSOlivier Houchard else {
12915144b0fSOlivier Houchard roundIncrement = 0x7F;
13015144b0fSOlivier Houchard if ( zSign ) {
13115144b0fSOlivier Houchard if ( roundingMode == float_round_up ) roundIncrement = 0;
13215144b0fSOlivier Houchard }
13315144b0fSOlivier Houchard else {
13415144b0fSOlivier Houchard if ( roundingMode == float_round_down ) roundIncrement = 0;
13515144b0fSOlivier Houchard }
13615144b0fSOlivier Houchard }
13715144b0fSOlivier Houchard }
13815144b0fSOlivier Houchard roundBits = absZ & 0x7F;
13915144b0fSOlivier Houchard absZ = ( absZ + roundIncrement )>>7;
14015144b0fSOlivier Houchard absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
14115144b0fSOlivier Houchard z = absZ;
14215144b0fSOlivier Houchard if ( zSign ) z = - z;
14315144b0fSOlivier Houchard if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
14415144b0fSOlivier Houchard float_raise( float_flag_invalid );
14515144b0fSOlivier Houchard return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
14615144b0fSOlivier Houchard }
14715144b0fSOlivier Houchard if ( roundBits ) float_exception_flags |= float_flag_inexact;
14815144b0fSOlivier Houchard return z;
14915144b0fSOlivier Houchard
15015144b0fSOlivier Houchard }
15115144b0fSOlivier Houchard
15215144b0fSOlivier Houchard /*
15315144b0fSOlivier Houchard -------------------------------------------------------------------------------
15415144b0fSOlivier Houchard Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
15515144b0fSOlivier Houchard `absZ1', with binary point between bits 63 and 64 (between the input words),
15615144b0fSOlivier Houchard and returns the properly rounded 64-bit integer corresponding to the input.
15715144b0fSOlivier Houchard If `zSign' is 1, the input is negated before being converted to an integer.
15815144b0fSOlivier Houchard Ordinarily, the fixed-point input is simply rounded to an integer, with
15915144b0fSOlivier Houchard the inexact exception raised if the input cannot be represented exactly as
16015144b0fSOlivier Houchard an integer. However, if the fixed-point input is too large, the invalid
16115144b0fSOlivier Houchard exception is raised and the largest positive or negative integer is
16215144b0fSOlivier Houchard returned.
16315144b0fSOlivier Houchard -------------------------------------------------------------------------------
16415144b0fSOlivier Houchard */
roundAndPackInt64(flag zSign,bits64 absZ0,bits64 absZ1)16515144b0fSOlivier Houchard static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
16615144b0fSOlivier Houchard {
16715144b0fSOlivier Houchard int8 roundingMode;
16815144b0fSOlivier Houchard flag roundNearestEven, increment;
16915144b0fSOlivier Houchard int64 z;
17015144b0fSOlivier Houchard
17115144b0fSOlivier Houchard roundingMode = float_rounding_mode;
17215144b0fSOlivier Houchard roundNearestEven = ( roundingMode == float_round_nearest_even );
17315144b0fSOlivier Houchard increment = ( (sbits64) absZ1 < 0 );
17415144b0fSOlivier Houchard if ( ! roundNearestEven ) {
17515144b0fSOlivier Houchard if ( roundingMode == float_round_to_zero ) {
17615144b0fSOlivier Houchard increment = 0;
17715144b0fSOlivier Houchard }
17815144b0fSOlivier Houchard else {
17915144b0fSOlivier Houchard if ( zSign ) {
18015144b0fSOlivier Houchard increment = ( roundingMode == float_round_down ) && absZ1;
18115144b0fSOlivier Houchard }
18215144b0fSOlivier Houchard else {
18315144b0fSOlivier Houchard increment = ( roundingMode == float_round_up ) && absZ1;
18415144b0fSOlivier Houchard }
18515144b0fSOlivier Houchard }
18615144b0fSOlivier Houchard }
18715144b0fSOlivier Houchard if ( increment ) {
18815144b0fSOlivier Houchard ++absZ0;
18915144b0fSOlivier Houchard if ( absZ0 == 0 ) goto overflow;
19015144b0fSOlivier Houchard absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
19115144b0fSOlivier Houchard }
19215144b0fSOlivier Houchard z = absZ0;
19315144b0fSOlivier Houchard if ( zSign ) z = - z;
19415144b0fSOlivier Houchard if ( z && ( ( z < 0 ) ^ zSign ) ) {
19515144b0fSOlivier Houchard overflow:
19615144b0fSOlivier Houchard float_raise( float_flag_invalid );
19715144b0fSOlivier Houchard return
19815144b0fSOlivier Houchard zSign ? (sbits64) LIT64( 0x8000000000000000 )
19915144b0fSOlivier Houchard : LIT64( 0x7FFFFFFFFFFFFFFF );
20015144b0fSOlivier Houchard }
20115144b0fSOlivier Houchard if ( absZ1 ) float_exception_flags |= float_flag_inexact;
20215144b0fSOlivier Houchard return z;
20315144b0fSOlivier Houchard
20415144b0fSOlivier Houchard }
20515144b0fSOlivier Houchard #endif
20615144b0fSOlivier Houchard
20715144b0fSOlivier Houchard /*
20815144b0fSOlivier Houchard -------------------------------------------------------------------------------
20915144b0fSOlivier Houchard Returns the fraction bits of the single-precision floating-point value `a'.
21015144b0fSOlivier Houchard -------------------------------------------------------------------------------
21115144b0fSOlivier Houchard */
extractFloat32Frac(float32 a)21215144b0fSOlivier Houchard INLINE bits32 extractFloat32Frac( float32 a )
21315144b0fSOlivier Houchard {
21415144b0fSOlivier Houchard
21515144b0fSOlivier Houchard return a & 0x007FFFFF;
21615144b0fSOlivier Houchard
21715144b0fSOlivier Houchard }
21815144b0fSOlivier Houchard
21915144b0fSOlivier Houchard /*
22015144b0fSOlivier Houchard -------------------------------------------------------------------------------
22115144b0fSOlivier Houchard Returns the exponent bits of the single-precision floating-point value `a'.
22215144b0fSOlivier Houchard -------------------------------------------------------------------------------
22315144b0fSOlivier Houchard */
extractFloat32Exp(float32 a)22415144b0fSOlivier Houchard INLINE int16 extractFloat32Exp( float32 a )
22515144b0fSOlivier Houchard {
22615144b0fSOlivier Houchard
22715144b0fSOlivier Houchard return ( a>>23 ) & 0xFF;
22815144b0fSOlivier Houchard
22915144b0fSOlivier Houchard }
23015144b0fSOlivier Houchard
23115144b0fSOlivier Houchard /*
23215144b0fSOlivier Houchard -------------------------------------------------------------------------------
23315144b0fSOlivier Houchard Returns the sign bit of the single-precision floating-point value `a'.
23415144b0fSOlivier Houchard -------------------------------------------------------------------------------
23515144b0fSOlivier Houchard */
extractFloat32Sign(float32 a)23615144b0fSOlivier Houchard INLINE flag extractFloat32Sign( float32 a )
23715144b0fSOlivier Houchard {
23815144b0fSOlivier Houchard
23915144b0fSOlivier Houchard return a>>31;
24015144b0fSOlivier Houchard
24115144b0fSOlivier Houchard }
24215144b0fSOlivier Houchard
24315144b0fSOlivier Houchard /*
24415144b0fSOlivier Houchard -------------------------------------------------------------------------------
24515144b0fSOlivier Houchard Normalizes the subnormal single-precision floating-point value represented
24615144b0fSOlivier Houchard by the denormalized significand `aSig'. The normalized exponent and
24715144b0fSOlivier Houchard significand are stored at the locations pointed to by `zExpPtr' and
24815144b0fSOlivier Houchard `zSigPtr', respectively.
24915144b0fSOlivier Houchard -------------------------------------------------------------------------------
25015144b0fSOlivier Houchard */
25115144b0fSOlivier Houchard static void
normalizeFloat32Subnormal(bits32 aSig,int16 * zExpPtr,bits32 * zSigPtr)25215144b0fSOlivier Houchard normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
25315144b0fSOlivier Houchard {
25415144b0fSOlivier Houchard int8 shiftCount;
25515144b0fSOlivier Houchard
25615144b0fSOlivier Houchard shiftCount = countLeadingZeros32( aSig ) - 8;
25715144b0fSOlivier Houchard *zSigPtr = aSig<<shiftCount;
25815144b0fSOlivier Houchard *zExpPtr = 1 - shiftCount;
25915144b0fSOlivier Houchard
26015144b0fSOlivier Houchard }
26115144b0fSOlivier Houchard
26215144b0fSOlivier Houchard /*
26315144b0fSOlivier Houchard -------------------------------------------------------------------------------
26415144b0fSOlivier Houchard Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
26515144b0fSOlivier Houchard single-precision floating-point value, returning the result. After being
26615144b0fSOlivier Houchard shifted into the proper positions, the three fields are simply added
26715144b0fSOlivier Houchard together to form the result. This means that any integer portion of `zSig'
26815144b0fSOlivier Houchard will be added into the exponent. Since a properly normalized significand
26915144b0fSOlivier Houchard will have an integer portion equal to 1, the `zExp' input should be 1 less
27015144b0fSOlivier Houchard than the desired result exponent whenever `zSig' is a complete, normalized
27115144b0fSOlivier Houchard significand.
27215144b0fSOlivier Houchard -------------------------------------------------------------------------------
27315144b0fSOlivier Houchard */
packFloat32(flag zSign,int16 zExp,bits32 zSig)27415144b0fSOlivier Houchard INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
27515144b0fSOlivier Houchard {
27615144b0fSOlivier Houchard
27715144b0fSOlivier Houchard return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
27815144b0fSOlivier Houchard
27915144b0fSOlivier Houchard }
28015144b0fSOlivier Houchard
28115144b0fSOlivier Houchard /*
28215144b0fSOlivier Houchard -------------------------------------------------------------------------------
28315144b0fSOlivier Houchard Takes an abstract floating-point value having sign `zSign', exponent `zExp',
28415144b0fSOlivier Houchard and significand `zSig', and returns the proper single-precision floating-
28515144b0fSOlivier Houchard point value corresponding to the abstract input. Ordinarily, the abstract
28615144b0fSOlivier Houchard value is simply rounded and packed into the single-precision format, with
28715144b0fSOlivier Houchard the inexact exception raised if the abstract input cannot be represented
28815144b0fSOlivier Houchard exactly. However, if the abstract value is too large, the overflow and
28915144b0fSOlivier Houchard inexact exceptions are raised and an infinity or maximal finite value is
29015144b0fSOlivier Houchard returned. If the abstract value is too small, the input value is rounded to
29115144b0fSOlivier Houchard a subnormal number, and the underflow and inexact exceptions are raised if
29215144b0fSOlivier Houchard the abstract input cannot be represented exactly as a subnormal single-
29315144b0fSOlivier Houchard precision floating-point number.
29415144b0fSOlivier Houchard The input significand `zSig' has its binary point between bits 30
29515144b0fSOlivier Houchard and 29, which is 7 bits to the left of the usual location. This shifted
29615144b0fSOlivier Houchard significand must be normalized or smaller. If `zSig' is not normalized,
29715144b0fSOlivier Houchard `zExp' must be 0; in that case, the result returned is a subnormal number,
29815144b0fSOlivier Houchard and it must not require rounding. In the usual case that `zSig' is
29915144b0fSOlivier Houchard normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
30015144b0fSOlivier Houchard The handling of underflow and overflow follows the IEC/IEEE Standard for
30115144b0fSOlivier Houchard Binary Floating-Point Arithmetic.
30215144b0fSOlivier Houchard -------------------------------------------------------------------------------
30315144b0fSOlivier Houchard */
roundAndPackFloat32(flag zSign,int16 zExp,bits32 zSig)30415144b0fSOlivier Houchard static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
30515144b0fSOlivier Houchard {
30615144b0fSOlivier Houchard int8 roundingMode;
30715144b0fSOlivier Houchard flag roundNearestEven;
30815144b0fSOlivier Houchard int8 roundIncrement, roundBits;
30915144b0fSOlivier Houchard flag isTiny;
31015144b0fSOlivier Houchard
31115144b0fSOlivier Houchard roundingMode = float_rounding_mode;
31215144b0fSOlivier Houchard roundNearestEven = ( roundingMode == float_round_nearest_even );
31315144b0fSOlivier Houchard roundIncrement = 0x40;
31415144b0fSOlivier Houchard if ( ! roundNearestEven ) {
31515144b0fSOlivier Houchard if ( roundingMode == float_round_to_zero ) {
31615144b0fSOlivier Houchard roundIncrement = 0;
31715144b0fSOlivier Houchard }
31815144b0fSOlivier Houchard else {
31915144b0fSOlivier Houchard roundIncrement = 0x7F;
32015144b0fSOlivier Houchard if ( zSign ) {
32115144b0fSOlivier Houchard if ( roundingMode == float_round_up ) roundIncrement = 0;
32215144b0fSOlivier Houchard }
32315144b0fSOlivier Houchard else {
32415144b0fSOlivier Houchard if ( roundingMode == float_round_down ) roundIncrement = 0;
32515144b0fSOlivier Houchard }
32615144b0fSOlivier Houchard }
32715144b0fSOlivier Houchard }
32815144b0fSOlivier Houchard roundBits = zSig & 0x7F;
32915144b0fSOlivier Houchard if ( 0xFD <= (bits16) zExp ) {
33015144b0fSOlivier Houchard if ( ( 0xFD < zExp )
33115144b0fSOlivier Houchard || ( ( zExp == 0xFD )
33215144b0fSOlivier Houchard && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
33315144b0fSOlivier Houchard ) {
33415144b0fSOlivier Houchard float_raise( float_flag_overflow | float_flag_inexact );
33515144b0fSOlivier Houchard return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
33615144b0fSOlivier Houchard }
33715144b0fSOlivier Houchard if ( zExp < 0 ) {
33815144b0fSOlivier Houchard isTiny =
33915144b0fSOlivier Houchard ( float_detect_tininess == float_tininess_before_rounding )
34015144b0fSOlivier Houchard || ( zExp < -1 )
34115144b0fSOlivier Houchard || ( zSig + roundIncrement < 0x80000000 );
34215144b0fSOlivier Houchard shift32RightJamming( zSig, - zExp, &zSig );
34315144b0fSOlivier Houchard zExp = 0;
34415144b0fSOlivier Houchard roundBits = zSig & 0x7F;
34515144b0fSOlivier Houchard if ( isTiny && roundBits ) float_raise( float_flag_underflow );
34615144b0fSOlivier Houchard }
34715144b0fSOlivier Houchard }
34815144b0fSOlivier Houchard if ( roundBits ) float_exception_flags |= float_flag_inexact;
34915144b0fSOlivier Houchard zSig = ( zSig + roundIncrement )>>7;
35015144b0fSOlivier Houchard zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
35115144b0fSOlivier Houchard if ( zSig == 0 ) zExp = 0;
35215144b0fSOlivier Houchard return packFloat32( zSign, zExp, zSig );
35315144b0fSOlivier Houchard
35415144b0fSOlivier Houchard }
35515144b0fSOlivier Houchard
35615144b0fSOlivier Houchard /*
35715144b0fSOlivier Houchard -------------------------------------------------------------------------------
35815144b0fSOlivier Houchard Takes an abstract floating-point value having sign `zSign', exponent `zExp',
35915144b0fSOlivier Houchard and significand `zSig', and returns the proper single-precision floating-
36015144b0fSOlivier Houchard point value corresponding to the abstract input. This routine is just like
36115144b0fSOlivier Houchard `roundAndPackFloat32' except that `zSig' does not have to be normalized.
36215144b0fSOlivier Houchard Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
36315144b0fSOlivier Houchard floating-point exponent.
36415144b0fSOlivier Houchard -------------------------------------------------------------------------------
36515144b0fSOlivier Houchard */
36615144b0fSOlivier Houchard static float32
normalizeRoundAndPackFloat32(flag zSign,int16 zExp,bits32 zSig)36715144b0fSOlivier Houchard normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
36815144b0fSOlivier Houchard {
36915144b0fSOlivier Houchard int8 shiftCount;
37015144b0fSOlivier Houchard
37115144b0fSOlivier Houchard shiftCount = countLeadingZeros32( zSig ) - 1;
37215144b0fSOlivier Houchard return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
37315144b0fSOlivier Houchard
37415144b0fSOlivier Houchard }
37515144b0fSOlivier Houchard
37615144b0fSOlivier Houchard /*
37715144b0fSOlivier Houchard -------------------------------------------------------------------------------
37815144b0fSOlivier Houchard Returns the fraction bits of the double-precision floating-point value `a'.
37915144b0fSOlivier Houchard -------------------------------------------------------------------------------
38015144b0fSOlivier Houchard */
extractFloat64Frac(float64 a)38115144b0fSOlivier Houchard INLINE bits64 extractFloat64Frac( float64 a )
38215144b0fSOlivier Houchard {
38315144b0fSOlivier Houchard
38415144b0fSOlivier Houchard return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF );
38515144b0fSOlivier Houchard
38615144b0fSOlivier Houchard }
38715144b0fSOlivier Houchard
38815144b0fSOlivier Houchard /*
38915144b0fSOlivier Houchard -------------------------------------------------------------------------------
39015144b0fSOlivier Houchard Returns the exponent bits of the double-precision floating-point value `a'.
39115144b0fSOlivier Houchard -------------------------------------------------------------------------------
39215144b0fSOlivier Houchard */
extractFloat64Exp(float64 a)39315144b0fSOlivier Houchard INLINE int16 extractFloat64Exp( float64 a )
39415144b0fSOlivier Houchard {
39515144b0fSOlivier Houchard
39615144b0fSOlivier Houchard return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF;
39715144b0fSOlivier Houchard
39815144b0fSOlivier Houchard }
39915144b0fSOlivier Houchard
40015144b0fSOlivier Houchard /*
40115144b0fSOlivier Houchard -------------------------------------------------------------------------------
40215144b0fSOlivier Houchard Returns the sign bit of the double-precision floating-point value `a'.
40315144b0fSOlivier Houchard -------------------------------------------------------------------------------
40415144b0fSOlivier Houchard */
extractFloat64Sign(float64 a)40515144b0fSOlivier Houchard INLINE flag extractFloat64Sign( float64 a )
40615144b0fSOlivier Houchard {
40715144b0fSOlivier Houchard
40815144b0fSOlivier Houchard return FLOAT64_DEMANGLE(a)>>63;
40915144b0fSOlivier Houchard
41015144b0fSOlivier Houchard }
41115144b0fSOlivier Houchard
41215144b0fSOlivier Houchard /*
41315144b0fSOlivier Houchard -------------------------------------------------------------------------------
41415144b0fSOlivier Houchard Normalizes the subnormal double-precision floating-point value represented
41515144b0fSOlivier Houchard by the denormalized significand `aSig'. The normalized exponent and
41615144b0fSOlivier Houchard significand are stored at the locations pointed to by `zExpPtr' and
41715144b0fSOlivier Houchard `zSigPtr', respectively.
41815144b0fSOlivier Houchard -------------------------------------------------------------------------------
41915144b0fSOlivier Houchard */
42015144b0fSOlivier Houchard static void
normalizeFloat64Subnormal(bits64 aSig,int16 * zExpPtr,bits64 * zSigPtr)42115144b0fSOlivier Houchard normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
42215144b0fSOlivier Houchard {
42315144b0fSOlivier Houchard int8 shiftCount;
42415144b0fSOlivier Houchard
42515144b0fSOlivier Houchard shiftCount = countLeadingZeros64( aSig ) - 11;
42615144b0fSOlivier Houchard *zSigPtr = aSig<<shiftCount;
42715144b0fSOlivier Houchard *zExpPtr = 1 - shiftCount;
42815144b0fSOlivier Houchard
42915144b0fSOlivier Houchard }
43015144b0fSOlivier Houchard
43115144b0fSOlivier Houchard /*
43215144b0fSOlivier Houchard -------------------------------------------------------------------------------
43315144b0fSOlivier Houchard Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
43415144b0fSOlivier Houchard double-precision floating-point value, returning the result. After being
43515144b0fSOlivier Houchard shifted into the proper positions, the three fields are simply added
43615144b0fSOlivier Houchard together to form the result. This means that any integer portion of `zSig'
43715144b0fSOlivier Houchard will be added into the exponent. Since a properly normalized significand
43815144b0fSOlivier Houchard will have an integer portion equal to 1, the `zExp' input should be 1 less
43915144b0fSOlivier Houchard than the desired result exponent whenever `zSig' is a complete, normalized
44015144b0fSOlivier Houchard significand.
44115144b0fSOlivier Houchard -------------------------------------------------------------------------------
44215144b0fSOlivier Houchard */
packFloat64(flag zSign,int16 zExp,bits64 zSig)44315144b0fSOlivier Houchard INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
44415144b0fSOlivier Houchard {
44515144b0fSOlivier Houchard
44615144b0fSOlivier Houchard return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
44715144b0fSOlivier Houchard ( ( (bits64) zExp )<<52 ) + zSig );
44815144b0fSOlivier Houchard
44915144b0fSOlivier Houchard }
45015144b0fSOlivier Houchard
45115144b0fSOlivier Houchard /*
45215144b0fSOlivier Houchard -------------------------------------------------------------------------------
45315144b0fSOlivier Houchard Takes an abstract floating-point value having sign `zSign', exponent `zExp',
45415144b0fSOlivier Houchard and significand `zSig', and returns the proper double-precision floating-
45515144b0fSOlivier Houchard point value corresponding to the abstract input. Ordinarily, the abstract
45615144b0fSOlivier Houchard value is simply rounded and packed into the double-precision format, with
45715144b0fSOlivier Houchard the inexact exception raised if the abstract input cannot be represented
45815144b0fSOlivier Houchard exactly. However, if the abstract value is too large, the overflow and
45915144b0fSOlivier Houchard inexact exceptions are raised and an infinity or maximal finite value is
46015144b0fSOlivier Houchard returned. If the abstract value is too small, the input value is rounded to
46115144b0fSOlivier Houchard a subnormal number, and the underflow and inexact exceptions are raised if
46215144b0fSOlivier Houchard the abstract input cannot be represented exactly as a subnormal double-
46315144b0fSOlivier Houchard precision floating-point number.
46415144b0fSOlivier Houchard The input significand `zSig' has its binary point between bits 62
46515144b0fSOlivier Houchard and 61, which is 10 bits to the left of the usual location. This shifted
46615144b0fSOlivier Houchard significand must be normalized or smaller. If `zSig' is not normalized,
46715144b0fSOlivier Houchard `zExp' must be 0; in that case, the result returned is a subnormal number,
46815144b0fSOlivier Houchard and it must not require rounding. In the usual case that `zSig' is
46915144b0fSOlivier Houchard normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
47015144b0fSOlivier Houchard The handling of underflow and overflow follows the IEC/IEEE Standard for
47115144b0fSOlivier Houchard Binary Floating-Point Arithmetic.
47215144b0fSOlivier Houchard -------------------------------------------------------------------------------
47315144b0fSOlivier Houchard */
roundAndPackFloat64(flag zSign,int16 zExp,bits64 zSig)47415144b0fSOlivier Houchard static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
47515144b0fSOlivier Houchard {
47615144b0fSOlivier Houchard int8 roundingMode;
47715144b0fSOlivier Houchard flag roundNearestEven;
47815144b0fSOlivier Houchard int16 roundIncrement, roundBits;
47915144b0fSOlivier Houchard flag isTiny;
48015144b0fSOlivier Houchard
48115144b0fSOlivier Houchard roundingMode = float_rounding_mode;
48215144b0fSOlivier Houchard roundNearestEven = ( roundingMode == float_round_nearest_even );
48315144b0fSOlivier Houchard roundIncrement = 0x200;
48415144b0fSOlivier Houchard if ( ! roundNearestEven ) {
48515144b0fSOlivier Houchard if ( roundingMode == float_round_to_zero ) {
48615144b0fSOlivier Houchard roundIncrement = 0;
48715144b0fSOlivier Houchard }
48815144b0fSOlivier Houchard else {
48915144b0fSOlivier Houchard roundIncrement = 0x3FF;
49015144b0fSOlivier Houchard if ( zSign ) {
49115144b0fSOlivier Houchard if ( roundingMode == float_round_up ) roundIncrement = 0;
49215144b0fSOlivier Houchard }
49315144b0fSOlivier Houchard else {
49415144b0fSOlivier Houchard if ( roundingMode == float_round_down ) roundIncrement = 0;
49515144b0fSOlivier Houchard }
49615144b0fSOlivier Houchard }
49715144b0fSOlivier Houchard }
49815144b0fSOlivier Houchard roundBits = zSig & 0x3FF;
49915144b0fSOlivier Houchard if ( 0x7FD <= (bits16) zExp ) {
50015144b0fSOlivier Houchard if ( ( 0x7FD < zExp )
50115144b0fSOlivier Houchard || ( ( zExp == 0x7FD )
50215144b0fSOlivier Houchard && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
50315144b0fSOlivier Houchard ) {
50415144b0fSOlivier Houchard float_raise( float_flag_overflow | float_flag_inexact );
50515144b0fSOlivier Houchard return FLOAT64_MANGLE(
50615144b0fSOlivier Houchard FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) -
50715144b0fSOlivier Houchard ( roundIncrement == 0 ));
50815144b0fSOlivier Houchard }
50915144b0fSOlivier Houchard if ( zExp < 0 ) {
51015144b0fSOlivier Houchard isTiny =
51115144b0fSOlivier Houchard ( float_detect_tininess == float_tininess_before_rounding )
51215144b0fSOlivier Houchard || ( zExp < -1 )
51315144b0fSOlivier Houchard || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
51415144b0fSOlivier Houchard shift64RightJamming( zSig, - zExp, &zSig );
51515144b0fSOlivier Houchard zExp = 0;
51615144b0fSOlivier Houchard roundBits = zSig & 0x3FF;
51715144b0fSOlivier Houchard if ( isTiny && roundBits ) float_raise( float_flag_underflow );
51815144b0fSOlivier Houchard }
51915144b0fSOlivier Houchard }
52015144b0fSOlivier Houchard if ( roundBits ) float_exception_flags |= float_flag_inexact;
52115144b0fSOlivier Houchard zSig = ( zSig + roundIncrement )>>10;
52215144b0fSOlivier Houchard zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
52315144b0fSOlivier Houchard if ( zSig == 0 ) zExp = 0;
52415144b0fSOlivier Houchard return packFloat64( zSign, zExp, zSig );
52515144b0fSOlivier Houchard
52615144b0fSOlivier Houchard }
52715144b0fSOlivier Houchard
52815144b0fSOlivier Houchard /*
52915144b0fSOlivier Houchard -------------------------------------------------------------------------------
53015144b0fSOlivier Houchard Takes an abstract floating-point value having sign `zSign', exponent `zExp',
53115144b0fSOlivier Houchard and significand `zSig', and returns the proper double-precision floating-
53215144b0fSOlivier Houchard point value corresponding to the abstract input. This routine is just like
53315144b0fSOlivier Houchard `roundAndPackFloat64' except that `zSig' does not have to be normalized.
53415144b0fSOlivier Houchard Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
53515144b0fSOlivier Houchard floating-point exponent.
53615144b0fSOlivier Houchard -------------------------------------------------------------------------------
53715144b0fSOlivier Houchard */
53815144b0fSOlivier Houchard static float64
normalizeRoundAndPackFloat64(flag zSign,int16 zExp,bits64 zSig)53915144b0fSOlivier Houchard normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
54015144b0fSOlivier Houchard {
54115144b0fSOlivier Houchard int8 shiftCount;
54215144b0fSOlivier Houchard
54315144b0fSOlivier Houchard shiftCount = countLeadingZeros64( zSig ) - 1;
54415144b0fSOlivier Houchard return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
54515144b0fSOlivier Houchard
54615144b0fSOlivier Houchard }
54715144b0fSOlivier Houchard
54815144b0fSOlivier Houchard #ifdef FLOATX80
54915144b0fSOlivier Houchard
55015144b0fSOlivier Houchard /*
55115144b0fSOlivier Houchard -------------------------------------------------------------------------------
55215144b0fSOlivier Houchard Returns the fraction bits of the extended double-precision floating-point
55315144b0fSOlivier Houchard value `a'.
55415144b0fSOlivier Houchard -------------------------------------------------------------------------------
55515144b0fSOlivier Houchard */
extractFloatx80Frac(floatx80 a)55615144b0fSOlivier Houchard INLINE bits64 extractFloatx80Frac( floatx80 a )
55715144b0fSOlivier Houchard {
55815144b0fSOlivier Houchard
55915144b0fSOlivier Houchard return a.low;
56015144b0fSOlivier Houchard
56115144b0fSOlivier Houchard }
56215144b0fSOlivier Houchard
56315144b0fSOlivier Houchard /*
56415144b0fSOlivier Houchard -------------------------------------------------------------------------------
56515144b0fSOlivier Houchard Returns the exponent bits of the extended double-precision floating-point
56615144b0fSOlivier Houchard value `a'.
56715144b0fSOlivier Houchard -------------------------------------------------------------------------------
56815144b0fSOlivier Houchard */
extractFloatx80Exp(floatx80 a)56915144b0fSOlivier Houchard INLINE int32 extractFloatx80Exp( floatx80 a )
57015144b0fSOlivier Houchard {
57115144b0fSOlivier Houchard
57215144b0fSOlivier Houchard return a.high & 0x7FFF;
57315144b0fSOlivier Houchard
57415144b0fSOlivier Houchard }
57515144b0fSOlivier Houchard
57615144b0fSOlivier Houchard /*
57715144b0fSOlivier Houchard -------------------------------------------------------------------------------
57815144b0fSOlivier Houchard Returns the sign bit of the extended double-precision floating-point value
57915144b0fSOlivier Houchard `a'.
58015144b0fSOlivier Houchard -------------------------------------------------------------------------------
58115144b0fSOlivier Houchard */
extractFloatx80Sign(floatx80 a)58215144b0fSOlivier Houchard INLINE flag extractFloatx80Sign( floatx80 a )
58315144b0fSOlivier Houchard {
58415144b0fSOlivier Houchard
58515144b0fSOlivier Houchard return a.high>>15;
58615144b0fSOlivier Houchard
58715144b0fSOlivier Houchard }
58815144b0fSOlivier Houchard
58915144b0fSOlivier Houchard /*
59015144b0fSOlivier Houchard -------------------------------------------------------------------------------
59115144b0fSOlivier Houchard Normalizes the subnormal extended double-precision floating-point value
59215144b0fSOlivier Houchard represented by the denormalized significand `aSig'. The normalized exponent
59315144b0fSOlivier Houchard and significand are stored at the locations pointed to by `zExpPtr' and
59415144b0fSOlivier Houchard `zSigPtr', respectively.
59515144b0fSOlivier Houchard -------------------------------------------------------------------------------
59615144b0fSOlivier Houchard */
59715144b0fSOlivier Houchard static void
normalizeFloatx80Subnormal(bits64 aSig,int32 * zExpPtr,bits64 * zSigPtr)59815144b0fSOlivier Houchard normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
59915144b0fSOlivier Houchard {
60015144b0fSOlivier Houchard int8 shiftCount;
60115144b0fSOlivier Houchard
60215144b0fSOlivier Houchard shiftCount = countLeadingZeros64( aSig );
60315144b0fSOlivier Houchard *zSigPtr = aSig<<shiftCount;
60415144b0fSOlivier Houchard *zExpPtr = 1 - shiftCount;
60515144b0fSOlivier Houchard
60615144b0fSOlivier Houchard }
60715144b0fSOlivier Houchard
60815144b0fSOlivier Houchard /*
60915144b0fSOlivier Houchard -------------------------------------------------------------------------------
61015144b0fSOlivier Houchard Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
61115144b0fSOlivier Houchard extended double-precision floating-point value, returning the result.
61215144b0fSOlivier Houchard -------------------------------------------------------------------------------
61315144b0fSOlivier Houchard */
packFloatx80(flag zSign,int32 zExp,bits64 zSig)61415144b0fSOlivier Houchard INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
61515144b0fSOlivier Houchard {
61615144b0fSOlivier Houchard floatx80 z;
61715144b0fSOlivier Houchard
61815144b0fSOlivier Houchard z.low = zSig;
61915144b0fSOlivier Houchard z.high = ( ( (bits16) zSign )<<15 ) + zExp;
62015144b0fSOlivier Houchard return z;
62115144b0fSOlivier Houchard
62215144b0fSOlivier Houchard }
62315144b0fSOlivier Houchard
62415144b0fSOlivier Houchard /*
62515144b0fSOlivier Houchard -------------------------------------------------------------------------------
62615144b0fSOlivier Houchard Takes an abstract floating-point value having sign `zSign', exponent `zExp',
62715144b0fSOlivier Houchard and extended significand formed by the concatenation of `zSig0' and `zSig1',
62815144b0fSOlivier Houchard and returns the proper extended double-precision floating-point value
62915144b0fSOlivier Houchard corresponding to the abstract input. Ordinarily, the abstract value is
63015144b0fSOlivier Houchard rounded and packed into the extended double-precision format, with the
63115144b0fSOlivier Houchard inexact exception raised if the abstract input cannot be represented
63215144b0fSOlivier Houchard exactly. However, if the abstract value is too large, the overflow and
63315144b0fSOlivier Houchard inexact exceptions are raised and an infinity or maximal finite value is
63415144b0fSOlivier Houchard returned. If the abstract value is too small, the input value is rounded to
63515144b0fSOlivier Houchard a subnormal number, and the underflow and inexact exceptions are raised if
63615144b0fSOlivier Houchard the abstract input cannot be represented exactly as a subnormal extended
63715144b0fSOlivier Houchard double-precision floating-point number.
63815144b0fSOlivier Houchard If `roundingPrecision' is 32 or 64, the result is rounded to the same
63915144b0fSOlivier Houchard number of bits as single or double precision, respectively. Otherwise, the
64015144b0fSOlivier Houchard result is rounded to the full precision of the extended double-precision
64115144b0fSOlivier Houchard format.
64215144b0fSOlivier Houchard The input significand must be normalized or smaller. If the input
64315144b0fSOlivier Houchard significand is not normalized, `zExp' must be 0; in that case, the result
64415144b0fSOlivier Houchard returned is a subnormal number, and it must not require rounding. The
64515144b0fSOlivier Houchard handling of underflow and overflow follows the IEC/IEEE Standard for Binary
64615144b0fSOlivier Houchard Floating-Point Arithmetic.
64715144b0fSOlivier Houchard -------------------------------------------------------------------------------
64815144b0fSOlivier Houchard */
64915144b0fSOlivier Houchard static floatx80
roundAndPackFloatx80(int8 roundingPrecision,flag zSign,int32 zExp,bits64 zSig0,bits64 zSig1)65015144b0fSOlivier Houchard roundAndPackFloatx80(
65115144b0fSOlivier Houchard int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
65215144b0fSOlivier Houchard )
65315144b0fSOlivier Houchard {
65415144b0fSOlivier Houchard int8 roundingMode;
65515144b0fSOlivier Houchard flag roundNearestEven, increment, isTiny;
65615144b0fSOlivier Houchard int64 roundIncrement, roundMask, roundBits;
65715144b0fSOlivier Houchard
65815144b0fSOlivier Houchard roundingMode = float_rounding_mode;
65915144b0fSOlivier Houchard roundNearestEven = ( roundingMode == float_round_nearest_even );
66015144b0fSOlivier Houchard if ( roundingPrecision == 80 ) goto precision80;
66115144b0fSOlivier Houchard if ( roundingPrecision == 64 ) {
66215144b0fSOlivier Houchard roundIncrement = LIT64( 0x0000000000000400 );
66315144b0fSOlivier Houchard roundMask = LIT64( 0x00000000000007FF );
66415144b0fSOlivier Houchard }
66515144b0fSOlivier Houchard else if ( roundingPrecision == 32 ) {
66615144b0fSOlivier Houchard roundIncrement = LIT64( 0x0000008000000000 );
66715144b0fSOlivier Houchard roundMask = LIT64( 0x000000FFFFFFFFFF );
66815144b0fSOlivier Houchard }
66915144b0fSOlivier Houchard else {
67015144b0fSOlivier Houchard goto precision80;
67115144b0fSOlivier Houchard }
67215144b0fSOlivier Houchard zSig0 |= ( zSig1 != 0 );
67315144b0fSOlivier Houchard if ( ! roundNearestEven ) {
67415144b0fSOlivier Houchard if ( roundingMode == float_round_to_zero ) {
67515144b0fSOlivier Houchard roundIncrement = 0;
67615144b0fSOlivier Houchard }
67715144b0fSOlivier Houchard else {
67815144b0fSOlivier Houchard roundIncrement = roundMask;
67915144b0fSOlivier Houchard if ( zSign ) {
68015144b0fSOlivier Houchard if ( roundingMode == float_round_up ) roundIncrement = 0;
68115144b0fSOlivier Houchard }
68215144b0fSOlivier Houchard else {
68315144b0fSOlivier Houchard if ( roundingMode == float_round_down ) roundIncrement = 0;
68415144b0fSOlivier Houchard }
68515144b0fSOlivier Houchard }
68615144b0fSOlivier Houchard }
68715144b0fSOlivier Houchard roundBits = zSig0 & roundMask;
68815144b0fSOlivier Houchard if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
68915144b0fSOlivier Houchard if ( ( 0x7FFE < zExp )
69015144b0fSOlivier Houchard || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
69115144b0fSOlivier Houchard ) {
69215144b0fSOlivier Houchard goto overflow;
69315144b0fSOlivier Houchard }
69415144b0fSOlivier Houchard if ( zExp <= 0 ) {
69515144b0fSOlivier Houchard isTiny =
69615144b0fSOlivier Houchard ( float_detect_tininess == float_tininess_before_rounding )
69715144b0fSOlivier Houchard || ( zExp < 0 )
69815144b0fSOlivier Houchard || ( zSig0 <= zSig0 + roundIncrement );
69915144b0fSOlivier Houchard shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
70015144b0fSOlivier Houchard zExp = 0;
70115144b0fSOlivier Houchard roundBits = zSig0 & roundMask;
70215144b0fSOlivier Houchard if ( isTiny && roundBits ) float_raise( float_flag_underflow );
70315144b0fSOlivier Houchard if ( roundBits ) float_exception_flags |= float_flag_inexact;
70415144b0fSOlivier Houchard zSig0 += roundIncrement;
70515144b0fSOlivier Houchard if ( (sbits64) zSig0 < 0 ) zExp = 1;
70615144b0fSOlivier Houchard roundIncrement = roundMask + 1;
70715144b0fSOlivier Houchard if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
70815144b0fSOlivier Houchard roundMask |= roundIncrement;
70915144b0fSOlivier Houchard }
71015144b0fSOlivier Houchard zSig0 &= ~ roundMask;
71115144b0fSOlivier Houchard return packFloatx80( zSign, zExp, zSig0 );
71215144b0fSOlivier Houchard }
71315144b0fSOlivier Houchard }
71415144b0fSOlivier Houchard if ( roundBits ) float_exception_flags |= float_flag_inexact;
71515144b0fSOlivier Houchard zSig0 += roundIncrement;
71615144b0fSOlivier Houchard if ( zSig0 < roundIncrement ) {
71715144b0fSOlivier Houchard ++zExp;
71815144b0fSOlivier Houchard zSig0 = LIT64( 0x8000000000000000 );
71915144b0fSOlivier Houchard }
72015144b0fSOlivier Houchard roundIncrement = roundMask + 1;
72115144b0fSOlivier Houchard if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
72215144b0fSOlivier Houchard roundMask |= roundIncrement;
72315144b0fSOlivier Houchard }
72415144b0fSOlivier Houchard zSig0 &= ~ roundMask;
72515144b0fSOlivier Houchard if ( zSig0 == 0 ) zExp = 0;
72615144b0fSOlivier Houchard return packFloatx80( zSign, zExp, zSig0 );
72715144b0fSOlivier Houchard precision80:
72815144b0fSOlivier Houchard increment = ( (sbits64) zSig1 < 0 );
72915144b0fSOlivier Houchard if ( ! roundNearestEven ) {
73015144b0fSOlivier Houchard if ( roundingMode == float_round_to_zero ) {
73115144b0fSOlivier Houchard increment = 0;
73215144b0fSOlivier Houchard }
73315144b0fSOlivier Houchard else {
73415144b0fSOlivier Houchard if ( zSign ) {
73515144b0fSOlivier Houchard increment = ( roundingMode == float_round_down ) && zSig1;
73615144b0fSOlivier Houchard }
73715144b0fSOlivier Houchard else {
73815144b0fSOlivier Houchard increment = ( roundingMode == float_round_up ) && zSig1;
73915144b0fSOlivier Houchard }
74015144b0fSOlivier Houchard }
74115144b0fSOlivier Houchard }
74215144b0fSOlivier Houchard if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
74315144b0fSOlivier Houchard if ( ( 0x7FFE < zExp )
74415144b0fSOlivier Houchard || ( ( zExp == 0x7FFE )
74515144b0fSOlivier Houchard && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
74615144b0fSOlivier Houchard && increment
74715144b0fSOlivier Houchard )
74815144b0fSOlivier Houchard ) {
74915144b0fSOlivier Houchard roundMask = 0;
75015144b0fSOlivier Houchard overflow:
75115144b0fSOlivier Houchard float_raise( float_flag_overflow | float_flag_inexact );
75215144b0fSOlivier Houchard if ( ( roundingMode == float_round_to_zero )
75315144b0fSOlivier Houchard || ( zSign && ( roundingMode == float_round_up ) )
75415144b0fSOlivier Houchard || ( ! zSign && ( roundingMode == float_round_down ) )
75515144b0fSOlivier Houchard ) {
75615144b0fSOlivier Houchard return packFloatx80( zSign, 0x7FFE, ~ roundMask );
75715144b0fSOlivier Houchard }
75815144b0fSOlivier Houchard return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
75915144b0fSOlivier Houchard }
76015144b0fSOlivier Houchard if ( zExp <= 0 ) {
76115144b0fSOlivier Houchard isTiny =
76215144b0fSOlivier Houchard ( float_detect_tininess == float_tininess_before_rounding )
76315144b0fSOlivier Houchard || ( zExp < 0 )
76415144b0fSOlivier Houchard || ! increment
76515144b0fSOlivier Houchard || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
76615144b0fSOlivier Houchard shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
76715144b0fSOlivier Houchard zExp = 0;
76815144b0fSOlivier Houchard if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
76915144b0fSOlivier Houchard if ( zSig1 ) float_exception_flags |= float_flag_inexact;
77015144b0fSOlivier Houchard if ( roundNearestEven ) {
77115144b0fSOlivier Houchard increment = ( (sbits64) zSig1 < 0 );
77215144b0fSOlivier Houchard }
77315144b0fSOlivier Houchard else {
77415144b0fSOlivier Houchard if ( zSign ) {
77515144b0fSOlivier Houchard increment = ( roundingMode == float_round_down ) && zSig1;
77615144b0fSOlivier Houchard }
77715144b0fSOlivier Houchard else {
77815144b0fSOlivier Houchard increment = ( roundingMode == float_round_up ) && zSig1;
77915144b0fSOlivier Houchard }
78015144b0fSOlivier Houchard }
78115144b0fSOlivier Houchard if ( increment ) {
78215144b0fSOlivier Houchard ++zSig0;
78315144b0fSOlivier Houchard zSig0 &=
78415144b0fSOlivier Houchard ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
78515144b0fSOlivier Houchard if ( (sbits64) zSig0 < 0 ) zExp = 1;
78615144b0fSOlivier Houchard }
78715144b0fSOlivier Houchard return packFloatx80( zSign, zExp, zSig0 );
78815144b0fSOlivier Houchard }
78915144b0fSOlivier Houchard }
79015144b0fSOlivier Houchard if ( zSig1 ) float_exception_flags |= float_flag_inexact;
79115144b0fSOlivier Houchard if ( increment ) {
79215144b0fSOlivier Houchard ++zSig0;
79315144b0fSOlivier Houchard if ( zSig0 == 0 ) {
79415144b0fSOlivier Houchard ++zExp;
79515144b0fSOlivier Houchard zSig0 = LIT64( 0x8000000000000000 );
79615144b0fSOlivier Houchard }
79715144b0fSOlivier Houchard else {
79815144b0fSOlivier Houchard zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
79915144b0fSOlivier Houchard }
80015144b0fSOlivier Houchard }
80115144b0fSOlivier Houchard else {
80215144b0fSOlivier Houchard if ( zSig0 == 0 ) zExp = 0;
80315144b0fSOlivier Houchard }
80415144b0fSOlivier Houchard return packFloatx80( zSign, zExp, zSig0 );
80515144b0fSOlivier Houchard
80615144b0fSOlivier Houchard }
80715144b0fSOlivier Houchard
80815144b0fSOlivier Houchard /*
80915144b0fSOlivier Houchard -------------------------------------------------------------------------------
81015144b0fSOlivier Houchard Takes an abstract floating-point value having sign `zSign', exponent
81115144b0fSOlivier Houchard `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
81215144b0fSOlivier Houchard and returns the proper extended double-precision floating-point value
81315144b0fSOlivier Houchard corresponding to the abstract input. This routine is just like
81415144b0fSOlivier Houchard `roundAndPackFloatx80' except that the input significand does not have to be
81515144b0fSOlivier Houchard normalized.
81615144b0fSOlivier Houchard -------------------------------------------------------------------------------
81715144b0fSOlivier Houchard */
81815144b0fSOlivier Houchard static floatx80
normalizeRoundAndPackFloatx80(int8 roundingPrecision,flag zSign,int32 zExp,bits64 zSig0,bits64 zSig1)81915144b0fSOlivier Houchard normalizeRoundAndPackFloatx80(
82015144b0fSOlivier Houchard int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
82115144b0fSOlivier Houchard )
82215144b0fSOlivier Houchard {
82315144b0fSOlivier Houchard int8 shiftCount;
82415144b0fSOlivier Houchard
82515144b0fSOlivier Houchard if ( zSig0 == 0 ) {
82615144b0fSOlivier Houchard zSig0 = zSig1;
82715144b0fSOlivier Houchard zSig1 = 0;
82815144b0fSOlivier Houchard zExp -= 64;
82915144b0fSOlivier Houchard }
83015144b0fSOlivier Houchard shiftCount = countLeadingZeros64( zSig0 );
83115144b0fSOlivier Houchard shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
83215144b0fSOlivier Houchard zExp -= shiftCount;
83315144b0fSOlivier Houchard return
83415144b0fSOlivier Houchard roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
83515144b0fSOlivier Houchard
83615144b0fSOlivier Houchard }
83715144b0fSOlivier Houchard
83815144b0fSOlivier Houchard #endif
83915144b0fSOlivier Houchard
84015144b0fSOlivier Houchard #ifdef FLOAT128
84115144b0fSOlivier Houchard
84215144b0fSOlivier Houchard /*
84315144b0fSOlivier Houchard -------------------------------------------------------------------------------
84415144b0fSOlivier Houchard Returns the least-significant 64 fraction bits of the quadruple-precision
84515144b0fSOlivier Houchard floating-point value `a'.
84615144b0fSOlivier Houchard -------------------------------------------------------------------------------
84715144b0fSOlivier Houchard */
extractFloat128Frac1(float128 a)84815144b0fSOlivier Houchard INLINE bits64 extractFloat128Frac1( float128 a )
84915144b0fSOlivier Houchard {
85015144b0fSOlivier Houchard
85115144b0fSOlivier Houchard return a.low;
85215144b0fSOlivier Houchard
85315144b0fSOlivier Houchard }
85415144b0fSOlivier Houchard
85515144b0fSOlivier Houchard /*
85615144b0fSOlivier Houchard -------------------------------------------------------------------------------
85715144b0fSOlivier Houchard Returns the most-significant 48 fraction bits of the quadruple-precision
85815144b0fSOlivier Houchard floating-point value `a'.
85915144b0fSOlivier Houchard -------------------------------------------------------------------------------
86015144b0fSOlivier Houchard */
extractFloat128Frac0(float128 a)86115144b0fSOlivier Houchard INLINE bits64 extractFloat128Frac0( float128 a )
86215144b0fSOlivier Houchard {
86315144b0fSOlivier Houchard
86415144b0fSOlivier Houchard return a.high & LIT64( 0x0000FFFFFFFFFFFF );
86515144b0fSOlivier Houchard
86615144b0fSOlivier Houchard }
86715144b0fSOlivier Houchard
86815144b0fSOlivier Houchard /*
86915144b0fSOlivier Houchard -------------------------------------------------------------------------------
87015144b0fSOlivier Houchard Returns the exponent bits of the quadruple-precision floating-point value
87115144b0fSOlivier Houchard `a'.
87215144b0fSOlivier Houchard -------------------------------------------------------------------------------
87315144b0fSOlivier Houchard */
extractFloat128Exp(float128 a)87415144b0fSOlivier Houchard INLINE int32 extractFloat128Exp( float128 a )
87515144b0fSOlivier Houchard {
87615144b0fSOlivier Houchard
87715144b0fSOlivier Houchard return ( a.high>>48 ) & 0x7FFF;
87815144b0fSOlivier Houchard
87915144b0fSOlivier Houchard }
88015144b0fSOlivier Houchard
88115144b0fSOlivier Houchard /*
88215144b0fSOlivier Houchard -------------------------------------------------------------------------------
88315144b0fSOlivier Houchard Returns the sign bit of the quadruple-precision floating-point value `a'.
88415144b0fSOlivier Houchard -------------------------------------------------------------------------------
88515144b0fSOlivier Houchard */
extractFloat128Sign(float128 a)88615144b0fSOlivier Houchard INLINE flag extractFloat128Sign( float128 a )
88715144b0fSOlivier Houchard {
88815144b0fSOlivier Houchard
88915144b0fSOlivier Houchard return a.high>>63;
89015144b0fSOlivier Houchard
89115144b0fSOlivier Houchard }
89215144b0fSOlivier Houchard
89315144b0fSOlivier Houchard /*
89415144b0fSOlivier Houchard -------------------------------------------------------------------------------
89515144b0fSOlivier Houchard Normalizes the subnormal quadruple-precision floating-point value
89615144b0fSOlivier Houchard represented by the denormalized significand formed by the concatenation of
89715144b0fSOlivier Houchard `aSig0' and `aSig1'. The normalized exponent is stored at the location
89815144b0fSOlivier Houchard pointed to by `zExpPtr'. The most significant 49 bits of the normalized
89915144b0fSOlivier Houchard significand are stored at the location pointed to by `zSig0Ptr', and the
90015144b0fSOlivier Houchard least significant 64 bits of the normalized significand are stored at the
90115144b0fSOlivier Houchard location pointed to by `zSig1Ptr'.
90215144b0fSOlivier Houchard -------------------------------------------------------------------------------
90315144b0fSOlivier Houchard */
90415144b0fSOlivier Houchard static void
normalizeFloat128Subnormal(bits64 aSig0,bits64 aSig1,int32 * zExpPtr,bits64 * zSig0Ptr,bits64 * zSig1Ptr)90515144b0fSOlivier Houchard normalizeFloat128Subnormal(
90615144b0fSOlivier Houchard bits64 aSig0,
90715144b0fSOlivier Houchard bits64 aSig1,
90815144b0fSOlivier Houchard int32 *zExpPtr,
90915144b0fSOlivier Houchard bits64 *zSig0Ptr,
91015144b0fSOlivier Houchard bits64 *zSig1Ptr
91115144b0fSOlivier Houchard )
91215144b0fSOlivier Houchard {
91315144b0fSOlivier Houchard int8 shiftCount;
91415144b0fSOlivier Houchard
91515144b0fSOlivier Houchard if ( aSig0 == 0 ) {
91615144b0fSOlivier Houchard shiftCount = countLeadingZeros64( aSig1 ) - 15;
91715144b0fSOlivier Houchard if ( shiftCount < 0 ) {
91815144b0fSOlivier Houchard *zSig0Ptr = aSig1>>( - shiftCount );
91915144b0fSOlivier Houchard *zSig1Ptr = aSig1<<( shiftCount & 63 );
92015144b0fSOlivier Houchard }
92115144b0fSOlivier Houchard else {
92215144b0fSOlivier Houchard *zSig0Ptr = aSig1<<shiftCount;
92315144b0fSOlivier Houchard *zSig1Ptr = 0;
92415144b0fSOlivier Houchard }
92515144b0fSOlivier Houchard *zExpPtr = - shiftCount - 63;
92615144b0fSOlivier Houchard }
92715144b0fSOlivier Houchard else {
92815144b0fSOlivier Houchard shiftCount = countLeadingZeros64( aSig0 ) - 15;
92915144b0fSOlivier Houchard shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
93015144b0fSOlivier Houchard *zExpPtr = 1 - shiftCount;
93115144b0fSOlivier Houchard }
93215144b0fSOlivier Houchard
93315144b0fSOlivier Houchard }
93415144b0fSOlivier Houchard
93515144b0fSOlivier Houchard /*
93615144b0fSOlivier Houchard -------------------------------------------------------------------------------
93715144b0fSOlivier Houchard Packs the sign `zSign', the exponent `zExp', and the significand formed
93815144b0fSOlivier Houchard by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
93915144b0fSOlivier Houchard floating-point value, returning the result. After being shifted into the
94015144b0fSOlivier Houchard proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
94115144b0fSOlivier Houchard added together to form the most significant 32 bits of the result. This
94215144b0fSOlivier Houchard means that any integer portion of `zSig0' will be added into the exponent.
94315144b0fSOlivier Houchard Since a properly normalized significand will have an integer portion equal
94415144b0fSOlivier Houchard to 1, the `zExp' input should be 1 less than the desired result exponent
94515144b0fSOlivier Houchard whenever `zSig0' and `zSig1' concatenated form a complete, normalized
94615144b0fSOlivier Houchard significand.
94715144b0fSOlivier Houchard -------------------------------------------------------------------------------
94815144b0fSOlivier Houchard */
94915144b0fSOlivier Houchard INLINE float128
packFloat128(flag zSign,int32 zExp,bits64 zSig0,bits64 zSig1)95015144b0fSOlivier Houchard packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
95115144b0fSOlivier Houchard {
95215144b0fSOlivier Houchard float128 z;
95315144b0fSOlivier Houchard
95415144b0fSOlivier Houchard z.low = zSig1;
95515144b0fSOlivier Houchard z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
95615144b0fSOlivier Houchard return z;
95715144b0fSOlivier Houchard
95815144b0fSOlivier Houchard }
95915144b0fSOlivier Houchard
96015144b0fSOlivier Houchard /*
96115144b0fSOlivier Houchard -------------------------------------------------------------------------------
96215144b0fSOlivier Houchard Takes an abstract floating-point value having sign `zSign', exponent `zExp',
96315144b0fSOlivier Houchard and extended significand formed by the concatenation of `zSig0', `zSig1',
96415144b0fSOlivier Houchard and `zSig2', and returns the proper quadruple-precision floating-point value
96515144b0fSOlivier Houchard corresponding to the abstract input. Ordinarily, the abstract value is
96615144b0fSOlivier Houchard simply rounded and packed into the quadruple-precision format, with the
96715144b0fSOlivier Houchard inexact exception raised if the abstract input cannot be represented
96815144b0fSOlivier Houchard exactly. However, if the abstract value is too large, the overflow and
96915144b0fSOlivier Houchard inexact exceptions are raised and an infinity or maximal finite value is
97015144b0fSOlivier Houchard returned. If the abstract value is too small, the input value is rounded to
97115144b0fSOlivier Houchard a subnormal number, and the underflow and inexact exceptions are raised if
97215144b0fSOlivier Houchard the abstract input cannot be represented exactly as a subnormal quadruple-
97315144b0fSOlivier Houchard precision floating-point number.
97415144b0fSOlivier Houchard The input significand must be normalized or smaller. If the input
97515144b0fSOlivier Houchard significand is not normalized, `zExp' must be 0; in that case, the result
97615144b0fSOlivier Houchard returned is a subnormal number, and it must not require rounding. In the
97715144b0fSOlivier Houchard usual case that the input significand is normalized, `zExp' must be 1 less
97815144b0fSOlivier Houchard than the ``true'' floating-point exponent. The handling of underflow and
97915144b0fSOlivier Houchard overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
98015144b0fSOlivier Houchard -------------------------------------------------------------------------------
98115144b0fSOlivier Houchard */
98215144b0fSOlivier Houchard static float128
roundAndPackFloat128(flag zSign,int32 zExp,bits64 zSig0,bits64 zSig1,bits64 zSig2)98315144b0fSOlivier Houchard roundAndPackFloat128(
98415144b0fSOlivier Houchard flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
98515144b0fSOlivier Houchard {
98615144b0fSOlivier Houchard int8 roundingMode;
98715144b0fSOlivier Houchard flag roundNearestEven, increment, isTiny;
98815144b0fSOlivier Houchard
98915144b0fSOlivier Houchard roundingMode = float_rounding_mode;
99015144b0fSOlivier Houchard roundNearestEven = ( roundingMode == float_round_nearest_even );
99115144b0fSOlivier Houchard increment = ( (sbits64) zSig2 < 0 );
99215144b0fSOlivier Houchard if ( ! roundNearestEven ) {
99315144b0fSOlivier Houchard if ( roundingMode == float_round_to_zero ) {
99415144b0fSOlivier Houchard increment = 0;
99515144b0fSOlivier Houchard }
99615144b0fSOlivier Houchard else {
99715144b0fSOlivier Houchard if ( zSign ) {
99815144b0fSOlivier Houchard increment = ( roundingMode == float_round_down ) && zSig2;
99915144b0fSOlivier Houchard }
100015144b0fSOlivier Houchard else {
100115144b0fSOlivier Houchard increment = ( roundingMode == float_round_up ) && zSig2;
100215144b0fSOlivier Houchard }
100315144b0fSOlivier Houchard }
100415144b0fSOlivier Houchard }
100515144b0fSOlivier Houchard if ( 0x7FFD <= (bits32) zExp ) {
100615144b0fSOlivier Houchard if ( ( 0x7FFD < zExp )
100715144b0fSOlivier Houchard || ( ( zExp == 0x7FFD )
100815144b0fSOlivier Houchard && eq128(
100915144b0fSOlivier Houchard LIT64( 0x0001FFFFFFFFFFFF ),
101015144b0fSOlivier Houchard LIT64( 0xFFFFFFFFFFFFFFFF ),
101115144b0fSOlivier Houchard zSig0,
101215144b0fSOlivier Houchard zSig1
101315144b0fSOlivier Houchard )
101415144b0fSOlivier Houchard && increment
101515144b0fSOlivier Houchard )
101615144b0fSOlivier Houchard ) {
101715144b0fSOlivier Houchard float_raise( float_flag_overflow | float_flag_inexact );
101815144b0fSOlivier Houchard if ( ( roundingMode == float_round_to_zero )
101915144b0fSOlivier Houchard || ( zSign && ( roundingMode == float_round_up ) )
102015144b0fSOlivier Houchard || ( ! zSign && ( roundingMode == float_round_down ) )
102115144b0fSOlivier Houchard ) {
102215144b0fSOlivier Houchard return
102315144b0fSOlivier Houchard packFloat128(
102415144b0fSOlivier Houchard zSign,
102515144b0fSOlivier Houchard 0x7FFE,
102615144b0fSOlivier Houchard LIT64( 0x0000FFFFFFFFFFFF ),
102715144b0fSOlivier Houchard LIT64( 0xFFFFFFFFFFFFFFFF )
102815144b0fSOlivier Houchard );
102915144b0fSOlivier Houchard }
103015144b0fSOlivier Houchard return packFloat128( zSign, 0x7FFF, 0, 0 );
103115144b0fSOlivier Houchard }
103215144b0fSOlivier Houchard if ( zExp < 0 ) {
103315144b0fSOlivier Houchard isTiny =
103415144b0fSOlivier Houchard ( float_detect_tininess == float_tininess_before_rounding )
103515144b0fSOlivier Houchard || ( zExp < -1 )
103615144b0fSOlivier Houchard || ! increment
103715144b0fSOlivier Houchard || lt128(
103815144b0fSOlivier Houchard zSig0,
103915144b0fSOlivier Houchard zSig1,
104015144b0fSOlivier Houchard LIT64( 0x0001FFFFFFFFFFFF ),
104115144b0fSOlivier Houchard LIT64( 0xFFFFFFFFFFFFFFFF )
104215144b0fSOlivier Houchard );
104315144b0fSOlivier Houchard shift128ExtraRightJamming(
104415144b0fSOlivier Houchard zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
104515144b0fSOlivier Houchard zExp = 0;
104615144b0fSOlivier Houchard if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
104715144b0fSOlivier Houchard if ( roundNearestEven ) {
104815144b0fSOlivier Houchard increment = ( (sbits64) zSig2 < 0 );
104915144b0fSOlivier Houchard }
105015144b0fSOlivier Houchard else {
105115144b0fSOlivier Houchard if ( zSign ) {
105215144b0fSOlivier Houchard increment = ( roundingMode == float_round_down ) && zSig2;
105315144b0fSOlivier Houchard }
105415144b0fSOlivier Houchard else {
105515144b0fSOlivier Houchard increment = ( roundingMode == float_round_up ) && zSig2;
105615144b0fSOlivier Houchard }
105715144b0fSOlivier Houchard }
105815144b0fSOlivier Houchard }
105915144b0fSOlivier Houchard }
106015144b0fSOlivier Houchard if ( zSig2 ) float_exception_flags |= float_flag_inexact;
106115144b0fSOlivier Houchard if ( increment ) {
106215144b0fSOlivier Houchard add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
106315144b0fSOlivier Houchard zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
106415144b0fSOlivier Houchard }
106515144b0fSOlivier Houchard else {
106615144b0fSOlivier Houchard if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
106715144b0fSOlivier Houchard }
106815144b0fSOlivier Houchard return packFloat128( zSign, zExp, zSig0, zSig1 );
106915144b0fSOlivier Houchard
107015144b0fSOlivier Houchard }
107115144b0fSOlivier Houchard
107215144b0fSOlivier Houchard /*
107315144b0fSOlivier Houchard -------------------------------------------------------------------------------
107415144b0fSOlivier Houchard Takes an abstract floating-point value having sign `zSign', exponent `zExp',
107515144b0fSOlivier Houchard and significand formed by the concatenation of `zSig0' and `zSig1', and
107615144b0fSOlivier Houchard returns the proper quadruple-precision floating-point value corresponding
107715144b0fSOlivier Houchard to the abstract input. This routine is just like `roundAndPackFloat128'
107815144b0fSOlivier Houchard except that the input significand has fewer bits and does not have to be
107915144b0fSOlivier Houchard normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
108015144b0fSOlivier Houchard point exponent.
108115144b0fSOlivier Houchard -------------------------------------------------------------------------------
108215144b0fSOlivier Houchard */
108315144b0fSOlivier Houchard static float128
normalizeRoundAndPackFloat128(flag zSign,int32 zExp,bits64 zSig0,bits64 zSig1)108415144b0fSOlivier Houchard normalizeRoundAndPackFloat128(
108515144b0fSOlivier Houchard flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
108615144b0fSOlivier Houchard {
108715144b0fSOlivier Houchard int8 shiftCount;
108815144b0fSOlivier Houchard bits64 zSig2;
108915144b0fSOlivier Houchard
109015144b0fSOlivier Houchard if ( zSig0 == 0 ) {
109115144b0fSOlivier Houchard zSig0 = zSig1;
109215144b0fSOlivier Houchard zSig1 = 0;
109315144b0fSOlivier Houchard zExp -= 64;
109415144b0fSOlivier Houchard }
109515144b0fSOlivier Houchard shiftCount = countLeadingZeros64( zSig0 ) - 15;
109615144b0fSOlivier Houchard if ( 0 <= shiftCount ) {
109715144b0fSOlivier Houchard zSig2 = 0;
109815144b0fSOlivier Houchard shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
109915144b0fSOlivier Houchard }
110015144b0fSOlivier Houchard else {
110115144b0fSOlivier Houchard shift128ExtraRightJamming(
110215144b0fSOlivier Houchard zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
110315144b0fSOlivier Houchard }
110415144b0fSOlivier Houchard zExp -= shiftCount;
110515144b0fSOlivier Houchard return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
110615144b0fSOlivier Houchard
110715144b0fSOlivier Houchard }
110815144b0fSOlivier Houchard
110915144b0fSOlivier Houchard #endif
111015144b0fSOlivier Houchard
111115144b0fSOlivier Houchard /*
111215144b0fSOlivier Houchard -------------------------------------------------------------------------------
111315144b0fSOlivier Houchard Returns the result of converting the 32-bit two's complement integer `a'
111415144b0fSOlivier Houchard to the single-precision floating-point format. The conversion is performed
111515144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
111615144b0fSOlivier Houchard -------------------------------------------------------------------------------
111715144b0fSOlivier Houchard */
int32_to_float32(int32 a)111815144b0fSOlivier Houchard float32 int32_to_float32( int32 a )
111915144b0fSOlivier Houchard {
112015144b0fSOlivier Houchard flag zSign;
112115144b0fSOlivier Houchard
112215144b0fSOlivier Houchard if ( a == 0 ) return 0;
112315144b0fSOlivier Houchard if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
112415144b0fSOlivier Houchard zSign = ( a < 0 );
112515144b0fSOlivier Houchard return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
112615144b0fSOlivier Houchard
112715144b0fSOlivier Houchard }
112815144b0fSOlivier Houchard
1129*7ea324dfSDavid Schultz #ifndef SOFTFLOAT_FOR_GCC /* __floatunsisf is in libgcc */
uint32_to_float32(uint32 a)1130c36abe0dSDavid Schultz float32 uint32_to_float32( uint32 a )
1131c36abe0dSDavid Schultz {
1132c36abe0dSDavid Schultz if ( a == 0 ) return 0;
1133c36abe0dSDavid Schultz if ( a & (bits32) 0x80000000 )
1134c36abe0dSDavid Schultz return normalizeRoundAndPackFloat32( 0, 0x9D, a >> 1 );
1135c36abe0dSDavid Schultz return normalizeRoundAndPackFloat32( 0, 0x9C, a );
1136c36abe0dSDavid Schultz }
1137*7ea324dfSDavid Schultz #endif
1138c36abe0dSDavid Schultz
1139c36abe0dSDavid Schultz
114015144b0fSOlivier Houchard /*
114115144b0fSOlivier Houchard -------------------------------------------------------------------------------
114215144b0fSOlivier Houchard Returns the result of converting the 32-bit two's complement integer `a'
114315144b0fSOlivier Houchard to the double-precision floating-point format. The conversion is performed
114415144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
114515144b0fSOlivier Houchard -------------------------------------------------------------------------------
114615144b0fSOlivier Houchard */
int32_to_float64(int32 a)114715144b0fSOlivier Houchard float64 int32_to_float64( int32 a )
114815144b0fSOlivier Houchard {
114915144b0fSOlivier Houchard flag zSign;
115015144b0fSOlivier Houchard uint32 absA;
115115144b0fSOlivier Houchard int8 shiftCount;
115215144b0fSOlivier Houchard bits64 zSig;
115315144b0fSOlivier Houchard
115415144b0fSOlivier Houchard if ( a == 0 ) return 0;
115515144b0fSOlivier Houchard zSign = ( a < 0 );
115615144b0fSOlivier Houchard absA = zSign ? - a : a;
115715144b0fSOlivier Houchard shiftCount = countLeadingZeros32( absA ) + 21;
115815144b0fSOlivier Houchard zSig = absA;
115915144b0fSOlivier Houchard return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
116015144b0fSOlivier Houchard
116115144b0fSOlivier Houchard }
116215144b0fSOlivier Houchard
1163*7ea324dfSDavid Schultz #ifndef SOFTFLOAT_FOR_GCC /* __floatunsidf is in libgcc */
uint32_to_float64(uint32 a)1164c36abe0dSDavid Schultz float64 uint32_to_float64( uint32 a )
1165c36abe0dSDavid Schultz {
1166c36abe0dSDavid Schultz int8 shiftCount;
1167c36abe0dSDavid Schultz bits64 zSig = a;
1168c36abe0dSDavid Schultz
1169c36abe0dSDavid Schultz if ( a == 0 ) return 0;
1170c36abe0dSDavid Schultz shiftCount = countLeadingZeros32( a ) + 21;
1171c36abe0dSDavid Schultz return packFloat64( 0, 0x432 - shiftCount, zSig<<shiftCount );
1172c36abe0dSDavid Schultz
1173c36abe0dSDavid Schultz }
1174*7ea324dfSDavid Schultz #endif
1175c36abe0dSDavid Schultz
117615144b0fSOlivier Houchard #ifdef FLOATX80
117715144b0fSOlivier Houchard
117815144b0fSOlivier Houchard /*
117915144b0fSOlivier Houchard -------------------------------------------------------------------------------
118015144b0fSOlivier Houchard Returns the result of converting the 32-bit two's complement integer `a'
118115144b0fSOlivier Houchard to the extended double-precision floating-point format. The conversion
118215144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
118315144b0fSOlivier Houchard Arithmetic.
118415144b0fSOlivier Houchard -------------------------------------------------------------------------------
118515144b0fSOlivier Houchard */
int32_to_floatx80(int32 a)118615144b0fSOlivier Houchard floatx80 int32_to_floatx80( int32 a )
118715144b0fSOlivier Houchard {
118815144b0fSOlivier Houchard flag zSign;
118915144b0fSOlivier Houchard uint32 absA;
119015144b0fSOlivier Houchard int8 shiftCount;
119115144b0fSOlivier Houchard bits64 zSig;
119215144b0fSOlivier Houchard
119315144b0fSOlivier Houchard if ( a == 0 ) return packFloatx80( 0, 0, 0 );
119415144b0fSOlivier Houchard zSign = ( a < 0 );
119515144b0fSOlivier Houchard absA = zSign ? - a : a;
119615144b0fSOlivier Houchard shiftCount = countLeadingZeros32( absA ) + 32;
119715144b0fSOlivier Houchard zSig = absA;
119815144b0fSOlivier Houchard return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
119915144b0fSOlivier Houchard
120015144b0fSOlivier Houchard }
120115144b0fSOlivier Houchard
uint32_to_floatx80(uint32 a)1202c36abe0dSDavid Schultz floatx80 uint32_to_floatx80( uint32 a )
1203c36abe0dSDavid Schultz {
1204c36abe0dSDavid Schultz int8 shiftCount;
1205c36abe0dSDavid Schultz bits64 zSig = a;
1206c36abe0dSDavid Schultz
1207c36abe0dSDavid Schultz if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1208c36abe0dSDavid Schultz shiftCount = countLeadingZeros32( a ) + 32;
1209c36abe0dSDavid Schultz return packFloatx80( 0, 0x403E - shiftCount, zSig<<shiftCount );
1210c36abe0dSDavid Schultz
1211c36abe0dSDavid Schultz }
1212c36abe0dSDavid Schultz
121315144b0fSOlivier Houchard #endif
121415144b0fSOlivier Houchard
121515144b0fSOlivier Houchard #ifdef FLOAT128
121615144b0fSOlivier Houchard
121715144b0fSOlivier Houchard /*
121815144b0fSOlivier Houchard -------------------------------------------------------------------------------
121915144b0fSOlivier Houchard Returns the result of converting the 32-bit two's complement integer `a' to
122015144b0fSOlivier Houchard the quadruple-precision floating-point format. The conversion is performed
122115144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
122215144b0fSOlivier Houchard -------------------------------------------------------------------------------
122315144b0fSOlivier Houchard */
int32_to_float128(int32 a)122415144b0fSOlivier Houchard float128 int32_to_float128( int32 a )
122515144b0fSOlivier Houchard {
122615144b0fSOlivier Houchard flag zSign;
122715144b0fSOlivier Houchard uint32 absA;
122815144b0fSOlivier Houchard int8 shiftCount;
122915144b0fSOlivier Houchard bits64 zSig0;
123015144b0fSOlivier Houchard
123115144b0fSOlivier Houchard if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
123215144b0fSOlivier Houchard zSign = ( a < 0 );
123315144b0fSOlivier Houchard absA = zSign ? - a : a;
123415144b0fSOlivier Houchard shiftCount = countLeadingZeros32( absA ) + 17;
123515144b0fSOlivier Houchard zSig0 = absA;
123615144b0fSOlivier Houchard return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
123715144b0fSOlivier Houchard
123815144b0fSOlivier Houchard }
123915144b0fSOlivier Houchard
uint32_to_float128(uint32 a)1240c36abe0dSDavid Schultz float128 uint32_to_float128( uint32 a )
1241c36abe0dSDavid Schultz {
1242c36abe0dSDavid Schultz int8 shiftCount;
1243c36abe0dSDavid Schultz bits64 zSig0 = a;
1244c36abe0dSDavid Schultz
1245c36abe0dSDavid Schultz if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1246c36abe0dSDavid Schultz shiftCount = countLeadingZeros32( a ) + 17;
1247c36abe0dSDavid Schultz return packFloat128( 0, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1248c36abe0dSDavid Schultz
1249c36abe0dSDavid Schultz }
1250c36abe0dSDavid Schultz
125115144b0fSOlivier Houchard #endif
125215144b0fSOlivier Houchard
125315144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */
125415144b0fSOlivier Houchard /*
125515144b0fSOlivier Houchard -------------------------------------------------------------------------------
125615144b0fSOlivier Houchard Returns the result of converting the 64-bit two's complement integer `a'
125715144b0fSOlivier Houchard to the single-precision floating-point format. The conversion is performed
125815144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
125915144b0fSOlivier Houchard -------------------------------------------------------------------------------
126015144b0fSOlivier Houchard */
int64_to_float32(int64 a)126115144b0fSOlivier Houchard float32 int64_to_float32( int64 a )
126215144b0fSOlivier Houchard {
126315144b0fSOlivier Houchard flag zSign;
126415144b0fSOlivier Houchard uint64 absA;
126515144b0fSOlivier Houchard int8 shiftCount;
126615144b0fSOlivier Houchard
126715144b0fSOlivier Houchard if ( a == 0 ) return 0;
126815144b0fSOlivier Houchard zSign = ( a < 0 );
126915144b0fSOlivier Houchard absA = zSign ? - a : a;
127015144b0fSOlivier Houchard shiftCount = countLeadingZeros64( absA ) - 40;
127115144b0fSOlivier Houchard if ( 0 <= shiftCount ) {
127215144b0fSOlivier Houchard return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
127315144b0fSOlivier Houchard }
127415144b0fSOlivier Houchard else {
127515144b0fSOlivier Houchard shiftCount += 7;
127615144b0fSOlivier Houchard if ( shiftCount < 0 ) {
127715144b0fSOlivier Houchard shift64RightJamming( absA, - shiftCount, &absA );
127815144b0fSOlivier Houchard }
127915144b0fSOlivier Houchard else {
128015144b0fSOlivier Houchard absA <<= shiftCount;
128115144b0fSOlivier Houchard }
128215144b0fSOlivier Houchard return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
128315144b0fSOlivier Houchard }
128415144b0fSOlivier Houchard
128515144b0fSOlivier Houchard }
128615144b0fSOlivier Houchard
128715144b0fSOlivier Houchard /*
128815144b0fSOlivier Houchard -------------------------------------------------------------------------------
128915144b0fSOlivier Houchard Returns the result of converting the 64-bit two's complement integer `a'
129015144b0fSOlivier Houchard to the double-precision floating-point format. The conversion is performed
129115144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129215144b0fSOlivier Houchard -------------------------------------------------------------------------------
129315144b0fSOlivier Houchard */
int64_to_float64(int64 a)129415144b0fSOlivier Houchard float64 int64_to_float64( int64 a )
129515144b0fSOlivier Houchard {
129615144b0fSOlivier Houchard flag zSign;
129715144b0fSOlivier Houchard
129815144b0fSOlivier Houchard if ( a == 0 ) return 0;
129915144b0fSOlivier Houchard if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
130015144b0fSOlivier Houchard return packFloat64( 1, 0x43E, 0 );
130115144b0fSOlivier Houchard }
130215144b0fSOlivier Houchard zSign = ( a < 0 );
130315144b0fSOlivier Houchard return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );
130415144b0fSOlivier Houchard
130515144b0fSOlivier Houchard }
130615144b0fSOlivier Houchard
130715144b0fSOlivier Houchard #ifdef FLOATX80
130815144b0fSOlivier Houchard
130915144b0fSOlivier Houchard /*
131015144b0fSOlivier Houchard -------------------------------------------------------------------------------
131115144b0fSOlivier Houchard Returns the result of converting the 64-bit two's complement integer `a'
131215144b0fSOlivier Houchard to the extended double-precision floating-point format. The conversion
131315144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
131415144b0fSOlivier Houchard Arithmetic.
131515144b0fSOlivier Houchard -------------------------------------------------------------------------------
131615144b0fSOlivier Houchard */
int64_to_floatx80(int64 a)131715144b0fSOlivier Houchard floatx80 int64_to_floatx80( int64 a )
131815144b0fSOlivier Houchard {
131915144b0fSOlivier Houchard flag zSign;
132015144b0fSOlivier Houchard uint64 absA;
132115144b0fSOlivier Houchard int8 shiftCount;
132215144b0fSOlivier Houchard
132315144b0fSOlivier Houchard if ( a == 0 ) return packFloatx80( 0, 0, 0 );
132415144b0fSOlivier Houchard zSign = ( a < 0 );
132515144b0fSOlivier Houchard absA = zSign ? - a : a;
132615144b0fSOlivier Houchard shiftCount = countLeadingZeros64( absA );
132715144b0fSOlivier Houchard return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
132815144b0fSOlivier Houchard
132915144b0fSOlivier Houchard }
133015144b0fSOlivier Houchard
133115144b0fSOlivier Houchard #endif
133215144b0fSOlivier Houchard
133315144b0fSOlivier Houchard #endif /* !SOFTFLOAT_FOR_GCC */
133415144b0fSOlivier Houchard
133515144b0fSOlivier Houchard #ifdef FLOAT128
133615144b0fSOlivier Houchard
133715144b0fSOlivier Houchard /*
133815144b0fSOlivier Houchard -------------------------------------------------------------------------------
133915144b0fSOlivier Houchard Returns the result of converting the 64-bit two's complement integer `a' to
134015144b0fSOlivier Houchard the quadruple-precision floating-point format. The conversion is performed
134115144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
134215144b0fSOlivier Houchard -------------------------------------------------------------------------------
134315144b0fSOlivier Houchard */
int64_to_float128(int64 a)134415144b0fSOlivier Houchard float128 int64_to_float128( int64 a )
134515144b0fSOlivier Houchard {
134615144b0fSOlivier Houchard flag zSign;
134715144b0fSOlivier Houchard uint64 absA;
134815144b0fSOlivier Houchard int8 shiftCount;
134915144b0fSOlivier Houchard int32 zExp;
135015144b0fSOlivier Houchard bits64 zSig0, zSig1;
135115144b0fSOlivier Houchard
135215144b0fSOlivier Houchard if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
135315144b0fSOlivier Houchard zSign = ( a < 0 );
135415144b0fSOlivier Houchard absA = zSign ? - a : a;
135515144b0fSOlivier Houchard shiftCount = countLeadingZeros64( absA ) + 49;
135615144b0fSOlivier Houchard zExp = 0x406E - shiftCount;
135715144b0fSOlivier Houchard if ( 64 <= shiftCount ) {
135815144b0fSOlivier Houchard zSig1 = 0;
135915144b0fSOlivier Houchard zSig0 = absA;
136015144b0fSOlivier Houchard shiftCount -= 64;
136115144b0fSOlivier Houchard }
136215144b0fSOlivier Houchard else {
136315144b0fSOlivier Houchard zSig1 = absA;
136415144b0fSOlivier Houchard zSig0 = 0;
136515144b0fSOlivier Houchard }
136615144b0fSOlivier Houchard shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
136715144b0fSOlivier Houchard return packFloat128( zSign, zExp, zSig0, zSig1 );
136815144b0fSOlivier Houchard
136915144b0fSOlivier Houchard }
137015144b0fSOlivier Houchard
137115144b0fSOlivier Houchard #endif
137215144b0fSOlivier Houchard
137315144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
137415144b0fSOlivier Houchard /*
137515144b0fSOlivier Houchard -------------------------------------------------------------------------------
137615144b0fSOlivier Houchard Returns the result of converting the single-precision floating-point value
137715144b0fSOlivier Houchard `a' to the 32-bit two's complement integer format. The conversion is
137815144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
137915144b0fSOlivier Houchard Arithmetic---which means in particular that the conversion is rounded
138015144b0fSOlivier Houchard according to the current rounding mode. If `a' is a NaN, the largest
138115144b0fSOlivier Houchard positive integer is returned. Otherwise, if the conversion overflows, the
138215144b0fSOlivier Houchard largest integer with the same sign as `a' is returned.
138315144b0fSOlivier Houchard -------------------------------------------------------------------------------
138415144b0fSOlivier Houchard */
float32_to_int32(float32 a)138515144b0fSOlivier Houchard int32 float32_to_int32( float32 a )
138615144b0fSOlivier Houchard {
138715144b0fSOlivier Houchard flag aSign;
138815144b0fSOlivier Houchard int16 aExp, shiftCount;
138915144b0fSOlivier Houchard bits32 aSig;
139015144b0fSOlivier Houchard bits64 aSig64;
139115144b0fSOlivier Houchard
139215144b0fSOlivier Houchard aSig = extractFloat32Frac( a );
139315144b0fSOlivier Houchard aExp = extractFloat32Exp( a );
139415144b0fSOlivier Houchard aSign = extractFloat32Sign( a );
139515144b0fSOlivier Houchard if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
139615144b0fSOlivier Houchard if ( aExp ) aSig |= 0x00800000;
139715144b0fSOlivier Houchard shiftCount = 0xAF - aExp;
139815144b0fSOlivier Houchard aSig64 = aSig;
139915144b0fSOlivier Houchard aSig64 <<= 32;
140015144b0fSOlivier Houchard if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
140115144b0fSOlivier Houchard return roundAndPackInt32( aSign, aSig64 );
140215144b0fSOlivier Houchard
140315144b0fSOlivier Houchard }
140415144b0fSOlivier Houchard #endif /* !SOFTFLOAT_FOR_GCC */
140515144b0fSOlivier Houchard
140615144b0fSOlivier Houchard /*
140715144b0fSOlivier Houchard -------------------------------------------------------------------------------
140815144b0fSOlivier Houchard Returns the result of converting the single-precision floating-point value
140915144b0fSOlivier Houchard `a' to the 32-bit two's complement integer format. The conversion is
141015144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
141115144b0fSOlivier Houchard Arithmetic, except that the conversion is always rounded toward zero.
141215144b0fSOlivier Houchard If `a' is a NaN, the largest positive integer is returned. Otherwise, if
141315144b0fSOlivier Houchard the conversion overflows, the largest integer with the same sign as `a' is
141415144b0fSOlivier Houchard returned.
141515144b0fSOlivier Houchard -------------------------------------------------------------------------------
141615144b0fSOlivier Houchard */
float32_to_int32_round_to_zero(float32 a)141715144b0fSOlivier Houchard int32 float32_to_int32_round_to_zero( float32 a )
141815144b0fSOlivier Houchard {
141915144b0fSOlivier Houchard flag aSign;
142015144b0fSOlivier Houchard int16 aExp, shiftCount;
142115144b0fSOlivier Houchard bits32 aSig;
142215144b0fSOlivier Houchard int32 z;
142315144b0fSOlivier Houchard
142415144b0fSOlivier Houchard aSig = extractFloat32Frac( a );
142515144b0fSOlivier Houchard aExp = extractFloat32Exp( a );
142615144b0fSOlivier Houchard aSign = extractFloat32Sign( a );
142715144b0fSOlivier Houchard shiftCount = aExp - 0x9E;
142815144b0fSOlivier Houchard if ( 0 <= shiftCount ) {
142915144b0fSOlivier Houchard if ( a != 0xCF000000 ) {
143015144b0fSOlivier Houchard float_raise( float_flag_invalid );
143115144b0fSOlivier Houchard if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
143215144b0fSOlivier Houchard }
143315144b0fSOlivier Houchard return (sbits32) 0x80000000;
143415144b0fSOlivier Houchard }
143515144b0fSOlivier Houchard else if ( aExp <= 0x7E ) {
143615144b0fSOlivier Houchard if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
143715144b0fSOlivier Houchard return 0;
143815144b0fSOlivier Houchard }
143915144b0fSOlivier Houchard aSig = ( aSig | 0x00800000 )<<8;
144015144b0fSOlivier Houchard z = aSig>>( - shiftCount );
144115144b0fSOlivier Houchard if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
144215144b0fSOlivier Houchard float_exception_flags |= float_flag_inexact;
144315144b0fSOlivier Houchard }
144415144b0fSOlivier Houchard if ( aSign ) z = - z;
144515144b0fSOlivier Houchard return z;
144615144b0fSOlivier Houchard
144715144b0fSOlivier Houchard }
144815144b0fSOlivier Houchard
144915144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */
145015144b0fSOlivier Houchard /*
145115144b0fSOlivier Houchard -------------------------------------------------------------------------------
145215144b0fSOlivier Houchard Returns the result of converting the single-precision floating-point value
145315144b0fSOlivier Houchard `a' to the 64-bit two's complement integer format. The conversion is
145415144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
145515144b0fSOlivier Houchard Arithmetic---which means in particular that the conversion is rounded
145615144b0fSOlivier Houchard according to the current rounding mode. If `a' is a NaN, the largest
145715144b0fSOlivier Houchard positive integer is returned. Otherwise, if the conversion overflows, the
145815144b0fSOlivier Houchard largest integer with the same sign as `a' is returned.
145915144b0fSOlivier Houchard -------------------------------------------------------------------------------
146015144b0fSOlivier Houchard */
float32_to_int64(float32 a)146115144b0fSOlivier Houchard int64 float32_to_int64( float32 a )
146215144b0fSOlivier Houchard {
146315144b0fSOlivier Houchard flag aSign;
146415144b0fSOlivier Houchard int16 aExp, shiftCount;
146515144b0fSOlivier Houchard bits32 aSig;
146615144b0fSOlivier Houchard bits64 aSig64, aSigExtra;
146715144b0fSOlivier Houchard
146815144b0fSOlivier Houchard aSig = extractFloat32Frac( a );
146915144b0fSOlivier Houchard aExp = extractFloat32Exp( a );
147015144b0fSOlivier Houchard aSign = extractFloat32Sign( a );
147115144b0fSOlivier Houchard shiftCount = 0xBE - aExp;
147215144b0fSOlivier Houchard if ( shiftCount < 0 ) {
147315144b0fSOlivier Houchard float_raise( float_flag_invalid );
147415144b0fSOlivier Houchard if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
147515144b0fSOlivier Houchard return LIT64( 0x7FFFFFFFFFFFFFFF );
147615144b0fSOlivier Houchard }
147715144b0fSOlivier Houchard return (sbits64) LIT64( 0x8000000000000000 );
147815144b0fSOlivier Houchard }
147915144b0fSOlivier Houchard if ( aExp ) aSig |= 0x00800000;
148015144b0fSOlivier Houchard aSig64 = aSig;
148115144b0fSOlivier Houchard aSig64 <<= 40;
148215144b0fSOlivier Houchard shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
148315144b0fSOlivier Houchard return roundAndPackInt64( aSign, aSig64, aSigExtra );
148415144b0fSOlivier Houchard
148515144b0fSOlivier Houchard }
148615144b0fSOlivier Houchard
148715144b0fSOlivier Houchard /*
148815144b0fSOlivier Houchard -------------------------------------------------------------------------------
148915144b0fSOlivier Houchard Returns the result of converting the single-precision floating-point value
149015144b0fSOlivier Houchard `a' to the 64-bit two's complement integer format. The conversion is
149115144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
149215144b0fSOlivier Houchard Arithmetic, except that the conversion is always rounded toward zero. If
149315144b0fSOlivier Houchard `a' is a NaN, the largest positive integer is returned. Otherwise, if the
149415144b0fSOlivier Houchard conversion overflows, the largest integer with the same sign as `a' is
149515144b0fSOlivier Houchard returned.
149615144b0fSOlivier Houchard -------------------------------------------------------------------------------
149715144b0fSOlivier Houchard */
float32_to_int64_round_to_zero(float32 a)149815144b0fSOlivier Houchard int64 float32_to_int64_round_to_zero( float32 a )
149915144b0fSOlivier Houchard {
150015144b0fSOlivier Houchard flag aSign;
150115144b0fSOlivier Houchard int16 aExp, shiftCount;
150215144b0fSOlivier Houchard bits32 aSig;
150315144b0fSOlivier Houchard bits64 aSig64;
150415144b0fSOlivier Houchard int64 z;
150515144b0fSOlivier Houchard
150615144b0fSOlivier Houchard aSig = extractFloat32Frac( a );
150715144b0fSOlivier Houchard aExp = extractFloat32Exp( a );
150815144b0fSOlivier Houchard aSign = extractFloat32Sign( a );
150915144b0fSOlivier Houchard shiftCount = aExp - 0xBE;
151015144b0fSOlivier Houchard if ( 0 <= shiftCount ) {
151115144b0fSOlivier Houchard if ( a != 0xDF000000 ) {
151215144b0fSOlivier Houchard float_raise( float_flag_invalid );
151315144b0fSOlivier Houchard if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
151415144b0fSOlivier Houchard return LIT64( 0x7FFFFFFFFFFFFFFF );
151515144b0fSOlivier Houchard }
151615144b0fSOlivier Houchard }
151715144b0fSOlivier Houchard return (sbits64) LIT64( 0x8000000000000000 );
151815144b0fSOlivier Houchard }
151915144b0fSOlivier Houchard else if ( aExp <= 0x7E ) {
152015144b0fSOlivier Houchard if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
152115144b0fSOlivier Houchard return 0;
152215144b0fSOlivier Houchard }
152315144b0fSOlivier Houchard aSig64 = aSig | 0x00800000;
152415144b0fSOlivier Houchard aSig64 <<= 40;
152515144b0fSOlivier Houchard z = aSig64>>( - shiftCount );
152615144b0fSOlivier Houchard if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
152715144b0fSOlivier Houchard float_exception_flags |= float_flag_inexact;
152815144b0fSOlivier Houchard }
152915144b0fSOlivier Houchard if ( aSign ) z = - z;
153015144b0fSOlivier Houchard return z;
153115144b0fSOlivier Houchard
153215144b0fSOlivier Houchard }
153315144b0fSOlivier Houchard #endif /* !SOFTFLOAT_FOR_GCC */
153415144b0fSOlivier Houchard
153515144b0fSOlivier Houchard /*
153615144b0fSOlivier Houchard -------------------------------------------------------------------------------
153715144b0fSOlivier Houchard Returns the result of converting the single-precision floating-point value
153815144b0fSOlivier Houchard `a' to the double-precision floating-point format. The conversion is
153915144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
154015144b0fSOlivier Houchard Arithmetic.
154115144b0fSOlivier Houchard -------------------------------------------------------------------------------
154215144b0fSOlivier Houchard */
float32_to_float64(float32 a)154315144b0fSOlivier Houchard float64 float32_to_float64( float32 a )
154415144b0fSOlivier Houchard {
154515144b0fSOlivier Houchard flag aSign;
154615144b0fSOlivier Houchard int16 aExp;
154715144b0fSOlivier Houchard bits32 aSig;
154815144b0fSOlivier Houchard
154915144b0fSOlivier Houchard aSig = extractFloat32Frac( a );
155015144b0fSOlivier Houchard aExp = extractFloat32Exp( a );
155115144b0fSOlivier Houchard aSign = extractFloat32Sign( a );
155215144b0fSOlivier Houchard if ( aExp == 0xFF ) {
155315144b0fSOlivier Houchard if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
155415144b0fSOlivier Houchard return packFloat64( aSign, 0x7FF, 0 );
155515144b0fSOlivier Houchard }
155615144b0fSOlivier Houchard if ( aExp == 0 ) {
155715144b0fSOlivier Houchard if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
155815144b0fSOlivier Houchard normalizeFloat32Subnormal( aSig, &aExp, &aSig );
155915144b0fSOlivier Houchard --aExp;
156015144b0fSOlivier Houchard }
156115144b0fSOlivier Houchard return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
156215144b0fSOlivier Houchard
156315144b0fSOlivier Houchard }
156415144b0fSOlivier Houchard
156515144b0fSOlivier Houchard #ifdef FLOATX80
156615144b0fSOlivier Houchard
156715144b0fSOlivier Houchard /*
156815144b0fSOlivier Houchard -------------------------------------------------------------------------------
156915144b0fSOlivier Houchard Returns the result of converting the single-precision floating-point value
157015144b0fSOlivier Houchard `a' to the extended double-precision floating-point format. The conversion
157115144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
157215144b0fSOlivier Houchard Arithmetic.
157315144b0fSOlivier Houchard -------------------------------------------------------------------------------
157415144b0fSOlivier Houchard */
float32_to_floatx80(float32 a)157515144b0fSOlivier Houchard floatx80 float32_to_floatx80( float32 a )
157615144b0fSOlivier Houchard {
157715144b0fSOlivier Houchard flag aSign;
157815144b0fSOlivier Houchard int16 aExp;
157915144b0fSOlivier Houchard bits32 aSig;
158015144b0fSOlivier Houchard
158115144b0fSOlivier Houchard aSig = extractFloat32Frac( a );
158215144b0fSOlivier Houchard aExp = extractFloat32Exp( a );
158315144b0fSOlivier Houchard aSign = extractFloat32Sign( a );
158415144b0fSOlivier Houchard if ( aExp == 0xFF ) {
158515144b0fSOlivier Houchard if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
158615144b0fSOlivier Houchard return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
158715144b0fSOlivier Houchard }
158815144b0fSOlivier Houchard if ( aExp == 0 ) {
158915144b0fSOlivier Houchard if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
159015144b0fSOlivier Houchard normalizeFloat32Subnormal( aSig, &aExp, &aSig );
159115144b0fSOlivier Houchard }
159215144b0fSOlivier Houchard aSig |= 0x00800000;
159315144b0fSOlivier Houchard return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
159415144b0fSOlivier Houchard
159515144b0fSOlivier Houchard }
159615144b0fSOlivier Houchard
159715144b0fSOlivier Houchard #endif
159815144b0fSOlivier Houchard
159915144b0fSOlivier Houchard #ifdef FLOAT128
160015144b0fSOlivier Houchard
160115144b0fSOlivier Houchard /*
160215144b0fSOlivier Houchard -------------------------------------------------------------------------------
160315144b0fSOlivier Houchard Returns the result of converting the single-precision floating-point value
160415144b0fSOlivier Houchard `a' to the double-precision floating-point format. The conversion is
160515144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
160615144b0fSOlivier Houchard Arithmetic.
160715144b0fSOlivier Houchard -------------------------------------------------------------------------------
160815144b0fSOlivier Houchard */
float32_to_float128(float32 a)160915144b0fSOlivier Houchard float128 float32_to_float128( float32 a )
161015144b0fSOlivier Houchard {
161115144b0fSOlivier Houchard flag aSign;
161215144b0fSOlivier Houchard int16 aExp;
161315144b0fSOlivier Houchard bits32 aSig;
161415144b0fSOlivier Houchard
161515144b0fSOlivier Houchard aSig = extractFloat32Frac( a );
161615144b0fSOlivier Houchard aExp = extractFloat32Exp( a );
161715144b0fSOlivier Houchard aSign = extractFloat32Sign( a );
161815144b0fSOlivier Houchard if ( aExp == 0xFF ) {
161915144b0fSOlivier Houchard if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
162015144b0fSOlivier Houchard return packFloat128( aSign, 0x7FFF, 0, 0 );
162115144b0fSOlivier Houchard }
162215144b0fSOlivier Houchard if ( aExp == 0 ) {
162315144b0fSOlivier Houchard if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
162415144b0fSOlivier Houchard normalizeFloat32Subnormal( aSig, &aExp, &aSig );
162515144b0fSOlivier Houchard --aExp;
162615144b0fSOlivier Houchard }
162715144b0fSOlivier Houchard return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
162815144b0fSOlivier Houchard
162915144b0fSOlivier Houchard }
163015144b0fSOlivier Houchard
163115144b0fSOlivier Houchard #endif
163215144b0fSOlivier Houchard
163315144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
163415144b0fSOlivier Houchard /*
163515144b0fSOlivier Houchard -------------------------------------------------------------------------------
163615144b0fSOlivier Houchard Rounds the single-precision floating-point value `a' to an integer, and
163715144b0fSOlivier Houchard returns the result as a single-precision floating-point value. The
163815144b0fSOlivier Houchard operation is performed according to the IEC/IEEE Standard for Binary
163915144b0fSOlivier Houchard Floating-Point Arithmetic.
164015144b0fSOlivier Houchard -------------------------------------------------------------------------------
164115144b0fSOlivier Houchard */
float32_round_to_int(float32 a)164215144b0fSOlivier Houchard float32 float32_round_to_int( float32 a )
164315144b0fSOlivier Houchard {
164415144b0fSOlivier Houchard flag aSign;
164515144b0fSOlivier Houchard int16 aExp;
164615144b0fSOlivier Houchard bits32 lastBitMask, roundBitsMask;
164715144b0fSOlivier Houchard int8 roundingMode;
164815144b0fSOlivier Houchard float32 z;
164915144b0fSOlivier Houchard
165015144b0fSOlivier Houchard aExp = extractFloat32Exp( a );
165115144b0fSOlivier Houchard if ( 0x96 <= aExp ) {
165215144b0fSOlivier Houchard if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
165315144b0fSOlivier Houchard return propagateFloat32NaN( a, a );
165415144b0fSOlivier Houchard }
165515144b0fSOlivier Houchard return a;
165615144b0fSOlivier Houchard }
165715144b0fSOlivier Houchard if ( aExp <= 0x7E ) {
165815144b0fSOlivier Houchard if ( (bits32) ( a<<1 ) == 0 ) return a;
165915144b0fSOlivier Houchard float_exception_flags |= float_flag_inexact;
166015144b0fSOlivier Houchard aSign = extractFloat32Sign( a );
166115144b0fSOlivier Houchard switch ( float_rounding_mode ) {
166215144b0fSOlivier Houchard case float_round_nearest_even:
166315144b0fSOlivier Houchard if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
166415144b0fSOlivier Houchard return packFloat32( aSign, 0x7F, 0 );
166515144b0fSOlivier Houchard }
166615144b0fSOlivier Houchard break;
166715144b0fSOlivier Houchard case float_round_to_zero:
166815144b0fSOlivier Houchard break;
166915144b0fSOlivier Houchard case float_round_down:
167015144b0fSOlivier Houchard return aSign ? 0xBF800000 : 0;
167115144b0fSOlivier Houchard case float_round_up:
167215144b0fSOlivier Houchard return aSign ? 0x80000000 : 0x3F800000;
167315144b0fSOlivier Houchard }
167415144b0fSOlivier Houchard return packFloat32( aSign, 0, 0 );
167515144b0fSOlivier Houchard }
167615144b0fSOlivier Houchard lastBitMask = 1;
167715144b0fSOlivier Houchard lastBitMask <<= 0x96 - aExp;
167815144b0fSOlivier Houchard roundBitsMask = lastBitMask - 1;
167915144b0fSOlivier Houchard z = a;
168015144b0fSOlivier Houchard roundingMode = float_rounding_mode;
168115144b0fSOlivier Houchard if ( roundingMode == float_round_nearest_even ) {
168215144b0fSOlivier Houchard z += lastBitMask>>1;
168315144b0fSOlivier Houchard if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
168415144b0fSOlivier Houchard }
168515144b0fSOlivier Houchard else if ( roundingMode != float_round_to_zero ) {
168615144b0fSOlivier Houchard if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
168715144b0fSOlivier Houchard z += roundBitsMask;
168815144b0fSOlivier Houchard }
168915144b0fSOlivier Houchard }
169015144b0fSOlivier Houchard z &= ~ roundBitsMask;
169115144b0fSOlivier Houchard if ( z != a ) float_exception_flags |= float_flag_inexact;
169215144b0fSOlivier Houchard return z;
169315144b0fSOlivier Houchard
169415144b0fSOlivier Houchard }
169515144b0fSOlivier Houchard #endif /* !SOFTFLOAT_FOR_GCC */
169615144b0fSOlivier Houchard
169715144b0fSOlivier Houchard /*
169815144b0fSOlivier Houchard -------------------------------------------------------------------------------
169915144b0fSOlivier Houchard Returns the result of adding the absolute values of the single-precision
170015144b0fSOlivier Houchard floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
170115144b0fSOlivier Houchard before being returned. `zSign' is ignored if the result is a NaN.
170215144b0fSOlivier Houchard The addition is performed according to the IEC/IEEE Standard for Binary
170315144b0fSOlivier Houchard Floating-Point Arithmetic.
170415144b0fSOlivier Houchard -------------------------------------------------------------------------------
170515144b0fSOlivier Houchard */
addFloat32Sigs(float32 a,float32 b,flag zSign)170615144b0fSOlivier Houchard static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
170715144b0fSOlivier Houchard {
170815144b0fSOlivier Houchard int16 aExp, bExp, zExp;
170915144b0fSOlivier Houchard bits32 aSig, bSig, zSig;
171015144b0fSOlivier Houchard int16 expDiff;
171115144b0fSOlivier Houchard
171215144b0fSOlivier Houchard aSig = extractFloat32Frac( a );
171315144b0fSOlivier Houchard aExp = extractFloat32Exp( a );
171415144b0fSOlivier Houchard bSig = extractFloat32Frac( b );
171515144b0fSOlivier Houchard bExp = extractFloat32Exp( b );
171615144b0fSOlivier Houchard expDiff = aExp - bExp;
171715144b0fSOlivier Houchard aSig <<= 6;
171815144b0fSOlivier Houchard bSig <<= 6;
171915144b0fSOlivier Houchard if ( 0 < expDiff ) {
172015144b0fSOlivier Houchard if ( aExp == 0xFF ) {
172115144b0fSOlivier Houchard if ( aSig ) return propagateFloat32NaN( a, b );
172215144b0fSOlivier Houchard return a;
172315144b0fSOlivier Houchard }
172415144b0fSOlivier Houchard if ( bExp == 0 ) {
172515144b0fSOlivier Houchard --expDiff;
172615144b0fSOlivier Houchard }
172715144b0fSOlivier Houchard else {
172815144b0fSOlivier Houchard bSig |= 0x20000000;
172915144b0fSOlivier Houchard }
173015144b0fSOlivier Houchard shift32RightJamming( bSig, expDiff, &bSig );
173115144b0fSOlivier Houchard zExp = aExp;
173215144b0fSOlivier Houchard }
173315144b0fSOlivier Houchard else if ( expDiff < 0 ) {
173415144b0fSOlivier Houchard if ( bExp == 0xFF ) {
173515144b0fSOlivier Houchard if ( bSig ) return propagateFloat32NaN( a, b );
173615144b0fSOlivier Houchard return packFloat32( zSign, 0xFF, 0 );
173715144b0fSOlivier Houchard }
173815144b0fSOlivier Houchard if ( aExp == 0 ) {
173915144b0fSOlivier Houchard ++expDiff;
174015144b0fSOlivier Houchard }
174115144b0fSOlivier Houchard else {
174215144b0fSOlivier Houchard aSig |= 0x20000000;
174315144b0fSOlivier Houchard }
174415144b0fSOlivier Houchard shift32RightJamming( aSig, - expDiff, &aSig );
174515144b0fSOlivier Houchard zExp = bExp;
174615144b0fSOlivier Houchard }
174715144b0fSOlivier Houchard else {
174815144b0fSOlivier Houchard if ( aExp == 0xFF ) {
174915144b0fSOlivier Houchard if ( aSig | bSig ) return propagateFloat32NaN( a, b );
175015144b0fSOlivier Houchard return a;
175115144b0fSOlivier Houchard }
175215144b0fSOlivier Houchard if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
175315144b0fSOlivier Houchard zSig = 0x40000000 + aSig + bSig;
175415144b0fSOlivier Houchard zExp = aExp;
175515144b0fSOlivier Houchard goto roundAndPack;
175615144b0fSOlivier Houchard }
175715144b0fSOlivier Houchard aSig |= 0x20000000;
175815144b0fSOlivier Houchard zSig = ( aSig + bSig )<<1;
175915144b0fSOlivier Houchard --zExp;
176015144b0fSOlivier Houchard if ( (sbits32) zSig < 0 ) {
176115144b0fSOlivier Houchard zSig = aSig + bSig;
176215144b0fSOlivier Houchard ++zExp;
176315144b0fSOlivier Houchard }
176415144b0fSOlivier Houchard roundAndPack:
176515144b0fSOlivier Houchard return roundAndPackFloat32( zSign, zExp, zSig );
176615144b0fSOlivier Houchard
176715144b0fSOlivier Houchard }
176815144b0fSOlivier Houchard
176915144b0fSOlivier Houchard /*
177015144b0fSOlivier Houchard -------------------------------------------------------------------------------
177115144b0fSOlivier Houchard Returns the result of subtracting the absolute values of the single-
177215144b0fSOlivier Houchard precision floating-point values `a' and `b'. If `zSign' is 1, the
177315144b0fSOlivier Houchard difference is negated before being returned. `zSign' is ignored if the
177415144b0fSOlivier Houchard result is a NaN. The subtraction is performed according to the IEC/IEEE
177515144b0fSOlivier Houchard Standard for Binary Floating-Point Arithmetic.
177615144b0fSOlivier Houchard -------------------------------------------------------------------------------
177715144b0fSOlivier Houchard */
subFloat32Sigs(float32 a,float32 b,flag zSign)177815144b0fSOlivier Houchard static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
177915144b0fSOlivier Houchard {
178015144b0fSOlivier Houchard int16 aExp, bExp, zExp;
178115144b0fSOlivier Houchard bits32 aSig, bSig, zSig;
178215144b0fSOlivier Houchard int16 expDiff;
178315144b0fSOlivier Houchard
178415144b0fSOlivier Houchard aSig = extractFloat32Frac( a );
178515144b0fSOlivier Houchard aExp = extractFloat32Exp( a );
178615144b0fSOlivier Houchard bSig = extractFloat32Frac( b );
178715144b0fSOlivier Houchard bExp = extractFloat32Exp( b );
178815144b0fSOlivier Houchard expDiff = aExp - bExp;
178915144b0fSOlivier Houchard aSig <<= 7;
179015144b0fSOlivier Houchard bSig <<= 7;
179115144b0fSOlivier Houchard if ( 0 < expDiff ) goto aExpBigger;
179215144b0fSOlivier Houchard if ( expDiff < 0 ) goto bExpBigger;
179315144b0fSOlivier Houchard if ( aExp == 0xFF ) {
179415144b0fSOlivier Houchard if ( aSig | bSig ) return propagateFloat32NaN( a, b );
179515144b0fSOlivier Houchard float_raise( float_flag_invalid );
179615144b0fSOlivier Houchard return float32_default_nan;
179715144b0fSOlivier Houchard }
179815144b0fSOlivier Houchard if ( aExp == 0 ) {
179915144b0fSOlivier Houchard aExp = 1;
180015144b0fSOlivier Houchard bExp = 1;
180115144b0fSOlivier Houchard }
180215144b0fSOlivier Houchard if ( bSig < aSig ) goto aBigger;
180315144b0fSOlivier Houchard if ( aSig < bSig ) goto bBigger;
180415144b0fSOlivier Houchard return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
180515144b0fSOlivier Houchard bExpBigger:
180615144b0fSOlivier Houchard if ( bExp == 0xFF ) {
180715144b0fSOlivier Houchard if ( bSig ) return propagateFloat32NaN( a, b );
180815144b0fSOlivier Houchard return packFloat32( zSign ^ 1, 0xFF, 0 );
180915144b0fSOlivier Houchard }
181015144b0fSOlivier Houchard if ( aExp == 0 ) {
181115144b0fSOlivier Houchard ++expDiff;
181215144b0fSOlivier Houchard }
181315144b0fSOlivier Houchard else {
181415144b0fSOlivier Houchard aSig |= 0x40000000;
181515144b0fSOlivier Houchard }
181615144b0fSOlivier Houchard shift32RightJamming( aSig, - expDiff, &aSig );
181715144b0fSOlivier Houchard bSig |= 0x40000000;
181815144b0fSOlivier Houchard bBigger:
181915144b0fSOlivier Houchard zSig = bSig - aSig;
182015144b0fSOlivier Houchard zExp = bExp;
182115144b0fSOlivier Houchard zSign ^= 1;
182215144b0fSOlivier Houchard goto normalizeRoundAndPack;
182315144b0fSOlivier Houchard aExpBigger:
182415144b0fSOlivier Houchard if ( aExp == 0xFF ) {
182515144b0fSOlivier Houchard if ( aSig ) return propagateFloat32NaN( a, b );
182615144b0fSOlivier Houchard return a;
182715144b0fSOlivier Houchard }
182815144b0fSOlivier Houchard if ( bExp == 0 ) {
182915144b0fSOlivier Houchard --expDiff;
183015144b0fSOlivier Houchard }
183115144b0fSOlivier Houchard else {
183215144b0fSOlivier Houchard bSig |= 0x40000000;
183315144b0fSOlivier Houchard }
183415144b0fSOlivier Houchard shift32RightJamming( bSig, expDiff, &bSig );
183515144b0fSOlivier Houchard aSig |= 0x40000000;
183615144b0fSOlivier Houchard aBigger:
183715144b0fSOlivier Houchard zSig = aSig - bSig;
183815144b0fSOlivier Houchard zExp = aExp;
183915144b0fSOlivier Houchard normalizeRoundAndPack:
184015144b0fSOlivier Houchard --zExp;
184115144b0fSOlivier Houchard return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
184215144b0fSOlivier Houchard
184315144b0fSOlivier Houchard }
184415144b0fSOlivier Houchard
184515144b0fSOlivier Houchard /*
184615144b0fSOlivier Houchard -------------------------------------------------------------------------------
184715144b0fSOlivier Houchard Returns the result of adding the single-precision floating-point values `a'
184815144b0fSOlivier Houchard and `b'. The operation is performed according to the IEC/IEEE Standard for
184915144b0fSOlivier Houchard Binary Floating-Point Arithmetic.
185015144b0fSOlivier Houchard -------------------------------------------------------------------------------
185115144b0fSOlivier Houchard */
float32_add(float32 a,float32 b)185215144b0fSOlivier Houchard float32 float32_add( float32 a, float32 b )
185315144b0fSOlivier Houchard {
185415144b0fSOlivier Houchard flag aSign, bSign;
185515144b0fSOlivier Houchard
185615144b0fSOlivier Houchard aSign = extractFloat32Sign( a );
185715144b0fSOlivier Houchard bSign = extractFloat32Sign( b );
185815144b0fSOlivier Houchard if ( aSign == bSign ) {
185915144b0fSOlivier Houchard return addFloat32Sigs( a, b, aSign );
186015144b0fSOlivier Houchard }
186115144b0fSOlivier Houchard else {
186215144b0fSOlivier Houchard return subFloat32Sigs( a, b, aSign );
186315144b0fSOlivier Houchard }
186415144b0fSOlivier Houchard
186515144b0fSOlivier Houchard }
186615144b0fSOlivier Houchard
186715144b0fSOlivier Houchard /*
186815144b0fSOlivier Houchard -------------------------------------------------------------------------------
186915144b0fSOlivier Houchard Returns the result of subtracting the single-precision floating-point values
187015144b0fSOlivier Houchard `a' and `b'. The operation is performed according to the IEC/IEEE Standard
187115144b0fSOlivier Houchard for Binary Floating-Point Arithmetic.
187215144b0fSOlivier Houchard -------------------------------------------------------------------------------
187315144b0fSOlivier Houchard */
float32_sub(float32 a,float32 b)187415144b0fSOlivier Houchard float32 float32_sub( float32 a, float32 b )
187515144b0fSOlivier Houchard {
187615144b0fSOlivier Houchard flag aSign, bSign;
187715144b0fSOlivier Houchard
187815144b0fSOlivier Houchard aSign = extractFloat32Sign( a );
187915144b0fSOlivier Houchard bSign = extractFloat32Sign( b );
188015144b0fSOlivier Houchard if ( aSign == bSign ) {
188115144b0fSOlivier Houchard return subFloat32Sigs( a, b, aSign );
188215144b0fSOlivier Houchard }
188315144b0fSOlivier Houchard else {
188415144b0fSOlivier Houchard return addFloat32Sigs( a, b, aSign );
188515144b0fSOlivier Houchard }
188615144b0fSOlivier Houchard
188715144b0fSOlivier Houchard }
188815144b0fSOlivier Houchard
188915144b0fSOlivier Houchard /*
189015144b0fSOlivier Houchard -------------------------------------------------------------------------------
189115144b0fSOlivier Houchard Returns the result of multiplying the single-precision floating-point values
189215144b0fSOlivier Houchard `a' and `b'. The operation is performed according to the IEC/IEEE Standard
189315144b0fSOlivier Houchard for Binary Floating-Point Arithmetic.
189415144b0fSOlivier Houchard -------------------------------------------------------------------------------
189515144b0fSOlivier Houchard */
float32_mul(float32 a,float32 b)189615144b0fSOlivier Houchard float32 float32_mul( float32 a, float32 b )
189715144b0fSOlivier Houchard {
189815144b0fSOlivier Houchard flag aSign, bSign, zSign;
189915144b0fSOlivier Houchard int16 aExp, bExp, zExp;
190015144b0fSOlivier Houchard bits32 aSig, bSig;
190115144b0fSOlivier Houchard bits64 zSig64;
190215144b0fSOlivier Houchard bits32 zSig;
190315144b0fSOlivier Houchard
190415144b0fSOlivier Houchard aSig = extractFloat32Frac( a );
190515144b0fSOlivier Houchard aExp = extractFloat32Exp( a );
190615144b0fSOlivier Houchard aSign = extractFloat32Sign( a );
190715144b0fSOlivier Houchard bSig = extractFloat32Frac( b );
190815144b0fSOlivier Houchard bExp = extractFloat32Exp( b );
190915144b0fSOlivier Houchard bSign = extractFloat32Sign( b );
191015144b0fSOlivier Houchard zSign = aSign ^ bSign;
191115144b0fSOlivier Houchard if ( aExp == 0xFF ) {
191215144b0fSOlivier Houchard if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
191315144b0fSOlivier Houchard return propagateFloat32NaN( a, b );
191415144b0fSOlivier Houchard }
191515144b0fSOlivier Houchard if ( ( bExp | bSig ) == 0 ) {
191615144b0fSOlivier Houchard float_raise( float_flag_invalid );
191715144b0fSOlivier Houchard return float32_default_nan;
191815144b0fSOlivier Houchard }
191915144b0fSOlivier Houchard return packFloat32( zSign, 0xFF, 0 );
192015144b0fSOlivier Houchard }
192115144b0fSOlivier Houchard if ( bExp == 0xFF ) {
192215144b0fSOlivier Houchard if ( bSig ) return propagateFloat32NaN( a, b );
192315144b0fSOlivier Houchard if ( ( aExp | aSig ) == 0 ) {
192415144b0fSOlivier Houchard float_raise( float_flag_invalid );
192515144b0fSOlivier Houchard return float32_default_nan;
192615144b0fSOlivier Houchard }
192715144b0fSOlivier Houchard return packFloat32( zSign, 0xFF, 0 );
192815144b0fSOlivier Houchard }
192915144b0fSOlivier Houchard if ( aExp == 0 ) {
193015144b0fSOlivier Houchard if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
193115144b0fSOlivier Houchard normalizeFloat32Subnormal( aSig, &aExp, &aSig );
193215144b0fSOlivier Houchard }
193315144b0fSOlivier Houchard if ( bExp == 0 ) {
193415144b0fSOlivier Houchard if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
193515144b0fSOlivier Houchard normalizeFloat32Subnormal( bSig, &bExp, &bSig );
193615144b0fSOlivier Houchard }
193715144b0fSOlivier Houchard zExp = aExp + bExp - 0x7F;
193815144b0fSOlivier Houchard aSig = ( aSig | 0x00800000 )<<7;
193915144b0fSOlivier Houchard bSig = ( bSig | 0x00800000 )<<8;
194015144b0fSOlivier Houchard shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
194115144b0fSOlivier Houchard zSig = zSig64;
194215144b0fSOlivier Houchard if ( 0 <= (sbits32) ( zSig<<1 ) ) {
194315144b0fSOlivier Houchard zSig <<= 1;
194415144b0fSOlivier Houchard --zExp;
194515144b0fSOlivier Houchard }
194615144b0fSOlivier Houchard return roundAndPackFloat32( zSign, zExp, zSig );
194715144b0fSOlivier Houchard
194815144b0fSOlivier Houchard }
194915144b0fSOlivier Houchard
195015144b0fSOlivier Houchard /*
195115144b0fSOlivier Houchard -------------------------------------------------------------------------------
195215144b0fSOlivier Houchard Returns the result of dividing the single-precision floating-point value `a'
195315144b0fSOlivier Houchard by the corresponding value `b'. The operation is performed according to the
195415144b0fSOlivier Houchard IEC/IEEE Standard for Binary Floating-Point Arithmetic.
195515144b0fSOlivier Houchard -------------------------------------------------------------------------------
195615144b0fSOlivier Houchard */
float32_div(float32 a,float32 b)195715144b0fSOlivier Houchard float32 float32_div( float32 a, float32 b )
195815144b0fSOlivier Houchard {
195915144b0fSOlivier Houchard flag aSign, bSign, zSign;
196015144b0fSOlivier Houchard int16 aExp, bExp, zExp;
196115144b0fSOlivier Houchard bits32 aSig, bSig, zSig;
196215144b0fSOlivier Houchard
196315144b0fSOlivier Houchard aSig = extractFloat32Frac( a );
196415144b0fSOlivier Houchard aExp = extractFloat32Exp( a );
196515144b0fSOlivier Houchard aSign = extractFloat32Sign( a );
196615144b0fSOlivier Houchard bSig = extractFloat32Frac( b );
196715144b0fSOlivier Houchard bExp = extractFloat32Exp( b );
196815144b0fSOlivier Houchard bSign = extractFloat32Sign( b );
196915144b0fSOlivier Houchard zSign = aSign ^ bSign;
197015144b0fSOlivier Houchard if ( aExp == 0xFF ) {
197115144b0fSOlivier Houchard if ( aSig ) return propagateFloat32NaN( a, b );
197215144b0fSOlivier Houchard if ( bExp == 0xFF ) {
197315144b0fSOlivier Houchard if ( bSig ) return propagateFloat32NaN( a, b );
197415144b0fSOlivier Houchard float_raise( float_flag_invalid );
197515144b0fSOlivier Houchard return float32_default_nan;
197615144b0fSOlivier Houchard }
197715144b0fSOlivier Houchard return packFloat32( zSign, 0xFF, 0 );
197815144b0fSOlivier Houchard }
197915144b0fSOlivier Houchard if ( bExp == 0xFF ) {
198015144b0fSOlivier Houchard if ( bSig ) return propagateFloat32NaN( a, b );
198115144b0fSOlivier Houchard return packFloat32( zSign, 0, 0 );
198215144b0fSOlivier Houchard }
198315144b0fSOlivier Houchard if ( bExp == 0 ) {
198415144b0fSOlivier Houchard if ( bSig == 0 ) {
198515144b0fSOlivier Houchard if ( ( aExp | aSig ) == 0 ) {
198615144b0fSOlivier Houchard float_raise( float_flag_invalid );
198715144b0fSOlivier Houchard return float32_default_nan;
198815144b0fSOlivier Houchard }
198915144b0fSOlivier Houchard float_raise( float_flag_divbyzero );
199015144b0fSOlivier Houchard return packFloat32( zSign, 0xFF, 0 );
199115144b0fSOlivier Houchard }
199215144b0fSOlivier Houchard normalizeFloat32Subnormal( bSig, &bExp, &bSig );
199315144b0fSOlivier Houchard }
199415144b0fSOlivier Houchard if ( aExp == 0 ) {
199515144b0fSOlivier Houchard if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
199615144b0fSOlivier Houchard normalizeFloat32Subnormal( aSig, &aExp, &aSig );
199715144b0fSOlivier Houchard }
199815144b0fSOlivier Houchard zExp = aExp - bExp + 0x7D;
199915144b0fSOlivier Houchard aSig = ( aSig | 0x00800000 )<<7;
200015144b0fSOlivier Houchard bSig = ( bSig | 0x00800000 )<<8;
200115144b0fSOlivier Houchard if ( bSig <= ( aSig + aSig ) ) {
200215144b0fSOlivier Houchard aSig >>= 1;
200315144b0fSOlivier Houchard ++zExp;
200415144b0fSOlivier Houchard }
200515144b0fSOlivier Houchard zSig = ( ( (bits64) aSig )<<32 ) / bSig;
200615144b0fSOlivier Houchard if ( ( zSig & 0x3F ) == 0 ) {
200715144b0fSOlivier Houchard zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
200815144b0fSOlivier Houchard }
200915144b0fSOlivier Houchard return roundAndPackFloat32( zSign, zExp, zSig );
201015144b0fSOlivier Houchard
201115144b0fSOlivier Houchard }
201215144b0fSOlivier Houchard
201315144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
201415144b0fSOlivier Houchard /*
201515144b0fSOlivier Houchard -------------------------------------------------------------------------------
201615144b0fSOlivier Houchard Returns the remainder of the single-precision floating-point value `a'
201715144b0fSOlivier Houchard with respect to the corresponding value `b'. The operation is performed
201815144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
201915144b0fSOlivier Houchard -------------------------------------------------------------------------------
202015144b0fSOlivier Houchard */
float32_rem(float32 a,float32 b)202115144b0fSOlivier Houchard float32 float32_rem( float32 a, float32 b )
202215144b0fSOlivier Houchard {
202315144b0fSOlivier Houchard flag aSign, bSign, zSign;
202415144b0fSOlivier Houchard int16 aExp, bExp, expDiff;
202515144b0fSOlivier Houchard bits32 aSig, bSig;
202615144b0fSOlivier Houchard bits32 q;
202715144b0fSOlivier Houchard bits64 aSig64, bSig64, q64;
202815144b0fSOlivier Houchard bits32 alternateASig;
202915144b0fSOlivier Houchard sbits32 sigMean;
203015144b0fSOlivier Houchard
203115144b0fSOlivier Houchard aSig = extractFloat32Frac( a );
203215144b0fSOlivier Houchard aExp = extractFloat32Exp( a );
203315144b0fSOlivier Houchard aSign = extractFloat32Sign( a );
203415144b0fSOlivier Houchard bSig = extractFloat32Frac( b );
203515144b0fSOlivier Houchard bExp = extractFloat32Exp( b );
203615144b0fSOlivier Houchard bSign = extractFloat32Sign( b );
203715144b0fSOlivier Houchard if ( aExp == 0xFF ) {
203815144b0fSOlivier Houchard if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
203915144b0fSOlivier Houchard return propagateFloat32NaN( a, b );
204015144b0fSOlivier Houchard }
204115144b0fSOlivier Houchard float_raise( float_flag_invalid );
204215144b0fSOlivier Houchard return float32_default_nan;
204315144b0fSOlivier Houchard }
204415144b0fSOlivier Houchard if ( bExp == 0xFF ) {
204515144b0fSOlivier Houchard if ( bSig ) return propagateFloat32NaN( a, b );
204615144b0fSOlivier Houchard return a;
204715144b0fSOlivier Houchard }
204815144b0fSOlivier Houchard if ( bExp == 0 ) {
204915144b0fSOlivier Houchard if ( bSig == 0 ) {
205015144b0fSOlivier Houchard float_raise( float_flag_invalid );
205115144b0fSOlivier Houchard return float32_default_nan;
205215144b0fSOlivier Houchard }
205315144b0fSOlivier Houchard normalizeFloat32Subnormal( bSig, &bExp, &bSig );
205415144b0fSOlivier Houchard }
205515144b0fSOlivier Houchard if ( aExp == 0 ) {
205615144b0fSOlivier Houchard if ( aSig == 0 ) return a;
205715144b0fSOlivier Houchard normalizeFloat32Subnormal( aSig, &aExp, &aSig );
205815144b0fSOlivier Houchard }
205915144b0fSOlivier Houchard expDiff = aExp - bExp;
206015144b0fSOlivier Houchard aSig |= 0x00800000;
206115144b0fSOlivier Houchard bSig |= 0x00800000;
206215144b0fSOlivier Houchard if ( expDiff < 32 ) {
206315144b0fSOlivier Houchard aSig <<= 8;
206415144b0fSOlivier Houchard bSig <<= 8;
206515144b0fSOlivier Houchard if ( expDiff < 0 ) {
206615144b0fSOlivier Houchard if ( expDiff < -1 ) return a;
206715144b0fSOlivier Houchard aSig >>= 1;
206815144b0fSOlivier Houchard }
206915144b0fSOlivier Houchard q = ( bSig <= aSig );
207015144b0fSOlivier Houchard if ( q ) aSig -= bSig;
207115144b0fSOlivier Houchard if ( 0 < expDiff ) {
207215144b0fSOlivier Houchard q = ( ( (bits64) aSig )<<32 ) / bSig;
207315144b0fSOlivier Houchard q >>= 32 - expDiff;
207415144b0fSOlivier Houchard bSig >>= 2;
207515144b0fSOlivier Houchard aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
207615144b0fSOlivier Houchard }
207715144b0fSOlivier Houchard else {
207815144b0fSOlivier Houchard aSig >>= 2;
207915144b0fSOlivier Houchard bSig >>= 2;
208015144b0fSOlivier Houchard }
208115144b0fSOlivier Houchard }
208215144b0fSOlivier Houchard else {
208315144b0fSOlivier Houchard if ( bSig <= aSig ) aSig -= bSig;
208415144b0fSOlivier Houchard aSig64 = ( (bits64) aSig )<<40;
208515144b0fSOlivier Houchard bSig64 = ( (bits64) bSig )<<40;
208615144b0fSOlivier Houchard expDiff -= 64;
208715144b0fSOlivier Houchard while ( 0 < expDiff ) {
208815144b0fSOlivier Houchard q64 = estimateDiv128To64( aSig64, 0, bSig64 );
208915144b0fSOlivier Houchard q64 = ( 2 < q64 ) ? q64 - 2 : 0;
209015144b0fSOlivier Houchard aSig64 = - ( ( bSig * q64 )<<38 );
209115144b0fSOlivier Houchard expDiff -= 62;
209215144b0fSOlivier Houchard }
209315144b0fSOlivier Houchard expDiff += 64;
209415144b0fSOlivier Houchard q64 = estimateDiv128To64( aSig64, 0, bSig64 );
209515144b0fSOlivier Houchard q64 = ( 2 < q64 ) ? q64 - 2 : 0;
209615144b0fSOlivier Houchard q = q64>>( 64 - expDiff );
209715144b0fSOlivier Houchard bSig <<= 6;
209815144b0fSOlivier Houchard aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
209915144b0fSOlivier Houchard }
210015144b0fSOlivier Houchard do {
210115144b0fSOlivier Houchard alternateASig = aSig;
210215144b0fSOlivier Houchard ++q;
210315144b0fSOlivier Houchard aSig -= bSig;
210415144b0fSOlivier Houchard } while ( 0 <= (sbits32) aSig );
210515144b0fSOlivier Houchard sigMean = aSig + alternateASig;
210615144b0fSOlivier Houchard if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
210715144b0fSOlivier Houchard aSig = alternateASig;
210815144b0fSOlivier Houchard }
210915144b0fSOlivier Houchard zSign = ( (sbits32) aSig < 0 );
211015144b0fSOlivier Houchard if ( zSign ) aSig = - aSig;
211115144b0fSOlivier Houchard return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
211215144b0fSOlivier Houchard
211315144b0fSOlivier Houchard }
211415144b0fSOlivier Houchard #endif /* !SOFTFLOAT_FOR_GCC */
211515144b0fSOlivier Houchard
211615144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
211715144b0fSOlivier Houchard /*
211815144b0fSOlivier Houchard -------------------------------------------------------------------------------
211915144b0fSOlivier Houchard Returns the square root of the single-precision floating-point value `a'.
212015144b0fSOlivier Houchard The operation is performed according to the IEC/IEEE Standard for Binary
212115144b0fSOlivier Houchard Floating-Point Arithmetic.
212215144b0fSOlivier Houchard -------------------------------------------------------------------------------
212315144b0fSOlivier Houchard */
float32_sqrt(float32 a)212415144b0fSOlivier Houchard float32 float32_sqrt( float32 a )
212515144b0fSOlivier Houchard {
212615144b0fSOlivier Houchard flag aSign;
212715144b0fSOlivier Houchard int16 aExp, zExp;
212815144b0fSOlivier Houchard bits32 aSig, zSig;
212915144b0fSOlivier Houchard bits64 rem, term;
213015144b0fSOlivier Houchard
213115144b0fSOlivier Houchard aSig = extractFloat32Frac( a );
213215144b0fSOlivier Houchard aExp = extractFloat32Exp( a );
213315144b0fSOlivier Houchard aSign = extractFloat32Sign( a );
213415144b0fSOlivier Houchard if ( aExp == 0xFF ) {
213515144b0fSOlivier Houchard if ( aSig ) return propagateFloat32NaN( a, 0 );
213615144b0fSOlivier Houchard if ( ! aSign ) return a;
213715144b0fSOlivier Houchard float_raise( float_flag_invalid );
213815144b0fSOlivier Houchard return float32_default_nan;
213915144b0fSOlivier Houchard }
214015144b0fSOlivier Houchard if ( aSign ) {
214115144b0fSOlivier Houchard if ( ( aExp | aSig ) == 0 ) return a;
214215144b0fSOlivier Houchard float_raise( float_flag_invalid );
214315144b0fSOlivier Houchard return float32_default_nan;
214415144b0fSOlivier Houchard }
214515144b0fSOlivier Houchard if ( aExp == 0 ) {
214615144b0fSOlivier Houchard if ( aSig == 0 ) return 0;
214715144b0fSOlivier Houchard normalizeFloat32Subnormal( aSig, &aExp, &aSig );
214815144b0fSOlivier Houchard }
214915144b0fSOlivier Houchard zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
215015144b0fSOlivier Houchard aSig = ( aSig | 0x00800000 )<<8;
215115144b0fSOlivier Houchard zSig = estimateSqrt32( aExp, aSig ) + 2;
215215144b0fSOlivier Houchard if ( ( zSig & 0x7F ) <= 5 ) {
215315144b0fSOlivier Houchard if ( zSig < 2 ) {
215415144b0fSOlivier Houchard zSig = 0x7FFFFFFF;
215515144b0fSOlivier Houchard goto roundAndPack;
215615144b0fSOlivier Houchard }
215715144b0fSOlivier Houchard aSig >>= aExp & 1;
215815144b0fSOlivier Houchard term = ( (bits64) zSig ) * zSig;
215915144b0fSOlivier Houchard rem = ( ( (bits64) aSig )<<32 ) - term;
216015144b0fSOlivier Houchard while ( (sbits64) rem < 0 ) {
216115144b0fSOlivier Houchard --zSig;
216215144b0fSOlivier Houchard rem += ( ( (bits64) zSig )<<1 ) | 1;
216315144b0fSOlivier Houchard }
216415144b0fSOlivier Houchard zSig |= ( rem != 0 );
216515144b0fSOlivier Houchard }
216615144b0fSOlivier Houchard shift32RightJamming( zSig, 1, &zSig );
216715144b0fSOlivier Houchard roundAndPack:
216815144b0fSOlivier Houchard return roundAndPackFloat32( 0, zExp, zSig );
216915144b0fSOlivier Houchard
217015144b0fSOlivier Houchard }
217115144b0fSOlivier Houchard #endif /* !SOFTFLOAT_FOR_GCC */
217215144b0fSOlivier Houchard
217315144b0fSOlivier Houchard /*
217415144b0fSOlivier Houchard -------------------------------------------------------------------------------
217515144b0fSOlivier Houchard Returns 1 if the single-precision floating-point value `a' is equal to
217615144b0fSOlivier Houchard the corresponding value `b', and 0 otherwise. The comparison is performed
217715144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
217815144b0fSOlivier Houchard -------------------------------------------------------------------------------
217915144b0fSOlivier Houchard */
float32_eq(float32 a,float32 b)218015144b0fSOlivier Houchard flag float32_eq( float32 a, float32 b )
218115144b0fSOlivier Houchard {
218215144b0fSOlivier Houchard
218315144b0fSOlivier Houchard if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
218415144b0fSOlivier Houchard || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
218515144b0fSOlivier Houchard ) {
218615144b0fSOlivier Houchard if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
218715144b0fSOlivier Houchard float_raise( float_flag_invalid );
218815144b0fSOlivier Houchard }
218915144b0fSOlivier Houchard return 0;
219015144b0fSOlivier Houchard }
219115144b0fSOlivier Houchard return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
219215144b0fSOlivier Houchard
219315144b0fSOlivier Houchard }
219415144b0fSOlivier Houchard
219515144b0fSOlivier Houchard /*
219615144b0fSOlivier Houchard -------------------------------------------------------------------------------
219715144b0fSOlivier Houchard Returns 1 if the single-precision floating-point value `a' is less than
219815144b0fSOlivier Houchard or equal to the corresponding value `b', and 0 otherwise. The comparison
219915144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
220015144b0fSOlivier Houchard Arithmetic.
220115144b0fSOlivier Houchard -------------------------------------------------------------------------------
220215144b0fSOlivier Houchard */
float32_le(float32 a,float32 b)220315144b0fSOlivier Houchard flag float32_le( float32 a, float32 b )
220415144b0fSOlivier Houchard {
220515144b0fSOlivier Houchard flag aSign, bSign;
220615144b0fSOlivier Houchard
220715144b0fSOlivier Houchard if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
220815144b0fSOlivier Houchard || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
220915144b0fSOlivier Houchard ) {
221015144b0fSOlivier Houchard float_raise( float_flag_invalid );
221115144b0fSOlivier Houchard return 0;
221215144b0fSOlivier Houchard }
221315144b0fSOlivier Houchard aSign = extractFloat32Sign( a );
221415144b0fSOlivier Houchard bSign = extractFloat32Sign( b );
221515144b0fSOlivier Houchard if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
221615144b0fSOlivier Houchard return ( a == b ) || ( aSign ^ ( a < b ) );
221715144b0fSOlivier Houchard
221815144b0fSOlivier Houchard }
221915144b0fSOlivier Houchard
222015144b0fSOlivier Houchard /*
222115144b0fSOlivier Houchard -------------------------------------------------------------------------------
222215144b0fSOlivier Houchard Returns 1 if the single-precision floating-point value `a' is less than
222315144b0fSOlivier Houchard the corresponding value `b', and 0 otherwise. The comparison is performed
222415144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
222515144b0fSOlivier Houchard -------------------------------------------------------------------------------
222615144b0fSOlivier Houchard */
float32_lt(float32 a,float32 b)222715144b0fSOlivier Houchard flag float32_lt( float32 a, float32 b )
222815144b0fSOlivier Houchard {
222915144b0fSOlivier Houchard flag aSign, bSign;
223015144b0fSOlivier Houchard
223115144b0fSOlivier Houchard if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
223215144b0fSOlivier Houchard || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
223315144b0fSOlivier Houchard ) {
223415144b0fSOlivier Houchard float_raise( float_flag_invalid );
223515144b0fSOlivier Houchard return 0;
223615144b0fSOlivier Houchard }
223715144b0fSOlivier Houchard aSign = extractFloat32Sign( a );
223815144b0fSOlivier Houchard bSign = extractFloat32Sign( b );
223915144b0fSOlivier Houchard if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
224015144b0fSOlivier Houchard return ( a != b ) && ( aSign ^ ( a < b ) );
224115144b0fSOlivier Houchard
224215144b0fSOlivier Houchard }
224315144b0fSOlivier Houchard
224415144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
224515144b0fSOlivier Houchard /*
224615144b0fSOlivier Houchard -------------------------------------------------------------------------------
224715144b0fSOlivier Houchard Returns 1 if the single-precision floating-point value `a' is equal to
224815144b0fSOlivier Houchard the corresponding value `b', and 0 otherwise. The invalid exception is
224915144b0fSOlivier Houchard raised if either operand is a NaN. Otherwise, the comparison is performed
225015144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
225115144b0fSOlivier Houchard -------------------------------------------------------------------------------
225215144b0fSOlivier Houchard */
float32_eq_signaling(float32 a,float32 b)225315144b0fSOlivier Houchard flag float32_eq_signaling( float32 a, float32 b )
225415144b0fSOlivier Houchard {
225515144b0fSOlivier Houchard
225615144b0fSOlivier Houchard if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
225715144b0fSOlivier Houchard || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
225815144b0fSOlivier Houchard ) {
225915144b0fSOlivier Houchard float_raise( float_flag_invalid );
226015144b0fSOlivier Houchard return 0;
226115144b0fSOlivier Houchard }
226215144b0fSOlivier Houchard return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
226315144b0fSOlivier Houchard
226415144b0fSOlivier Houchard }
226515144b0fSOlivier Houchard
226615144b0fSOlivier Houchard /*
226715144b0fSOlivier Houchard -------------------------------------------------------------------------------
226815144b0fSOlivier Houchard Returns 1 if the single-precision floating-point value `a' is less than or
226915144b0fSOlivier Houchard equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
227015144b0fSOlivier Houchard cause an exception. Otherwise, the comparison is performed according to the
227115144b0fSOlivier Houchard IEC/IEEE Standard for Binary Floating-Point Arithmetic.
227215144b0fSOlivier Houchard -------------------------------------------------------------------------------
227315144b0fSOlivier Houchard */
float32_le_quiet(float32 a,float32 b)227415144b0fSOlivier Houchard flag float32_le_quiet( float32 a, float32 b )
227515144b0fSOlivier Houchard {
227615144b0fSOlivier Houchard flag aSign, bSign;
227715144b0fSOlivier Houchard
227815144b0fSOlivier Houchard if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
227915144b0fSOlivier Houchard || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
228015144b0fSOlivier Houchard ) {
228115144b0fSOlivier Houchard if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
228215144b0fSOlivier Houchard float_raise( float_flag_invalid );
228315144b0fSOlivier Houchard }
228415144b0fSOlivier Houchard return 0;
228515144b0fSOlivier Houchard }
228615144b0fSOlivier Houchard aSign = extractFloat32Sign( a );
228715144b0fSOlivier Houchard bSign = extractFloat32Sign( b );
228815144b0fSOlivier Houchard if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
228915144b0fSOlivier Houchard return ( a == b ) || ( aSign ^ ( a < b ) );
229015144b0fSOlivier Houchard
229115144b0fSOlivier Houchard }
229215144b0fSOlivier Houchard
229315144b0fSOlivier Houchard /*
229415144b0fSOlivier Houchard -------------------------------------------------------------------------------
229515144b0fSOlivier Houchard Returns 1 if the single-precision floating-point value `a' is less than
229615144b0fSOlivier Houchard the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
229715144b0fSOlivier Houchard exception. Otherwise, the comparison is performed according to the IEC/IEEE
229815144b0fSOlivier Houchard Standard for Binary Floating-Point Arithmetic.
229915144b0fSOlivier Houchard -------------------------------------------------------------------------------
230015144b0fSOlivier Houchard */
float32_lt_quiet(float32 a,float32 b)230115144b0fSOlivier Houchard flag float32_lt_quiet( float32 a, float32 b )
230215144b0fSOlivier Houchard {
230315144b0fSOlivier Houchard flag aSign, bSign;
230415144b0fSOlivier Houchard
230515144b0fSOlivier Houchard if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
230615144b0fSOlivier Houchard || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
230715144b0fSOlivier Houchard ) {
230815144b0fSOlivier Houchard if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
230915144b0fSOlivier Houchard float_raise( float_flag_invalid );
231015144b0fSOlivier Houchard }
231115144b0fSOlivier Houchard return 0;
231215144b0fSOlivier Houchard }
231315144b0fSOlivier Houchard aSign = extractFloat32Sign( a );
231415144b0fSOlivier Houchard bSign = extractFloat32Sign( b );
231515144b0fSOlivier Houchard if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
231615144b0fSOlivier Houchard return ( a != b ) && ( aSign ^ ( a < b ) );
231715144b0fSOlivier Houchard
231815144b0fSOlivier Houchard }
231915144b0fSOlivier Houchard #endif /* !SOFTFLOAT_FOR_GCC */
232015144b0fSOlivier Houchard
232115144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
232215144b0fSOlivier Houchard /*
232315144b0fSOlivier Houchard -------------------------------------------------------------------------------
232415144b0fSOlivier Houchard Returns the result of converting the double-precision floating-point value
232515144b0fSOlivier Houchard `a' to the 32-bit two's complement integer format. The conversion is
232615144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
232715144b0fSOlivier Houchard Arithmetic---which means in particular that the conversion is rounded
232815144b0fSOlivier Houchard according to the current rounding mode. If `a' is a NaN, the largest
232915144b0fSOlivier Houchard positive integer is returned. Otherwise, if the conversion overflows, the
233015144b0fSOlivier Houchard largest integer with the same sign as `a' is returned.
233115144b0fSOlivier Houchard -------------------------------------------------------------------------------
233215144b0fSOlivier Houchard */
float64_to_int32(float64 a)233315144b0fSOlivier Houchard int32 float64_to_int32( float64 a )
233415144b0fSOlivier Houchard {
233515144b0fSOlivier Houchard flag aSign;
233615144b0fSOlivier Houchard int16 aExp, shiftCount;
233715144b0fSOlivier Houchard bits64 aSig;
233815144b0fSOlivier Houchard
233915144b0fSOlivier Houchard aSig = extractFloat64Frac( a );
234015144b0fSOlivier Houchard aExp = extractFloat64Exp( a );
234115144b0fSOlivier Houchard aSign = extractFloat64Sign( a );
234215144b0fSOlivier Houchard if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
234315144b0fSOlivier Houchard if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
234415144b0fSOlivier Houchard shiftCount = 0x42C - aExp;
234515144b0fSOlivier Houchard if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
234615144b0fSOlivier Houchard return roundAndPackInt32( aSign, aSig );
234715144b0fSOlivier Houchard
234815144b0fSOlivier Houchard }
234915144b0fSOlivier Houchard #endif /* !SOFTFLOAT_FOR_GCC */
235015144b0fSOlivier Houchard
235115144b0fSOlivier Houchard /*
235215144b0fSOlivier Houchard -------------------------------------------------------------------------------
235315144b0fSOlivier Houchard Returns the result of converting the double-precision floating-point value
235415144b0fSOlivier Houchard `a' to the 32-bit two's complement integer format. The conversion is
235515144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
235615144b0fSOlivier Houchard Arithmetic, except that the conversion is always rounded toward zero.
235715144b0fSOlivier Houchard If `a' is a NaN, the largest positive integer is returned. Otherwise, if
235815144b0fSOlivier Houchard the conversion overflows, the largest integer with the same sign as `a' is
235915144b0fSOlivier Houchard returned.
236015144b0fSOlivier Houchard -------------------------------------------------------------------------------
236115144b0fSOlivier Houchard */
float64_to_int32_round_to_zero(float64 a)236215144b0fSOlivier Houchard int32 float64_to_int32_round_to_zero( float64 a )
236315144b0fSOlivier Houchard {
236415144b0fSOlivier Houchard flag aSign;
236515144b0fSOlivier Houchard int16 aExp, shiftCount;
236615144b0fSOlivier Houchard bits64 aSig, savedASig;
236715144b0fSOlivier Houchard int32 z;
236815144b0fSOlivier Houchard
236915144b0fSOlivier Houchard aSig = extractFloat64Frac( a );
237015144b0fSOlivier Houchard aExp = extractFloat64Exp( a );
237115144b0fSOlivier Houchard aSign = extractFloat64Sign( a );
237215144b0fSOlivier Houchard if ( 0x41E < aExp ) {
237315144b0fSOlivier Houchard if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
237415144b0fSOlivier Houchard goto invalid;
237515144b0fSOlivier Houchard }
237615144b0fSOlivier Houchard else if ( aExp < 0x3FF ) {
237715144b0fSOlivier Houchard if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
237815144b0fSOlivier Houchard return 0;
237915144b0fSOlivier Houchard }
238015144b0fSOlivier Houchard aSig |= LIT64( 0x0010000000000000 );
238115144b0fSOlivier Houchard shiftCount = 0x433 - aExp;
238215144b0fSOlivier Houchard savedASig = aSig;
238315144b0fSOlivier Houchard aSig >>= shiftCount;
238415144b0fSOlivier Houchard z = aSig;
238515144b0fSOlivier Houchard if ( aSign ) z = - z;
238615144b0fSOlivier Houchard if ( ( z < 0 ) ^ aSign ) {
238715144b0fSOlivier Houchard invalid:
238815144b0fSOlivier Houchard float_raise( float_flag_invalid );
238915144b0fSOlivier Houchard return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
239015144b0fSOlivier Houchard }
239115144b0fSOlivier Houchard if ( ( aSig<<shiftCount ) != savedASig ) {
239215144b0fSOlivier Houchard float_exception_flags |= float_flag_inexact;
239315144b0fSOlivier Houchard }
239415144b0fSOlivier Houchard return z;
239515144b0fSOlivier Houchard
239615144b0fSOlivier Houchard }
239715144b0fSOlivier Houchard
239815144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
239915144b0fSOlivier Houchard /*
240015144b0fSOlivier Houchard -------------------------------------------------------------------------------
240115144b0fSOlivier Houchard Returns the result of converting the double-precision floating-point value
240215144b0fSOlivier Houchard `a' to the 64-bit two's complement integer format. The conversion is
240315144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
240415144b0fSOlivier Houchard Arithmetic---which means in particular that the conversion is rounded
240515144b0fSOlivier Houchard according to the current rounding mode. If `a' is a NaN, the largest
240615144b0fSOlivier Houchard positive integer is returned. Otherwise, if the conversion overflows, the
240715144b0fSOlivier Houchard largest integer with the same sign as `a' is returned.
240815144b0fSOlivier Houchard -------------------------------------------------------------------------------
240915144b0fSOlivier Houchard */
float64_to_int64(float64 a)241015144b0fSOlivier Houchard int64 float64_to_int64( float64 a )
241115144b0fSOlivier Houchard {
241215144b0fSOlivier Houchard flag aSign;
241315144b0fSOlivier Houchard int16 aExp, shiftCount;
241415144b0fSOlivier Houchard bits64 aSig, aSigExtra;
241515144b0fSOlivier Houchard
241615144b0fSOlivier Houchard aSig = extractFloat64Frac( a );
241715144b0fSOlivier Houchard aExp = extractFloat64Exp( a );
241815144b0fSOlivier Houchard aSign = extractFloat64Sign( a );
241915144b0fSOlivier Houchard if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
242015144b0fSOlivier Houchard shiftCount = 0x433 - aExp;
242115144b0fSOlivier Houchard if ( shiftCount <= 0 ) {
242215144b0fSOlivier Houchard if ( 0x43E < aExp ) {
242315144b0fSOlivier Houchard float_raise( float_flag_invalid );
242415144b0fSOlivier Houchard if ( ! aSign
242515144b0fSOlivier Houchard || ( ( aExp == 0x7FF )
242615144b0fSOlivier Houchard && ( aSig != LIT64( 0x0010000000000000 ) ) )
242715144b0fSOlivier Houchard ) {
242815144b0fSOlivier Houchard return LIT64( 0x7FFFFFFFFFFFFFFF );
242915144b0fSOlivier Houchard }
243015144b0fSOlivier Houchard return (sbits64) LIT64( 0x8000000000000000 );
243115144b0fSOlivier Houchard }
243215144b0fSOlivier Houchard aSigExtra = 0;
243315144b0fSOlivier Houchard aSig <<= - shiftCount;
243415144b0fSOlivier Houchard }
243515144b0fSOlivier Houchard else {
243615144b0fSOlivier Houchard shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
243715144b0fSOlivier Houchard }
243815144b0fSOlivier Houchard return roundAndPackInt64( aSign, aSig, aSigExtra );
243915144b0fSOlivier Houchard
244015144b0fSOlivier Houchard }
244115144b0fSOlivier Houchard
244215144b0fSOlivier Houchard /*
244315144b0fSOlivier Houchard -------------------------------------------------------------------------------
244415144b0fSOlivier Houchard Returns the result of converting the double-precision floating-point value
244515144b0fSOlivier Houchard `a' to the 64-bit two's complement integer format. The conversion is
244615144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
244715144b0fSOlivier Houchard Arithmetic, except that the conversion is always rounded toward zero.
244815144b0fSOlivier Houchard If `a' is a NaN, the largest positive integer is returned. Otherwise, if
244915144b0fSOlivier Houchard the conversion overflows, the largest integer with the same sign as `a' is
245015144b0fSOlivier Houchard returned.
245115144b0fSOlivier Houchard -------------------------------------------------------------------------------
245215144b0fSOlivier Houchard */
float64_to_int64_round_to_zero(float64 a)245315144b0fSOlivier Houchard int64 float64_to_int64_round_to_zero( float64 a )
245415144b0fSOlivier Houchard {
245515144b0fSOlivier Houchard flag aSign;
245615144b0fSOlivier Houchard int16 aExp, shiftCount;
245715144b0fSOlivier Houchard bits64 aSig;
245815144b0fSOlivier Houchard int64 z;
245915144b0fSOlivier Houchard
246015144b0fSOlivier Houchard aSig = extractFloat64Frac( a );
246115144b0fSOlivier Houchard aExp = extractFloat64Exp( a );
246215144b0fSOlivier Houchard aSign = extractFloat64Sign( a );
246315144b0fSOlivier Houchard if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
246415144b0fSOlivier Houchard shiftCount = aExp - 0x433;
246515144b0fSOlivier Houchard if ( 0 <= shiftCount ) {
246615144b0fSOlivier Houchard if ( 0x43E <= aExp ) {
246715144b0fSOlivier Houchard if ( a != LIT64( 0xC3E0000000000000 ) ) {
246815144b0fSOlivier Houchard float_raise( float_flag_invalid );
246915144b0fSOlivier Houchard if ( ! aSign
247015144b0fSOlivier Houchard || ( ( aExp == 0x7FF )
247115144b0fSOlivier Houchard && ( aSig != LIT64( 0x0010000000000000 ) ) )
247215144b0fSOlivier Houchard ) {
247315144b0fSOlivier Houchard return LIT64( 0x7FFFFFFFFFFFFFFF );
247415144b0fSOlivier Houchard }
247515144b0fSOlivier Houchard }
247615144b0fSOlivier Houchard return (sbits64) LIT64( 0x8000000000000000 );
247715144b0fSOlivier Houchard }
247815144b0fSOlivier Houchard z = aSig<<shiftCount;
247915144b0fSOlivier Houchard }
248015144b0fSOlivier Houchard else {
248115144b0fSOlivier Houchard if ( aExp < 0x3FE ) {
248215144b0fSOlivier Houchard if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
248315144b0fSOlivier Houchard return 0;
248415144b0fSOlivier Houchard }
248515144b0fSOlivier Houchard z = aSig>>( - shiftCount );
248615144b0fSOlivier Houchard if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
248715144b0fSOlivier Houchard float_exception_flags |= float_flag_inexact;
248815144b0fSOlivier Houchard }
248915144b0fSOlivier Houchard }
249015144b0fSOlivier Houchard if ( aSign ) z = - z;
249115144b0fSOlivier Houchard return z;
249215144b0fSOlivier Houchard
249315144b0fSOlivier Houchard }
249415144b0fSOlivier Houchard #endif /* !SOFTFLOAT_FOR_GCC */
249515144b0fSOlivier Houchard
249615144b0fSOlivier Houchard /*
249715144b0fSOlivier Houchard -------------------------------------------------------------------------------
249815144b0fSOlivier Houchard Returns the result of converting the double-precision floating-point value
249915144b0fSOlivier Houchard `a' to the single-precision floating-point format. The conversion is
250015144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
250115144b0fSOlivier Houchard Arithmetic.
250215144b0fSOlivier Houchard -------------------------------------------------------------------------------
250315144b0fSOlivier Houchard */
float64_to_float32(float64 a)250415144b0fSOlivier Houchard float32 float64_to_float32( float64 a )
250515144b0fSOlivier Houchard {
250615144b0fSOlivier Houchard flag aSign;
250715144b0fSOlivier Houchard int16 aExp;
250815144b0fSOlivier Houchard bits64 aSig;
250915144b0fSOlivier Houchard bits32 zSig;
251015144b0fSOlivier Houchard
251115144b0fSOlivier Houchard aSig = extractFloat64Frac( a );
251215144b0fSOlivier Houchard aExp = extractFloat64Exp( a );
251315144b0fSOlivier Houchard aSign = extractFloat64Sign( a );
251415144b0fSOlivier Houchard if ( aExp == 0x7FF ) {
251515144b0fSOlivier Houchard if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
251615144b0fSOlivier Houchard return packFloat32( aSign, 0xFF, 0 );
251715144b0fSOlivier Houchard }
251815144b0fSOlivier Houchard shift64RightJamming( aSig, 22, &aSig );
251915144b0fSOlivier Houchard zSig = aSig;
252015144b0fSOlivier Houchard if ( aExp || zSig ) {
252115144b0fSOlivier Houchard zSig |= 0x40000000;
252215144b0fSOlivier Houchard aExp -= 0x381;
252315144b0fSOlivier Houchard }
252415144b0fSOlivier Houchard return roundAndPackFloat32( aSign, aExp, zSig );
252515144b0fSOlivier Houchard
252615144b0fSOlivier Houchard }
252715144b0fSOlivier Houchard
252815144b0fSOlivier Houchard #ifdef FLOATX80
252915144b0fSOlivier Houchard
253015144b0fSOlivier Houchard /*
253115144b0fSOlivier Houchard -------------------------------------------------------------------------------
253215144b0fSOlivier Houchard Returns the result of converting the double-precision floating-point value
253315144b0fSOlivier Houchard `a' to the extended double-precision floating-point format. The conversion
253415144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
253515144b0fSOlivier Houchard Arithmetic.
253615144b0fSOlivier Houchard -------------------------------------------------------------------------------
253715144b0fSOlivier Houchard */
float64_to_floatx80(float64 a)253815144b0fSOlivier Houchard floatx80 float64_to_floatx80( float64 a )
253915144b0fSOlivier Houchard {
254015144b0fSOlivier Houchard flag aSign;
254115144b0fSOlivier Houchard int16 aExp;
254215144b0fSOlivier Houchard bits64 aSig;
254315144b0fSOlivier Houchard
254415144b0fSOlivier Houchard aSig = extractFloat64Frac( a );
254515144b0fSOlivier Houchard aExp = extractFloat64Exp( a );
254615144b0fSOlivier Houchard aSign = extractFloat64Sign( a );
254715144b0fSOlivier Houchard if ( aExp == 0x7FF ) {
254815144b0fSOlivier Houchard if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
254915144b0fSOlivier Houchard return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
255015144b0fSOlivier Houchard }
255115144b0fSOlivier Houchard if ( aExp == 0 ) {
255215144b0fSOlivier Houchard if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
255315144b0fSOlivier Houchard normalizeFloat64Subnormal( aSig, &aExp, &aSig );
255415144b0fSOlivier Houchard }
255515144b0fSOlivier Houchard return
255615144b0fSOlivier Houchard packFloatx80(
255715144b0fSOlivier Houchard aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
255815144b0fSOlivier Houchard
255915144b0fSOlivier Houchard }
256015144b0fSOlivier Houchard
256115144b0fSOlivier Houchard #endif
256215144b0fSOlivier Houchard
256315144b0fSOlivier Houchard #ifdef FLOAT128
256415144b0fSOlivier Houchard
256515144b0fSOlivier Houchard /*
256615144b0fSOlivier Houchard -------------------------------------------------------------------------------
256715144b0fSOlivier Houchard Returns the result of converting the double-precision floating-point value
256815144b0fSOlivier Houchard `a' to the quadruple-precision floating-point format. The conversion is
256915144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
257015144b0fSOlivier Houchard Arithmetic.
257115144b0fSOlivier Houchard -------------------------------------------------------------------------------
257215144b0fSOlivier Houchard */
float64_to_float128(float64 a)257315144b0fSOlivier Houchard float128 float64_to_float128( float64 a )
257415144b0fSOlivier Houchard {
257515144b0fSOlivier Houchard flag aSign;
257615144b0fSOlivier Houchard int16 aExp;
257715144b0fSOlivier Houchard bits64 aSig, zSig0, zSig1;
257815144b0fSOlivier Houchard
257915144b0fSOlivier Houchard aSig = extractFloat64Frac( a );
258015144b0fSOlivier Houchard aExp = extractFloat64Exp( a );
258115144b0fSOlivier Houchard aSign = extractFloat64Sign( a );
258215144b0fSOlivier Houchard if ( aExp == 0x7FF ) {
258315144b0fSOlivier Houchard if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) );
258415144b0fSOlivier Houchard return packFloat128( aSign, 0x7FFF, 0, 0 );
258515144b0fSOlivier Houchard }
258615144b0fSOlivier Houchard if ( aExp == 0 ) {
258715144b0fSOlivier Houchard if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
258815144b0fSOlivier Houchard normalizeFloat64Subnormal( aSig, &aExp, &aSig );
258915144b0fSOlivier Houchard --aExp;
259015144b0fSOlivier Houchard }
259115144b0fSOlivier Houchard shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
259215144b0fSOlivier Houchard return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
259315144b0fSOlivier Houchard
259415144b0fSOlivier Houchard }
259515144b0fSOlivier Houchard
259615144b0fSOlivier Houchard #endif
259715144b0fSOlivier Houchard
259815144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC
259915144b0fSOlivier Houchard /*
260015144b0fSOlivier Houchard -------------------------------------------------------------------------------
260115144b0fSOlivier Houchard Rounds the double-precision floating-point value `a' to an integer, and
260215144b0fSOlivier Houchard returns the result as a double-precision floating-point value. The
260315144b0fSOlivier Houchard operation is performed according to the IEC/IEEE Standard for Binary
260415144b0fSOlivier Houchard Floating-Point Arithmetic.
260515144b0fSOlivier Houchard -------------------------------------------------------------------------------
260615144b0fSOlivier Houchard */
float64 float64_round_to_int( float64 a )
{
    flag negative;
    int16 exponent;
    int8 mode;
    bits64 unitBit, fractionMask;
    float64 result;

    exponent = extractFloat64Exp( a );

    /* Exponent field 0x433 or larger: `a' is handed back unchanged, except
       that a NaN (exponent 0x7FF with a nonzero fraction) is routed through
       propagateFloat64NaN. */
    if ( 0x433 <= exponent ) {
        if ( ( exponent == 0x7FF ) && extractFloat64Frac( a ) ) {
            return propagateFloat64NaN( a, a );
        }
        return a;
    }

    /* Exponent field below 0x3FF: the result is a signed zero or a signed
       one, selected by the rounding mode. */
    if ( exponent < 0x3FF ) {
        /* Nothing left once the sign bit is shifted out: `a' is a zero. */
        if ( (bits64) ( a<<1 ) == 0 ) return a;
        float_exception_flags |= float_flag_inexact;
        negative = extractFloat64Sign( a );
        switch ( float_rounding_mode ) {
         case float_round_down:
            return negative ? LIT64( 0xBFF0000000000000 ) : 0;
         case float_round_up:
            return
                negative ? LIT64( 0x8000000000000000 )
                         : LIT64( 0x3FF0000000000000 );
         case float_round_nearest_even:
            if ( ( exponent == 0x3FE ) && extractFloat64Frac( a ) ) {
                return packFloat64( negative, 0x3FF, 0 );
            }
            break;
         default:
            break;
        }
        return packFloat64( negative, 0, 0 );
    }

    /* Remaining exponents: round directly on the bit pattern.  `unitBit' is
       the single bit 1<<( 0x433 - exponent ); `fractionMask' covers every
       bit below it. */
    unitBit = 1;
    unitBit <<= 0x433 - exponent;
    fractionMask = unitBit - 1;
    mode = float_rounding_mode;
    result = a;
    if ( mode == float_round_nearest_even ) {
        result += unitBit>>1;
        /* All low bits zero after adding half a unit: clear the unit bit. */
        if ( ( result & fractionMask ) == 0 ) result &= ~ unitBit;
    }
    else if (    ( mode != float_round_to_zero )
              && ( extractFloat64Sign( result ) ^ ( mode == float_round_up ) )
            ) {
        result += fractionMask;
    }
    result &= ~ fractionMask;
    if ( result != a ) float_exception_flags |= float_flag_inexact;
    return result;

}
266115144b0fSOlivier Houchard #endif
266215144b0fSOlivier Houchard
266315144b0fSOlivier Houchard /*
266415144b0fSOlivier Houchard -------------------------------------------------------------------------------
266515144b0fSOlivier Houchard Returns the result of adding the absolute values of the double-precision
266615144b0fSOlivier Houchard floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
266715144b0fSOlivier Houchard before being returned. `zSign' is ignored if the result is a NaN.
266815144b0fSOlivier Houchard The addition is performed according to the IEC/IEEE Standard for Binary
266915144b0fSOlivier Houchard Floating-Point Arithmetic.
267015144b0fSOlivier Houchard -------------------------------------------------------------------------------
267115144b0fSOlivier Houchard */
addFloat64Sigs(float64 a,float64 b,flag zSign)267215144b0fSOlivier Houchard static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
267315144b0fSOlivier Houchard {
267415144b0fSOlivier Houchard int16 aExp, bExp, zExp;
267515144b0fSOlivier Houchard bits64 aSig, bSig, zSig;
267615144b0fSOlivier Houchard int16 expDiff;
267715144b0fSOlivier Houchard
267815144b0fSOlivier Houchard aSig = extractFloat64Frac( a );
267915144b0fSOlivier Houchard aExp = extractFloat64Exp( a );
268015144b0fSOlivier Houchard bSig = extractFloat64Frac( b );
268115144b0fSOlivier Houchard bExp = extractFloat64Exp( b );
268215144b0fSOlivier Houchard expDiff = aExp - bExp;
268315144b0fSOlivier Houchard aSig <<= 9;
268415144b0fSOlivier Houchard bSig <<= 9;
268515144b0fSOlivier Houchard if ( 0 < expDiff ) {
268615144b0fSOlivier Houchard if ( aExp == 0x7FF ) {
268715144b0fSOlivier Houchard if ( aSig ) return propagateFloat64NaN( a, b );
268815144b0fSOlivier Houchard return a;
268915144b0fSOlivier Houchard }
269015144b0fSOlivier Houchard if ( bExp == 0 ) {
269115144b0fSOlivier Houchard --expDiff;
269215144b0fSOlivier Houchard }
269315144b0fSOlivier Houchard else {
269415144b0fSOlivier Houchard bSig |= LIT64( 0x2000000000000000 );
269515144b0fSOlivier Houchard }
269615144b0fSOlivier Houchard shift64RightJamming( bSig, expDiff, &bSig );
269715144b0fSOlivier Houchard zExp = aExp;
269815144b0fSOlivier Houchard }
269915144b0fSOlivier Houchard else if ( expDiff < 0 ) {
270015144b0fSOlivier Houchard if ( bExp == 0x7FF ) {
270115144b0fSOlivier Houchard if ( bSig ) return propagateFloat64NaN( a, b );
270215144b0fSOlivier Houchard return packFloat64( zSign, 0x7FF, 0 );
270315144b0fSOlivier Houchard }
270415144b0fSOlivier Houchard if ( aExp == 0 ) {
270515144b0fSOlivier Houchard ++expDiff;
270615144b0fSOlivier Houchard }
270715144b0fSOlivier Houchard else {
270815144b0fSOlivier Houchard aSig |= LIT64( 0x2000000000000000 );
270915144b0fSOlivier Houchard }
271015144b0fSOlivier Houchard shift64RightJamming( aSig, - expDiff, &aSig );
271115144b0fSOlivier Houchard zExp = bExp;
271215144b0fSOlivier Houchard }
271315144b0fSOlivier Houchard else {
271415144b0fSOlivier Houchard if ( aExp == 0x7FF ) {
271515144b0fSOlivier Houchard if ( aSig | bSig ) return propagateFloat64NaN( a, b );
271615144b0fSOlivier Houchard return a;
271715144b0fSOlivier Houchard }
271815144b0fSOlivier Houchard if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
271915144b0fSOlivier Houchard zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
272015144b0fSOlivier Houchard zExp = aExp;
272115144b0fSOlivier Houchard goto roundAndPack;
272215144b0fSOlivier Houchard }
272315144b0fSOlivier Houchard aSig |= LIT64( 0x2000000000000000 );
272415144b0fSOlivier Houchard zSig = ( aSig + bSig )<<1;
272515144b0fSOlivier Houchard --zExp;
272615144b0fSOlivier Houchard if ( (sbits64) zSig < 0 ) {
272715144b0fSOlivier Houchard zSig = aSig + bSig;
272815144b0fSOlivier Houchard ++zExp;
272915144b0fSOlivier Houchard }
273015144b0fSOlivier Houchard roundAndPack:
273115144b0fSOlivier Houchard return roundAndPackFloat64( zSign, zExp, zSig );
273215144b0fSOlivier Houchard
273315144b0fSOlivier Houchard }
273415144b0fSOlivier Houchard
273515144b0fSOlivier Houchard /*
273615144b0fSOlivier Houchard -------------------------------------------------------------------------------
273715144b0fSOlivier Houchard Returns the result of subtracting the absolute values of the double-
273815144b0fSOlivier Houchard precision floating-point values `a' and `b'. If `zSign' is 1, the
273915144b0fSOlivier Houchard difference is negated before being returned. `zSign' is ignored if the
274015144b0fSOlivier Houchard result is a NaN. The subtraction is performed according to the IEC/IEEE
274115144b0fSOlivier Houchard Standard for Binary Floating-Point Arithmetic.
274215144b0fSOlivier Houchard -------------------------------------------------------------------------------
274315144b0fSOlivier Houchard */
subFloat64Sigs(float64 a,float64 b,flag zSign)274415144b0fSOlivier Houchard static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
274515144b0fSOlivier Houchard {
274615144b0fSOlivier Houchard int16 aExp, bExp, zExp;
274715144b0fSOlivier Houchard bits64 aSig, bSig, zSig;
274815144b0fSOlivier Houchard int16 expDiff;
274915144b0fSOlivier Houchard
275015144b0fSOlivier Houchard aSig = extractFloat64Frac( a );
275115144b0fSOlivier Houchard aExp = extractFloat64Exp( a );
275215144b0fSOlivier Houchard bSig = extractFloat64Frac( b );
275315144b0fSOlivier Houchard bExp = extractFloat64Exp( b );
275415144b0fSOlivier Houchard expDiff = aExp - bExp;
275515144b0fSOlivier Houchard aSig <<= 10;
275615144b0fSOlivier Houchard bSig <<= 10;
275715144b0fSOlivier Houchard if ( 0 < expDiff ) goto aExpBigger;
275815144b0fSOlivier Houchard if ( expDiff < 0 ) goto bExpBigger;
275915144b0fSOlivier Houchard if ( aExp == 0x7FF ) {
276015144b0fSOlivier Houchard if ( aSig | bSig ) return propagateFloat64NaN( a, b );
276115144b0fSOlivier Houchard float_raise( float_flag_invalid );
276215144b0fSOlivier Houchard return float64_default_nan;
276315144b0fSOlivier Houchard }
276415144b0fSOlivier Houchard if ( aExp == 0 ) {
276515144b0fSOlivier Houchard aExp = 1;
276615144b0fSOlivier Houchard bExp = 1;
276715144b0fSOlivier Houchard }
276815144b0fSOlivier Houchard if ( bSig < aSig ) goto aBigger;
276915144b0fSOlivier Houchard if ( aSig < bSig ) goto bBigger;
277015144b0fSOlivier Houchard return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
277115144b0fSOlivier Houchard bExpBigger:
277215144b0fSOlivier Houchard if ( bExp == 0x7FF ) {
277315144b0fSOlivier Houchard if ( bSig ) return propagateFloat64NaN( a, b );
277415144b0fSOlivier Houchard return packFloat64( zSign ^ 1, 0x7FF, 0 );
277515144b0fSOlivier Houchard }
277615144b0fSOlivier Houchard if ( aExp == 0 ) {
277715144b0fSOlivier Houchard ++expDiff;
277815144b0fSOlivier Houchard }
277915144b0fSOlivier Houchard else {
278015144b0fSOlivier Houchard aSig |= LIT64( 0x4000000000000000 );
278115144b0fSOlivier Houchard }
278215144b0fSOlivier Houchard shift64RightJamming( aSig, - expDiff, &aSig );
278315144b0fSOlivier Houchard bSig |= LIT64( 0x4000000000000000 );
278415144b0fSOlivier Houchard bBigger:
278515144b0fSOlivier Houchard zSig = bSig - aSig;
278615144b0fSOlivier Houchard zExp = bExp;
278715144b0fSOlivier Houchard zSign ^= 1;
278815144b0fSOlivier Houchard goto normalizeRoundAndPack;
278915144b0fSOlivier Houchard aExpBigger:
279015144b0fSOlivier Houchard if ( aExp == 0x7FF ) {
279115144b0fSOlivier Houchard if ( aSig ) return propagateFloat64NaN( a, b );
279215144b0fSOlivier Houchard return a;
279315144b0fSOlivier Houchard }
279415144b0fSOlivier Houchard if ( bExp == 0 ) {
279515144b0fSOlivier Houchard --expDiff;
279615144b0fSOlivier Houchard }
279715144b0fSOlivier Houchard else {
279815144b0fSOlivier Houchard bSig |= LIT64( 0x4000000000000000 );
279915144b0fSOlivier Houchard }
280015144b0fSOlivier Houchard shift64RightJamming( bSig, expDiff, &bSig );
280115144b0fSOlivier Houchard aSig |= LIT64( 0x4000000000000000 );
280215144b0fSOlivier Houchard aBigger:
280315144b0fSOlivier Houchard zSig = aSig - bSig;
280415144b0fSOlivier Houchard zExp = aExp;
280515144b0fSOlivier Houchard normalizeRoundAndPack:
280615144b0fSOlivier Houchard --zExp;
280715144b0fSOlivier Houchard return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
280815144b0fSOlivier Houchard
280915144b0fSOlivier Houchard }
281015144b0fSOlivier Houchard
281115144b0fSOlivier Houchard /*
281215144b0fSOlivier Houchard -------------------------------------------------------------------------------
281315144b0fSOlivier Houchard Returns the result of adding the double-precision floating-point values `a'
281415144b0fSOlivier Houchard and `b'. The operation is performed according to the IEC/IEEE Standard for
281515144b0fSOlivier Houchard Binary Floating-Point Arithmetic.
281615144b0fSOlivier Houchard -------------------------------------------------------------------------------
281715144b0fSOlivier Houchard */
float64_add(float64 a,float64 b)281815144b0fSOlivier Houchard float64 float64_add( float64 a, float64 b )
281915144b0fSOlivier Houchard {
282015144b0fSOlivier Houchard flag aSign, bSign;
282115144b0fSOlivier Houchard
282215144b0fSOlivier Houchard aSign = extractFloat64Sign( a );
282315144b0fSOlivier Houchard bSign = extractFloat64Sign( b );
282415144b0fSOlivier Houchard if ( aSign == bSign ) {
282515144b0fSOlivier Houchard return addFloat64Sigs( a, b, aSign );
282615144b0fSOlivier Houchard }
282715144b0fSOlivier Houchard else {
282815144b0fSOlivier Houchard return subFloat64Sigs( a, b, aSign );
282915144b0fSOlivier Houchard }
283015144b0fSOlivier Houchard
283115144b0fSOlivier Houchard }
283215144b0fSOlivier Houchard
283315144b0fSOlivier Houchard /*
283415144b0fSOlivier Houchard -------------------------------------------------------------------------------
283515144b0fSOlivier Houchard Returns the result of subtracting the double-precision floating-point values
283615144b0fSOlivier Houchard `a' and `b'. The operation is performed according to the IEC/IEEE Standard
283715144b0fSOlivier Houchard for Binary Floating-Point Arithmetic.
283815144b0fSOlivier Houchard -------------------------------------------------------------------------------
283915144b0fSOlivier Houchard */
float64_sub(float64 a,float64 b)284015144b0fSOlivier Houchard float64 float64_sub( float64 a, float64 b )
284115144b0fSOlivier Houchard {
284215144b0fSOlivier Houchard flag aSign, bSign;
284315144b0fSOlivier Houchard
284415144b0fSOlivier Houchard aSign = extractFloat64Sign( a );
284515144b0fSOlivier Houchard bSign = extractFloat64Sign( b );
284615144b0fSOlivier Houchard if ( aSign == bSign ) {
284715144b0fSOlivier Houchard return subFloat64Sigs( a, b, aSign );
284815144b0fSOlivier Houchard }
284915144b0fSOlivier Houchard else {
285015144b0fSOlivier Houchard return addFloat64Sigs( a, b, aSign );
285115144b0fSOlivier Houchard }
285215144b0fSOlivier Houchard
285315144b0fSOlivier Houchard }
285415144b0fSOlivier Houchard
285515144b0fSOlivier Houchard /*
285615144b0fSOlivier Houchard -------------------------------------------------------------------------------
285715144b0fSOlivier Houchard Returns the result of multiplying the double-precision floating-point values
285815144b0fSOlivier Houchard `a' and `b'. The operation is performed according to the IEC/IEEE Standard
285915144b0fSOlivier Houchard for Binary Floating-Point Arithmetic.
286015144b0fSOlivier Houchard -------------------------------------------------------------------------------
286115144b0fSOlivier Houchard */
float64_mul(float64 a,float64 b)286215144b0fSOlivier Houchard float64 float64_mul( float64 a, float64 b )
286315144b0fSOlivier Houchard {
286415144b0fSOlivier Houchard flag aSign, bSign, zSign;
286515144b0fSOlivier Houchard int16 aExp, bExp, zExp;
286615144b0fSOlivier Houchard bits64 aSig, bSig, zSig0, zSig1;
286715144b0fSOlivier Houchard
286815144b0fSOlivier Houchard aSig = extractFloat64Frac( a );
286915144b0fSOlivier Houchard aExp = extractFloat64Exp( a );
287015144b0fSOlivier Houchard aSign = extractFloat64Sign( a );
287115144b0fSOlivier Houchard bSig = extractFloat64Frac( b );
287215144b0fSOlivier Houchard bExp = extractFloat64Exp( b );
287315144b0fSOlivier Houchard bSign = extractFloat64Sign( b );
287415144b0fSOlivier Houchard zSign = aSign ^ bSign;
287515144b0fSOlivier Houchard if ( aExp == 0x7FF ) {
287615144b0fSOlivier Houchard if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
287715144b0fSOlivier Houchard return propagateFloat64NaN( a, b );
287815144b0fSOlivier Houchard }
287915144b0fSOlivier Houchard if ( ( bExp | bSig ) == 0 ) {
288015144b0fSOlivier Houchard float_raise( float_flag_invalid );
288115144b0fSOlivier Houchard return float64_default_nan;
288215144b0fSOlivier Houchard }
288315144b0fSOlivier Houchard return packFloat64( zSign, 0x7FF, 0 );
288415144b0fSOlivier Houchard }
288515144b0fSOlivier Houchard if ( bExp == 0x7FF ) {
288615144b0fSOlivier Houchard if ( bSig ) return propagateFloat64NaN( a, b );
288715144b0fSOlivier Houchard if ( ( aExp | aSig ) == 0 ) {
288815144b0fSOlivier Houchard float_raise( float_flag_invalid );
288915144b0fSOlivier Houchard return float64_default_nan;
289015144b0fSOlivier Houchard }
289115144b0fSOlivier Houchard return packFloat64( zSign, 0x7FF, 0 );
289215144b0fSOlivier Houchard }
289315144b0fSOlivier Houchard if ( aExp == 0 ) {
289415144b0fSOlivier Houchard if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
289515144b0fSOlivier Houchard normalizeFloat64Subnormal( aSig, &aExp, &aSig );
289615144b0fSOlivier Houchard }
289715144b0fSOlivier Houchard if ( bExp == 0 ) {
289815144b0fSOlivier Houchard if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
289915144b0fSOlivier Houchard normalizeFloat64Subnormal( bSig, &bExp, &bSig );
290015144b0fSOlivier Houchard }
290115144b0fSOlivier Houchard zExp = aExp + bExp - 0x3FF;
290215144b0fSOlivier Houchard aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
290315144b0fSOlivier Houchard bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
290415144b0fSOlivier Houchard mul64To128( aSig, bSig, &zSig0, &zSig1 );
290515144b0fSOlivier Houchard zSig0 |= ( zSig1 != 0 );
290615144b0fSOlivier Houchard if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
290715144b0fSOlivier Houchard zSig0 <<= 1;
290815144b0fSOlivier Houchard --zExp;
290915144b0fSOlivier Houchard }
291015144b0fSOlivier Houchard return roundAndPackFloat64( zSign, zExp, zSig0 );
291115144b0fSOlivier Houchard
291215144b0fSOlivier Houchard }
291315144b0fSOlivier Houchard
291415144b0fSOlivier Houchard /*
291515144b0fSOlivier Houchard -------------------------------------------------------------------------------
291615144b0fSOlivier Houchard Returns the result of dividing the double-precision floating-point value `a'
291715144b0fSOlivier Houchard by the corresponding value `b'. The operation is performed according to
291815144b0fSOlivier Houchard the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
291915144b0fSOlivier Houchard -------------------------------------------------------------------------------
292015144b0fSOlivier Houchard */
float64_div(float64 a,float64 b)292115144b0fSOlivier Houchard float64 float64_div( float64 a, float64 b )
292215144b0fSOlivier Houchard {
292315144b0fSOlivier Houchard flag aSign, bSign, zSign;
292415144b0fSOlivier Houchard int16 aExp, bExp, zExp;
292515144b0fSOlivier Houchard bits64 aSig, bSig, zSig;
292615144b0fSOlivier Houchard bits64 rem0, rem1;
292715144b0fSOlivier Houchard bits64 term0, term1;
292815144b0fSOlivier Houchard
292915144b0fSOlivier Houchard aSig = extractFloat64Frac( a );
293015144b0fSOlivier Houchard aExp = extractFloat64Exp( a );
293115144b0fSOlivier Houchard aSign = extractFloat64Sign( a );
293215144b0fSOlivier Houchard bSig = extractFloat64Frac( b );
293315144b0fSOlivier Houchard bExp = extractFloat64Exp( b );
293415144b0fSOlivier Houchard bSign = extractFloat64Sign( b );
293515144b0fSOlivier Houchard zSign = aSign ^ bSign;
293615144b0fSOlivier Houchard if ( aExp == 0x7FF ) {
293715144b0fSOlivier Houchard if ( aSig ) return propagateFloat64NaN( a, b );
293815144b0fSOlivier Houchard if ( bExp == 0x7FF ) {
293915144b0fSOlivier Houchard if ( bSig ) return propagateFloat64NaN( a, b );
294015144b0fSOlivier Houchard float_raise( float_flag_invalid );
294115144b0fSOlivier Houchard return float64_default_nan;
294215144b0fSOlivier Houchard }
294315144b0fSOlivier Houchard return packFloat64( zSign, 0x7FF, 0 );
294415144b0fSOlivier Houchard }
294515144b0fSOlivier Houchard if ( bExp == 0x7FF ) {
294615144b0fSOlivier Houchard if ( bSig ) return propagateFloat64NaN( a, b );
294715144b0fSOlivier Houchard return packFloat64( zSign, 0, 0 );
294815144b0fSOlivier Houchard }
294915144b0fSOlivier Houchard if ( bExp == 0 ) {
295015144b0fSOlivier Houchard if ( bSig == 0 ) {
295115144b0fSOlivier Houchard if ( ( aExp | aSig ) == 0 ) {
295215144b0fSOlivier Houchard float_raise( float_flag_invalid );
295315144b0fSOlivier Houchard return float64_default_nan;
295415144b0fSOlivier Houchard }
295515144b0fSOlivier Houchard float_raise( float_flag_divbyzero );
295615144b0fSOlivier Houchard return packFloat64( zSign, 0x7FF, 0 );
295715144b0fSOlivier Houchard }
295815144b0fSOlivier Houchard normalizeFloat64Subnormal( bSig, &bExp, &bSig );
295915144b0fSOlivier Houchard }
296015144b0fSOlivier Houchard if ( aExp == 0 ) {
296115144b0fSOlivier Houchard if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
296215144b0fSOlivier Houchard normalizeFloat64Subnormal( aSig, &aExp, &aSig );
296315144b0fSOlivier Houchard }
296415144b0fSOlivier Houchard zExp = aExp - bExp + 0x3FD;
296515144b0fSOlivier Houchard aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
296615144b0fSOlivier Houchard bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
296715144b0fSOlivier Houchard if ( bSig <= ( aSig + aSig ) ) {
296815144b0fSOlivier Houchard aSig >>= 1;
296915144b0fSOlivier Houchard ++zExp;
297015144b0fSOlivier Houchard }
297115144b0fSOlivier Houchard zSig = estimateDiv128To64( aSig, 0, bSig );
297215144b0fSOlivier Houchard if ( ( zSig & 0x1FF ) <= 2 ) {
297315144b0fSOlivier Houchard mul64To128( bSig, zSig, &term0, &term1 );
297415144b0fSOlivier Houchard sub128( aSig, 0, term0, term1, &rem0, &rem1 );
297515144b0fSOlivier Houchard while ( (sbits64) rem0 < 0 ) {
297615144b0fSOlivier Houchard --zSig;
297715144b0fSOlivier Houchard add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
297815144b0fSOlivier Houchard }
297915144b0fSOlivier Houchard zSig |= ( rem1 != 0 );
298015144b0fSOlivier Houchard }
298115144b0fSOlivier Houchard return roundAndPackFloat64( zSign, zExp, zSig );
298215144b0fSOlivier Houchard
298315144b0fSOlivier Houchard }
298415144b0fSOlivier Houchard
298515144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC
298615144b0fSOlivier Houchard /*
298715144b0fSOlivier Houchard -------------------------------------------------------------------------------
298815144b0fSOlivier Houchard Returns the remainder of the double-precision floating-point value `a'
298915144b0fSOlivier Houchard with respect to the corresponding value `b'. The operation is performed
299015144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
299115144b0fSOlivier Houchard -------------------------------------------------------------------------------
299215144b0fSOlivier Houchard */
float64_rem(float64 a,float64 b)299315144b0fSOlivier Houchard float64 float64_rem( float64 a, float64 b )
299415144b0fSOlivier Houchard {
299515144b0fSOlivier Houchard flag aSign, bSign, zSign;
299615144b0fSOlivier Houchard int16 aExp, bExp, expDiff;
299715144b0fSOlivier Houchard bits64 aSig, bSig;
299815144b0fSOlivier Houchard bits64 q, alternateASig;
299915144b0fSOlivier Houchard sbits64 sigMean;
300015144b0fSOlivier Houchard
300115144b0fSOlivier Houchard aSig = extractFloat64Frac( a );
300215144b0fSOlivier Houchard aExp = extractFloat64Exp( a );
300315144b0fSOlivier Houchard aSign = extractFloat64Sign( a );
300415144b0fSOlivier Houchard bSig = extractFloat64Frac( b );
300515144b0fSOlivier Houchard bExp = extractFloat64Exp( b );
300615144b0fSOlivier Houchard bSign = extractFloat64Sign( b );
300715144b0fSOlivier Houchard if ( aExp == 0x7FF ) {
300815144b0fSOlivier Houchard if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
300915144b0fSOlivier Houchard return propagateFloat64NaN( a, b );
301015144b0fSOlivier Houchard }
301115144b0fSOlivier Houchard float_raise( float_flag_invalid );
301215144b0fSOlivier Houchard return float64_default_nan;
301315144b0fSOlivier Houchard }
301415144b0fSOlivier Houchard if ( bExp == 0x7FF ) {
301515144b0fSOlivier Houchard if ( bSig ) return propagateFloat64NaN( a, b );
301615144b0fSOlivier Houchard return a;
301715144b0fSOlivier Houchard }
301815144b0fSOlivier Houchard if ( bExp == 0 ) {
301915144b0fSOlivier Houchard if ( bSig == 0 ) {
302015144b0fSOlivier Houchard float_raise( float_flag_invalid );
302115144b0fSOlivier Houchard return float64_default_nan;
302215144b0fSOlivier Houchard }
302315144b0fSOlivier Houchard normalizeFloat64Subnormal( bSig, &bExp, &bSig );
302415144b0fSOlivier Houchard }
302515144b0fSOlivier Houchard if ( aExp == 0 ) {
302615144b0fSOlivier Houchard if ( aSig == 0 ) return a;
302715144b0fSOlivier Houchard normalizeFloat64Subnormal( aSig, &aExp, &aSig );
302815144b0fSOlivier Houchard }
302915144b0fSOlivier Houchard expDiff = aExp - bExp;
303015144b0fSOlivier Houchard aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
303115144b0fSOlivier Houchard bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
303215144b0fSOlivier Houchard if ( expDiff < 0 ) {
303315144b0fSOlivier Houchard if ( expDiff < -1 ) return a;
303415144b0fSOlivier Houchard aSig >>= 1;
303515144b0fSOlivier Houchard }
303615144b0fSOlivier Houchard q = ( bSig <= aSig );
303715144b0fSOlivier Houchard if ( q ) aSig -= bSig;
303815144b0fSOlivier Houchard expDiff -= 64;
303915144b0fSOlivier Houchard while ( 0 < expDiff ) {
304015144b0fSOlivier Houchard q = estimateDiv128To64( aSig, 0, bSig );
304115144b0fSOlivier Houchard q = ( 2 < q ) ? q - 2 : 0;
304215144b0fSOlivier Houchard aSig = - ( ( bSig>>2 ) * q );
304315144b0fSOlivier Houchard expDiff -= 62;
304415144b0fSOlivier Houchard }
304515144b0fSOlivier Houchard expDiff += 64;
304615144b0fSOlivier Houchard if ( 0 < expDiff ) {
304715144b0fSOlivier Houchard q = estimateDiv128To64( aSig, 0, bSig );
304815144b0fSOlivier Houchard q = ( 2 < q ) ? q - 2 : 0;
304915144b0fSOlivier Houchard q >>= 64 - expDiff;
305015144b0fSOlivier Houchard bSig >>= 2;
305115144b0fSOlivier Houchard aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
305215144b0fSOlivier Houchard }
305315144b0fSOlivier Houchard else {
305415144b0fSOlivier Houchard aSig >>= 2;
305515144b0fSOlivier Houchard bSig >>= 2;
305615144b0fSOlivier Houchard }
305715144b0fSOlivier Houchard do {
305815144b0fSOlivier Houchard alternateASig = aSig;
305915144b0fSOlivier Houchard ++q;
306015144b0fSOlivier Houchard aSig -= bSig;
306115144b0fSOlivier Houchard } while ( 0 <= (sbits64) aSig );
306215144b0fSOlivier Houchard sigMean = aSig + alternateASig;
306315144b0fSOlivier Houchard if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
306415144b0fSOlivier Houchard aSig = alternateASig;
306515144b0fSOlivier Houchard }
306615144b0fSOlivier Houchard zSign = ( (sbits64) aSig < 0 );
306715144b0fSOlivier Houchard if ( zSign ) aSig = - aSig;
306815144b0fSOlivier Houchard return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
306915144b0fSOlivier Houchard
307015144b0fSOlivier Houchard }
307115144b0fSOlivier Houchard
307215144b0fSOlivier Houchard /*
307315144b0fSOlivier Houchard -------------------------------------------------------------------------------
307415144b0fSOlivier Houchard Returns the square root of the double-precision floating-point value `a'.
307515144b0fSOlivier Houchard The operation is performed according to the IEC/IEEE Standard for Binary
307615144b0fSOlivier Houchard Floating-Point Arithmetic.
307715144b0fSOlivier Houchard -------------------------------------------------------------------------------
307815144b0fSOlivier Houchard */
float64_sqrt(float64 a)307915144b0fSOlivier Houchard float64 float64_sqrt( float64 a )
308015144b0fSOlivier Houchard {
308115144b0fSOlivier Houchard flag aSign;
308215144b0fSOlivier Houchard int16 aExp, zExp;
308315144b0fSOlivier Houchard bits64 aSig, zSig, doubleZSig;
308415144b0fSOlivier Houchard bits64 rem0, rem1, term0, term1;
308515144b0fSOlivier Houchard
308615144b0fSOlivier Houchard aSig = extractFloat64Frac( a );
308715144b0fSOlivier Houchard aExp = extractFloat64Exp( a );
308815144b0fSOlivier Houchard aSign = extractFloat64Sign( a );
308915144b0fSOlivier Houchard if ( aExp == 0x7FF ) {
309015144b0fSOlivier Houchard if ( aSig ) return propagateFloat64NaN( a, a );
309115144b0fSOlivier Houchard if ( ! aSign ) return a;
309215144b0fSOlivier Houchard float_raise( float_flag_invalid );
309315144b0fSOlivier Houchard return float64_default_nan;
309415144b0fSOlivier Houchard }
309515144b0fSOlivier Houchard if ( aSign ) {
309615144b0fSOlivier Houchard if ( ( aExp | aSig ) == 0 ) return a;
309715144b0fSOlivier Houchard float_raise( float_flag_invalid );
309815144b0fSOlivier Houchard return float64_default_nan;
309915144b0fSOlivier Houchard }
310015144b0fSOlivier Houchard if ( aExp == 0 ) {
310115144b0fSOlivier Houchard if ( aSig == 0 ) return 0;
310215144b0fSOlivier Houchard normalizeFloat64Subnormal( aSig, &aExp, &aSig );
310315144b0fSOlivier Houchard }
310415144b0fSOlivier Houchard zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
310515144b0fSOlivier Houchard aSig |= LIT64( 0x0010000000000000 );
310615144b0fSOlivier Houchard zSig = estimateSqrt32( aExp, aSig>>21 );
310715144b0fSOlivier Houchard aSig <<= 9 - ( aExp & 1 );
310815144b0fSOlivier Houchard zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
310915144b0fSOlivier Houchard if ( ( zSig & 0x1FF ) <= 5 ) {
311015144b0fSOlivier Houchard doubleZSig = zSig<<1;
311115144b0fSOlivier Houchard mul64To128( zSig, zSig, &term0, &term1 );
311215144b0fSOlivier Houchard sub128( aSig, 0, term0, term1, &rem0, &rem1 );
311315144b0fSOlivier Houchard while ( (sbits64) rem0 < 0 ) {
311415144b0fSOlivier Houchard --zSig;
311515144b0fSOlivier Houchard doubleZSig -= 2;
311615144b0fSOlivier Houchard add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
311715144b0fSOlivier Houchard }
311815144b0fSOlivier Houchard zSig |= ( ( rem0 | rem1 ) != 0 );
311915144b0fSOlivier Houchard }
312015144b0fSOlivier Houchard return roundAndPackFloat64( 0, zExp, zSig );
312115144b0fSOlivier Houchard
312215144b0fSOlivier Houchard }
312315144b0fSOlivier Houchard #endif
312415144b0fSOlivier Houchard
312515144b0fSOlivier Houchard /*
312615144b0fSOlivier Houchard -------------------------------------------------------------------------------
312715144b0fSOlivier Houchard Returns 1 if the double-precision floating-point value `a' is equal to the
312815144b0fSOlivier Houchard corresponding value `b', and 0 otherwise. The comparison is performed
312915144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
313015144b0fSOlivier Houchard -------------------------------------------------------------------------------
313115144b0fSOlivier Houchard */
float64_eq(float64 a,float64 b)313215144b0fSOlivier Houchard flag float64_eq( float64 a, float64 b )
313315144b0fSOlivier Houchard {
313415144b0fSOlivier Houchard
313515144b0fSOlivier Houchard if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
313615144b0fSOlivier Houchard || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
313715144b0fSOlivier Houchard ) {
313815144b0fSOlivier Houchard if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
313915144b0fSOlivier Houchard float_raise( float_flag_invalid );
314015144b0fSOlivier Houchard }
314115144b0fSOlivier Houchard return 0;
314215144b0fSOlivier Houchard }
314315144b0fSOlivier Houchard return ( a == b ) ||
314415144b0fSOlivier Houchard ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
314515144b0fSOlivier Houchard
314615144b0fSOlivier Houchard }
314715144b0fSOlivier Houchard
314815144b0fSOlivier Houchard /*
314915144b0fSOlivier Houchard -------------------------------------------------------------------------------
315015144b0fSOlivier Houchard Returns 1 if the double-precision floating-point value `a' is less than or
315115144b0fSOlivier Houchard equal to the corresponding value `b', and 0 otherwise. The comparison is
315215144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
315315144b0fSOlivier Houchard Arithmetic.
315415144b0fSOlivier Houchard -------------------------------------------------------------------------------
315515144b0fSOlivier Houchard */
float64_le(float64 a,float64 b)315615144b0fSOlivier Houchard flag float64_le( float64 a, float64 b )
315715144b0fSOlivier Houchard {
315815144b0fSOlivier Houchard flag aSign, bSign;
315915144b0fSOlivier Houchard
316015144b0fSOlivier Houchard if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
316115144b0fSOlivier Houchard || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
316215144b0fSOlivier Houchard ) {
316315144b0fSOlivier Houchard float_raise( float_flag_invalid );
316415144b0fSOlivier Houchard return 0;
316515144b0fSOlivier Houchard }
316615144b0fSOlivier Houchard aSign = extractFloat64Sign( a );
316715144b0fSOlivier Houchard bSign = extractFloat64Sign( b );
316815144b0fSOlivier Houchard if ( aSign != bSign )
316915144b0fSOlivier Houchard return aSign ||
317015144b0fSOlivier Houchard ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
317115144b0fSOlivier Houchard 0 );
317215144b0fSOlivier Houchard return ( a == b ) ||
317315144b0fSOlivier Houchard ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
317415144b0fSOlivier Houchard
317515144b0fSOlivier Houchard }
317615144b0fSOlivier Houchard
317715144b0fSOlivier Houchard /*
317815144b0fSOlivier Houchard -------------------------------------------------------------------------------
317915144b0fSOlivier Houchard Returns 1 if the double-precision floating-point value `a' is less than
318015144b0fSOlivier Houchard the corresponding value `b', and 0 otherwise. The comparison is performed
318115144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
318215144b0fSOlivier Houchard -------------------------------------------------------------------------------
318315144b0fSOlivier Houchard */
float64_lt(float64 a,float64 b)318415144b0fSOlivier Houchard flag float64_lt( float64 a, float64 b )
318515144b0fSOlivier Houchard {
318615144b0fSOlivier Houchard flag aSign, bSign;
318715144b0fSOlivier Houchard
318815144b0fSOlivier Houchard if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
318915144b0fSOlivier Houchard || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
319015144b0fSOlivier Houchard ) {
319115144b0fSOlivier Houchard float_raise( float_flag_invalid );
319215144b0fSOlivier Houchard return 0;
319315144b0fSOlivier Houchard }
319415144b0fSOlivier Houchard aSign = extractFloat64Sign( a );
319515144b0fSOlivier Houchard bSign = extractFloat64Sign( b );
319615144b0fSOlivier Houchard if ( aSign != bSign )
319715144b0fSOlivier Houchard return aSign &&
319815144b0fSOlivier Houchard ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
319915144b0fSOlivier Houchard 0 );
320015144b0fSOlivier Houchard return ( a != b ) &&
320115144b0fSOlivier Houchard ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
320215144b0fSOlivier Houchard
320315144b0fSOlivier Houchard }
320415144b0fSOlivier Houchard
320515144b0fSOlivier Houchard #ifndef SOFTFLOAT_FOR_GCC
320615144b0fSOlivier Houchard /*
320715144b0fSOlivier Houchard -------------------------------------------------------------------------------
320815144b0fSOlivier Houchard Returns 1 if the double-precision floating-point value `a' is equal to the
320915144b0fSOlivier Houchard corresponding value `b', and 0 otherwise. The invalid exception is raised
321015144b0fSOlivier Houchard if either operand is a NaN. Otherwise, the comparison is performed
321115144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
321215144b0fSOlivier Houchard -------------------------------------------------------------------------------
321315144b0fSOlivier Houchard */
float64_eq_signaling(float64 a,float64 b)321415144b0fSOlivier Houchard flag float64_eq_signaling( float64 a, float64 b )
321515144b0fSOlivier Houchard {
321615144b0fSOlivier Houchard
321715144b0fSOlivier Houchard if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
321815144b0fSOlivier Houchard || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
321915144b0fSOlivier Houchard ) {
322015144b0fSOlivier Houchard float_raise( float_flag_invalid );
322115144b0fSOlivier Houchard return 0;
322215144b0fSOlivier Houchard }
322315144b0fSOlivier Houchard return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
322415144b0fSOlivier Houchard
322515144b0fSOlivier Houchard }
322615144b0fSOlivier Houchard
322715144b0fSOlivier Houchard /*
322815144b0fSOlivier Houchard -------------------------------------------------------------------------------
322915144b0fSOlivier Houchard Returns 1 if the double-precision floating-point value `a' is less than or
323015144b0fSOlivier Houchard equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
323115144b0fSOlivier Houchard cause an exception. Otherwise, the comparison is performed according to the
323215144b0fSOlivier Houchard IEC/IEEE Standard for Binary Floating-Point Arithmetic.
323315144b0fSOlivier Houchard -------------------------------------------------------------------------------
323415144b0fSOlivier Houchard */
float64_le_quiet(float64 a,float64 b)323515144b0fSOlivier Houchard flag float64_le_quiet( float64 a, float64 b )
323615144b0fSOlivier Houchard {
323715144b0fSOlivier Houchard flag aSign, bSign;
323815144b0fSOlivier Houchard
323915144b0fSOlivier Houchard if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
324015144b0fSOlivier Houchard || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
324115144b0fSOlivier Houchard ) {
324215144b0fSOlivier Houchard if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
324315144b0fSOlivier Houchard float_raise( float_flag_invalid );
324415144b0fSOlivier Houchard }
324515144b0fSOlivier Houchard return 0;
324615144b0fSOlivier Houchard }
324715144b0fSOlivier Houchard aSign = extractFloat64Sign( a );
324815144b0fSOlivier Houchard bSign = extractFloat64Sign( b );
324915144b0fSOlivier Houchard if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
325015144b0fSOlivier Houchard return ( a == b ) || ( aSign ^ ( a < b ) );
325115144b0fSOlivier Houchard
325215144b0fSOlivier Houchard }
325315144b0fSOlivier Houchard
325415144b0fSOlivier Houchard /*
325515144b0fSOlivier Houchard -------------------------------------------------------------------------------
325615144b0fSOlivier Houchard Returns 1 if the double-precision floating-point value `a' is less than
325715144b0fSOlivier Houchard the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
325815144b0fSOlivier Houchard exception. Otherwise, the comparison is performed according to the IEC/IEEE
325915144b0fSOlivier Houchard Standard for Binary Floating-Point Arithmetic.
326015144b0fSOlivier Houchard -------------------------------------------------------------------------------
326115144b0fSOlivier Houchard */
float64_lt_quiet(float64 a,float64 b)326215144b0fSOlivier Houchard flag float64_lt_quiet( float64 a, float64 b )
326315144b0fSOlivier Houchard {
326415144b0fSOlivier Houchard flag aSign, bSign;
326515144b0fSOlivier Houchard
326615144b0fSOlivier Houchard if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
326715144b0fSOlivier Houchard || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
326815144b0fSOlivier Houchard ) {
326915144b0fSOlivier Houchard if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
327015144b0fSOlivier Houchard float_raise( float_flag_invalid );
327115144b0fSOlivier Houchard }
327215144b0fSOlivier Houchard return 0;
327315144b0fSOlivier Houchard }
327415144b0fSOlivier Houchard aSign = extractFloat64Sign( a );
327515144b0fSOlivier Houchard bSign = extractFloat64Sign( b );
327615144b0fSOlivier Houchard if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
327715144b0fSOlivier Houchard return ( a != b ) && ( aSign ^ ( a < b ) );
327815144b0fSOlivier Houchard
327915144b0fSOlivier Houchard }
328015144b0fSOlivier Houchard #endif
328115144b0fSOlivier Houchard
328215144b0fSOlivier Houchard #ifdef FLOATX80
328315144b0fSOlivier Houchard
328415144b0fSOlivier Houchard /*
328515144b0fSOlivier Houchard -------------------------------------------------------------------------------
328615144b0fSOlivier Houchard Returns the result of converting the extended double-precision floating-
328715144b0fSOlivier Houchard point value `a' to the 32-bit two's complement integer format. The
328815144b0fSOlivier Houchard conversion is performed according to the IEC/IEEE Standard for Binary
328915144b0fSOlivier Houchard Floating-Point Arithmetic---which means in particular that the conversion
329015144b0fSOlivier Houchard is rounded according to the current rounding mode. If `a' is a NaN, the
329115144b0fSOlivier Houchard largest positive integer is returned. Otherwise, if the conversion
329215144b0fSOlivier Houchard overflows, the largest integer with the same sign as `a' is returned.
329315144b0fSOlivier Houchard -------------------------------------------------------------------------------
329415144b0fSOlivier Houchard */
floatx80_to_int32(floatx80 a)329515144b0fSOlivier Houchard int32 floatx80_to_int32( floatx80 a )
329615144b0fSOlivier Houchard {
329715144b0fSOlivier Houchard flag aSign;
329815144b0fSOlivier Houchard int32 aExp, shiftCount;
329915144b0fSOlivier Houchard bits64 aSig;
330015144b0fSOlivier Houchard
330115144b0fSOlivier Houchard aSig = extractFloatx80Frac( a );
330215144b0fSOlivier Houchard aExp = extractFloatx80Exp( a );
330315144b0fSOlivier Houchard aSign = extractFloatx80Sign( a );
330415144b0fSOlivier Houchard if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
330515144b0fSOlivier Houchard shiftCount = 0x4037 - aExp;
330615144b0fSOlivier Houchard if ( shiftCount <= 0 ) shiftCount = 1;
330715144b0fSOlivier Houchard shift64RightJamming( aSig, shiftCount, &aSig );
330815144b0fSOlivier Houchard return roundAndPackInt32( aSign, aSig );
330915144b0fSOlivier Houchard
331015144b0fSOlivier Houchard }
331115144b0fSOlivier Houchard
331215144b0fSOlivier Houchard /*
331315144b0fSOlivier Houchard -------------------------------------------------------------------------------
331415144b0fSOlivier Houchard Returns the result of converting the extended double-precision floating-
331515144b0fSOlivier Houchard point value `a' to the 32-bit two's complement integer format. The
331615144b0fSOlivier Houchard conversion is performed according to the IEC/IEEE Standard for Binary
331715144b0fSOlivier Houchard Floating-Point Arithmetic, except that the conversion is always rounded
331815144b0fSOlivier Houchard toward zero. If `a' is a NaN, the largest positive integer is returned.
331915144b0fSOlivier Houchard Otherwise, if the conversion overflows, the largest integer with the same
332015144b0fSOlivier Houchard sign as `a' is returned.
332115144b0fSOlivier Houchard -------------------------------------------------------------------------------
332215144b0fSOlivier Houchard */
floatx80_to_int32_round_to_zero(floatx80 a)332315144b0fSOlivier Houchard int32 floatx80_to_int32_round_to_zero( floatx80 a )
332415144b0fSOlivier Houchard {
332515144b0fSOlivier Houchard flag aSign;
332615144b0fSOlivier Houchard int32 aExp, shiftCount;
332715144b0fSOlivier Houchard bits64 aSig, savedASig;
332815144b0fSOlivier Houchard int32 z;
332915144b0fSOlivier Houchard
333015144b0fSOlivier Houchard aSig = extractFloatx80Frac( a );
333115144b0fSOlivier Houchard aExp = extractFloatx80Exp( a );
333215144b0fSOlivier Houchard aSign = extractFloatx80Sign( a );
333315144b0fSOlivier Houchard if ( 0x401E < aExp ) {
333415144b0fSOlivier Houchard if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
333515144b0fSOlivier Houchard goto invalid;
333615144b0fSOlivier Houchard }
333715144b0fSOlivier Houchard else if ( aExp < 0x3FFF ) {
333815144b0fSOlivier Houchard if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
333915144b0fSOlivier Houchard return 0;
334015144b0fSOlivier Houchard }
334115144b0fSOlivier Houchard shiftCount = 0x403E - aExp;
334215144b0fSOlivier Houchard savedASig = aSig;
334315144b0fSOlivier Houchard aSig >>= shiftCount;
334415144b0fSOlivier Houchard z = aSig;
334515144b0fSOlivier Houchard if ( aSign ) z = - z;
334615144b0fSOlivier Houchard if ( ( z < 0 ) ^ aSign ) {
334715144b0fSOlivier Houchard invalid:
334815144b0fSOlivier Houchard float_raise( float_flag_invalid );
334915144b0fSOlivier Houchard return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
335015144b0fSOlivier Houchard }
335115144b0fSOlivier Houchard if ( ( aSig<<shiftCount ) != savedASig ) {
335215144b0fSOlivier Houchard float_exception_flags |= float_flag_inexact;
335315144b0fSOlivier Houchard }
335415144b0fSOlivier Houchard return z;
335515144b0fSOlivier Houchard
335615144b0fSOlivier Houchard }
335715144b0fSOlivier Houchard
335815144b0fSOlivier Houchard /*
335915144b0fSOlivier Houchard -------------------------------------------------------------------------------
336015144b0fSOlivier Houchard Returns the result of converting the extended double-precision floating-
336115144b0fSOlivier Houchard point value `a' to the 64-bit two's complement integer format. The
336215144b0fSOlivier Houchard conversion is performed according to the IEC/IEEE Standard for Binary
336315144b0fSOlivier Houchard Floating-Point Arithmetic---which means in particular that the conversion
336415144b0fSOlivier Houchard is rounded according to the current rounding mode. If `a' is a NaN,
336515144b0fSOlivier Houchard the largest positive integer is returned. Otherwise, if the conversion
336615144b0fSOlivier Houchard overflows, the largest integer with the same sign as `a' is returned.
336715144b0fSOlivier Houchard -------------------------------------------------------------------------------
336815144b0fSOlivier Houchard */
floatx80_to_int64(floatx80 a)336915144b0fSOlivier Houchard int64 floatx80_to_int64( floatx80 a )
337015144b0fSOlivier Houchard {
337115144b0fSOlivier Houchard flag aSign;
337215144b0fSOlivier Houchard int32 aExp, shiftCount;
337315144b0fSOlivier Houchard bits64 aSig, aSigExtra;
337415144b0fSOlivier Houchard
337515144b0fSOlivier Houchard aSig = extractFloatx80Frac( a );
337615144b0fSOlivier Houchard aExp = extractFloatx80Exp( a );
337715144b0fSOlivier Houchard aSign = extractFloatx80Sign( a );
337815144b0fSOlivier Houchard shiftCount = 0x403E - aExp;
337915144b0fSOlivier Houchard if ( shiftCount <= 0 ) {
338015144b0fSOlivier Houchard if ( shiftCount ) {
338115144b0fSOlivier Houchard float_raise( float_flag_invalid );
338215144b0fSOlivier Houchard if ( ! aSign
338315144b0fSOlivier Houchard || ( ( aExp == 0x7FFF )
338415144b0fSOlivier Houchard && ( aSig != LIT64( 0x8000000000000000 ) ) )
338515144b0fSOlivier Houchard ) {
338615144b0fSOlivier Houchard return LIT64( 0x7FFFFFFFFFFFFFFF );
338715144b0fSOlivier Houchard }
338815144b0fSOlivier Houchard return (sbits64) LIT64( 0x8000000000000000 );
338915144b0fSOlivier Houchard }
339015144b0fSOlivier Houchard aSigExtra = 0;
339115144b0fSOlivier Houchard }
339215144b0fSOlivier Houchard else {
339315144b0fSOlivier Houchard shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
339415144b0fSOlivier Houchard }
339515144b0fSOlivier Houchard return roundAndPackInt64( aSign, aSig, aSigExtra );
339615144b0fSOlivier Houchard
339715144b0fSOlivier Houchard }
339815144b0fSOlivier Houchard
339915144b0fSOlivier Houchard /*
340015144b0fSOlivier Houchard -------------------------------------------------------------------------------
340115144b0fSOlivier Houchard Returns the result of converting the extended double-precision floating-
340215144b0fSOlivier Houchard point value `a' to the 64-bit two's complement integer format. The
340315144b0fSOlivier Houchard conversion is performed according to the IEC/IEEE Standard for Binary
340415144b0fSOlivier Houchard Floating-Point Arithmetic, except that the conversion is always rounded
340515144b0fSOlivier Houchard toward zero. If `a' is a NaN, the largest positive integer is returned.
340615144b0fSOlivier Houchard Otherwise, if the conversion overflows, the largest integer with the same
340715144b0fSOlivier Houchard sign as `a' is returned.
340815144b0fSOlivier Houchard -------------------------------------------------------------------------------
340915144b0fSOlivier Houchard */
floatx80_to_int64_round_to_zero(floatx80 a)341015144b0fSOlivier Houchard int64 floatx80_to_int64_round_to_zero( floatx80 a )
341115144b0fSOlivier Houchard {
341215144b0fSOlivier Houchard flag aSign;
341315144b0fSOlivier Houchard int32 aExp, shiftCount;
341415144b0fSOlivier Houchard bits64 aSig;
341515144b0fSOlivier Houchard int64 z;
341615144b0fSOlivier Houchard
341715144b0fSOlivier Houchard aSig = extractFloatx80Frac( a );
341815144b0fSOlivier Houchard aExp = extractFloatx80Exp( a );
341915144b0fSOlivier Houchard aSign = extractFloatx80Sign( a );
342015144b0fSOlivier Houchard shiftCount = aExp - 0x403E;
342115144b0fSOlivier Houchard if ( 0 <= shiftCount ) {
342215144b0fSOlivier Houchard aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
342315144b0fSOlivier Houchard if ( ( a.high != 0xC03E ) || aSig ) {
342415144b0fSOlivier Houchard float_raise( float_flag_invalid );
342515144b0fSOlivier Houchard if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
342615144b0fSOlivier Houchard return LIT64( 0x7FFFFFFFFFFFFFFF );
342715144b0fSOlivier Houchard }
342815144b0fSOlivier Houchard }
342915144b0fSOlivier Houchard return (sbits64) LIT64( 0x8000000000000000 );
343015144b0fSOlivier Houchard }
343115144b0fSOlivier Houchard else if ( aExp < 0x3FFF ) {
343215144b0fSOlivier Houchard if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
343315144b0fSOlivier Houchard return 0;
343415144b0fSOlivier Houchard }
343515144b0fSOlivier Houchard z = aSig>>( - shiftCount );
343615144b0fSOlivier Houchard if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
343715144b0fSOlivier Houchard float_exception_flags |= float_flag_inexact;
343815144b0fSOlivier Houchard }
343915144b0fSOlivier Houchard if ( aSign ) z = - z;
344015144b0fSOlivier Houchard return z;
344115144b0fSOlivier Houchard
344215144b0fSOlivier Houchard }
344315144b0fSOlivier Houchard
344415144b0fSOlivier Houchard /*
344515144b0fSOlivier Houchard -------------------------------------------------------------------------------
344615144b0fSOlivier Houchard Returns the result of converting the extended double-precision floating-
344715144b0fSOlivier Houchard point value `a' to the single-precision floating-point format. The
344815144b0fSOlivier Houchard conversion is performed according to the IEC/IEEE Standard for Binary
344915144b0fSOlivier Houchard Floating-Point Arithmetic.
345015144b0fSOlivier Houchard -------------------------------------------------------------------------------
345115144b0fSOlivier Houchard */
floatx80_to_float32(floatx80 a)345215144b0fSOlivier Houchard float32 floatx80_to_float32( floatx80 a )
345315144b0fSOlivier Houchard {
345415144b0fSOlivier Houchard flag aSign;
345515144b0fSOlivier Houchard int32 aExp;
345615144b0fSOlivier Houchard bits64 aSig;
345715144b0fSOlivier Houchard
345815144b0fSOlivier Houchard aSig = extractFloatx80Frac( a );
345915144b0fSOlivier Houchard aExp = extractFloatx80Exp( a );
346015144b0fSOlivier Houchard aSign = extractFloatx80Sign( a );
346115144b0fSOlivier Houchard if ( aExp == 0x7FFF ) {
346215144b0fSOlivier Houchard if ( (bits64) ( aSig<<1 ) ) {
346315144b0fSOlivier Houchard return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
346415144b0fSOlivier Houchard }
346515144b0fSOlivier Houchard return packFloat32( aSign, 0xFF, 0 );
346615144b0fSOlivier Houchard }
346715144b0fSOlivier Houchard shift64RightJamming( aSig, 33, &aSig );
346815144b0fSOlivier Houchard if ( aExp || aSig ) aExp -= 0x3F81;
346915144b0fSOlivier Houchard return roundAndPackFloat32( aSign, aExp, aSig );
347015144b0fSOlivier Houchard
347115144b0fSOlivier Houchard }
347215144b0fSOlivier Houchard
347315144b0fSOlivier Houchard /*
347415144b0fSOlivier Houchard -------------------------------------------------------------------------------
347515144b0fSOlivier Houchard Returns the result of converting the extended double-precision floating-
347615144b0fSOlivier Houchard point value `a' to the double-precision floating-point format. The
347715144b0fSOlivier Houchard conversion is performed according to the IEC/IEEE Standard for Binary
347815144b0fSOlivier Houchard Floating-Point Arithmetic.
347915144b0fSOlivier Houchard -------------------------------------------------------------------------------
348015144b0fSOlivier Houchard */
floatx80_to_float64(floatx80 a)348115144b0fSOlivier Houchard float64 floatx80_to_float64( floatx80 a )
348215144b0fSOlivier Houchard {
348315144b0fSOlivier Houchard flag aSign;
348415144b0fSOlivier Houchard int32 aExp;
348515144b0fSOlivier Houchard bits64 aSig, zSig;
348615144b0fSOlivier Houchard
348715144b0fSOlivier Houchard aSig = extractFloatx80Frac( a );
348815144b0fSOlivier Houchard aExp = extractFloatx80Exp( a );
348915144b0fSOlivier Houchard aSign = extractFloatx80Sign( a );
349015144b0fSOlivier Houchard if ( aExp == 0x7FFF ) {
349115144b0fSOlivier Houchard if ( (bits64) ( aSig<<1 ) ) {
349215144b0fSOlivier Houchard return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
349315144b0fSOlivier Houchard }
349415144b0fSOlivier Houchard return packFloat64( aSign, 0x7FF, 0 );
349515144b0fSOlivier Houchard }
349615144b0fSOlivier Houchard shift64RightJamming( aSig, 1, &zSig );
349715144b0fSOlivier Houchard if ( aExp || aSig ) aExp -= 0x3C01;
349815144b0fSOlivier Houchard return roundAndPackFloat64( aSign, aExp, zSig );
349915144b0fSOlivier Houchard
350015144b0fSOlivier Houchard }
350115144b0fSOlivier Houchard
350215144b0fSOlivier Houchard #ifdef FLOAT128
350315144b0fSOlivier Houchard
350415144b0fSOlivier Houchard /*
350515144b0fSOlivier Houchard -------------------------------------------------------------------------------
350615144b0fSOlivier Houchard Returns the result of converting the extended double-precision floating-
350715144b0fSOlivier Houchard point value `a' to the quadruple-precision floating-point format. The
350815144b0fSOlivier Houchard conversion is performed according to the IEC/IEEE Standard for Binary
350915144b0fSOlivier Houchard Floating-Point Arithmetic.
351015144b0fSOlivier Houchard -------------------------------------------------------------------------------
351115144b0fSOlivier Houchard */
floatx80_to_float128(floatx80 a)351215144b0fSOlivier Houchard float128 floatx80_to_float128( floatx80 a )
351315144b0fSOlivier Houchard {
351415144b0fSOlivier Houchard flag aSign;
351515144b0fSOlivier Houchard int16 aExp;
351615144b0fSOlivier Houchard bits64 aSig, zSig0, zSig1;
351715144b0fSOlivier Houchard
351815144b0fSOlivier Houchard aSig = extractFloatx80Frac( a );
351915144b0fSOlivier Houchard aExp = extractFloatx80Exp( a );
352015144b0fSOlivier Houchard aSign = extractFloatx80Sign( a );
352115144b0fSOlivier Houchard if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
352215144b0fSOlivier Houchard return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
352315144b0fSOlivier Houchard }
352415144b0fSOlivier Houchard shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
352515144b0fSOlivier Houchard return packFloat128( aSign, aExp, zSig0, zSig1 );
352615144b0fSOlivier Houchard
352715144b0fSOlivier Houchard }
352815144b0fSOlivier Houchard
352915144b0fSOlivier Houchard #endif
353015144b0fSOlivier Houchard
353115144b0fSOlivier Houchard /*
353215144b0fSOlivier Houchard -------------------------------------------------------------------------------
353315144b0fSOlivier Houchard Rounds the extended double-precision floating-point value `a' to an integer,
353415144b0fSOlivier Houchard and returns the result as an extended quadruple-precision floating-point
353515144b0fSOlivier Houchard value. The operation is performed according to the IEC/IEEE Standard for
353615144b0fSOlivier Houchard Binary Floating-Point Arithmetic.
353715144b0fSOlivier Houchard -------------------------------------------------------------------------------
353815144b0fSOlivier Houchard */
floatx80_round_to_int(floatx80 a)353915144b0fSOlivier Houchard floatx80 floatx80_round_to_int( floatx80 a )
354015144b0fSOlivier Houchard {
354115144b0fSOlivier Houchard flag aSign;
354215144b0fSOlivier Houchard int32 aExp;
354315144b0fSOlivier Houchard bits64 lastBitMask, roundBitsMask;
354415144b0fSOlivier Houchard int8 roundingMode;
354515144b0fSOlivier Houchard floatx80 z;
354615144b0fSOlivier Houchard
354715144b0fSOlivier Houchard aExp = extractFloatx80Exp( a );
354815144b0fSOlivier Houchard if ( 0x403E <= aExp ) {
354915144b0fSOlivier Houchard if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
355015144b0fSOlivier Houchard return propagateFloatx80NaN( a, a );
355115144b0fSOlivier Houchard }
355215144b0fSOlivier Houchard return a;
355315144b0fSOlivier Houchard }
355415144b0fSOlivier Houchard if ( aExp < 0x3FFF ) {
355515144b0fSOlivier Houchard if ( ( aExp == 0 )
355615144b0fSOlivier Houchard && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
355715144b0fSOlivier Houchard return a;
355815144b0fSOlivier Houchard }
355915144b0fSOlivier Houchard float_exception_flags |= float_flag_inexact;
356015144b0fSOlivier Houchard aSign = extractFloatx80Sign( a );
356115144b0fSOlivier Houchard switch ( float_rounding_mode ) {
356215144b0fSOlivier Houchard case float_round_nearest_even:
356315144b0fSOlivier Houchard if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
356415144b0fSOlivier Houchard ) {
356515144b0fSOlivier Houchard return
356615144b0fSOlivier Houchard packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
356715144b0fSOlivier Houchard }
356815144b0fSOlivier Houchard break;
356915144b0fSOlivier Houchard case float_round_to_zero:
357015144b0fSOlivier Houchard break;
357115144b0fSOlivier Houchard case float_round_down:
357215144b0fSOlivier Houchard return
357315144b0fSOlivier Houchard aSign ?
357415144b0fSOlivier Houchard packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
357515144b0fSOlivier Houchard : packFloatx80( 0, 0, 0 );
357615144b0fSOlivier Houchard case float_round_up:
357715144b0fSOlivier Houchard return
357815144b0fSOlivier Houchard aSign ? packFloatx80( 1, 0, 0 )
357915144b0fSOlivier Houchard : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
358015144b0fSOlivier Houchard }
358115144b0fSOlivier Houchard return packFloatx80( aSign, 0, 0 );
358215144b0fSOlivier Houchard }
358315144b0fSOlivier Houchard lastBitMask = 1;
358415144b0fSOlivier Houchard lastBitMask <<= 0x403E - aExp;
358515144b0fSOlivier Houchard roundBitsMask = lastBitMask - 1;
358615144b0fSOlivier Houchard z = a;
358715144b0fSOlivier Houchard roundingMode = float_rounding_mode;
358815144b0fSOlivier Houchard if ( roundingMode == float_round_nearest_even ) {
358915144b0fSOlivier Houchard z.low += lastBitMask>>1;
359015144b0fSOlivier Houchard if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
359115144b0fSOlivier Houchard }
359215144b0fSOlivier Houchard else if ( roundingMode != float_round_to_zero ) {
359315144b0fSOlivier Houchard if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
359415144b0fSOlivier Houchard z.low += roundBitsMask;
359515144b0fSOlivier Houchard }
359615144b0fSOlivier Houchard }
359715144b0fSOlivier Houchard z.low &= ~ roundBitsMask;
359815144b0fSOlivier Houchard if ( z.low == 0 ) {
359915144b0fSOlivier Houchard ++z.high;
360015144b0fSOlivier Houchard z.low = LIT64( 0x8000000000000000 );
360115144b0fSOlivier Houchard }
360215144b0fSOlivier Houchard if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;
360315144b0fSOlivier Houchard return z;
360415144b0fSOlivier Houchard
360515144b0fSOlivier Houchard }
360615144b0fSOlivier Houchard
360715144b0fSOlivier Houchard /*
360815144b0fSOlivier Houchard -------------------------------------------------------------------------------
360915144b0fSOlivier Houchard Returns the result of adding the absolute values of the extended double-
361015144b0fSOlivier Houchard precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
361115144b0fSOlivier Houchard negated before being returned. `zSign' is ignored if the result is a NaN.
361215144b0fSOlivier Houchard The addition is performed according to the IEC/IEEE Standard for Binary
361315144b0fSOlivier Houchard Floating-Point Arithmetic.
361415144b0fSOlivier Houchard -------------------------------------------------------------------------------
361515144b0fSOlivier Houchard */
addFloatx80Sigs(floatx80 a,floatx80 b,flag zSign)361615144b0fSOlivier Houchard static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
361715144b0fSOlivier Houchard {
361815144b0fSOlivier Houchard int32 aExp, bExp, zExp;
361915144b0fSOlivier Houchard bits64 aSig, bSig, zSig0, zSig1;
362015144b0fSOlivier Houchard int32 expDiff;
362115144b0fSOlivier Houchard
362215144b0fSOlivier Houchard aSig = extractFloatx80Frac( a );
362315144b0fSOlivier Houchard aExp = extractFloatx80Exp( a );
362415144b0fSOlivier Houchard bSig = extractFloatx80Frac( b );
362515144b0fSOlivier Houchard bExp = extractFloatx80Exp( b );
362615144b0fSOlivier Houchard expDiff = aExp - bExp;
362715144b0fSOlivier Houchard if ( 0 < expDiff ) {
362815144b0fSOlivier Houchard if ( aExp == 0x7FFF ) {
362915144b0fSOlivier Houchard if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
363015144b0fSOlivier Houchard return a;
363115144b0fSOlivier Houchard }
363215144b0fSOlivier Houchard if ( bExp == 0 ) --expDiff;
363315144b0fSOlivier Houchard shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
363415144b0fSOlivier Houchard zExp = aExp;
363515144b0fSOlivier Houchard }
363615144b0fSOlivier Houchard else if ( expDiff < 0 ) {
363715144b0fSOlivier Houchard if ( bExp == 0x7FFF ) {
363815144b0fSOlivier Houchard if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
363915144b0fSOlivier Houchard return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
364015144b0fSOlivier Houchard }
364115144b0fSOlivier Houchard if ( aExp == 0 ) ++expDiff;
364215144b0fSOlivier Houchard shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
364315144b0fSOlivier Houchard zExp = bExp;
364415144b0fSOlivier Houchard }
364515144b0fSOlivier Houchard else {
364615144b0fSOlivier Houchard if ( aExp == 0x7FFF ) {
364715144b0fSOlivier Houchard if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
364815144b0fSOlivier Houchard return propagateFloatx80NaN( a, b );
364915144b0fSOlivier Houchard }
365015144b0fSOlivier Houchard return a;
365115144b0fSOlivier Houchard }
365215144b0fSOlivier Houchard zSig1 = 0;
365315144b0fSOlivier Houchard zSig0 = aSig + bSig;
365415144b0fSOlivier Houchard if ( aExp == 0 ) {
365515144b0fSOlivier Houchard normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
365615144b0fSOlivier Houchard goto roundAndPack;
365715144b0fSOlivier Houchard }
365815144b0fSOlivier Houchard zExp = aExp;
365915144b0fSOlivier Houchard goto shiftRight1;
366015144b0fSOlivier Houchard }
366115144b0fSOlivier Houchard zSig0 = aSig + bSig;
366215144b0fSOlivier Houchard if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
366315144b0fSOlivier Houchard shiftRight1:
366415144b0fSOlivier Houchard shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
366515144b0fSOlivier Houchard zSig0 |= LIT64( 0x8000000000000000 );
366615144b0fSOlivier Houchard ++zExp;
366715144b0fSOlivier Houchard roundAndPack:
366815144b0fSOlivier Houchard return
366915144b0fSOlivier Houchard roundAndPackFloatx80(
367015144b0fSOlivier Houchard floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
367115144b0fSOlivier Houchard
367215144b0fSOlivier Houchard }
367315144b0fSOlivier Houchard
367415144b0fSOlivier Houchard /*
367515144b0fSOlivier Houchard -------------------------------------------------------------------------------
367615144b0fSOlivier Houchard Returns the result of subtracting the absolute values of the extended
367715144b0fSOlivier Houchard double-precision floating-point values `a' and `b'. If `zSign' is 1, the
367815144b0fSOlivier Houchard difference is negated before being returned. `zSign' is ignored if the
367915144b0fSOlivier Houchard result is a NaN. The subtraction is performed according to the IEC/IEEE
368015144b0fSOlivier Houchard Standard for Binary Floating-Point Arithmetic.
368115144b0fSOlivier Houchard -------------------------------------------------------------------------------
368215144b0fSOlivier Houchard */
subFloatx80Sigs(floatx80 a,floatx80 b,flag zSign)368315144b0fSOlivier Houchard static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
368415144b0fSOlivier Houchard {
368515144b0fSOlivier Houchard int32 aExp, bExp, zExp;
368615144b0fSOlivier Houchard bits64 aSig, bSig, zSig0, zSig1;
368715144b0fSOlivier Houchard int32 expDiff;
368815144b0fSOlivier Houchard floatx80 z;
368915144b0fSOlivier Houchard
369015144b0fSOlivier Houchard aSig = extractFloatx80Frac( a );
369115144b0fSOlivier Houchard aExp = extractFloatx80Exp( a );
369215144b0fSOlivier Houchard bSig = extractFloatx80Frac( b );
369315144b0fSOlivier Houchard bExp = extractFloatx80Exp( b );
369415144b0fSOlivier Houchard expDiff = aExp - bExp;
369515144b0fSOlivier Houchard if ( 0 < expDiff ) goto aExpBigger;
369615144b0fSOlivier Houchard if ( expDiff < 0 ) goto bExpBigger;
369715144b0fSOlivier Houchard if ( aExp == 0x7FFF ) {
369815144b0fSOlivier Houchard if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
369915144b0fSOlivier Houchard return propagateFloatx80NaN( a, b );
370015144b0fSOlivier Houchard }
370115144b0fSOlivier Houchard float_raise( float_flag_invalid );
370215144b0fSOlivier Houchard z.low = floatx80_default_nan_low;
370315144b0fSOlivier Houchard z.high = floatx80_default_nan_high;
370415144b0fSOlivier Houchard return z;
370515144b0fSOlivier Houchard }
370615144b0fSOlivier Houchard if ( aExp == 0 ) {
370715144b0fSOlivier Houchard aExp = 1;
370815144b0fSOlivier Houchard bExp = 1;
370915144b0fSOlivier Houchard }
371015144b0fSOlivier Houchard zSig1 = 0;
371115144b0fSOlivier Houchard if ( bSig < aSig ) goto aBigger;
371215144b0fSOlivier Houchard if ( aSig < bSig ) goto bBigger;
371315144b0fSOlivier Houchard return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
371415144b0fSOlivier Houchard bExpBigger:
371515144b0fSOlivier Houchard if ( bExp == 0x7FFF ) {
371615144b0fSOlivier Houchard if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
371715144b0fSOlivier Houchard return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
371815144b0fSOlivier Houchard }
371915144b0fSOlivier Houchard if ( aExp == 0 ) ++expDiff;
372015144b0fSOlivier Houchard shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
372115144b0fSOlivier Houchard bBigger:
372215144b0fSOlivier Houchard sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
372315144b0fSOlivier Houchard zExp = bExp;
372415144b0fSOlivier Houchard zSign ^= 1;
372515144b0fSOlivier Houchard goto normalizeRoundAndPack;
372615144b0fSOlivier Houchard aExpBigger:
372715144b0fSOlivier Houchard if ( aExp == 0x7FFF ) {
372815144b0fSOlivier Houchard if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
372915144b0fSOlivier Houchard return a;
373015144b0fSOlivier Houchard }
373115144b0fSOlivier Houchard if ( bExp == 0 ) --expDiff;
373215144b0fSOlivier Houchard shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
373315144b0fSOlivier Houchard aBigger:
373415144b0fSOlivier Houchard sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
373515144b0fSOlivier Houchard zExp = aExp;
373615144b0fSOlivier Houchard normalizeRoundAndPack:
373715144b0fSOlivier Houchard return
373815144b0fSOlivier Houchard normalizeRoundAndPackFloatx80(
373915144b0fSOlivier Houchard floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
374015144b0fSOlivier Houchard
374115144b0fSOlivier Houchard }
374215144b0fSOlivier Houchard
374315144b0fSOlivier Houchard /*
374415144b0fSOlivier Houchard -------------------------------------------------------------------------------
374515144b0fSOlivier Houchard Returns the result of adding the extended double-precision floating-point
374615144b0fSOlivier Houchard values `a' and `b'. The operation is performed according to the IEC/IEEE
374715144b0fSOlivier Houchard Standard for Binary Floating-Point Arithmetic.
374815144b0fSOlivier Houchard -------------------------------------------------------------------------------
374915144b0fSOlivier Houchard */
floatx80_add(floatx80 a,floatx80 b)375015144b0fSOlivier Houchard floatx80 floatx80_add( floatx80 a, floatx80 b )
375115144b0fSOlivier Houchard {
375215144b0fSOlivier Houchard flag aSign, bSign;
375315144b0fSOlivier Houchard
375415144b0fSOlivier Houchard aSign = extractFloatx80Sign( a );
375515144b0fSOlivier Houchard bSign = extractFloatx80Sign( b );
375615144b0fSOlivier Houchard if ( aSign == bSign ) {
375715144b0fSOlivier Houchard return addFloatx80Sigs( a, b, aSign );
375815144b0fSOlivier Houchard }
375915144b0fSOlivier Houchard else {
376015144b0fSOlivier Houchard return subFloatx80Sigs( a, b, aSign );
376115144b0fSOlivier Houchard }
376215144b0fSOlivier Houchard
376315144b0fSOlivier Houchard }
376415144b0fSOlivier Houchard
376515144b0fSOlivier Houchard /*
376615144b0fSOlivier Houchard -------------------------------------------------------------------------------
376715144b0fSOlivier Houchard Returns the result of subtracting the extended double-precision floating-
376815144b0fSOlivier Houchard point values `a' and `b'. The operation is performed according to the
376915144b0fSOlivier Houchard IEC/IEEE Standard for Binary Floating-Point Arithmetic.
377015144b0fSOlivier Houchard -------------------------------------------------------------------------------
377115144b0fSOlivier Houchard */
floatx80_sub(floatx80 a,floatx80 b)377215144b0fSOlivier Houchard floatx80 floatx80_sub( floatx80 a, floatx80 b )
377315144b0fSOlivier Houchard {
377415144b0fSOlivier Houchard flag aSign, bSign;
377515144b0fSOlivier Houchard
377615144b0fSOlivier Houchard aSign = extractFloatx80Sign( a );
377715144b0fSOlivier Houchard bSign = extractFloatx80Sign( b );
377815144b0fSOlivier Houchard if ( aSign == bSign ) {
377915144b0fSOlivier Houchard return subFloatx80Sigs( a, b, aSign );
378015144b0fSOlivier Houchard }
378115144b0fSOlivier Houchard else {
378215144b0fSOlivier Houchard return addFloatx80Sigs( a, b, aSign );
378315144b0fSOlivier Houchard }
378415144b0fSOlivier Houchard
378515144b0fSOlivier Houchard }
378615144b0fSOlivier Houchard
378715144b0fSOlivier Houchard /*
378815144b0fSOlivier Houchard -------------------------------------------------------------------------------
378915144b0fSOlivier Houchard Returns the result of multiplying the extended double-precision floating-
379015144b0fSOlivier Houchard point values `a' and `b'. The operation is performed according to the
379115144b0fSOlivier Houchard IEC/IEEE Standard for Binary Floating-Point Arithmetic.
379215144b0fSOlivier Houchard -------------------------------------------------------------------------------
379315144b0fSOlivier Houchard */
floatx80_mul(floatx80 a,floatx80 b)379415144b0fSOlivier Houchard floatx80 floatx80_mul( floatx80 a, floatx80 b )
379515144b0fSOlivier Houchard {
379615144b0fSOlivier Houchard flag aSign, bSign, zSign;
379715144b0fSOlivier Houchard int32 aExp, bExp, zExp;
379815144b0fSOlivier Houchard bits64 aSig, bSig, zSig0, zSig1;
379915144b0fSOlivier Houchard floatx80 z;
380015144b0fSOlivier Houchard
380115144b0fSOlivier Houchard aSig = extractFloatx80Frac( a );
380215144b0fSOlivier Houchard aExp = extractFloatx80Exp( a );
380315144b0fSOlivier Houchard aSign = extractFloatx80Sign( a );
380415144b0fSOlivier Houchard bSig = extractFloatx80Frac( b );
380515144b0fSOlivier Houchard bExp = extractFloatx80Exp( b );
380615144b0fSOlivier Houchard bSign = extractFloatx80Sign( b );
380715144b0fSOlivier Houchard zSign = aSign ^ bSign;
380815144b0fSOlivier Houchard if ( aExp == 0x7FFF ) {
380915144b0fSOlivier Houchard if ( (bits64) ( aSig<<1 )
381015144b0fSOlivier Houchard || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
381115144b0fSOlivier Houchard return propagateFloatx80NaN( a, b );
381215144b0fSOlivier Houchard }
381315144b0fSOlivier Houchard if ( ( bExp | bSig ) == 0 ) goto invalid;
381415144b0fSOlivier Houchard return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
381515144b0fSOlivier Houchard }
381615144b0fSOlivier Houchard if ( bExp == 0x7FFF ) {
381715144b0fSOlivier Houchard if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
381815144b0fSOlivier Houchard if ( ( aExp | aSig ) == 0 ) {
381915144b0fSOlivier Houchard invalid:
382015144b0fSOlivier Houchard float_raise( float_flag_invalid );
382115144b0fSOlivier Houchard z.low = floatx80_default_nan_low;
382215144b0fSOlivier Houchard z.high = floatx80_default_nan_high;
382315144b0fSOlivier Houchard return z;
382415144b0fSOlivier Houchard }
382515144b0fSOlivier Houchard return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
382615144b0fSOlivier Houchard }
382715144b0fSOlivier Houchard if ( aExp == 0 ) {
382815144b0fSOlivier Houchard if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
382915144b0fSOlivier Houchard normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
383015144b0fSOlivier Houchard }
383115144b0fSOlivier Houchard if ( bExp == 0 ) {
383215144b0fSOlivier Houchard if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
383315144b0fSOlivier Houchard normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
383415144b0fSOlivier Houchard }
383515144b0fSOlivier Houchard zExp = aExp + bExp - 0x3FFE;
383615144b0fSOlivier Houchard mul64To128( aSig, bSig, &zSig0, &zSig1 );
383715144b0fSOlivier Houchard if ( 0 < (sbits64) zSig0 ) {
383815144b0fSOlivier Houchard shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
383915144b0fSOlivier Houchard --zExp;
384015144b0fSOlivier Houchard }
384115144b0fSOlivier Houchard return
384215144b0fSOlivier Houchard roundAndPackFloatx80(
384315144b0fSOlivier Houchard floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
384415144b0fSOlivier Houchard
384515144b0fSOlivier Houchard }
384615144b0fSOlivier Houchard
384715144b0fSOlivier Houchard /*
384815144b0fSOlivier Houchard -------------------------------------------------------------------------------
384915144b0fSOlivier Houchard Returns the result of dividing the extended double-precision floating-point
385015144b0fSOlivier Houchard value `a' by the corresponding value `b'. The operation is performed
385115144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
385215144b0fSOlivier Houchard -------------------------------------------------------------------------------
385315144b0fSOlivier Houchard */
floatx80_div(floatx80 a,floatx80 b)385415144b0fSOlivier Houchard floatx80 floatx80_div( floatx80 a, floatx80 b )
385515144b0fSOlivier Houchard {
385615144b0fSOlivier Houchard flag aSign, bSign, zSign;
385715144b0fSOlivier Houchard int32 aExp, bExp, zExp;
385815144b0fSOlivier Houchard bits64 aSig, bSig, zSig0, zSig1;
385915144b0fSOlivier Houchard bits64 rem0, rem1, rem2, term0, term1, term2;
386015144b0fSOlivier Houchard floatx80 z;
386115144b0fSOlivier Houchard
386215144b0fSOlivier Houchard aSig = extractFloatx80Frac( a );
386315144b0fSOlivier Houchard aExp = extractFloatx80Exp( a );
386415144b0fSOlivier Houchard aSign = extractFloatx80Sign( a );
386515144b0fSOlivier Houchard bSig = extractFloatx80Frac( b );
386615144b0fSOlivier Houchard bExp = extractFloatx80Exp( b );
386715144b0fSOlivier Houchard bSign = extractFloatx80Sign( b );
386815144b0fSOlivier Houchard zSign = aSign ^ bSign;
386915144b0fSOlivier Houchard if ( aExp == 0x7FFF ) {
387015144b0fSOlivier Houchard if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
387115144b0fSOlivier Houchard if ( bExp == 0x7FFF ) {
387215144b0fSOlivier Houchard if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
387315144b0fSOlivier Houchard goto invalid;
387415144b0fSOlivier Houchard }
387515144b0fSOlivier Houchard return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
387615144b0fSOlivier Houchard }
387715144b0fSOlivier Houchard if ( bExp == 0x7FFF ) {
387815144b0fSOlivier Houchard if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
387915144b0fSOlivier Houchard return packFloatx80( zSign, 0, 0 );
388015144b0fSOlivier Houchard }
388115144b0fSOlivier Houchard if ( bExp == 0 ) {
388215144b0fSOlivier Houchard if ( bSig == 0 ) {
388315144b0fSOlivier Houchard if ( ( aExp | aSig ) == 0 ) {
388415144b0fSOlivier Houchard invalid:
388515144b0fSOlivier Houchard float_raise( float_flag_invalid );
388615144b0fSOlivier Houchard z.low = floatx80_default_nan_low;
388715144b0fSOlivier Houchard z.high = floatx80_default_nan_high;
388815144b0fSOlivier Houchard return z;
388915144b0fSOlivier Houchard }
389015144b0fSOlivier Houchard float_raise( float_flag_divbyzero );
389115144b0fSOlivier Houchard return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
389215144b0fSOlivier Houchard }
389315144b0fSOlivier Houchard normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
389415144b0fSOlivier Houchard }
389515144b0fSOlivier Houchard if ( aExp == 0 ) {
389615144b0fSOlivier Houchard if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
389715144b0fSOlivier Houchard normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
389815144b0fSOlivier Houchard }
389915144b0fSOlivier Houchard zExp = aExp - bExp + 0x3FFE;
390015144b0fSOlivier Houchard rem1 = 0;
390115144b0fSOlivier Houchard if ( bSig <= aSig ) {
390215144b0fSOlivier Houchard shift128Right( aSig, 0, 1, &aSig, &rem1 );
390315144b0fSOlivier Houchard ++zExp;
390415144b0fSOlivier Houchard }
390515144b0fSOlivier Houchard zSig0 = estimateDiv128To64( aSig, rem1, bSig );
390615144b0fSOlivier Houchard mul64To128( bSig, zSig0, &term0, &term1 );
390715144b0fSOlivier Houchard sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
390815144b0fSOlivier Houchard while ( (sbits64) rem0 < 0 ) {
390915144b0fSOlivier Houchard --zSig0;
391015144b0fSOlivier Houchard add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
391115144b0fSOlivier Houchard }
391215144b0fSOlivier Houchard zSig1 = estimateDiv128To64( rem1, 0, bSig );
391315144b0fSOlivier Houchard if ( (bits64) ( zSig1<<1 ) <= 8 ) {
391415144b0fSOlivier Houchard mul64To128( bSig, zSig1, &term1, &term2 );
391515144b0fSOlivier Houchard sub128( rem1, 0, term1, term2, &rem1, &rem2 );
391615144b0fSOlivier Houchard while ( (sbits64) rem1 < 0 ) {
391715144b0fSOlivier Houchard --zSig1;
391815144b0fSOlivier Houchard add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
391915144b0fSOlivier Houchard }
392015144b0fSOlivier Houchard zSig1 |= ( ( rem1 | rem2 ) != 0 );
392115144b0fSOlivier Houchard }
392215144b0fSOlivier Houchard return
392315144b0fSOlivier Houchard roundAndPackFloatx80(
392415144b0fSOlivier Houchard floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
392515144b0fSOlivier Houchard
392615144b0fSOlivier Houchard }
392715144b0fSOlivier Houchard
392815144b0fSOlivier Houchard /*
392915144b0fSOlivier Houchard -------------------------------------------------------------------------------
393015144b0fSOlivier Houchard Returns the remainder of the extended double-precision floating-point value
393115144b0fSOlivier Houchard `a' with respect to the corresponding value `b'. The operation is performed
393215144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
393315144b0fSOlivier Houchard -------------------------------------------------------------------------------
393415144b0fSOlivier Houchard */
floatx80_rem(floatx80 a,floatx80 b)393515144b0fSOlivier Houchard floatx80 floatx80_rem( floatx80 a, floatx80 b )
393615144b0fSOlivier Houchard {
393715144b0fSOlivier Houchard flag aSign, bSign, zSign;
393815144b0fSOlivier Houchard int32 aExp, bExp, expDiff;
393915144b0fSOlivier Houchard bits64 aSig0, aSig1, bSig;
394015144b0fSOlivier Houchard bits64 q, term0, term1, alternateASig0, alternateASig1;
394115144b0fSOlivier Houchard floatx80 z;
394215144b0fSOlivier Houchard
394315144b0fSOlivier Houchard aSig0 = extractFloatx80Frac( a );
394415144b0fSOlivier Houchard aExp = extractFloatx80Exp( a );
394515144b0fSOlivier Houchard aSign = extractFloatx80Sign( a );
394615144b0fSOlivier Houchard bSig = extractFloatx80Frac( b );
394715144b0fSOlivier Houchard bExp = extractFloatx80Exp( b );
394815144b0fSOlivier Houchard bSign = extractFloatx80Sign( b );
394915144b0fSOlivier Houchard if ( aExp == 0x7FFF ) {
395015144b0fSOlivier Houchard if ( (bits64) ( aSig0<<1 )
395115144b0fSOlivier Houchard || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
395215144b0fSOlivier Houchard return propagateFloatx80NaN( a, b );
395315144b0fSOlivier Houchard }
395415144b0fSOlivier Houchard goto invalid;
395515144b0fSOlivier Houchard }
395615144b0fSOlivier Houchard if ( bExp == 0x7FFF ) {
395715144b0fSOlivier Houchard if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
395815144b0fSOlivier Houchard return a;
395915144b0fSOlivier Houchard }
396015144b0fSOlivier Houchard if ( bExp == 0 ) {
396115144b0fSOlivier Houchard if ( bSig == 0 ) {
396215144b0fSOlivier Houchard invalid:
396315144b0fSOlivier Houchard float_raise( float_flag_invalid );
396415144b0fSOlivier Houchard z.low = floatx80_default_nan_low;
396515144b0fSOlivier Houchard z.high = floatx80_default_nan_high;
396615144b0fSOlivier Houchard return z;
396715144b0fSOlivier Houchard }
396815144b0fSOlivier Houchard normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
396915144b0fSOlivier Houchard }
397015144b0fSOlivier Houchard if ( aExp == 0 ) {
397115144b0fSOlivier Houchard if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
397215144b0fSOlivier Houchard normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
397315144b0fSOlivier Houchard }
397415144b0fSOlivier Houchard bSig |= LIT64( 0x8000000000000000 );
397515144b0fSOlivier Houchard zSign = aSign;
397615144b0fSOlivier Houchard expDiff = aExp - bExp;
397715144b0fSOlivier Houchard aSig1 = 0;
397815144b0fSOlivier Houchard if ( expDiff < 0 ) {
397915144b0fSOlivier Houchard if ( expDiff < -1 ) return a;
398015144b0fSOlivier Houchard shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
398115144b0fSOlivier Houchard expDiff = 0;
398215144b0fSOlivier Houchard }
398315144b0fSOlivier Houchard q = ( bSig <= aSig0 );
398415144b0fSOlivier Houchard if ( q ) aSig0 -= bSig;
398515144b0fSOlivier Houchard expDiff -= 64;
398615144b0fSOlivier Houchard while ( 0 < expDiff ) {
398715144b0fSOlivier Houchard q = estimateDiv128To64( aSig0, aSig1, bSig );
398815144b0fSOlivier Houchard q = ( 2 < q ) ? q - 2 : 0;
398915144b0fSOlivier Houchard mul64To128( bSig, q, &term0, &term1 );
399015144b0fSOlivier Houchard sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
399115144b0fSOlivier Houchard shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
399215144b0fSOlivier Houchard expDiff -= 62;
399315144b0fSOlivier Houchard }
399415144b0fSOlivier Houchard expDiff += 64;
399515144b0fSOlivier Houchard if ( 0 < expDiff ) {
399615144b0fSOlivier Houchard q = estimateDiv128To64( aSig0, aSig1, bSig );
399715144b0fSOlivier Houchard q = ( 2 < q ) ? q - 2 : 0;
399815144b0fSOlivier Houchard q >>= 64 - expDiff;
399915144b0fSOlivier Houchard mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
400015144b0fSOlivier Houchard sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
400115144b0fSOlivier Houchard shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
400215144b0fSOlivier Houchard while ( le128( term0, term1, aSig0, aSig1 ) ) {
400315144b0fSOlivier Houchard ++q;
400415144b0fSOlivier Houchard sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
400515144b0fSOlivier Houchard }
400615144b0fSOlivier Houchard }
400715144b0fSOlivier Houchard else {
400815144b0fSOlivier Houchard term1 = 0;
400915144b0fSOlivier Houchard term0 = bSig;
401015144b0fSOlivier Houchard }
401115144b0fSOlivier Houchard sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
401215144b0fSOlivier Houchard if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
401315144b0fSOlivier Houchard || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
401415144b0fSOlivier Houchard && ( q & 1 ) )
401515144b0fSOlivier Houchard ) {
401615144b0fSOlivier Houchard aSig0 = alternateASig0;
401715144b0fSOlivier Houchard aSig1 = alternateASig1;
401815144b0fSOlivier Houchard zSign = ! zSign;
401915144b0fSOlivier Houchard }
402015144b0fSOlivier Houchard return
402115144b0fSOlivier Houchard normalizeRoundAndPackFloatx80(
402215144b0fSOlivier Houchard 80, zSign, bExp + expDiff, aSig0, aSig1 );
402315144b0fSOlivier Houchard
402415144b0fSOlivier Houchard }
402515144b0fSOlivier Houchard
402615144b0fSOlivier Houchard /*
402715144b0fSOlivier Houchard -------------------------------------------------------------------------------
402815144b0fSOlivier Houchard Returns the square root of the extended double-precision floating-point
402915144b0fSOlivier Houchard value `a'. The operation is performed according to the IEC/IEEE Standard
403015144b0fSOlivier Houchard for Binary Floating-Point Arithmetic.
403115144b0fSOlivier Houchard -------------------------------------------------------------------------------
403215144b0fSOlivier Houchard */
floatx80_sqrt(floatx80 a)403315144b0fSOlivier Houchard floatx80 floatx80_sqrt( floatx80 a )
403415144b0fSOlivier Houchard {
403515144b0fSOlivier Houchard flag aSign;
403615144b0fSOlivier Houchard int32 aExp, zExp;
403715144b0fSOlivier Houchard bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
403815144b0fSOlivier Houchard bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
403915144b0fSOlivier Houchard floatx80 z;
404015144b0fSOlivier Houchard
404115144b0fSOlivier Houchard aSig0 = extractFloatx80Frac( a );
404215144b0fSOlivier Houchard aExp = extractFloatx80Exp( a );
404315144b0fSOlivier Houchard aSign = extractFloatx80Sign( a );
404415144b0fSOlivier Houchard if ( aExp == 0x7FFF ) {
404515144b0fSOlivier Houchard if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
404615144b0fSOlivier Houchard if ( ! aSign ) return a;
404715144b0fSOlivier Houchard goto invalid;
404815144b0fSOlivier Houchard }
404915144b0fSOlivier Houchard if ( aSign ) {
405015144b0fSOlivier Houchard if ( ( aExp | aSig0 ) == 0 ) return a;
405115144b0fSOlivier Houchard invalid:
405215144b0fSOlivier Houchard float_raise( float_flag_invalid );
405315144b0fSOlivier Houchard z.low = floatx80_default_nan_low;
405415144b0fSOlivier Houchard z.high = floatx80_default_nan_high;
405515144b0fSOlivier Houchard return z;
405615144b0fSOlivier Houchard }
405715144b0fSOlivier Houchard if ( aExp == 0 ) {
405815144b0fSOlivier Houchard if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
405915144b0fSOlivier Houchard normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
406015144b0fSOlivier Houchard }
406115144b0fSOlivier Houchard zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
406215144b0fSOlivier Houchard zSig0 = estimateSqrt32( aExp, aSig0>>32 );
406315144b0fSOlivier Houchard shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
406415144b0fSOlivier Houchard zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
406515144b0fSOlivier Houchard doubleZSig0 = zSig0<<1;
406615144b0fSOlivier Houchard mul64To128( zSig0, zSig0, &term0, &term1 );
406715144b0fSOlivier Houchard sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
406815144b0fSOlivier Houchard while ( (sbits64) rem0 < 0 ) {
406915144b0fSOlivier Houchard --zSig0;
407015144b0fSOlivier Houchard doubleZSig0 -= 2;
407115144b0fSOlivier Houchard add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
407215144b0fSOlivier Houchard }
407315144b0fSOlivier Houchard zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
407415144b0fSOlivier Houchard if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
407515144b0fSOlivier Houchard if ( zSig1 == 0 ) zSig1 = 1;
407615144b0fSOlivier Houchard mul64To128( doubleZSig0, zSig1, &term1, &term2 );
407715144b0fSOlivier Houchard sub128( rem1, 0, term1, term2, &rem1, &rem2 );
407815144b0fSOlivier Houchard mul64To128( zSig1, zSig1, &term2, &term3 );
407915144b0fSOlivier Houchard sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
408015144b0fSOlivier Houchard while ( (sbits64) rem1 < 0 ) {
408115144b0fSOlivier Houchard --zSig1;
408215144b0fSOlivier Houchard shortShift128Left( 0, zSig1, 1, &term2, &term3 );
408315144b0fSOlivier Houchard term3 |= 1;
408415144b0fSOlivier Houchard term2 |= doubleZSig0;
408515144b0fSOlivier Houchard add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
408615144b0fSOlivier Houchard }
408715144b0fSOlivier Houchard zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
408815144b0fSOlivier Houchard }
408915144b0fSOlivier Houchard shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
409015144b0fSOlivier Houchard zSig0 |= doubleZSig0;
409115144b0fSOlivier Houchard return
409215144b0fSOlivier Houchard roundAndPackFloatx80(
409315144b0fSOlivier Houchard floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
409415144b0fSOlivier Houchard
409515144b0fSOlivier Houchard }
409615144b0fSOlivier Houchard
409715144b0fSOlivier Houchard /*
409815144b0fSOlivier Houchard -------------------------------------------------------------------------------
409915144b0fSOlivier Houchard Returns 1 if the extended double-precision floating-point value `a' is
410015144b0fSOlivier Houchard equal to the corresponding value `b', and 0 otherwise. The comparison is
410115144b0fSOlivier Houchard performed according to the IEC/IEEE Standard for Binary Floating-Point
410215144b0fSOlivier Houchard Arithmetic.
410315144b0fSOlivier Houchard -------------------------------------------------------------------------------
410415144b0fSOlivier Houchard */
floatx80_eq(floatx80 a,floatx80 b)410515144b0fSOlivier Houchard flag floatx80_eq( floatx80 a, floatx80 b )
410615144b0fSOlivier Houchard {
410715144b0fSOlivier Houchard
410815144b0fSOlivier Houchard if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
410915144b0fSOlivier Houchard && (bits64) ( extractFloatx80Frac( a )<<1 ) )
411015144b0fSOlivier Houchard || ( ( extractFloatx80Exp( b ) == 0x7FFF )
411115144b0fSOlivier Houchard && (bits64) ( extractFloatx80Frac( b )<<1 ) )
411215144b0fSOlivier Houchard ) {
411315144b0fSOlivier Houchard if ( floatx80_is_signaling_nan( a )
411415144b0fSOlivier Houchard || floatx80_is_signaling_nan( b ) ) {
411515144b0fSOlivier Houchard float_raise( float_flag_invalid );
411615144b0fSOlivier Houchard }
411715144b0fSOlivier Houchard return 0;
411815144b0fSOlivier Houchard }
411915144b0fSOlivier Houchard return
412015144b0fSOlivier Houchard ( a.low == b.low )
412115144b0fSOlivier Houchard && ( ( a.high == b.high )
412215144b0fSOlivier Houchard || ( ( a.low == 0 )
412315144b0fSOlivier Houchard && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
412415144b0fSOlivier Houchard );
412515144b0fSOlivier Houchard
412615144b0fSOlivier Houchard }
412715144b0fSOlivier Houchard
412815144b0fSOlivier Houchard /*
412915144b0fSOlivier Houchard -------------------------------------------------------------------------------
413015144b0fSOlivier Houchard Returns 1 if the extended double-precision floating-point value `a' is
413115144b0fSOlivier Houchard less than or equal to the corresponding value `b', and 0 otherwise. The
413215144b0fSOlivier Houchard comparison is performed according to the IEC/IEEE Standard for Binary
413315144b0fSOlivier Houchard Floating-Point Arithmetic.
413415144b0fSOlivier Houchard -------------------------------------------------------------------------------
413515144b0fSOlivier Houchard */
floatx80_le(floatx80 a,floatx80 b)413615144b0fSOlivier Houchard flag floatx80_le( floatx80 a, floatx80 b )
413715144b0fSOlivier Houchard {
413815144b0fSOlivier Houchard flag aSign, bSign;
413915144b0fSOlivier Houchard
414015144b0fSOlivier Houchard if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
414115144b0fSOlivier Houchard && (bits64) ( extractFloatx80Frac( a )<<1 ) )
414215144b0fSOlivier Houchard || ( ( extractFloatx80Exp( b ) == 0x7FFF )
414315144b0fSOlivier Houchard && (bits64) ( extractFloatx80Frac( b )<<1 ) )
414415144b0fSOlivier Houchard ) {
414515144b0fSOlivier Houchard float_raise( float_flag_invalid );
414615144b0fSOlivier Houchard return 0;
414715144b0fSOlivier Houchard }
414815144b0fSOlivier Houchard aSign = extractFloatx80Sign( a );
414915144b0fSOlivier Houchard bSign = extractFloatx80Sign( b );
415015144b0fSOlivier Houchard if ( aSign != bSign ) {
415115144b0fSOlivier Houchard return
415215144b0fSOlivier Houchard aSign
415315144b0fSOlivier Houchard || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
415415144b0fSOlivier Houchard == 0 );
415515144b0fSOlivier Houchard }
415615144b0fSOlivier Houchard return
415715144b0fSOlivier Houchard aSign ? le128( b.high, b.low, a.high, a.low )
415815144b0fSOlivier Houchard : le128( a.high, a.low, b.high, b.low );
415915144b0fSOlivier Houchard
416015144b0fSOlivier Houchard }
416115144b0fSOlivier Houchard
416215144b0fSOlivier Houchard /*
416315144b0fSOlivier Houchard -------------------------------------------------------------------------------
416415144b0fSOlivier Houchard Returns 1 if the extended double-precision floating-point value `a' is
416515144b0fSOlivier Houchard less than the corresponding value `b', and 0 otherwise. The comparison
416615144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
416715144b0fSOlivier Houchard Arithmetic.
416815144b0fSOlivier Houchard -------------------------------------------------------------------------------
416915144b0fSOlivier Houchard */
floatx80_lt(floatx80 a,floatx80 b)417015144b0fSOlivier Houchard flag floatx80_lt( floatx80 a, floatx80 b )
417115144b0fSOlivier Houchard {
417215144b0fSOlivier Houchard flag aSign, bSign;
417315144b0fSOlivier Houchard
417415144b0fSOlivier Houchard if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
417515144b0fSOlivier Houchard && (bits64) ( extractFloatx80Frac( a )<<1 ) )
417615144b0fSOlivier Houchard || ( ( extractFloatx80Exp( b ) == 0x7FFF )
417715144b0fSOlivier Houchard && (bits64) ( extractFloatx80Frac( b )<<1 ) )
417815144b0fSOlivier Houchard ) {
417915144b0fSOlivier Houchard float_raise( float_flag_invalid );
418015144b0fSOlivier Houchard return 0;
418115144b0fSOlivier Houchard }
418215144b0fSOlivier Houchard aSign = extractFloatx80Sign( a );
418315144b0fSOlivier Houchard bSign = extractFloatx80Sign( b );
418415144b0fSOlivier Houchard if ( aSign != bSign ) {
418515144b0fSOlivier Houchard return
418615144b0fSOlivier Houchard aSign
418715144b0fSOlivier Houchard && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
418815144b0fSOlivier Houchard != 0 );
418915144b0fSOlivier Houchard }
419015144b0fSOlivier Houchard return
419115144b0fSOlivier Houchard aSign ? lt128( b.high, b.low, a.high, a.low )
419215144b0fSOlivier Houchard : lt128( a.high, a.low, b.high, b.low );
419315144b0fSOlivier Houchard
419415144b0fSOlivier Houchard }
419515144b0fSOlivier Houchard
419615144b0fSOlivier Houchard /*
419715144b0fSOlivier Houchard -------------------------------------------------------------------------------
419815144b0fSOlivier Houchard Returns 1 if the extended double-precision floating-point value `a' is equal
419915144b0fSOlivier Houchard to the corresponding value `b', and 0 otherwise. The invalid exception is
420015144b0fSOlivier Houchard raised if either operand is a NaN. Otherwise, the comparison is performed
420115144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
420215144b0fSOlivier Houchard -------------------------------------------------------------------------------
420315144b0fSOlivier Houchard */
floatx80_eq_signaling(floatx80 a,floatx80 b)420415144b0fSOlivier Houchard flag floatx80_eq_signaling( floatx80 a, floatx80 b )
420515144b0fSOlivier Houchard {
420615144b0fSOlivier Houchard
420715144b0fSOlivier Houchard if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
420815144b0fSOlivier Houchard && (bits64) ( extractFloatx80Frac( a )<<1 ) )
420915144b0fSOlivier Houchard || ( ( extractFloatx80Exp( b ) == 0x7FFF )
421015144b0fSOlivier Houchard && (bits64) ( extractFloatx80Frac( b )<<1 ) )
421115144b0fSOlivier Houchard ) {
421215144b0fSOlivier Houchard float_raise( float_flag_invalid );
421315144b0fSOlivier Houchard return 0;
421415144b0fSOlivier Houchard }
421515144b0fSOlivier Houchard return
421615144b0fSOlivier Houchard ( a.low == b.low )
421715144b0fSOlivier Houchard && ( ( a.high == b.high )
421815144b0fSOlivier Houchard || ( ( a.low == 0 )
421915144b0fSOlivier Houchard && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
422015144b0fSOlivier Houchard );
422115144b0fSOlivier Houchard
422215144b0fSOlivier Houchard }
422315144b0fSOlivier Houchard
422415144b0fSOlivier Houchard /*
422515144b0fSOlivier Houchard -------------------------------------------------------------------------------
422615144b0fSOlivier Houchard Returns 1 if the extended double-precision floating-point value `a' is less
422715144b0fSOlivier Houchard than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
422815144b0fSOlivier Houchard do not cause an exception. Otherwise, the comparison is performed according
422915144b0fSOlivier Houchard to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
423015144b0fSOlivier Houchard -------------------------------------------------------------------------------
423115144b0fSOlivier Houchard */
floatx80_le_quiet(floatx80 a,floatx80 b)423215144b0fSOlivier Houchard flag floatx80_le_quiet( floatx80 a, floatx80 b )
423315144b0fSOlivier Houchard {
423415144b0fSOlivier Houchard flag aSign, bSign;
423515144b0fSOlivier Houchard
423615144b0fSOlivier Houchard if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
423715144b0fSOlivier Houchard && (bits64) ( extractFloatx80Frac( a )<<1 ) )
423815144b0fSOlivier Houchard || ( ( extractFloatx80Exp( b ) == 0x7FFF )
423915144b0fSOlivier Houchard && (bits64) ( extractFloatx80Frac( b )<<1 ) )
424015144b0fSOlivier Houchard ) {
424115144b0fSOlivier Houchard if ( floatx80_is_signaling_nan( a )
424215144b0fSOlivier Houchard || floatx80_is_signaling_nan( b ) ) {
424315144b0fSOlivier Houchard float_raise( float_flag_invalid );
424415144b0fSOlivier Houchard }
424515144b0fSOlivier Houchard return 0;
424615144b0fSOlivier Houchard }
424715144b0fSOlivier Houchard aSign = extractFloatx80Sign( a );
424815144b0fSOlivier Houchard bSign = extractFloatx80Sign( b );
424915144b0fSOlivier Houchard if ( aSign != bSign ) {
425015144b0fSOlivier Houchard return
425115144b0fSOlivier Houchard aSign
425215144b0fSOlivier Houchard || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
425315144b0fSOlivier Houchard == 0 );
425415144b0fSOlivier Houchard }
425515144b0fSOlivier Houchard return
425615144b0fSOlivier Houchard aSign ? le128( b.high, b.low, a.high, a.low )
425715144b0fSOlivier Houchard : le128( a.high, a.low, b.high, b.low );
425815144b0fSOlivier Houchard
425915144b0fSOlivier Houchard }
426015144b0fSOlivier Houchard
426115144b0fSOlivier Houchard /*
426215144b0fSOlivier Houchard -------------------------------------------------------------------------------
426315144b0fSOlivier Houchard Returns 1 if the extended double-precision floating-point value `a' is less
426415144b0fSOlivier Houchard than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
426515144b0fSOlivier Houchard an exception. Otherwise, the comparison is performed according to the
426615144b0fSOlivier Houchard IEC/IEEE Standard for Binary Floating-Point Arithmetic.
426715144b0fSOlivier Houchard -------------------------------------------------------------------------------
426815144b0fSOlivier Houchard */
floatx80_lt_quiet(floatx80 a,floatx80 b)426915144b0fSOlivier Houchard flag floatx80_lt_quiet( floatx80 a, floatx80 b )
427015144b0fSOlivier Houchard {
427115144b0fSOlivier Houchard flag aSign, bSign;
427215144b0fSOlivier Houchard
427315144b0fSOlivier Houchard if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
427415144b0fSOlivier Houchard && (bits64) ( extractFloatx80Frac( a )<<1 ) )
427515144b0fSOlivier Houchard || ( ( extractFloatx80Exp( b ) == 0x7FFF )
427615144b0fSOlivier Houchard && (bits64) ( extractFloatx80Frac( b )<<1 ) )
427715144b0fSOlivier Houchard ) {
427815144b0fSOlivier Houchard if ( floatx80_is_signaling_nan( a )
427915144b0fSOlivier Houchard || floatx80_is_signaling_nan( b ) ) {
428015144b0fSOlivier Houchard float_raise( float_flag_invalid );
428115144b0fSOlivier Houchard }
428215144b0fSOlivier Houchard return 0;
428315144b0fSOlivier Houchard }
428415144b0fSOlivier Houchard aSign = extractFloatx80Sign( a );
428515144b0fSOlivier Houchard bSign = extractFloatx80Sign( b );
428615144b0fSOlivier Houchard if ( aSign != bSign ) {
428715144b0fSOlivier Houchard return
428815144b0fSOlivier Houchard aSign
428915144b0fSOlivier Houchard && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
429015144b0fSOlivier Houchard != 0 );
429115144b0fSOlivier Houchard }
429215144b0fSOlivier Houchard return
429315144b0fSOlivier Houchard aSign ? lt128( b.high, b.low, a.high, a.low )
429415144b0fSOlivier Houchard : lt128( a.high, a.low, b.high, b.low );
429515144b0fSOlivier Houchard
429615144b0fSOlivier Houchard }
429715144b0fSOlivier Houchard
429815144b0fSOlivier Houchard #endif
429915144b0fSOlivier Houchard
430015144b0fSOlivier Houchard #ifdef FLOAT128
430115144b0fSOlivier Houchard
430215144b0fSOlivier Houchard /*
430315144b0fSOlivier Houchard -------------------------------------------------------------------------------
430415144b0fSOlivier Houchard Returns the result of converting the quadruple-precision floating-point
430515144b0fSOlivier Houchard value `a' to the 32-bit two's complement integer format. The conversion
430615144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
430715144b0fSOlivier Houchard Arithmetic---which means in particular that the conversion is rounded
430815144b0fSOlivier Houchard according to the current rounding mode. If `a' is a NaN, the largest
430915144b0fSOlivier Houchard positive integer is returned. Otherwise, if the conversion overflows, the
431015144b0fSOlivier Houchard largest integer with the same sign as `a' is returned.
431115144b0fSOlivier Houchard -------------------------------------------------------------------------------
431215144b0fSOlivier Houchard */
float128_to_int32(float128 a)431315144b0fSOlivier Houchard int32 float128_to_int32( float128 a )
431415144b0fSOlivier Houchard {
431515144b0fSOlivier Houchard flag aSign;
431615144b0fSOlivier Houchard int32 aExp, shiftCount;
431715144b0fSOlivier Houchard bits64 aSig0, aSig1;
431815144b0fSOlivier Houchard
431915144b0fSOlivier Houchard aSig1 = extractFloat128Frac1( a );
432015144b0fSOlivier Houchard aSig0 = extractFloat128Frac0( a );
432115144b0fSOlivier Houchard aExp = extractFloat128Exp( a );
432215144b0fSOlivier Houchard aSign = extractFloat128Sign( a );
432315144b0fSOlivier Houchard if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
432415144b0fSOlivier Houchard if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
432515144b0fSOlivier Houchard aSig0 |= ( aSig1 != 0 );
432615144b0fSOlivier Houchard shiftCount = 0x4028 - aExp;
432715144b0fSOlivier Houchard if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
432815144b0fSOlivier Houchard return roundAndPackInt32( aSign, aSig0 );
432915144b0fSOlivier Houchard
433015144b0fSOlivier Houchard }
433115144b0fSOlivier Houchard
433215144b0fSOlivier Houchard /*
433315144b0fSOlivier Houchard -------------------------------------------------------------------------------
433415144b0fSOlivier Houchard Returns the result of converting the quadruple-precision floating-point
433515144b0fSOlivier Houchard value `a' to the 32-bit two's complement integer format. The conversion
433615144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
433715144b0fSOlivier Houchard Arithmetic, except that the conversion is always rounded toward zero. If
433815144b0fSOlivier Houchard `a' is a NaN, the largest positive integer is returned. Otherwise, if the
433915144b0fSOlivier Houchard conversion overflows, the largest integer with the same sign as `a' is
434015144b0fSOlivier Houchard returned.
434115144b0fSOlivier Houchard -------------------------------------------------------------------------------
434215144b0fSOlivier Houchard */
float128_to_int32_round_to_zero(float128 a)434315144b0fSOlivier Houchard int32 float128_to_int32_round_to_zero( float128 a )
434415144b0fSOlivier Houchard {
434515144b0fSOlivier Houchard flag aSign;
434615144b0fSOlivier Houchard int32 aExp, shiftCount;
434715144b0fSOlivier Houchard bits64 aSig0, aSig1, savedASig;
434815144b0fSOlivier Houchard int32 z;
434915144b0fSOlivier Houchard
435015144b0fSOlivier Houchard aSig1 = extractFloat128Frac1( a );
435115144b0fSOlivier Houchard aSig0 = extractFloat128Frac0( a );
435215144b0fSOlivier Houchard aExp = extractFloat128Exp( a );
435315144b0fSOlivier Houchard aSign = extractFloat128Sign( a );
435415144b0fSOlivier Houchard aSig0 |= ( aSig1 != 0 );
435515144b0fSOlivier Houchard if ( 0x401E < aExp ) {
435615144b0fSOlivier Houchard if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
435715144b0fSOlivier Houchard goto invalid;
435815144b0fSOlivier Houchard }
435915144b0fSOlivier Houchard else if ( aExp < 0x3FFF ) {
436015144b0fSOlivier Houchard if ( aExp || aSig0 ) float_exception_flags |= float_flag_inexact;
436115144b0fSOlivier Houchard return 0;
436215144b0fSOlivier Houchard }
436315144b0fSOlivier Houchard aSig0 |= LIT64( 0x0001000000000000 );
436415144b0fSOlivier Houchard shiftCount = 0x402F - aExp;
436515144b0fSOlivier Houchard savedASig = aSig0;
436615144b0fSOlivier Houchard aSig0 >>= shiftCount;
436715144b0fSOlivier Houchard z = aSig0;
436815144b0fSOlivier Houchard if ( aSign ) z = - z;
436915144b0fSOlivier Houchard if ( ( z < 0 ) ^ aSign ) {
437015144b0fSOlivier Houchard invalid:
437115144b0fSOlivier Houchard float_raise( float_flag_invalid );
437215144b0fSOlivier Houchard return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
437315144b0fSOlivier Houchard }
437415144b0fSOlivier Houchard if ( ( aSig0<<shiftCount ) != savedASig ) {
437515144b0fSOlivier Houchard float_exception_flags |= float_flag_inexact;
437615144b0fSOlivier Houchard }
437715144b0fSOlivier Houchard return z;
437815144b0fSOlivier Houchard
437915144b0fSOlivier Houchard }
438015144b0fSOlivier Houchard
438115144b0fSOlivier Houchard /*
438215144b0fSOlivier Houchard -------------------------------------------------------------------------------
438315144b0fSOlivier Houchard Returns the result of converting the quadruple-precision floating-point
438415144b0fSOlivier Houchard value `a' to the 64-bit two's complement integer format. The conversion
438515144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
438615144b0fSOlivier Houchard Arithmetic---which means in particular that the conversion is rounded
438715144b0fSOlivier Houchard according to the current rounding mode. If `a' is a NaN, the largest
438815144b0fSOlivier Houchard positive integer is returned. Otherwise, if the conversion overflows, the
438915144b0fSOlivier Houchard largest integer with the same sign as `a' is returned.
439015144b0fSOlivier Houchard -------------------------------------------------------------------------------
439115144b0fSOlivier Houchard */
float128_to_int64(float128 a)439215144b0fSOlivier Houchard int64 float128_to_int64( float128 a )
439315144b0fSOlivier Houchard {
439415144b0fSOlivier Houchard flag aSign;
439515144b0fSOlivier Houchard int32 aExp, shiftCount;
439615144b0fSOlivier Houchard bits64 aSig0, aSig1;
439715144b0fSOlivier Houchard
439815144b0fSOlivier Houchard aSig1 = extractFloat128Frac1( a );
439915144b0fSOlivier Houchard aSig0 = extractFloat128Frac0( a );
440015144b0fSOlivier Houchard aExp = extractFloat128Exp( a );
440115144b0fSOlivier Houchard aSign = extractFloat128Sign( a );
440215144b0fSOlivier Houchard if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
440315144b0fSOlivier Houchard shiftCount = 0x402F - aExp;
440415144b0fSOlivier Houchard if ( shiftCount <= 0 ) {
440515144b0fSOlivier Houchard if ( 0x403E < aExp ) {
440615144b0fSOlivier Houchard float_raise( float_flag_invalid );
440715144b0fSOlivier Houchard if ( ! aSign
440815144b0fSOlivier Houchard || ( ( aExp == 0x7FFF )
440915144b0fSOlivier Houchard && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
441015144b0fSOlivier Houchard )
441115144b0fSOlivier Houchard ) {
441215144b0fSOlivier Houchard return LIT64( 0x7FFFFFFFFFFFFFFF );
441315144b0fSOlivier Houchard }
441415144b0fSOlivier Houchard return (sbits64) LIT64( 0x8000000000000000 );
441515144b0fSOlivier Houchard }
441615144b0fSOlivier Houchard shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
441715144b0fSOlivier Houchard }
441815144b0fSOlivier Houchard else {
441915144b0fSOlivier Houchard shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
442015144b0fSOlivier Houchard }
442115144b0fSOlivier Houchard return roundAndPackInt64( aSign, aSig0, aSig1 );
442215144b0fSOlivier Houchard
442315144b0fSOlivier Houchard }
442415144b0fSOlivier Houchard
442515144b0fSOlivier Houchard /*
442615144b0fSOlivier Houchard -------------------------------------------------------------------------------
442715144b0fSOlivier Houchard Returns the result of converting the quadruple-precision floating-point
442815144b0fSOlivier Houchard value `a' to the 64-bit two's complement integer format. The conversion
442915144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
443015144b0fSOlivier Houchard Arithmetic, except that the conversion is always rounded toward zero.
443115144b0fSOlivier Houchard If `a' is a NaN, the largest positive integer is returned. Otherwise, if
443215144b0fSOlivier Houchard the conversion overflows, the largest integer with the same sign as `a' is
443315144b0fSOlivier Houchard returned.
443415144b0fSOlivier Houchard -------------------------------------------------------------------------------
443515144b0fSOlivier Houchard */
float128_to_int64_round_to_zero(float128 a)443615144b0fSOlivier Houchard int64 float128_to_int64_round_to_zero( float128 a )
443715144b0fSOlivier Houchard {
443815144b0fSOlivier Houchard flag aSign;
443915144b0fSOlivier Houchard int32 aExp, shiftCount;
444015144b0fSOlivier Houchard bits64 aSig0, aSig1;
444115144b0fSOlivier Houchard int64 z;
444215144b0fSOlivier Houchard
444315144b0fSOlivier Houchard aSig1 = extractFloat128Frac1( a );
444415144b0fSOlivier Houchard aSig0 = extractFloat128Frac0( a );
444515144b0fSOlivier Houchard aExp = extractFloat128Exp( a );
444615144b0fSOlivier Houchard aSign = extractFloat128Sign( a );
444715144b0fSOlivier Houchard if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
444815144b0fSOlivier Houchard shiftCount = aExp - 0x402F;
444915144b0fSOlivier Houchard if ( 0 < shiftCount ) {
445015144b0fSOlivier Houchard if ( 0x403E <= aExp ) {
445115144b0fSOlivier Houchard aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
445215144b0fSOlivier Houchard if ( ( a.high == LIT64( 0xC03E000000000000 ) )
445315144b0fSOlivier Houchard && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
445415144b0fSOlivier Houchard if ( aSig1 ) float_exception_flags |= float_flag_inexact;
445515144b0fSOlivier Houchard }
445615144b0fSOlivier Houchard else {
445715144b0fSOlivier Houchard float_raise( float_flag_invalid );
445815144b0fSOlivier Houchard if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
445915144b0fSOlivier Houchard return LIT64( 0x7FFFFFFFFFFFFFFF );
446015144b0fSOlivier Houchard }
446115144b0fSOlivier Houchard }
446215144b0fSOlivier Houchard return (sbits64) LIT64( 0x8000000000000000 );
446315144b0fSOlivier Houchard }
446415144b0fSOlivier Houchard z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
446515144b0fSOlivier Houchard if ( (bits64) ( aSig1<<shiftCount ) ) {
446615144b0fSOlivier Houchard float_exception_flags |= float_flag_inexact;
446715144b0fSOlivier Houchard }
446815144b0fSOlivier Houchard }
446915144b0fSOlivier Houchard else {
447015144b0fSOlivier Houchard if ( aExp < 0x3FFF ) {
447115144b0fSOlivier Houchard if ( aExp | aSig0 | aSig1 ) {
447215144b0fSOlivier Houchard float_exception_flags |= float_flag_inexact;
447315144b0fSOlivier Houchard }
447415144b0fSOlivier Houchard return 0;
447515144b0fSOlivier Houchard }
447615144b0fSOlivier Houchard z = aSig0>>( - shiftCount );
447715144b0fSOlivier Houchard if ( aSig1
447815144b0fSOlivier Houchard || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
447915144b0fSOlivier Houchard float_exception_flags |= float_flag_inexact;
448015144b0fSOlivier Houchard }
448115144b0fSOlivier Houchard }
448215144b0fSOlivier Houchard if ( aSign ) z = - z;
448315144b0fSOlivier Houchard return z;
448415144b0fSOlivier Houchard
448515144b0fSOlivier Houchard }
448615144b0fSOlivier Houchard
4487c36abe0dSDavid Schultz #if (defined(SOFTFLOATSPARC64_FOR_GCC) || defined(SOFTFLOAT_FOR_GCC)) \
4488c36abe0dSDavid Schultz && defined(SOFTFLOAT_NEED_FIXUNS)
4489c36abe0dSDavid Schultz /*
4490c36abe0dSDavid Schultz * just like above - but do not care for overflow of signed results
4491c36abe0dSDavid Schultz */
float128_to_uint64_round_to_zero(float128 a)4492c36abe0dSDavid Schultz uint64 float128_to_uint64_round_to_zero( float128 a )
4493c36abe0dSDavid Schultz {
4494c36abe0dSDavid Schultz flag aSign;
4495c36abe0dSDavid Schultz int32 aExp, shiftCount;
4496c36abe0dSDavid Schultz bits64 aSig0, aSig1;
4497c36abe0dSDavid Schultz uint64 z;
4498c36abe0dSDavid Schultz
4499c36abe0dSDavid Schultz aSig1 = extractFloat128Frac1( a );
4500c36abe0dSDavid Schultz aSig0 = extractFloat128Frac0( a );
4501c36abe0dSDavid Schultz aExp = extractFloat128Exp( a );
4502c36abe0dSDavid Schultz aSign = extractFloat128Sign( a );
4503c36abe0dSDavid Schultz if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4504c36abe0dSDavid Schultz shiftCount = aExp - 0x402F;
4505c36abe0dSDavid Schultz if ( 0 < shiftCount ) {
4506c36abe0dSDavid Schultz if ( 0x403F <= aExp ) {
4507c36abe0dSDavid Schultz aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
4508c36abe0dSDavid Schultz if ( ( a.high == LIT64( 0xC03E000000000000 ) )
4509c36abe0dSDavid Schultz && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4510c36abe0dSDavid Schultz if ( aSig1 ) float_exception_flags |= float_flag_inexact;
4511c36abe0dSDavid Schultz }
4512c36abe0dSDavid Schultz else {
4513c36abe0dSDavid Schultz float_raise( float_flag_invalid );
4514c36abe0dSDavid Schultz }
4515c36abe0dSDavid Schultz return LIT64( 0xFFFFFFFFFFFFFFFF );
4516c36abe0dSDavid Schultz }
4517c36abe0dSDavid Schultz z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
4518c36abe0dSDavid Schultz if ( (bits64) ( aSig1<<shiftCount ) ) {
4519c36abe0dSDavid Schultz float_exception_flags |= float_flag_inexact;
4520c36abe0dSDavid Schultz }
4521c36abe0dSDavid Schultz }
4522c36abe0dSDavid Schultz else {
4523c36abe0dSDavid Schultz if ( aExp < 0x3FFF ) {
4524c36abe0dSDavid Schultz if ( aExp | aSig0 | aSig1 ) {
4525c36abe0dSDavid Schultz float_exception_flags |= float_flag_inexact;
4526c36abe0dSDavid Schultz }
4527c36abe0dSDavid Schultz return 0;
4528c36abe0dSDavid Schultz }
4529c36abe0dSDavid Schultz z = aSig0>>( - shiftCount );
4530c36abe0dSDavid Schultz if (aSig1 || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4531c36abe0dSDavid Schultz float_exception_flags |= float_flag_inexact;
4532c36abe0dSDavid Schultz }
4533c36abe0dSDavid Schultz }
4534c36abe0dSDavid Schultz if ( aSign ) z = - z;
4535c36abe0dSDavid Schultz return z;
4536c36abe0dSDavid Schultz
4537c36abe0dSDavid Schultz }
4538c36abe0dSDavid Schultz #endif /* (SOFTFLOATSPARC64_FOR_GCC || SOFTFLOAT_FOR_GCC) && SOFTFLOAT_NEED_FIXUNS */
4539c36abe0dSDavid Schultz
454015144b0fSOlivier Houchard /*
454115144b0fSOlivier Houchard -------------------------------------------------------------------------------
454215144b0fSOlivier Houchard Returns the result of converting the quadruple-precision floating-point
454315144b0fSOlivier Houchard value `a' to the single-precision floating-point format. The conversion
454415144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
454515144b0fSOlivier Houchard Arithmetic.
454615144b0fSOlivier Houchard -------------------------------------------------------------------------------
454715144b0fSOlivier Houchard */
float128_to_float32(float128 a)454815144b0fSOlivier Houchard float32 float128_to_float32( float128 a )
454915144b0fSOlivier Houchard {
455015144b0fSOlivier Houchard flag aSign;
455115144b0fSOlivier Houchard int32 aExp;
455215144b0fSOlivier Houchard bits64 aSig0, aSig1;
455315144b0fSOlivier Houchard bits32 zSig;
455415144b0fSOlivier Houchard
455515144b0fSOlivier Houchard aSig1 = extractFloat128Frac1( a );
455615144b0fSOlivier Houchard aSig0 = extractFloat128Frac0( a );
455715144b0fSOlivier Houchard aExp = extractFloat128Exp( a );
455815144b0fSOlivier Houchard aSign = extractFloat128Sign( a );
455915144b0fSOlivier Houchard if ( aExp == 0x7FFF ) {
456015144b0fSOlivier Houchard if ( aSig0 | aSig1 ) {
456115144b0fSOlivier Houchard return commonNaNToFloat32( float128ToCommonNaN( a ) );
456215144b0fSOlivier Houchard }
456315144b0fSOlivier Houchard return packFloat32( aSign, 0xFF, 0 );
456415144b0fSOlivier Houchard }
456515144b0fSOlivier Houchard aSig0 |= ( aSig1 != 0 );
456615144b0fSOlivier Houchard shift64RightJamming( aSig0, 18, &aSig0 );
456715144b0fSOlivier Houchard zSig = aSig0;
456815144b0fSOlivier Houchard if ( aExp || zSig ) {
456915144b0fSOlivier Houchard zSig |= 0x40000000;
457015144b0fSOlivier Houchard aExp -= 0x3F81;
457115144b0fSOlivier Houchard }
457215144b0fSOlivier Houchard return roundAndPackFloat32( aSign, aExp, zSig );
457315144b0fSOlivier Houchard
457415144b0fSOlivier Houchard }
457515144b0fSOlivier Houchard
457615144b0fSOlivier Houchard /*
457715144b0fSOlivier Houchard -------------------------------------------------------------------------------
457815144b0fSOlivier Houchard Returns the result of converting the quadruple-precision floating-point
457915144b0fSOlivier Houchard value `a' to the double-precision floating-point format. The conversion
458015144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
458115144b0fSOlivier Houchard Arithmetic.
458215144b0fSOlivier Houchard -------------------------------------------------------------------------------
458315144b0fSOlivier Houchard */
float128_to_float64(float128 a)458415144b0fSOlivier Houchard float64 float128_to_float64( float128 a )
458515144b0fSOlivier Houchard {
458615144b0fSOlivier Houchard flag aSign;
458715144b0fSOlivier Houchard int32 aExp;
458815144b0fSOlivier Houchard bits64 aSig0, aSig1;
458915144b0fSOlivier Houchard
459015144b0fSOlivier Houchard aSig1 = extractFloat128Frac1( a );
459115144b0fSOlivier Houchard aSig0 = extractFloat128Frac0( a );
459215144b0fSOlivier Houchard aExp = extractFloat128Exp( a );
459315144b0fSOlivier Houchard aSign = extractFloat128Sign( a );
459415144b0fSOlivier Houchard if ( aExp == 0x7FFF ) {
459515144b0fSOlivier Houchard if ( aSig0 | aSig1 ) {
459615144b0fSOlivier Houchard return commonNaNToFloat64( float128ToCommonNaN( a ) );
459715144b0fSOlivier Houchard }
459815144b0fSOlivier Houchard return packFloat64( aSign, 0x7FF, 0 );
459915144b0fSOlivier Houchard }
460015144b0fSOlivier Houchard shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
460115144b0fSOlivier Houchard aSig0 |= ( aSig1 != 0 );
460215144b0fSOlivier Houchard if ( aExp || aSig0 ) {
460315144b0fSOlivier Houchard aSig0 |= LIT64( 0x4000000000000000 );
460415144b0fSOlivier Houchard aExp -= 0x3C01;
460515144b0fSOlivier Houchard }
460615144b0fSOlivier Houchard return roundAndPackFloat64( aSign, aExp, aSig0 );
460715144b0fSOlivier Houchard
460815144b0fSOlivier Houchard }
460915144b0fSOlivier Houchard
461015144b0fSOlivier Houchard #ifdef FLOATX80
461115144b0fSOlivier Houchard
461215144b0fSOlivier Houchard /*
461315144b0fSOlivier Houchard -------------------------------------------------------------------------------
461415144b0fSOlivier Houchard Returns the result of converting the quadruple-precision floating-point
461515144b0fSOlivier Houchard value `a' to the extended double-precision floating-point format. The
461615144b0fSOlivier Houchard conversion is performed according to the IEC/IEEE Standard for Binary
461715144b0fSOlivier Houchard Floating-Point Arithmetic.
461815144b0fSOlivier Houchard -------------------------------------------------------------------------------
461915144b0fSOlivier Houchard */
float128_to_floatx80(float128 a)462015144b0fSOlivier Houchard floatx80 float128_to_floatx80( float128 a )
462115144b0fSOlivier Houchard {
462215144b0fSOlivier Houchard flag aSign;
462315144b0fSOlivier Houchard int32 aExp;
462415144b0fSOlivier Houchard bits64 aSig0, aSig1;
462515144b0fSOlivier Houchard
462615144b0fSOlivier Houchard aSig1 = extractFloat128Frac1( a );
462715144b0fSOlivier Houchard aSig0 = extractFloat128Frac0( a );
462815144b0fSOlivier Houchard aExp = extractFloat128Exp( a );
462915144b0fSOlivier Houchard aSign = extractFloat128Sign( a );
463015144b0fSOlivier Houchard if ( aExp == 0x7FFF ) {
463115144b0fSOlivier Houchard if ( aSig0 | aSig1 ) {
463215144b0fSOlivier Houchard return commonNaNToFloatx80( float128ToCommonNaN( a ) );
463315144b0fSOlivier Houchard }
463415144b0fSOlivier Houchard return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
463515144b0fSOlivier Houchard }
463615144b0fSOlivier Houchard if ( aExp == 0 ) {
463715144b0fSOlivier Houchard if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
463815144b0fSOlivier Houchard normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
463915144b0fSOlivier Houchard }
464015144b0fSOlivier Houchard else {
464115144b0fSOlivier Houchard aSig0 |= LIT64( 0x0001000000000000 );
464215144b0fSOlivier Houchard }
464315144b0fSOlivier Houchard shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
464415144b0fSOlivier Houchard return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
464515144b0fSOlivier Houchard
464615144b0fSOlivier Houchard }
464715144b0fSOlivier Houchard
464815144b0fSOlivier Houchard #endif
464915144b0fSOlivier Houchard
465015144b0fSOlivier Houchard /*
465115144b0fSOlivier Houchard -------------------------------------------------------------------------------
465215144b0fSOlivier Houchard Rounds the quadruple-precision floating-point value `a' to an integer, and
465315144b0fSOlivier Houchard returns the result as a quadruple-precision floating-point value. The
465415144b0fSOlivier Houchard operation is performed according to the IEC/IEEE Standard for Binary
465515144b0fSOlivier Houchard Floating-Point Arithmetic.
465615144b0fSOlivier Houchard -------------------------------------------------------------------------------
465715144b0fSOlivier Houchard */
float128_round_to_int(float128 a)465815144b0fSOlivier Houchard float128 float128_round_to_int( float128 a )
465915144b0fSOlivier Houchard {
466015144b0fSOlivier Houchard flag aSign;
466115144b0fSOlivier Houchard int32 aExp;
466215144b0fSOlivier Houchard bits64 lastBitMask, roundBitsMask;
466315144b0fSOlivier Houchard int8 roundingMode;
466415144b0fSOlivier Houchard float128 z;
466515144b0fSOlivier Houchard
466615144b0fSOlivier Houchard aExp = extractFloat128Exp( a );
466715144b0fSOlivier Houchard if ( 0x402F <= aExp ) {
466815144b0fSOlivier Houchard if ( 0x406F <= aExp ) {
466915144b0fSOlivier Houchard if ( ( aExp == 0x7FFF )
467015144b0fSOlivier Houchard && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
467115144b0fSOlivier Houchard ) {
467215144b0fSOlivier Houchard return propagateFloat128NaN( a, a );
467315144b0fSOlivier Houchard }
467415144b0fSOlivier Houchard return a;
467515144b0fSOlivier Houchard }
467615144b0fSOlivier Houchard lastBitMask = 1;
467715144b0fSOlivier Houchard lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
467815144b0fSOlivier Houchard roundBitsMask = lastBitMask - 1;
467915144b0fSOlivier Houchard z = a;
468015144b0fSOlivier Houchard roundingMode = float_rounding_mode;
468115144b0fSOlivier Houchard if ( roundingMode == float_round_nearest_even ) {
468215144b0fSOlivier Houchard if ( lastBitMask ) {
468315144b0fSOlivier Houchard add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
468415144b0fSOlivier Houchard if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
468515144b0fSOlivier Houchard }
468615144b0fSOlivier Houchard else {
468715144b0fSOlivier Houchard if ( (sbits64) z.low < 0 ) {
468815144b0fSOlivier Houchard ++z.high;
468915144b0fSOlivier Houchard if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
469015144b0fSOlivier Houchard }
469115144b0fSOlivier Houchard }
469215144b0fSOlivier Houchard }
469315144b0fSOlivier Houchard else if ( roundingMode != float_round_to_zero ) {
469415144b0fSOlivier Houchard if ( extractFloat128Sign( z )
469515144b0fSOlivier Houchard ^ ( roundingMode == float_round_up ) ) {
469615144b0fSOlivier Houchard add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
469715144b0fSOlivier Houchard }
469815144b0fSOlivier Houchard }
469915144b0fSOlivier Houchard z.low &= ~ roundBitsMask;
470015144b0fSOlivier Houchard }
470115144b0fSOlivier Houchard else {
470215144b0fSOlivier Houchard if ( aExp < 0x3FFF ) {
470315144b0fSOlivier Houchard if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
470415144b0fSOlivier Houchard float_exception_flags |= float_flag_inexact;
470515144b0fSOlivier Houchard aSign = extractFloat128Sign( a );
470615144b0fSOlivier Houchard switch ( float_rounding_mode ) {
470715144b0fSOlivier Houchard case float_round_nearest_even:
470815144b0fSOlivier Houchard if ( ( aExp == 0x3FFE )
470915144b0fSOlivier Houchard && ( extractFloat128Frac0( a )
471015144b0fSOlivier Houchard | extractFloat128Frac1( a ) )
471115144b0fSOlivier Houchard ) {
471215144b0fSOlivier Houchard return packFloat128( aSign, 0x3FFF, 0, 0 );
471315144b0fSOlivier Houchard }
471415144b0fSOlivier Houchard break;
471515144b0fSOlivier Houchard case float_round_to_zero:
471615144b0fSOlivier Houchard break;
471715144b0fSOlivier Houchard case float_round_down:
471815144b0fSOlivier Houchard return
471915144b0fSOlivier Houchard aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
472015144b0fSOlivier Houchard : packFloat128( 0, 0, 0, 0 );
472115144b0fSOlivier Houchard case float_round_up:
472215144b0fSOlivier Houchard return
472315144b0fSOlivier Houchard aSign ? packFloat128( 1, 0, 0, 0 )
472415144b0fSOlivier Houchard : packFloat128( 0, 0x3FFF, 0, 0 );
472515144b0fSOlivier Houchard }
472615144b0fSOlivier Houchard return packFloat128( aSign, 0, 0, 0 );
472715144b0fSOlivier Houchard }
472815144b0fSOlivier Houchard lastBitMask = 1;
472915144b0fSOlivier Houchard lastBitMask <<= 0x402F - aExp;
473015144b0fSOlivier Houchard roundBitsMask = lastBitMask - 1;
473115144b0fSOlivier Houchard z.low = 0;
473215144b0fSOlivier Houchard z.high = a.high;
473315144b0fSOlivier Houchard roundingMode = float_rounding_mode;
473415144b0fSOlivier Houchard if ( roundingMode == float_round_nearest_even ) {
473515144b0fSOlivier Houchard z.high += lastBitMask>>1;
473615144b0fSOlivier Houchard if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
473715144b0fSOlivier Houchard z.high &= ~ lastBitMask;
473815144b0fSOlivier Houchard }
473915144b0fSOlivier Houchard }
474015144b0fSOlivier Houchard else if ( roundingMode != float_round_to_zero ) {
474115144b0fSOlivier Houchard if ( extractFloat128Sign( z )
474215144b0fSOlivier Houchard ^ ( roundingMode == float_round_up ) ) {
474315144b0fSOlivier Houchard z.high |= ( a.low != 0 );
474415144b0fSOlivier Houchard z.high += roundBitsMask;
474515144b0fSOlivier Houchard }
474615144b0fSOlivier Houchard }
474715144b0fSOlivier Houchard z.high &= ~ roundBitsMask;
474815144b0fSOlivier Houchard }
474915144b0fSOlivier Houchard if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
475015144b0fSOlivier Houchard float_exception_flags |= float_flag_inexact;
475115144b0fSOlivier Houchard }
475215144b0fSOlivier Houchard return z;
475315144b0fSOlivier Houchard
475415144b0fSOlivier Houchard }
475515144b0fSOlivier Houchard
475615144b0fSOlivier Houchard /*
475715144b0fSOlivier Houchard -------------------------------------------------------------------------------
475815144b0fSOlivier Houchard Returns the result of adding the absolute values of the quadruple-precision
475915144b0fSOlivier Houchard floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
476015144b0fSOlivier Houchard before being returned. `zSign' is ignored if the result is a NaN.
476115144b0fSOlivier Houchard The addition is performed according to the IEC/IEEE Standard for Binary
476215144b0fSOlivier Houchard Floating-Point Arithmetic.
476315144b0fSOlivier Houchard -------------------------------------------------------------------------------
476415144b0fSOlivier Houchard */
addFloat128Sigs(float128 a,float128 b,flag zSign)476515144b0fSOlivier Houchard static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
476615144b0fSOlivier Houchard {
476715144b0fSOlivier Houchard int32 aExp, bExp, zExp;
476815144b0fSOlivier Houchard bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
476915144b0fSOlivier Houchard int32 expDiff;
477015144b0fSOlivier Houchard
477115144b0fSOlivier Houchard aSig1 = extractFloat128Frac1( a );
477215144b0fSOlivier Houchard aSig0 = extractFloat128Frac0( a );
477315144b0fSOlivier Houchard aExp = extractFloat128Exp( a );
477415144b0fSOlivier Houchard bSig1 = extractFloat128Frac1( b );
477515144b0fSOlivier Houchard bSig0 = extractFloat128Frac0( b );
477615144b0fSOlivier Houchard bExp = extractFloat128Exp( b );
477715144b0fSOlivier Houchard expDiff = aExp - bExp;
477815144b0fSOlivier Houchard if ( 0 < expDiff ) {
477915144b0fSOlivier Houchard if ( aExp == 0x7FFF ) {
478015144b0fSOlivier Houchard if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
478115144b0fSOlivier Houchard return a;
478215144b0fSOlivier Houchard }
478315144b0fSOlivier Houchard if ( bExp == 0 ) {
478415144b0fSOlivier Houchard --expDiff;
478515144b0fSOlivier Houchard }
478615144b0fSOlivier Houchard else {
478715144b0fSOlivier Houchard bSig0 |= LIT64( 0x0001000000000000 );
478815144b0fSOlivier Houchard }
478915144b0fSOlivier Houchard shift128ExtraRightJamming(
479015144b0fSOlivier Houchard bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
479115144b0fSOlivier Houchard zExp = aExp;
479215144b0fSOlivier Houchard }
479315144b0fSOlivier Houchard else if ( expDiff < 0 ) {
479415144b0fSOlivier Houchard if ( bExp == 0x7FFF ) {
479515144b0fSOlivier Houchard if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
479615144b0fSOlivier Houchard return packFloat128( zSign, 0x7FFF, 0, 0 );
479715144b0fSOlivier Houchard }
479815144b0fSOlivier Houchard if ( aExp == 0 ) {
479915144b0fSOlivier Houchard ++expDiff;
480015144b0fSOlivier Houchard }
480115144b0fSOlivier Houchard else {
480215144b0fSOlivier Houchard aSig0 |= LIT64( 0x0001000000000000 );
480315144b0fSOlivier Houchard }
480415144b0fSOlivier Houchard shift128ExtraRightJamming(
480515144b0fSOlivier Houchard aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
480615144b0fSOlivier Houchard zExp = bExp;
480715144b0fSOlivier Houchard }
480815144b0fSOlivier Houchard else {
480915144b0fSOlivier Houchard if ( aExp == 0x7FFF ) {
481015144b0fSOlivier Houchard if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
481115144b0fSOlivier Houchard return propagateFloat128NaN( a, b );
481215144b0fSOlivier Houchard }
481315144b0fSOlivier Houchard return a;
481415144b0fSOlivier Houchard }
481515144b0fSOlivier Houchard add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
481615144b0fSOlivier Houchard if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
481715144b0fSOlivier Houchard zSig2 = 0;
481815144b0fSOlivier Houchard zSig0 |= LIT64( 0x0002000000000000 );
481915144b0fSOlivier Houchard zExp = aExp;
482015144b0fSOlivier Houchard goto shiftRight1;
482115144b0fSOlivier Houchard }
482215144b0fSOlivier Houchard aSig0 |= LIT64( 0x0001000000000000 );
482315144b0fSOlivier Houchard add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
482415144b0fSOlivier Houchard --zExp;
482515144b0fSOlivier Houchard if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
482615144b0fSOlivier Houchard ++zExp;
482715144b0fSOlivier Houchard shiftRight1:
482815144b0fSOlivier Houchard shift128ExtraRightJamming(
482915144b0fSOlivier Houchard zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
483015144b0fSOlivier Houchard roundAndPack:
483115144b0fSOlivier Houchard return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
483215144b0fSOlivier Houchard
483315144b0fSOlivier Houchard }
483415144b0fSOlivier Houchard
483515144b0fSOlivier Houchard /*
483615144b0fSOlivier Houchard -------------------------------------------------------------------------------
483715144b0fSOlivier Houchard Returns the result of subtracting the absolute values of the quadruple-
483815144b0fSOlivier Houchard precision floating-point values `a' and `b'. If `zSign' is 1, the
483915144b0fSOlivier Houchard difference is negated before being returned. `zSign' is ignored if the
484015144b0fSOlivier Houchard result is a NaN. The subtraction is performed according to the IEC/IEEE
484115144b0fSOlivier Houchard Standard for Binary Floating-Point Arithmetic.
484215144b0fSOlivier Houchard -------------------------------------------------------------------------------
484315144b0fSOlivier Houchard */
subFloat128Sigs(float128 a,float128 b,flag zSign)484415144b0fSOlivier Houchard static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
484515144b0fSOlivier Houchard {
484615144b0fSOlivier Houchard int32 aExp, bExp, zExp;
484715144b0fSOlivier Houchard bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
484815144b0fSOlivier Houchard int32 expDiff;
484915144b0fSOlivier Houchard float128 z;
485015144b0fSOlivier Houchard
485115144b0fSOlivier Houchard aSig1 = extractFloat128Frac1( a );
485215144b0fSOlivier Houchard aSig0 = extractFloat128Frac0( a );
485315144b0fSOlivier Houchard aExp = extractFloat128Exp( a );
485415144b0fSOlivier Houchard bSig1 = extractFloat128Frac1( b );
485515144b0fSOlivier Houchard bSig0 = extractFloat128Frac0( b );
485615144b0fSOlivier Houchard bExp = extractFloat128Exp( b );
485715144b0fSOlivier Houchard expDiff = aExp - bExp;
485815144b0fSOlivier Houchard shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
485915144b0fSOlivier Houchard shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
486015144b0fSOlivier Houchard if ( 0 < expDiff ) goto aExpBigger;
486115144b0fSOlivier Houchard if ( expDiff < 0 ) goto bExpBigger;
486215144b0fSOlivier Houchard if ( aExp == 0x7FFF ) {
486315144b0fSOlivier Houchard if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
486415144b0fSOlivier Houchard return propagateFloat128NaN( a, b );
486515144b0fSOlivier Houchard }
486615144b0fSOlivier Houchard float_raise( float_flag_invalid );
486715144b0fSOlivier Houchard z.low = float128_default_nan_low;
486815144b0fSOlivier Houchard z.high = float128_default_nan_high;
486915144b0fSOlivier Houchard return z;
487015144b0fSOlivier Houchard }
487115144b0fSOlivier Houchard if ( aExp == 0 ) {
487215144b0fSOlivier Houchard aExp = 1;
487315144b0fSOlivier Houchard bExp = 1;
487415144b0fSOlivier Houchard }
487515144b0fSOlivier Houchard if ( bSig0 < aSig0 ) goto aBigger;
487615144b0fSOlivier Houchard if ( aSig0 < bSig0 ) goto bBigger;
487715144b0fSOlivier Houchard if ( bSig1 < aSig1 ) goto aBigger;
487815144b0fSOlivier Houchard if ( aSig1 < bSig1 ) goto bBigger;
487915144b0fSOlivier Houchard return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 );
488015144b0fSOlivier Houchard bExpBigger:
488115144b0fSOlivier Houchard if ( bExp == 0x7FFF ) {
488215144b0fSOlivier Houchard if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
488315144b0fSOlivier Houchard return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
488415144b0fSOlivier Houchard }
488515144b0fSOlivier Houchard if ( aExp == 0 ) {
488615144b0fSOlivier Houchard ++expDiff;
488715144b0fSOlivier Houchard }
488815144b0fSOlivier Houchard else {
488915144b0fSOlivier Houchard aSig0 |= LIT64( 0x4000000000000000 );
489015144b0fSOlivier Houchard }
489115144b0fSOlivier Houchard shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
489215144b0fSOlivier Houchard bSig0 |= LIT64( 0x4000000000000000 );
489315144b0fSOlivier Houchard bBigger:
489415144b0fSOlivier Houchard sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
489515144b0fSOlivier Houchard zExp = bExp;
489615144b0fSOlivier Houchard zSign ^= 1;
489715144b0fSOlivier Houchard goto normalizeRoundAndPack;
489815144b0fSOlivier Houchard aExpBigger:
489915144b0fSOlivier Houchard if ( aExp == 0x7FFF ) {
490015144b0fSOlivier Houchard if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
490115144b0fSOlivier Houchard return a;
490215144b0fSOlivier Houchard }
490315144b0fSOlivier Houchard if ( bExp == 0 ) {
490415144b0fSOlivier Houchard --expDiff;
490515144b0fSOlivier Houchard }
490615144b0fSOlivier Houchard else {
490715144b0fSOlivier Houchard bSig0 |= LIT64( 0x4000000000000000 );
490815144b0fSOlivier Houchard }
490915144b0fSOlivier Houchard shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
491015144b0fSOlivier Houchard aSig0 |= LIT64( 0x4000000000000000 );
491115144b0fSOlivier Houchard aBigger:
491215144b0fSOlivier Houchard sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
491315144b0fSOlivier Houchard zExp = aExp;
491415144b0fSOlivier Houchard normalizeRoundAndPack:
491515144b0fSOlivier Houchard --zExp;
491615144b0fSOlivier Houchard return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
491715144b0fSOlivier Houchard
491815144b0fSOlivier Houchard }
491915144b0fSOlivier Houchard
492015144b0fSOlivier Houchard /*
492115144b0fSOlivier Houchard -------------------------------------------------------------------------------
492215144b0fSOlivier Houchard Returns the result of adding the quadruple-precision floating-point values
492315144b0fSOlivier Houchard `a' and `b'. The operation is performed according to the IEC/IEEE Standard
492415144b0fSOlivier Houchard for Binary Floating-Point Arithmetic.
492515144b0fSOlivier Houchard -------------------------------------------------------------------------------
492615144b0fSOlivier Houchard */
float128_add(float128 a,float128 b)492715144b0fSOlivier Houchard float128 float128_add( float128 a, float128 b )
492815144b0fSOlivier Houchard {
492915144b0fSOlivier Houchard flag aSign, bSign;
493015144b0fSOlivier Houchard
493115144b0fSOlivier Houchard aSign = extractFloat128Sign( a );
493215144b0fSOlivier Houchard bSign = extractFloat128Sign( b );
493315144b0fSOlivier Houchard if ( aSign == bSign ) {
493415144b0fSOlivier Houchard return addFloat128Sigs( a, b, aSign );
493515144b0fSOlivier Houchard }
493615144b0fSOlivier Houchard else {
493715144b0fSOlivier Houchard return subFloat128Sigs( a, b, aSign );
493815144b0fSOlivier Houchard }
493915144b0fSOlivier Houchard
494015144b0fSOlivier Houchard }
494115144b0fSOlivier Houchard
494215144b0fSOlivier Houchard /*
494315144b0fSOlivier Houchard -------------------------------------------------------------------------------
494415144b0fSOlivier Houchard Returns the result of subtracting the quadruple-precision floating-point
494515144b0fSOlivier Houchard values `a' and `b'. The operation is performed according to the IEC/IEEE
494615144b0fSOlivier Houchard Standard for Binary Floating-Point Arithmetic.
494715144b0fSOlivier Houchard -------------------------------------------------------------------------------
494815144b0fSOlivier Houchard */
float128_sub(float128 a,float128 b)494915144b0fSOlivier Houchard float128 float128_sub( float128 a, float128 b )
495015144b0fSOlivier Houchard {
495115144b0fSOlivier Houchard flag aSign, bSign;
495215144b0fSOlivier Houchard
495315144b0fSOlivier Houchard aSign = extractFloat128Sign( a );
495415144b0fSOlivier Houchard bSign = extractFloat128Sign( b );
495515144b0fSOlivier Houchard if ( aSign == bSign ) {
495615144b0fSOlivier Houchard return subFloat128Sigs( a, b, aSign );
495715144b0fSOlivier Houchard }
495815144b0fSOlivier Houchard else {
495915144b0fSOlivier Houchard return addFloat128Sigs( a, b, aSign );
496015144b0fSOlivier Houchard }
496115144b0fSOlivier Houchard
496215144b0fSOlivier Houchard }
496315144b0fSOlivier Houchard
496415144b0fSOlivier Houchard /*
496515144b0fSOlivier Houchard -------------------------------------------------------------------------------
496615144b0fSOlivier Houchard Returns the result of multiplying the quadruple-precision floating-point
496715144b0fSOlivier Houchard values `a' and `b'. The operation is performed according to the IEC/IEEE
496815144b0fSOlivier Houchard Standard for Binary Floating-Point Arithmetic.
496915144b0fSOlivier Houchard -------------------------------------------------------------------------------
497015144b0fSOlivier Houchard */
float128_mul(float128 a,float128 b)497115144b0fSOlivier Houchard float128 float128_mul( float128 a, float128 b )
497215144b0fSOlivier Houchard {
497315144b0fSOlivier Houchard flag aSign, bSign, zSign;
497415144b0fSOlivier Houchard int32 aExp, bExp, zExp;
497515144b0fSOlivier Houchard bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
497615144b0fSOlivier Houchard float128 z;
497715144b0fSOlivier Houchard
497815144b0fSOlivier Houchard aSig1 = extractFloat128Frac1( a );
497915144b0fSOlivier Houchard aSig0 = extractFloat128Frac0( a );
498015144b0fSOlivier Houchard aExp = extractFloat128Exp( a );
498115144b0fSOlivier Houchard aSign = extractFloat128Sign( a );
498215144b0fSOlivier Houchard bSig1 = extractFloat128Frac1( b );
498315144b0fSOlivier Houchard bSig0 = extractFloat128Frac0( b );
498415144b0fSOlivier Houchard bExp = extractFloat128Exp( b );
498515144b0fSOlivier Houchard bSign = extractFloat128Sign( b );
498615144b0fSOlivier Houchard zSign = aSign ^ bSign;
498715144b0fSOlivier Houchard if ( aExp == 0x7FFF ) {
498815144b0fSOlivier Houchard if ( ( aSig0 | aSig1 )
498915144b0fSOlivier Houchard || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
499015144b0fSOlivier Houchard return propagateFloat128NaN( a, b );
499115144b0fSOlivier Houchard }
499215144b0fSOlivier Houchard if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
499315144b0fSOlivier Houchard return packFloat128( zSign, 0x7FFF, 0, 0 );
499415144b0fSOlivier Houchard }
499515144b0fSOlivier Houchard if ( bExp == 0x7FFF ) {
499615144b0fSOlivier Houchard if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
499715144b0fSOlivier Houchard if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
499815144b0fSOlivier Houchard invalid:
499915144b0fSOlivier Houchard float_raise( float_flag_invalid );
500015144b0fSOlivier Houchard z.low = float128_default_nan_low;
500115144b0fSOlivier Houchard z.high = float128_default_nan_high;
500215144b0fSOlivier Houchard return z;
500315144b0fSOlivier Houchard }
500415144b0fSOlivier Houchard return packFloat128( zSign, 0x7FFF, 0, 0 );
500515144b0fSOlivier Houchard }
500615144b0fSOlivier Houchard if ( aExp == 0 ) {
500715144b0fSOlivier Houchard if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
500815144b0fSOlivier Houchard normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
500915144b0fSOlivier Houchard }
501015144b0fSOlivier Houchard if ( bExp == 0 ) {
501115144b0fSOlivier Houchard if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
501215144b0fSOlivier Houchard normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
501315144b0fSOlivier Houchard }
501415144b0fSOlivier Houchard zExp = aExp + bExp - 0x4000;
501515144b0fSOlivier Houchard aSig0 |= LIT64( 0x0001000000000000 );
501615144b0fSOlivier Houchard shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
501715144b0fSOlivier Houchard mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
501815144b0fSOlivier Houchard add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
501915144b0fSOlivier Houchard zSig2 |= ( zSig3 != 0 );
502015144b0fSOlivier Houchard if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
502115144b0fSOlivier Houchard shift128ExtraRightJamming(
502215144b0fSOlivier Houchard zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
502315144b0fSOlivier Houchard ++zExp;
502415144b0fSOlivier Houchard }
502515144b0fSOlivier Houchard return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
502615144b0fSOlivier Houchard
502715144b0fSOlivier Houchard }
502815144b0fSOlivier Houchard
502915144b0fSOlivier Houchard /*
503015144b0fSOlivier Houchard -------------------------------------------------------------------------------
503115144b0fSOlivier Houchard Returns the result of dividing the quadruple-precision floating-point value
503215144b0fSOlivier Houchard `a' by the corresponding value `b'. The operation is performed according to
503315144b0fSOlivier Houchard the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
503415144b0fSOlivier Houchard -------------------------------------------------------------------------------
503515144b0fSOlivier Houchard */
float128_div(float128 a,float128 b)503615144b0fSOlivier Houchard float128 float128_div( float128 a, float128 b )
503715144b0fSOlivier Houchard {
503815144b0fSOlivier Houchard flag aSign, bSign, zSign;
503915144b0fSOlivier Houchard int32 aExp, bExp, zExp;
504015144b0fSOlivier Houchard bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
504115144b0fSOlivier Houchard bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
504215144b0fSOlivier Houchard float128 z;
504315144b0fSOlivier Houchard
504415144b0fSOlivier Houchard aSig1 = extractFloat128Frac1( a );
504515144b0fSOlivier Houchard aSig0 = extractFloat128Frac0( a );
504615144b0fSOlivier Houchard aExp = extractFloat128Exp( a );
504715144b0fSOlivier Houchard aSign = extractFloat128Sign( a );
504815144b0fSOlivier Houchard bSig1 = extractFloat128Frac1( b );
504915144b0fSOlivier Houchard bSig0 = extractFloat128Frac0( b );
505015144b0fSOlivier Houchard bExp = extractFloat128Exp( b );
505115144b0fSOlivier Houchard bSign = extractFloat128Sign( b );
505215144b0fSOlivier Houchard zSign = aSign ^ bSign;
505315144b0fSOlivier Houchard if ( aExp == 0x7FFF ) {
505415144b0fSOlivier Houchard if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
505515144b0fSOlivier Houchard if ( bExp == 0x7FFF ) {
505615144b0fSOlivier Houchard if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
505715144b0fSOlivier Houchard goto invalid;
505815144b0fSOlivier Houchard }
505915144b0fSOlivier Houchard return packFloat128( zSign, 0x7FFF, 0, 0 );
506015144b0fSOlivier Houchard }
506115144b0fSOlivier Houchard if ( bExp == 0x7FFF ) {
506215144b0fSOlivier Houchard if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
506315144b0fSOlivier Houchard return packFloat128( zSign, 0, 0, 0 );
506415144b0fSOlivier Houchard }
506515144b0fSOlivier Houchard if ( bExp == 0 ) {
506615144b0fSOlivier Houchard if ( ( bSig0 | bSig1 ) == 0 ) {
506715144b0fSOlivier Houchard if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
506815144b0fSOlivier Houchard invalid:
506915144b0fSOlivier Houchard float_raise( float_flag_invalid );
507015144b0fSOlivier Houchard z.low = float128_default_nan_low;
507115144b0fSOlivier Houchard z.high = float128_default_nan_high;
507215144b0fSOlivier Houchard return z;
507315144b0fSOlivier Houchard }
507415144b0fSOlivier Houchard float_raise( float_flag_divbyzero );
507515144b0fSOlivier Houchard return packFloat128( zSign, 0x7FFF, 0, 0 );
507615144b0fSOlivier Houchard }
507715144b0fSOlivier Houchard normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
507815144b0fSOlivier Houchard }
507915144b0fSOlivier Houchard if ( aExp == 0 ) {
508015144b0fSOlivier Houchard if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
508115144b0fSOlivier Houchard normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
508215144b0fSOlivier Houchard }
508315144b0fSOlivier Houchard zExp = aExp - bExp + 0x3FFD;
508415144b0fSOlivier Houchard shortShift128Left(
508515144b0fSOlivier Houchard aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
508615144b0fSOlivier Houchard shortShift128Left(
508715144b0fSOlivier Houchard bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
508815144b0fSOlivier Houchard if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
508915144b0fSOlivier Houchard shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
509015144b0fSOlivier Houchard ++zExp;
509115144b0fSOlivier Houchard }
509215144b0fSOlivier Houchard zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
509315144b0fSOlivier Houchard mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
509415144b0fSOlivier Houchard sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
509515144b0fSOlivier Houchard while ( (sbits64) rem0 < 0 ) {
509615144b0fSOlivier Houchard --zSig0;
509715144b0fSOlivier Houchard add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
509815144b0fSOlivier Houchard }
509915144b0fSOlivier Houchard zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
510015144b0fSOlivier Houchard if ( ( zSig1 & 0x3FFF ) <= 4 ) {
510115144b0fSOlivier Houchard mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
510215144b0fSOlivier Houchard sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
510315144b0fSOlivier Houchard while ( (sbits64) rem1 < 0 ) {
510415144b0fSOlivier Houchard --zSig1;
510515144b0fSOlivier Houchard add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
510615144b0fSOlivier Houchard }
510715144b0fSOlivier Houchard zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
510815144b0fSOlivier Houchard }
510915144b0fSOlivier Houchard shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
511015144b0fSOlivier Houchard return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
511115144b0fSOlivier Houchard
511215144b0fSOlivier Houchard }
511315144b0fSOlivier Houchard
511415144b0fSOlivier Houchard /*
511515144b0fSOlivier Houchard -------------------------------------------------------------------------------
511615144b0fSOlivier Houchard Returns the remainder of the quadruple-precision floating-point value `a'
511715144b0fSOlivier Houchard with respect to the corresponding value `b'. The operation is performed
511815144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
511915144b0fSOlivier Houchard -------------------------------------------------------------------------------
512015144b0fSOlivier Houchard */
float128_rem(float128 a,float128 b)512115144b0fSOlivier Houchard float128 float128_rem( float128 a, float128 b )
512215144b0fSOlivier Houchard {
512315144b0fSOlivier Houchard flag aSign, bSign, zSign;
512415144b0fSOlivier Houchard int32 aExp, bExp, expDiff;
512515144b0fSOlivier Houchard bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
512615144b0fSOlivier Houchard bits64 allZero, alternateASig0, alternateASig1, sigMean1;
512715144b0fSOlivier Houchard sbits64 sigMean0;
512815144b0fSOlivier Houchard float128 z;
512915144b0fSOlivier Houchard
513015144b0fSOlivier Houchard aSig1 = extractFloat128Frac1( a );
513115144b0fSOlivier Houchard aSig0 = extractFloat128Frac0( a );
513215144b0fSOlivier Houchard aExp = extractFloat128Exp( a );
513315144b0fSOlivier Houchard aSign = extractFloat128Sign( a );
513415144b0fSOlivier Houchard bSig1 = extractFloat128Frac1( b );
513515144b0fSOlivier Houchard bSig0 = extractFloat128Frac0( b );
513615144b0fSOlivier Houchard bExp = extractFloat128Exp( b );
513715144b0fSOlivier Houchard bSign = extractFloat128Sign( b );
513815144b0fSOlivier Houchard if ( aExp == 0x7FFF ) {
513915144b0fSOlivier Houchard if ( ( aSig0 | aSig1 )
514015144b0fSOlivier Houchard || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
514115144b0fSOlivier Houchard return propagateFloat128NaN( a, b );
514215144b0fSOlivier Houchard }
514315144b0fSOlivier Houchard goto invalid;
514415144b0fSOlivier Houchard }
514515144b0fSOlivier Houchard if ( bExp == 0x7FFF ) {
514615144b0fSOlivier Houchard if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
514715144b0fSOlivier Houchard return a;
514815144b0fSOlivier Houchard }
514915144b0fSOlivier Houchard if ( bExp == 0 ) {
515015144b0fSOlivier Houchard if ( ( bSig0 | bSig1 ) == 0 ) {
515115144b0fSOlivier Houchard invalid:
515215144b0fSOlivier Houchard float_raise( float_flag_invalid );
515315144b0fSOlivier Houchard z.low = float128_default_nan_low;
515415144b0fSOlivier Houchard z.high = float128_default_nan_high;
515515144b0fSOlivier Houchard return z;
515615144b0fSOlivier Houchard }
515715144b0fSOlivier Houchard normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
515815144b0fSOlivier Houchard }
515915144b0fSOlivier Houchard if ( aExp == 0 ) {
516015144b0fSOlivier Houchard if ( ( aSig0 | aSig1 ) == 0 ) return a;
516115144b0fSOlivier Houchard normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
516215144b0fSOlivier Houchard }
516315144b0fSOlivier Houchard expDiff = aExp - bExp;
516415144b0fSOlivier Houchard if ( expDiff < -1 ) return a;
516515144b0fSOlivier Houchard shortShift128Left(
516615144b0fSOlivier Houchard aSig0 | LIT64( 0x0001000000000000 ),
516715144b0fSOlivier Houchard aSig1,
516815144b0fSOlivier Houchard 15 - ( expDiff < 0 ),
516915144b0fSOlivier Houchard &aSig0,
517015144b0fSOlivier Houchard &aSig1
517115144b0fSOlivier Houchard );
517215144b0fSOlivier Houchard shortShift128Left(
517315144b0fSOlivier Houchard bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
517415144b0fSOlivier Houchard q = le128( bSig0, bSig1, aSig0, aSig1 );
517515144b0fSOlivier Houchard if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
517615144b0fSOlivier Houchard expDiff -= 64;
517715144b0fSOlivier Houchard while ( 0 < expDiff ) {
517815144b0fSOlivier Houchard q = estimateDiv128To64( aSig0, aSig1, bSig0 );
517915144b0fSOlivier Houchard q = ( 4 < q ) ? q - 4 : 0;
518015144b0fSOlivier Houchard mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
518115144b0fSOlivier Houchard shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
518215144b0fSOlivier Houchard shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
518315144b0fSOlivier Houchard sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
518415144b0fSOlivier Houchard expDiff -= 61;
518515144b0fSOlivier Houchard }
518615144b0fSOlivier Houchard if ( -64 < expDiff ) {
518715144b0fSOlivier Houchard q = estimateDiv128To64( aSig0, aSig1, bSig0 );
518815144b0fSOlivier Houchard q = ( 4 < q ) ? q - 4 : 0;
518915144b0fSOlivier Houchard q >>= - expDiff;
519015144b0fSOlivier Houchard shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
519115144b0fSOlivier Houchard expDiff += 52;
519215144b0fSOlivier Houchard if ( expDiff < 0 ) {
519315144b0fSOlivier Houchard shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
519415144b0fSOlivier Houchard }
519515144b0fSOlivier Houchard else {
519615144b0fSOlivier Houchard shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
519715144b0fSOlivier Houchard }
519815144b0fSOlivier Houchard mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
519915144b0fSOlivier Houchard sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
520015144b0fSOlivier Houchard }
520115144b0fSOlivier Houchard else {
520215144b0fSOlivier Houchard shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
520315144b0fSOlivier Houchard shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
520415144b0fSOlivier Houchard }
520515144b0fSOlivier Houchard do {
520615144b0fSOlivier Houchard alternateASig0 = aSig0;
520715144b0fSOlivier Houchard alternateASig1 = aSig1;
520815144b0fSOlivier Houchard ++q;
520915144b0fSOlivier Houchard sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
521015144b0fSOlivier Houchard } while ( 0 <= (sbits64) aSig0 );
521115144b0fSOlivier Houchard add128(
5212c36abe0dSDavid Schultz aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
521315144b0fSOlivier Houchard if ( ( sigMean0 < 0 )
521415144b0fSOlivier Houchard || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
521515144b0fSOlivier Houchard aSig0 = alternateASig0;
521615144b0fSOlivier Houchard aSig1 = alternateASig1;
521715144b0fSOlivier Houchard }
521815144b0fSOlivier Houchard zSign = ( (sbits64) aSig0 < 0 );
521915144b0fSOlivier Houchard if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
522015144b0fSOlivier Houchard return
522115144b0fSOlivier Houchard normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
522215144b0fSOlivier Houchard
522315144b0fSOlivier Houchard }
522415144b0fSOlivier Houchard
522515144b0fSOlivier Houchard /*
522615144b0fSOlivier Houchard -------------------------------------------------------------------------------
522715144b0fSOlivier Houchard Returns the square root of the quadruple-precision floating-point value `a'.
522815144b0fSOlivier Houchard The operation is performed according to the IEC/IEEE Standard for Binary
522915144b0fSOlivier Houchard Floating-Point Arithmetic.
523015144b0fSOlivier Houchard -------------------------------------------------------------------------------
523115144b0fSOlivier Houchard */
float128_sqrt(float128 a)523215144b0fSOlivier Houchard float128 float128_sqrt( float128 a )
523315144b0fSOlivier Houchard {
523415144b0fSOlivier Houchard flag aSign;
523515144b0fSOlivier Houchard int32 aExp, zExp;
523615144b0fSOlivier Houchard bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
523715144b0fSOlivier Houchard bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
523815144b0fSOlivier Houchard float128 z;
523915144b0fSOlivier Houchard
524015144b0fSOlivier Houchard aSig1 = extractFloat128Frac1( a );
524115144b0fSOlivier Houchard aSig0 = extractFloat128Frac0( a );
524215144b0fSOlivier Houchard aExp = extractFloat128Exp( a );
524315144b0fSOlivier Houchard aSign = extractFloat128Sign( a );
524415144b0fSOlivier Houchard if ( aExp == 0x7FFF ) {
524515144b0fSOlivier Houchard if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
524615144b0fSOlivier Houchard if ( ! aSign ) return a;
524715144b0fSOlivier Houchard goto invalid;
524815144b0fSOlivier Houchard }
524915144b0fSOlivier Houchard if ( aSign ) {
525015144b0fSOlivier Houchard if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
525115144b0fSOlivier Houchard invalid:
525215144b0fSOlivier Houchard float_raise( float_flag_invalid );
525315144b0fSOlivier Houchard z.low = float128_default_nan_low;
525415144b0fSOlivier Houchard z.high = float128_default_nan_high;
525515144b0fSOlivier Houchard return z;
525615144b0fSOlivier Houchard }
525715144b0fSOlivier Houchard if ( aExp == 0 ) {
525815144b0fSOlivier Houchard if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
525915144b0fSOlivier Houchard normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
526015144b0fSOlivier Houchard }
526115144b0fSOlivier Houchard zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
526215144b0fSOlivier Houchard aSig0 |= LIT64( 0x0001000000000000 );
526315144b0fSOlivier Houchard zSig0 = estimateSqrt32( aExp, aSig0>>17 );
526415144b0fSOlivier Houchard shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
526515144b0fSOlivier Houchard zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
526615144b0fSOlivier Houchard doubleZSig0 = zSig0<<1;
526715144b0fSOlivier Houchard mul64To128( zSig0, zSig0, &term0, &term1 );
526815144b0fSOlivier Houchard sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
526915144b0fSOlivier Houchard while ( (sbits64) rem0 < 0 ) {
527015144b0fSOlivier Houchard --zSig0;
527115144b0fSOlivier Houchard doubleZSig0 -= 2;
527215144b0fSOlivier Houchard add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
527315144b0fSOlivier Houchard }
527415144b0fSOlivier Houchard zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
527515144b0fSOlivier Houchard if ( ( zSig1 & 0x1FFF ) <= 5 ) {
527615144b0fSOlivier Houchard if ( zSig1 == 0 ) zSig1 = 1;
527715144b0fSOlivier Houchard mul64To128( doubleZSig0, zSig1, &term1, &term2 );
527815144b0fSOlivier Houchard sub128( rem1, 0, term1, term2, &rem1, &rem2 );
527915144b0fSOlivier Houchard mul64To128( zSig1, zSig1, &term2, &term3 );
528015144b0fSOlivier Houchard sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
528115144b0fSOlivier Houchard while ( (sbits64) rem1 < 0 ) {
528215144b0fSOlivier Houchard --zSig1;
528315144b0fSOlivier Houchard shortShift128Left( 0, zSig1, 1, &term2, &term3 );
528415144b0fSOlivier Houchard term3 |= 1;
528515144b0fSOlivier Houchard term2 |= doubleZSig0;
528615144b0fSOlivier Houchard add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
528715144b0fSOlivier Houchard }
528815144b0fSOlivier Houchard zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
528915144b0fSOlivier Houchard }
529015144b0fSOlivier Houchard shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
529115144b0fSOlivier Houchard return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
529215144b0fSOlivier Houchard
529315144b0fSOlivier Houchard }
529415144b0fSOlivier Houchard
529515144b0fSOlivier Houchard /*
529615144b0fSOlivier Houchard -------------------------------------------------------------------------------
529715144b0fSOlivier Houchard Returns 1 if the quadruple-precision floating-point value `a' is equal to
529815144b0fSOlivier Houchard the corresponding value `b', and 0 otherwise. The comparison is performed
529915144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
530015144b0fSOlivier Houchard -------------------------------------------------------------------------------
530115144b0fSOlivier Houchard */
float128_eq(float128 a,float128 b)530215144b0fSOlivier Houchard flag float128_eq( float128 a, float128 b )
530315144b0fSOlivier Houchard {
530415144b0fSOlivier Houchard
530515144b0fSOlivier Houchard if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
530615144b0fSOlivier Houchard && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
530715144b0fSOlivier Houchard || ( ( extractFloat128Exp( b ) == 0x7FFF )
530815144b0fSOlivier Houchard && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
530915144b0fSOlivier Houchard ) {
531015144b0fSOlivier Houchard if ( float128_is_signaling_nan( a )
531115144b0fSOlivier Houchard || float128_is_signaling_nan( b ) ) {
531215144b0fSOlivier Houchard float_raise( float_flag_invalid );
531315144b0fSOlivier Houchard }
531415144b0fSOlivier Houchard return 0;
531515144b0fSOlivier Houchard }
531615144b0fSOlivier Houchard return
531715144b0fSOlivier Houchard ( a.low == b.low )
531815144b0fSOlivier Houchard && ( ( a.high == b.high )
531915144b0fSOlivier Houchard || ( ( a.low == 0 )
532015144b0fSOlivier Houchard && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
532115144b0fSOlivier Houchard );
532215144b0fSOlivier Houchard
532315144b0fSOlivier Houchard }
532415144b0fSOlivier Houchard
532515144b0fSOlivier Houchard /*
532615144b0fSOlivier Houchard -------------------------------------------------------------------------------
532715144b0fSOlivier Houchard Returns 1 if the quadruple-precision floating-point value `a' is less than
532815144b0fSOlivier Houchard or equal to the corresponding value `b', and 0 otherwise. The comparison
532915144b0fSOlivier Houchard is performed according to the IEC/IEEE Standard for Binary Floating-Point
533015144b0fSOlivier Houchard Arithmetic.
533115144b0fSOlivier Houchard -------------------------------------------------------------------------------
533215144b0fSOlivier Houchard */
float128_le(float128 a,float128 b)533315144b0fSOlivier Houchard flag float128_le( float128 a, float128 b )
533415144b0fSOlivier Houchard {
533515144b0fSOlivier Houchard flag aSign, bSign;
533615144b0fSOlivier Houchard
533715144b0fSOlivier Houchard if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
533815144b0fSOlivier Houchard && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
533915144b0fSOlivier Houchard || ( ( extractFloat128Exp( b ) == 0x7FFF )
534015144b0fSOlivier Houchard && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
534115144b0fSOlivier Houchard ) {
534215144b0fSOlivier Houchard float_raise( float_flag_invalid );
534315144b0fSOlivier Houchard return 0;
534415144b0fSOlivier Houchard }
534515144b0fSOlivier Houchard aSign = extractFloat128Sign( a );
534615144b0fSOlivier Houchard bSign = extractFloat128Sign( b );
534715144b0fSOlivier Houchard if ( aSign != bSign ) {
534815144b0fSOlivier Houchard return
534915144b0fSOlivier Houchard aSign
535015144b0fSOlivier Houchard || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
535115144b0fSOlivier Houchard == 0 );
535215144b0fSOlivier Houchard }
535315144b0fSOlivier Houchard return
535415144b0fSOlivier Houchard aSign ? le128( b.high, b.low, a.high, a.low )
535515144b0fSOlivier Houchard : le128( a.high, a.low, b.high, b.low );
535615144b0fSOlivier Houchard
535715144b0fSOlivier Houchard }
535815144b0fSOlivier Houchard
535915144b0fSOlivier Houchard /*
536015144b0fSOlivier Houchard -------------------------------------------------------------------------------
536115144b0fSOlivier Houchard Returns 1 if the quadruple-precision floating-point value `a' is less than
536215144b0fSOlivier Houchard the corresponding value `b', and 0 otherwise. The comparison is performed
536315144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
536415144b0fSOlivier Houchard -------------------------------------------------------------------------------
536515144b0fSOlivier Houchard */
float128_lt(float128 a,float128 b)536615144b0fSOlivier Houchard flag float128_lt( float128 a, float128 b )
536715144b0fSOlivier Houchard {
536815144b0fSOlivier Houchard flag aSign, bSign;
536915144b0fSOlivier Houchard
537015144b0fSOlivier Houchard if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
537115144b0fSOlivier Houchard && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
537215144b0fSOlivier Houchard || ( ( extractFloat128Exp( b ) == 0x7FFF )
537315144b0fSOlivier Houchard && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
537415144b0fSOlivier Houchard ) {
537515144b0fSOlivier Houchard float_raise( float_flag_invalid );
537615144b0fSOlivier Houchard return 0;
537715144b0fSOlivier Houchard }
537815144b0fSOlivier Houchard aSign = extractFloat128Sign( a );
537915144b0fSOlivier Houchard bSign = extractFloat128Sign( b );
538015144b0fSOlivier Houchard if ( aSign != bSign ) {
538115144b0fSOlivier Houchard return
538215144b0fSOlivier Houchard aSign
538315144b0fSOlivier Houchard && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
538415144b0fSOlivier Houchard != 0 );
538515144b0fSOlivier Houchard }
538615144b0fSOlivier Houchard return
538715144b0fSOlivier Houchard aSign ? lt128( b.high, b.low, a.high, a.low )
538815144b0fSOlivier Houchard : lt128( a.high, a.low, b.high, b.low );
538915144b0fSOlivier Houchard
539015144b0fSOlivier Houchard }
539115144b0fSOlivier Houchard
539215144b0fSOlivier Houchard /*
539315144b0fSOlivier Houchard -------------------------------------------------------------------------------
539415144b0fSOlivier Houchard Returns 1 if the quadruple-precision floating-point value `a' is equal to
539515144b0fSOlivier Houchard the corresponding value `b', and 0 otherwise. The invalid exception is
539615144b0fSOlivier Houchard raised if either operand is a NaN. Otherwise, the comparison is performed
539715144b0fSOlivier Houchard according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
539815144b0fSOlivier Houchard -------------------------------------------------------------------------------
539915144b0fSOlivier Houchard */
float128_eq_signaling(float128 a,float128 b)540015144b0fSOlivier Houchard flag float128_eq_signaling( float128 a, float128 b )
540115144b0fSOlivier Houchard {
540215144b0fSOlivier Houchard
540315144b0fSOlivier Houchard if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
540415144b0fSOlivier Houchard && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
540515144b0fSOlivier Houchard || ( ( extractFloat128Exp( b ) == 0x7FFF )
540615144b0fSOlivier Houchard && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
540715144b0fSOlivier Houchard ) {
540815144b0fSOlivier Houchard float_raise( float_flag_invalid );
540915144b0fSOlivier Houchard return 0;
541015144b0fSOlivier Houchard }
541115144b0fSOlivier Houchard return
541215144b0fSOlivier Houchard ( a.low == b.low )
541315144b0fSOlivier Houchard && ( ( a.high == b.high )
541415144b0fSOlivier Houchard || ( ( a.low == 0 )
541515144b0fSOlivier Houchard && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
541615144b0fSOlivier Houchard );
541715144b0fSOlivier Houchard
541815144b0fSOlivier Houchard }
541915144b0fSOlivier Houchard
542015144b0fSOlivier Houchard /*
542115144b0fSOlivier Houchard -------------------------------------------------------------------------------
542215144b0fSOlivier Houchard Returns 1 if the quadruple-precision floating-point value `a' is less than
542315144b0fSOlivier Houchard or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
542415144b0fSOlivier Houchard cause an exception. Otherwise, the comparison is performed according to the
542515144b0fSOlivier Houchard IEC/IEEE Standard for Binary Floating-Point Arithmetic.
542615144b0fSOlivier Houchard -------------------------------------------------------------------------------
542715144b0fSOlivier Houchard */
float128_le_quiet(float128 a,float128 b)542815144b0fSOlivier Houchard flag float128_le_quiet( float128 a, float128 b )
542915144b0fSOlivier Houchard {
543015144b0fSOlivier Houchard flag aSign, bSign;
543115144b0fSOlivier Houchard
543215144b0fSOlivier Houchard if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
543315144b0fSOlivier Houchard && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
543415144b0fSOlivier Houchard || ( ( extractFloat128Exp( b ) == 0x7FFF )
543515144b0fSOlivier Houchard && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
543615144b0fSOlivier Houchard ) {
543715144b0fSOlivier Houchard if ( float128_is_signaling_nan( a )
543815144b0fSOlivier Houchard || float128_is_signaling_nan( b ) ) {
543915144b0fSOlivier Houchard float_raise( float_flag_invalid );
544015144b0fSOlivier Houchard }
544115144b0fSOlivier Houchard return 0;
544215144b0fSOlivier Houchard }
544315144b0fSOlivier Houchard aSign = extractFloat128Sign( a );
544415144b0fSOlivier Houchard bSign = extractFloat128Sign( b );
544515144b0fSOlivier Houchard if ( aSign != bSign ) {
544615144b0fSOlivier Houchard return
544715144b0fSOlivier Houchard aSign
544815144b0fSOlivier Houchard || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
544915144b0fSOlivier Houchard == 0 );
545015144b0fSOlivier Houchard }
545115144b0fSOlivier Houchard return
545215144b0fSOlivier Houchard aSign ? le128( b.high, b.low, a.high, a.low )
545315144b0fSOlivier Houchard : le128( a.high, a.low, b.high, b.low );
545415144b0fSOlivier Houchard
545515144b0fSOlivier Houchard }
545615144b0fSOlivier Houchard
545715144b0fSOlivier Houchard /*
545815144b0fSOlivier Houchard -------------------------------------------------------------------------------
545915144b0fSOlivier Houchard Returns 1 if the quadruple-precision floating-point value `a' is less than
546015144b0fSOlivier Houchard the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
546115144b0fSOlivier Houchard exception. Otherwise, the comparison is performed according to the IEC/IEEE
546215144b0fSOlivier Houchard Standard for Binary Floating-Point Arithmetic.
546315144b0fSOlivier Houchard -------------------------------------------------------------------------------
546415144b0fSOlivier Houchard */
float128_lt_quiet(float128 a,float128 b)546515144b0fSOlivier Houchard flag float128_lt_quiet( float128 a, float128 b )
546615144b0fSOlivier Houchard {
546715144b0fSOlivier Houchard flag aSign, bSign;
546815144b0fSOlivier Houchard
546915144b0fSOlivier Houchard if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
547015144b0fSOlivier Houchard && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
547115144b0fSOlivier Houchard || ( ( extractFloat128Exp( b ) == 0x7FFF )
547215144b0fSOlivier Houchard && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
547315144b0fSOlivier Houchard ) {
547415144b0fSOlivier Houchard if ( float128_is_signaling_nan( a )
547515144b0fSOlivier Houchard || float128_is_signaling_nan( b ) ) {
547615144b0fSOlivier Houchard float_raise( float_flag_invalid );
547715144b0fSOlivier Houchard }
547815144b0fSOlivier Houchard return 0;
547915144b0fSOlivier Houchard }
548015144b0fSOlivier Houchard aSign = extractFloat128Sign( a );
548115144b0fSOlivier Houchard bSign = extractFloat128Sign( b );
548215144b0fSOlivier Houchard if ( aSign != bSign ) {
548315144b0fSOlivier Houchard return
548415144b0fSOlivier Houchard aSign
548515144b0fSOlivier Houchard && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
548615144b0fSOlivier Houchard != 0 );
548715144b0fSOlivier Houchard }
548815144b0fSOlivier Houchard return
548915144b0fSOlivier Houchard aSign ? lt128( b.high, b.low, a.high, a.low )
549015144b0fSOlivier Houchard : lt128( a.high, a.low, b.high, b.low );
549115144b0fSOlivier Houchard
549215144b0fSOlivier Houchard }
549315144b0fSOlivier Houchard
549415144b0fSOlivier Houchard #endif
549515144b0fSOlivier Houchard
549615144b0fSOlivier Houchard
549715144b0fSOlivier Houchard #if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS)
549815144b0fSOlivier Houchard
549915144b0fSOlivier Houchard /*
550015144b0fSOlivier Houchard * These two routines are not part of the original softfloat distribution.
550115144b0fSOlivier Houchard *
550215144b0fSOlivier Houchard * They are based on the corresponding conversions to integer but return
550315144b0fSOlivier Houchard * unsigned numbers instead since these functions are required by GCC.
550415144b0fSOlivier Houchard *
550515144b0fSOlivier Houchard * Added by Mark Brinicombe <[email protected]> 27/09/97
550615144b0fSOlivier Houchard *
550715144b0fSOlivier Houchard * float64 version overhauled for SoftFloat 2a [bjh21 2000-07-15]
550815144b0fSOlivier Houchard */
550915144b0fSOlivier Houchard
/*
-------------------------------------------------------------------------------
Returns the result of converting the double-precision floating-point value
`a' to the 32-bit unsigned integer format.  The conversion is
performed according to the IEC/IEEE Standard for Binary Floating-point
Arithmetic, except that the conversion is always rounded toward zero.  If
the sign bit of `a' is set, zero is returned.  Otherwise, if `a' is a NaN
or the conversion overflows, the largest positive integer is returned.
-------------------------------------------------------------------------------
*/
float64_to_uint32_round_to_zero(float64 a)552015144b0fSOlivier Houchard uint32 float64_to_uint32_round_to_zero( float64 a )
552115144b0fSOlivier Houchard {
552215144b0fSOlivier Houchard flag aSign;
552315144b0fSOlivier Houchard int16 aExp, shiftCount;
552415144b0fSOlivier Houchard bits64 aSig, savedASig;
552515144b0fSOlivier Houchard uint32 z;
552615144b0fSOlivier Houchard
552715144b0fSOlivier Houchard aSig = extractFloat64Frac( a );
552815144b0fSOlivier Houchard aExp = extractFloat64Exp( a );
552915144b0fSOlivier Houchard aSign = extractFloat64Sign( a );
553015144b0fSOlivier Houchard
553115144b0fSOlivier Houchard if (aSign) {
553215144b0fSOlivier Houchard float_raise( float_flag_invalid );
553315144b0fSOlivier Houchard return(0);
553415144b0fSOlivier Houchard }
553515144b0fSOlivier Houchard
553615144b0fSOlivier Houchard if ( 0x41E < aExp ) {
553715144b0fSOlivier Houchard float_raise( float_flag_invalid );
553815144b0fSOlivier Houchard return 0xffffffff;
553915144b0fSOlivier Houchard }
554015144b0fSOlivier Houchard else if ( aExp < 0x3FF ) {
554115144b0fSOlivier Houchard if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
554215144b0fSOlivier Houchard return 0;
554315144b0fSOlivier Houchard }
554415144b0fSOlivier Houchard aSig |= LIT64( 0x0010000000000000 );
554515144b0fSOlivier Houchard shiftCount = 0x433 - aExp;
554615144b0fSOlivier Houchard savedASig = aSig;
554715144b0fSOlivier Houchard aSig >>= shiftCount;
554815144b0fSOlivier Houchard z = aSig;
554915144b0fSOlivier Houchard if ( ( aSig<<shiftCount ) != savedASig ) {
555015144b0fSOlivier Houchard float_exception_flags |= float_flag_inexact;
555115144b0fSOlivier Houchard }
555215144b0fSOlivier Houchard return z;
555315144b0fSOlivier Houchard
555415144b0fSOlivier Houchard }
555515144b0fSOlivier Houchard
/*
-------------------------------------------------------------------------------
Returns the result of converting the single-precision floating-point value
`a' to the 32-bit unsigned integer format.  The conversion is
performed according to the IEC/IEEE Standard for Binary Floating-point
Arithmetic, except that the conversion is always rounded toward zero.  If
the sign bit of `a' is set, zero is returned.  Otherwise, if `a' is a NaN
or the conversion overflows, the largest positive integer is returned.
-------------------------------------------------------------------------------
*/
float32_to_uint32_round_to_zero(float32 a)556615144b0fSOlivier Houchard uint32 float32_to_uint32_round_to_zero( float32 a )
556715144b0fSOlivier Houchard {
556815144b0fSOlivier Houchard flag aSign;
556915144b0fSOlivier Houchard int16 aExp, shiftCount;
557015144b0fSOlivier Houchard bits32 aSig;
557115144b0fSOlivier Houchard uint32 z;
557215144b0fSOlivier Houchard
557315144b0fSOlivier Houchard aSig = extractFloat32Frac( a );
557415144b0fSOlivier Houchard aExp = extractFloat32Exp( a );
557515144b0fSOlivier Houchard aSign = extractFloat32Sign( a );
557615144b0fSOlivier Houchard shiftCount = aExp - 0x9E;
557715144b0fSOlivier Houchard
557815144b0fSOlivier Houchard if (aSign) {
557915144b0fSOlivier Houchard float_raise( float_flag_invalid );
558015144b0fSOlivier Houchard return(0);
558115144b0fSOlivier Houchard }
558215144b0fSOlivier Houchard if ( 0 < shiftCount ) {
558315144b0fSOlivier Houchard float_raise( float_flag_invalid );
558415144b0fSOlivier Houchard return 0xFFFFFFFF;
558515144b0fSOlivier Houchard }
558615144b0fSOlivier Houchard else if ( aExp <= 0x7E ) {
558715144b0fSOlivier Houchard if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
558815144b0fSOlivier Houchard return 0;
558915144b0fSOlivier Houchard }
559015144b0fSOlivier Houchard aSig = ( aSig | 0x800000 )<<8;
559115144b0fSOlivier Houchard z = aSig>>( - shiftCount );
559215144b0fSOlivier Houchard if ( aSig<<( shiftCount & 31 ) ) {
559315144b0fSOlivier Houchard float_exception_flags |= float_flag_inexact;
559415144b0fSOlivier Houchard }
559515144b0fSOlivier Houchard return z;
559615144b0fSOlivier Houchard
559715144b0fSOlivier Houchard }
559815144b0fSOlivier Houchard
559915144b0fSOlivier Houchard #endif
5600