1
2 /*
3 ===============================================================================
4
5 This C source file is part of TestFloat, Release 2a, a package of programs
6 for testing the correctness of floating-point arithmetic complying to the
7 IEC/IEEE Standard for Floating-Point.
8
9 Written by John R. Hauser. More information is available through the Web
10 page `http://HTTP.CS.Berkeley.EDU/~jhauser/arithmetic/TestFloat.html'.
11
12 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
13 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
14 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
15 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
16 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
17
18 Derivative works are acceptable, even for commercial purposes, so long as
19 (1) they include prominent notice that the work is derivative, and (2) they
20 include prominent notice akin to these four paragraphs for those parts of
21 this code that are retained.
22
23 ===============================================================================
24 */
25
26 int8 slow_float_rounding_mode;
27 int8 slow_float_exception_flags;
28 int8 slow_float_detect_tininess;
29
30 typedef struct {
31 bits32 a0, a1;
32 } bits64X;
33
34 typedef struct {
35 flag isNaN;
36 flag isInf;
37 flag isZero;
38 flag sign;
39 int16 exp;
40 bits64X sig;
41 } floatX;
42
43 static const floatX floatXNaN = { TRUE, FALSE, FALSE, FALSE, 0, { 0, 0 } };
44 static const floatX floatXPositiveZero =
45 { FALSE, FALSE, TRUE, FALSE, 0, { 0, 0 } };
46 static const floatX floatXNegativeZero =
47 { FALSE, FALSE, TRUE, TRUE, 0, { 0, 0 } };
48
shortShift64Left(bits64X a,int8 shiftCount)49 static bits64X shortShift64Left( bits64X a, int8 shiftCount )
50 {
51 int8 negShiftCount;
52
53 negShiftCount = ( - shiftCount & 31 );
54 a.a0 = ( a.a0<<shiftCount ) | ( a.a1>>negShiftCount );
55 a.a1 <<= shiftCount;
56 return a;
57
58 }
59
shortShift64RightJamming(bits64X a,int8 shiftCount)60 static bits64X shortShift64RightJamming( bits64X a, int8 shiftCount )
61 {
62 int8 negShiftCount;
63 bits32 extra;
64
65 negShiftCount = ( - shiftCount & 31 );
66 extra = a.a1<<negShiftCount;
67 a.a1 = ( a.a0<<negShiftCount ) | ( a.a1>>shiftCount ) | ( extra != 0 );
68 a.a0 >>= shiftCount;
69 return a;
70
71 }
72
neg64(bits64X a)73 static bits64X neg64( bits64X a )
74 {
75
76 if ( a.a1 == 0 ) {
77 a.a0 = - a.a0;
78 }
79 else {
80 a.a1 = - a.a1;
81 a.a0 = ~ a.a0;
82 }
83 return a;
84
85 }
86
add64(bits64X a,bits64X b)87 static bits64X add64( bits64X a, bits64X b )
88 {
89
90 a.a1 += b.a1;
91 a.a0 += b.a0 + ( a.a1 < b.a1 );
92 return a;
93
94 }
95
eq64(bits64X a,bits64X b)96 static flag eq64( bits64X a, bits64X b )
97 {
98
99 return ( a.a0 == b.a0 ) && ( a.a1 == b.a1 );
100
101 }
102
le64(bits64X a,bits64X b)103 static flag le64( bits64X a, bits64X b )
104 {
105
106 return ( a.a0 < b.a0 ) || ( ( a.a0 == b.a0 ) && ( a.a1 <= b.a1 ) );
107
108 }
109
lt64(bits64X a,bits64X b)110 static flag lt64( bits64X a, bits64X b )
111 {
112
113 return ( a.a0 < b.a0 ) || ( ( a.a0 == b.a0 ) && ( a.a1 < b.a1 ) );
114
115 }
116
roundFloatXTo24(flag isTiny,floatX zx)117 static floatX roundFloatXTo24( flag isTiny, floatX zx )
118 {
119
120 if ( zx.sig.a1 ) {
121 slow_float_exception_flags |= float_flag_inexact;
122 if ( isTiny ) slow_float_exception_flags |= float_flag_underflow;
123 switch ( slow_float_rounding_mode ) {
124 case float_round_nearest_even:
125 if ( zx.sig.a1 < 0x80000000 ) goto noIncrement;
126 if ( ( zx.sig.a1 == 0x80000000 ) && ! ( zx.sig.a0 & 1 ) ) {
127 goto noIncrement;
128 }
129 break;
130 case float_round_to_zero:
131 goto noIncrement;
132 case float_round_down:
133 if ( ! zx.sign ) goto noIncrement;
134 break;
135 case float_round_up:
136 if ( zx.sign ) goto noIncrement;
137 break;
138 }
139 ++zx.sig.a0;
140 if ( zx.sig.a0 == 0x01000000 ) {
141 zx.sig.a0 = 0x00800000;
142 ++zx.exp;
143 }
144 }
145 noIncrement:
146 zx.sig.a1 = 0;
147 return zx;
148
149 }
150
roundFloatXTo53(flag isTiny,floatX zx)151 static floatX roundFloatXTo53( flag isTiny, floatX zx )
152 {
153 int8 roundBits;
154
155 roundBits = zx.sig.a1 & 7;
156 zx.sig.a1 -= roundBits;
157 if ( roundBits ) {
158 slow_float_exception_flags |= float_flag_inexact;
159 if ( isTiny ) slow_float_exception_flags |= float_flag_underflow;
160 switch ( slow_float_rounding_mode ) {
161 case float_round_nearest_even:
162 if ( roundBits < 4 ) goto noIncrement;
163 if ( ( roundBits == 4 ) && ! ( zx.sig.a1 & 8 ) ) goto noIncrement;
164 break;
165 case float_round_to_zero:
166 goto noIncrement;
167 case float_round_down:
168 if ( ! zx.sign ) goto noIncrement;
169 break;
170 case float_round_up:
171 if ( zx.sign ) goto noIncrement;
172 break;
173 }
174 zx.sig.a1 += 8;
175 zx.sig.a0 += ( zx.sig.a1 == 0 );
176 if ( zx.sig.a0 == 0x01000000 ) {
177 zx.sig.a0 = 0x00800000;
178 ++zx.exp;
179 }
180 }
181 noIncrement:
182 return zx;
183
184 }
185
int32ToFloatX(int32 a)186 static floatX int32ToFloatX( int32 a )
187 {
188 floatX ax;
189
190 ax.isNaN = FALSE;
191 ax.isInf = FALSE;
192 ax.sign = ( a < 0 );
193 ax.sig.a1 = ax.sign ? - a : a;
194 ax.sig.a0 = 0;
195 if ( a == 0 ) {
196 ax.isZero = TRUE;
197 return ax;
198 }
199 ax.isZero = FALSE;
200 ax.sig = shortShift64Left( ax.sig, 23 );
201 ax.exp = 32;
202 while ( ax.sig.a0 < 0x00800000 ) {
203 ax.sig = shortShift64Left( ax.sig, 1 );
204 --ax.exp;
205 }
206 return ax;
207
208 }
209
floatXToInt32(floatX ax)210 static int32 floatXToInt32( floatX ax )
211 {
212 int8 savedExceptionFlags;
213 int16 shiftCount;
214 int32 z;
215
216 if ( ax.isInf || ax.isNaN ) {
217 slow_float_exception_flags |= float_flag_invalid;
218 return ( ax.isInf & ax.sign ) ? 0x80000000 : 0x7FFFFFFF;
219 }
220 if ( ax.isZero ) return 0;
221 savedExceptionFlags = slow_float_exception_flags;
222 shiftCount = 52 - ax.exp;
223 if ( 56 < shiftCount ) {
224 ax.sig.a1 = 1;
225 ax.sig.a0 = 0;
226 }
227 else {
228 while ( 0 < shiftCount ) {
229 ax.sig = shortShift64RightJamming( ax.sig, 1 );
230 --shiftCount;
231 }
232 }
233 ax = roundFloatXTo53( FALSE, ax );
234 ax.sig = shortShift64RightJamming( ax.sig, 3 );
235 z = ax.sig.a1;
236 if ( ax.sign ) z = - z;
237 if ( ( shiftCount < 0 )
238 || ax.sig.a0
239 || ( ( z != 0 ) && ( ( ax.sign ^ ( z < 0 ) ) != 0 ) )
240 ) {
241 slow_float_exception_flags = savedExceptionFlags | float_flag_invalid;
242 return ax.sign ? 0x80000000 : 0x7FFFFFFF;
243 }
244 return z;
245
246 }
247
float32ToFloatX(float32 a)248 static floatX float32ToFloatX( float32 a )
249 {
250 int16 expField;
251 floatX ax;
252
253 ax.isNaN = FALSE;
254 ax.isInf = FALSE;
255 ax.isZero = FALSE;
256 ax.sign = ( ( a & 0x80000000 ) != 0 );
257 expField = ( a>>23 ) & 0xFF;
258 ax.sig.a1 = 0;
259 ax.sig.a0 = a & 0x007FFFFF;
260 if ( expField == 0 ) {
261 if ( ax.sig.a0 == 0 ) {
262 ax.isZero = TRUE;
263 }
264 else {
265 expField = 1 - 0x7F;
266 do {
267 ax.sig.a0 <<= 1;
268 --expField;
269 } while ( ax.sig.a0 < 0x00800000 );
270 ax.exp = expField;
271 }
272 }
273 else if ( expField == 0xFF ) {
274 if ( ax.sig.a0 == 0 ) {
275 ax.isInf = TRUE;
276 }
277 else {
278 ax.isNaN = TRUE;
279 }
280 }
281 else {
282 ax.sig.a0 |= 0x00800000;
283 ax.exp = expField - 0x7F;
284 }
285 return ax;
286
287 }
288
floatXToFloat32(floatX zx)289 static float32 floatXToFloat32( floatX zx )
290 {
291 floatX savedZ;
292 flag isTiny;
293 int16 expField;
294 float32 z;
295
296 if ( zx.isZero ) return zx.sign ? 0x80000000 : 0;
297 if ( zx.isInf ) return zx.sign ? 0xFF800000 : 0x7F800000;
298 if ( zx.isNaN ) return 0xFFFFFFFF;
299 while ( 0x01000000 <= zx.sig.a0 ) {
300 zx.sig = shortShift64RightJamming( zx.sig, 1 );
301 ++zx.exp;
302 }
303 while ( zx.sig.a0 < 0x00800000 ) {
304 zx.sig = shortShift64Left( zx.sig, 1 );
305 --zx.exp;
306 }
307 savedZ = zx;
308 isTiny =
309 ( slow_float_detect_tininess == float_tininess_before_rounding )
310 && ( zx.exp + 0x7F <= 0 );
311 zx = roundFloatXTo24( isTiny, zx );
312 expField = zx.exp + 0x7F;
313 if ( 0xFF <= expField ) {
314 slow_float_exception_flags |=
315 float_flag_overflow | float_flag_inexact;
316 if ( zx.sign ) {
317 switch ( slow_float_rounding_mode ) {
318 case float_round_nearest_even:
319 case float_round_down:
320 z = 0xFF800000;
321 break;
322 case float_round_to_zero:
323 case float_round_up:
324 z = 0xFF7FFFFF;
325 break;
326 }
327 }
328 else {
329 switch ( slow_float_rounding_mode ) {
330 case float_round_nearest_even:
331 case float_round_up:
332 z = 0x7F800000;
333 break;
334 case float_round_to_zero:
335 case float_round_down:
336 z = 0x7F7FFFFF;
337 break;
338 }
339 }
340 return z;
341 }
342 if ( expField <= 0 ) {
343 isTiny = TRUE;
344 zx = savedZ;
345 expField = zx.exp + 0x7F;
346 if ( expField < -27 ) {
347 zx.sig.a1 = ( zx.sig.a0 != 0 ) || ( zx.sig.a1 != 0 );
348 zx.sig.a0 = 0;
349 }
350 else {
351 while ( expField <= 0 ) {
352 zx.sig = shortShift64RightJamming( zx.sig, 1 );
353 ++expField;
354 }
355 }
356 zx = roundFloatXTo24( isTiny, zx );
357 expField = ( 0x00800000 <= zx.sig.a0 ) ? 1 : 0;
358 }
359 z = expField;
360 z <<= 23;
361 if ( zx.sign ) z |= 0x80000000;
362 z |= zx.sig.a0 & 0x007FFFFF;
363 return z;
364
365 }
366
float64ToFloatX(float64 a)367 static floatX float64ToFloatX( float64 a )
368 {
369 int16 expField;
370 floatX ax;
371
372 ax.isNaN = FALSE;
373 ax.isInf = FALSE;
374 ax.isZero = FALSE;
375 #ifdef BITS64
376 ax.sign = ( ( a & LIT64( 0x8000000000000000 ) ) != 0 );
377 expField = ( a>>52 ) & 0x7FF;
378 ax.sig.a1 = a;
379 ax.sig.a0 = ( a>>32 ) & 0x000FFFFF;
380 #else
381 ax.sign = ( ( a.high & 0x80000000 ) != 0 );
382 expField = ( a.high>>( 52 - 32 ) ) & 0x7FF;
383 ax.sig.a1 = a.low;
384 ax.sig.a0 = a.high & 0x000FFFFF;
385 #endif
386 if ( expField == 0 ) {
387 if ( ( ax.sig.a0 == 0 ) && ( ax.sig.a1 == 0 ) ) {
388 ax.isZero = TRUE;
389 }
390 else {
391 expField = 1 - 0x3FF;
392 do {
393 ax.sig = shortShift64Left( ax.sig, 1 );
394 --expField;
395 } while ( ax.sig.a0 < 0x00100000 );
396 ax.exp = expField;
397 }
398 }
399 else if ( expField == 0x7FF ) {
400 if ( ( ax.sig.a0 == 0 ) && ( ax.sig.a1 == 0 ) ) {
401 ax.isInf = TRUE;
402 }
403 else {
404 ax.isNaN = TRUE;
405 }
406 }
407 else {
408 ax.exp = expField - 0x3FF;
409 ax.sig.a0 |= 0x00100000;
410 }
411 ax.sig = shortShift64Left( ax.sig, 3 );
412 return ax;
413
414 }
415
floatXToFloat64(floatX zx)416 static float64 floatXToFloat64( floatX zx )
417 {
418 floatX savedZ;
419 flag isTiny;
420 int16 expField;
421 float64 z;
422
423 #ifdef BITS64
424 if ( zx.isZero ) return zx.sign ? LIT64( 0x8000000000000000 ) : 0;
425 if ( zx.isInf ) {
426 return
427 zx.sign ? LIT64( 0xFFF0000000000000 )
428 : LIT64( 0x7FF0000000000000 );
429 }
430 if ( zx.isNaN ) return LIT64( 0xFFFFFFFFFFFFFFFF );
431 #else
432 if ( zx.isZero ) {
433 z.low = 0;
434 z.high = zx.sign ? 0x80000000 : 0;
435 return z;
436 }
437 if ( zx.isInf ) {
438 z.low = 0;
439 z.high = zx.sign ? 0xFFF00000 : 0x7FF00000;
440 return z;
441 }
442 if ( zx.isNaN ) {
443 z.high = z.low = 0xFFFFFFFF;
444 return z;
445 }
446 #endif
447 while ( 0x01000000 <= zx.sig.a0 ) {
448 zx.sig = shortShift64RightJamming( zx.sig, 1 );
449 ++zx.exp;
450 }
451 while ( zx.sig.a0 < 0x00800000 ) {
452 zx.sig = shortShift64Left( zx.sig, 1 );
453 --zx.exp;
454 }
455 savedZ = zx;
456 isTiny =
457 ( slow_float_detect_tininess == float_tininess_before_rounding )
458 && ( zx.exp + 0x3FF <= 0 );
459 zx = roundFloatXTo53( isTiny, zx );
460 expField = zx.exp + 0x3FF;
461 if ( 0x7FF <= expField ) {
462 slow_float_exception_flags |=
463 float_flag_overflow | float_flag_inexact;
464 #ifdef BITS64
465 if ( zx.sign ) {
466 switch ( slow_float_rounding_mode ) {
467 case float_round_nearest_even:
468 case float_round_down:
469 z = LIT64( 0xFFF0000000000000 );
470 break;
471 case float_round_to_zero:
472 case float_round_up:
473 z = LIT64( 0xFFEFFFFFFFFFFFFF );
474 break;
475 }
476 }
477 else {
478 switch ( slow_float_rounding_mode ) {
479 case float_round_nearest_even:
480 case float_round_up:
481 z = LIT64( 0x7FF0000000000000 );
482 break;
483 case float_round_to_zero:
484 case float_round_down:
485 z = LIT64( 0x7FEFFFFFFFFFFFFF );
486 break;
487 }
488 }
489 #else
490 if ( zx.sign ) {
491 switch ( slow_float_rounding_mode ) {
492 case float_round_nearest_even:
493 case float_round_down:
494 z.low = 0;
495 z.high = 0xFFF00000;
496 break;
497 case float_round_to_zero:
498 case float_round_up:
499 z.low = 0xFFFFFFFF;
500 z.high = 0xFFEFFFFF;
501 break;
502 }
503 }
504 else {
505 switch ( slow_float_rounding_mode ) {
506 case float_round_nearest_even:
507 case float_round_up:
508 z.low = 0;
509 z.high = 0x7FF00000;
510 break;
511 case float_round_to_zero:
512 case float_round_down:
513 z.low = 0xFFFFFFFF;
514 z.high = 0x7FEFFFFF;
515 break;
516 }
517 }
518 #endif
519 return z;
520 }
521 if ( expField <= 0 ) {
522 isTiny = TRUE;
523 zx = savedZ;
524 expField = zx.exp + 0x3FF;
525 if ( expField < -56 ) {
526 zx.sig.a1 = ( zx.sig.a0 != 0 ) || ( zx.sig.a1 != 0 );
527 zx.sig.a0 = 0;
528 }
529 else {
530 while ( expField <= 0 ) {
531 zx.sig = shortShift64RightJamming( zx.sig, 1 );
532 ++expField;
533 }
534 }
535 zx = roundFloatXTo53( isTiny, zx );
536 expField = ( 0x00800000 <= zx.sig.a0 ) ? 1 : 0;
537 }
538 zx.sig = shortShift64RightJamming( zx.sig, 3 );
539 #ifdef BITS64
540 z = expField;
541 z <<= 52;
542 if ( zx.sign ) z |= LIT64( 0x8000000000000000 );
543 z |= ( ( (bits64) ( zx.sig.a0 & 0x000FFFFF ) )<<32 ) | zx.sig.a1;
544 #else
545 z.low = zx.sig.a1;
546 z.high = expField;
547 z.high <<= 52 - 32;
548 if ( zx.sign ) z.high |= 0x80000000;
549 z.high |= zx.sig.a0 & 0x000FFFFF;
550 #endif
551 return z;
552
553 }
554
floatXInvalid(void)555 static floatX floatXInvalid( void )
556 {
557
558 slow_float_exception_flags |= float_flag_invalid;
559 return floatXNaN;
560
561 }
562
floatXRoundToInt(floatX ax)563 static floatX floatXRoundToInt( floatX ax )
564 {
565 int16 shiftCount, i;
566
567 if ( ax.isNaN || ax.isInf ) return ax;
568 shiftCount = 52 - ax.exp;
569 if ( shiftCount <= 0 ) return ax;
570 if ( 55 < shiftCount ) {
571 ax.exp = 52;
572 ax.sig.a1 = ! ax.isZero;
573 ax.sig.a0 = 0;
574 }
575 else {
576 while ( 0 < shiftCount ) {
577 ax.sig = shortShift64RightJamming( ax.sig, 1 );
578 ++ax.exp;
579 --shiftCount;
580 }
581 }
582 ax = roundFloatXTo53( FALSE, ax );
583 if ( ( ax.sig.a0 == 0 ) && ( ax.sig.a1 == 0 ) ) ax.isZero = TRUE;
584 return ax;
585
586 }
587
floatXAdd(floatX ax,floatX bx)588 static floatX floatXAdd( floatX ax, floatX bx )
589 {
590 int16 expDiff;
591 floatX zx;
592
593 if ( ax.isNaN ) return ax;
594 if ( bx.isNaN ) return bx;
595 if ( ax.isInf && bx.isInf ) {
596 if ( ax.sign == bx.sign ) return ax;
597 return floatXInvalid();
598 }
599 if ( ax.isInf ) return ax;
600 if ( bx.isInf ) return bx;
601 if ( ax.isZero && bx.isZero ) {
602 if ( ax.sign == bx.sign ) return ax;
603 goto completeCancellation;
604 }
605 if ( ( ax.sign != bx.sign )
606 && ( ax.exp == bx.exp )
607 && eq64( ax.sig, bx.sig )
608 ) {
609 completeCancellation:
610 return
611 ( slow_float_rounding_mode == float_round_down ) ?
612 floatXNegativeZero
613 : floatXPositiveZero;
614 }
615 if ( ax.isZero ) return bx;
616 if ( bx.isZero ) return ax;
617 expDiff = ax.exp - bx.exp;
618 if ( expDiff < 0 ) {
619 zx = ax;
620 zx.exp = bx.exp;
621 if ( expDiff < -56 ) {
622 zx.sig.a1 = 1;
623 zx.sig.a0 = 0;
624 }
625 else {
626 while ( expDiff < 0 ) {
627 zx.sig = shortShift64RightJamming( zx.sig, 1 );
628 ++expDiff;
629 }
630 }
631 if ( ax.sign != bx.sign ) zx.sig = neg64( zx.sig );
632 zx.sign = bx.sign;
633 zx.sig = add64( zx.sig, bx.sig );
634 }
635 else {
636 zx = bx;
637 zx.exp = ax.exp;
638 if ( 56 < expDiff ) {
639 zx.sig.a1 = 1;
640 zx.sig.a0 = 0;
641 }
642 else {
643 while ( 0 < expDiff ) {
644 zx.sig = shortShift64RightJamming( zx.sig, 1 );
645 --expDiff;
646 }
647 }
648 if ( ax.sign != bx.sign ) zx.sig = neg64( zx.sig );
649 zx.sign = ax.sign;
650 zx.sig = add64( zx.sig, ax.sig );
651 }
652 if ( zx.sig.a0 & 0x80000000 ) {
653 zx.sig = neg64( zx.sig );
654 zx.sign = ! zx.sign;
655 }
656 return zx;
657
658 }
659
floatXMul(floatX ax,floatX bx)660 static floatX floatXMul( floatX ax, floatX bx )
661 {
662 int8 bitNum;
663 floatX zx;
664
665 if ( ax.isNaN ) return ax;
666 if ( bx.isNaN ) return bx;
667 if ( ax.isInf ) {
668 if ( bx.isZero ) return floatXInvalid();
669 if ( bx.sign ) ax.sign = ! ax.sign;
670 return ax;
671 }
672 if ( bx.isInf ) {
673 if ( ax.isZero ) return floatXInvalid();
674 if ( ax.sign ) bx.sign = ! bx.sign;
675 return bx;
676 }
677 zx = ax;
678 zx.sign ^= bx.sign;
679 if ( ax.isZero || bx.isZero ) {
680 return zx.sign ? floatXNegativeZero : floatXPositiveZero;
681 }
682 zx.exp += bx.exp + 1;
683 zx.sig.a1 = 0;
684 zx.sig.a0 = 0;
685 for ( bitNum = 0; bitNum < 55; ++bitNum ) {
686 if ( bx.sig.a1 & 2 ) zx.sig = add64( zx.sig, ax.sig );
687 bx.sig = shortShift64RightJamming( bx.sig, 1 );
688 zx.sig = shortShift64RightJamming( zx.sig, 1 );
689 }
690 return zx;
691
692 }
693
floatXDiv(floatX ax,floatX bx)694 static floatX floatXDiv( floatX ax, floatX bx )
695 {
696 bits64X negBSig;
697 int8 bitNum;
698 floatX zx;
699
700 if ( ax.isNaN ) return ax;
701 if ( bx.isNaN ) return bx;
702 if ( ax.isInf ) {
703 if ( bx.isInf ) return floatXInvalid();
704 if ( bx.sign ) ax.sign = ! ax.sign;
705 return ax;
706 }
707 if ( bx.isZero ) {
708 if ( ax.isZero ) return floatXInvalid();
709 slow_float_exception_flags |= float_flag_divbyzero;
710 if ( ax.sign ) bx.sign = ! bx.sign;
711 bx.isZero = FALSE;
712 bx.isInf = TRUE;
713 return bx;
714 }
715 zx = ax;
716 zx.sign ^= bx.sign;
717 if ( ax.isZero || bx.isInf ) {
718 return zx.sign ? floatXNegativeZero : floatXPositiveZero;
719 }
720 zx.exp -= bx.exp + 1;
721 zx.sig.a1 = 0;
722 zx.sig.a0 = 0;
723 negBSig = neg64( bx.sig );
724 for ( bitNum = 0; bitNum < 56; ++bitNum ) {
725 if ( le64( bx.sig, ax.sig ) ) {
726 zx.sig.a1 |= 1;
727 ax.sig = add64( ax.sig, negBSig );
728 }
729 ax.sig = shortShift64Left( ax.sig, 1 );
730 zx.sig = shortShift64Left( zx.sig, 1 );
731 }
732 if ( ax.sig.a0 || ax.sig.a1 ) zx.sig.a1 |= 1;
733 return zx;
734
735 }
736
floatXRem(floatX ax,floatX bx)737 static floatX floatXRem( floatX ax, floatX bx )
738 {
739 bits64X negBSig;
740 flag lastQuotientBit;
741 bits64X savedASig;
742
743 if ( ax.isNaN ) return ax;
744 if ( bx.isNaN ) return bx;
745 if ( ax.isInf || bx.isZero ) return floatXInvalid();
746 if ( ax.isZero || bx.isInf ) return ax;
747 --bx.exp;
748 if ( ax.exp < bx.exp ) return ax;
749 bx.sig = shortShift64Left( bx.sig, 1 );
750 negBSig = neg64( bx.sig );
751 while ( bx.exp < ax.exp ) {
752 if ( le64( bx.sig, ax.sig ) ) ax.sig = add64( ax.sig, negBSig );
753 ax.sig = shortShift64Left( ax.sig, 1 );
754 --ax.exp;
755 }
756 lastQuotientBit = le64( bx.sig, ax.sig );
757 if ( lastQuotientBit ) ax.sig = add64( ax.sig, negBSig );
758 savedASig = ax.sig;
759 ax.sig = neg64( add64( ax.sig, negBSig ) );
760 if ( lt64( ax.sig, savedASig ) ) {
761 ax.sign = ! ax.sign;
762 }
763 else if ( lt64( savedASig, ax.sig ) ) {
764 ax.sig = savedASig;
765 }
766 else {
767 if ( lastQuotientBit ) {
768 ax.sign = ! ax.sign;
769 }
770 else {
771 ax.sig = savedASig;
772 }
773 }
774 if ( ( ax.sig.a0 == 0 ) && ( ax.sig.a1 == 0 ) ) ax.isZero = TRUE;
775 return ax;
776
777 }
778
floatXSqrt(floatX ax)779 static floatX floatXSqrt( floatX ax )
780 {
781 int8 bitNum;
782 bits64X bitSig, savedASig;
783 floatX zx;
784
785 if ( ax.isNaN || ax.isZero ) return ax;
786 if ( ax.sign ) return floatXInvalid();
787 if ( ax.isInf ) return ax;
788 zx = ax;
789 zx.exp >>= 1;
790 if ( ( ax.exp & 1 ) == 0 ) ax.sig = shortShift64RightJamming( ax.sig, 1 );
791 zx.sig.a1 = 0;
792 zx.sig.a0 = 0;
793 bitSig.a1 = 0;
794 bitSig.a0 = 0x00800000;
795 for ( bitNum = 0; bitNum < 56; ++bitNum ) {
796 savedASig = ax.sig;
797 ax.sig = add64( ax.sig, neg64( zx.sig ) );
798 ax.sig = shortShift64Left( ax.sig, 1 );
799 ax.sig = add64( ax.sig, neg64( bitSig ) );
800 if ( ax.sig.a0 & 0x80000000 ) {
801 ax.sig = shortShift64Left( savedASig, 1 );
802 }
803 else {
804 zx.sig.a1 |= bitSig.a1;
805 zx.sig.a0 |= bitSig.a0;
806 }
807 bitSig = shortShift64RightJamming( bitSig, 1 );
808 }
809 if ( ax.sig.a0 || ax.sig.a1 ) zx.sig.a1 |= 1;
810 return zx;
811
812 }
813
floatXEq(floatX ax,floatX bx)814 static flag floatXEq( floatX ax, floatX bx )
815 {
816
817 if ( ax.isNaN || bx.isNaN ) return FALSE;
818 if ( ax.isZero && bx.isZero ) return TRUE;
819 if ( ax.sign != bx.sign ) return FALSE;
820 if ( ax.isInf || bx.isInf ) return ax.isInf && bx.isInf;
821 return ( ax.exp == bx.exp ) && eq64( ax.sig, bx.sig );
822
823 }
824
floatXLe(floatX ax,floatX bx)825 static flag floatXLe( floatX ax, floatX bx )
826 {
827
828 if ( ax.isNaN || bx.isNaN ) return FALSE;
829 if ( ax.isZero && bx.isZero ) return TRUE;
830 if ( ax.sign != bx.sign ) return ax.sign;
831 if ( ax.sign ) {
832 if ( ax.isInf || bx.isZero ) return TRUE;
833 if ( bx.isInf || ax.isZero ) return FALSE;
834 if ( bx.exp < ax.exp ) return TRUE;
835 if ( ax.exp < bx.exp ) return FALSE;
836 return le64( bx.sig, ax.sig );
837 }
838 else {
839 if ( bx.isInf || ax.isZero ) return TRUE;
840 if ( ax.isInf || bx.isZero ) return FALSE;
841 if ( ax.exp < bx.exp ) return TRUE;
842 if ( bx.exp < ax.exp ) return FALSE;
843 return le64( ax.sig, bx.sig );
844 }
845
846 }
847
floatXLt(floatX ax,floatX bx)848 static flag floatXLt( floatX ax, floatX bx )
849 {
850
851 if ( ax.isNaN || bx.isNaN ) return FALSE;
852 if ( ax.isZero && bx.isZero ) return FALSE;
853 if ( ax.sign != bx.sign ) return ax.sign;
854 if ( ax.isInf && bx.isInf ) return FALSE;
855 if ( ax.sign ) {
856 if ( ax.isInf || bx.isZero ) return TRUE;
857 if ( bx.isInf || ax.isZero ) return FALSE;
858 if ( bx.exp < ax.exp ) return TRUE;
859 if ( ax.exp < bx.exp ) return FALSE;
860 return lt64( bx.sig, ax.sig );
861 }
862 else {
863 if ( bx.isInf || ax.isZero ) return TRUE;
864 if ( ax.isInf || bx.isZero ) return FALSE;
865 if ( ax.exp < bx.exp ) return TRUE;
866 if ( bx.exp < ax.exp ) return FALSE;
867 return lt64( ax.sig, bx.sig );
868 }
869
870 }
871
slow_int32_to_float32(int32 a)872 float32 slow_int32_to_float32( int32 a )
873 {
874
875 return floatXToFloat32( int32ToFloatX( a ) );
876
877 }
878
slow_int32_to_float64(int32 a)879 float64 slow_int32_to_float64( int32 a )
880 {
881
882 return floatXToFloat64( int32ToFloatX( a ) );
883
884 }
885
slow_float32_to_int32(float32 a)886 int32 slow_float32_to_int32( float32 a )
887 {
888
889 return floatXToInt32( float32ToFloatX( a ) );
890
891 }
892
slow_float32_to_int32_round_to_zero(float32 a)893 int32 slow_float32_to_int32_round_to_zero( float32 a )
894 {
895 int8 savedRoundingMode;
896 int32 z;
897
898 savedRoundingMode = slow_float_rounding_mode;
899 slow_float_rounding_mode = float_round_to_zero;
900 z = floatXToInt32( float32ToFloatX( a ) );
901 slow_float_rounding_mode = savedRoundingMode;
902 return z;
903
904 }
905
slow_float32_to_float64(float32 a)906 float64 slow_float32_to_float64( float32 a )
907 {
908
909 return floatXToFloat64( float32ToFloatX( a ) );
910
911 }
912
slow_float32_round_to_int(float32 a)913 float32 slow_float32_round_to_int( float32 a )
914 {
915
916 return floatXToFloat32( floatXRoundToInt( float32ToFloatX( a ) ) );
917
918 }
919
slow_float32_add(float32 a,float32 b)920 float32 slow_float32_add( float32 a, float32 b )
921 {
922
923 return
924 floatXToFloat32(
925 floatXAdd( float32ToFloatX( a ), float32ToFloatX( b ) ) );
926
927 }
928
slow_float32_sub(float32 a,float32 b)929 float32 slow_float32_sub( float32 a, float32 b )
930 {
931
932 b ^= 0x80000000;
933 return
934 floatXToFloat32(
935 floatXAdd( float32ToFloatX( a ), float32ToFloatX( b ) ) );
936
937 }
938
slow_float32_mul(float32 a,float32 b)939 float32 slow_float32_mul( float32 a, float32 b )
940 {
941
942 return
943 floatXToFloat32(
944 floatXMul( float32ToFloatX( a ), float32ToFloatX( b ) ) );
945
946 }
947
slow_float32_div(float32 a,float32 b)948 float32 slow_float32_div( float32 a, float32 b )
949 {
950
951 return
952 floatXToFloat32(
953 floatXDiv( float32ToFloatX( a ), float32ToFloatX( b ) ) );
954
955 }
956
slow_float32_rem(float32 a,float32 b)957 float32 slow_float32_rem( float32 a, float32 b )
958 {
959
960 return
961 floatXToFloat32(
962 floatXRem( float32ToFloatX( a ), float32ToFloatX( b ) ) );
963
964 }
965
slow_float32_sqrt(float32 a)966 float32 slow_float32_sqrt( float32 a )
967 {
968
969 return floatXToFloat32( floatXSqrt( float32ToFloatX( a ) ) );
970
971 }
972
slow_float32_eq(float32 a,float32 b)973 flag slow_float32_eq( float32 a, float32 b )
974 {
975
976 return floatXEq( float32ToFloatX( a ), float32ToFloatX( b ) );
977
978 }
979
slow_float32_le(float32 a,float32 b)980 flag slow_float32_le( float32 a, float32 b )
981 {
982 floatX ax, bx;
983
984 ax = float32ToFloatX( a );
985 bx = float32ToFloatX( b );
986 if ( ax.isNaN || bx.isNaN ) {
987 slow_float_exception_flags |= float_flag_invalid;
988 }
989 return floatXLe( ax, bx );
990
991 }
992
slow_float32_lt(float32 a,float32 b)993 flag slow_float32_lt( float32 a, float32 b )
994 {
995 floatX ax, bx;
996
997 ax = float32ToFloatX( a );
998 bx = float32ToFloatX( b );
999 if ( ax.isNaN || bx.isNaN ) {
1000 slow_float_exception_flags |= float_flag_invalid;
1001 }
1002 return floatXLt( ax, bx );
1003
1004 }
1005
slow_float32_eq_signaling(float32 a,float32 b)1006 flag slow_float32_eq_signaling( float32 a, float32 b )
1007 {
1008 floatX ax, bx;
1009
1010 ax = float32ToFloatX( a );
1011 bx = float32ToFloatX( b );
1012 if ( ax.isNaN || bx.isNaN ) {
1013 slow_float_exception_flags |= float_flag_invalid;
1014 }
1015 return floatXEq( ax, bx );
1016
1017 }
1018
slow_float32_le_quiet(float32 a,float32 b)1019 flag slow_float32_le_quiet( float32 a, float32 b )
1020 {
1021
1022 return floatXLe( float32ToFloatX( a ), float32ToFloatX( b ) );
1023
1024 }
1025
slow_float32_lt_quiet(float32 a,float32 b)1026 flag slow_float32_lt_quiet( float32 a, float32 b )
1027 {
1028
1029 return floatXLt( float32ToFloatX( a ), float32ToFloatX( b ) );
1030
1031 }
1032
slow_float64_to_int32(float64 a)1033 int32 slow_float64_to_int32( float64 a )
1034 {
1035
1036 return floatXToInt32( float64ToFloatX( a ) );
1037
1038 }
1039
slow_float64_to_int32_round_to_zero(float64 a)1040 int32 slow_float64_to_int32_round_to_zero( float64 a )
1041 {
1042 int8 savedRoundingMode;
1043 int32 z;
1044
1045 savedRoundingMode = slow_float_rounding_mode;
1046 slow_float_rounding_mode = float_round_to_zero;
1047 z = floatXToInt32( float64ToFloatX( a ) );
1048 slow_float_rounding_mode = savedRoundingMode;
1049 return z;
1050
1051 }
1052
slow_float64_to_float32(float64 a)1053 float32 slow_float64_to_float32( float64 a )
1054 {
1055
1056 return floatXToFloat32( float64ToFloatX( a ) );
1057
1058 }
1059
slow_float64_round_to_int(float64 a)1060 float64 slow_float64_round_to_int( float64 a )
1061 {
1062
1063 return floatXToFloat64( floatXRoundToInt( float64ToFloatX( a ) ) );
1064
1065 }
1066
slow_float64_add(float64 a,float64 b)1067 float64 slow_float64_add( float64 a, float64 b )
1068 {
1069
1070 return
1071 floatXToFloat64(
1072 floatXAdd( float64ToFloatX( a ), float64ToFloatX( b ) ) );
1073
1074 }
1075
slow_float64_sub(float64 a,float64 b)1076 float64 slow_float64_sub( float64 a, float64 b )
1077 {
1078
1079 #ifdef BITS64
1080 b ^= LIT64( 0x8000000000000000 );
1081 #else
1082 b.high ^= 0x80000000;
1083 #endif
1084 return
1085 floatXToFloat64(
1086 floatXAdd( float64ToFloatX( a ), float64ToFloatX( b ) ) );
1087
1088 }
1089
slow_float64_mul(float64 a,float64 b)1090 float64 slow_float64_mul( float64 a, float64 b )
1091 {
1092
1093 return
1094 floatXToFloat64(
1095 floatXMul( float64ToFloatX( a ), float64ToFloatX( b ) ) );
1096
1097 }
1098
slow_float64_div(float64 a,float64 b)1099 float64 slow_float64_div( float64 a, float64 b )
1100 {
1101
1102 return
1103 floatXToFloat64(
1104 floatXDiv( float64ToFloatX( a ), float64ToFloatX( b ) ) );
1105
1106 }
1107
slow_float64_rem(float64 a,float64 b)1108 float64 slow_float64_rem( float64 a, float64 b )
1109 {
1110
1111 return
1112 floatXToFloat64(
1113 floatXRem( float64ToFloatX( a ), float64ToFloatX( b ) ) );
1114
1115 }
1116
slow_float64_sqrt(float64 a)1117 float64 slow_float64_sqrt( float64 a )
1118 {
1119
1120 return floatXToFloat64( floatXSqrt( float64ToFloatX( a ) ) );
1121
1122 }
1123
slow_float64_eq(float64 a,float64 b)1124 flag slow_float64_eq( float64 a, float64 b )
1125 {
1126
1127 return floatXEq( float64ToFloatX( a ), float64ToFloatX( b ) );
1128
1129 }
1130
slow_float64_le(float64 a,float64 b)1131 flag slow_float64_le( float64 a, float64 b )
1132 {
1133 floatX ax, bx;
1134
1135 ax = float64ToFloatX( a );
1136 bx = float64ToFloatX( b );
1137 if ( ax.isNaN || bx.isNaN ) {
1138 slow_float_exception_flags |= float_flag_invalid;
1139 }
1140 return floatXLe( ax, bx );
1141
1142 }
1143
slow_float64_lt(float64 a,float64 b)1144 flag slow_float64_lt( float64 a, float64 b )
1145 {
1146 floatX ax, bx;
1147
1148 ax = float64ToFloatX( a );
1149 bx = float64ToFloatX( b );
1150 if ( ax.isNaN || bx.isNaN ) {
1151 slow_float_exception_flags |= float_flag_invalid;
1152 }
1153 return floatXLt( ax, bx );
1154
1155 }
1156
slow_float64_eq_signaling(float64 a,float64 b)1157 flag slow_float64_eq_signaling( float64 a, float64 b )
1158 {
1159 floatX ax, bx;
1160
1161 ax = float64ToFloatX( a );
1162 bx = float64ToFloatX( b );
1163 if ( ax.isNaN || bx.isNaN ) {
1164 slow_float_exception_flags |= float_flag_invalid;
1165 }
1166 return floatXEq( ax, bx );
1167
1168 }
1169
slow_float64_le_quiet(float64 a,float64 b)1170 flag slow_float64_le_quiet( float64 a, float64 b )
1171 {
1172
1173 return floatXLe( float64ToFloatX( a ), float64ToFloatX( b ) );
1174
1175 }
1176
slow_float64_lt_quiet(float64 a,float64 b)1177 flag slow_float64_lt_quiet( float64 a, float64 b )
1178 {
1179
1180 return floatXLt( float64ToFloatX( a ), float64ToFloatX( b ) );
1181
1182 }
1183
1184