arm/nwfpe/softfloat.c

4 This C source file is part of the SoftFloat IEC/IEEE Floating-point
10 National Science Foundation under grant MIP-9311980.  The original version
11 of this code was written as part of a project to build a fixed-point vector
15 http://www.jhauser.us/arithmetic/SoftFloat-2b/SoftFloat-source.txt
38 -------------------------------------------------------------------------------
39 Primitive arithmetic functions, including multi-word arithmetic, and
42 -------------------------------------------------------------------------------
44 #include "softfloat-macros"
47 -------------------------------------------------------------------------------
52 are propagated from function inputs to output.  These details are target-
54 -------------------------------------------------------------------------------
56 #include "softfloat-specialize"
59 -------------------------------------------------------------------------------
60 Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
61 and 7, and returns the properly rounded 32-bit integer corresponding to the
62 input.  If `zSign' is nonzero, the input is negated before being converted
63 to an integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point
64 input is simply rounded to an integer, with the inexact exception raised if
65 the input cannot be represented exactly as an integer.  If the fixed-point
66 input is too large, however, the invalid exception is raised and the largest
68 -------------------------------------------------------------------------------
75     int32 z;  in roundAndPackInt32()  local
77     roundingMode = roundData->mode;  in roundAndPackInt32()
97     z = absZ;  in roundAndPackInt32()
98     if ( zSign ) z = - z;  in roundAndPackInt32()
99     if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {  in roundAndPackInt32()
100         roundData->exception |= float_flag_invalid;  in roundAndPackInt32()
103     if ( roundBits ) roundData->exception |= float_flag_inexact;  in roundAndPackInt32()
104     return z;  in roundAndPackInt32()
109 -------------------------------------------------------------------------------
110 Returns the fraction bits of the single-precision floating-point value `a'.
111 -------------------------------------------------------------------------------
121 -------------------------------------------------------------------------------
122 Returns the exponent bits of the single-precision floating-point value `a'.
123 -------------------------------------------------------------------------------
133 -------------------------------------------------------------------------------
134 Returns the sign bit of the single-precision floating-point value `a'.
135 -------------------------------------------------------------------------------
147 -------------------------------------------------------------------------------
148 Normalizes the subnormal single-precision floating-point value represented
152 -------------------------------------------------------------------------------
159     shiftCount = countLeadingZeros32( aSig ) - 8;  in normalizeFloat32Subnormal()
161     *zExpPtr = 1 - shiftCount;  in normalizeFloat32Subnormal()
166 -------------------------------------------------------------------------------
168 single-precision floating-point value, returning the result.  After being
172 will have an integer portion equal to 1, the `zExp' input should be 1 less
175 -------------------------------------------------------------------------------
195 -------------------------------------------------------------------------------
196 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
197 and significand `zSig', and returns the proper single-precision floating-
198 point value corresponding to the abstract input.  Ordinarily, the abstract
199 value is simply rounded and packed into the single-precision format, with
200 the inexact exception raised if the abstract input cannot be represented
203 returned.  If the abstract value is too small, the input value is rounded to
205 the abstract input cannot be represented exactly as a subnormal single-
206 precision floating-point number.
207     The input significand `zSig' has its binary point between bits 30
212 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
214 Binary Floating-point Arithmetic.
215 -------------------------------------------------------------------------------
224     roundingMode = roundData->mode;  in roundAndPackFloat32()
247             roundData->exception |= float_flag_overflow | float_flag_inexact;  in roundAndPackFloat32()
248             return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );  in roundAndPackFloat32()
253                 || ( zExp < -1 )  in roundAndPackFloat32()
255             shift32RightJamming( zSig, - zExp, &zSig );  in roundAndPackFloat32()
258             if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;  in roundAndPackFloat32()
261     if ( roundBits ) roundData->exception |= float_flag_inexact;  in roundAndPackFloat32()
270 -------------------------------------------------------------------------------
271 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
272 and significand `zSig', and returns the proper single-precision floating-
273 point value corresponding to the abstract input.  This routine is just like
275 any way.  In all cases, `zExp' must be 1 less than the ``true'' floating-
277 -------------------------------------------------------------------------------
284     shiftCount = countLeadingZeros32( zSig ) - 1;  in normalizeRoundAndPackFloat32()
285     return roundAndPackFloat32( roundData, zSign, zExp - shiftCount, zSig<<shiftCount );  in normalizeRoundAndPackFloat32()
290 -------------------------------------------------------------------------------
291 Returns the fraction bits of the double-precision floating-point value `a'.
292 -------------------------------------------------------------------------------
302 -------------------------------------------------------------------------------
303 Returns the exponent bits of the double-precision floating-point value `a'.
304 -------------------------------------------------------------------------------
314 -------------------------------------------------------------------------------
315 Returns the sign bit of the double-precision floating-point value `a'.
316 -------------------------------------------------------------------------------
328 -------------------------------------------------------------------------------
329 Normalizes the subnormal double-precision floating-point value represented
333 -------------------------------------------------------------------------------
340     shiftCount = countLeadingZeros64( aSig ) - 11;  in normalizeFloat64Subnormal()
342     *zExpPtr = 1 - shiftCount;  in normalizeFloat64Subnormal()
347 -------------------------------------------------------------------------------
349 double-precision floating-point value, returning the result.  After being
353 will have an integer portion equal to 1, the `zExp' input should be 1 less
356 -------------------------------------------------------------------------------
366 -------------------------------------------------------------------------------
367 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
368 and significand `zSig', and returns the proper double-precision floating-
369 point value corresponding to the abstract input.  Ordinarily, the abstract
370 value is simply rounded and packed into the double-precision format, with
371 the inexact exception raised if the abstract input cannot be represented
374 returned.  If the abstract value is too small, the input value is rounded to
376 the abstract input cannot be represented exactly as a subnormal double-
377 precision floating-point number.
378     The input significand `zSig' has its binary point between bits 62
383 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
385 Binary Floating-point Arithmetic.
386 -------------------------------------------------------------------------------
395     roundingMode = roundData->mode;  in roundAndPackFloat64()
420             roundData->exception |= float_flag_overflow | float_flag_inexact;  in roundAndPackFloat64()
421             return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );  in roundAndPackFloat64()
426                 || ( zExp < -1 )  in roundAndPackFloat64()
428             shift64RightJamming( zSig, - zExp, &zSig );  in roundAndPackFloat64()
431             if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;  in roundAndPackFloat64()
434     if ( roundBits ) roundData->exception |= float_flag_inexact;  in roundAndPackFloat64()
443 -------------------------------------------------------------------------------
444 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
445 and significand `zSig', and returns the proper double-precision floating-
446 point value corresponding to the abstract input.  This routine is just like
448 any way.  In all cases, `zExp' must be 1 less than the ``true'' floating-
450 -------------------------------------------------------------------------------
457     shiftCount = countLeadingZeros64( zSig ) - 1;  in normalizeRoundAndPackFloat64()
458     return roundAndPackFloat64( roundData, zSign, zExp - shiftCount, zSig<<shiftCount );  in normalizeRoundAndPackFloat64()
465 -------------------------------------------------------------------------------
466 Returns the fraction bits of the extended double-precision floating-point
468 -------------------------------------------------------------------------------
478 -------------------------------------------------------------------------------
479 Returns the exponent bits of the extended double-precision floating-point
481 -------------------------------------------------------------------------------
486     return a.high & 0x7FFF;  in extractFloatx80Exp()
491 -------------------------------------------------------------------------------
492 Returns the sign bit of the extended double-precision floating-point value
494 -------------------------------------------------------------------------------
499     return a.high>>15;  in extractFloatx80Sign()
504 -------------------------------------------------------------------------------
505 Normalizes the subnormal extended double-precision floating-point value
509 -------------------------------------------------------------------------------
518     *zExpPtr = 1 - shiftCount;  in normalizeFloatx80Subnormal()
523 -------------------------------------------------------------------------------
525 extended double-precision floating-point value, returning the result.
526 -------------------------------------------------------------------------------
530     floatx80 z;  in packFloatx80()  local
532     z.low = zSig;  in packFloatx80()
533     z.high = ( ( (bits16) zSign )<<15 ) + zExp;  in packFloatx80()
534     z.__padding = 0;  in packFloatx80()
535     return z;  in packFloatx80()
540 -------------------------------------------------------------------------------
541 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
543 and returns the proper extended double-precision floating-point value
544 corresponding to the abstract input.  Ordinarily, the abstract value is
545 rounded and packed into the extended double-precision format, with the
546 inexact exception raised if the abstract input cannot be represented
549 returned.  If the abstract value is too small, the input value is rounded to
551 the abstract input cannot be represented exactly as a subnormal extended
552 double-precision floating-point number.
555 result is rounded to the full precision of the extended double-precision
557     The input significand must be normalized or smaller.  If the input
561 Floating-point Arithmetic.
562 -------------------------------------------------------------------------------
573     roundingMode = roundData->mode;  in roundAndPackFloatx80()
574     roundingPrecision = roundData->precision;  in roundAndPackFloatx80()
604     if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {  in roundAndPackFloatx80()
615             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );  in roundAndPackFloatx80()
618             if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;  in roundAndPackFloatx80()
619             if ( roundBits ) roundData->exception |= float_flag_inexact;  in roundAndPackFloatx80()
630     if ( roundBits ) roundData->exception |= float_flag_inexact;  in roundAndPackFloatx80()
658     if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {  in roundAndPackFloatx80()
667             roundData->exception |= float_flag_overflow | float_flag_inexact;  in roundAndPackFloatx80()
682             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );  in roundAndPackFloatx80()
684             if ( isTiny && zSig1 ) roundData->exception |= float_flag_underflow;  in roundAndPackFloatx80()
685             if ( zSig1 ) roundData->exception |= float_flag_inexact;  in roundAndPackFloatx80()
705     if ( zSig1 ) roundData->exception |= float_flag_inexact;  in roundAndPackFloatx80()
724 -------------------------------------------------------------------------------
725 Takes an abstract floating-point value having sign `zSign', exponent
727 and returns the proper extended double-precision floating-point value
728 corresponding to the abstract input.  This routine is just like
729 `roundAndPackFloatx80' except that the input significand does not have to be
731 -------------------------------------------------------------------------------
743         zExp -= 64;  in normalizeRoundAndPackFloatx80()
747     zExp -= shiftCount;  in normalizeRoundAndPackFloatx80()
756 -------------------------------------------------------------------------------
757 Returns the result of converting the 32-bit two's complement integer `a' to
758 the single-precision floating-point format.  The conversion is performed
759 according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
760 -------------------------------------------------------------------------------
769     return normalizeRoundAndPackFloat32( roundData, zSign, 0x9C, zSign ? - a : a );  in int32_to_float32()
774 -------------------------------------------------------------------------------
775 Returns the result of converting the 32-bit two's complement integer `a' to
776 the double-precision floating-point format.  The conversion is performed
777 according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
778 -------------------------------------------------------------------------------
789     absA = aSign ? - a : a;  in int32_to_float64()
792     return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount );  in int32_to_float64()
799 -------------------------------------------------------------------------------
800 Returns the result of converting the 32-bit two's complement integer `a'
801 to the extended double-precision floating-point format.  The conversion
802 is performed according to the IEC/IEEE Standard for Binary Floating-point
804 -------------------------------------------------------------------------------
815     absA = zSign ? - a : a;  in int32_to_floatx80()
818     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );  in int32_to_floatx80()
825 -------------------------------------------------------------------------------
826 Returns the result of converting the single-precision floating-point value
827 `a' to the 32-bit two's complement integer format.  The conversion is
828 performed according to the IEC/IEEE Standard for Binary Floating-point
829 Arithmetic---which means in particular that the conversion is rounded
833 -------------------------------------------------------------------------------
847     shiftCount = 0xAF - aExp;  in float32_to_int32()
856 -------------------------------------------------------------------------------
857 Returns the result of converting the single-precision floating-point value
858 `a' to the 32-bit two's complement integer format.  The conversion is
859 performed according to the IEC/IEEE Standard for Binary Floating-point
864 -------------------------------------------------------------------------------
871     int32 z;  in float32_to_int32_round_to_zero()  local
876     shiftCount = aExp - 0x9E;  in float32_to_int32_round_to_zero()
888     z = aSig>>( - shiftCount );  in float32_to_int32_round_to_zero()
892     return aSign ? - z : z;  in float32_to_int32_round_to_zero()
897 -------------------------------------------------------------------------------
898 Returns the result of converting the single-precision floating-point value
899 `a' to the double-precision floating-point format.  The conversion is
900 performed according to the IEC/IEEE Standard for Binary Floating-point
902 -------------------------------------------------------------------------------
920         --aExp;  in float32_to_float64()
929 -------------------------------------------------------------------------------
930 Returns the result of converting the single-precision floating-point value
931 `a' to the extended double-precision floating-point format.  The conversion
932 is performed according to the IEC/IEEE Standard for Binary Floating-point
934 -------------------------------------------------------------------------------
961 -------------------------------------------------------------------------------
962 Rounds the single-precision floating-point value `a' to an integer, and
963 returns the result as a single-precision floating-point value.  The
965 Floating-point Arithmetic.
966 -------------------------------------------------------------------------------
974     float32 z;  in float32_round_to_int()  local
983     roundingMode = roundData->mode;  in float32_round_to_int()
986         roundData->exception |= float_flag_inexact;  in float32_round_to_int()
1002     lastBitMask <<= 0x96 - aExp;  in float32_round_to_int()
1003     roundBitsMask = lastBitMask - 1;  in float32_round_to_int()
1004     z = a;  in float32_round_to_int()
1006         z += lastBitMask>>1;  in float32_round_to_int()
1007         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;  in float32_round_to_int()
1010         if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {  in float32_round_to_int()
1011             z += roundBitsMask;  in float32_round_to_int()
1014     z &= ~ roundBitsMask;  in float32_round_to_int()
1015     if ( z != a ) roundData->exception |= float_flag_inexact;  in float32_round_to_int()
1016     return z;  in float32_round_to_int()
1021 -------------------------------------------------------------------------------
1022 Returns the result of adding the absolute values of the single-precision
1023 floating-point values `a' and `b'.  If `zSign' is true, the sum is negated
1026 Floating-point Arithmetic.
1027 -------------------------------------------------------------------------------
1039     expDiff = aExp - bExp;  in addFloat32Sigs()
1048             --expDiff;  in addFloat32Sigs()
1067         shift32RightJamming( aSig, - expDiff, &aSig );  in addFloat32Sigs()
1082     --zExp;  in addFloat32Sigs()
1093 -------------------------------------------------------------------------------
1094 Returns the result of subtracting the absolute values of the single-
1095 precision floating-point values `a' and `b'.  If `zSign' is true, the
1098 Standard for Binary Floating-point Arithmetic.
1099 -------------------------------------------------------------------------------
1111     expDiff = aExp - bExp;  in subFloat32Sigs()
1118         roundData->exception |= float_flag_invalid;  in subFloat32Sigs()
1127     return packFloat32( roundData->mode == float_round_down, 0, 0 );  in subFloat32Sigs()
1139     shift32RightJamming( aSig, - expDiff, &aSig );  in subFloat32Sigs()
1142     zSig = bSig - aSig;  in subFloat32Sigs()
1152         --expDiff;  in subFloat32Sigs()
1160     zSig = aSig - bSig;  in subFloat32Sigs()
1163     --zExp;  in subFloat32Sigs()
1169 -------------------------------------------------------------------------------
1170 Returns the result of adding the single-precision floating-point values `a'
1172 Binary Floating-point Arithmetic.
1173 -------------------------------------------------------------------------------
1191 -------------------------------------------------------------------------------
1192 Returns the result of subtracting the single-precision floating-point values
1194 for Binary Floating-point Arithmetic.
1195 -------------------------------------------------------------------------------
1213 -------------------------------------------------------------------------------
1214 Returns the result of multiplying the single-precision floating-point values
1216 for Binary Floating-point Arithmetic.
1217 -------------------------------------------------------------------------------
1239             roundData->exception |= float_flag_invalid;  in float32_mul()
1247             roundData->exception |= float_flag_invalid;  in float32_mul()
1260     zExp = aExp + bExp - 0x7F;  in float32_mul()
1267         --zExp;  in float32_mul()
1274 -------------------------------------------------------------------------------
1275 Returns the result of dividing the single-precision floating-point value `a'
1277 IEC/IEEE Standard for Binary Floating-point Arithmetic.
1278 -------------------------------------------------------------------------------
1297             roundData->exception |= float_flag_invalid;  in float32_div()
1309                 roundData->exception |= float_flag_invalid;  in float32_div()
1312             roundData->exception |= float_flag_divbyzero;  in float32_div()
1321     zExp = aExp - bExp + 0x7D;  in float32_div()
1341 -------------------------------------------------------------------------------
1342 Returns the remainder of the single-precision floating-point value `a'
1344 according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1345 -------------------------------------------------------------------------------
1367         roundData->exception |= float_flag_invalid;  in float32_rem()
1376             roundData->exception |= float_flag_invalid;  in float32_rem()
1385     expDiff = aExp - bExp;  in float32_rem()
1392             if ( expDiff < -1 ) return a;  in float32_rem()
1396         if ( q ) aSig -= bSig;  in float32_rem()
1401             q >>= 32 - expDiff;  in float32_rem()
1403             aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;  in float32_rem()
1411         if ( bSig <= aSig ) aSig -= bSig;  in float32_rem()
1414         expDiff -= 64;  in float32_rem()
1417             q64 = ( 2 < q64 ) ? q64 - 2 : 0;  in float32_rem()
1418             aSig64 = - ( ( bSig * q64 )<<38 );  in float32_rem()
1419             expDiff -= 62;  in float32_rem()
1423         q64 = ( 2 < q64 ) ? q64 - 2 : 0;  in float32_rem()
1424         q = q64>>( 64 - expDiff );  in float32_rem()
1426         aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;  in float32_rem()
1431         aSig -= bSig;  in float32_rem()
1438     if ( zSign ) aSig = - aSig;  in float32_rem()
1444 -------------------------------------------------------------------------------
1445 Returns the square root of the single-precision floating-point value `a'.
1447 Floating-point Arithmetic.
1448 -------------------------------------------------------------------------------
1463         roundData->exception |= float_flag_invalid;  in float32_sqrt()
1468         roundData->exception |= float_flag_invalid;  in float32_sqrt()
1475     zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;  in float32_sqrt()
1485             rem = ( ( (bits64) aSig )<<32 ) - term;  in float32_sqrt()
1487                 --zSig;  in float32_sqrt()
1499 -------------------------------------------------------------------------------
1500 Returns 1 if the single-precision floating-point value `a' is equal to the
1502 according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1503 -------------------------------------------------------------------------------
1521 -------------------------------------------------------------------------------
1522 Returns 1 if the single-precision floating-point value `a' is less than or
1524 performed according to the IEC/IEEE Standard for Binary Floating-point
1526 -------------------------------------------------------------------------------
1546 -------------------------------------------------------------------------------
1547 Returns 1 if the single-precision floating-point value `a' is less than
1549 according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1550 -------------------------------------------------------------------------------
1570 -------------------------------------------------------------------------------
1571 Returns 1 if the single-precision floating-point value `a' is equal to the
1574 according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1575 -------------------------------------------------------------------------------
1591 -------------------------------------------------------------------------------
1592 Returns 1 if the single-precision floating-point value `a' is less than or
1595 IEC/IEEE Standard for Binary Floating-point Arithmetic.
1596 -------------------------------------------------------------------------------
1617 -------------------------------------------------------------------------------
1618 Returns 1 if the single-precision floating-point value `a' is less than
1621 Standard for Binary Floating-point Arithmetic.
1622 -------------------------------------------------------------------------------
1642 -------------------------------------------------------------------------------
1643 Returns the result of converting the double-precision floating-point value
1644 `a' to the 32-bit two's complement integer format.  The conversion is
1645 performed according to the IEC/IEEE Standard for Binary Floating-point
1646 Arithmetic---which means in particular that the conversion is rounded
1650 -------------------------------------------------------------------------------
1663     shiftCount = 0x42C - aExp;  in float64_to_int32()
1670 -------------------------------------------------------------------------------
1671 Returns the result of converting the double-precision floating-point value
1672 `a' to the 32-bit two's complement integer format.  The conversion is
1673 performed according to the IEC/IEEE Standard for Binary Floating-point
1678 -------------------------------------------------------------------------------
1685     int32 z;  in float64_to_int32_round_to_zero()  local
1690     shiftCount = 0x433 - aExp;  in float64_to_int32_round_to_zero()
1702     z = aSig;  in float64_to_int32_round_to_zero()
1703     if ( aSign ) z = - z;  in float64_to_int32_round_to_zero()
1704     if ( ( z < 0 ) ^ aSign ) {  in float64_to_int32_round_to_zero()
1712     return z;  in float64_to_int32_round_to_zero()
1717 -------------------------------------------------------------------------------
1718 Returns the result of converting the double-precision floating-point value
1719 `a' to the 32-bit two's complement unsigned integer format.  The conversion
1720 is performed according to the IEC/IEEE Standard for Binary Floating-point
1721 Arithmetic---which means in particular that the conversion is rounded
1725 -------------------------------------------------------------------------------
1738     shiftCount = 0x42C - aExp;  in float64_to_uint32()
1744 -------------------------------------------------------------------------------
1745 Returns the result of converting the double-precision floating-point value
1746 `a' to the 32-bit two's complement integer format.  The conversion is
1747 performed according to the IEC/IEEE Standard for Binary Floating-point
1751 -------------------------------------------------------------------------------
1758     int32 z;  in float64_to_uint32_round_to_zero()  local
1763     shiftCount = 0x433 - aExp;  in float64_to_uint32_round_to_zero()
1775     z = aSig;  in float64_to_uint32_round_to_zero()
1776     if ( aSign ) z = - z;  in float64_to_uint32_round_to_zero()
1777     if ( ( z < 0 ) ^ aSign ) {  in float64_to_uint32_round_to_zero()
1785     return z;  in float64_to_uint32_round_to_zero()
1789 -------------------------------------------------------------------------------
1790 Returns the result of converting the double-precision floating-point value
1791 `a' to the single-precision floating-point format.  The conversion is
1792 performed according to the IEC/IEEE Standard for Binary Floating-point
1794 -------------------------------------------------------------------------------
1814         aExp -= 0x381;  in float64_to_float32()
1823 -------------------------------------------------------------------------------
1824 Returns the result of converting the double-precision floating-point value
1825 `a' to the extended double-precision floating-point format.  The conversion
1826 is performed according to the IEC/IEEE Standard for Binary Floating-point
1828 -------------------------------------------------------------------------------
1856 -------------------------------------------------------------------------------
1857 Rounds the double-precision floating-point value `a' to an integer, and
1858 returns the result as a double-precision floating-point value.  The
1860 Floating-point Arithmetic.
1861 -------------------------------------------------------------------------------
1869     float64 z;  in float64_round_to_int()  local
1880         roundData->exception |= float_flag_inexact;  in float64_round_to_int()
1882         switch ( roundData->mode ) {  in float64_round_to_int()
1897     lastBitMask <<= 0x433 - aExp;  in float64_round_to_int()
1898     roundBitsMask = lastBitMask - 1;  in float64_round_to_int()
1899     z = a;  in float64_round_to_int()
1900     roundingMode = roundData->mode;  in float64_round_to_int()
1902         z += lastBitMask>>1;  in float64_round_to_int()
1903         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;  in float64_round_to_int()
1906         if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {  in float64_round_to_int()
1907             z += roundBitsMask;  in float64_round_to_int()
1910     z &= ~ roundBitsMask;  in float64_round_to_int()
1911     if ( z != a ) roundData->exception |= float_flag_inexact;  in float64_round_to_int()
1912     return z;  in float64_round_to_int()
1917 -------------------------------------------------------------------------------
1918 Returns the result of adding the absolute values of the double-precision
1919 floating-point values `a' and `b'.  If `zSign' is true, the sum is negated
1922 Floating-point Arithmetic.
1923 -------------------------------------------------------------------------------
1935     expDiff = aExp - bExp;  in addFloat64Sigs()
1944             --expDiff;  in addFloat64Sigs()
1963         shift64RightJamming( aSig, - expDiff, &aSig );  in addFloat64Sigs()
1978     --zExp;  in addFloat64Sigs()
1989 -------------------------------------------------------------------------------
1990 Returns the result of subtracting the absolute values of the double-
1991 precision floating-point values `a' and `b'.  If `zSign' is true, the
1994 Standard for Binary Floating-point Arithmetic.
1995 -------------------------------------------------------------------------------
2007     expDiff = aExp - bExp;  in subFloat64Sigs()
2014         roundData->exception |= float_flag_invalid;  in subFloat64Sigs()
2023     return packFloat64( roundData->mode == float_round_down, 0, 0 );  in subFloat64Sigs()
2035     shift64RightJamming( aSig, - expDiff, &aSig );  in subFloat64Sigs()
2038     zSig = bSig - aSig;  in subFloat64Sigs()
2048         --expDiff;  in subFloat64Sigs()
2056     zSig = aSig - bSig;  in subFloat64Sigs()
2059     --zExp;  in subFloat64Sigs()
2065 -------------------------------------------------------------------------------
2066 Returns the result of adding the double-precision floating-point values `a'
2068 Binary Floating-point Arithmetic.
2069 -------------------------------------------------------------------------------
2087 -------------------------------------------------------------------------------
2088 Returns the result of subtracting the double-precision floating-point values
2090 for Binary Floating-point Arithmetic.
2091 -------------------------------------------------------------------------------
2109 -------------------------------------------------------------------------------
2110 Returns the result of multiplying the double-precision floating-point values
2112 for Binary Floating-point Arithmetic.
2113 -------------------------------------------------------------------------------
2133             roundData->exception |= float_flag_invalid;  in float64_mul()
2141             roundData->exception |= float_flag_invalid;  in float64_mul()
2154     zExp = aExp + bExp - 0x3FF;  in float64_mul()
2161         --zExp;  in float64_mul()
2168 -------------------------------------------------------------------------------
2169 Returns the result of dividing the double-precision floating-point value `a'
2171 the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2172 -------------------------------------------------------------------------------
2193             roundData->exception |= float_flag_invalid;  in float64_div()
2205                 roundData->exception |= float_flag_invalid;  in float64_div()
2208             roundData->exception |= float_flag_divbyzero;  in float64_div()
2217     zExp = aExp - bExp + 0x3FD;  in float64_div()
2229             --zSig;  in float64_div()
2239 -------------------------------------------------------------------------------
2240 Returns the remainder of the double-precision floating-point value `a'
2242 according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2243 -------------------------------------------------------------------------------
2263         roundData->exception |= float_flag_invalid;  in float64_rem()
2272             roundData->exception |= float_flag_invalid;  in float64_rem()
2281     expDiff = aExp - bExp;  in float64_rem()
2285         if ( expDiff < -1 ) return a;  in float64_rem()
2289     if ( q ) aSig -= bSig;  in float64_rem()
2290     expDiff -= 64;  in float64_rem()
2293         q = ( 2 < q ) ? q - 2 : 0;  in float64_rem()
2294         aSig = - ( ( bSig>>2 ) * q );  in float64_rem()
2295         expDiff -= 62;  in float64_rem()
2300         q = ( 2 < q ) ? q - 2 : 0;  in float64_rem()
2301         q >>= 64 - expDiff;  in float64_rem()
2303         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;  in float64_rem()
2312         aSig -= bSig;  in float64_rem()
2319     if ( zSign ) aSig = - aSig;  in float64_rem()
2325 -------------------------------------------------------------------------------
2326 Returns the square root of the double-precision floating-point value `a'.
2328 Floating-point Arithmetic.
2329 -------------------------------------------------------------------------------
2337     //float64 z;  in float64_sqrt()
2345         roundData->exception |= float_flag_invalid;  in float64_sqrt()
2350         roundData->exception |= float_flag_invalid;  in float64_sqrt()
2357     zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;  in float64_sqrt()
2361     aSig <<= 9 - ( aExp & 1 );  in float64_sqrt()
2372                 --zSig;  in float64_sqrt()
2386 -------------------------------------------------------------------------------
2387 Returns 1 if the double-precision floating-point value `a' is equal to the
2389 according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2390 -------------------------------------------------------------------------------
2408 -------------------------------------------------------------------------------
2409 Returns 1 if the double-precision floating-point value `a' is less than or
2411 performed according to the IEC/IEEE Standard for Binary Floating-point
2413 -------------------------------------------------------------------------------
2433 -------------------------------------------------------------------------------
2434 Returns 1 if the double-precision floating-point value `a' is less than
2436 according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2437 -------------------------------------------------------------------------------
2457 -------------------------------------------------------------------------------
2458 Returns 1 if the double-precision floating-point value `a' is equal to the
2461 according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2462 -------------------------------------------------------------------------------
2478 -------------------------------------------------------------------------------
2479 Returns 1 if the double-precision floating-point value `a' is less than or
2482 IEC/IEEE Standard for Binary Floating-point Arithmetic.
2483 -------------------------------------------------------------------------------
2504 -------------------------------------------------------------------------------
2505 Returns 1 if the double-precision floating-point value `a' is less than
2508 Standard for Binary Floating-point Arithmetic.
2509 -------------------------------------------------------------------------------
2531 -------------------------------------------------------------------------------
2532 Returns the result of converting the extended double-precision floating-
2533 point value `a' to the 32-bit two's complement integer format.  The
2535 Floating-point Arithmetic---which means in particular that the conversion
2539 -------------------------------------------------------------------------------
2551     shiftCount = 0x4037 - aExp;  in floatx80_to_int32()
2559 -------------------------------------------------------------------------------
2560 Returns the result of converting the extended double-precision floating-
2561 point value `a' to the 32-bit two's complement integer format.  The
2563 Floating-point Arithmetic, except that the conversion is always rounded
2567 -------------------------------------------------------------------------------
2574     int32 z;  in floatx80_to_int32_round_to_zero()  local
2579     shiftCount = 0x403E - aExp;  in floatx80_to_int32_round_to_zero()
2590     z = aSig;  in floatx80_to_int32_round_to_zero()
2591     if ( aSign ) z = - z;  in floatx80_to_int32_round_to_zero()
2592     if ( ( z < 0 ) ^ aSign ) {  in floatx80_to_int32_round_to_zero()
2600     return z;  in floatx80_to_int32_round_to_zero()
2605 -------------------------------------------------------------------------------
2606 Returns the result of converting the extended double-precision floating-
2607 point value `a' to the single-precision floating-point format.  The
2609 Floating-point Arithmetic.
2610 -------------------------------------------------------------------------------
2628     if ( aExp || aSig ) aExp -= 0x3F81;  in floatx80_to_float32()
2634 -------------------------------------------------------------------------------
2635 Returns the result of converting the extended double-precision floating-
2636 point value `a' to the double-precision floating-point format.  The
2638 Floating-point Arithmetic.
2639 -------------------------------------------------------------------------------
2657     if ( aExp || aSig ) aExp -= 0x3C01;  in floatx80_to_float64()
2663 -------------------------------------------------------------------------------
2664 Rounds the extended double-precision floating-point value `a' to an integer,
2665 and returns the result as an extended double-precision floating-point
2667 Binary Floating-point Arithmetic.
2668 -------------------------------------------------------------------------------
2676     floatx80 z;  in floatx80_round_to_int()  local
2690         roundData->exception |= float_flag_inexact;  in floatx80_round_to_int()
2692         switch ( roundData->mode ) {  in floatx80_round_to_int()
2713     lastBitMask <<= 0x403E - aExp;  in floatx80_round_to_int()
2714     roundBitsMask = lastBitMask - 1;  in floatx80_round_to_int()
2715     z = a;  in floatx80_round_to_int()
2716     roundingMode = roundData->mode;  in floatx80_round_to_int()
2718         z.low += lastBitMask>>1;  in floatx80_round_to_int()
2719         if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;  in floatx80_round_to_int()
2722         if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {  in floatx80_round_to_int()
2723             z.low += roundBitsMask;  in floatx80_round_to_int()
2726     z.low &= ~ roundBitsMask;  in floatx80_round_to_int()
2727     if ( z.low == 0 ) {  in floatx80_round_to_int()
2728         ++z.high;  in floatx80_round_to_int()
2729         z.low = LIT64( 0x8000000000000000 );  in floatx80_round_to_int()
2731     if ( z.low != a.low ) roundData->exception |= float_flag_inexact;  in floatx80_round_to_int()
2732     return z;  in floatx80_round_to_int()
2737 -------------------------------------------------------------------------------
2738 Returns the result of adding the absolute values of the extended double-
2739 precision floating-point values `a' and `b'.  If `zSign' is true, the sum is
2742 Floating-point Arithmetic.
2743 -------------------------------------------------------------------------------
2755     expDiff = aExp - bExp;  in addFloatx80Sigs()
2761         if ( bExp == 0 ) --expDiff;  in addFloatx80Sigs()
2771         shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );  in addFloatx80Sigs()
2806 -------------------------------------------------------------------------------
2808 double-precision floating-point values `a' and `b'.  If `zSign' is true,
2811 Standard for Binary Floating-point Arithmetic.
2812 -------------------------------------------------------------------------------
2819     floatx80 z;  in subFloatx80Sigs()  local
2825     expDiff = aExp - bExp;  in subFloatx80Sigs()
2832         roundData->exception |= float_flag_invalid;  in subFloatx80Sigs()
2833         z.low = floatx80_default_nan_low;  in subFloatx80Sigs()
2834         z.high = floatx80_default_nan_high;  in subFloatx80Sigs()
2835         z.__padding = 0;  in subFloatx80Sigs()
2836         return z;  in subFloatx80Sigs()
2845     return packFloatx80( roundData->mode == float_round_down, 0, 0 );  in subFloatx80Sigs()
2852     shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );  in subFloatx80Sigs()
2863     if ( bExp == 0 ) --expDiff;  in subFloatx80Sigs()
2876 -------------------------------------------------------------------------------
2877 Returns the result of adding the extended double-precision floating-point
2879 Standard for Binary Floating-point Arithmetic.
2880 -------------------------------------------------------------------------------
2898 -------------------------------------------------------------------------------
2899 Returns the result of subtracting the extended double-precision floating-
2901 IEC/IEEE Standard for Binary Floating-point Arithmetic.
2902 -------------------------------------------------------------------------------
2920 -------------------------------------------------------------------------------
2921 Returns the result of multiplying the extended double-precision floating-
2923 IEC/IEEE Standard for Binary Floating-point Arithmetic.
2924 -------------------------------------------------------------------------------
2931     floatx80 z;  in floatx80_mul()  local
2952             roundData->exception |= float_flag_invalid;  in floatx80_mul()
2953             z.low = floatx80_default_nan_low;  in floatx80_mul()
2954             z.high = floatx80_default_nan_high;  in floatx80_mul()
2955             z.__padding = 0;  in floatx80_mul()
2956             return z;  in floatx80_mul()
2968     zExp = aExp + bExp - 0x3FFE;  in floatx80_mul()
2972         --zExp;  in floatx80_mul()
2981 -------------------------------------------------------------------------------
2982 Returns the result of dividing the extended double-precision floating-point
2984 according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2985 -------------------------------------------------------------------------------
2993     floatx80 z;  in floatx80_div()  local
3018                 roundData->exception |= float_flag_invalid;  in floatx80_div()
3019                 z.low = floatx80_default_nan_low;  in floatx80_div()
3020                 z.high = floatx80_default_nan_high;  in floatx80_div()
3021                 z.__padding = 0;  in floatx80_div()
3022                 return z;  in floatx80_div()
3024             roundData->exception |= float_flag_divbyzero;  in floatx80_div()
3033     zExp = aExp - bExp + 0x3FFE;  in floatx80_div()
3043         --zSig0;  in floatx80_div()
3051             --zSig1;  in floatx80_div()
3063 -------------------------------------------------------------------------------
3064 Returns the remainder of the extended double-precision floating-point value
3066 according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
3067 -------------------------------------------------------------------------------
3075     floatx80 z;  in floatx80_rem()  local
3097             roundData->exception |= float_flag_invalid;  in floatx80_rem()
3098             z.low = floatx80_default_nan_low;  in floatx80_rem()
3099             z.high = floatx80_default_nan_high;  in floatx80_rem()
3100             z.__padding = 0;  in floatx80_rem()
3101             return z;  in floatx80_rem()
3111     expDiff = aExp - bExp;  in floatx80_rem()
3114         if ( expDiff < -1 ) return a;  in floatx80_rem()
3119     if ( q ) aSig0 -= bSig;  in floatx80_rem()
3120     expDiff -= 64;  in floatx80_rem()
3123         q = ( 2 < q ) ? q - 2 : 0;  in floatx80_rem()
3127         expDiff -= 62;  in floatx80_rem()
3132         q = ( 2 < q ) ? q - 2 : 0;  in floatx80_rem()
3133         q >>= 64 - expDiff;  in floatx80_rem()
3134         mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );  in floatx80_rem()
3136         shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );  in floatx80_rem()
3163 -------------------------------------------------------------------------------
3164 Returns the square root of the extended double-precision floating-point
3166 for Binary Floating-point Arithmetic.
3167 -------------------------------------------------------------------------------
3176     floatx80 z;  in floatx80_sqrt()  local
3189         roundData->exception |= float_flag_invalid;  in floatx80_sqrt()
3190         z.low = floatx80_default_nan_low;  in floatx80_sqrt()
3191         z.high = floatx80_default_nan_high;  in floatx80_sqrt()
3192         z.__padding = 0;  in floatx80_sqrt()
3193         return z;  in floatx80_sqrt()
3199     zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;  in floatx80_sqrt()
3210         --zSig0;  in floatx80_sqrt()
3225             --zSig1;  in floatx80_sqrt()
3240 -------------------------------------------------------------------------------
3241 Returns 1 if the extended double-precision floating-point value `a' is
3243 performed according to the IEC/IEEE Standard for Binary Floating-point
3245 -------------------------------------------------------------------------------
3263         && (    ( a.high == b.high )  in floatx80_eq()
3265                   && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )  in floatx80_eq()
3271 -------------------------------------------------------------------------------
3272 Returns 1 if the extended double-precision floating-point value `a' is
3275 Floating-point Arithmetic.
3276 -------------------------------------------------------------------------------
3295             || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )  in floatx80_le()
3299           aSign ? le128( b.high, b.low, a.high, a.low )  in floatx80_le()
3300         : le128( a.high, a.low, b.high, b.low );  in floatx80_le()
3305 -------------------------------------------------------------------------------
3306 Returns 1 if the extended double-precision floating-point value `a' is
3308 is performed according to the IEC/IEEE Standard for Binary Floating-point
3310 -------------------------------------------------------------------------------
3329             && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )  in floatx80_lt()
3333           aSign ? lt128( b.high, b.low, a.high, a.low )  in floatx80_lt()
3334         : lt128( a.high, a.low, b.high, b.low );  in floatx80_lt()
3339 -------------------------------------------------------------------------------
3340 Returns 1 if the extended double-precision floating-point value `a' is equal
3343 according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
3344 -------------------------------------------------------------------------------
3359         && (    ( a.high == b.high )  in floatx80_eq_signaling()
3361                   && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )  in floatx80_eq_signaling()
3367 -------------------------------------------------------------------------------
3368 Returns 1 if the extended double-precision floating-point value `a' is less
3371 to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
3372 -------------------------------------------------------------------------------
3391             || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )  in floatx80_le_quiet()
3395           aSign ? le128( b.high, b.low, a.high, a.low )  in floatx80_le_quiet()
3396         : le128( a.high, a.low, b.high, b.low );  in floatx80_le_quiet()
3401 -------------------------------------------------------------------------------
3402 Returns 1 if the extended double-precision floating-point value `a' is less
3405 IEC/IEEE Standard for Binary Floating-point Arithmetic.
3406 -------------------------------------------------------------------------------
3425             && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )  in floatx80_lt_quiet()
3429           aSign ? lt128( b.high, b.low, a.high, a.low )  in floatx80_lt_quiet()
3430         : lt128( a.high, a.low, b.high, b.low );  in floatx80_lt_quiet()