diff options
Diffstat (limited to 'libc/sysdeps/ieee754/ldbl-96')
-rw-r--r-- | libc/sysdeps/ieee754/ldbl-96/s_fma.c | 18 | ||||
-rw-r--r-- | libc/sysdeps/ieee754/ldbl-96/s_fmal.c | 135 |
2 files changed, 113 insertions, 40 deletions
diff --git a/libc/sysdeps/ieee754/ldbl-96/s_fma.c b/libc/sysdeps/ieee754/ldbl-96/s_fma.c index 001d8063d..bf2d794c9 100644 --- a/libc/sysdeps/ieee754/ldbl-96/s_fma.c +++ b/libc/sysdeps/ieee754/ldbl-96/s_fma.c @@ -42,6 +42,10 @@ __fma (double x, double y, double z) if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) return x * y + z; + fenv_t env; + feholdexcept (&env); + fesetround (FE_TONEAREST); + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1ULL << (LDBL_MANT_DIG + 1) / 2) + 1) long double x1 = (long double) x * C; @@ -60,9 +64,19 @@ __fma (double x, double y, double z) t1 = m1 - t1; t2 = z - t2; long double a2 = t1 + t2; + feclearexcept (FE_INEXACT); + + /* If the result is an exact zero, ensure it has the correct + sign. */ + if (a1 == 0 && m2 == 0) + { + feupdateenv (&env); + /* Ensure that round-to-nearest value of z + m1 is not + reused. */ + asm volatile ("" : "=m" (z) : "m" (z)); + return z + m1; + } - fenv_t env; - feholdexcept (&env); fesetround (FE_TOWARDZERO); /* Perform m2 + a2 addition with round to odd. */ a2 = a2 + m2; diff --git a/libc/sysdeps/ieee754/ldbl-96/s_fmal.c b/libc/sysdeps/ieee754/ldbl-96/s_fmal.c index d12512428..c86dff6f8 100644 --- a/libc/sysdeps/ieee754/ldbl-96/s_fmal.c +++ b/libc/sysdeps/ieee754/ldbl-96/s_fmal.c @@ -22,6 +22,7 @@ #include <fenv.h> #include <ieee754.h> #include <math_private.h> +#include <tininess.h> /* This implementation uses rounding to odd to avoid problems with double rounding. See a paper by Boldo and Melquiond: @@ -55,17 +56,47 @@ __fmal (long double x, long double y, long double z) underflows to 0. */ if (z == 0 && x != 0 && y != 0) return x * y; - /* If x or y or z is Inf/NaN, or if fma will certainly overflow, - or if x * y is less than half of LDBL_DENORM_MIN, - compute as x * y + z. */ + /* If x or y or z is Inf/NaN, or if x * y is zero, compute as + x * y + z. */ if (u.ieee.exponent == 0x7fff || v.ieee.exponent == 0x7fff || w.ieee.exponent == 0x7fff - || u.ieee.exponent + v.ieee.exponent - > 0x7fff + IEEE854_LONG_DOUBLE_BIAS - || u.ieee.exponent + v.ieee.exponent - < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2) + || x == 0 + || y == 0) return x * y + z; + /* If fma will certainly overflow, compute as x * y. */ + if (u.ieee.exponent + v.ieee.exponent + > 0x7fff + IEEE854_LONG_DOUBLE_BIAS) + return x * y; + /* If x * y is less than 1/4 of LDBL_DENORM_MIN, neither the + result nor whether there is underflow depends on its exact + value, only on its sign. */ + if (u.ieee.exponent + v.ieee.exponent + < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2) + { + int neg = u.ieee.negative ^ v.ieee.negative; + long double tiny = neg ? -0x1p-16445L : 0x1p-16445L; + if (w.ieee.exponent >= 3) + return tiny + z; + /* Scaling up, adding TINY and scaling down produces the + correct result, because in round-to-nearest mode adding + TINY has no effect and in other modes double rounding is + harmless. But it may not produce required underflow + exceptions. */ + v.d = z * 0x1p65L + tiny; + if (TININESS_AFTER_ROUNDING + ? v.ieee.exponent < 66 + : (w.ieee.exponent == 0 + || (w.ieee.exponent == 1 + && w.ieee.negative != neg + && w.ieee.mantissa1 == 0 + && w.ieee.mantissa0 == 0x80000000))) + { + volatile long double force_underflow = x * y; + (void) force_underflow; + } + return v.d * 0x1p-65L; + } if (u.ieee.exponent + v.ieee.exponent >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG) { @@ -85,8 +116,17 @@ __fmal (long double x, long double y, long double z) { /* Similarly. If z exponent is very large and x and y exponents are - very small, it doesn't matter if we don't adjust it. */ - if (u.ieee.exponent > v.ieee.exponent) + very small, adjust them up to avoid spurious underflows, + rather than down. */ + if (u.ieee.exponent + v.ieee.exponent + <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) + { + if (u.ieee.exponent > v.ieee.exponent) + u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + else + v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + } + else if (u.ieee.exponent > v.ieee.exponent) { if (u.ieee.exponent > LDBL_MANT_DIG) u.ieee.exponent -= LDBL_MANT_DIG; @@ -116,15 +156,15 @@ __fmal (long double x, long double y, long double z) <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */ { if (u.ieee.exponent > v.ieee.exponent) - u.ieee.exponent += 2 * LDBL_MANT_DIG; + u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; else - v.ieee.exponent += 2 * LDBL_MANT_DIG; - if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 4) + v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6) { if (w.ieee.exponent) - w.ieee.exponent += 2 * LDBL_MANT_DIG; + w.ieee.exponent += 2 * LDBL_MANT_DIG + 2; else - w.d *= 0x1p128L; + w.d *= 0x1p130L; adjust = -1; } /* Otherwise x * y should just affect inexact @@ -139,6 +179,10 @@ __fmal (long double x, long double y, long double z) if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) return x * y + z; + fenv_t env; + feholdexcept (&env); + fesetround (FE_TONEAREST); + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1) long double x1 = x * C; @@ -157,9 +201,19 @@ __fmal (long double x, long double y, long double z) t1 = m1 - t1; t2 = z - t2; long double a2 = t1 + t2; + feclearexcept (FE_INEXACT); + + /* If the result is an exact zero, ensure it has the correct + sign. */ + if (a1 == 0 && m2 == 0) + { + feupdateenv (&env); + /* Ensure that round-to-nearest value of z + m1 is not + reused. */ + asm volatile ("" : "=m" (z) : "m" (z)); + return z + m1; + } - fenv_t env; - feholdexcept (&env); fesetround (FE_TOWARDZERO); /* Perform m2 + a2 addition with round to odd. */ u.d = a2 + m2; @@ -195,39 +249,44 @@ __fmal (long double x, long double y, long double z) /* If a1 + u.d is exact, the only rounding happens during scaling down. */ if (j == 0) - return v.d * 0x1p-128L; + return v.d * 0x1p-130L; /* If result rounded to zero is not subnormal, no double rounding will occur. */ - if (v.ieee.exponent > 128) - return (a1 + u.d) * 0x1p-128L; - /* If v.d * 0x1p-128L with round to zero is a subnormal above - or equal to LDBL_MIN / 2, then v.d * 0x1p-128L shifts mantissa + if (v.ieee.exponent > 130) + return (a1 + u.d) * 0x1p-130L; + /* If v.d * 0x1p-130L with round to zero is a subnormal above + or equal to LDBL_MIN / 2, then v.d * 0x1p-130L shifts mantissa down just by 1 bit, which means v.ieee.mantissa1 |= j would change the round bit, not sticky or guard bit. - v.d * 0x1p-128L never normalizes by shifting up, + v.d * 0x1p-130L never normalizes by shifting up, so round bit plus sticky bit should be already enough for proper rounding. */ - if (v.ieee.exponent == 128) + if (v.ieee.exponent == 130) { - /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding, - v.ieee.mantissa1 & 1 is the round bit and j is our sticky - bit. In round-to-nearest 001 rounds down like 00, - 011 rounds up, even though 01 rounds down (thus we need - to adjust), 101 rounds down like 10 and 111 rounds up - like 11. */ - if ((v.ieee.mantissa1 & 3) == 1) + /* If the exponent would be in the normal range when + rounding to normal precision with unbounded exponent + range, the exact result is known and spurious underflows + must be avoided on systems detecting tininess after + rounding. */ + if (TININESS_AFTER_ROUNDING) { - v.d *= 0x1p-128L; - if (v.ieee.negative) - return v.d - 0x1p-16445L /* __LDBL_DENORM_MIN__ */; - else - return v.d + 0x1p-16445L /* __LDBL_DENORM_MIN__ */; + w.d = a1 + u.d; + if (w.ieee.exponent == 131) + return w.d * 0x1p-130L; } - else - return v.d * 0x1p-128L; + /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding, + v.ieee.mantissa1 & 1 is the round bit and j is our sticky + bit. */ + w.d = 0.0L; + w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j; + w.ieee.negative = v.ieee.negative; + v.ieee.mantissa1 &= ~3U; + v.d *= 0x1p-130L; + w.d *= 0x1p-2L; + return v.d + w.d; } v.ieee.mantissa1 |= j; - return v.d * 0x1p-128L; + return v.d * 0x1p-130L; } } weak_alias (__fmal, fmal) |