diff options
author | joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d> | 2012-10-10 15:35:46 +0000 |
---|---|---|
committer | joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d> | 2012-10-10 15:35:46 +0000 |
commit | 2d32c4f00084f68a390e8fa4291acb49e9c0df8e (patch) | |
tree | 00964019e9307917f730b8c6b817f0cb9496a167 /libc/sysdeps/ieee754 | |
parent | 7dfcd4332472afda13e2ea9c0eaba15a08d8351e (diff) |
Merge changes between r20863 and r21108 from /fsf/trunk.
git-svn-id: svn://svn.eglibc.org/trunk@21109 7b3dc134-2b1b-0410-93df-9e9f96275f8d
Diffstat (limited to 'libc/sysdeps/ieee754')
-rw-r--r-- | libc/sysdeps/ieee754/dbl-64/s_fma.c | 10 | ||||
-rw-r--r-- | libc/sysdeps/ieee754/dbl-64/s_fmaf.c | 7 | ||||
-rw-r--r-- | libc/sysdeps/ieee754/dbl-64/x2y2m1.c | 111 | ||||
-rw-r--r-- | libc/sysdeps/ieee754/dbl-64/x2y2m1f.c | 32 | ||||
-rw-r--r-- | libc/sysdeps/ieee754/flt-32/s_sincosf.c | 10 | ||||
-rw-r--r-- | libc/sysdeps/ieee754/ldbl-128/bits/huge_vall.h | 50 | ||||
-rw-r--r-- | libc/sysdeps/ieee754/ldbl-128/s_fma.c | 8 | ||||
-rw-r--r-- | libc/sysdeps/ieee754/ldbl-128/s_fmal.c | 10 | ||||
-rw-r--r-- | libc/sysdeps/ieee754/ldbl-128/x2y2m1l.c | 111 | ||||
-rw-r--r-- | libc/sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c | 128 | ||||
-rw-r--r-- | libc/sysdeps/ieee754/ldbl-96/s_fma.c | 6 | ||||
-rw-r--r-- | libc/sysdeps/ieee754/ldbl-96/s_fmal.c | 10 | ||||
-rw-r--r-- | libc/sysdeps/ieee754/ldbl-96/x2y2m1.c | 39 | ||||
-rw-r--r-- | libc/sysdeps/ieee754/ldbl-96/x2y2m1l.c | 111 | ||||
-rwxr-xr-x | libc/sysdeps/ieee754/ldbl-opt/configure | 120 |
15 files changed, 590 insertions, 173 deletions
diff --git a/libc/sysdeps/ieee754/dbl-64/s_fma.c b/libc/sysdeps/ieee754/dbl-64/s_fma.c index ce3bd36fa..5e21461a4 100644 --- a/libc/sysdeps/ieee754/dbl-64/s_fma.c +++ b/libc/sysdeps/ieee754/dbl-64/s_fma.c @@ -49,6 +49,11 @@ __fma (double x, double y, double z) && u.ieee.exponent != 0x7ff && v.ieee.exponent != 0x7ff) return (z + x) + y; + /* If z is zero and x are y are nonzero, compute the result + as x * y to avoid the wrong sign of a zero result if x * y + underflows to 0. */ + if (z == 0 && x != 0 && y != 0) + return x * y; /* If x or y or z is Inf/NaN, or if fma will certainly overflow, or if x * y is less than half of DBL_DENORM_MIN, compute as x * y + z. */ @@ -128,6 +133,11 @@ __fma (double x, double y, double z) y = v.d; z = w.d; } + + /* Ensure correct sign of exact 0 + 0. */ + if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) + return x * y + z; + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1) double x1 = x * C; diff --git a/libc/sysdeps/ieee754/dbl-64/s_fmaf.c b/libc/sysdeps/ieee754/dbl-64/s_fmaf.c index e7a0650f0..a4f12d9f7 100644 --- a/libc/sysdeps/ieee754/dbl-64/s_fmaf.c +++ b/libc/sysdeps/ieee754/dbl-64/s_fmaf.c @@ -32,8 +32,15 @@ float __fmaf (float x, float y, float z) { fenv_t env; + /* Multiplication is always exact. */ double temp = (double) x * (double) y; + + /* Ensure correct sign of an exact zero result by performing the + addition in the original rounding mode in that case. */ + if (temp == -z) + return (float) temp + z; + union ieee754_double u; libc_feholdexcept_setround (&env, FE_TOWARDZERO); diff --git a/libc/sysdeps/ieee754/dbl-64/x2y2m1.c b/libc/sysdeps/ieee754/dbl-64/x2y2m1.c new file mode 100644 index 000000000..4badde3be --- /dev/null +++ b/libc/sysdeps/ieee754/dbl-64/x2y2m1.c @@ -0,0 +1,111 @@ +/* Compute x^2 + y^2 - 1, without large cancellation error. + Copyright (C) 2012 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <float.h> +#include <stdlib.h> + +/* Calculate X + Y exactly and store the result in *HI + *LO. It is + given that |X| >= |Y| and the values are small enough that no + overflow occurs. */ + +static inline void +add_split (double *hi, double *lo, double x, double y) +{ + /* Apply Dekker's algorithm. */ + *hi = x + y; + *lo = (x - *hi) + y; +} + +/* Calculate X * Y exactly and store the result in *HI + *LO. It is + given that the values are small enough that no overflow occurs and + large enough (or zero) that no underflow occurs. */ + +static inline void +mul_split (double *hi, double *lo, double x, double y) +{ +#ifdef __FP_FAST_FMA + /* Fast built-in fused multiply-add. */ + *hi = x * y; + *lo = __builtin_fma (x, y, -*hi); +#elif defined FP_FAST_FMA + /* Fast library fused multiply-add, compiler before GCC 4.6. */ + *hi = x * y; + *lo = __fma (x, y, -*hi); +#else + /* Apply Dekker's algorithm. */ + *hi = x * y; +# define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1) + double x1 = x * C; + double y1 = y * C; +# undef C + x1 = (x - x1) + x1; + y1 = (y - y1) + y1; + double x2 = x - x1; + double y2 = y - y1; + *lo = (((x1 * y1 - *hi) + x1 * y2) + x2 * y1) + x2 * y2; +#endif +} + +/* Compare absolute values of floating-point values pointed to by P + and Q for qsort. */ + +static int +compare (const void *p, const void *q) +{ + double pd = fabs (*(const double *) p); + double qd = fabs (*(const double *) q); + if (pd < qd) + return -1; + else if (pd == qd) + return 0; + else + return 1; +} + +/* Return X^2 + Y^2 - 1, computed without large cancellation error. + It is given that 1 > X >= Y >= epsilon / 2, and that either X >= + 0.75 or Y >= 0.5. */ + +double +__x2y2m1 (double x, double y) +{ + double vals[4]; + SET_RESTORE_ROUND (FE_TONEAREST); + mul_split (&vals[1], &vals[0], x, x); + mul_split (&vals[3], &vals[2], y, y); + if (x >= 0.75) + vals[1] -= 1.0; + else + { + vals[1] -= 0.5; + vals[3] -= 0.5; + } + qsort (vals, 4, sizeof (double), compare); + /* Add up the values so that each element of VALS has absolute value + at most equal to the last set bit of the next nonzero + element. */ + for (size_t i = 0; i <= 2; i++) + { + add_split (&vals[i + 1], &vals[i], vals[i + 1], vals[i]); + qsort (vals + i + 1, 3 - i, sizeof (double), compare); + } + /* Now any error from this addition will be small. */ + return vals[3] + vals[2] + vals[1] + vals[0]; +} diff --git a/libc/sysdeps/ieee754/dbl-64/x2y2m1f.c b/libc/sysdeps/ieee754/dbl-64/x2y2m1f.c new file mode 100644 index 000000000..fcaa70851 --- /dev/null +++ b/libc/sysdeps/ieee754/dbl-64/x2y2m1f.c @@ -0,0 +1,32 @@ +/* Compute x^2 + y^2 - 1, without large cancellation error. + Copyright (C) 2012 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <float.h> + +/* Return X^2 + Y^2 - 1, computed without large cancellation error. + It is given that 1 > X >= Y >= epsilon / 2, and that either X >= + 0.75 or Y >= 0.5. */ + +float +__x2y2m1f (float x, float y) +{ + double dx = x, dy = y; + return (float) ((dx - 1) * (dx + 1) + dy * dy); +} diff --git a/libc/sysdeps/ieee754/flt-32/s_sincosf.c b/libc/sysdeps/ieee754/flt-32/s_sincosf.c index 1b4d000e1..c3bd998ab 100644 --- a/libc/sysdeps/ieee754/flt-32/s_sincosf.c +++ b/libc/sysdeps/ieee754/flt-32/s_sincosf.c @@ -21,9 +21,14 @@ #include <math_private.h> +#ifndef SINCOSF +# define SINCOSF_FUNC __sincosf +#else +# define SINCOSF_FUNC SINCOSF +#endif void -__sincosf (float x, float *sinx, float *cosx) +SINCOSF_FUNC (float x, float *sinx, float *cosx) { int32_t ix; @@ -70,4 +75,7 @@ __sincosf (float x, float *sinx, float *cosx) } } } + +#ifndef SINCOSF weak_alias (__sincosf, sincosf) +#endif diff --git a/libc/sysdeps/ieee754/ldbl-128/bits/huge_vall.h b/libc/sysdeps/ieee754/ldbl-128/bits/huge_vall.h deleted file mode 100644 index 89b207331..000000000 --- a/libc/sysdeps/ieee754/ldbl-128/bits/huge_vall.h +++ /dev/null @@ -1,50 +0,0 @@ -/* `HUGE_VALL' constant for IEEE 754 machines (where it is infinity). - Used by <stdlib.h> and <math.h> functions for overflow. - Copyright (C) 1992, 1995, 1996, 1997, 1999, 2000, 2004 - Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef _MATH_H -# error "Never use <bits/huge_vall.h> directly; include <math.h> instead." -#endif - -/* IEEE positive infinity (-HUGE_VAL is negative infinity). */ - -#if __GNUC_PREREQ(3,3) -# define HUGE_VALL (__builtin_huge_vall()) -#elif __GNUC_PREREQ(2,96) -# define HUGE_VALL (__extension__ 0x1.0p32767L) -#else -# include <endian.h> - -typedef union { unsigned char __c[16]; long double __ld; } __huge_vall_t; - -# if __BYTE_ORDER == __BIG_ENDIAN -# define __HUGE_VALL_bytes { 0x7f, 0xff, 0,0,0,0,0,0,0,0,0,0,0,0,0,0 } -# endif -# if __BYTE_ORDER == __LITTLE_ENDIAN -# define __HUGE_VALL_bytes { 0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0xff, 0x7f } -# endif - -# ifdef __GNUC__ -# define HUGE_VALL (__extension__ \ - ((__huge_vall_t) { __c : __HUGE_VALL_bytes }).__ld) -# else -static __huge_vall_t __huge_vall = { __HUGE_VALL_bytes }; -# define HUGE_VALL (__huge_vall.__ld) -# endif -#endif diff --git a/libc/sysdeps/ieee754/ldbl-128/s_fma.c b/libc/sysdeps/ieee754/ldbl-128/s_fma.c index 355b60ebb..b08ff29c0 100644 --- a/libc/sysdeps/ieee754/ldbl-128/s_fma.c +++ b/libc/sysdeps/ieee754/ldbl-128/s_fma.c @@ -1,5 +1,5 @@ /* Compute x * y + z as ternary operation. - Copyright (C) 2010 Free Software Foundation, Inc. + Copyright (C) 2010-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Jakub Jelinek <jakub@redhat.com>, 2010. @@ -33,6 +33,12 @@ __fma (double x, double y, double z) fenv_t env; /* Multiplication is always exact. */ long double temp = (long double) x * (long double) y; + + /* Ensure correct sign of an exact zero result by performing the + addition in the original rounding mode in that case. */ + if (temp == -z) + return (double) temp + z; + union ieee854_long_double u; feholdexcept (&env); fesetround (FE_TOWARDZERO); diff --git a/libc/sysdeps/ieee754/ldbl-128/s_fmal.c b/libc/sysdeps/ieee754/ldbl-128/s_fmal.c index 963bbd734..46b3d81ce 100644 --- a/libc/sysdeps/ieee754/ldbl-128/s_fmal.c +++ b/libc/sysdeps/ieee754/ldbl-128/s_fmal.c @@ -50,6 +50,11 @@ __fmal (long double x, long double y, long double z) && u.ieee.exponent != 0x7fff && v.ieee.exponent != 0x7fff) return (z + x) + y; + /* If z is zero and x are y are nonzero, compute the result + as x * y to avoid the wrong sign of a zero result if x * y + underflows to 0. */ + if (z == 0 && x != 0 && y != 0) + return x * y; /* If x or y or z is Inf/NaN, or if fma will certainly overflow, or if x * y is less than half of LDBL_DENORM_MIN, compute as x * y + z. */ @@ -129,6 +134,11 @@ __fmal (long double x, long double y, long double z) y = v.d; z = w.d; } + + /* Ensure correct sign of exact 0 + 0. */ + if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) + return x * y + z; + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1) long double x1 = x * C; diff --git a/libc/sysdeps/ieee754/ldbl-128/x2y2m1l.c b/libc/sysdeps/ieee754/ldbl-128/x2y2m1l.c new file mode 100644 index 000000000..a249cd347 --- /dev/null +++ b/libc/sysdeps/ieee754/ldbl-128/x2y2m1l.c @@ -0,0 +1,111 @@ +/* Compute x^2 + y^2 - 1, without large cancellation error. + Copyright (C) 2012 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <float.h> +#include <stdlib.h> + +/* Calculate X + Y exactly and store the result in *HI + *LO. It is + given that |X| >= |Y| and the values are small enough that no + overflow occurs. */ + +static inline void +add_split (long double *hi, long double *lo, long double x, long double y) +{ + /* Apply Dekker's algorithm. */ + *hi = x + y; + *lo = (x - *hi) + y; +} + +/* Calculate X * Y exactly and store the result in *HI + *LO. It is + given that the values are small enough that no overflow occurs and + large enough (or zero) that no underflow occurs. */ + +static inline void +mul_split (long double *hi, long double *lo, long double x, long double y) +{ +#ifdef __FP_FAST_FMAL + /* Fast built-in fused multiply-add. */ + *hi = x * y; + *lo = __builtin_fmal (x, y, -*hi); +#elif defined FP_FAST_FMAL + /* Fast library fused multiply-add, compiler before GCC 4.6. */ + *hi = x * y; + *lo = __fmal (x, y, -*hi); +#else + /* Apply Dekker's algorithm. */ + *hi = x * y; +# define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1) + long double x1 = x * C; + long double y1 = y * C; +# undef C + x1 = (x - x1) + x1; + y1 = (y - y1) + y1; + long double x2 = x - x1; + long double y2 = y - y1; + *lo = (((x1 * y1 - *hi) + x1 * y2) + x2 * y1) + x2 * y2; +#endif +} + +/* Compare absolute values of floating-point values pointed to by P + and Q for qsort. */ + +static int +compare (const void *p, const void *q) +{ + long double pld = fabsl (*(const long double *) p); + long double qld = fabsl (*(const long double *) q); + if (pld < qld) + return -1; + else if (pld == qld) + return 0; + else + return 1; +} + +/* Return X^2 + Y^2 - 1, computed without large cancellation error. + It is given that 1 > X >= Y >= epsilon / 2, and that either X >= + 0.75 or Y >= 0.5. */ + +long double +__x2y2m1l (long double x, long double y) +{ + long double vals[4]; + SET_RESTORE_ROUNDL (FE_TONEAREST); + mul_split (&vals[1], &vals[0], x, x); + mul_split (&vals[3], &vals[2], y, y); + if (x >= 0.75L) + vals[1] -= 1.0L; + else + { + vals[1] -= 0.5L; + vals[3] -= 0.5L; + } + qsort (vals, 4, sizeof (long double), compare); + /* Add up the values so that each element of VALS has absolute value + at most equal to the last set bit of the next nonzero + element. */ + for (size_t i = 0; i <= 2; i++) + { + add_split (&vals[i + 1], &vals[i], vals[i + 1], vals[i]); + qsort (vals + i + 1, 3 - i, sizeof (long double), compare); + } + /* Now any error from this addition will be small. */ + return vals[3] + vals[2] + vals[1] + vals[0]; +} diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c b/libc/sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c new file mode 100644 index 000000000..4379b68c3 --- /dev/null +++ b/libc/sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c @@ -0,0 +1,128 @@ +/* Compute x^2 + y^2 - 1, without large cancellation error. + Copyright (C) 2012 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <float.h> + +/* Calculate X + Y exactly and store the result in *HI + *LO. It is + given that |X| >= |Y| and the values are small enough that no + overflow occurs. */ + +static inline void +add_split (double *hi, double *lo, double x, double y) +{ + /* Apply Dekker's algorithm. */ + *hi = x + y; + *lo = (x - *hi) + y; +} + +/* Calculate X * Y exactly and store the result in *HI + *LO. It is + given that the values are small enough that no overflow occurs and + large enough (or zero) that no underflow occurs. */ + +static inline void +mul_split (double *hi, double *lo, double x, double y) +{ +#ifdef __FP_FAST_FMA + /* Fast built-in fused multiply-add. */ + *hi = x * y; + *lo = __builtin_fma (x, y, -*hi); +#elif defined FP_FAST_FMA + /* Fast library fused multiply-add, compiler before GCC 4.6. */ + *hi = x * y; + *lo = __fma (x, y, -*hi); +#else + /* Apply Dekker's algorithm. */ + *hi = x * y; +# define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1) + double x1 = x * C; + double y1 = y * C; +# undef C + x1 = (x - x1) + x1; + y1 = (y - y1) + y1; + double x2 = x - x1; + double y2 = y - y1; + *lo = (((x1 * y1 - *hi) + x1 * y2) + x2 * y1) + x2 * y2; +#endif +} + +/* Compare absolute values of floating-point values pointed to by P + and Q for qsort. */ + +static int +compare (const void *p, const void *q) +{ + double pd = fabs (*(const double *) p); + double qd = fabs (*(const double *) q); + if (pd < qd) + return -1; + else if (pd == qd) + return 0; + else + return 1; +} + +/* Return X^2 + Y^2 - 1, computed without large cancellation error. + It is given that 1 > X >= Y >= epsilon / 2, and that either X >= + 0.75 or Y >= 0.5. */ + +long double +__x2y2m1l (long double x, long double y) +{ + double vals[12]; + SET_RESTORE_ROUND (FE_TONEAREST); + union ibm_extended_long_double xu, yu; + xu.d = x; + yu.d = y; + if (fabs (xu.dd[1]) < 0x1p-500) + xu.dd[1] = 0.0; + if (fabs (yu.dd[1]) < 0x1p-500) + yu.dd[1] = 0.0; + mul_split (&vals[1], &vals[0], xu.dd[0], xu.dd[0]); + mul_split (&vals[3], &vals[2], xu.dd[0], xu.dd[1]); + vals[2] *= 2.0; + vals[3] *= 2.0; + mul_split (&vals[5], &vals[4], xu.dd[1], xu.dd[1]); + mul_split (&vals[7], &vals[6], yu.dd[0], yu.dd[0]); + mul_split (&vals[9], &vals[8], yu.dd[0], yu.dd[1]); + vals[8] *= 2.0; + vals[9] *= 2.0; + mul_split (&vals[11], &vals[10], yu.dd[1], yu.dd[1]); + if (xu.dd[0] >= 0.75) + vals[1] -= 1.0; + else + { + vals[1] -= 0.5; + vals[7] -= 0.5; + } + qsort (vals, 12, sizeof (double), compare); + /* Add up the values so that each element of VALS has absolute value + at most equal to the last set bit of the next nonzero + element. */ + for (size_t i = 0; i <= 10; i++) + { + add_split (&vals[i + 1], &vals[i], vals[i + 1], vals[i]); + qsort (vals + i + 1, 11 - i, sizeof (double), compare); + } + /* Now any error from this addition will be small. */ + long double retval = (long double) vals[11]; + for (size_t i = 10; i != (size_t) -1; i--) + retval += (long double) vals[i]; + return retval; +} diff --git a/libc/sysdeps/ieee754/ldbl-96/s_fma.c b/libc/sysdeps/ieee754/ldbl-96/s_fma.c index 78c0b0db1..001d8063d 100644 --- a/libc/sysdeps/ieee754/ldbl-96/s_fma.c +++ b/libc/sysdeps/ieee754/ldbl-96/s_fma.c @@ -1,5 +1,5 @@ /* Compute x * y + z as ternary operation. - Copyright (C) 2010 Free Software Foundation, Inc. + Copyright (C) 2010-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Jakub Jelinek <jakub@redhat.com>, 2010. @@ -38,6 +38,10 @@ __fma (double x, double y, double z) return (x * y) + z; } + /* Ensure correct sign of exact 0 + 0. */ + if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) + return x * y + z; + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1ULL << (LDBL_MANT_DIG + 1) / 2) + 1) long double x1 = (long double) x * C; diff --git a/libc/sysdeps/ieee754/ldbl-96/s_fmal.c b/libc/sysdeps/ieee754/ldbl-96/s_fmal.c index ca1e0905a..d12512428 100644 --- a/libc/sysdeps/ieee754/ldbl-96/s_fmal.c +++ b/libc/sysdeps/ieee754/ldbl-96/s_fmal.c @@ -50,6 +50,11 @@ __fmal (long double x, long double y, long double z) && u.ieee.exponent != 0x7fff && v.ieee.exponent != 0x7fff) return (z + x) + y; + /* If z is zero and x are y are nonzero, compute the result + as x * y to avoid the wrong sign of a zero result if x * y + underflows to 0. */ + if (z == 0 && x != 0 && y != 0) + return x * y; /* If x or y or z is Inf/NaN, or if fma will certainly overflow, or if x * y is less than half of LDBL_DENORM_MIN, compute as x * y + z. */ @@ -129,6 +134,11 @@ __fmal (long double x, long double y, long double z) y = v.d; z = w.d; } + + /* Ensure correct sign of exact 0 + 0. */ + if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) + return x * y + z; + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1) long double x1 = x * C; diff --git a/libc/sysdeps/ieee754/ldbl-96/x2y2m1.c b/libc/sysdeps/ieee754/ldbl-96/x2y2m1.c new file mode 100644 index 000000000..3086fa2c8 --- /dev/null +++ b/libc/sysdeps/ieee754/ldbl-96/x2y2m1.c @@ -0,0 +1,39 @@ +/* Compute x^2 + y^2 - 1, without large cancellation error. + Copyright (C) 2012 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <float.h> + +#if FLT_EVAL_METHOD == 0 + +# include <sysdeps/ieee754/dbl-64/x2y2m1.c> + +#else + +/* Return X^2 + Y^2 - 1, computed without large cancellation error. + It is given that 1 > X >= Y >= epsilon / 2, and that either X >= + 0.75 or Y >= 0.5. */ + +double +__x2y2m1 (double x, double y) +{ + return (double) __x2y2m1l (x, y); +} + +#endif diff --git a/libc/sysdeps/ieee754/ldbl-96/x2y2m1l.c b/libc/sysdeps/ieee754/ldbl-96/x2y2m1l.c new file mode 100644 index 000000000..a249cd347 --- /dev/null +++ b/libc/sysdeps/ieee754/ldbl-96/x2y2m1l.c @@ -0,0 +1,111 @@ +/* Compute x^2 + y^2 - 1, without large cancellation error. + Copyright (C) 2012 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <float.h> +#include <stdlib.h> + +/* Calculate X + Y exactly and store the result in *HI + *LO. It is + given that |X| >= |Y| and the values are small enough that no + overflow occurs. */ + +static inline void +add_split (long double *hi, long double *lo, long double x, long double y) +{ + /* Apply Dekker's algorithm. */ + *hi = x + y; + *lo = (x - *hi) + y; +} + +/* Calculate X * Y exactly and store the result in *HI + *LO. It is + given that the values are small enough that no overflow occurs and + large enough (or zero) that no underflow occurs. */ + +static inline void +mul_split (long double *hi, long double *lo, long double x, long double y) +{ +#ifdef __FP_FAST_FMAL + /* Fast built-in fused multiply-add. */ + *hi = x * y; + *lo = __builtin_fmal (x, y, -*hi); +#elif defined FP_FAST_FMAL + /* Fast library fused multiply-add, compiler before GCC 4.6. */ + *hi = x * y; + *lo = __fmal (x, y, -*hi); +#else + /* Apply Dekker's algorithm. */ + *hi = x * y; +# define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1) + long double x1 = x * C; + long double y1 = y * C; +# undef C + x1 = (x - x1) + x1; + y1 = (y - y1) + y1; + long double x2 = x - x1; + long double y2 = y - y1; + *lo = (((x1 * y1 - *hi) + x1 * y2) + x2 * y1) + x2 * y2; +#endif +} + +/* Compare absolute values of floating-point values pointed to by P + and Q for qsort. */ + +static int +compare (const void *p, const void *q) +{ + long double pld = fabsl (*(const long double *) p); + long double qld = fabsl (*(const long double *) q); + if (pld < qld) + return -1; + else if (pld == qld) + return 0; + else + return 1; +} + +/* Return X^2 + Y^2 - 1, computed without large cancellation error. + It is given that 1 > X >= Y >= epsilon / 2, and that either X >= + 0.75 or Y >= 0.5. */ + +long double +__x2y2m1l (long double x, long double y) +{ + long double vals[4]; + SET_RESTORE_ROUNDL (FE_TONEAREST); + mul_split (&vals[1], &vals[0], x, x); + mul_split (&vals[3], &vals[2], y, y); + if (x >= 0.75L) + vals[1] -= 1.0L; + else + { + vals[1] -= 0.5L; + vals[3] -= 0.5L; + } + qsort (vals, 4, sizeof (long double), compare); + /* Add up the values so that each element of VALS has absolute value + at most equal to the last set bit of the next nonzero + element. */ + for (size_t i = 0; i <= 2; i++) + { + add_split (&vals[i + 1], &vals[i], vals[i + 1], vals[i]); + qsort (vals + i + 1, 3 - i, sizeof (long double), compare); + } + /* Now any error from this addition will be small. */ + return vals[3] + vals[2] + vals[1] + vals[0]; +} diff --git a/libc/sysdeps/ieee754/ldbl-opt/configure b/libc/sysdeps/ieee754/ldbl-opt/configure index d1f3177cf..6e69038b9 100755 --- a/libc/sysdeps/ieee754/ldbl-opt/configure +++ b/libc/sysdeps/ieee754/ldbl-opt/configure @@ -1,123 +1,3 @@ - -# as_fn_set_status STATUS -# ----------------------- -# Set $? to STATUS, without forking. -as_fn_set_status () -{ - return $1 -} # as_fn_set_status - -# as_fn_exit STATUS -# ----------------- -# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. -as_fn_exit () -{ - set +e - as_fn_set_status $1 - exit $1 -} # as_fn_exit -if expr a : '\(a\)' >/dev/null 2>&1 && - test "X`expr 00001 : '.*\(...\)'`" = X001; then - as_expr=expr -else - as_expr=false -fi - -if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then - as_basename=basename -else - as_basename=false -fi - -as_me=`$as_basename -- "$0" || -$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ - X"$0" : 'X\(//\)$' \| \ - X"$0" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X/"$0" | - sed '/^.*\/\([^/][^/]*\)\/*$/{ - s//\1/ - q - } - /^X\/\(\/\/\)$/{ - s//\1/ - q - } - /^X\/\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - - - as_lineno_1=$LINENO as_lineno_1a=$LINENO - as_lineno_2=$LINENO as_lineno_2a=$LINENO - eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && - test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { - # Blame Lee E. McMahon (1931-1989) for sed's syntax. :-) - sed -n ' - p - /[$]LINENO/= - ' <$as_myself | - sed ' - s/[$]LINENO.*/&-/ - t lineno - b - :lineno - N - :loop - s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ - t loop - s/-\n.*// - ' >$as_me.lineno && - chmod +x "$as_me.lineno" || - { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } - - # Don't try to exec as it changes $[0], causing all sort of problems - # (the dirname of $[0] is not the place where we might find the - # original and so on. Autoconf is especially sensitive to this). - . "./$as_me.lineno" - # Exit status is that of the last command. - exit -} - - -# ac_fn_c_try_compile LINENO -# -------------------------- -# Try to compile conftest.$ac_ext, and return whether this succeeded. -ac_fn_c_try_compile () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - rm -f conftest.$ac_objext - if { { ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_compile") 2>conftest.err - ac_status=$? - if test -s conftest.err; then - grep -v '^ *+' conftest.err >conftest.er1 - cat conftest.er1 >&5 - mv -f conftest.er1 conftest.err - fi - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then : - ac_retval=0 -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_retval=1 -fi - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - as_fn_set_status $ac_retval - -} # ac_fn_c_try_compile # This file is generated from configure.in by Autoconf. DO NOT EDIT! # Local configure fragment for sysdeps/ieee754/ldbl-opt/. |