aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTue Ly <lntue@google.com>2022-08-01 09:57:29 -0400
committerTue Ly <lntue@google.com>2022-08-05 09:58:01 -0400
commit131dda9acc6978aba4740d75ca45f411fbabd726 (patch)
treeca81a0032bb1376efc5760f7dbd48fd87369798b
parent19bb535ed99407c95863807a01bc11e205842070 (diff)
[libc] Implement sincosf function correctly rounded to all rounding modes.linaro-local/ci/tcwg_kernel/llvm-master-aarch64-stable-allmodconfig
Refactor common range reductions and evaluations for sinf, cosf, and sincosf. Added exhaustive tests for sincosf. Performance before the patch: ``` System LIBC reciprocal throughput : 30.205 LIBC reciprocal throughput : 30.533 System LIBC latency : 67.961 LIBC latency : 61.564 ``` Performance after the patch: ``` System LIBC reciprocal throughput : 30.409 LIBC reciprocal throughput : 20.273 System LIBC latency : 67.527 LIBC latency : 61.959 ``` Reviewed By: orex Differential Revision: https://reviews.llvm.org/D130901
-rw-r--r--libc/docs/math.rst4
-rw-r--r--libc/src/math/generic/CMakeLists.txt36
-rw-r--r--libc/src/math/generic/common_constants.cpp17
-rw-r--r--libc/src/math/generic/common_constants.h7
-rw-r--r--libc/src/math/generic/cosf.cpp73
-rw-r--r--libc/src/math/generic/sincosf.cpp235
-rw-r--r--libc/src/math/generic/sincosf_data.cpp51
-rw-r--r--libc/src/math/generic/sincosf_utils.h188
-rw-r--r--libc/src/math/generic/sinf.cpp59
-rw-r--r--libc/test/src/math/exhaustive/CMakeLists.txt17
-rw-r--r--libc/test/src/math/exhaustive/sincosf_test.cpp77
-rw-r--r--libc/test/src/math/sincosf_test.cpp118
-rw-r--r--utils/bazel/llvm-project-overlay/libc/BUILD.bazel31
13 files changed, 508 insertions, 405 deletions
diff --git a/libc/docs/math.rst b/libc/docs/math.rst
index 4445c1a1ad1a..f3bab2633e62 100644
--- a/libc/docs/math.rst
+++ b/libc/docs/math.rst
@@ -166,7 +166,7 @@ log10 |check|
log1p |check|
log2 |check|
sin |check| large
-sincos 0.776 ULPs large
+sincos |check| large
sinh |check|
sqrt |check| |check| |check|
tanh |check|
@@ -232,6 +232,8 @@ Performance
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
| sinf | 13 | 25 | 54 | 57 | :math:`[-\pi, \pi]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA |
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
+| sincosf | 20 | 30 | 62 | 68 | :math:`[-\pi, \pi]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA |
++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
| sinhf | 23 | 64 | 73 | 141 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA |
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
| tanhf | 25 | 59 | 95 | 125 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA |
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index b10af04e4737..cd027a069351 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -46,14 +46,26 @@ add_object_library(
libc.src.errno.errno
)
-add_object_library(
+add_header_library(
+ range_reduction
+ HDRS
+ range_reduction.h
+ range_reduction_fma.h
+ DEPENDS
+ libc.src.__support.FPUtil.fputil
+ libc.src.__support.FPUtil.fma
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.nearest_integer
+)
+
+add_header_library(
sincosf_utils
HDRS
sincosf_utils.h
- SRCS
- sincosf_data.cpp
DEPENDS
- .math_utils
+ .range_reduction
+ libc.src.__support.FPUtil.fputil
+ libc.src.__support.FPUtil.polyeval
)
add_entrypoint_object(
@@ -62,16 +74,13 @@ add_entrypoint_object(
cosf.cpp
HDRS
../cosf.h
- range_reduction.h
- range_reduction_fma.h
DEPENDS
- .common_constants
+ .sincosf_utils
libc.include.math
libc.src.errno.errno
libc.src.__support.FPUtil.fputil
libc.src.__support.FPUtil.fma
libc.src.__support.FPUtil.multiply_add
- libc.src.__support.FPUtil.nearest_integer
libc.src.__support.FPUtil.polyeval
COMPILE_OPTIONS
-O3
@@ -83,16 +92,14 @@ add_entrypoint_object(
sinf.cpp
HDRS
../sinf.h
- range_reduction.h
- range_reduction_fma.h
DEPENDS
- .common_constants
+ .range_reduction
+ .sincosf_utils
libc.include.math
libc.src.errno.errno
libc.src.__support.FPUtil.fputil
libc.src.__support.FPUtil.fma
libc.src.__support.FPUtil.multiply_add
- libc.src.__support.FPUtil.nearest_integer
libc.src.__support.FPUtil.polyeval
COMPILE_OPTIONS
-O3
@@ -105,9 +112,14 @@ add_entrypoint_object(
HDRS
../sincosf.h
DEPENDS
+ .range_reduction
.sincosf_utils
libc.include.math
libc.src.errno.errno
+ libc.src.__support.FPUtil.fputil
+ libc.src.__support.FPUtil.fma
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.polyeval
COMPILE_OPTIONS
-O3
)
diff --git a/libc/src/math/generic/common_constants.cpp b/libc/src/math/generic/common_constants.cpp
index 1bbca8ff57fc..5f7fe56a4d54 100644
--- a/libc/src/math/generic/common_constants.cpp
+++ b/libc/src/math/generic/common_constants.cpp
@@ -240,21 +240,4 @@ const double EXP_2_POW[EXP_num_p] = {
0x1.13821818624b4p-2, 0x1.2ff6b54d8a89cp-2, 0x1.4d0ad5a753e07p-2,
0x1.6ac1f752150a5p-2, 0x1.891fac0e95613p-2};
-// Lookup table for sin(k * pi / 16) with k = 0, ..., 31.
-// Table is generated with Sollya as follow:
-// > display = hexadecimal;
-// > for k from 0 to 31 do { D(sin(k * pi/16)); };
-const double SIN_K_PI_OVER_16[32] = {
- 0x0.0000000000000p+0, 0x1.8f8b83c69a60bp-3, 0x1.87de2a6aea963p-2,
- 0x1.1c73b39ae68c8p-1, 0x1.6a09e667f3bcdp-1, 0x1.a9b66290ea1a3p-1,
- 0x1.d906bcf328d46p-1, 0x1.f6297cff75cb0p-1, 0x1.0000000000000p+0,
- 0x1.f6297cff75cb0p-1, 0x1.d906bcf328d46p-1, 0x1.a9b66290ea1a3p-1,
- 0x1.6a09e667f3bcdp-1, 0x1.1c73b39ae68c8p-1, 0x1.87de2a6aea963p-2,
- 0x1.8f8b83c69a60bp-3, 0x0.0000000000000p+0, -0x1.8f8b83c69a60bp-3,
- -0x1.87de2a6aea963p-2, -0x1.1c73b39ae68c8p-1, -0x1.6a09e667f3bcdp-1,
- -0x1.a9b66290ea1a3p-1, -0x1.d906bcf328d46p-1, -0x1.f6297cff75cb0p-1,
- -0x1.0000000000000p+0, -0x1.f6297cff75cb0p-1, -0x1.d906bcf328d46p-1,
- -0x1.a9b66290ea1a3p-1, -0x1.6a09e667f3bcdp-1, -0x1.1c73b39ae68c8p-1,
- -0x1.87de2a6aea963p-2, -0x1.8f8b83c69a60bp-3};
-
} // namespace __llvm_libc
diff --git a/libc/src/math/generic/common_constants.h b/libc/src/math/generic/common_constants.h
index 1c0d685abc21..998d1eb2cd06 100644
--- a/libc/src/math/generic/common_constants.h
+++ b/libc/src/math/generic/common_constants.h
@@ -31,18 +31,13 @@ extern const double EXP_M1[195];
// > for i from 0 to 127 do { D(exp(i / 128)); };
extern const double EXP_M2[128];
-// Lookup table for sin(k * pi / 16) with k = 0, ..., 31.
-// Table is generated with Sollya as follow:
-// > display = hexadecimal;
-// > for k from 0 to 31 do { D(sin(k * pi/16)); };
-extern const double SIN_K_PI_OVER_16[32];
-
static constexpr int EXP_bits_p = 5;
static constexpr int EXP_num_p = 1 << EXP_bits_p;
// Wolfram alpha: N[Table[2^x-1,{x,-16/32,15/32,1/32}],27]
// printf("%.13a,\n", d[i]);
extern const double EXP_2_POW[EXP_num_p];
+
} // namespace __llvm_libc
#endif // LLVM_LIBC_SRC_MATH_GENERIC_COMMON_CONSTANTS_H
diff --git a/libc/src/math/generic/cosf.cpp b/libc/src/math/generic/cosf.cpp
index 5298b2b305b7..e0824006582b 100644
--- a/libc/src/math/generic/cosf.cpp
+++ b/libc/src/math/generic/cosf.cpp
@@ -7,31 +7,16 @@
//===----------------------------------------------------------------------===//
#include "src/math/cosf.h"
-#include "common_constants.h"
+#include "sincosf_utils.h"
#include "src/__support/FPUtil/BasicOperations.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
#include "src/__support/FPUtil/except_value_utils.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/common.h"
#include <errno.h>
-#if defined(LIBC_TARGET_HAS_FMA)
-#include "range_reduction_fma.h"
-// using namespace __llvm_libc::fma;
-using __llvm_libc::fma::FAST_PASS_BOUND;
-using __llvm_libc::fma::large_range_reduction;
-using __llvm_libc::fma::small_range_reduction;
-#else
-#include "range_reduction.h"
-// using namespace __llvm_libc::generic;
-using __llvm_libc::generic::FAST_PASS_BOUND;
-using __llvm_libc::generic::large_range_reduction;
-using __llvm_libc::generic::small_range_reduction;
-#endif
-
namespace __llvm_libc {
// Exceptional cases for cosf.
@@ -132,58 +117,26 @@ LLVM_LIBC_FUNCTION(float, cosf, (float x)) {
return result;
}
- // TODO(lntue): refactor range reduction and most of polynomial approximation
- // to share between sinf, cosf, and sincosf.
- int k;
- double y;
-
- if (likely(x_abs < FAST_PASS_BOUND)) {
- k = small_range_reduction(xd, y);
- } else {
- // x is inf or nan.
- if (unlikely(x_abs >= 0x7f80'0000U)) {
- if (x_abs == 0x7f80'0000U) {
- errno = EDOM;
- fputil::set_except(FE_INVALID);
- }
- return x +
- FPBits::build_nan(1 << (fputil::MantissaWidth<float>::VALUE - 1));
+ // x is inf or nan.
+ if (unlikely(x_abs >= 0x7f80'0000U)) {
+ if (x_abs == 0x7f80'0000U) {
+ errno = EDOM;
+ fputil::set_except(FE_INVALID);
}
-
- k = large_range_reduction(xd, xbits.get_exponent(), y);
+ return x +
+ FPBits::build_nan(1 << (fputil::MantissaWidth<float>::VALUE - 1));
}
- // After range reduction, k = round(x * 16 / pi) and y = (x * 16 / pi) - k.
- // So k is an integer and -0.5 <= y <= 0.5.
- // Then cos(x) = cos((k + y)*pi/16)
- // = cos(y*pi/16) * cos(k*pi/16) - sin(y*pi/16) * sin(k*pi/16)
-
- double ysq = y * y;
-
- // Degree-6 minimax even polynomial for sin(y*pi/16)/y generated by Sollya
- // with:
- // > Q = fpminimax(sin(y*pi/16)/y, [|0, 2, 4, 6|], [|D...|], [0, 0.5]);
- double sin_y =
- y * fputil::polyeval(ysq, 0x1.921fb54442d17p-3, -0x1.4abbce6256adp-10,
- 0x1.466bc5a5ac6b3p-19, -0x1.32bdcb4207562p-29);
- // Degree-8 minimax even polynomial for cos(y*pi/16) generated by Sollya with:
- // > P = fpminimax(cos(x*pi/16), [|0, 2, 4, 6, 8|], [|1, D...|], [0, 0.5]);
- // Note that cosm1_y = cos(y*pi/16) - 1.
- double cosm1_y =
- ysq * fputil::polyeval(ysq, -0x1.3bd3cc9be45dcp-6, 0x1.03c1f081b08ap-14,
- -0x1.55d3c6fb0fb6ep-24, 0x1.e1d3d60f58873p-35);
-
- double sin_k = -SIN_K_PI_OVER_16[k & 31];
- // cos(k * pi/16) = sin(k * pi/16 + pi/2) = sin((k + 8) * pi/16).
- // cos_k = y * cos(k * pi/16)
- double cos_k = SIN_K_PI_OVER_16[(k + 8) & 31];
-
// Combine the results with the sine of sum formula:
// cos(x) = cos((k + y)*pi/16)
// = cos(y*pi/16) * cos(k*pi/16) - sin(y*pi/16) * sin(k*pi/16)
// = cosm1_y * cos_k + sin_y * sin_k
// = (cosm1_y * cos_k + cos_k) + sin_y * sin_k
- return fputil::multiply_add(sin_y, sin_k,
+ double sin_k, cos_k, sin_y, cosm1_y;
+
+ sincosf_eval(xd, x_abs, sin_k, cos_k, sin_y, cosm1_y);
+
+ return fputil::multiply_add(sin_y, -sin_k,
fputil::multiply_add(cosm1_y, cos_k, cos_k));
}
diff --git a/libc/src/math/generic/sincosf.cpp b/libc/src/math/generic/sincosf.cpp
index fa05cd24bea4..3a1149f01bec 100644
--- a/libc/src/math/generic/sincosf.cpp
+++ b/libc/src/math/generic/sincosf.cpp
@@ -7,71 +7,210 @@
//===----------------------------------------------------------------------===//
#include "src/math/sincosf.h"
-#include "math_utils.h"
#include "sincosf_utils.h"
-
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/common.h"
-#include <math.h>
-#include <stdint.h>
+#include <errno.h>
namespace __llvm_libc {
+// Exceptional values
+static constexpr int N_EXCEPTS = 10;
+
+static constexpr uint32_t EXCEPT_INPUTS[N_EXCEPTS] = {
+ 0x3b5637f5, // x = 0x1.ac6feap-9
+ 0x3fa7832a, // x = 0x1.4f0654p0
+ 0x46199998, // x = 0x1.33333p13
+ 0x55325019, // x = 0x1.64a032p43
+ 0x55cafb2a, // x = 0x1.95f654p44
+ 0x5922aa80, // x = 0x1.4555p51
+ 0x5aa4542c, // x = 0x1.48a858p54
+ 0x5f18b878, // x = 0x1.3170fp63
+ 0x6115cb11, // x = 0x1.2b9622p67
+ 0x7beef5ef, // x = 0x1.ddebdep120
+};
+
+static constexpr uint32_t EXCEPT_OUTPUTS_SIN[N_EXCEPTS][4] = {
+ {0x3b5637dc, 1, 0, 0}, // x = 0x1.ac6feap-9, sin(x) = 0x1.ac6fb8p-9 (RZ)
+ {0x3f7741b5, 1, 0, 1}, // x = 0x1.4f0654p0, sin(x) = 0x1.ee836ap-1 (RZ)
+ {0xbeb1fa5d, 0, 1, 0}, // x = 0x1.33333p13, sin(x) = -0x1.63f4bap-2 (RZ)
+ {0xbf171adf, 0, 1, 1}, // x = 0x1.64a032p43, sin(x) = -0x1.2e35bep-1 (RZ)
+ {0xbf7e7a16, 0, 1, 1}, // x = 0x1.95f654p44, sin(x) = -0x1.fcf42cp-1 (RZ)
+ {0xbf587521, 0, 1, 1}, // x = 0x1.4555p51, sin(x) = -0x1.b0ea42p-1 (RZ)
+ {0x3f5f5646, 1, 0, 0}, // x = 0x1.48a858p54, sin(x) = 0x1.beac8cp-1 (RZ)
+ {0x3dad60f6, 1, 0, 1}, // x = 0x1.3170fp63, sin(x) = 0x1.5ac1ecp-4 (RZ)
+ {0xbe7cc1e0, 0, 1, 1}, // x = 0x1.2b9622p67, sin(x) = -0x1.f983cp-3 (RZ)
+ {0xbf587d1b, 0, 1, 1}, // x = 0x1.ddebdep120, sin(x) = -0x1.b0fa36p-1 (RZ)
+};
+
+static constexpr uint32_t EXCEPT_OUTPUTS_COS[N_EXCEPTS][4] = {
+ {0x3f7fffa6, 1, 0, 0}, // x = 0x1.ac6feap-9, cos(x) = 0x1.ffff4cp-1 (RZ)
+ {0x3e84aabf, 1, 0, 1}, // x = 0x1.4f0654p0, cos(x) = 0x1.09557ep-2 (RZ)
+ {0xbf70090b, 0, 1, 0}, // x = 0x1.33333p13, cos(x) = -0x1.e01216p-1 (RZ)
+ {0x3f4ea5d2, 1, 0, 0}, // x = 0x1.64a032p43, cos(x) = 0x1.9d4ba4p-1 (RZ)
+ {0x3ddf11f3, 1, 0, 1}, // x = 0x1.95f654p44, cos(x) = 0x1.be23e6p-4 (RZ)
+ {0x3f08aebe, 1, 0, 1}, // x = 0x1.4555p51, cos(x) = 0x1.115d7cp-1 (RZ)
+ {0x3efa40a4, 1, 0, 0}, // x = 0x1.48a858p54, cos(x) = 0x1.f48148p-2 (RZ)
+ {0x3f7f14bb, 1, 0, 0}, // x = 0x1.3170fp63, cos(x) = 0x1.fe2976p-1 (RZ)
+ {0x3f78142e, 1, 0, 1}, // x = 0x1.2b9622p67, cos(x) = 0x1.f0285cp-1 (RZ)
+ {0x3f08a21c, 1, 0, 0}, // x = 0x1.ddebdep120, cos(x) = 0x1.114438p-1 (RZ)
+};
+
// Fast sincosf implementation. Worst-case ULP is 0.5607, maximum relative
// error is 0.5303 * 2^-23. A single-step range reduction is used for
// small values. Large inputs have their range reduced using fast integer
// arithmetic.
-LLVM_LIBC_FUNCTION(void, sincosf, (float y, float *sinp, float *cosp)) {
- double x = y;
- double s;
- int n;
- const sincos_t *p = &SINCOSF_TABLE[0];
-
- if (abstop12(y) < abstop12(PIO4)) {
- double x2 = x * x;
-
- if (unlikely(abstop12(y) < abstop12(as_float(0x39800000)))) {
- if (unlikely(abstop12(y) < abstop12(as_float(0x800000))))
- // Force underflow for tiny y.
- force_eval<float>(x2);
- *sinp = y;
+LLVM_LIBC_FUNCTION(void, sincosf, (float x, float *sinp, float *cosp)) {
+ using FPBits = typename fputil::FPBits<float>;
+ FPBits xbits(x);
+
+ uint32_t x_abs = xbits.uintval() & 0x7fff'ffffU;
+ double xd = static_cast<double>(x);
+
+ // Range reduction:
+ // For |x| > pi/16, we perform range reduction as follows:
+ // Find k and y such that:
+ // x = (k + y) * pi/16
+ // k is an integer
+ // |y| < 0.5
+ // For small range (|x| < 2^46 when FMA instructions are available, 2^22
+ // otherwise), this is done by performing:
+ // k = round(x * 16/pi)
+ // y = x * 16/pi - k
+ // For large range, we will omit all the higher parts of 16/pi such that the
+ // least significant bits of their full products with x are larger than 31,
+ // since:
+ // sin((k + y + 32*i) * pi/16) = sin(x + i * 2pi) = sin(x), and
+ // cos((k + y + 32*i) * pi/16) = cos(x + i * 2pi) = cos(x).
+ //
+ // When FMA instructions are not available, we store the digits of 16/pi in
+ // chunks of 28-bit precision. This will make sure that the products:
+ // x * SIXTEEN_OVER_PI_28[i] are all exact.
+ // When FMA instructions are available, we simply store the digits of 16/pi in
+ // chunks of doubles (53-bit of precision).
+ // So when multiplying by the largest values of single precision, the
+ // resulting output should be correct up to 2^(-208 + 128) ~ 2^-80. By the
+ // worst-case analysis of range reduction, |y| >= 2^-38, so this should give
+ // us more than 40 bits of accuracy. For the worst-case estimation of range
+ // reduction, see for instances:
+ // Elementary Functions by J-M. Muller, Chapter 11,
+ // Handbook of Floating-Point Arithmetic by J-M. Muller et. al.,
+ // Chapter 10.2.
+ //
+ // Once k and y are computed, we then deduce the answer by the sine and cosine
+ // of sum formulas:
+ // sin(x) = sin((k + y)*pi/16)
+ // = sin(y*pi/16) * cos(k*pi/16) + cos(y*pi/16) * sin(k*pi/16)
+ // cos(x) = cos((k + y)*pi/16)
+ // = cos(y*pi/16) * cos(k*pi/16) - sin(y*pi/16) * sin(k*pi/16)
+ // The values of sin(k*pi/16) and cos(k*pi/16) for k = 0..31 are precomputed
+ // and stored using a vector of 32 doubles. Sin(y*pi/16) and cos(y*pi/16) are
+ // computed using degree-7 and degree-8 minimax polynomials generated by
+ // Sollya respectively.
+
+ // |x| < 0x1.0p-12f
+ if (unlikely(x_abs < 0x3980'0000U)) {
+ if (unlikely(x_abs == 0U)) {
+ // For signed zeros.
+ *sinp = x;
*cosp = 1.0f;
return;
}
+ // When |x| < 2^-12, the relative errors of the approximations
+ // sin(x) ~ x, cos(x) ~ 1
+ // are:
+ // |sin(x) - x| / |sin(x)| < |x^3| / (6|x|)
+ // = x^2 / 6
+ // < 2^-25
+ // < epsilon(1)/2.
+ // |cos(x) - 1| < |x^2 / 2| = 2^-25 < epsilon(1)/2.
+ // So the correctly rounded values of sin(x) and cos(x) are:
+ // sin(x) = x - sign(x)*eps(x) if rounding mode = FE_TOWARDZERO,
+ // or (rounding mode = FE_UPWARD and x is
+ // negative),
+ // = x otherwise.
+ // cos(x) = 1 - eps(x) if rounding mode = FE_TOWARDZERO or FE_DOWWARD,
+ // = 1 otherwise.
+ // To simplify the rounding decision and make it more efficient and to
+ // prevent compiler to perform constant folding, we use
+ // sin(x) = fma(x, -2^-25, x),
+ // cos(x) = fma(x*0.5f, -x, 1)
+ // instead.
+ // Note: to use the formula x - 2^-25*x to decide the correct rounding, we
+ // do need fma(x, -2^-25, x) to prevent underflow caused by -2^-25*x when
+ // |x| < 2^-125. For targets without FMA instructions, we simply use
+ // double for intermediate results as it is more efficient than using an
+ // emulated version of FMA.
+#if defined(LIBC_TARGET_HAS_FMA)
+ *sinp = fputil::multiply_add(x, -0x1.0p-25f, x);
+ *cosp = fputil::multiply_add(FPBits(x_abs).get_val(), -0x1.0p-25f, 1.0f);
+#else
+ *sinp = static_cast<float>(fputil::multiply_add(xd, -0x1.0p-25, xd));
+ *cosp = static_cast<float>(fputil::multiply_add(
+ static_cast<double>(FPBits(x_abs).get_val()), -0x1.0p-25, 1.0));
+#endif // LIBC_TARGET_HAS_FMA
+ return;
+ }
- sincosf_poly(x, x2, p, 0, sinp, cosp);
- } else if (abstop12(y) < abstop12(120.0f)) {
- x = reduce_fast(x, p, &n);
-
- // Setup the signs for sin and cos.
- s = p->sign[n & 3];
-
- if (n & 2)
- p = &SINCOSF_TABLE[1];
-
- sincosf_poly(x * s, x * x, p, n, sinp, cosp);
- } else if (likely(abstop12(y) < abstop12(INFINITY))) {
- uint32_t xi = as_uint32_bits(y);
- int sign = xi >> 31;
-
- x = reduce_large(xi, &n);
-
- // Setup signs for sin and cos - include original sign.
- s = p->sign[(n + sign) & 3];
-
- if ((n + sign) & 2)
- p = &SINCOSF_TABLE[1];
+ // x is inf or nan.
+ if (unlikely(x_abs >= 0x7f80'0000U)) {
+ if (x_abs == 0x7f80'0000U) {
+ errno = EDOM;
+ fputil::set_except(FE_INVALID);
+ }
+ *sinp =
+ x + FPBits::build_nan(1 << (fputil::MantissaWidth<float>::VALUE - 1));
+ *cosp = *sinp;
+ return;
+ }
- sincosf_poly(x * s, x * x, p, n, sinp, cosp);
- } else {
- // Return NaN if Inf or NaN for both sin and cos.
- *sinp = *cosp = y - y;
+ // Check exceptional values.
+ for (int i = 0; i < N_EXCEPTS; ++i) {
+ if (unlikely(x_abs == EXCEPT_INPUTS[i])) {
+ uint32_t s = EXCEPT_OUTPUTS_SIN[i][0]; // FE_TOWARDZERO
+ uint32_t c = EXCEPT_OUTPUTS_COS[i][0]; // FE_TOWARDZERO
+ bool x_sign = x < 0;
+ switch (fputil::get_round()) {
+ case FE_UPWARD:
+ s += x_sign ? EXCEPT_OUTPUTS_SIN[i][2] : EXCEPT_OUTPUTS_SIN[i][1];
+ c += EXCEPT_OUTPUTS_COS[i][1];
+ break;
+ case FE_DOWNWARD:
+ s += x_sign ? EXCEPT_OUTPUTS_SIN[i][1] : EXCEPT_OUTPUTS_SIN[i][2];
+ c += EXCEPT_OUTPUTS_COS[i][2];
+ break;
+ case FE_TONEAREST:
+ s += EXCEPT_OUTPUTS_SIN[i][3];
+ c += EXCEPT_OUTPUTS_COS[i][3];
+ break;
+ }
+ *sinp = x_sign ? -FPBits(s).get_val() : FPBits(s).get_val();
+ *cosp = FPBits(c).get_val();
- // Needed to set errno for +-Inf, the add is a hack to work
- // around a gcc register allocation issue: just passing y
- // affects code generation in the fast path.
- invalid(y + y);
+ return;
+ }
}
+
+ // Combine the results with the sine and cosine of sum formulas:
+ // sin(x) = sin((k + y)*pi/16)
+ // = sin(y*pi/16) * cos(k*pi/16) + cos(y*pi/16) * sin(k*pi/16)
+ // = sin_y * cos_k + (1 + cosm1_y) * sin_k
+ // = sin_y * cos_k + (cosm1_y * sin_k + sin_k)
+ // cos(x) = cos((k + y)*pi/16)
+ // = cos(y*pi/16) * cos(k*pi/16) - sin(y*pi/16) * sin(k*pi/16)
+ // = cosm1_y * cos_k + sin_y * sin_k
+ // = (cosm1_y * cos_k + cos_k) + sin_y * sin_k
+ double sin_k, cos_k, sin_y, cosm1_y;
+
+ sincosf_eval(xd, x_abs, sin_k, cos_k, sin_y, cosm1_y);
+
+ *sinp = fputil::multiply_add(sin_y, cos_k,
+ fputil::multiply_add(cosm1_y, sin_k, sin_k));
+ *cosp = fputil::multiply_add(sin_y, -sin_k,
+ fputil::multiply_add(cosm1_y, cos_k, cos_k));
}
} // namespace __llvm_libc
diff --git a/libc/src/math/generic/sincosf_data.cpp b/libc/src/math/generic/sincosf_data.cpp
deleted file mode 100644
index ac632652f169..000000000000
--- a/libc/src/math/generic/sincosf_data.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-//===-- sinf/cosf data tables ---------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "math_utils.h"
-#include "sincosf_utils.h"
-
-#include <stdint.h>
-
-namespace __llvm_libc {
-
-// The constants and polynomials for sine and cosine. The 2nd entry
-// computes -cos (x) rather than cos (x) to get negation for free.
-constexpr sincos_t SINCOSF_TABLE[2] = {
- {{1.0, -1.0, -1.0, 1.0},
- 0x1.45f306dc9c883p+23,
- 0x1.921fb54442d18p+0,
- 0x1p+0,
- -0x1.ffffffd0c621cp-2,
- 0x1.55553e1068f19p-5,
- -0x1.6c087e89a359dp-10,
- 0x1.99343027bf8c3p-16,
- -0x1.555545995a603p-3,
- 0x1.1107605230bc4p-7,
- -0x1.994eb3774cf24p-13},
- {{1.0, -1.0, -1.0, 1.0},
- 0x1.45f306dc9c883p+23,
- 0x1.921fb54442d18p+0,
- -0x1p+0,
- 0x1.ffffffd0c621cp-2,
- -0x1.55553e1068f19p-5,
- 0x1.6c087e89a359dp-10,
- -0x1.99343027bf8c3p-16,
- -0x1.555545995a603p-3,
- 0x1.1107605230bc4p-7,
- -0x1.994eb3774cf24p-13},
-};
-
-// Table with 4/PI to 192 bit precision. To avoid unaligned accesses
-// only 8 new bits are added per entry, making the table 4 times larger.
-constexpr uint32_t INV_PIO4[24] = {
- 0xa2, 0xa2f9, 0xa2f983, 0xa2f9836e, 0xf9836e4e, 0x836e4e44,
- 0x6e4e4415, 0x4e441529, 0x441529fc, 0x1529fc27, 0x29fc2757, 0xfc2757d1,
- 0x2757d1f5, 0x57d1f534, 0xd1f534dd, 0xf534ddc0, 0x34ddc0db, 0xddc0db62,
- 0xc0db6295, 0xdb629599, 0x6295993c, 0x95993c43, 0x993c4390, 0x3c439041};
-
-} // namespace __llvm_libc
diff --git a/libc/src/math/generic/sincosf_utils.h b/libc/src/math/generic/sincosf_utils.h
index 5ddcc9c5b55b..06064c8fdb7c 100644
--- a/libc/src/math/generic/sincosf_utils.h
+++ b/libc/src/math/generic/sincosf_utils.h
@@ -1,4 +1,4 @@
-//===-- Collection of utils for cosf/sinf/sincosf ---------------*- C++ -*-===//
+//===-- Collection of utils for sinf/cosf/sincosf ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -9,132 +9,78 @@
#ifndef LLVM_LIBC_SRC_MATH_SINCOSF_UTILS_H
#define LLVM_LIBC_SRC_MATH_SINCOSF_UTILS_H
-#include "math_utils.h"
-
-#include <stdint.h>
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/common.h"
+
+#if defined(LIBC_TARGET_HAS_FMA)
+#include "range_reduction_fma.h"
+// using namespace __llvm_libc::fma;
+using __llvm_libc::fma::FAST_PASS_BOUND;
+using __llvm_libc::fma::large_range_reduction;
+using __llvm_libc::fma::small_range_reduction;
+#else
+#include "range_reduction.h"
+// using namespace __llvm_libc::generic;
+using __llvm_libc::generic::FAST_PASS_BOUND;
+using __llvm_libc::generic::large_range_reduction;
+using __llvm_libc::generic::small_range_reduction;
+#endif // LIBC_TARGET_HAS_FMA
namespace __llvm_libc {
-// 2PI * 2^-64.
-static constexpr double PI63 = 0x1.921fb54442d18p-62;
-// PI / 4.
-static constexpr double PIO4 = 0x1.921fb54442d18p-1;
-
-// The constants and polynomials for sine and cosine.
-typedef struct {
- double sign[4]; // Sign of sine in quadrants 0..3.
- double hpi_inv; // 2 / PI ( * 2^24 ).
- double hpi; // PI / 2.
- double c0, c1, c2, c3, c4; // Cosine polynomial.
- double s1, s2, s3; // Sine polynomial.
-} sincos_t;
-
-// Polynomial data (the cosine polynomial is negated in the 2nd entry).
-extern const sincos_t SINCOSF_TABLE[2];
-
-// Table with 4/PI to 192 bit precision.
-extern const uint32_t INV_PIO4[];
-
-// Top 12 bits of the float representation with the sign bit cleared.
-static inline uint32_t abstop12(float x) {
- return (as_uint32_bits(x) >> 20) & 0x7ff;
-}
-
-// Compute the sine and cosine of inputs X and X2 (X squared), using the
-// polynomial P and store the results in SINP and COSP. N is the quadrant,
-// if odd the cosine and sine polynomials are swapped.
-static inline void sincosf_poly(double x, double x2, const sincos_t *p, int n,
- float *sinp, float *cosp) {
- double x3, x4, x5, x6, s, c, c1, c2, s1;
-
- x4 = x2 * x2;
- x3 = x2 * x;
- c2 = p->c3 + x2 * p->c4;
- s1 = p->s2 + x2 * p->s3;
-
- // Swap sin/cos result based on quadrant.
- float *tmp = (n & 1 ? cosp : sinp);
- cosp = (n & 1 ? sinp : cosp);
- sinp = tmp;
-
- c1 = p->c0 + x2 * p->c1;
- x5 = x3 * x2;
- x6 = x4 * x2;
-
- s = x + x3 * p->s1;
- c = c1 + x4 * p->c2;
-
- *sinp = s + x5 * s1;
- *cosp = c + x6 * c2;
-}
-
-// Return the sine of inputs X and X2 (X squared) using the polynomial P.
-// N is the quadrant, and if odd the cosine polynomial is used.
-static inline float sinf_poly(double x, double x2, const sincos_t *p, int n) {
- double x3, x4, x6, x7, s, c, c1, c2, s1;
-
- if ((n & 1) == 0) {
- x3 = x * x2;
- s1 = p->s2 + x2 * p->s3;
-
- x7 = x3 * x2;
- s = x + x3 * p->s1;
-
- return s + x7 * s1;
+// Lookup table for sin(k * pi / 16) with k = 0, ..., 31.
+// Table is generated with Sollya as follow:
+// > display = hexadecimal;
+// > for k from 0 to 31 do { D(sin(k * pi/16)); };
+const double SIN_K_PI_OVER_16[32] = {
+ 0x0.0000000000000p+0, 0x1.8f8b83c69a60bp-3, 0x1.87de2a6aea963p-2,
+ 0x1.1c73b39ae68c8p-1, 0x1.6a09e667f3bcdp-1, 0x1.a9b66290ea1a3p-1,
+ 0x1.d906bcf328d46p-1, 0x1.f6297cff75cb0p-1, 0x1.0000000000000p+0,
+ 0x1.f6297cff75cb0p-1, 0x1.d906bcf328d46p-1, 0x1.a9b66290ea1a3p-1,
+ 0x1.6a09e667f3bcdp-1, 0x1.1c73b39ae68c8p-1, 0x1.87de2a6aea963p-2,
+ 0x1.8f8b83c69a60bp-3, 0x0.0000000000000p+0, -0x1.8f8b83c69a60bp-3,
+ -0x1.87de2a6aea963p-2, -0x1.1c73b39ae68c8p-1, -0x1.6a09e667f3bcdp-1,
+ -0x1.a9b66290ea1a3p-1, -0x1.d906bcf328d46p-1, -0x1.f6297cff75cb0p-1,
+ -0x1.0000000000000p+0, -0x1.f6297cff75cb0p-1, -0x1.d906bcf328d46p-1,
+ -0x1.a9b66290ea1a3p-1, -0x1.6a09e667f3bcdp-1, -0x1.1c73b39ae68c8p-1,
+ -0x1.87de2a6aea963p-2, -0x1.8f8b83c69a60bp-3};
+
+static inline void sincosf_eval(double xd, uint32_t x_abs, double &sin_k,
+ double &cos_k, double &sin_y, double &cosm1_y) {
+ int64_t k;
+ double y;
+
+ if (likely(x_abs < FAST_PASS_BOUND)) {
+ k = small_range_reduction(xd, y);
} else {
- x4 = x2 * x2;
- c2 = p->c3 + x2 * p->c4;
- c1 = p->c0 + x2 * p->c1;
-
- x6 = x4 * x2;
- c = c1 + x4 * p->c2;
-
- return c + x6 * c2;
+ fputil::FPBits<float> x_bits(x_abs);
+ k = large_range_reduction(xd, x_bits.get_exponent(), y);
}
-}
-
-// Fast range reduction using single multiply-subtract. Return the modulo of
-// X as a value between -PI/4 and PI/4 and store the quadrant in NP.
-// The values for PI/2 and 2/PI are accessed via P. Since PI/2 as a double
-// is accurate to 55 bits and the worst-case cancellation happens at 6 * PI/4,
-// the result is accurate for |X| <= 120.0.
-static inline double reduce_fast(double x, const sincos_t *p, int *np) {
- double r;
- // Use scaled float to int conversion with explicit rounding.
- // hpi_inv is prescaled by 2^24 so the quadrant ends up in bits 24..31.
- // This avoids inaccuracies introduced by truncating negative values.
- r = x * p->hpi_inv;
- int n = ((int32_t)r + 0x800000) >> 24;
- *np = n;
- return x - n * p->hpi;
-}
-
-// Reduce the range of XI to a multiple of PI/2 using fast integer arithmetic.
-// XI is a reinterpreted float and must be >= 2.0f (the sign bit is ignored).
-// Return the modulo between -PI/4 and PI/4 and store the quadrant in NP.
-// Reduction uses a table of 4/PI with 192 bits of precision. A 32x96->128 bit
-// multiply computes the exact 2.62-bit fixed-point modulo. Since the result
-// can have at most 29 leading zeros after the binary point, the double
-// precision result is accurate to 33 bits.
-static inline double reduce_large(uint32_t xi, int *np) {
- const uint32_t *arr = &INV_PIO4[(xi >> 26) & 15];
- int shift = (xi >> 23) & 7;
- uint64_t n, res0, res1, res2;
-
- xi = (xi & 0xffffff) | 0x800000;
- xi <<= shift;
-
- res0 = xi * arr[0];
- res1 = (uint64_t)xi * arr[4];
- res2 = (uint64_t)xi * arr[8];
- res0 = (res2 >> 32) | (res0 << 32);
- res0 += res1;
- n = (res0 + (1ULL << 61)) >> 62;
- res0 -= n << 62;
- double x = (int64_t)res0;
- *np = n;
- return x * PI63;
+ // After range reduction, k = round(x * 16 / pi) and y = (x * 16 / pi) - k.
+ // So k is an integer and -0.5 <= y <= 0.5.
+ // Then sin(x) = sin((k + y)*pi/16)
+ // = sin(y*pi/16) * cos(k*pi/16) + cos(y*pi/16) * sin(k*pi/16)
+
+ sin_k = SIN_K_PI_OVER_16[k & 31];
+ // cos(k * pi/16) = sin(k * pi/16 + pi/2) = sin((k + 8) * pi/16).
+ // cos_k = y * cos(k * pi/16)
+ cos_k = SIN_K_PI_OVER_16[(k + 8) & 31];
+
+ double ysq = y * y;
+
+ // Degree-6 minimax even polynomial for sin(y*pi/16)/y generated by Sollya
+ // with:
+ // > Q = fpminimax(sin(y*pi/16)/y, [|0, 2, 4, 6|], [|D...|], [0, 0.5]);
+ sin_y = y * fputil::polyeval(ysq, 0x1.921fb54442d17p-3, -0x1.4abbce6256adp-10,
+ 0x1.466bc5a5ac6b3p-19, -0x1.32bdcb4207562p-29);
+ // Degree-8 minimax even polynomial for cos(y*pi/16) generated by Sollya with:
+ // > P = fpminimax(cos(x*pi/16), [|0, 2, 4, 6, 8|], [|1, D...|], [0, 0.5]);
+ // Note that cosm1_y = cos(y*pi/16) - 1.
+ cosm1_y =
+ ysq * fputil::polyeval(ysq, -0x1.3bd3cc9be45dcp-6, 0x1.03c1f081b08ap-14,
+ -0x1.55d3c6fb0fb6ep-24, 0x1.e1d3d60f58873p-35);
}
} // namespace __llvm_libc
diff --git a/libc/src/math/generic/sinf.cpp b/libc/src/math/generic/sinf.cpp
index 444940370162..bc725ea4dd5d 100644
--- a/libc/src/math/generic/sinf.cpp
+++ b/libc/src/math/generic/sinf.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "src/math/sinf.h"
-#include "common_constants.h"
+#include "sincosf_utils.h"
#include "src/__support/FPUtil/BasicOperations.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
@@ -21,19 +21,13 @@
#if defined(LIBC_TARGET_HAS_FMA)
#include "range_reduction_fma.h"
// using namespace __llvm_libc::fma;
-using __llvm_libc::fma::FAST_PASS_BOUND;
-using __llvm_libc::fma::large_range_reduction;
using __llvm_libc::fma::N_EXCEPTS;
using __llvm_libc::fma::SinfExcepts;
-using __llvm_libc::fma::small_range_reduction;
#else
#include "range_reduction.h"
// using namespace __llvm_libc::generic;
-using __llvm_libc::generic::FAST_PASS_BOUND;
-using __llvm_libc::generic::large_range_reduction;
using __llvm_libc::generic::N_EXCEPTS;
using __llvm_libc::generic::SinfExcepts;
-using __llvm_libc::generic::small_range_reduction;
#endif
namespace __llvm_libc {
@@ -143,55 +137,24 @@ LLVM_LIBC_FUNCTION(float, sinf, (float x)) {
return result;
}
- int k;
- double y;
-
- if (likely(x_abs < FAST_PASS_BOUND)) {
- k = small_range_reduction(xd, y);
- } else {
- // x is inf or nan.
- if (unlikely(x_abs >= 0x7f80'0000U)) {
- if (x_abs == 0x7f80'0000U) {
- errno = EDOM;
- fputil::set_except(FE_INVALID);
- }
- return x +
- FPBits::build_nan(1 << (fputil::MantissaWidth<float>::VALUE - 1));
+ if (unlikely(x_abs >= 0x7f80'0000U)) {
+ if (x_abs == 0x7f80'0000U) {
+ errno = EDOM;
+ fputil::set_except(FE_INVALID);
}
-
- k = large_range_reduction(xd, xbits.get_exponent(), y);
+ return x +
+ FPBits::build_nan(1 << (fputil::MantissaWidth<float>::VALUE - 1));
}
- // After range reduction, k = round(x * 16 / pi) and y = (x * 16 / pi) - k.
- // So k is an integer and -0.5 <= y <= 0.5.
- // Then sin(x) = sin((k + y)*pi/16)
- // = sin(y*pi/16) * cos(k*pi/16) + cos(y*pi/16) * sin(k*pi/16)
-
- double ysq = y * y;
-
- // Degree-6 minimax even polynomial for sin(y*pi/16)/y generated by Sollya
- // with:
- // > Q = fpminimax(sin(y*pi/16)/y, [|0, 2, 4, 6|], [|D...|], [0, 0.5]);
- double sin_y =
- fputil::polyeval(ysq, 0x1.921fb54442d17p-3, -0x1.4abbce6256adp-10,
- 0x1.466bc5a5ac6b3p-19, -0x1.32bdcb4207562p-29);
- // Degree-8 minimax even polynomial for cos(y*pi/16) generated by Sollya with:
- // > P = fpminimax(cos(x*pi/16), [|0, 2, 4, 6, 8|], [|1, D...|], [0, 0.5]);
- // Note that cosm1_y = cos(y*pi/16) - 1.
- double cosm1_y =
- ysq * fputil::polyeval(ysq, -0x1.3bd3cc9be45dcp-6, 0x1.03c1f081b08ap-14,
- -0x1.55d3c6fb0fb6ep-24, 0x1.e1d3d60f58873p-35);
-
- double sin_k = SIN_K_PI_OVER_16[k & 31];
- // cos(k * pi/16) = sin(k * pi/16 + pi/2) = sin((k + 8) * pi/16).
- // cos_k = y * cos(k * pi/16)
- double cos_k = y * SIN_K_PI_OVER_16[(k + 8) & 31];
-
// Combine the results with the sine of sum formula:
// sin(x) = sin((k + y)*pi/16)
// = sin(y*pi/16) * cos(k*pi/16) + cos(y*pi/16) * sin(k*pi/16)
// = sin_y * cos_k + (1 + cosm1_y) * sin_k
// = sin_y * cos_k + (cosm1_y * sin_k + sin_k)
+ double sin_k, cos_k, sin_y, cosm1_y;
+
+ sincosf_eval(xd, x_abs, sin_k, cos_k, sin_y, cosm1_y);
+
return fputil::multiply_add(sin_y, cos_k,
fputil::multiply_add(cosm1_y, sin_k, sin_k));
}
diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt
index 5505222abb6d..c7e073e77348 100644
--- a/libc/test/src/math/exhaustive/CMakeLists.txt
+++ b/libc/test/src/math/exhaustive/CMakeLists.txt
@@ -56,6 +56,23 @@ add_fp_unittest(
)
add_fp_unittest(
+ sincosf_test
+ NO_RUN_POSTBUILD
+ NEED_MPFR
+ SUITE
+ libc_math_exhaustive_tests
+ SRCS
+ sincosf_test.cpp
+ DEPENDS
+ .exhaustive_test
+ libc.include.math
+ libc.src.math.sincosf
+ libc.src.__support.FPUtil.fputil
+ LINK_LIBRARIES
+ -lpthread
+)
+
+add_fp_unittest(
expf_test
NO_RUN_POSTBUILD
NEED_MPFR
diff --git a/libc/test/src/math/exhaustive/sincosf_test.cpp b/libc/test/src/math/exhaustive/sincosf_test.cpp
new file mode 100644
index 000000000000..bbd14e669fe8
--- /dev/null
+++ b/libc/test/src/math/exhaustive/sincosf_test.cpp
@@ -0,0 +1,77 @@
+//===-- Exhaustive test for sincosf ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "exhaustive_test.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/math/sincosf.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+#include "utils/UnitTest/FPMatcher.h"
+
+#include <thread>
+
+using FPBits = __llvm_libc::fputil::FPBits<float>;
+
+namespace mpfr = __llvm_libc::testing::mpfr;
+
+struct LlvmLibcSinCosfExhaustiveTest : public LlvmLibcExhaustiveTest<uint32_t> {
+ bool check(uint32_t start, uint32_t stop,
+ mpfr::RoundingMode rounding) override {
+ mpfr::ForceRoundingMode r(rounding);
+ uint32_t bits = start;
+ bool result = true;
+ do {
+ FPBits xbits(bits);
+ float x = float(xbits);
+ float sinx, cosx;
+ __llvm_libc::sincosf(x, &sinx, &cosx);
+ result &= EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, sinx, 0.5, rounding);
+ result &= EXPECT_MPFR_MATCH(mpfr::Operation::Cos, x, cosx, 0.5, rounding);
+ } while (++bits < stop);
+ return result;
+ }
+};
+
+// Range: [0, +Inf);
+static constexpr uint32_t POS_START = 0x0000'0000U;
+static constexpr uint32_t POS_STOP = 0x7f80'0000U;
+
+TEST_F(LlvmLibcSinCosfExhaustiveTest, PostiveRangeRoundNearestTieToEven) {
+ test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Nearest);
+}
+
+TEST_F(LlvmLibcSinCosfExhaustiveTest, PostiveRangeRoundUp) {
+ test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Upward);
+}
+
+TEST_F(LlvmLibcSinCosfExhaustiveTest, PostiveRangeRoundDown) {
+ test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Downward);
+}
+
+TEST_F(LlvmLibcSinCosfExhaustiveTest, PostiveRangeRoundTowardZero) {
+ test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::TowardZero);
+}
+
+// Range: (-Inf, 0];
+static constexpr uint32_t NEG_START = 0x8000'0000U;
+static constexpr uint32_t NEG_STOP = 0xff80'0000U;
+
+TEST_F(LlvmLibcSinCosfExhaustiveTest, NegativeRangeRoundNearestTieToEven) {
+ test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Nearest);
+}
+
+TEST_F(LlvmLibcSinCosfExhaustiveTest, NegativeRangeRoundUp) {
+ test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Upward);
+}
+
+TEST_F(LlvmLibcSinCosfExhaustiveTest, NegativeRangeRoundDown) {
+ test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Downward);
+}
+
+TEST_F(LlvmLibcSinCosfExhaustiveTest, NegativeRangeRoundTowardZero) {
+ test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::TowardZero);
+}
diff --git a/libc/test/src/math/sincosf_test.cpp b/libc/test/src/math/sincosf_test.cpp
index f23d7193e077..559dec09e2b1 100644
--- a/libc/test/src/math/sincosf_test.cpp
+++ b/libc/test/src/math/sincosf_test.cpp
@@ -54,6 +54,40 @@ TEST(LlvmLibcSinCosfTest, SpecialNumbers) {
EXPECT_MATH_ERRNO(EDOM);
}
+#define EXPECT_SINCOS_MATCH_ALL_ROUNDING(input) \
+ { \
+ float sin, cos; \
+ namespace mpfr = __llvm_libc::testing::mpfr; \
+ \
+ mpfr::ForceRoundingMode __r1(mpfr::RoundingMode::Nearest); \
+ __llvm_libc::sincosf(input, &sin, &cos); \
+ EXPECT_MPFR_MATCH(mpfr::Operation::Sin, input, sin, 0.5, \
+ mpfr::RoundingMode::Nearest); \
+ EXPECT_MPFR_MATCH(mpfr::Operation::Cos, input, cos, 0.5, \
+ mpfr::RoundingMode::Nearest); \
+ \
+ mpfr::ForceRoundingMode __r2(mpfr::RoundingMode::Upward); \
+ __llvm_libc::sincosf(input, &sin, &cos); \
+ EXPECT_MPFR_MATCH(mpfr::Operation::Sin, input, sin, 0.5, \
+ mpfr::RoundingMode::Upward); \
+ EXPECT_MPFR_MATCH(mpfr::Operation::Cos, input, cos, 0.5, \
+ mpfr::RoundingMode::Upward); \
+ \
+ mpfr::ForceRoundingMode __r3(mpfr::RoundingMode::Downward); \
+ __llvm_libc::sincosf(input, &sin, &cos); \
+ EXPECT_MPFR_MATCH(mpfr::Operation::Sin, input, sin, 0.5, \
+ mpfr::RoundingMode::Downward); \
+ EXPECT_MPFR_MATCH(mpfr::Operation::Cos, input, cos, 0.5, \
+ mpfr::RoundingMode::Downward); \
+ \
+ mpfr::ForceRoundingMode __r4(mpfr::RoundingMode::TowardZero); \
+ __llvm_libc::sincosf(input, &sin, &cos); \
+ EXPECT_MPFR_MATCH(mpfr::Operation::Sin, input, sin, 0.5, \
+ mpfr::RoundingMode::TowardZero); \
+ EXPECT_MPFR_MATCH(mpfr::Operation::Cos, input, cos, 0.5, \
+ mpfr::RoundingMode::TowardZero); \
+ }
+
TEST(LlvmLibcSinCosfTest, InFloatRange) {
constexpr uint32_t COUNT = 1000000;
constexpr uint32_t STEP = UINT32_MAX / COUNT;
@@ -62,31 +96,65 @@ TEST(LlvmLibcSinCosfTest, InFloatRange) {
if (isnan(x) || isinf(x))
continue;
- float sin, cos;
- __llvm_libc::sincosf(x, &sin, &cos);
- ASSERT_MPFR_MATCH(mpfr::Operation::Cos, x, cos, 1.0);
- ASSERT_MPFR_MATCH(mpfr::Operation::Sin, x, sin, 1.0);
+ EXPECT_SINCOS_MATCH_ALL_ROUNDING(x);
+ EXPECT_SINCOS_MATCH_ALL_ROUNDING(-x);
}
}
-// For small values, cos(x) is 1 and sin(x) is x.
-TEST(LlvmLibcSinCosfTest, SmallValues) {
- uint32_t bits = 0x17800000;
- float x = float(FPBits((bits)));
- float result_cos, result_sin;
- __llvm_libc::sincosf(x, &result_sin, &result_cos);
- EXPECT_MPFR_MATCH(mpfr::Operation::Cos, x, result_cos, 1.0);
- EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, result_sin, 1.0);
- EXPECT_FP_EQ(1.0f, result_cos);
- EXPECT_FP_EQ(x, result_sin);
-
- bits = 0x00400000;
- x = float(FPBits((bits)));
- __llvm_libc::sincosf(x, &result_sin, &result_cos);
- EXPECT_MPFR_MATCH(mpfr::Operation::Cos, x, result_cos, 1.0);
- EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, result_sin, 1.0);
- EXPECT_FP_EQ(1.0f, result_cos);
- EXPECT_FP_EQ(x, result_sin);
+// For hard to round inputs.
+TEST(LlvmLibcSinCosfTest, SpecialValues) {
+ constexpr int N = 43;
+ constexpr uint32_t INPUTS[N] = {
+ 0x3b56'37f5U, // x = 0x1.ac6feap-9f
+ 0x3f06'0a92U, // x = pi/6
+ 0x3f3a'dc51U, // x = 0x1.75b8a2p-1f
+ 0x3f49'0fdbU, // x = pi/4
+ 0x3f86'0a92U, // x = pi/3
+ 0x3fa7'832aU, // x = 0x1.4f0654p+0f
+ 0x3fc9'0fdbU, // x = pi/2
+ 0x4017'1973U, // x = 0x1.2e32e6p+1f
+ 0x4049'0fdbU, // x = pi
+ 0x4096'cbe4U, // x = 0x1.2d97c8p+2f
+ 0x40c9'0fdbU, // x = 2*pi
+ 0x433b'7490U, // x = 0x1.76e92p+7f
+ 0x437c'e5f1U, // x = 0x1.f9cbe2p+7f
+ 0x4619'9998U, // x = 0x1.33333p+13f
+ 0x474d'246fU, // x = 0x1.9a48dep+15f
+ 0x4afd'ece4U, // x = 0x1.fbd9c8p+22f
+ 0x4c23'32e9U, // x = 0x1.4665d2p+25f
+ 0x50a3'e87fU, // x = 0x1.47d0fep+34f
+ 0x5239'47f6U, // x = 0x1.728fecp+37f
+ 0x53b1'46a6U, // x = 0x1.628d4cp+40f
+ 0x5532'5019U, // x = 0x1.64a032p+43f
+ 0x55ca'fb2aU, // x = 0x1.95f654p+44f
+ 0x588e'f060U, // x = 0x1.1de0cp+50f
+ 0x5922'aa80U, // x = 0x1.4555p+51f
+ 0x5aa4'542cU, // x = 0x1.48a858p+54f
+ 0x5c07'bcd0U, // x = 0x1.0f79ap+57f
+ 0x5ebc'fddeU, // x = 0x1.79fbbcp+62f
+ 0x5f18'b878U, // x = 0x1.3170fp+63f
+ 0x5fa6'eba7U, // x = 0x1.4dd74ep+64f
+ 0x6115'cb11U, // x = 0x1.2b9622p+67f
+ 0x61a4'0b40U, // x = 0x1.48168p+68f
+ 0x6386'134eU, // x = 0x1.0c269cp+72f
+ 0x6589'8498U, // x = 0x1.13093p+76f
+ 0x6600'0001U, // x = 0x1.000002p+77f
+ 0x664e'46e4U, // x = 0x1.9c8dc8p+77f
+ 0x66b0'14aaU, // x = 0x1.602954p+78f
+ 0x67a9'242bU, // x = 0x1.524856p+80f
+ 0x6a19'76f1U, // x = 0x1.32ede2p+85f
+ 0x6c55'da58U, // x = 0x1.abb4bp+89f
+ 0x6f79'be45U, // x = 0x1.f37c8ap+95f
+ 0x7276'69d4U, // x = 0x1.ecd3a8p+101f
+ 0x7758'4625U, // x = 0x1.b08c4ap+111f
+ 0x7bee'f5efU, // x = 0x1.ddebdep+120f
+ };
+
+ for (int i = 0; i < N; ++i) {
+ float x = float(FPBits(INPUTS[i]));
+ EXPECT_SINCOS_MATCH_ALL_ROUNDING(x);
+ EXPECT_SINCOS_MATCH_ALL_ROUNDING(-x);
+ }
}
// SDCOMP-26094: check sinf in the cases for which the range reducer
@@ -94,9 +162,7 @@ TEST(LlvmLibcSinCosfTest, SmallValues) {
TEST(LlvmLibcSinCosfTest, SDCOMP_26094) {
for (uint32_t v : SDCOMP26094_VALUES) {
float x = float(FPBits((v)));
- float sin, cos;
- __llvm_libc::sincosf(x, &sin, &cos);
- EXPECT_MPFR_MATCH(mpfr::Operation::Cos, x, cos, 1.0);
- EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, sin, 1.0);
+ EXPECT_SINCOS_MATCH_ALL_ROUNDING(x);
+ EXPECT_SINCOS_MATCH_ALL_ROUNDING(-x);
}
}
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 65675c9cc34f..a413a3c1cd43 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -449,16 +449,6 @@ cc_library(
)
cc_library(
- name = "sincosf_utils",
- srcs = ["src/math/generic/sincosf_data.cpp"],
- hdrs = ["src/math/generic/sincosf_utils.h"],
- deps = [
- ":libc_root",
- ":math_utils",
- ],
-)
-
-cc_library(
name = "common_constants",
srcs = ["src/math/generic/common_constants.cpp"],
hdrs = ["src/math/generic/common_constants.h"],
@@ -482,6 +472,18 @@ cc_library(
],
)
+cc_library(
+ name = "sincosf_utils",
+ hdrs = ["src/math/generic/sincosf_utils.h"],
+ deps = [
+ ":__support_common",
+ ":__support_fputil",
+ ":__support_fputil_polyeval",
+ ":range_reduction",
+ ":libc_root",
+ ],
+)
+
libc_math_function(
name = "expm1f",
additional_deps = [
@@ -644,16 +646,15 @@ libc_math_function(
additional_deps = [
":__support_fputil_fma",
":__support_fputil_multiply_add",
- ":__support_fputil_polyeval",
- ":common_constants",
- ":range_reduction",
+ ":sincosf_utils",
],
)
libc_math_function(
name = "sincosf",
additional_deps = [
- ":math_utils",
+ ":__support_fputil_fma",
+ ":__support_fputil_multiply_add",
":sincosf_utils",
],
)
@@ -664,8 +665,8 @@ libc_math_function(
":__support_fputil_fma",
":__support_fputil_multiply_add",
":__support_fputil_polyeval",
- ":common_constants",
":range_reduction",
+ ":sincosf_utils",
],
)