aboutsummaryrefslogtreecommitdiff
path: root/libgomp/testsuite
diff options
context:
space:
mode:
authortschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>2018-12-14 20:42:08 +0000
committertschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>2018-12-14 20:42:08 +0000
commit815940afeefeeafa49ad3a5d81ef2d273ddeb3d7 (patch)
treea4dec1fa6554c8adfc9824efc21dd86ee3905c0a /libgomp/testsuite
parent75180da2a558d3106e26173326933f65b417182c (diff)
[PR88370] acc_get_cuda_stream/acc_set_cuda_stream: acc_async_sync, acc_async_noval
Per my reading of the OpenACC specification (and as supported by secondary documentation, such as code examples, or presentations), it's valid to call "acc_get_cuda_stream"/"acc_set_cuda_stream" also with "acc_async_sync", "acc_async_noval" arguments, not just with the nonnegative values as currently implemented. libgomp/ PR libgomp/88370 * libgomp.texi (acc_get_current_cuda_context, acc_get_cuda_stream) (acc_set_cuda_stream): Clarify. * oacc-cuda.c (acc_get_cuda_stream, acc_set_cuda_stream): Use "async_valid_p". * plugin/plugin-nvptx.c (nvptx_set_cuda_stream): Refuse "async == acc_async_sync". * testsuite/libgomp.oacc-c-c++-common/acc_set_cuda_stream-1.c: New file. * testsuite/libgomp.oacc-c-c++-common/async_queue-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-84.c: Update. * testsuite/libgomp.oacc-c-c++-common/lib-85.c: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@267147 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libgomp/testsuite')
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/acc_set_cuda_stream-1.c42
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/async_queue-1.c97
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/lib-84.c31
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/lib-85.c27
4 files changed, 187 insertions, 10 deletions
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_set_cuda_stream-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_set_cuda_stream-1.c
new file mode 100644
index 00000000000..93981ff5cb7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_set_cuda_stream-1.c
@@ -0,0 +1,42 @@
+/* Verify expected nvptx plugin behavior for "acc_set_cuda_stream" for
+ "acc_async_sync". */
+
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-set-target-env-var GOMP_DEBUG "1" } */
+
+#undef NDEBUG
+#include <assert.h>
+#include <openacc.h>
+
+int main(void)
+{
+ int async = 42;
+
+ /* Initialize. */
+#pragma acc parallel async(acc_async_sync)
+ ;
+#pragma acc parallel async(async)
+ ;
+#pragma acc wait
+
+ void *cuda_stream_sync = acc_get_cuda_stream (acc_async_sync);
+ assert (cuda_stream_sync == NULL);
+ void *cuda_stream_async = acc_get_cuda_stream (async);
+ assert (cuda_stream_async != NULL);
+ int ret = acc_set_cuda_stream (acc_async_sync, cuda_stream_async);
+ assert (ret == 0);
+ void *cuda_stream_sync_ = acc_get_cuda_stream (acc_async_sync);
+ assert (cuda_stream_sync_ == cuda_stream_sync);
+ void *cuda_stream_async_ = acc_get_cuda_stream (async);
+ assert (cuda_stream_async_ == cuda_stream_async);
+
+#pragma acc parallel async(acc_async_sync)
+ ;
+#pragma acc parallel async(async)
+ ;
+#pragma acc wait
+
+ return 0;
+}
+
+/* { dg-output "Refusing request to set CUDA stream associated with \"acc_async_sync\"" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/async_queue-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/async_queue-1.c
new file mode 100644
index 00000000000..48e1846a36e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/async_queue-1.c
@@ -0,0 +1,97 @@
+/* Test mapping of async values to specific underlying queues. */
+
+#undef NDEBUG
+#include <assert.h>
+#include <openacc.h>
+
+/* This is implemented in terms of the "acc_get_cuda_stream" interface. */
+
+struct
+{
+ int async;
+ void *cuda_stream;
+} queues[] = { { acc_async_sync, NULL },
+ { acc_async_noval, NULL },
+ { 0, NULL },
+ { 1, NULL },
+ { 2, NULL },
+ { 36, NULL },
+ { 1982, NULL } };
+const size_t queues_n = sizeof queues / sizeof queues[0];
+
+int main(void)
+{
+ /* Explicitly initialize: it's not clear whether the following OpenACC
+ runtime library calls implicitly initialize;
+ <https://github.com/OpenACC/openacc-spec/issues/102>. */
+ acc_device_t d;
+#if defined ACC_DEVICE_TYPE_nvidia
+ d = acc_device_nvidia;
+#elif defined ACC_DEVICE_TYPE_host
+ d = acc_device_host;
+#else
+# error Not ported to this ACC_DEVICE_TYPE
+#endif
+ acc_init (d);
+
+ for (size_t i = 0; i < queues_n; ++i)
+ {
+ /* Before actually being used, there are all NULL. */
+ queues[i].cuda_stream = acc_get_cuda_stream (queues[i].async);
+ assert (queues[i].cuda_stream == NULL);
+ }
+
+ for (size_t i = 0; i < queues_n; ++i)
+ {
+ /* Use the queue to initialize it. */
+#pragma acc parallel async(queues[i].async)
+ ;
+#pragma acc wait
+
+ /* Verify CUDA stream used. */
+ queues[i].cuda_stream = acc_get_cuda_stream (queues[i].async);
+#if defined ACC_DEVICE_TYPE_nvidia
+ /* "acc_async_sync" maps to the NULL CUDA default stream. */
+ if (queues[i].async == acc_async_sync)
+ assert (queues[i].cuda_stream == NULL);
+ else
+ assert (queues[i].cuda_stream != NULL);
+#elif defined ACC_DEVICE_TYPE_host
+ /* For "acc_device_host" there are no CUDA streams. */
+ assert (queues[i].cuda_stream == NULL);
+#else
+# error Not ported to this ACC_DEVICE_TYPE
+#endif
+ }
+
+ /* Verify same results. */
+ for (size_t i = 0; i < queues_n; ++i)
+ {
+ void *cuda_stream;
+
+ cuda_stream = acc_get_cuda_stream (queues[i].async);
+ assert (cuda_stream == queues[i].cuda_stream);
+
+#pragma acc parallel async(queues[i].async)
+ ;
+#pragma acc wait
+
+ cuda_stream = acc_get_cuda_stream (queues[i].async);
+ assert (cuda_stream == queues[i].cuda_stream);
+ }
+
+ /* Verify individual underlying queues are all different. */
+ for (size_t i = 0; i < queues_n; ++i)
+ {
+ if (queues[i].cuda_stream == NULL)
+ continue;
+ for (size_t j = i + 1; j < queues_n; ++j)
+ {
+ if (queues[j].cuda_stream == NULL)
+ continue;
+ assert (queues[j].cuda_stream != queues[i].cuda_stream);
+ }
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-84.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-84.c
index 786b908f755..d793c743630 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-84.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-84.c
@@ -7,6 +7,14 @@
#include <openacc.h>
#include <cuda.h>
+#if !defined __cplusplus
+# undef static_assert
+# define static_assert _Static_assert
+#endif
+
+static_assert (acc_async_sync == -2, "acc_async_sync?");
+static_assert (acc_async_noval == -1, "acc_async_noval?");
+
int
main (int argc, char **argv)
{
@@ -20,9 +28,11 @@ main (int argc, char **argv)
(void) acc_get_device_num (acc_device_nvidia);
- streams = (CUstream *) malloc (N * sizeof (void *));
+ streams = (CUstream *) malloc ((2 + N) * sizeof (void *));
+ streams += 2;
+ /* "streams[i]" is valid for i in [acc_async_sync..N). */
- for (i = 0; i < N; i++)
+ for (i = acc_async_sync; i < N; i++)
{
streams[i] = (CUstream) acc_get_cuda_stream (i);
if (streams[i] != NULL)
@@ -35,11 +45,20 @@ main (int argc, char **argv)
abort ();
}
- if (!acc_set_cuda_stream (i, streams[i]))
- abort ();
+ int ret = acc_set_cuda_stream (i, streams[i]);
+ if (i == acc_async_sync)
+ {
+ if (ret == 1)
+ abort ();
+ }
+ else
+ {
+ if (ret != 1)
+ abort ();
+ }
}
- for (i = 0; i < N; i++)
+ for (i = acc_async_sync; i < N; i++)
{
int j;
int cnt;
@@ -48,7 +67,7 @@ main (int argc, char **argv)
s = streams[i];
- for (j = 0; j < N; j++)
+ for (j = acc_async_sync; j < N; j++)
{
if (s == streams[j])
cnt++;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-85.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-85.c
index cf925a7b002..141c83b53dd 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-85.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-85.c
@@ -7,6 +7,14 @@
#include <stdio.h>
#include <cuda.h>
+#if !defined __cplusplus
+# undef static_assert
+# define static_assert _Static_assert
+#endif
+
+static_assert (acc_async_sync == -2, "acc_async_sync?");
+static_assert (acc_async_noval == -1, "acc_async_noval?");
+
int
main (int argc, char **argv)
{
@@ -20,9 +28,11 @@ main (int argc, char **argv)
(void) acc_get_device_num (acc_device_nvidia);
- streams = (CUstream *) malloc (N * sizeof (void *));
+ streams = (CUstream *) malloc ((2 + N) * sizeof (void *));
+ streams += 2;
+ /* "streams[i]" is valid for i in [acc_async_sync..N). */
- for (i = 0; i < N; i++)
+ for (i = acc_async_sync; i < N; i++)
{
streams[i] = (CUstream) acc_get_cuda_stream (i);
if (streams[i] != NULL)
@@ -35,8 +45,17 @@ main (int argc, char **argv)
abort ();
}
- if (!acc_set_cuda_stream (i, streams[i]))
- abort ();
+ int ret = acc_set_cuda_stream (i, streams[i]);
+ if (i == acc_async_sync)
+ {
+ if (ret == 1)
+ abort ();
+ }
+ else
+ {
+ if (ret != 1)
+ abort ();
+ }
}
s = NULL;