diff options
author | tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4> | 2018-12-14 20:42:08 +0000 |
---|---|---|
committer | tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4> | 2018-12-14 20:42:08 +0000 |
commit | 815940afeefeeafa49ad3a5d81ef2d273ddeb3d7 (patch) | |
tree | a4dec1fa6554c8adfc9824efc21dd86ee3905c0a /libgomp/testsuite | |
parent | 75180da2a558d3106e26173326933f65b417182c (diff) |
[PR88370] acc_get_cuda_stream/acc_set_cuda_stream: acc_async_sync, acc_async_noval
Per my reading of the OpenACC specification (and as supported by secondary
documentation, such as code examples, or presentations), it's valid to call
"acc_get_cuda_stream"/"acc_set_cuda_stream" also with "acc_async_sync",
"acc_async_noval" arguments, not just with the nonnegative values as currently
implemented.
libgomp/
PR libgomp/88370
* libgomp.texi (acc_get_current_cuda_context, acc_get_cuda_stream)
(acc_set_cuda_stream): Clarify.
* oacc-cuda.c (acc_get_cuda_stream, acc_set_cuda_stream): Use
"async_valid_p".
* plugin/plugin-nvptx.c (nvptx_set_cuda_stream): Refuse "async ==
acc_async_sync".
* testsuite/libgomp.oacc-c-c++-common/acc_set_cuda_stream-1.c: New file.
* testsuite/libgomp.oacc-c-c++-common/async_queue-1.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/lib-84.c: Update.
* testsuite/libgomp.oacc-c-c++-common/lib-85.c: Likewise.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@267147 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libgomp/testsuite')
4 files changed, 187 insertions, 10 deletions
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_set_cuda_stream-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_set_cuda_stream-1.c new file mode 100644 index 00000000000..93981ff5cb7 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_set_cuda_stream-1.c @@ -0,0 +1,42 @@ +/* Verify expected nvptx plugin behavior for "acc_set_cuda_stream" for + "acc_async_sync". */ + +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-set-target-env-var GOMP_DEBUG "1" } */ + +#undef NDEBUG +#include <assert.h> +#include <openacc.h> + +int main(void) +{ + int async = 42; + + /* Initialize. */ +#pragma acc parallel async(acc_async_sync) + ; +#pragma acc parallel async(async) + ; +#pragma acc wait + + void *cuda_stream_sync = acc_get_cuda_stream (acc_async_sync); + assert (cuda_stream_sync == NULL); + void *cuda_stream_async = acc_get_cuda_stream (async); + assert (cuda_stream_async != NULL); + int ret = acc_set_cuda_stream (acc_async_sync, cuda_stream_async); + assert (ret == 0); + void *cuda_stream_sync_ = acc_get_cuda_stream (acc_async_sync); + assert (cuda_stream_sync_ == cuda_stream_sync); + void *cuda_stream_async_ = acc_get_cuda_stream (async); + assert (cuda_stream_async_ == cuda_stream_async); + +#pragma acc parallel async(acc_async_sync) + ; +#pragma acc parallel async(async) + ; +#pragma acc wait + + return 0; +} + +/* { dg-output "Refusing request to set CUDA stream associated with \"acc_async_sync\"" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/async_queue-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/async_queue-1.c new file mode 100644 index 00000000000..48e1846a36e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/async_queue-1.c @@ -0,0 +1,97 @@ +/* Test mapping of async values to specific underlying queues. */ + +#undef NDEBUG +#include <assert.h> +#include <openacc.h> + +/* This is implemented in terms of the "acc_get_cuda_stream" interface. */ + +struct +{ + int async; + void *cuda_stream; +} queues[] = { { acc_async_sync, NULL }, + { acc_async_noval, NULL }, + { 0, NULL }, + { 1, NULL }, + { 2, NULL }, + { 36, NULL }, + { 1982, NULL } }; +const size_t queues_n = sizeof queues / sizeof queues[0]; + +int main(void) +{ + /* Explicitly initialize: it's not clear whether the following OpenACC + runtime library calls implicitly initialize; + <https://github.com/OpenACC/openacc-spec/issues/102>. */ + acc_device_t d; +#if defined ACC_DEVICE_TYPE_nvidia + d = acc_device_nvidia; +#elif defined ACC_DEVICE_TYPE_host + d = acc_device_host; +#else +# error Not ported to this ACC_DEVICE_TYPE +#endif + acc_init (d); + + for (size_t i = 0; i < queues_n; ++i) + { + /* Before actually being used, there are all NULL. */ + queues[i].cuda_stream = acc_get_cuda_stream (queues[i].async); + assert (queues[i].cuda_stream == NULL); + } + + for (size_t i = 0; i < queues_n; ++i) + { + /* Use the queue to initialize it. */ +#pragma acc parallel async(queues[i].async) + ; +#pragma acc wait + + /* Verify CUDA stream used. */ + queues[i].cuda_stream = acc_get_cuda_stream (queues[i].async); +#if defined ACC_DEVICE_TYPE_nvidia + /* "acc_async_sync" maps to the NULL CUDA default stream. */ + if (queues[i].async == acc_async_sync) + assert (queues[i].cuda_stream == NULL); + else + assert (queues[i].cuda_stream != NULL); +#elif defined ACC_DEVICE_TYPE_host + /* For "acc_device_host" there are no CUDA streams. */ + assert (queues[i].cuda_stream == NULL); +#else +# error Not ported to this ACC_DEVICE_TYPE +#endif + } + + /* Verify same results. */ + for (size_t i = 0; i < queues_n; ++i) + { + void *cuda_stream; + + cuda_stream = acc_get_cuda_stream (queues[i].async); + assert (cuda_stream == queues[i].cuda_stream); + +#pragma acc parallel async(queues[i].async) + ; +#pragma acc wait + + cuda_stream = acc_get_cuda_stream (queues[i].async); + assert (cuda_stream == queues[i].cuda_stream); + } + + /* Verify individual underlying queues are all different. */ + for (size_t i = 0; i < queues_n; ++i) + { + if (queues[i].cuda_stream == NULL) + continue; + for (size_t j = i + 1; j < queues_n; ++j) + { + if (queues[j].cuda_stream == NULL) + continue; + assert (queues[j].cuda_stream != queues[i].cuda_stream); + } + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-84.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-84.c index 786b908f755..d793c743630 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-84.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-84.c @@ -7,6 +7,14 @@ #include <openacc.h> #include <cuda.h> +#if !defined __cplusplus +# undef static_assert +# define static_assert _Static_assert +#endif + +static_assert (acc_async_sync == -2, "acc_async_sync?"); +static_assert (acc_async_noval == -1, "acc_async_noval?"); + int main (int argc, char **argv) { @@ -20,9 +28,11 @@ main (int argc, char **argv) (void) acc_get_device_num (acc_device_nvidia); - streams = (CUstream *) malloc (N * sizeof (void *)); + streams = (CUstream *) malloc ((2 + N) * sizeof (void *)); + streams += 2; + /* "streams[i]" is valid for i in [acc_async_sync..N). */ - for (i = 0; i < N; i++) + for (i = acc_async_sync; i < N; i++) { streams[i] = (CUstream) acc_get_cuda_stream (i); if (streams[i] != NULL) @@ -35,11 +45,20 @@ main (int argc, char **argv) abort (); } - if (!acc_set_cuda_stream (i, streams[i])) - abort (); + int ret = acc_set_cuda_stream (i, streams[i]); + if (i == acc_async_sync) + { + if (ret == 1) + abort (); + } + else + { + if (ret != 1) + abort (); + } } - for (i = 0; i < N; i++) + for (i = acc_async_sync; i < N; i++) { int j; int cnt; @@ -48,7 +67,7 @@ main (int argc, char **argv) s = streams[i]; - for (j = 0; j < N; j++) + for (j = acc_async_sync; j < N; j++) { if (s == streams[j]) cnt++; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-85.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-85.c index cf925a7b002..141c83b53dd 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-85.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-85.c @@ -7,6 +7,14 @@ #include <stdio.h> #include <cuda.h> +#if !defined __cplusplus +# undef static_assert +# define static_assert _Static_assert +#endif + +static_assert (acc_async_sync == -2, "acc_async_sync?"); +static_assert (acc_async_noval == -1, "acc_async_noval?"); + int main (int argc, char **argv) { @@ -20,9 +28,11 @@ main (int argc, char **argv) (void) acc_get_device_num (acc_device_nvidia); - streams = (CUstream *) malloc (N * sizeof (void *)); + streams = (CUstream *) malloc ((2 + N) * sizeof (void *)); + streams += 2; + /* "streams[i]" is valid for i in [acc_async_sync..N). */ - for (i = 0; i < N; i++) + for (i = acc_async_sync; i < N; i++) { streams[i] = (CUstream) acc_get_cuda_stream (i); if (streams[i] != NULL) @@ -35,8 +45,17 @@ main (int argc, char **argv) abort (); } - if (!acc_set_cuda_stream (i, streams[i])) - abort (); + int ret = acc_set_cuda_stream (i, streams[i]); + if (i == acc_async_sync) + { + if (ret == 1) + abort (); + } + else + { + if (ret != 1) + abort (); + } } s = NULL; |