diff options
author | Thomas Schwinge <thomas@codesourcery.com> | 2022-03-11 15:11:25 +0100 |
---|---|---|
committer | Thomas Schwinge <thomas@codesourcery.com> | 2022-03-12 15:37:27 +0100 |
commit | 535afbd959bc72de85fca36ba6417f075cca1018 (patch) | |
tree | c90f2af7be5cb307c71512f66c9c3fb77fcb1154 /libgomp | |
parent | 2e53fa7bb2ae9fe1152c27e423be9e261da82ddc (diff) |
OpenACC 'kernels' decomposition: wrong-code cases unless manually making certain variables addressable [PR104892]
Document a few examples of the status quo.
PR middle-end/104892
libgomp/
* testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c: Point
to PR104892.
* testsuite/libgomp.oacc-c-c++-common/default-1.c: Likewise,
enable '--param=openacc-kernels=decompose' and adjust.
* testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise.
* testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90:
Likewise.
Diffstat (limited to 'libgomp')
5 files changed, 59 insertions, 16 deletions
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/default-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/default-1.c index 0ac8d7132d4..fed65c8dccc 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/default-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/default-1.c @@ -1,3 +1,5 @@ +/* { dg-additional-options "--param=openacc-kernels=decompose" } */ + /* { dg-additional-options "-fopt-info-all-omp" } { dg-additional-options "-foffload=-fopt-info-all-omp" } */ @@ -63,6 +65,8 @@ int test_parallel () int test_kernels () { int val = 2; + /*TODO <https://gcc.gnu.org/PR104892> */ + (volatile int *) &val; int ary[32]; int ondev = 0; @@ -71,12 +75,18 @@ int test_kernels () /* val defaults to copy, ary defaults to copy. */ #pragma acc kernels copy(ondev) /* { dg-line l_compute[incr c_compute] } */ - /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */ - /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute$c_compute } */ + /* { dg-note {variable 'ondev\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */ + /* { dg-note {variable 'val\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */ { + /* { dg-note {beginning 'gang-single' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */ ondev = acc_on_device (acc_device_not_host); + /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 } + ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */ #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ + /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */ + /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ + /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */ for (unsigned i = 0; i < 32; i++) { ary[i] = val; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c index eb424776b6b..3db59e8a75c 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c @@ -29,12 +29,12 @@ static int g2; static void f1 () { int a = 0; - /*TODO Without making 'a' addressable, for GCN offloading we will not see the expected value copied out. (But it does work for nvptx offloading, strange...) */ + /*TODO <https://gcc.gnu.org/PR104892> */ (volatile int *) &a; #define N 123 int b[N] = { 0 }; unsigned long long f1; - /*TODO See above. */ + /*TODO <https://gcc.gnu.org/PR104892> */ (volatile void *) &f1; #pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c index fbd9815f683..e7b2817a391 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c @@ -1,6 +1,8 @@ /* Verify that a simple, explicit acc loop reduction works inside a kernels region. */ +/* { dg-additional-options "--param=openacc-kernels=decompose" } */ + /* { dg-additional-options "-fopt-info-all-omp" } { dg-additional-options "-foffload=-fopt-info-all-omp" } */ @@ -17,12 +19,16 @@ int main () { int i, red = 0; + /*TODO <https://gcc.gnu.org/PR104892> */ + (volatile int *) &red; #pragma acc kernels /* { dg-line l_compute1 } */ - /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute1 } */ + /* { dg-note {variable 'red\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute1 } */ { #pragma acc loop reduction (+:red) /* { dg-line l_loop_i1 } */ + /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i1 } */ /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i1 } */ + /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_loop_i1 } */ for (i = 0; i < N; i++) red++; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c index f9c7aed3a56..75e8cb510cc 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c @@ -1,6 +1,8 @@ /* OpenACC parallelism dimensions clauses: num_gangs, num_workers, vector_length. */ +/* { dg-additional-options "--param=openacc-kernels=decompose" } */ + /* { dg-additional-options "-fopt-info-all-omp" } { dg-additional-options "-foffload=-fopt-info-all-omp" } */ @@ -640,20 +642,26 @@ int main () kernels. */ { int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max; + /*TODO <https://gcc.gnu.org/PR104892> */ + (volatile int *) &gangs_min, &gangs_max, &workers_min, &workers_max, &vectors_min, &vectors_max; gangs_min = workers_min = vectors_min = INT_MAX; gangs_max = workers_max = vectors_max = INT_MIN; #pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ - /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */ - /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute$c_compute } */ + /* { dg-note {variable 'gangs_max\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */ + /* { dg-note {variable 'workers_max\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */ + /* { dg-note {variable 'vectors_max\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */ { - /* This is to make the OpenACC kernels construct unparallelizable. */ - asm volatile ("" : : : "memory"); - #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \ reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) + /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */ + /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ + /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */ for (int i = 100; i > -100; --i) { + /* This is to make the loop unparallelizable. */ + asm volatile ("" : : : "memory"); + gangs_min = gangs_max = acc_gang (); workers_min = workers_max = acc_worker (); vectors_min = vectors_max = acc_vector (); @@ -674,23 +682,29 @@ int main () #define WORKERS 5 #define VECTORS 13 int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max; + /*TODO <https://gcc.gnu.org/PR104892> */ + (volatile int *) &gangs_min, &gangs_max, &workers_min, &workers_max, &vectors_min, &vectors_max; gangs_min = workers_min = vectors_min = INT_MAX; gangs_max = workers_max = vectors_max = INT_MIN; #pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ \ num_gangs (gangs) \ num_workers (WORKERS) \ vector_length (VECTORS) - /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */ - /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute$c_compute } */ + /* { dg-note {variable 'gangs_max\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */ + /* { dg-note {variable 'workers_max\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */ + /* { dg-note {variable 'vectors_max\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */ { - /* This is to make the OpenACC kernels construct unparallelizable. */ - asm volatile ("" : : : "memory"); - #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \ reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) + /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */ + /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ + /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */ for (int i = 100; i > -100; --i) { + /* This is to make the loop unparallelizable. */ + asm volatile ("" : : : "memory"); + gangs_min = gangs_max = acc_gang (); workers_min = workers_max = acc_worker (); vectors_min = vectors_max = acc_vector (); diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90 index 6ff740efc32..89bae49c94c 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90 @@ -2,6 +2,8 @@ ! { dg-do run } +! { dg-additional-options "--param=openacc-kernels=decompose" } + ! { dg-additional-options "-fopt-info-all-omp" } ! { dg-additional-options "-foffload=-fopt-info-all-omp" } */ @@ -13,17 +15,28 @@ program reduction integer, parameter :: n = 20 integer :: i, red + !TODO <https://gcc.gnu.org/PR104892> + call make_addressable (red) red = 0 !$acc kernels ! { dg-line l_compute1 } */ - ! { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute1 } + ! { dg-note {variable 'red\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute1 } !$acc loop reduction (+:red) ! { dg-line l_loop_i1 } + ! { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i1 } ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i1 } + ! { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_loop_i1 } do i = 1, n red = red + 1 end do !$acc end kernels if (red .ne. n) stop 1 + +contains + + subroutine make_addressable (v) + integer :: v ! by reference + end subroutine make_addressable + end program reduction |