summaryrefslogtreecommitdiff
path: root/libgomp
diff options
context:
space:
mode:
author: Tom de Vries <tdevries@suse.de> 2022-01-23 06:29:58 +0100
committer: Tom de Vries <tdevries@suse.de> 2022-02-01 08:15:00 +0100
commit: d43fbc7d3f7621e1c8f153c1471d2a5cd20bfdc8 (patch)
tree: de90383618e442099bccbe4f1be08c2a89ed9099 /libgomp
parent: be362d5e127e087dcd4a2c0d843cf7b9a7399c73 (diff)
[libgomp, testsuite] Fix insufficient resources in test-cases
When running libgomp test-case broadcast-many.c on an nvptx accelerator
(T400, driver version 470.86), I run into:
...
libgomp: The Nvidia accelerator has insufficient resources to launch \
  'main$_omp_fn$0' with num_workers = 32 and vector_length = 32; \
  recompile the program with 'num_workers = x and vector_length = y' on \
  that offloaded region or '-fopenacc-dim=:x:y' where x * y <= 896.
FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/broadcast-many.c \
  -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none \
  -O0 execution test
...

The error does not occur when using GOMP_NVPTX_JIT=-O0.

Fix this by using 896 / 32 == 28 workers for ACC_DEVICE_TYPE_nvidia.

Likewise for some other test-cases.

Tested libgomp on x86_64 with nvptx accelerator.

libgomp/ChangeLog:

2022-01-27  Tom de Vries  <tdevries@suse.de>

	* testsuite/libgomp.oacc-c-c++-common/broadcast-many.c: Reduce
	num_workers for nvidia accelerator to fix libgomp error 'insufficient
	resources'.
	* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c: Same.
	* testsuite/libgomp.oacc-c-c++-common/reduction-7.c: Same.
Diffstat (limited to 'libgomp')
-rw-r--r--  libgomp/testsuite/libgomp.oacc-c-c++-common/broadcast-many.c  9
-rw-r--r--  libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c  10
-rw-r--r--  libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c  9
3 files changed, 25 insertions, 3 deletions
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/broadcast-many.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/broadcast-many.c
index 37839edfb09..08e026960e6 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/broadcast-many.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/broadcast-many.c
@@ -5,6 +5,13 @@
#include <assert.h>
#include <stdio.h>
+#if ACC_DEVICE_TYPE_nvidia
+/* To avoid 'libgomp: The Nvidia accelerator has insufficient resources'. */
+#define NUM_WORKERS 28
+#else
+#define NUM_WORKERS 32
+#endif
+
#define LOCAL(n) double n = input;
#define LOCALS(n) LOCAL(n##1) LOCAL(n##2) LOCAL(n##3) LOCAL(n##4) \
LOCAL(n##5) LOCAL(n##6) LOCAL(n##7) LOCAL(n##8)
@@ -23,7 +30,7 @@ int main (void)
int ret;
int input = 1;
- #pragma acc parallel num_gangs(1) num_workers(32) copyout(ret)
+ #pragma acc parallel num_gangs(1) num_workers(NUM_WORKERS) copyout(ret)
{
int w = 0;
LOCALS2(h);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c
index c3cc12fa953..4c66dc7bfea 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c
@@ -1,5 +1,12 @@
#include <assert.h>
+#if ACC_DEVICE_TYPE_nvidia
+/* To avoid 'libgomp: The Nvidia accelerator has insufficient resources'. */
+#define NUM_WORKERS 24
+#else
+#define NUM_WORKERS 32
+#endif
+
/* Test of reduction on both parallel and loop directives (workers and vectors
together in gang-partitioned mode, float type, multiple reductions). */
@@ -13,7 +20,8 @@ main (int argc, char *argv[])
for (i = 0; i < 32768; i++)
arr[i] = i % (32768 / 64);
- #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ #pragma acc parallel \
+ num_gangs(32) num_workers(NUM_WORKERS) vector_length(32) \
reduction(+:res) reduction(max:mres) copy(res, mres)
{
#pragma acc loop gang /* { dg-warning "nested loop in reduction needs reduction clause for 'm\?res'" "TODO" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c
index c2fb922a7f1..b4fe2300581 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c
@@ -181,6 +181,12 @@ void gwv_np_3()
assert (res == hres);
}
+#if ACC_DEVICE_TYPE_nvidia
+/* To avoid 'libgomp: The Nvidia accelerator has insufficient resources'. */
+#define NUM_WORKERS 28
+#else
+#define NUM_WORKERS 32
+#endif
/* Test of reduction on loop directive (gangs, workers and vectors, multiple
non-private reduction variables, float type). */
@@ -194,7 +200,7 @@ void gwv_np_4()
for (i = 0; i < 32768; i++)
arr[i] = i % (32768 / 64);
- #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
+ #pragma acc parallel num_gangs(32) num_workers(NUM_WORKERS) vector_length(32)
{
#pragma acc loop gang reduction(+:res) reduction(max:mres)
for (j = 0; j < 32; j++)
@@ -235,6 +241,7 @@ void gwv_np_4()
assert (mres == hmres);
}
+#undef NUM_WORKERS
/* Test of reduction on loop directive (vectors, private reduction
variable). */