diff options
author | Nicolas Koenig <koenigni@student.ethz.ch> | 2020-11-08 17:07:48 +0100 |
---|---|---|
committer | Nicolas Koenig <koenigni@student.ethz.ch> | 2020-11-08 17:07:48 +0100 |
commit | 34186331bdeedb5a8ecae0a9a95d87e8624ea726 (patch) | |
tree | f0358afd4311cbfe97431e0c1d268a6b2a365930 /libgfortran | |
parent | 23856d2f29fd87edf724ade48ee30c869a3b1ea3 (diff) |
Implement stat and errmsg.
gcc/fortran/ChangeLog:
* trans-decl.c (gfc_sym_mangled_function_id): Whitespace fix.
(gfc_build_builtin_function_decls): Correct fn specs.
* trans-intrinsic.c (trans_argument): Re-apply fix (?)
(conv_cas_reduce): Likewise.
(conv_co_collective): Likewise.
libgfortran/ChangeLog:
* Makefile.am: Add counter_barrier.c and counter_barrier.h
* Makefile.in: Regenerate.
* generated/nca_minmax_i1.c: Regenerated.
* generated/nca_minmax_i16.c: Regenerated.
* generated/nca_minmax_i2.c: Regenerated.
* generated/nca_minmax_i4.c: Regenerated.
* generated/nca_minmax_i8.c: Regenerated.
* generated/nca_minmax_r10.c: Regenerated.
* generated/nca_minmax_r16.c: Regenerated.
* generated/nca_minmax_r4.c: Regenerated.
* generated/nca_minmax_r8.c: Regenerated.
* generated/nca_minmax_s1.c: Regenerated.
* generated/nca_minmax_s4.c: Regenerated.
* m4/nca-minmax-s.m4: Add stat and errmsg.
* m4/nca_minmax.m4: Likewise.
* nca/coarraynative.c (get_master): New function.
(test_for_cas_errors): New function.
(master_is_image_active): New function.
(master_get_num_active_images): New function.
(master_bind_active_image_barrier): New function.
(error_on_missing_images): New function.
(cas_master): New function.
* nca/collective_subroutine.c (collsub_sync): Replace pthread_barrier_wait
by counter_barrier.
(collsub_reduce_array): Add error_on_missing_images. Adjust to number of
images.
(collsub_reduce_scalar): Likewise.
(collsub_iface_init): Likewise.
* nca/collective_subroutine.h: Replace pthread_barrier_t by counter_barrier.
* nca/libcoarraynative.h: Include counter_barrier.h. Add handling
for failed images, stat and errmsg.
* nca/sync.c (sync_all_init): Replace pthread_barrir by counter_barrier.
(sync_iface_init): Adjust handling to total_num_images.
(sync_table): Likewise.
(sync_all): LIkewise.
* nca/sync.h: Include some theaders, adjust to counter_barrier.
* nca/util.h: Add internal_proto to unpack_array_finish.
* nca/wrapper.c (cas_collsub_reduce_array): Adjust to total_num_images,
handle status and errmsg.
(cas_collsub_reduce_scalar): Likewise.
(cas_collsub_broadcast_array): Likewise.
(cas_collsub_broadcast_scalar): Likewise.
(cas_coarray_alloc): Likewise.
(cas_coarray_free): Likewise.
(cas_coarray_num_images): Likewise.
(cas_coarray_sync_all): Likewise.
(cas_sync_images): Likewise.
* nca/counter_barrier.c: New file.
* nca/counter_barrier.h: New file.
Diffstat (limited to 'libgfortran')
25 files changed, 1018 insertions, 545 deletions
diff --git a/libgfortran/Makefile.am b/libgfortran/Makefile.am index ef2b9f95488..bb4f126862f 100644 --- a/libgfortran/Makefile.am +++ b/libgfortran/Makefile.am @@ -82,12 +82,12 @@ mylibdir = $(libdir)/gcc/$(target_alias)/$(gcc_version)$(MULTISUBDIR) libcaf_shared_la_SOURCES = nca/alloc.c nca/allocator.c nca/coarraynative.c \ nca/hashmap.c \ nca/sync.c nca/util.c nca/wrapper.c nca/collective_subroutine.c \ - nca/shared_memory.c \ + nca/shared_memory.c nca/counter_barrier.c \ $(i_nca_minmax_c) $(i_nca_minmax_s_c) libcaf_shared_la_DEPENDENCIES = nca/alloc.h nca/allocator.h nca/hashmap.h nca/libcoarraynative.h nca/sync.h shared_memory.h \ nca/util.h nca/lock.h nca/collective_subroutine.h\ - nca/collective_inline.h + nca/collective_inline.h nca/counter_barrier.h libcaf_shared_la_LINK = $(LINK) $(libcaf_shared_la_LDFLAGS) endif diff --git a/libgfortran/Makefile.in b/libgfortran/Makefile.in index 5d9d7e48b82..42f4724cf11 100644 --- a/libgfortran/Makefile.in +++ b/libgfortran/Makefile.in @@ -230,8 +230,9 @@ am__objects_2 = nca_minmax_s1.lo nca_minmax_s4.lo @LIBGFOR_NATIVE_COARRAY_TRUE@ hashmap.lo sync.lo util.lo \ @LIBGFOR_NATIVE_COARRAY_TRUE@ wrapper.lo \ @LIBGFOR_NATIVE_COARRAY_TRUE@ collective_subroutine.lo \ -@LIBGFOR_NATIVE_COARRAY_TRUE@ shared_memory.lo $(am__objects_1) \ -@LIBGFOR_NATIVE_COARRAY_TRUE@ $(am__objects_2) +@LIBGFOR_NATIVE_COARRAY_TRUE@ shared_memory.lo \ +@LIBGFOR_NATIVE_COARRAY_TRUE@ counter_barrier.lo \ +@LIBGFOR_NATIVE_COARRAY_TRUE@ $(am__objects_1) $(am__objects_2) libcaf_shared_la_OBJECTS = $(am_libcaf_shared_la_OBJECTS) @LIBGFOR_NATIVE_COARRAY_TRUE@am_libcaf_shared_la_rpath = -rpath \ @LIBGFOR_NATIVE_COARRAY_TRUE@ $(mylibdir) @@ -788,7 +789,7 @@ i_nca_minmax_s_c = \ @LIBGFOR_NATIVE_COARRAY_TRUE@libcaf_shared_la_SOURCES = nca/alloc.c nca/allocator.c nca/coarraynative.c \ @LIBGFOR_NATIVE_COARRAY_TRUE@ nca/hashmap.c \ @LIBGFOR_NATIVE_COARRAY_TRUE@ nca/sync.c nca/util.c nca/wrapper.c nca/collective_subroutine.c \ -@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/shared_memory.c \ +@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/shared_memory.c nca/counter_barrier.c \ @LIBGFOR_NATIVE_COARRAY_TRUE@ $(i_nca_minmax_c) $(i_nca_minmax_s_c) @LIBGFOR_NATIVE_COARRAY_TRUE@libcaf_shared_la_DEPENDENCIES = nca/alloc.h nca/allocator.h nca/hashmap.h @@ -1840,6 +1841,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/count_2_l.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/count_4_l.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/count_8_l.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/counter_barrier.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cpu_time.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift0.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift0_c10.Plo@am__quote@ @@ -2901,6 +2903,13 @@ shared_memory.lo: nca/shared_memory.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o shared_memory.lo `test -f 'nca/shared_memory.c' || echo '$(srcdir)/'`nca/shared_memory.c +counter_barrier.lo: nca/counter_barrier.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT counter_barrier.lo -MD -MP -MF $(DEPDIR)/counter_barrier.Tpo -c -o counter_barrier.lo `test -f 'nca/counter_barrier.c' || echo '$(srcdir)/'`nca/counter_barrier.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/counter_barrier.Tpo $(DEPDIR)/counter_barrier.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/counter_barrier.c' object='counter_barrier.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o counter_barrier.lo `test -f 'nca/counter_barrier.c' || echo '$(srcdir)/'`nca/counter_barrier.c + nca_minmax_i1.lo: $(srcdir)/generated/nca_minmax_i1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_i1.lo -MD -MP -MF $(DEPDIR)/nca_minmax_i1.Tpo -c -o nca_minmax_i1.lo `test -f '$(srcdir)/generated/nca_minmax_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_i1.Tpo $(DEPDIR)/nca_minmax_i1.Plo @@ -7235,7 +7244,7 @@ uninstall-am: uninstall-cafexeclibLTLIBRARIES uninstall-gfor_cHEADERS \ @LIBGFOR_USE_SYMVER_SUN_TRUE@@LIBGFOR_USE_SYMVER_TRUE@ > $@ || (rm -f $@ ; exit 1) @LIBGFOR_NATIVE_COARRAY_TRUE@ nca/libcoarraynative.h nca/sync.h shared_memory.h \ @LIBGFOR_NATIVE_COARRAY_TRUE@ nca/util.h nca/lock.h nca/collective_subroutine.h\ -@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/collective_inline.h +@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/collective_inline.h nca/counter_barrier.h # Turn on vectorization and loop unrolling for matmul. $(patsubst %.c,%.lo,$(notdir $(i_matmul_c))): AM_CFLAGS += -ffast-math -ftree-vectorize -funroll-loops --param max-unroll-times=4 diff --git a/libgfortran/generated/nca_minmax_i1.c b/libgfortran/generated/nca_minmax_i1.c index 30a8587ea55..a34018165a9 100644 --- a/libgfortran/generated/nca_minmax_i1.c +++ b/libgfortran/generated/nca_minmax_i1.c @@ -37,9 +37,7 @@ export_proto(cas_collsub_max_scalar_i1); void cas_collsub_max_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -47,17 +45,21 @@ cas_collsub_max_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, GFC_INTEGER_1 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -66,7 +68,7 @@ cas_collsub_max_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -82,9 +84,7 @@ export_proto(cas_collsub_min_scalar_i1); void cas_collsub_min_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -92,17 +92,21 @@ cas_collsub_min_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, GFC_INTEGER_1 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -111,7 +115,7 @@ cas_collsub_min_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -127,9 +131,7 @@ export_proto(cas_collsub_sum_scalar_i1); void cas_collsub_sum_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -137,17 +139,21 @@ cas_collsub_sum_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, GFC_INTEGER_1 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -155,7 +161,7 @@ cas_collsub_sum_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -171,9 +177,7 @@ export_proto (cas_collsub_max_array_i1); void cas_collsub_max_array_i1 (gfc_array_i1 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -188,6 +192,10 @@ cas_collsub_max_array_i1 (gfc_array_i1 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -212,7 +220,7 @@ cas_collsub_max_array_i1 (gfc_array_i1 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_1); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -261,10 +269,10 @@ cas_collsub_max_array_i1 (gfc_array_i1 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_1 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -282,7 +290,7 @@ cas_collsub_max_array_i1 (gfc_array_i1 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -331,9 +339,7 @@ export_proto (cas_collsub_min_array_i1); void cas_collsub_min_array_i1 (gfc_array_i1 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -348,6 +354,10 @@ cas_collsub_min_array_i1 (gfc_array_i1 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -372,7 +382,7 @@ cas_collsub_min_array_i1 (gfc_array_i1 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_1); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -421,10 +431,10 @@ cas_collsub_min_array_i1 (gfc_array_i1 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_1 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -442,7 +452,7 @@ cas_collsub_min_array_i1 (gfc_array_i1 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -491,9 +501,7 @@ export_proto (cas_collsub_sum_array_i1); void cas_collsub_sum_array_i1 (gfc_array_i1 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -508,6 +516,10 @@ cas_collsub_sum_array_i1 (gfc_array_i1 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -532,7 +544,7 @@ cas_collsub_sum_array_i1 (gfc_array_i1 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_1); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -581,10 +593,10 @@ cas_collsub_sum_array_i1 (gfc_array_i1 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_1 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -601,7 +613,7 @@ cas_collsub_sum_array_i1 (gfc_array_i1 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/generated/nca_minmax_i16.c b/libgfortran/generated/nca_minmax_i16.c index f2e458182a5..6642fa6de2f 100644 --- a/libgfortran/generated/nca_minmax_i16.c +++ b/libgfortran/generated/nca_minmax_i16.c @@ -37,9 +37,7 @@ export_proto(cas_collsub_max_scalar_i16); void cas_collsub_max_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -47,17 +45,21 @@ cas_collsub_max_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, GFC_INTEGER_16 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -66,7 +68,7 @@ cas_collsub_max_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -82,9 +84,7 @@ export_proto(cas_collsub_min_scalar_i16); void cas_collsub_min_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -92,17 +92,21 @@ cas_collsub_min_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, GFC_INTEGER_16 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -111,7 +115,7 @@ cas_collsub_min_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -127,9 +131,7 @@ export_proto(cas_collsub_sum_scalar_i16); void cas_collsub_sum_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -137,17 +139,21 @@ cas_collsub_sum_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, GFC_INTEGER_16 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -155,7 +161,7 @@ cas_collsub_sum_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -171,9 +177,7 @@ export_proto (cas_collsub_max_array_i16); void cas_collsub_max_array_i16 (gfc_array_i16 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -188,6 +192,10 @@ cas_collsub_max_array_i16 (gfc_array_i16 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -212,7 +220,7 @@ cas_collsub_max_array_i16 (gfc_array_i16 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_16); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -261,10 +269,10 @@ cas_collsub_max_array_i16 (gfc_array_i16 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_16 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -282,7 +290,7 @@ cas_collsub_max_array_i16 (gfc_array_i16 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -331,9 +339,7 @@ export_proto (cas_collsub_min_array_i16); void cas_collsub_min_array_i16 (gfc_array_i16 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -348,6 +354,10 @@ cas_collsub_min_array_i16 (gfc_array_i16 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -372,7 +382,7 @@ cas_collsub_min_array_i16 (gfc_array_i16 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_16); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -421,10 +431,10 @@ cas_collsub_min_array_i16 (gfc_array_i16 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_16 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -442,7 +452,7 @@ cas_collsub_min_array_i16 (gfc_array_i16 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -491,9 +501,7 @@ export_proto (cas_collsub_sum_array_i16); void cas_collsub_sum_array_i16 (gfc_array_i16 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -508,6 +516,10 @@ cas_collsub_sum_array_i16 (gfc_array_i16 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -532,7 +544,7 @@ cas_collsub_sum_array_i16 (gfc_array_i16 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_16); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -581,10 +593,10 @@ cas_collsub_sum_array_i16 (gfc_array_i16 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_16 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -601,7 +613,7 @@ cas_collsub_sum_array_i16 (gfc_array_i16 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/generated/nca_minmax_i2.c b/libgfortran/generated/nca_minmax_i2.c index fd8d718fbe1..1a9d43afe32 100644 --- a/libgfortran/generated/nca_minmax_i2.c +++ b/libgfortran/generated/nca_minmax_i2.c @@ -37,9 +37,7 @@ export_proto(cas_collsub_max_scalar_i2); void cas_collsub_max_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -47,17 +45,21 @@ cas_collsub_max_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, GFC_INTEGER_2 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -66,7 +68,7 @@ cas_collsub_max_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -82,9 +84,7 @@ export_proto(cas_collsub_min_scalar_i2); void cas_collsub_min_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -92,17 +92,21 @@ cas_collsub_min_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, GFC_INTEGER_2 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -111,7 +115,7 @@ cas_collsub_min_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -127,9 +131,7 @@ export_proto(cas_collsub_sum_scalar_i2); void cas_collsub_sum_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -137,17 +139,21 @@ cas_collsub_sum_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, GFC_INTEGER_2 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -155,7 +161,7 @@ cas_collsub_sum_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -171,9 +177,7 @@ export_proto (cas_collsub_max_array_i2); void cas_collsub_max_array_i2 (gfc_array_i2 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -188,6 +192,10 @@ cas_collsub_max_array_i2 (gfc_array_i2 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -212,7 +220,7 @@ cas_collsub_max_array_i2 (gfc_array_i2 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_2); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -261,10 +269,10 @@ cas_collsub_max_array_i2 (gfc_array_i2 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_2 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -282,7 +290,7 @@ cas_collsub_max_array_i2 (gfc_array_i2 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -331,9 +339,7 @@ export_proto (cas_collsub_min_array_i2); void cas_collsub_min_array_i2 (gfc_array_i2 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -348,6 +354,10 @@ cas_collsub_min_array_i2 (gfc_array_i2 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -372,7 +382,7 @@ cas_collsub_min_array_i2 (gfc_array_i2 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_2); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -421,10 +431,10 @@ cas_collsub_min_array_i2 (gfc_array_i2 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_2 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -442,7 +452,7 @@ cas_collsub_min_array_i2 (gfc_array_i2 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -491,9 +501,7 @@ export_proto (cas_collsub_sum_array_i2); void cas_collsub_sum_array_i2 (gfc_array_i2 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -508,6 +516,10 @@ cas_collsub_sum_array_i2 (gfc_array_i2 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -532,7 +544,7 @@ cas_collsub_sum_array_i2 (gfc_array_i2 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_2); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -581,10 +593,10 @@ cas_collsub_sum_array_i2 (gfc_array_i2 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_2 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -601,7 +613,7 @@ cas_collsub_sum_array_i2 (gfc_array_i2 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/generated/nca_minmax_i4.c b/libgfortran/generated/nca_minmax_i4.c index 04972e767f7..e6018531a1f 100644 --- a/libgfortran/generated/nca_minmax_i4.c +++ b/libgfortran/generated/nca_minmax_i4.c @@ -37,9 +37,7 @@ export_proto(cas_collsub_max_scalar_i4); void cas_collsub_max_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -47,17 +45,21 @@ cas_collsub_max_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, GFC_INTEGER_4 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -66,7 +68,7 @@ cas_collsub_max_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -82,9 +84,7 @@ export_proto(cas_collsub_min_scalar_i4); void cas_collsub_min_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -92,17 +92,21 @@ cas_collsub_min_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, GFC_INTEGER_4 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -111,7 +115,7 @@ cas_collsub_min_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -127,9 +131,7 @@ export_proto(cas_collsub_sum_scalar_i4); void cas_collsub_sum_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -137,17 +139,21 @@ cas_collsub_sum_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, GFC_INTEGER_4 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -155,7 +161,7 @@ cas_collsub_sum_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -171,9 +177,7 @@ export_proto (cas_collsub_max_array_i4); void cas_collsub_max_array_i4 (gfc_array_i4 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -188,6 +192,10 @@ cas_collsub_max_array_i4 (gfc_array_i4 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -212,7 +220,7 @@ cas_collsub_max_array_i4 (gfc_array_i4 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_4); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -261,10 +269,10 @@ cas_collsub_max_array_i4 (gfc_array_i4 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_4 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -282,7 +290,7 @@ cas_collsub_max_array_i4 (gfc_array_i4 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -331,9 +339,7 @@ export_proto (cas_collsub_min_array_i4); void cas_collsub_min_array_i4 (gfc_array_i4 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -348,6 +354,10 @@ cas_collsub_min_array_i4 (gfc_array_i4 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -372,7 +382,7 @@ cas_collsub_min_array_i4 (gfc_array_i4 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_4); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -421,10 +431,10 @@ cas_collsub_min_array_i4 (gfc_array_i4 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_4 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -442,7 +452,7 @@ cas_collsub_min_array_i4 (gfc_array_i4 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -491,9 +501,7 @@ export_proto (cas_collsub_sum_array_i4); void cas_collsub_sum_array_i4 (gfc_array_i4 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -508,6 +516,10 @@ cas_collsub_sum_array_i4 (gfc_array_i4 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -532,7 +544,7 @@ cas_collsub_sum_array_i4 (gfc_array_i4 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_4); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -581,10 +593,10 @@ cas_collsub_sum_array_i4 (gfc_array_i4 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_4 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -601,7 +613,7 @@ cas_collsub_sum_array_i4 (gfc_array_i4 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/generated/nca_minmax_i8.c b/libgfortran/generated/nca_minmax_i8.c index b4b0864896f..fb5a2ecc2f3 100644 --- a/libgfortran/generated/nca_minmax_i8.c +++ b/libgfortran/generated/nca_minmax_i8.c @@ -37,9 +37,7 @@ export_proto(cas_collsub_max_scalar_i8); void cas_collsub_max_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -47,17 +45,21 @@ cas_collsub_max_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, GFC_INTEGER_8 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -66,7 +68,7 @@ cas_collsub_max_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -82,9 +84,7 @@ export_proto(cas_collsub_min_scalar_i8); void cas_collsub_min_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -92,17 +92,21 @@ cas_collsub_min_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, GFC_INTEGER_8 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -111,7 +115,7 @@ cas_collsub_min_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -127,9 +131,7 @@ export_proto(cas_collsub_sum_scalar_i8); void cas_collsub_sum_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -137,17 +139,21 @@ cas_collsub_sum_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, GFC_INTEGER_8 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -155,7 +161,7 @@ cas_collsub_sum_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -171,9 +177,7 @@ export_proto (cas_collsub_max_array_i8); void cas_collsub_max_array_i8 (gfc_array_i8 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -188,6 +192,10 @@ cas_collsub_max_array_i8 (gfc_array_i8 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -212,7 +220,7 @@ cas_collsub_max_array_i8 (gfc_array_i8 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_8); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -261,10 +269,10 @@ cas_collsub_max_array_i8 (gfc_array_i8 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_8 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -282,7 +290,7 @@ cas_collsub_max_array_i8 (gfc_array_i8 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -331,9 +339,7 @@ export_proto (cas_collsub_min_array_i8); void cas_collsub_min_array_i8 (gfc_array_i8 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -348,6 +354,10 @@ cas_collsub_min_array_i8 (gfc_array_i8 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -372,7 +382,7 @@ cas_collsub_min_array_i8 (gfc_array_i8 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_8); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -421,10 +431,10 @@ cas_collsub_min_array_i8 (gfc_array_i8 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_8 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -442,7 +452,7 @@ cas_collsub_min_array_i8 (gfc_array_i8 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -491,9 +501,7 @@ export_proto (cas_collsub_sum_array_i8); void cas_collsub_sum_array_i8 (gfc_array_i8 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -508,6 +516,10 @@ cas_collsub_sum_array_i8 (gfc_array_i8 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -532,7 +544,7 @@ cas_collsub_sum_array_i8 (gfc_array_i8 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_8); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -581,10 +593,10 @@ cas_collsub_sum_array_i8 (gfc_array_i8 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_8 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -601,7 +613,7 @@ cas_collsub_sum_array_i8 (gfc_array_i8 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/generated/nca_minmax_r10.c b/libgfortran/generated/nca_minmax_r10.c index 67111579f0d..f219dd3dc89 100644 --- a/libgfortran/generated/nca_minmax_r10.c +++ b/libgfortran/generated/nca_minmax_r10.c @@ -37,9 +37,7 @@ export_proto(cas_collsub_max_scalar_r10); void cas_collsub_max_scalar_r10 (GFC_REAL_10 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -47,17 +45,21 @@ cas_collsub_max_scalar_r10 (GFC_REAL_10 *obj, int *result_image, GFC_REAL_10 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -66,7 +68,7 @@ cas_collsub_max_scalar_r10 (GFC_REAL_10 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -82,9 +84,7 @@ export_proto(cas_collsub_min_scalar_r10); void cas_collsub_min_scalar_r10 (GFC_REAL_10 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -92,17 +92,21 @@ cas_collsub_min_scalar_r10 (GFC_REAL_10 *obj, int *result_image, GFC_REAL_10 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -111,7 +115,7 @@ cas_collsub_min_scalar_r10 (GFC_REAL_10 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -127,9 +131,7 @@ export_proto(cas_collsub_sum_scalar_r10); void cas_collsub_sum_scalar_r10 (GFC_REAL_10 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -137,17 +139,21 @@ cas_collsub_sum_scalar_r10 (GFC_REAL_10 *obj, int *result_image, GFC_REAL_10 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -155,7 +161,7 @@ cas_collsub_sum_scalar_r10 (GFC_REAL_10 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -171,9 +177,7 @@ export_proto (cas_collsub_max_array_r10); void cas_collsub_max_array_r10 (gfc_array_r10 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -188,6 +192,10 @@ cas_collsub_max_array_r10 (gfc_array_r10 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -212,7 +220,7 @@ cas_collsub_max_array_r10 (gfc_array_r10 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_10); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -261,10 +269,10 @@ cas_collsub_max_array_r10 (gfc_array_r10 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_10 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -282,7 +290,7 @@ cas_collsub_max_array_r10 (gfc_array_r10 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -331,9 +339,7 @@ export_proto (cas_collsub_min_array_r10); void cas_collsub_min_array_r10 (gfc_array_r10 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -348,6 +354,10 @@ cas_collsub_min_array_r10 (gfc_array_r10 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -372,7 +382,7 @@ cas_collsub_min_array_r10 (gfc_array_r10 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_10); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -421,10 +431,10 @@ cas_collsub_min_array_r10 (gfc_array_r10 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_10 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -442,7 +452,7 @@ cas_collsub_min_array_r10 (gfc_array_r10 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -491,9 +501,7 @@ export_proto (cas_collsub_sum_array_r10); void cas_collsub_sum_array_r10 (gfc_array_r10 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -508,6 +516,10 @@ cas_collsub_sum_array_r10 (gfc_array_r10 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -532,7 +544,7 @@ cas_collsub_sum_array_r10 (gfc_array_r10 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_10); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -581,10 +593,10 @@ cas_collsub_sum_array_r10 (gfc_array_r10 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_10 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -601,7 +613,7 @@ cas_collsub_sum_array_r10 (gfc_array_r10 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/generated/nca_minmax_r16.c b/libgfortran/generated/nca_minmax_r16.c index 959aec7fde3..a26eafe691f 100644 --- a/libgfortran/generated/nca_minmax_r16.c +++ b/libgfortran/generated/nca_minmax_r16.c @@ -37,9 +37,7 @@ export_proto(cas_collsub_max_scalar_r16); void cas_collsub_max_scalar_r16 (GFC_REAL_16 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -47,17 +45,21 @@ cas_collsub_max_scalar_r16 (GFC_REAL_16 *obj, int *result_image, GFC_REAL_16 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -66,7 +68,7 @@ cas_collsub_max_scalar_r16 (GFC_REAL_16 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -82,9 +84,7 @@ export_proto(cas_collsub_min_scalar_r16); void cas_collsub_min_scalar_r16 (GFC_REAL_16 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -92,17 +92,21 @@ cas_collsub_min_scalar_r16 (GFC_REAL_16 *obj, int *result_image, GFC_REAL_16 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -111,7 +115,7 @@ cas_collsub_min_scalar_r16 (GFC_REAL_16 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -127,9 +131,7 @@ export_proto(cas_collsub_sum_scalar_r16); void cas_collsub_sum_scalar_r16 (GFC_REAL_16 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -137,17 +139,21 @@ cas_collsub_sum_scalar_r16 (GFC_REAL_16 *obj, int *result_image, GFC_REAL_16 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -155,7 +161,7 @@ cas_collsub_sum_scalar_r16 (GFC_REAL_16 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -171,9 +177,7 @@ export_proto (cas_collsub_max_array_r16); void cas_collsub_max_array_r16 (gfc_array_r16 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -188,6 +192,10 @@ cas_collsub_max_array_r16 (gfc_array_r16 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -212,7 +220,7 @@ cas_collsub_max_array_r16 (gfc_array_r16 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_16); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -261,10 +269,10 @@ cas_collsub_max_array_r16 (gfc_array_r16 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_16 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -282,7 +290,7 @@ cas_collsub_max_array_r16 (gfc_array_r16 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -331,9 +339,7 @@ export_proto (cas_collsub_min_array_r16); void cas_collsub_min_array_r16 (gfc_array_r16 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -348,6 +354,10 @@ cas_collsub_min_array_r16 (gfc_array_r16 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -372,7 +382,7 @@ cas_collsub_min_array_r16 (gfc_array_r16 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_16); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -421,10 +431,10 @@ cas_collsub_min_array_r16 (gfc_array_r16 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_16 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -442,7 +452,7 @@ cas_collsub_min_array_r16 (gfc_array_r16 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -491,9 +501,7 @@ export_proto (cas_collsub_sum_array_r16); void cas_collsub_sum_array_r16 (gfc_array_r16 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -508,6 +516,10 @@ cas_collsub_sum_array_r16 (gfc_array_r16 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -532,7 +544,7 @@ cas_collsub_sum_array_r16 (gfc_array_r16 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_16); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -581,10 +593,10 @@ cas_collsub_sum_array_r16 (gfc_array_r16 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_16 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -601,7 +613,7 @@ cas_collsub_sum_array_r16 (gfc_array_r16 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/generated/nca_minmax_r4.c b/libgfortran/generated/nca_minmax_r4.c index 02dd3b67e51..3efa6259823 100644 --- a/libgfortran/generated/nca_minmax_r4.c +++ b/libgfortran/generated/nca_minmax_r4.c @@ -37,9 +37,7 @@ export_proto(cas_collsub_max_scalar_r4); void cas_collsub_max_scalar_r4 (GFC_REAL_4 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -47,17 +45,21 @@ cas_collsub_max_scalar_r4 (GFC_REAL_4 *obj, int *result_image, GFC_REAL_4 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -66,7 +68,7 @@ cas_collsub_max_scalar_r4 (GFC_REAL_4 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -82,9 +84,7 @@ export_proto(cas_collsub_min_scalar_r4); void cas_collsub_min_scalar_r4 (GFC_REAL_4 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -92,17 +92,21 @@ cas_collsub_min_scalar_r4 (GFC_REAL_4 *obj, int *result_image, GFC_REAL_4 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -111,7 +115,7 @@ cas_collsub_min_scalar_r4 (GFC_REAL_4 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -127,9 +131,7 @@ export_proto(cas_collsub_sum_scalar_r4); void cas_collsub_sum_scalar_r4 (GFC_REAL_4 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -137,17 +139,21 @@ cas_collsub_sum_scalar_r4 (GFC_REAL_4 *obj, int *result_image, GFC_REAL_4 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -155,7 +161,7 @@ cas_collsub_sum_scalar_r4 (GFC_REAL_4 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -171,9 +177,7 @@ export_proto (cas_collsub_max_array_r4); void cas_collsub_max_array_r4 (gfc_array_r4 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -188,6 +192,10 @@ cas_collsub_max_array_r4 (gfc_array_r4 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -212,7 +220,7 @@ cas_collsub_max_array_r4 (gfc_array_r4 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_4); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -261,10 +269,10 @@ cas_collsub_max_array_r4 (gfc_array_r4 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_4 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -282,7 +290,7 @@ cas_collsub_max_array_r4 (gfc_array_r4 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -331,9 +339,7 @@ export_proto (cas_collsub_min_array_r4); void cas_collsub_min_array_r4 (gfc_array_r4 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -348,6 +354,10 @@ cas_collsub_min_array_r4 (gfc_array_r4 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -372,7 +382,7 @@ cas_collsub_min_array_r4 (gfc_array_r4 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_4); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -421,10 +431,10 @@ cas_collsub_min_array_r4 (gfc_array_r4 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_4 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -442,7 +452,7 @@ cas_collsub_min_array_r4 (gfc_array_r4 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -491,9 +501,7 @@ export_proto (cas_collsub_sum_array_r4); void cas_collsub_sum_array_r4 (gfc_array_r4 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -508,6 +516,10 @@ cas_collsub_sum_array_r4 (gfc_array_r4 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -532,7 +544,7 @@ cas_collsub_sum_array_r4 (gfc_array_r4 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_4); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -581,10 +593,10 @@ cas_collsub_sum_array_r4 (gfc_array_r4 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_4 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -601,7 +613,7 @@ cas_collsub_sum_array_r4 (gfc_array_r4 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/generated/nca_minmax_r8.c b/libgfortran/generated/nca_minmax_r8.c index 6af88cb6cc2..83518b6bdb5 100644 --- a/libgfortran/generated/nca_minmax_r8.c +++ b/libgfortran/generated/nca_minmax_r8.c @@ -37,9 +37,7 @@ export_proto(cas_collsub_max_scalar_r8); void cas_collsub_max_scalar_r8 (GFC_REAL_8 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -47,17 +45,21 @@ cas_collsub_max_scalar_r8 (GFC_REAL_8 *obj, int *result_image, GFC_REAL_8 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -66,7 +68,7 @@ cas_collsub_max_scalar_r8 (GFC_REAL_8 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -82,9 +84,7 @@ export_proto(cas_collsub_min_scalar_r8); void cas_collsub_min_scalar_r8 (GFC_REAL_8 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -92,17 +92,21 @@ cas_collsub_min_scalar_r8 (GFC_REAL_8 *obj, int *result_image, GFC_REAL_8 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -111,7 +115,7 @@ cas_collsub_min_scalar_r8 (GFC_REAL_8 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -127,9 +131,7 @@ export_proto(cas_collsub_sum_scalar_r8); void cas_collsub_sum_scalar_r8 (GFC_REAL_8 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -137,17 +139,21 @@ cas_collsub_sum_scalar_r8 (GFC_REAL_8 *obj, int *result_image, GFC_REAL_8 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -155,7 +161,7 @@ cas_collsub_sum_scalar_r8 (GFC_REAL_8 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -171,9 +177,7 @@ export_proto (cas_collsub_max_array_r8); void cas_collsub_max_array_r8 (gfc_array_r8 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -188,6 +192,10 @@ cas_collsub_max_array_r8 (gfc_array_r8 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -212,7 +220,7 @@ cas_collsub_max_array_r8 (gfc_array_r8 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_8); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -261,10 +269,10 @@ cas_collsub_max_array_r8 (gfc_array_r8 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_8 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -282,7 +290,7 @@ cas_collsub_max_array_r8 (gfc_array_r8 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -331,9 +339,7 @@ export_proto (cas_collsub_min_array_r8); void cas_collsub_min_array_r8 (gfc_array_r8 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -348,6 +354,10 @@ cas_collsub_min_array_r8 (gfc_array_r8 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -372,7 +382,7 @@ cas_collsub_min_array_r8 (gfc_array_r8 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_8); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -421,10 +431,10 @@ cas_collsub_min_array_r8 (gfc_array_r8 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_8 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -442,7 +452,7 @@ cas_collsub_min_array_r8 (gfc_array_r8 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -491,9 +501,7 @@ export_proto (cas_collsub_sum_array_r8); void cas_collsub_sum_array_r8 (gfc_array_r8 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -508,6 +516,10 @@ cas_collsub_sum_array_r8 (gfc_array_r8 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -532,7 +544,7 @@ cas_collsub_sum_array_r8 (gfc_array_r8 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_8); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -581,10 +593,10 @@ cas_collsub_sum_array_r8 (gfc_array_r8 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_8 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -601,7 +613,7 @@ cas_collsub_sum_array_r8 (gfc_array_r8 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/generated/nca_minmax_s1.c b/libgfortran/generated/nca_minmax_s1.c index 493de0bf463..1b4826c99dc 100644 --- a/libgfortran/generated/nca_minmax_s1.c +++ b/libgfortran/generated/nca_minmax_s1.c @@ -70,19 +70,23 @@ cas_collsub_max_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image, collsub_iface *ci; index_type type_size; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; type_size = char_len * sizeof (GFC_UINTEGER_1); - buffer = get_collsub_buf (ci, type_size * local->num_images); + buffer = get_collsub_buf (ci, type_size * local->total_num_images); this_image_buf = buffer + this_image.image_num * char_len; memcpy (this_image_buf, obj, type_size); collsub_sync (ci); for (; ((this_image.image_num >> cbit) & 1) == 0 - && (local->num_images >> cbit) != 0; cbit++) + && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset * char_len; @@ -94,7 +98,7 @@ cas_collsub_max_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image, /* All images have to execute the same number of collsub_sync, otherwise some images will hang. Here, we execute the missing ones for images that are not needed anymore in the main loop. */ - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -125,19 +129,23 @@ cas_collsub_min_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image, collsub_iface *ci; index_type type_size; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; type_size = char_len * sizeof (GFC_UINTEGER_1); - buffer = get_collsub_buf (ci, type_size * local->num_images); + buffer = get_collsub_buf (ci, type_size * local->total_num_images); this_image_buf = buffer + this_image.image_num * char_len; memcpy (this_image_buf, obj, type_size); collsub_sync (ci); for (; ((this_image.image_num >> cbit) & 1) == 0 - && (local->num_images >> cbit) != 0; cbit++) + && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset * char_len; @@ -149,7 +157,7 @@ cas_collsub_min_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image, /* All images have to execute the same number of collsub_sync, otherwise some images will hang. Here, we execute the missing ones for images that are not needed anymore in the main loop. */ - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -169,10 +177,8 @@ export_proto (cas_collsub_max_array_s1); void cas_collsub_max_array_s1 (gfc_array_s1 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type char_len, - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type char_len, + index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; /* Store byte-based strides here. */ @@ -188,6 +194,10 @@ cas_collsub_max_array_s1 (gfc_array_s1 * restrict array, int *result_image, index_type type_size; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; type_size = char_len * sizeof (GFC_UINTEGER_1); @@ -212,7 +222,7 @@ cas_collsub_max_array_s1 (gfc_array_s1 * restrict array, int *result_image, } ssize = num_elems * type_size; - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * ssize; if (packed) @@ -264,10 +274,10 @@ cas_collsub_max_array_s1 (gfc_array_s1 * restrict array, int *result_image, r___________ */ for (; ((this_image.image_num >> cbit) & 1) == 0 - && (local->num_images >> cbit) != 0; cbit++) + && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { char *other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -285,7 +295,7 @@ cas_collsub_max_array_s1 (gfc_array_s1 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -336,10 +346,8 @@ export_proto (cas_collsub_min_array_s1); void cas_collsub_min_array_s1 (gfc_array_s1 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type char_len, - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type char_len, + index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; /* Store byte-based strides here. */ @@ -355,6 +363,10 @@ cas_collsub_min_array_s1 (gfc_array_s1 * restrict array, int *result_image, index_type type_size; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; type_size = char_len * sizeof (GFC_UINTEGER_1); @@ -379,7 +391,7 @@ cas_collsub_min_array_s1 (gfc_array_s1 * restrict array, int *result_image, } ssize = num_elems * type_size; - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * ssize; if (packed) @@ -431,10 +443,10 @@ cas_collsub_min_array_s1 (gfc_array_s1 * restrict array, int *result_image, r___________ */ for (; ((this_image.image_num >> cbit) & 1) == 0 - && (local->num_images >> cbit) != 0; cbit++) + && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { char *other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -452,7 +464,7 @@ cas_collsub_min_array_s1 (gfc_array_s1 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/generated/nca_minmax_s4.c b/libgfortran/generated/nca_minmax_s4.c index 9f74da23a84..336e1673751 100644 --- a/libgfortran/generated/nca_minmax_s4.c +++ b/libgfortran/generated/nca_minmax_s4.c @@ -70,19 +70,23 @@ cas_collsub_max_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image, collsub_iface *ci; index_type type_size; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; type_size = char_len * sizeof (GFC_UINTEGER_4); - buffer = get_collsub_buf (ci, type_size * local->num_images); + buffer = get_collsub_buf (ci, type_size * local->total_num_images); this_image_buf = buffer + this_image.image_num * char_len; memcpy (this_image_buf, obj, type_size); collsub_sync (ci); for (; ((this_image.image_num >> cbit) & 1) == 0 - && (local->num_images >> cbit) != 0; cbit++) + && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset * char_len; @@ -94,7 +98,7 @@ cas_collsub_max_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image, /* All images have to execute the same number of collsub_sync, otherwise some images will hang. Here, we execute the missing ones for images that are not needed anymore in the main loop. */ - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -125,19 +129,23 @@ cas_collsub_min_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image, collsub_iface *ci; index_type type_size; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; type_size = char_len * sizeof (GFC_UINTEGER_4); - buffer = get_collsub_buf (ci, type_size * local->num_images); + buffer = get_collsub_buf (ci, type_size * local->total_num_images); this_image_buf = buffer + this_image.image_num * char_len; memcpy (this_image_buf, obj, type_size); collsub_sync (ci); for (; ((this_image.image_num >> cbit) & 1) == 0 - && (local->num_images >> cbit) != 0; cbit++) + && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset * char_len; @@ -149,7 +157,7 @@ cas_collsub_min_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image, /* All images have to execute the same number of collsub_sync, otherwise some images will hang. Here, we execute the missing ones for images that are not needed anymore in the main loop. */ - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -169,10 +177,8 @@ export_proto (cas_collsub_max_array_s4); void cas_collsub_max_array_s4 (gfc_array_s4 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type char_len, - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type char_len, + index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; /* Store byte-based strides here. */ @@ -188,6 +194,10 @@ cas_collsub_max_array_s4 (gfc_array_s4 * restrict array, int *result_image, index_type type_size; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; type_size = char_len * sizeof (GFC_UINTEGER_4); @@ -212,7 +222,7 @@ cas_collsub_max_array_s4 (gfc_array_s4 * restrict array, int *result_image, } ssize = num_elems * type_size; - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * ssize; if (packed) @@ -264,10 +274,10 @@ cas_collsub_max_array_s4 (gfc_array_s4 * restrict array, int *result_image, r___________ */ for (; ((this_image.image_num >> cbit) & 1) == 0 - && (local->num_images >> cbit) != 0; cbit++) + && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { char *other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -285,7 +295,7 @@ cas_collsub_max_array_s4 (gfc_array_s4 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -336,10 +346,8 @@ export_proto (cas_collsub_min_array_s4); void cas_collsub_min_array_s4 (gfc_array_s4 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type char_len, - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type char_len, + index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; /* Store byte-based strides here. */ @@ -355,6 +363,10 @@ cas_collsub_min_array_s4 (gfc_array_s4 * restrict array, int *result_image, index_type type_size; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; type_size = char_len * sizeof (GFC_UINTEGER_4); @@ -379,7 +391,7 @@ cas_collsub_min_array_s4 (gfc_array_s4 * restrict array, int *result_image, } ssize = num_elems * type_size; - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * ssize; if (packed) @@ -431,10 +443,10 @@ cas_collsub_min_array_s4 (gfc_array_s4 * restrict array, int *result_image, r___________ */ for (; ((this_image.image_num >> cbit) & 1) == 0 - && (local->num_images >> cbit) != 0; cbit++) + && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { char *other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -452,7 +464,7 @@ cas_collsub_min_array_s4 (gfc_array_s4 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/m4/nca-minmax-s.m4 b/libgfortran/m4/nca-minmax-s.m4 index af5dd5d3136..9e9b0b98286 100644 --- a/libgfortran/m4/nca-minmax-s.m4 +++ b/libgfortran/m4/nca-minmax-s.m4 @@ -48,19 +48,23 @@ cas_collsub_'$1`_scalar_'rtype_code` ('rtype_name` *obj, int *result_image, collsub_iface *ci; index_type type_size; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; type_size = char_len * sizeof ('rtype_name`); - buffer = get_collsub_buf (ci, type_size * local->num_images); + buffer = get_collsub_buf (ci, type_size * local->total_num_images); this_image_buf = buffer + this_image.image_num * char_len; memcpy (this_image_buf, obj, type_size); collsub_sync (ci); for (; ((this_image.image_num >> cbit) & 1) == 0 - && (local->num_images >> cbit) != 0; cbit++) + && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset * char_len; @@ -72,7 +76,7 @@ cas_collsub_'$1`_scalar_'rtype_code` ('rtype_name` *obj, int *result_image, /* All images have to execute the same number of collsub_sync, otherwise some images will hang. Here, we execute the missing ones for images that are not needed anymore in the main loop. */ - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -94,10 +98,8 @@ export_proto (cas_collsub_'$1`_array_'rtype_code`); void cas_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type char_len, - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type char_len, + index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; /* Store byte-based strides here. */ @@ -113,6 +115,10 @@ cas_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image index_type type_size; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; type_size = char_len * sizeof ('rtype_name`); @@ -137,7 +143,7 @@ cas_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image } ssize = num_elems * type_size; - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * ssize; if (packed) @@ -189,10 +195,10 @@ cas_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image r___________ */ for (; ((this_image.image_num >> cbit) & 1) == 0 - && (local->num_images >> cbit) != 0; cbit++) + && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { char *other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -210,7 +216,7 @@ cas_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/m4/nca_minmax.m4 b/libgfortran/m4/nca_minmax.m4 index 9e107fc24b8..f69cf65490d 100644 --- a/libgfortran/m4/nca_minmax.m4 +++ b/libgfortran/m4/nca_minmax.m4 @@ -35,9 +35,7 @@ export_proto(cas_collsub_'$1`_scalar_'rtype_code`); void cas_collsub_'$1`_scalar_'rtype_code` ('rtype_name` *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -45,17 +43,21 @@ cas_collsub_'$1`_scalar_'rtype_code` ('rtype_name` *obj, int *result_image, 'rtype_name` *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof('rtype_name`) * local->num_images); + buffer = get_collsub_buf (ci, sizeof('rtype_name`) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -63,7 +65,7 @@ cas_collsub_'$1`_scalar_'rtype_code` ('rtype_name` *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -81,9 +83,7 @@ export_proto (cas_collsub_'$1`_array_'rtype_code`); void cas_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -98,6 +98,10 @@ cas_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -122,7 +126,7 @@ cas_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image num_elems = ssize / sizeof ('rtype_name`); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -171,10 +175,10 @@ cas_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { 'rtype_name` * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -191,7 +195,7 @@ cas_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/nca/coarraynative.c b/libgfortran/nca/coarraynative.c index 251e7c9dd57..c177fe8cea7 100644 --- a/libgfortran/nca/coarraynative.c +++ b/libgfortran/nca/coarraynative.c @@ -33,12 +33,15 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #include <unistd.h> #include <sys/mman.h> #include <sys/wait.h> +#include <string.h> #define GFORTRAN_ENV_NUM_IMAGES "GFORTRAN_NUM_IMAGES" nca_local_data *local = NULL; -image this_image; +image this_image = {-1, NULL}; + +/* Get image number from environment or sysconf. */ static int get_environ_image_num (void) @@ -53,6 +56,26 @@ get_environ_image_num (void) return nimages; } +/* Get a master. */ + +static master * +get_master (void) +{ + master *m; + m = SHMPTR_AS (master *, + shared_memory_get_mem_with_alignment + (&local->sm, + sizeof (master) + sizeof(image_status) * local->total_num_images, + __alignof__(master)), &local->sm); + m->has_failed_image = 0; + m->finished_images = 0; + waitable_counter_init (&m->num_active_images, local->total_num_images); + return m; +} + + +/* Ensure things are initialized. */ + void ensure_initialization(void) { @@ -63,14 +86,94 @@ ensure_initialization(void) // point? Maybe use mmap(MAP_ANON) // instead pagesize = sysconf (_SC_PAGE_SIZE); - local->num_images = get_environ_image_num (); + local->total_num_images = get_environ_image_num (); shared_memory_init (&local->sm); shared_memory_prepare (&local->sm); + if (this_image.m == NULL) /* A bit of a hack, but we + need the master early. */ + this_image.m = get_master(); alloc_iface_init (&local->ai, &local->sm); collsub_iface_init (&local->ci, &local->ai, &local->sm); sync_iface_init (&local->si, &local->ai, &local->sm); } + +/* Test for failed or stopped images. */ + +int +test_for_cas_errors (int *stat, char *errmsg, size_t errmsg_length) +{ + size_t errmsg_written_bytes; + if (!stat) + return 0; + + /* This rather strange ordering is mandated by the standard. */ + if (this_image.m->finished_images) + { + *stat = CAS_STAT_STOPPED_IMAGE; + if (errmsg) + { + errmsg_written_bytes + = snprintf(errmsg, errmsg_length, + "Stopped images present (currently %d)", + this_image.m->finished_images); + if (errmsg_written_bytes > errmsg_length - 1) + errmsg_written_bytes = errmsg_length - 1; + + memset(errmsg + errmsg_written_bytes, ' ', + errmsg_length - errmsg_written_bytes); + } + } + else if (this_image.m->has_failed_image) + { + *stat = CAS_STAT_FAILED_IMAGE; + if (errmsg) + { + errmsg_written_bytes + = snprintf(errmsg, errmsg_length, + "Failed images present (currently %d)", + this_image.m->has_failed_image); + if (errmsg_written_bytes > errmsg_length - 1) + errmsg_written_bytes = errmsg_length - 1; + + memset(errmsg + errmsg_written_bytes, ' ', + errmsg_length - errmsg_written_bytes); + } + } + else + { + *stat = 0; + return 0; + } + return 1; +} + +/* Check if an image is active. */ + +int +master_is_image_active (master *m, int image_num) +{ + return m->images[image_num].status == IMAGE_OK; +} + +/* Get number of active images. */ + +int +master_get_num_active_images (master *m) +{ + return waitable_counter_get_val(&m->num_active_images); +} + +/* Bind barrier to counter. */ + +void +master_bind_active_image_barrier(master *m, counter_barrier *b) +{ + bind_counter_barrier(b, &m->num_active_images); +} + +/* Main wrapper. */ + static void __attribute__((noreturn)) image_main_wrapper (void (*image_main) (void), image *this) { @@ -83,16 +186,10 @@ image_main_wrapper (void (*image_main) (void), image *this) exit (0); } -static master * -get_master (void) { - master *m; - m = SHMPTR_AS (master *, - shared_memory_get_mem_with_alignment - (&local->sm, - sizeof (master) + sizeof(image_status) * local->num_images, - __alignof__(master)), &local->sm); - m->has_failed_image = 0; - return m; +void +error_on_missing_images(void) { + if (master_get_num_active_images(this_image.m) != local->total_num_images) + exit(1); } /* This is called from main, with a pointer to the user's program as @@ -107,11 +204,11 @@ cas_master (void (*image_main) (void)) { int exit_code = 0; int chstatus; ensure_initialization(); - m = get_master(); + m = this_image.m; im.m = m; - for (im.image_num = 0; im.image_num < local->num_images; im.image_num++) + for (im.image_num = 0; im.image_num < local->total_num_images; im.image_num++) { if ((new = fork())) { @@ -126,25 +223,28 @@ cas_master (void (*image_main) (void)) { else image_main_wrapper(image_main, &im); } - for (i = 0; i < local->num_images; i++) + for (i = 0; i < local->total_num_images; i++) { new = wait (&chstatus); if (WIFEXITED (chstatus) && !WEXITSTATUS (chstatus)) { j = 0; - for (; j < local->num_images && m->images[j].pid != new; j++); + for (; j < local->total_num_images && m->images[j].pid != new; j++); m->images[j].status = IMAGE_SUCCESS; m->finished_images++; /* FIXME: Needs to be atomic, probably. */ } else if (!WIFEXITED (chstatus) || WEXITSTATUS (chstatus)) { j = 0; - for (; j < local->num_images && m->images[j].pid != new; j++); + for (; j < local->total_num_images && m->images[j].pid != new; j++); m->images[j].status = IMAGE_FAILED; m->has_failed_image++; /* FIXME: Needs to be atomic, probably. */ + for (; j < local->total_num_images; j++) + m->images[j].active_image_index--; dprintf (2, "ERROR: Image %d(%#x) failed\n", j, new); exit_code = 1; } + waitable_counter_add(&m->num_active_images, -1); } exit (exit_code); } diff --git a/libgfortran/nca/collective_subroutine.c b/libgfortran/nca/collective_subroutine.c index 14bd517d576..f4d9b589e4d 100644 --- a/libgfortran/nca/collective_subroutine.c +++ b/libgfortran/nca/collective_subroutine.c @@ -23,9 +23,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ #include "libgfortran.h" +#include <string.h> #include "libcoarraynative.h" #include "collective_subroutine.h" #include "allocator.h" +#include "counter_barrier.h" #include <string.h> @@ -56,7 +58,7 @@ get_collsub_buf (collsub_iface *ci, size_t size) void collsub_sync (collsub_iface *ci) { - pthread_barrier_wait (&ci->s->barrier); + counter_barrier_wait (&ci->s->barrier); } @@ -83,6 +85,8 @@ collsub_reduce_array (collsub_iface *ci, gfc_array_char *desc, int *result_image index_type this_image_size_bytes; char *this_image_buf; + error_on_missing_images(); + packed = pack_array_prepare (&pi, desc); if (pi.num_elem == 0) return; @@ -90,7 +94,7 @@ collsub_reduce_array (collsub_iface *ci, gfc_array_char *desc, int *result_image elem_size = GFC_DESCRIPTOR_SIZE (desc); this_image_size_bytes = elem_size * pi.num_elem; - buffer = get_collsub_buf (ci, this_image_size_bytes * local->num_images); + buffer = get_collsub_buf (ci, this_image_size_bytes * local->total_num_images); this_image_buf = buffer + this_image_size_bytes * this_image.image_num; if (packed) @@ -99,10 +103,12 @@ collsub_reduce_array (collsub_iface *ci, gfc_array_char *desc, int *result_image pack_array_finish (&pi, desc, this_image_buf); collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + + + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) /* Reduce arrays elementwise. */ for (ssize_t i = 0; i < pi.num_elem; i++) assign_function (this_image_buf + elem_size * i, @@ -110,7 +116,7 @@ collsub_reduce_array (collsub_iface *ci, gfc_array_char *desc, int *result_image collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || *result_image == this_image.image_num) @@ -134,22 +140,26 @@ collsub_reduce_scalar (collsub_iface *ci, void *obj, index_type elem_size, int imoffset; char *this_image_buf; - buffer = get_collsub_buf (ci, elem_size * local->num_images); + error_on_missing_images(); + + buffer = get_collsub_buf (ci, elem_size * master_get_num_active_images(this_image.m)); this_image_buf = buffer + elem_size * this_image.image_num; memcpy (this_image_buf, obj, elem_size); + collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + + if (this_image.image_num + imoffset < local->total_num_images) { /* Reduce arrays elementwise. */ - assign_function (this_image_buf, this_image_buf + elem_size*imoffset); - + assign_function (this_image_buf, this_image_buf + elem_size*imoffset); + } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (master_get_num_active_images(this_image.m) >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || *result_image == this_image.image_num) @@ -165,19 +175,15 @@ collsub_reduce_scalar (collsub_iface *ci, void *obj, index_type elem_size, void collsub_iface_init (collsub_iface *ci, alloc_iface *ai, shared_memory *sm) { - pthread_barrierattr_t attr; ci->s = SHARED_MEMORY_RAW_ALLOC_PTR(sm, collsub_iface_shared); - ci->s->collsub_buf = shared_malloc(get_allocator(ai), sizeof(double)*local->num_images); - ci->s->curr_size = sizeof(double)*local->num_images; + ci->s->collsub_buf = shared_malloc(get_allocator(ai), + sizeof(double)*local->total_num_images); + ci->s->curr_size = sizeof(double)*local->total_num_images; ci->sm = sm; ci->a = get_allocator(ai); - pthread_barrierattr_init (&attr); - pthread_barrierattr_setpshared (&attr, PTHREAD_PROCESS_SHARED); - pthread_barrier_init (&ci->s->barrier, &attr, local->num_images); - pthread_barrierattr_destroy(&attr); - + master_bind_active_image_barrier (this_image.m, &ci->s->barrier); initialize_shared_mutex (&ci->s->mutex); } diff --git a/libgfortran/nca/collective_subroutine.h b/libgfortran/nca/collective_subroutine.h index 27931166ecf..137220c3e9d 100644 --- a/libgfortran/nca/collective_subroutine.h +++ b/libgfortran/nca/collective_subroutine.h @@ -8,7 +8,7 @@ typedef struct collsub_iface_shared { size_t curr_size; shared_mem_ptr collsub_buf; - pthread_barrier_t barrier; + counter_barrier barrier; pthread_mutex_t mutex; } collsub_iface_shared; diff --git a/libgfortran/nca/counter_barrier.c b/libgfortran/nca/counter_barrier.c new file mode 100644 index 00000000000..3f15ba460c5 --- /dev/null +++ b/libgfortran/nca/counter_barrier.c @@ -0,0 +1,141 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Nicolas Koenig + + This file is part of the GNU Fortran Native Coarray Library (libnca). + + Libnca is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libnca is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include "util.h" +#include "counter_barrier.h" + +#include <assert.h> + +/* Lock the associated counter of this barrier. */ + +static inline void +lock_counter_barrier (counter_barrier *b) +{ + pthread_mutex_lock (&b->count->m); +} + +/* Unlock the associated counter of this barrier. */ + +static inline void +unlock_counter_barrier (counter_barrier *b) +{ + pthread_mutex_unlock (&b->count->m); +} + +/* Wait on the barrier. */ + +void +counter_barrier_wait (counter_barrier *b) +{ + int initial_count; + int wait_group_beginning; + + assert (b->count); + + lock_counter_barrier (b); + + wait_group_beginning = b->curr_wait_group; + + if (--b->wait_count <= 0) + pthread_cond_broadcast (&b->cond); + else + { + while (b->wait_count > 0 && b->curr_wait_group == wait_group_beginning) + pthread_cond_wait(&b->cond, &b->count->m); + } + + if (b->wait_count <= 0) + { + b->curr_wait_group = !wait_group_beginning; + b->wait_count = b->count->val; + } + + unlock_counter_barrier (b); +} + +/* Adjust the counter of a barrier by val (which can be positive or + negative), signalling if necessary. */ + +static inline void +change_internal_barrier_count (counter_barrier *b, int val) +{ + b->wait_count += val; + if (b->wait_count <= 0) + pthread_cond_broadcast (&b->cond); +} + +/* Adjust all associated barriers of a counter. */ +int +waitable_counter_add (waitable_counter *c, int val) +{ + counter_barrier *curr; + int ret; + pthread_mutex_lock(&c->m); + ret = (c->val += val); + for (curr = c->b; curr; curr = curr->next) + change_internal_barrier_count(curr, val); + + pthread_mutex_unlock(&c->m); + return ret; +} + +/* Get the value of a counter. */ + +int +waitable_counter_get_val (waitable_counter *c) +{ + int ret; + pthread_mutex_lock(&c->m); + ret = c->val; + pthread_mutex_unlock(&c->m); + return ret; +} + +/* Initialize waitable counter. */ + +void +waitable_counter_init (waitable_counter *c, int val) +{ + initialize_shared_mutex (&c->m); + c->val = val; + c->b = NULL; +} + +/* Bind a barrier to a counter. */ + +void +bind_counter_barrier (counter_barrier *b, waitable_counter *c) +{ + initialize_shared_condition (&b->cond); + + pthread_mutex_lock (&c->m); + b->next = c->b; + b->curr_wait_group = 0; + b->count = c; + b->wait_count = c->val; + c->b = b; + pthread_mutex_unlock (&c->m); + +} diff --git a/libgfortran/nca/counter_barrier.h b/libgfortran/nca/counter_barrier.h new file mode 100644 index 00000000000..94eb727a74e --- /dev/null +++ b/libgfortran/nca/counter_barrier.h @@ -0,0 +1,36 @@ +#ifndef COUNTER_BARRIER_HDR +#define COUNTER_BARRIER_HDR + +#include <pthread.h> + +struct waitable_counter; + +typedef struct counter_barrier +{ + pthread_cond_t cond; + struct waitable_counter *count; + volatile struct counter_barrier *next; + volatile int wait_count; + volatile int curr_wait_group; +} counter_barrier; + +typedef struct waitable_counter +{ + pthread_mutex_t m; + volatile counter_barrier *b; + volatile int val; +} waitable_counter; + +void waitable_counter_init (waitable_counter *c, int val); +internal_proto(waitable_counter_init); +int waitable_counter_add (waitable_counter *c, int val); +internal_proto(waitable_counter_add); +int waitable_counter_get_val (waitable_counter *c); +internal_proto(waitable_counter_get_val); + +void bind_counter_barrier (counter_barrier *b, waitable_counter *c); +internal_proto(bind_counter_barrier); +void counter_barrier_wait (counter_barrier *b); +internal_proto(counter_barrier_wait); + +#endif diff --git a/libgfortran/nca/libcoarraynative.h b/libgfortran/nca/libcoarraynative.h index 4bd7e3ca740..e4549652d78 100644 --- a/libgfortran/nca/libcoarraynative.h +++ b/libgfortran/nca/libcoarraynative.h @@ -38,11 +38,18 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #include "sync.h" #include "util.h" #include "collective_subroutine.h" - -typedef struct { - pthread_barrier_t barrier; - int maximg; -} ipcollsub; +#include "counter_barrier.h" + +/* Defnitions of the Fortran 2008 standard; need to be kept in sync + with ISO_FORTRAN_ENV, cf. gcc/fortran/libgfortran.h. */ +typedef enum +{ + CAS_STAT_UNLOCKED = 0, + CAS_STAT_LOCKED, + CAS_STAT_LOCKED_OTHER_IMAGE, + CAS_STAT_STOPPED_IMAGE = 6000, + CAS_STAT_FAILED_IMAGE = 6001 +} stat_constants; typedef enum { IMAGE_UNKNOWN = 0, @@ -52,14 +59,17 @@ typedef enum { } image_status; typedef struct { - image_status status; pid_t pid; + image_status status; + int active_image_index; } image_tracker; typedef struct { - int has_failed_image; - int finished_images; - image_tracker images[]; + volatile int has_failed_image; + volatile int finished_images; + waitable_counter num_active_images; + pthread_mutex_t image_tracker_lock; + volatile image_tracker images[]; } master; typedef struct { @@ -70,7 +80,7 @@ typedef struct { extern image this_image; typedef struct { - int num_images; + int total_num_images; shared_memory sm; alloc_iface ai; collsub_iface ci; @@ -82,6 +92,33 @@ internal_proto (local); void ensure_initialization(void); internal_proto(ensure_initialization); +int test_for_cas_errors(int *, char *, size_t); +internal_proto(test_for_cas_errors); + +int master_get_num_active_images(master *m); +internal_proto(master_get_num_active_images); + +void master_bind_active_image_barrier (master *m, counter_barrier *); +internal_proto(master_bind_active_image_barrier); + +int master_is_image_active(master *m, int image_num); +internal_proto(master_is_image_active); + +void error_on_missing_images(void); +internal_proto(error_on_missing_images); + +#define STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len) \ + do { \ + if (test_for_cas_errors(stat, errmsg, errmsg_len))\ + return;\ + } while(0) + +#define STAT_ERRMSG_ENTRY_CHECK_RET(stat, errmsg, errmsg_len, retval) \ + do { \ + if (test_for_cas_errors(stat, errmsg, errmsg_len))\ + return retval;\ + } while(0) + void cas_master(void (*)(void)); export_proto (cas_master); diff --git a/libgfortran/nca/sync.c b/libgfortran/nca/sync.c index 7cf5ee28d9b..c7ae0679e5e 100644 --- a/libgfortran/nca/sync.c +++ b/libgfortran/nca/sync.c @@ -29,13 +29,9 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #include <string.h> static void -sync_all_init (pthread_barrier_t *b) +sync_all_init (counter_barrier *b) { - pthread_barrierattr_t battr; - pthread_barrierattr_init (&battr); - pthread_barrierattr_setpshared (&battr, PTHREAD_PROCESS_SHARED); - pthread_barrier_init (b, &battr, local->num_images); - pthread_barrierattr_destroy (&battr); + master_bind_active_image_barrier(this_image.m, b); } static inline void @@ -76,17 +72,18 @@ sync_iface_init (sync_iface *si, alloc_iface *ai, shared_memory *sm) si->a = get_allocator(ai); si->cis->table = - shared_malloc(si->a, sizeof(int)*local->num_images * local->num_images); + shared_malloc(si->a, sizeof(int)*local->total_num_images * local->total_num_images); si->cis->triggers = - shared_malloc(si->a, sizeof(pthread_cond_t)*local->num_images); + shared_malloc(si->a, sizeof(pthread_cond_t)*local->total_num_images); si->table = SHMPTR_AS(int *, si->cis->table, si->sm); si->triggers = SHMPTR_AS(pthread_cond_t *, si->cis->triggers, si->sm); - for (int i = 0; i < local->num_images; i++) + for (int i = 0; i < local->total_num_images; i++) initialize_shared_condition (&si->triggers[i]); } +/* TODO: Maybe check whether synchronizing image is still alive. */ void sync_table (sync_iface *si, int *images, size_t size) { @@ -101,15 +98,15 @@ sync_table (sync_iface *si, int *images, size_t size) int *table = get_locked_table(si); for (i = 0; i < size; i++) { - table[images[i] - 1 + local->num_images*this_image.image_num]++; + table[images[i] - 1 + local->total_num_images*this_image.image_num]++; pthread_cond_signal (&si->triggers[images[i] - 1]); } for (;;) { done = 1; for (i = 0; i < size; i++) - done &= si->table[images[i] - 1 + this_image.image_num*local->num_images] - == si->table[this_image.image_num + (images[i] - 1)*local->num_images]; + done &= si->table[images[i] - 1 + this_image.image_num*local->total_num_images] + == si->table[this_image.image_num + (images[i] - 1)*local->total_num_images]; if (done) break; wait_table_cond (si, &si->triggers[this_image.image_num]); @@ -120,6 +117,5 @@ sync_table (sync_iface *si, int *images, size_t size) void sync_all (sync_iface *si) { - - pthread_barrier_wait (&si->cis->sync_all); + counter_barrier_wait(&si->cis->sync_all); } diff --git a/libgfortran/nca/sync.h b/libgfortran/nca/sync.h index 53aa3dcb15f..59946434da5 100644 --- a/libgfortran/nca/sync.h +++ b/libgfortran/nca/sync.h @@ -25,10 +25,13 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #ifndef IPSYNC_HDR #define IPSYNC_HDR +#include "shared_memory.h" +#include "alloc.h" +#include "counter_barrier.h" #include <pthread.h> typedef struct { - pthread_barrier_t sync_all; + counter_barrier sync_all; pthread_mutex_t table_lock; shared_mem_ptr table; shared_mem_ptr triggers; diff --git a/libgfortran/nca/util.h b/libgfortran/nca/util.h index 1d335129520..fa9e158b38f 100644 --- a/libgfortran/nca/util.h +++ b/libgfortran/nca/util.h @@ -83,4 +83,6 @@ internal_proto (pack_array_finish); void unpack_array_finish (pack_info * const restrict, const gfc_array_char * const, const char * restrict); +internal_proto (unpack_array_finish); + #endif diff --git a/libgfortran/nca/wrapper.c b/libgfortran/nca/wrapper.c index d3d50f5d211..2e0b53bb27c 100644 --- a/libgfortran/nca/wrapper.c +++ b/libgfortran/nca/wrapper.c @@ -69,19 +69,19 @@ void cas_unlock (void *); export_proto (cas_unlock); void cas_collsub_reduce_array (gfc_array_char *, void (*) (void *, void *), - int *); + int *, int*, char *, size_t); export_proto (cas_collsub_reduce_array); void cas_collsub_reduce_scalar (void *, index_type, void (*) (void *, void *), - int *); + int *, int*, char *, size_t); export_proto (cas_collsub_reduce_scalar); -void cas_collsub_broadcast_array (gfc_array_char * restrict, int/*, int *, char *, - size_t*/); +void cas_collsub_broadcast_array (gfc_array_char * restrict, int, int *, char *, + size_t); export_proto (cas_collsub_broadcast_array); -void cas_collsub_broadcast_scalar (void * restrict, size_t, int/*, int *, char *, - size_t*/); +void cas_collsub_broadcast_scalar (void * restrict, size_t, int, int *, char *, + size_t); export_proto(cas_collsub_broadcast_scalar); void @@ -115,7 +115,7 @@ cas_coarray_alloc (gfc_array_void *desc, int elem_size, int corank, num_coarray_elems *= GFC_DESCRIPTOR_EXTENT(desc, i); } - extent_last_codimen = div_ru (local->num_images, num_coarray_elems); + extent_last_codimen = div_ru (local->total_num_images, num_coarray_elems); last_lbound = GFC_DIMENSION_LBOUND(desc->dim[last_rank_index]); GFC_DIMENSION_SET(desc->dim[last_rank_index], last_lbound, @@ -143,7 +143,7 @@ cas_coarray_alloc (gfc_array_void *desc, int elem_size, int corank, __ATOMIC_SEQ_CST)); if (!addr->initialized++) { - for (i = 0; i < local->num_images; i++) + for (i = 0; i < local->total_num_images; i++) initialize_shared_mutex (&addr->arr[i]); } __atomic_store_n (&addr->owner, 0, __ATOMIC_SEQ_CST); @@ -165,6 +165,7 @@ cas_coarray_free (gfc_array_void *desc, int alloc_type) lock_array *la; int expected = 0; la = desc->base_addr - offsetof (lock_array, arr); + /* TODO: Fix this, replace with some kind of atomic initilization. */ while (!__atomic_compare_exchange_n (&la->owner, &expected, this_image.image_num+1, false, __ATOMIC_SEQ_CST, @@ -173,7 +174,7 @@ cas_coarray_free (gfc_array_void *desc, int alloc_type) { /* Coarray locks can be removed and just normal pthread_mutex can be used. */ - for (i = 0; i < local->num_images; i++) + for (i = 0; i < local->total_num_images; i++) pthread_mutex_destroy (&la->arr[i]); } __atomic_store_n (&la->owner, 0, __ATOMIC_SEQ_CST); @@ -194,21 +195,20 @@ cas_coarray_this_image (int distance __attribute__((unused))) int cas_coarray_num_images (int distance __attribute__((unused))) { - return local->num_images; + return local->total_num_images; } void -cas_coarray_sync_all (int *stat __attribute__((unused))) +cas_coarray_sync_all (int *stat) { + STAT_ERRMSG_ENTRY_CHECK(stat, NULL, 0); sync_all (&local->si); } void -cas_sync_images (size_t s, int *images, - int *stat __attribute__((unused)), - char *error __attribute__((unused)), - size_t err_size __attribute__((unused))) +cas_sync_images (size_t s, int *images, int *stat, char *error, size_t err_size) { + STAT_ERRMSG_ENTRY_CHECK(stat, error, err_size); sync_table (&local->si, images, s); } @@ -226,33 +226,34 @@ cas_unlock (void *lock) void cas_collsub_reduce_array (gfc_array_char *desc, void (*assign_function) (void *, void *), - int *result_image) + int *result_image, int *stat, char *errmsg, size_t errmsg_len) { + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); collsub_reduce_array (&local->ci, desc, result_image, assign_function); } void cas_collsub_reduce_scalar (void *obj, index_type elem_size, void (*assign_function) (void *, void *), - int *result_image) + int *result_image, + int *stat, char *errmsg, size_t errmsg_len) { + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); collsub_reduce_scalar (&local->ci, obj, elem_size, result_image, assign_function); } void -cas_collsub_broadcast_array (gfc_array_char * restrict a, int source_image - /* , int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - size_t errmsg_len __attribute__ ((unused))*/) +cas_collsub_broadcast_array (gfc_array_char * restrict a, int source_image, + int *stat, char *errmsg, size_t errmsg_len) { + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); collsub_broadcast_array (&local->ci, a, source_image - 1); } void -cas_collsub_broadcast_scalar (void * restrict obj, size_t size, int source_image/*, - int *stat __attribute__((unused)), - char *errmsg __attribute__ ((unused)), - size_t errmsg_len __attribute__ ((unused))*/) +cas_collsub_broadcast_scalar (void * restrict obj, size_t size, int source_image, + int *stat, char *errmsg, size_t errmsg_len) { + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); collsub_broadcast_scalar (&local->ci, obj, size, source_image - 1); } |