diff options
author | Jonathan Peyton <jonathan.l.peyton@intel.com> | 2017-05-12 18:01:32 +0000 |
---|---|---|
committer | Jonathan Peyton <jonathan.l.peyton@intel.com> | 2017-05-12 18:01:32 +0000 |
commit | e703e783e760afff6983d1125e26996d65d01e71 (patch) | |
tree | 826841d6832029bc8446c4ce635f891a6143dae3 /runtime/src/kmp_affinity.h | |
parent | 07c5582c3b767e3f55753c493905e99498862441 (diff) |
Clang-format and whitespace cleanup of source code
This patch contains the clang-format and cleanup of the entire code base. Some
of clang-format's changes made the code look worse in places. A best effort was
made to resolve the bulk of these problems, but many remain. Most of the
problems were mangling line-breaks and tabbing of comments.
Patch by Terry Wilmarth
Differential Revision: https://reviews.llvm.org/D32659
git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@302929 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'runtime/src/kmp_affinity.h')
-rw-r--r-- | runtime/src/kmp_affinity.h | 1472 |
1 files changed, 767 insertions, 705 deletions
diff --git a/runtime/src/kmp_affinity.h b/runtime/src/kmp_affinity.h index 142acf7..bae013e 100644 --- a/runtime/src/kmp_affinity.h +++ b/runtime/src/kmp_affinity.h @@ -12,765 +12,827 @@ // //===----------------------------------------------------------------------===// + #ifndef KMP_AFFINITY_H #define KMP_AFFINITY_H -#include "kmp_os.h" #include "kmp.h" +#include "kmp_os.h" #if KMP_AFFINITY_SUPPORTED #if KMP_USE_HWLOC -class KMPHwlocAffinity: public KMPAffinity { +class KMPHwlocAffinity : public KMPAffinity { public: - class Mask : public KMPAffinity::Mask { - hwloc_cpuset_t mask; - public: - Mask() { mask = hwloc_bitmap_alloc(); this->zero(); } - ~Mask() { hwloc_bitmap_free(mask); } - void set(int i) override { hwloc_bitmap_set(mask, i); } - bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); } - void clear(int i) override { hwloc_bitmap_clr(mask, i); } - void zero() override { hwloc_bitmap_zero(mask); } - void copy(const KMPAffinity::Mask* src) override { - const Mask* convert = static_cast<const Mask*>(src); - hwloc_bitmap_copy(mask, convert->mask); - } - void bitwise_and(const KMPAffinity::Mask* rhs) override { - const Mask* convert = static_cast<const Mask*>(rhs); - hwloc_bitmap_and(mask, mask, convert->mask); - } - void bitwise_or(const KMPAffinity::Mask * rhs) override { - const Mask* convert = static_cast<const Mask*>(rhs); - hwloc_bitmap_or(mask, mask, convert->mask); - } - void bitwise_not() override { hwloc_bitmap_not(mask, mask); } - int begin() const override { return hwloc_bitmap_first(mask); } - int end() const override { return -1; } - int next(int previous) const override { return hwloc_bitmap_next(mask, previous); } - int get_system_affinity(bool abort_on_error) override { - KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal get affinity operation when not capable"); - int retval = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); - if (retval >= 0) { - return 0; - } - int error = errno; - if (abort_on_error) { 
- __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null); - } - return error; - } - int set_system_affinity(bool abort_on_error) const override { - KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal get affinity operation when not capable"); - int retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); - if (retval >= 0) { - return 0; - } - int error = errno; - if (abort_on_error) { - __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null); - } - return error; - } - int get_proc_group() const override { - int i; - int group = -1; -# if KMP_OS_WINDOWS - if (__kmp_num_proc_groups == 1) { - return 1; - } - for (i = 0; i < __kmp_num_proc_groups; i++) { - // On windows, the long type is always 32 bits - unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2); - unsigned long second_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2+1); - if (first_32_bits == 0 && second_32_bits == 0) { - continue; - } - if (group >= 0) { - return -1; - } - group = i; - } -# endif /* KMP_OS_WINDOWS */ - return group; - } - }; - void determine_capable(const char* var) override { - const hwloc_topology_support* topology_support; - if(__kmp_hwloc_topology == NULL) { - if(hwloc_topology_init(&__kmp_hwloc_topology) < 0) { - __kmp_hwloc_error = TRUE; - if(__kmp_affinity_verbose) - KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()"); - } - if(hwloc_topology_load(__kmp_hwloc_topology) < 0) { - __kmp_hwloc_error = TRUE; - if(__kmp_affinity_verbose) - KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()"); - } - } - topology_support = hwloc_topology_get_support(__kmp_hwloc_topology); - // Is the system capable of setting/getting this thread's affinity? - // also, is topology discovery possible? (pu indicates ability to discover processing units) - // and finally, were there no errors when calling any hwloc_* API functions? 
- if(topology_support && topology_support->cpubind->set_thisthread_cpubind && - topology_support->cpubind->get_thisthread_cpubind && - topology_support->discovery->pu && - !__kmp_hwloc_error) - { - // enables affinity according to KMP_AFFINITY_CAPABLE() macro - KMP_AFFINITY_ENABLE(TRUE); - } else { - // indicate that hwloc didn't work and disable affinity - __kmp_hwloc_error = TRUE; - KMP_AFFINITY_DISABLE(); - } + class Mask : public KMPAffinity::Mask { + hwloc_cpuset_t mask; + + public: + Mask() { + mask = hwloc_bitmap_alloc(); + this->zero(); + } + ~Mask() { hwloc_bitmap_free(mask); } + void set(int i) override { hwloc_bitmap_set(mask, i); } + bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); } + void clear(int i) override { hwloc_bitmap_clr(mask, i); } + void zero() override { hwloc_bitmap_zero(mask); } + void copy(const KMPAffinity::Mask *src) override { + const Mask *convert = static_cast<const Mask *>(src); + hwloc_bitmap_copy(mask, convert->mask); } - void bind_thread(int which) override { - KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal set affinity operation when not capable"); - KMPAffinity::Mask *mask; - KMP_CPU_ALLOC_ON_STACK(mask); - KMP_CPU_ZERO(mask); - KMP_CPU_SET(which, mask); - __kmp_set_system_affinity(mask, TRUE); - KMP_CPU_FREE_FROM_STACK(mask); - } - KMPAffinity::Mask* allocate_mask() override { return new Mask(); } - void deallocate_mask(KMPAffinity::Mask* m) override { delete m; } - KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; } - void deallocate_mask_array(KMPAffinity::Mask* array) override { - Mask* hwloc_array = static_cast<Mask*>(array); - delete[] hwloc_array; - } - KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override { - Mask* hwloc_array = static_cast<Mask*>(array); - return &(hwloc_array[index]); - } - api_type get_api_type() const override { return HWLOC; } + void bitwise_and(const KMPAffinity::Mask *rhs) override { + const Mask *convert = 
static_cast<const Mask *>(rhs); + hwloc_bitmap_and(mask, mask, convert->mask); + } + void bitwise_or(const KMPAffinity::Mask *rhs) override { + const Mask *convert = static_cast<const Mask *>(rhs); + hwloc_bitmap_or(mask, mask, convert->mask); + } + void bitwise_not() override { hwloc_bitmap_not(mask, mask); } + int begin() const override { return hwloc_bitmap_first(mask); } + int end() const override { return -1; } + int next(int previous) const override { + return hwloc_bitmap_next(mask, previous); + } + int get_system_affinity(bool abort_on_error) override { + KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), + "Illegal get affinity operation when not capable"); + int retval = + hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); + if (retval >= 0) { + return 0; + } + int error = errno; + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), + __kmp_msg_null); + } + return error; + } + int set_system_affinity(bool abort_on_error) const override { + KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), + "Illegal get affinity operation when not capable"); + int retval = + hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); + if (retval >= 0) { + return 0; + } + int error = errno; + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), + __kmp_msg_null); + } + return error; + } + int get_proc_group() const override { + int i; + int group = -1; +#if KMP_OS_WINDOWS + if (__kmp_num_proc_groups == 1) { + return 1; + } + for (i = 0; i < __kmp_num_proc_groups; i++) { + // On windows, the long type is always 32 bits + unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2); + unsigned long second_32_bits = + hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1); + if (first_32_bits == 0 && second_32_bits == 0) { + continue; + } + if (group >= 0) { + return -1; + } + group = i; + } +#endif /* KMP_OS_WINDOWS */ + return group; + } + }; + void determine_capable(const char *var) override { + const 
hwloc_topology_support *topology_support; + if (__kmp_hwloc_topology == NULL) { + if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) { + __kmp_hwloc_error = TRUE; + if (__kmp_affinity_verbose) + KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()"); + } + if (hwloc_topology_load(__kmp_hwloc_topology) < 0) { + __kmp_hwloc_error = TRUE; + if (__kmp_affinity_verbose) + KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()"); + } + } + topology_support = hwloc_topology_get_support(__kmp_hwloc_topology); + // Is the system capable of setting/getting this thread's affinity? + // Also, is topology discovery possible? (pu indicates ability to discover + // processing units). And finally, were there no errors when calling any + // hwloc_* API functions? + if (topology_support && topology_support->cpubind->set_thisthread_cpubind && + topology_support->cpubind->get_thisthread_cpubind && + topology_support->discovery->pu && !__kmp_hwloc_error) { + // enables affinity according to KMP_AFFINITY_CAPABLE() macro + KMP_AFFINITY_ENABLE(TRUE); + } else { + // indicate that hwloc didn't work and disable affinity + __kmp_hwloc_error = TRUE; + KMP_AFFINITY_DISABLE(); + } + } + void bind_thread(int which) override { + KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), + "Illegal set affinity operation when not capable"); + KMPAffinity::Mask *mask; + KMP_CPU_ALLOC_ON_STACK(mask); + KMP_CPU_ZERO(mask); + KMP_CPU_SET(which, mask); + __kmp_set_system_affinity(mask, TRUE); + KMP_CPU_FREE_FROM_STACK(mask); + } + KMPAffinity::Mask *allocate_mask() override { return new Mask(); } + void deallocate_mask(KMPAffinity::Mask *m) override { delete m; } + KMPAffinity::Mask *allocate_mask_array(int num) override { + return new Mask[num]; + } + void deallocate_mask_array(KMPAffinity::Mask *array) override { + Mask *hwloc_array = static_cast<Mask *>(array); + delete[] hwloc_array; + } + KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, + int index) override { + Mask *hwloc_array = 
static_cast<Mask *>(array); + return &(hwloc_array[index]); + } + api_type get_api_type() const override { return HWLOC; } }; #endif /* KMP_USE_HWLOC */ #if KMP_OS_LINUX -/* - * On some of the older OS's that we build on, these constants aren't present - * in <asm/unistd.h> #included from <sys.syscall.h>. They must be the same on - * all systems of the same arch where they are defined, and they cannot change. - * stone forever. - */ +/* On some of the older OS's that we build on, these constants aren't present + in <asm/unistd.h> #included from <sys.syscall.h>. They must be the same on + all systems of the same arch where they are defined, and they cannot change. + stone forever. */ #include <sys/syscall.h> -# if KMP_ARCH_X86 || KMP_ARCH_ARM -# ifndef __NR_sched_setaffinity -# define __NR_sched_setaffinity 241 -# elif __NR_sched_setaffinity != 241 -# error Wrong code for setaffinity system call. -# endif /* __NR_sched_setaffinity */ -# ifndef __NR_sched_getaffinity -# define __NR_sched_getaffinity 242 -# elif __NR_sched_getaffinity != 242 -# error Wrong code for getaffinity system call. -# endif /* __NR_sched_getaffinity */ -# elif KMP_ARCH_AARCH64 -# ifndef __NR_sched_setaffinity -# define __NR_sched_setaffinity 122 -# elif __NR_sched_setaffinity != 122 -# error Wrong code for setaffinity system call. -# endif /* __NR_sched_setaffinity */ -# ifndef __NR_sched_getaffinity -# define __NR_sched_getaffinity 123 -# elif __NR_sched_getaffinity != 123 -# error Wrong code for getaffinity system call. -# endif /* __NR_sched_getaffinity */ -# elif KMP_ARCH_X86_64 -# ifndef __NR_sched_setaffinity -# define __NR_sched_setaffinity 203 -# elif __NR_sched_setaffinity != 203 -# error Wrong code for setaffinity system call. -# endif /* __NR_sched_setaffinity */ -# ifndef __NR_sched_getaffinity -# define __NR_sched_getaffinity 204 -# elif __NR_sched_getaffinity != 204 -# error Wrong code for getaffinity system call. 
-# endif /* __NR_sched_getaffinity */ -# elif KMP_ARCH_PPC64 -# ifndef __NR_sched_setaffinity -# define __NR_sched_setaffinity 222 -# elif __NR_sched_setaffinity != 222 -# error Wrong code for setaffinity system call. -# endif /* __NR_sched_setaffinity */ -# ifndef __NR_sched_getaffinity -# define __NR_sched_getaffinity 223 -# elif __NR_sched_getaffinity != 223 -# error Wrong code for getaffinity system call. -# endif /* __NR_sched_getaffinity */ -# elif KMP_ARCH_MIPS -# ifndef __NR_sched_setaffinity -# define __NR_sched_setaffinity 4239 -# elif __NR_sched_setaffinity != 4239 -# error Wrong code for setaffinity system call. -# endif /* __NR_sched_setaffinity */ -# ifndef __NR_sched_getaffinity -# define __NR_sched_getaffinity 4240 -# elif __NR_sched_getaffinity != 4240 -# error Wrong code for getaffinity system call. -# endif /* __NR_sched_getaffinity */ -# elif KMP_ARCH_MIPS64 -# ifndef __NR_sched_setaffinity -# define __NR_sched_setaffinity 5195 -# elif __NR_sched_setaffinity != 5195 -# error Wrong code for setaffinity system call. -# endif /* __NR_sched_setaffinity */ -# ifndef __NR_sched_getaffinity -# define __NR_sched_getaffinity 5196 -# elif __NR_sched_getaffinity != 5196 -# error Wrong code for getaffinity system call. -# endif /* __NR_sched_getaffinity */ -# error Unknown or unsupported architecture -# endif /* KMP_ARCH_* */ +#if KMP_ARCH_X86 || KMP_ARCH_ARM +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 241 +#elif __NR_sched_setaffinity != 241 +#error Wrong code for setaffinity system call. +#endif /* __NR_sched_setaffinity */ +#ifndef __NR_sched_getaffinity +#define __NR_sched_getaffinity 242 +#elif __NR_sched_getaffinity != 242 +#error Wrong code for getaffinity system call. +#endif /* __NR_sched_getaffinity */ +#elif KMP_ARCH_AARCH64 +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 122 +#elif __NR_sched_setaffinity != 122 +#error Wrong code for setaffinity system call. 
+#endif /* __NR_sched_setaffinity */ +#ifndef __NR_sched_getaffinity +#define __NR_sched_getaffinity 123 +#elif __NR_sched_getaffinity != 123 +#error Wrong code for getaffinity system call. +#endif /* __NR_sched_getaffinity */ +#elif KMP_ARCH_X86_64 +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 203 +#elif __NR_sched_setaffinity != 203 +#error Wrong code for setaffinity system call. +#endif /* __NR_sched_setaffinity */ +#ifndef __NR_sched_getaffinity +#define __NR_sched_getaffinity 204 +#elif __NR_sched_getaffinity != 204 +#error Wrong code for getaffinity system call. +#endif /* __NR_sched_getaffinity */ +#elif KMP_ARCH_PPC64 +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 222 +#elif __NR_sched_setaffinity != 222 +#error Wrong code for setaffinity system call. +#endif /* __NR_sched_setaffinity */ +#ifndef __NR_sched_getaffinity +#define __NR_sched_getaffinity 223 +#elif __NR_sched_getaffinity != 223 +#error Wrong code for getaffinity system call. +#endif /* __NR_sched_getaffinity */ +#elif KMP_ARCH_MIPS +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 4239 +#elif __NR_sched_setaffinity != 4239 +#error Wrong code for setaffinity system call. +#endif /* __NR_sched_setaffinity */ +#ifndef __NR_sched_getaffinity +#define __NR_sched_getaffinity 4240 +#elif __NR_sched_getaffinity != 4240 +#error Wrong code for getaffinity system call. +#endif /* __NR_sched_getaffinity */ +#elif KMP_ARCH_MIPS64 +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 5195 +#elif __NR_sched_setaffinity != 5195 +#error Wrong code for setaffinity system call. +#endif /* __NR_sched_setaffinity */ +#ifndef __NR_sched_getaffinity +#define __NR_sched_getaffinity 5196 +#elif __NR_sched_getaffinity != 5196 +#error Wrong code for getaffinity system call. 
+#endif /* __NR_sched_getaffinity */ +#error Unknown or unsupported architecture +#endif /* KMP_ARCH_* */ class KMPNativeAffinity : public KMPAffinity { - class Mask : public KMPAffinity::Mask { - typedef unsigned char mask_t; - static const int BITS_PER_MASK_T = sizeof(mask_t)*CHAR_BIT; - public: - mask_t* mask; - Mask() { mask = (mask_t*)__kmp_allocate(__kmp_affin_mask_size); } - ~Mask() { if (mask) __kmp_free(mask); } - void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); } - bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); } - void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); } - void zero() override { - for (size_t i=0; i<__kmp_affin_mask_size; ++i) - mask[i] = 0; - } - void copy(const KMPAffinity::Mask* src) override { - const Mask * convert = static_cast<const Mask*>(src); - for (size_t i=0; i<__kmp_affin_mask_size; ++i) - mask[i] = convert->mask[i]; - } - void bitwise_and(const KMPAffinity::Mask* rhs) override { - const Mask * convert = static_cast<const Mask*>(rhs); - for (size_t i=0; i<__kmp_affin_mask_size; ++i) - mask[i] &= convert->mask[i]; - } - void bitwise_or(const KMPAffinity::Mask* rhs) override { - const Mask * convert = static_cast<const Mask*>(rhs); - for (size_t i=0; i<__kmp_affin_mask_size; ++i) - mask[i] |= convert->mask[i]; - } - void bitwise_not() override { - for (size_t i=0; i<__kmp_affin_mask_size; ++i) - mask[i] = ~(mask[i]); - } - int begin() const override { - int retval = 0; - while (retval < end() && !is_set(retval)) - ++retval; - return retval; - } - int end() const override { return __kmp_affin_mask_size*BITS_PER_MASK_T; } - int next(int previous) const override { - int retval = previous+1; - while (retval < end() && !is_set(retval)) - ++retval; - return retval; - } - int get_system_affinity(bool abort_on_error) override { - KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal get affinity 
operation when not capable"); - int retval = syscall( __NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask ); - if (retval >= 0) { - return 0; - } - int error = errno; - if (abort_on_error) { - __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null); - } - return error; - } - int set_system_affinity(bool abort_on_error) const override { - KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal get affinity operation when not capable"); - int retval = syscall( __NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask ); - if (retval >= 0) { - return 0; - } - int error = errno; - if (abort_on_error) { - __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null); - } - return error; - } - }; - void determine_capable(const char* env_var) override { - __kmp_affinity_determine_capable(env_var); + class Mask : public KMPAffinity::Mask { + typedef unsigned char mask_t; + static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT; + + public: + mask_t *mask; + Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); } + ~Mask() { + if (mask) + __kmp_free(mask); + } + void set(int i) override { + mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); + } + bool is_set(int i) const override { + return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); + } + void clear(int i) override { + mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); } - void bind_thread(int which) override { - __kmp_affinity_bind_thread(which); + void zero() override { + for (size_t i = 0; i < __kmp_affin_mask_size; ++i) + mask[i] = 0; } - KMPAffinity::Mask* allocate_mask() override { - KMPNativeAffinity::Mask* retval = new Mask(); - return retval; + void copy(const KMPAffinity::Mask *src) override { + const Mask *convert = static_cast<const Mask *>(src); + for (size_t i = 0; i < __kmp_affin_mask_size; ++i) + mask[i] = convert->mask[i]; } - void deallocate_mask(KMPAffinity::Mask* m) override { - 
KMPNativeAffinity::Mask* native_mask = static_cast<KMPNativeAffinity::Mask*>(m); - delete m; + void bitwise_and(const KMPAffinity::Mask *rhs) override { + const Mask *convert = static_cast<const Mask *>(rhs); + for (size_t i = 0; i < __kmp_affin_mask_size; ++i) + mask[i] &= convert->mask[i]; } - KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; } - void deallocate_mask_array(KMPAffinity::Mask* array) override { - Mask* linux_array = static_cast<Mask*>(array); - delete[] linux_array; + void bitwise_or(const KMPAffinity::Mask *rhs) override { + const Mask *convert = static_cast<const Mask *>(rhs); + for (size_t i = 0; i < __kmp_affin_mask_size; ++i) + mask[i] |= convert->mask[i]; } - KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override { - Mask* linux_array = static_cast<Mask*>(array); - return &(linux_array[index]); + void bitwise_not() override { + for (size_t i = 0; i < __kmp_affin_mask_size; ++i) + mask[i] = ~(mask[i]); } - api_type get_api_type() const override { return NATIVE_OS; } + int begin() const override { + int retval = 0; + while (retval < end() && !is_set(retval)) + ++retval; + return retval; + } + int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; } + int next(int previous) const override { + int retval = previous + 1; + while (retval < end() && !is_set(retval)) + ++retval; + return retval; + } + int get_system_affinity(bool abort_on_error) override { + KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), + "Illegal get affinity operation when not capable"); + int retval = + syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask); + if (retval >= 0) { + return 0; + } + int error = errno; + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), + __kmp_msg_null); + } + return error; + } + int set_system_affinity(bool abort_on_error) const override { + KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), + "Illegal get affinity operation when not capable"); + int 
retval = + syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask); + if (retval >= 0) { + return 0; + } + int error = errno; + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), + __kmp_msg_null); + } + return error; + } + }; + void determine_capable(const char *env_var) override { + __kmp_affinity_determine_capable(env_var); + } + void bind_thread(int which) override { __kmp_affinity_bind_thread(which); } + KMPAffinity::Mask *allocate_mask() override { + KMPNativeAffinity::Mask *retval = new Mask(); + return retval; + } + void deallocate_mask(KMPAffinity::Mask *m) override { + KMPNativeAffinity::Mask *native_mask = + static_cast<KMPNativeAffinity::Mask *>(m); + delete m; + } + KMPAffinity::Mask *allocate_mask_array(int num) override { + return new Mask[num]; + } + void deallocate_mask_array(KMPAffinity::Mask *array) override { + Mask *linux_array = static_cast<Mask *>(array); + delete[] linux_array; + } + KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, + int index) override { + Mask *linux_array = static_cast<Mask *>(array); + return &(linux_array[index]); + } + api_type get_api_type() const override { return NATIVE_OS; } }; #endif /* KMP_OS_LINUX */ #if KMP_OS_WINDOWS class KMPNativeAffinity : public KMPAffinity { - class Mask : public KMPAffinity::Mask { - typedef ULONG_PTR mask_t; - static const int BITS_PER_MASK_T = sizeof(mask_t)*CHAR_BIT; - mask_t* mask; - public: - Mask() { mask = (mask_t*)__kmp_allocate(sizeof(mask_t)*__kmp_num_proc_groups); } - ~Mask() { if (mask) __kmp_free(mask); } - void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); } - bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); } - void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); } - void zero() override { - for (size_t i=0; i<__kmp_num_proc_groups; ++i) - mask[i] = 0; - } - void copy(const 
KMPAffinity::Mask* src) override { - const Mask * convert = static_cast<const Mask*>(src); - for (size_t i=0; i<__kmp_num_proc_groups; ++i) - mask[i] = convert->mask[i]; - } - void bitwise_and(const KMPAffinity::Mask* rhs) override { - const Mask * convert = static_cast<const Mask*>(rhs); - for (size_t i=0; i<__kmp_num_proc_groups; ++i) - mask[i] &= convert->mask[i]; - } - void bitwise_or(const KMPAffinity::Mask* rhs) override { - const Mask * convert = static_cast<const Mask*>(rhs); - for (size_t i=0; i<__kmp_num_proc_groups; ++i) - mask[i] |= convert->mask[i]; - } - void bitwise_not() override { - for (size_t i=0; i<__kmp_num_proc_groups; ++i) - mask[i] = ~(mask[i]); - } - int begin() const override { - int retval = 0; - while (retval < end() && !is_set(retval)) - ++retval; - return retval; - } - int end() const override { return __kmp_num_proc_groups*BITS_PER_MASK_T; } - int next(int previous) const override { - int retval = previous+1; - while (retval < end() && !is_set(retval)) - ++retval; - return retval; - } - int set_system_affinity(bool abort_on_error) const override { - if (__kmp_num_proc_groups > 1) { - // Check for a valid mask. - GROUP_AFFINITY ga; - int group = get_proc_group(); - if (group < 0) { - if (abort_on_error) { - KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); - } - return -1; - } - // Transform the bit vector into a GROUP_AFFINITY struct - // and make the system call to set affinity. 
- ga.Group = group; - ga.Mask = mask[group]; - ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0; - - KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL); - if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg(kmp_ms_fatal, KMP_MSG( CantSetThreadAffMask ), - KMP_ERR( error ), __kmp_msg_null); - } - return error; - } - } else { - if (!SetThreadAffinityMask( GetCurrentThread(), *mask )) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg(kmp_ms_fatal, KMP_MSG( CantSetThreadAffMask ), - KMP_ERR( error ), __kmp_msg_null); - } - return error; - } - } - return 0; - } - int get_system_affinity(bool abort_on_error) override { - if (__kmp_num_proc_groups > 1) { - this->zero(); - GROUP_AFFINITY ga; - KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL); - if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetThreadGroupAffinity()"), - KMP_ERR(error), __kmp_msg_null); - } - return error; - } - if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) || (ga.Mask == 0)) { - return -1; - } - mask[ga.Group] = ga.Mask; - } else { - mask_t newMask, sysMask, retval; - if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetProcessAffinityMask()"), - KMP_ERR(error), __kmp_msg_null); - } - return error; - } - retval = SetThreadAffinityMask(GetCurrentThread(), newMask); - if (! retval) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"), - KMP_ERR(error), __kmp_msg_null); - } - return error; - } - newMask = SetThreadAffinityMask(GetCurrentThread(), retval); - if (! 
newMask) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"), - KMP_ERR(error), __kmp_msg_null); - } - } - *mask = retval; - } - return 0; - } - int get_proc_group() const override { - int group = -1; - if (__kmp_num_proc_groups == 1) { - return 1; - } - for (int i = 0; i < __kmp_num_proc_groups; i++) { - if (mask[i] == 0) - continue; - if (group >= 0) - return -1; - group = i; - } - return group; - } - }; - void determine_capable(const char* env_var) override { - __kmp_affinity_determine_capable(env_var); + class Mask : public KMPAffinity::Mask { + typedef ULONG_PTR mask_t; + static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT; + mask_t *mask; + + public: + Mask() { + mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups); + } + ~Mask() { + if (mask) + __kmp_free(mask); + } + void set(int i) override { + mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); + } + bool is_set(int i) const override { + return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); + } + void clear(int i) override { + mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); + } + void zero() override { + for (size_t i = 0; i < __kmp_num_proc_groups; ++i) + mask[i] = 0; + } + void copy(const KMPAffinity::Mask *src) override { + const Mask *convert = static_cast<const Mask *>(src); + for (size_t i = 0; i < __kmp_num_proc_groups; ++i) + mask[i] = convert->mask[i]; + } + void bitwise_and(const KMPAffinity::Mask *rhs) override { + const Mask *convert = static_cast<const Mask *>(rhs); + for (size_t i = 0; i < __kmp_num_proc_groups; ++i) + mask[i] &= convert->mask[i]; + } + void bitwise_or(const KMPAffinity::Mask *rhs) override { + const Mask *convert = static_cast<const Mask *>(rhs); + for (size_t i = 0; i < __kmp_num_proc_groups; ++i) + mask[i] |= convert->mask[i]; + } + void bitwise_not() override { + for (size_t i = 0; i < __kmp_num_proc_groups; 
++i) + mask[i] = ~(mask[i]); + } + int begin() const override { + int retval = 0; + while (retval < end() && !is_set(retval)) + ++retval; + return retval; } - void bind_thread(int which) override { - __kmp_affinity_bind_thread(which); + int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; } + int next(int previous) const override { + int retval = previous + 1; + while (retval < end() && !is_set(retval)) + ++retval; + return retval; } - KMPAffinity::Mask* allocate_mask() override { return new Mask(); } - void deallocate_mask(KMPAffinity::Mask* m) override { delete m; } - KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; } - void deallocate_mask_array(KMPAffinity::Mask* array) override { - Mask* windows_array = static_cast<Mask*>(array); - delete[] windows_array; + int set_system_affinity(bool abort_on_error) const override { + if (__kmp_num_proc_groups > 1) { + // Check for a valid mask. + GROUP_AFFINITY ga; + int group = get_proc_group(); + if (group < 0) { + if (abort_on_error) { + KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); + } + return -1; + } + // Transform the bit vector into a GROUP_AFFINITY struct + // and make the system call to set affinity. 
+ ga.Group = group; + ga.Mask = mask[group]; + ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0; + + KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL); + if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetThreadAffMask), + KMP_ERR(error), __kmp_msg_null); + } + return error; + } + } else { + if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetThreadAffMask), + KMP_ERR(error), __kmp_msg_null); + } + return error; + } + } + return 0; } - KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override { - Mask* windows_array = static_cast<Mask*>(array); - return &(windows_array[index]); + int get_system_affinity(bool abort_on_error) override { + if (__kmp_num_proc_groups > 1) { + this->zero(); + GROUP_AFFINITY ga; + KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL); + if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, + KMP_MSG(FunctionError, "GetThreadGroupAffinity()"), + KMP_ERR(error), __kmp_msg_null); + } + return error; + } + if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) || + (ga.Mask == 0)) { + return -1; + } + mask[ga.Group] = ga.Mask; + } else { + mask_t newMask, sysMask, retval; + if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, + KMP_MSG(FunctionError, "GetProcessAffinityMask()"), + KMP_ERR(error), __kmp_msg_null); + } + return error; + } + retval = SetThreadAffinityMask(GetCurrentThread(), newMask); + if (!retval) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, + KMP_MSG(FunctionError, "SetThreadAffinityMask()"), + KMP_ERR(error), __kmp_msg_null); + } + return 
error; + } + newMask = SetThreadAffinityMask(GetCurrentThread(), retval); + if (!newMask) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, + KMP_MSG(FunctionError, "SetThreadAffinityMask()"), + KMP_ERR(error), __kmp_msg_null); + } + } + *mask = retval; + } + return 0; } - api_type get_api_type() const override { return NATIVE_OS; } + int get_proc_group() const override { + int group = -1; + if (__kmp_num_proc_groups == 1) { + return 1; + } + for (int i = 0; i < __kmp_num_proc_groups; i++) { + if (mask[i] == 0) + continue; + if (group >= 0) + return -1; + group = i; + } + return group; + } + }; + void determine_capable(const char *env_var) override { + __kmp_affinity_determine_capable(env_var); + } + void bind_thread(int which) override { __kmp_affinity_bind_thread(which); } + KMPAffinity::Mask *allocate_mask() override { return new Mask(); } + void deallocate_mask(KMPAffinity::Mask *m) override { delete m; } + KMPAffinity::Mask *allocate_mask_array(int num) override { + return new Mask[num]; + } + void deallocate_mask_array(KMPAffinity::Mask *array) override { + Mask *windows_array = static_cast<Mask *>(array); + delete[] windows_array; + } + KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, + int index) override { + Mask *windows_array = static_cast<Mask *>(array); + return &(windows_array[index]); + } + api_type get_api_type() const override { return NATIVE_OS; } }; #endif /* KMP_OS_WINDOWS */ #endif /* KMP_AFFINITY_SUPPORTED */ class Address { public: - static const unsigned maxDepth = 32; - unsigned labels[maxDepth]; - unsigned childNums[maxDepth]; - unsigned depth; - unsigned leader; - Address(unsigned _depth) - : depth(_depth), leader(FALSE) { - } - Address &operator=(const Address &b) { - depth = b.depth; - for (unsigned i = 0; i < depth; i++) { - labels[i] = b.labels[i]; - childNums[i] = b.childNums[i]; - } - leader = FALSE; - return *this; - } - bool operator==(const Address &b) const { - if (depth != b.depth) 
- return false; - for (unsigned i = 0; i < depth; i++) - if(labels[i] != b.labels[i]) - return false; - return true; - } - bool isClose(const Address &b, int level) const { - if (depth != b.depth) - return false; - if ((unsigned)level >= depth) - return true; - for (unsigned i = 0; i < (depth - level); i++) - if(labels[i] != b.labels[i]) - return false; - return true; - } - bool operator!=(const Address &b) const { - return !operator==(b); - } - void print() const { - unsigned i; - printf("Depth: %u --- ", depth); - for(i=0;i<depth;i++) { - printf("%u ", labels[i]); - } + static const unsigned maxDepth = 32; + unsigned labels[maxDepth]; + unsigned childNums[maxDepth]; + unsigned depth; + unsigned leader; + Address(unsigned _depth) : depth(_depth), leader(FALSE) {} + Address &operator=(const Address &b) { + depth = b.depth; + for (unsigned i = 0; i < depth; i++) { + labels[i] = b.labels[i]; + childNums[i] = b.childNums[i]; } + leader = FALSE; + return *this; + } + bool operator==(const Address &b) const { + if (depth != b.depth) + return false; + for (unsigned i = 0; i < depth; i++) + if (labels[i] != b.labels[i]) + return false; + return true; + } + bool isClose(const Address &b, int level) const { + if (depth != b.depth) + return false; + if ((unsigned)level >= depth) + return true; + for (unsigned i = 0; i < (depth - level); i++) + if (labels[i] != b.labels[i]) + return false; + return true; + } + bool operator!=(const Address &b) const { return !operator==(b); } + void print() const { + unsigned i; + printf("Depth: %u --- ", depth); + for (i = 0; i < depth; i++) { + printf("%u ", labels[i]); + } + } }; class AddrUnsPair { public: - Address first; - unsigned second; - AddrUnsPair(Address _first, unsigned _second) - : first(_first), second(_second) { - } - AddrUnsPair &operator=(const AddrUnsPair &b) - { - first = b.first; - second = b.second; - return *this; - } - void print() const { - printf("first = "); first.print(); - printf(" --- second = %u", second); - } 
- bool operator==(const AddrUnsPair &b) const { - if(first != b.first) return false; - if(second != b.second) return false; - return true; - } - bool operator!=(const AddrUnsPair &b) const { - return !operator==(b); - } + Address first; + unsigned second; + AddrUnsPair(Address _first, unsigned _second) + : first(_first), second(_second) {} + AddrUnsPair &operator=(const AddrUnsPair &b) { + first = b.first; + second = b.second; + return *this; + } + void print() const { + printf("first = "); + first.print(); + printf(" --- second = %u", second); + } + bool operator==(const AddrUnsPair &b) const { + if (first != b.first) + return false; + if (second != b.second) + return false; + return true; + } + bool operator!=(const AddrUnsPair &b) const { return !operator==(b); } }; - -static int -__kmp_affinity_cmp_Address_labels(const void *a, const void *b) -{ - const Address *aa = (const Address *)&(((AddrUnsPair *)a) - ->first); - const Address *bb = (const Address *)&(((AddrUnsPair *)b) - ->first); - unsigned depth = aa->depth; - unsigned i; - KMP_DEBUG_ASSERT(depth == bb->depth); - for (i = 0; i < depth; i++) { - if (aa->labels[i] < bb->labels[i]) return -1; - if (aa->labels[i] > bb->labels[i]) return 1; - } - return 0; +static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) { + const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first); + const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first); + unsigned depth = aa->depth; + unsigned i; + KMP_DEBUG_ASSERT(depth == bb->depth); + for (i = 0; i < depth; i++) { + if (aa->labels[i] < bb->labels[i]) + return -1; + if (aa->labels[i] > bb->labels[i]) + return 1; + } + return 0; } - -/** A structure for holding machine-specific hierarchy info to be computed once at init. - This structure represents a mapping of threads to the actual machine hierarchy, or to - our best guess at what the hierarchy might be, for the purpose of performing an - efficient barrier. 
In the worst case, when there is no machine hierarchy information, - it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */ +/* A structure for holding machine-specific hierarchy info to be computed once + at init. This structure represents a mapping of threads to the actual machine + hierarchy, or to our best guess at what the hierarchy might be, for the + purpose of performing an efficient barrier. In the worst case, when there is + no machine hierarchy information, it produces a tree suitable for a barrier, + similar to the tree used in the hyper barrier. */ class hierarchy_info { public: - /** Good default values for number of leaves and branching factor, given no affinity information. - Behaves a bit like hyper barrier. */ - static const kmp_uint32 maxLeaves=4; - static const kmp_uint32 minBranch=4; - /** Number of levels in the hierarchy. Typical levels are threads/core, cores/package - or socket, packages/node, nodes/machine, etc. We don't want to get specific with - nomenclature. When the machine is oversubscribed we add levels to duplicate the - hierarchy, doubling the thread capacity of the hierarchy each time we add a level. */ - kmp_uint32 maxLevels; - - /** This is specifically the depth of the machine configuration hierarchy, in terms of the - number of levels along the longest path from root to any leaf. It corresponds to the - number of entries in numPerLevel if we exclude all but one trailing 1. */ - kmp_uint32 depth; - kmp_uint32 base_num_threads; - enum init_status { initialized=0, not_initialized=1, initializing=2 }; - volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress - volatile kmp_int8 resizing; // 0=not resizing, 1=resizing - - /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a - node at level i has. 
For example, if we have a machine with 4 packages, 4 cores/package - and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */ - kmp_uint32 *numPerLevel; - kmp_uint32 *skipPerLevel; - - void deriveLevels(AddrUnsPair *adr2os, int num_addrs) { - int hier_depth = adr2os[0].first.depth; - int level = 0; - for (int i=hier_depth-1; i>=0; --i) { - int max = -1; - for (int j=0; j<num_addrs; ++j) { - int next = adr2os[j].first.childNums[i]; - if (next > max) max = next; - } - numPerLevel[level] = max+1; - ++level; - } + /* Good default values for number of leaves and branching factor, given no + affinity information. Behaves a bit like hyper barrier. */ + static const kmp_uint32 maxLeaves = 4; + static const kmp_uint32 minBranch = 4; + /** Number of levels in the hierarchy. Typical levels are threads/core, + cores/package or socket, packages/node, nodes/machine, etc. We don't want + to get specific with nomenclature. When the machine is oversubscribed we + add levels to duplicate the hierarchy, doubling the thread capacity of the + hierarchy each time we add a level. */ + kmp_uint32 maxLevels; + + /** This is specifically the depth of the machine configuration hierarchy, in + terms of the number of levels along the longest path from root to any + leaf. It corresponds to the number of entries in numPerLevel if we exclude + all but one trailing 1. */ + kmp_uint32 depth; + kmp_uint32 base_num_threads; + enum init_status { initialized = 0, not_initialized = 1, initializing = 2 }; + volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, + // 2=initialization in progress + volatile kmp_int8 resizing; // 0=not resizing, 1=resizing + + /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children + the parent of a node at level i has. For example, if we have a machine + with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel = + {2, 4, 4, 1, 1}. All empty levels are set to 1. 
*/ + kmp_uint32 *numPerLevel; + kmp_uint32 *skipPerLevel; + + void deriveLevels(AddrUnsPair *adr2os, int num_addrs) { + int hier_depth = adr2os[0].first.depth; + int level = 0; + for (int i = hier_depth - 1; i >= 0; --i) { + int max = -1; + for (int j = 0; j < num_addrs; ++j) { + int next = adr2os[j].first.childNums[i]; + if (next > max) + max = next; + } + numPerLevel[level] = max + 1; + ++level; + } + } + + hierarchy_info() + : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {} + + void fini() { + if (!uninitialized && numPerLevel) + __kmp_free(numPerLevel); + } + + void init(AddrUnsPair *adr2os, int num_addrs) { + kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8( + &uninitialized, not_initialized, initializing); + if (bool_result == 0) { // Wait for initialization + while (TCR_1(uninitialized) != initialized) + KMP_CPU_PAUSE(); + return; + } + KMP_DEBUG_ASSERT(bool_result == 1); + + /* Added explicit initialization of the data fields here to prevent usage of + dirty value observed when static library is re-initialized multiple times + (e.g. when non-OpenMP thread repeatedly launches/joins thread that uses + OpenMP). 
*/ + depth = 1; + resizing = 0; + maxLevels = 7; + numPerLevel = + (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32)); + skipPerLevel = &(numPerLevel[maxLevels]); + for (kmp_uint32 i = 0; i < maxLevels; + ++i) { // init numPerLevel[*] to 1 item per level + numPerLevel[i] = 1; + skipPerLevel[i] = 1; } - hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {} - - void fini() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); } - - void init(AddrUnsPair *adr2os, int num_addrs) - { - kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing); - if (bool_result == 0) { // Wait for initialization - while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE(); - return; - } - KMP_DEBUG_ASSERT(bool_result==1); - - /* Added explicit initialization of the data fields here to prevent usage of dirty value - observed when static library is re-initialized multiple times (e.g. when - non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). 
*/ - depth = 1; - resizing = 0; - maxLevels = 7; - numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32)); - skipPerLevel = &(numPerLevel[maxLevels]); - for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level - numPerLevel[i] = 1; - skipPerLevel[i] = 1; - } - - // Sort table by physical ID - if (adr2os) { - qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels); - deriveLevels(adr2os, num_addrs); - } - else { - numPerLevel[0] = maxLeaves; - numPerLevel[1] = num_addrs/maxLeaves; - if (num_addrs%maxLeaves) numPerLevel[1]++; - } - - base_num_threads = num_addrs; - for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth - if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1' - depth++; - - kmp_uint32 branch = minBranch; - if (numPerLevel[0] == 1) branch = num_addrs/maxLeaves; - if (branch<minBranch) branch=minBranch; - for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width - while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>maxLeaves)) { // max 4 on level 0! 
- if (numPerLevel[d] & 1) numPerLevel[d]++; - numPerLevel[d] = numPerLevel[d] >> 1; - if (numPerLevel[d+1] == 1) depth++; - numPerLevel[d+1] = numPerLevel[d+1] << 1; - } - if(numPerLevel[0] == 1) { - branch = branch >> 1; - if (branch<4) branch = minBranch; - } - } - - for (kmp_uint32 i=1; i<depth; ++i) - skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1]; - // Fill in hierarchy in the case of oversubscription - for (kmp_uint32 i=depth; i<maxLevels; ++i) - skipPerLevel[i] = 2*skipPerLevel[i-1]; - - uninitialized = initialized; // One writer + // Sort table by physical ID + if (adr2os) { + qsort(adr2os, num_addrs, sizeof(*adr2os), + __kmp_affinity_cmp_Address_labels); + deriveLevels(adr2os, num_addrs); + } else { + numPerLevel[0] = maxLeaves; + numPerLevel[1] = num_addrs / maxLeaves; + if (num_addrs % maxLeaves) + numPerLevel[1]++; + } + base_num_threads = num_addrs; + for (int i = maxLevels - 1; i >= 0; + --i) // count non-empty levels to get depth + if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1' + depth++; + + kmp_uint32 branch = minBranch; + if (numPerLevel[0] == 1) + branch = num_addrs / maxLeaves; + if (branch < minBranch) + branch = minBranch; + for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width + while (numPerLevel[d] > branch || + (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0! 
+ if (numPerLevel[d] & 1) + numPerLevel[d]++; + numPerLevel[d] = numPerLevel[d] >> 1; + if (numPerLevel[d + 1] == 1) + depth++; + numPerLevel[d + 1] = numPerLevel[d + 1] << 1; + } + if (numPerLevel[0] == 1) { + branch = branch >> 1; + if (branch < 4) + branch = minBranch; + } } - // Resize the hierarchy if nproc changes to something larger than before - void resize(kmp_uint32 nproc) - { - kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1); - while (bool_result == 0) { // someone else is trying to resize - KMP_CPU_PAUSE(); - if (nproc <= base_num_threads) // happy with other thread's resize - return; - else // try to resize - bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1); - } - KMP_DEBUG_ASSERT(bool_result!=0); - if (nproc <= base_num_threads) return; // happy with other thread's resize - - // Calculate new maxLevels - kmp_uint32 old_sz = skipPerLevel[depth-1]; - kmp_uint32 incs = 0, old_maxLevels = maxLevels; - // First see if old maxLevels is enough to contain new size - for (kmp_uint32 i=depth; i<maxLevels && nproc>old_sz; ++i) { - skipPerLevel[i] = 2*skipPerLevel[i-1]; - numPerLevel[i-1] *= 2; - old_sz *= 2; - depth++; - } - if (nproc > old_sz) { // Not enough space, need to expand hierarchy - while (nproc > old_sz) { - old_sz *=2; - incs++; - depth++; - } - maxLevels += incs; - - // Resize arrays - kmp_uint32 *old_numPerLevel = numPerLevel; - kmp_uint32 *old_skipPerLevel = skipPerLevel; - numPerLevel = skipPerLevel = NULL; - numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32)); - skipPerLevel = &(numPerLevel[maxLevels]); - - // Copy old elements from old arrays - for (kmp_uint32 i=0; i<old_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level - numPerLevel[i] = old_numPerLevel[i]; - skipPerLevel[i] = old_skipPerLevel[i]; - } - - // Init new elements in arrays to 1 - for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level - numPerLevel[i] = 1; - skipPerLevel[i] = 1; 
- } - - // Free old arrays - __kmp_free(old_numPerLevel); - } + for (kmp_uint32 i = 1; i < depth; ++i) + skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1]; + // Fill in hierarchy in the case of oversubscription + for (kmp_uint32 i = depth; i < maxLevels; ++i) + skipPerLevel[i] = 2 * skipPerLevel[i - 1]; + + uninitialized = initialized; // One writer + + } + + // Resize the hierarchy if nproc changes to something larger than before + void resize(kmp_uint32 nproc) { + kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1); + while (bool_result == 0) { // someone else is trying to resize + KMP_CPU_PAUSE(); + if (nproc <= base_num_threads) // happy with other thread's resize + return; + else // try to resize + bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1); + } + KMP_DEBUG_ASSERT(bool_result != 0); + if (nproc <= base_num_threads) + return; // happy with other thread's resize + + // Calculate new maxLevels + kmp_uint32 old_sz = skipPerLevel[depth - 1]; + kmp_uint32 incs = 0, old_maxLevels = maxLevels; + // First see if old maxLevels is enough to contain new size + for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) { + skipPerLevel[i] = 2 * skipPerLevel[i - 1]; + numPerLevel[i - 1] *= 2; + old_sz *= 2; + depth++; + } + if (nproc > old_sz) { // Not enough space, need to expand hierarchy + while (nproc > old_sz) { + old_sz *= 2; + incs++; + depth++; + } + maxLevels += incs; + + // Resize arrays + kmp_uint32 *old_numPerLevel = numPerLevel; + kmp_uint32 *old_skipPerLevel = skipPerLevel; + numPerLevel = skipPerLevel = NULL; + numPerLevel = + (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32)); + skipPerLevel = &(numPerLevel[maxLevels]); + + // Copy old elements from old arrays + for (kmp_uint32 i = 0; i < old_maxLevels; + ++i) { // init numPerLevel[*] to 1 item per level + numPerLevel[i] = old_numPerLevel[i]; + skipPerLevel[i] = old_skipPerLevel[i]; + } + + // Init new elements in arrays to 1 + for (kmp_uint32 i = 
old_maxLevels; i < maxLevels; + ++i) { // init numPerLevel[*] to 1 item per level + numPerLevel[i] = 1; + skipPerLevel[i] = 1; + } + + // Free old arrays + __kmp_free(old_numPerLevel); + } - // Fill in oversubscription levels of hierarchy - for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) - skipPerLevel[i] = 2*skipPerLevel[i-1]; + // Fill in oversubscription levels of hierarchy + for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) + skipPerLevel[i] = 2 * skipPerLevel[i - 1]; - base_num_threads = nproc; - resizing = 0; // One writer + base_num_threads = nproc; + resizing = 0; // One writer - } + } }; #endif // KMP_AFFINITY_H |