diff options
Diffstat (limited to 'runtime/src')
-rw-r--r-- | runtime/src/kmp.h | 70 | ||||
-rw-r--r-- | runtime/src/kmp_ftn_entry.h | 4 | ||||
-rw-r--r-- | runtime/src/kmp_global.c | 9 | ||||
-rw-r--r-- | runtime/src/z_Windows_NT_util.c | 67 |
4 files changed, 102 insertions, 48 deletions
diff --git a/runtime/src/kmp.h b/runtime/src/kmp.h index cc81a50..82b607b 100644 --- a/runtime/src/kmp.h +++ b/runtime/src/kmp.h @@ -79,10 +79,8 @@ class kmp_stats_list; #endif -#if KMP_USE_HWLOC -#include "hwloc.h" -extern hwloc_topology_t __kmp_hwloc_topology; -extern int __kmp_hwloc_error; +#if KMP_USE_HWLOC && KMP_AFFINITY_SUPPORTED +# include "hwloc.h" #endif #if KMP_ARCH_X86 || KMP_ARCH_X86_64 @@ -522,14 +520,43 @@ typedef int PACKED_REDUCTION_METHOD_T; */ #if KMP_AFFINITY_SUPPORTED +# if KMP_GROUP_AFFINITY +// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later). +# if _MSC_VER < 1600 +typedef struct GROUP_AFFINITY { + KAFFINITY Mask; + WORD Group; + WORD Reserved[3]; +} GROUP_AFFINITY; +# endif /* _MSC_VER < 1600 */ +extern int __kmp_num_proc_groups; +typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD); +extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount; + +typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void); +extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount; + +typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *); +extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity; + +typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, GROUP_AFFINITY *); +extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity; +# endif /* KMP_GROUP_AFFINITY */ + extern size_t __kmp_affin_mask_size; # define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0) # define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0) # define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size) -# define KMP_CPU_SETSIZE (__kmp_affin_mask_size * CHAR_BIT) +# if !KMP_USE_HWLOC +# define KMP_CPU_SETSIZE (__kmp_affin_mask_size * CHAR_BIT) +# define KMP_CPU_SET_ITERATE(i,mask) \ + for(i = 0; (size_t)i < KMP_CPU_SETSIZE; ++i) +# endif #if KMP_USE_HWLOC +extern hwloc_topology_t __kmp_hwloc_topology; +extern int __kmp_hwloc_error; typedef hwloc_cpuset_t kmp_affin_mask_t; # define KMP_CPU_SET(i,mask) hwloc_bitmap_set((hwloc_cpuset_t)mask, (unsigned)i) # define KMP_CPU_ISSET(i,mask) hwloc_bitmap_isset((hwloc_cpuset_t)mask, (unsigned)i) @@ -600,9 +627,6 @@ typedef hwloc_cpuset_t kmp_affin_mask_t; } #else /* KMP_USE_HWLOC */ -# define KMP_CPU_SET_ITERATE(i,mask) \ - for(i = 0; (size_t)i < KMP_CPU_SETSIZE; ++i) - # if KMP_OS_LINUX // // On Linux* OS, the mask is actually a vector of length __kmp_affin_mask_size @@ -678,20 +702,8 @@ typedef unsigned char kmp_affin_mask_t; // # if KMP_GROUP_AFFINITY - -// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later). -# if _MSC_VER < 1600 -typedef struct GROUP_AFFINITY { - KAFFINITY Mask; - WORD Group; - WORD Reserved[3]; -} GROUP_AFFINITY; -# endif - typedef DWORD_PTR kmp_affin_mask_t; -extern int __kmp_num_proc_groups; - # define _KMP_CPU_SET(i,mask) \ (mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] |= \ (((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t))))) @@ -758,19 +770,6 @@ extern int __kmp_num_proc_groups; } \ } -typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD); -extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount; - -typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void); -extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount; - -typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *); -extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity; - -typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, GROUP_AFFINITY *); -extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity; - -extern int __kmp_get_proc_group(kmp_affin_mask_t const *mask); # else /* KMP_GROUP_AFFINITY */ @@ -817,6 +816,11 @@ typedef DWORD kmp_affin_mask_t; /* for compatibility with older winbase.h */ #endif /* KMP_USE_HWLOC */ +// prototype after typedef of kmp_affin_mask_t +#if KMP_GROUP_AFFINITY +extern int __kmp_get_proc_group(kmp_affin_mask_t const *mask); +#endif + // // Declare local char buffers with this size for printing debug and info // messages, using __kmp_affinity_print_mask(). diff --git a/runtime/src/kmp_ftn_entry.h b/runtime/src/kmp_ftn_entry.h index fe018bf..b9de5e3 100644 --- a/runtime/src/kmp_ftn_entry.h +++ b/runtime/src/kmp_ftn_entry.h @@ -270,9 +270,9 @@ FTN_GET_AFFINITY_MAX_PROC( void ) return 0; } - #if KMP_GROUP_AFFINITY && !KMP_USE_HWLOC + #if KMP_GROUP_AFFINITY if ( __kmp_num_proc_groups > 1 ) { - return (int)KMP_CPU_SETSIZE; + return (int)(__kmp_num_proc_groups*sizeof(DWORD_PTR)*CHAR_BIT); } #endif /* KMP_GROUP_AFFINITY */ return __kmp_xproc; diff --git a/runtime/src/kmp_global.c b/runtime/src/kmp_global.c index 63e9dc3..2cf0e2f 100644 --- a/runtime/src/kmp_global.c +++ b/runtime/src/kmp_global.c @@ -35,10 +35,6 @@ __thread kmp_stats_list* __kmp_stats_thread_ptr = &__kmp_stats_list; // gives reference tick for all events (considered the 0 tick) tsc_tick_count __kmp_stats_start_time; #endif -#if KMP_USE_HWLOC -int __kmp_hwloc_error = FALSE; -hwloc_topology_t __kmp_hwloc_topology = NULL; -#endif /* ----------------------------------------------------- */ /* INITIALIZATION VARIABLES */ @@ -220,6 +216,11 @@ enum mic_type __kmp_mic_type = non_mic; #if KMP_AFFINITY_SUPPORTED +# if KMP_USE_HWLOC +int __kmp_hwloc_error = FALSE; +hwloc_topology_t __kmp_hwloc_topology = NULL; +# endif + # if KMP_GROUP_AFFINITY int __kmp_num_proc_groups = 1; diff --git a/runtime/src/z_Windows_NT_util.c b/runtime/src/z_Windows_NT_util.c index 0191352..f3ae0a4 100644 --- a/runtime/src/z_Windows_NT_util.c +++ b/runtime/src/z_Windows_NT_util.c @@ -552,9 +552,18 @@ __kmp_get_proc_group( kmp_affin_mask_t const *mask ) int i; int group = -1; for (i = 0; i < __kmp_num_proc_groups; i++) { +#if KMP_USE_HWLOC + // On windows, the long type is always 32 bits + unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong((hwloc_const_bitmap_t)mask, i*2); + unsigned long second_32_bits = hwloc_bitmap_to_ith_ulong((hwloc_const_bitmap_t)mask, i*2+1); + if (first_32_bits == 0 && second_32_bits == 0) { + continue; + } +#else if (mask[i] == 0) { continue; } +#endif if (group >= 0) { return -1; } @@ -568,8 +577,23 @@ __kmp_get_proc_group( kmp_affin_mask_t const *mask ) int __kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error ) { - -#if KMP_GROUP_AFFINITY +#if KMP_USE_HWLOC + int retval = hwloc_set_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD); + if (retval >= 0) { + return 0; + } + int error = errno; + if (abort_on_error) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( FatalSysError ), + KMP_ERR( error ), + __kmp_msg_null + ); + } + return error; +#else +# if KMP_GROUP_AFFINITY if (__kmp_num_proc_groups > 1) { // @@ -608,7 +632,7 @@ __kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error ) } else -#endif /* KMP_GROUP_AFFINITY */ +# endif /* KMP_GROUP_AFFINITY */ { if (!SetThreadAffinityMask( GetCurrentThread(), *mask )) { @@ -624,14 +648,30 @@ __kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error ) return error; } } +#endif /* KMP_USE_HWLOC */ return 0; } int __kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error ) { - -#if KMP_GROUP_AFFINITY +#if KMP_USE_HWLOC + int retval = hwloc_get_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD); + if (retval >= 0) { + return 0; + } + int error = errno; + if (abort_on_error) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( FatalSysError ), + KMP_ERR( error ), + __kmp_msg_null + ); + } + return error; +#else /* KMP_USE_HWLOC */ +# if KMP_GROUP_AFFINITY if (__kmp_num_proc_groups > 1) { KMP_CPU_ZERO(mask); @@ -660,7 +700,7 @@ __kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error ) } else -#endif /* KMP_GROUP_AFFINITY */ +# endif /* KMP_GROUP_AFFINITY */ { kmp_affin_mask_t newMask, sysMask, retval; @@ -704,14 +744,22 @@ __kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error ) } *mask = retval; } +#endif /* KMP_USE_HWLOC */ return 0; } void __kmp_affinity_bind_thread( int proc ) { - -#if KMP_GROUP_AFFINITY +#if KMP_USE_HWLOC + kmp_affin_mask_t *mask; + KMP_CPU_ALLOC_ON_STACK(mask); + KMP_CPU_ZERO(mask); + KMP_CPU_SET(proc, mask); + __kmp_set_system_affinity(mask, TRUE); + KMP_CPU_FREE_FROM_STACK(mask); +#else /* KMP_USE_HWLOC */ +# if KMP_GROUP_AFFINITY if (__kmp_num_proc_groups > 1) { // @@ -740,7 +788,7 @@ __kmp_affinity_bind_thread( int proc ) } else -#endif /* KMP_GROUP_AFFINITY */ +# endif /* KMP_GROUP_AFFINITY */ { kmp_affin_mask_t mask; @@ -748,6 +796,7 @@ __kmp_affinity_bind_thread( int proc ) KMP_CPU_SET(proc, &mask); __kmp_set_system_affinity(&mask, TRUE); } +#endif /* KMP_USE_HWLOC */ } void |