aboutsummaryrefslogtreecommitdiff
path: root/runtime/src
diff options
context:
space:
mode:
Diffstat (limited to 'runtime/src')
-rw-r--r-- runtime/src/kmp.h 70
-rw-r--r-- runtime/src/kmp_ftn_entry.h 4
-rw-r--r-- runtime/src/kmp_global.c 9
-rw-r--r-- runtime/src/z_Windows_NT_util.c 67
4 files changed, 102 insertions, 48 deletions
diff --git a/runtime/src/kmp.h b/runtime/src/kmp.h
index cc81a50..82b607b 100644
--- a/runtime/src/kmp.h
+++ b/runtime/src/kmp.h
@@ -79,10 +79,8 @@
class kmp_stats_list;
#endif
-#if KMP_USE_HWLOC
-#include "hwloc.h"
-extern hwloc_topology_t __kmp_hwloc_topology;
-extern int __kmp_hwloc_error;
+#if KMP_USE_HWLOC && KMP_AFFINITY_SUPPORTED
+# include "hwloc.h"
#endif
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
@@ -522,14 +520,43 @@ typedef int PACKED_REDUCTION_METHOD_T;
*/
#if KMP_AFFINITY_SUPPORTED
+# if KMP_GROUP_AFFINITY
+// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
+# if _MSC_VER < 1600
+typedef struct GROUP_AFFINITY {
+ KAFFINITY Mask;
+ WORD Group;
+ WORD Reserved[3];
+} GROUP_AFFINITY;
+# endif /* _MSC_VER < 1600 */
+extern int __kmp_num_proc_groups;
+typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD);
+extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount;
+
+typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void);
+extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount;
+
+typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *);
+extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity;
+
+typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, GROUP_AFFINITY *);
+extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
+# endif /* KMP_GROUP_AFFINITY */
+
extern size_t __kmp_affin_mask_size;
# define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0)
# define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0)
# define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size)
-# define KMP_CPU_SETSIZE (__kmp_affin_mask_size * CHAR_BIT)
+# if !KMP_USE_HWLOC
+# define KMP_CPU_SETSIZE (__kmp_affin_mask_size * CHAR_BIT)
+# define KMP_CPU_SET_ITERATE(i,mask) \
+ for(i = 0; (size_t)i < KMP_CPU_SETSIZE; ++i)
+# endif
#if KMP_USE_HWLOC
+extern hwloc_topology_t __kmp_hwloc_topology;
+extern int __kmp_hwloc_error;
typedef hwloc_cpuset_t kmp_affin_mask_t;
# define KMP_CPU_SET(i,mask) hwloc_bitmap_set((hwloc_cpuset_t)mask, (unsigned)i)
# define KMP_CPU_ISSET(i,mask) hwloc_bitmap_isset((hwloc_cpuset_t)mask, (unsigned)i)
@@ -600,9 +627,6 @@ typedef hwloc_cpuset_t kmp_affin_mask_t;
}
#else /* KMP_USE_HWLOC */
-# define KMP_CPU_SET_ITERATE(i,mask) \
- for(i = 0; (size_t)i < KMP_CPU_SETSIZE; ++i)
-
# if KMP_OS_LINUX
//
// On Linux* OS, the mask is actually a vector of length __kmp_affin_mask_size
@@ -678,20 +702,8 @@ typedef unsigned char kmp_affin_mask_t;
//
# if KMP_GROUP_AFFINITY
-
-// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
-# if _MSC_VER < 1600
-typedef struct GROUP_AFFINITY {
- KAFFINITY Mask;
- WORD Group;
- WORD Reserved[3];
-} GROUP_AFFINITY;
-# endif
-
typedef DWORD_PTR kmp_affin_mask_t;
-extern int __kmp_num_proc_groups;
-
# define _KMP_CPU_SET(i,mask) \
(mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] |= \
(((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t)))))
@@ -758,19 +770,6 @@ extern int __kmp_num_proc_groups;
} \
}
-typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD);
-extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount;
-
-typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void);
-extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount;
-
-typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *);
-extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity;
-
-typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, GROUP_AFFINITY *);
-extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
-
-extern int __kmp_get_proc_group(kmp_affin_mask_t const *mask);
# else /* KMP_GROUP_AFFINITY */
@@ -817,6 +816,11 @@ typedef DWORD kmp_affin_mask_t; /* for compatibility with older winbase.h */
#endif /* KMP_USE_HWLOC */
+// This prototype must come after the typedef of kmp_affin_mask_t, which it uses as its parameter type
+#if KMP_GROUP_AFFINITY
+extern int __kmp_get_proc_group(kmp_affin_mask_t const *mask);
+#endif
+
//
// Declare local char buffers with this size for printing debug and info
// messages, using __kmp_affinity_print_mask().
diff --git a/runtime/src/kmp_ftn_entry.h b/runtime/src/kmp_ftn_entry.h
index fe018bf..b9de5e3 100644
--- a/runtime/src/kmp_ftn_entry.h
+++ b/runtime/src/kmp_ftn_entry.h
@@ -270,9 +270,9 @@ FTN_GET_AFFINITY_MAX_PROC( void )
return 0;
}
- #if KMP_GROUP_AFFINITY && !KMP_USE_HWLOC
+ #if KMP_GROUP_AFFINITY
if ( __kmp_num_proc_groups > 1 ) {
- return (int)KMP_CPU_SETSIZE;
+ return (int)(__kmp_num_proc_groups*sizeof(DWORD_PTR)*CHAR_BIT);
}
#endif /* KMP_GROUP_AFFINITY */
return __kmp_xproc;
diff --git a/runtime/src/kmp_global.c b/runtime/src/kmp_global.c
index 63e9dc3..2cf0e2f 100644
--- a/runtime/src/kmp_global.c
+++ b/runtime/src/kmp_global.c
@@ -35,10 +35,6 @@ __thread kmp_stats_list* __kmp_stats_thread_ptr = &__kmp_stats_list;
// gives reference tick for all events (considered the 0 tick)
tsc_tick_count __kmp_stats_start_time;
#endif
-#if KMP_USE_HWLOC
-int __kmp_hwloc_error = FALSE;
-hwloc_topology_t __kmp_hwloc_topology = NULL;
-#endif
/* ----------------------------------------------------- */
/* INITIALIZATION VARIABLES */
@@ -220,6 +216,11 @@ enum mic_type __kmp_mic_type = non_mic;
#if KMP_AFFINITY_SUPPORTED
+# if KMP_USE_HWLOC
+int __kmp_hwloc_error = FALSE;
+hwloc_topology_t __kmp_hwloc_topology = NULL;
+# endif
+
# if KMP_GROUP_AFFINITY
int __kmp_num_proc_groups = 1;
diff --git a/runtime/src/z_Windows_NT_util.c b/runtime/src/z_Windows_NT_util.c
index 0191352..f3ae0a4 100644
--- a/runtime/src/z_Windows_NT_util.c
+++ b/runtime/src/z_Windows_NT_util.c
@@ -552,9 +552,18 @@ __kmp_get_proc_group( kmp_affin_mask_t const *mask )
int i;
int group = -1;
for (i = 0; i < __kmp_num_proc_groups; i++) {
+#if KMP_USE_HWLOC
+ // On Windows, the long type is always 32 bits, so two consecutive ulongs are read for each processor group
+ unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong((hwloc_const_bitmap_t)mask, i*2);
+ unsigned long second_32_bits = hwloc_bitmap_to_ith_ulong((hwloc_const_bitmap_t)mask, i*2+1);
+ if (first_32_bits == 0 && second_32_bits == 0) {
+ continue;
+ }
+#else
if (mask[i] == 0) {
continue;
}
+#endif
if (group >= 0) {
return -1;
}
@@ -568,8 +577,23 @@ __kmp_get_proc_group( kmp_affin_mask_t const *mask )
int
__kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error )
{
-
-#if KMP_GROUP_AFFINITY
+#if KMP_USE_HWLOC
+ int retval = hwloc_set_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD);
+ if (retval >= 0) {
+ return 0;
+ }
+ int error = errno;
+ if (abort_on_error) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( FatalSysError ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }
+ return error;
+#else
+# if KMP_GROUP_AFFINITY
if (__kmp_num_proc_groups > 1) {
//
@@ -608,7 +632,7 @@ __kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error )
}
else
-#endif /* KMP_GROUP_AFFINITY */
+# endif /* KMP_GROUP_AFFINITY */
{
if (!SetThreadAffinityMask( GetCurrentThread(), *mask )) {
@@ -624,14 +648,30 @@ __kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error )
return error;
}
}
+#endif /* KMP_USE_HWLOC */
return 0;
}
int
__kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error )
{
-
-#if KMP_GROUP_AFFINITY
+#if KMP_USE_HWLOC
+ int retval = hwloc_get_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD);
+ if (retval >= 0) {
+ return 0;
+ }
+ int error = errno;
+ if (abort_on_error) {
+ __kmp_msg(
+ kmp_ms_fatal,
+ KMP_MSG( FatalSysError ),
+ KMP_ERR( error ),
+ __kmp_msg_null
+ );
+ }
+ return error;
+#else /* KMP_USE_HWLOC */
+# if KMP_GROUP_AFFINITY
if (__kmp_num_proc_groups > 1) {
KMP_CPU_ZERO(mask);
@@ -660,7 +700,7 @@ __kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error )
}
else
-#endif /* KMP_GROUP_AFFINITY */
+# endif /* KMP_GROUP_AFFINITY */
{
kmp_affin_mask_t newMask, sysMask, retval;
@@ -704,14 +744,22 @@ __kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error )
}
*mask = retval;
}
+#endif /* KMP_USE_HWLOC */
return 0;
}
void
__kmp_affinity_bind_thread( int proc )
{
-
-#if KMP_GROUP_AFFINITY
+#if KMP_USE_HWLOC
+ kmp_affin_mask_t *mask;
+ KMP_CPU_ALLOC_ON_STACK(mask);
+ KMP_CPU_ZERO(mask);
+ KMP_CPU_SET(proc, mask);
+ __kmp_set_system_affinity(mask, TRUE);
+ KMP_CPU_FREE_FROM_STACK(mask);
+#else /* KMP_USE_HWLOC */
+# if KMP_GROUP_AFFINITY
if (__kmp_num_proc_groups > 1) {
//
@@ -740,7 +788,7 @@ __kmp_affinity_bind_thread( int proc )
}
else
-#endif /* KMP_GROUP_AFFINITY */
+# endif /* KMP_GROUP_AFFINITY */
{
kmp_affin_mask_t mask;
@@ -748,6 +796,7 @@ __kmp_affinity_bind_thread( int proc )
KMP_CPU_SET(proc, &mask);
__kmp_set_system_affinity(&mask, TRUE);
}
+#endif /* KMP_USE_HWLOC */
}
void