aboutsummaryrefslogtreecommitdiff
path: root/runtime/src/kmp_affinity.cpp
diff options
context:
space:
mode:
author: Jonathan Peyton <jonathan.l.peyton@intel.com> 2016-06-16 20:31:19 +0000
committer: Jonathan Peyton <jonathan.l.peyton@intel.com> 2016-06-16 20:31:19 +0000
commit: e82ed9c802a452bfe4b17a07353e650eadc31e5b (patch)
tree: 249a9cacec092eec6a7e8a832cf097a50d4b5c31 /runtime/src/kmp_affinity.cpp
parent: c1cd14fc405c9e602411579c113b5849b7b1a390 (diff)
Change hwloc discovery algorithm to print topology only for accessible resources
Change the hwloc discovery algorithm to print the topology only for accessible resources, and to report uniformity correspondingly, similar to what the other topology discovery algorithms do. This fixes a minor inconsistency between the total topology reported and the resources actually used for thread binding when hwloc is used. Patch by Andrey Churbanov. Differential Revision: http://reviews.llvm.org/D21389 git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@272952 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'runtime/src/kmp_affinity.cpp')
-rw-r--r-- runtime/src/kmp_affinity.cpp 46
1 files changed, 29 insertions, 17 deletions
diff --git a/runtime/src/kmp_affinity.cpp b/runtime/src/kmp_affinity.cpp
index 6f4c7c1..00ba5c1 100644
--- a/runtime/src/kmp_affinity.cpp
+++ b/runtime/src/kmp_affinity.cpp
@@ -389,9 +389,6 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
int pkgLevel = 0;
int coreLevel = 1;
int threadLevel = 2;
- nPackages = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_root_obj(__kmp_hwloc_topology), HWLOC_OBJ_SOCKET);
- nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0), HWLOC_OBJ_CORE);
- __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
if (! KMP_AFFINITY_CAPABLE())
{
@@ -401,6 +398,8 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
//
KMP_ASSERT(__kmp_affinity_type == affinity_none);
+ nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0), HWLOC_OBJ_CORE);
+ __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
__kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
if (__kmp_affinity_verbose) {
@@ -423,23 +422,34 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
//
AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
+ //
+ // When affinity is off, this routine will still be called to set
+ // __kmp_ncores, as well as __kmp_nThreadsPerCore,
+ // nCoresPerPkg, & nPackages. Make sure all these vars are set
+ // correctly, and return if affinity is not enabled.
+ //
+
hwloc_obj_t pu;
hwloc_obj_t core;
hwloc_obj_t socket;
int nActiveThreads = 0;
int socket_identifier = 0;
+ // re-calculate globals to count only accessible resources
+ __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
for(socket = hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0);
socket != NULL;
socket = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, socket),
socket_identifier++)
{
int core_identifier = 0;
+ int num_active_cores = 0;
for(core = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, socket->type, socket->logical_index, HWLOC_OBJ_CORE, 0);
core != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, socket->type, core) == socket;
core = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, core),
core_identifier++)
{
int pu_identifier = 0;
+ int num_active_threads = 0;
for(pu = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, core->type, core->logical_index, HWLOC_OBJ_PU, 0);
pu != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, core->type, pu) == core;
pu = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU, pu),
@@ -447,7 +457,7 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
{
Address addr(3);
if(! KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
- continue;
+ continue; // skip inactive (inaccessible) unit
KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
socket->os_index, socket->logical_index, core->os_index, core->logical_index, pu->os_index,pu->logical_index));
addr.labels[0] = socket_identifier; // package
@@ -455,13 +465,26 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
addr.labels[2] = pu_identifier; // pu
retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
nActiveThreads++;
+ ++num_active_threads; // count active threads per core
+ }
+ if (num_active_threads) { // were there any active threads on the core?
+ ++__kmp_ncores; // count total active cores
+ ++num_active_cores; // count active cores per socket
+ if (num_active_threads > __kmp_nThreadsPerCore)
+ __kmp_nThreadsPerCore = num_active_threads; // calc maximum
}
}
+ if (num_active_cores) { // were there any active cores on the socket?
+ ++nPackages; // count total active packages
+ if (num_active_cores > nCoresPerPkg)
+ nCoresPerPkg = num_active_cores; // calc maximum
+ }
}
//
// If there's only one thread context to bind to, return now.
//
+ KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
KMP_ASSERT(nActiveThreads > 0);
if (nActiveThreads == 1) {
__kmp_ncores = nPackages = 1;
@@ -514,20 +537,9 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
qsort(retval, nActiveThreads, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
//
- // When affinity is off, this routine will still be called to set
- // __kmp_ncores, as well as __kmp_nThreadsPerCore,
- // nCoresPerPkg, & nPackages. Make sure all these vars are set
- // correctly, and return if affinity is not enabled.
- //
- __kmp_ncores = hwloc_get_nbobjs_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE);
-
- //
// Check to see if the machine topology is uniform
//
- unsigned npackages = hwloc_get_nbobjs_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET);
- unsigned ncores = __kmp_ncores;
- unsigned nthreads = hwloc_get_nbobjs_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU);
- unsigned uniform = (npackages * nCoresPerPkg * __kmp_nThreadsPerCore == nthreads);
+ unsigned uniform = (nPackages * nCoresPerPkg * __kmp_nThreadsPerCore == nActiveThreads);
//
// Print the machine topology summary.
@@ -552,7 +564,7 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
kmp_str_buf_t buf;
__kmp_str_buf_init(&buf);
- __kmp_str_buf_print(&buf, "%d", npackages);
+ __kmp_str_buf_print(&buf, "%d", nPackages);
//for (level = 1; level <= pkgLevel; level++) {
// __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
// }