diff options
Diffstat (limited to 'runtime/src/kmp_threadprivate.cpp')
-rw-r--r-- | runtime/src/kmp_threadprivate.cpp | 140 |
1 files changed, 113 insertions, 27 deletions
diff --git a/runtime/src/kmp_threadprivate.cpp b/runtime/src/kmp_threadprivate.cpp index 362c075..e64097b 100644 --- a/runtime/src/kmp_threadprivate.cpp +++ b/runtime/src/kmp_threadprivate.cpp @@ -594,6 +594,13 @@ void *__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data, return ret; } +static kmp_cached_addr_t *__kmp_find_cache(void *data) { + kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list; + while (ptr && ptr->data != data) + ptr = ptr->next; + return ptr; +} + /*! @ingroup THREADPRIVATE @param loc source location information @@ -620,35 +627,40 @@ __kmpc_threadprivate_cached(ident_t *loc, if (TCR_PTR(*cache) == 0) { __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock); - __kmp_tp_cached = 1; - __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock); + // Compiler often passes in NULL cache, even if it's already been created void **my_cache; - KMP_ITT_IGNORE( - my_cache = (void **)__kmp_allocate( - sizeof(void *) * __kmp_tp_capacity + sizeof(kmp_cached_addr_t));); - // No need to zero the allocated memory; __kmp_allocate does that. - KC_TRACE( - 50, - ("__kmpc_threadprivate_cached: T#%d allocated cache at address %p\n", - global_tid, my_cache)); - - /* TODO: free all this memory in __kmp_common_destroy using - * __kmp_threadpriv_cache_list */ - /* Add address of mycache to linked list for cleanup later */ kmp_cached_addr_t *tp_cache_addr; - - tp_cache_addr = (kmp_cached_addr_t *)&my_cache[__kmp_tp_capacity]; - tp_cache_addr->addr = my_cache; - tp_cache_addr->next = __kmp_threadpriv_cache_list; - __kmp_threadpriv_cache_list = tp_cache_addr; - + // Look for an existing cache + tp_cache_addr = __kmp_find_cache(data); + if (!tp_cache_addr) { // Cache was never created; do it now + __kmp_tp_cached = 1; + KMP_ITT_IGNORE(my_cache = (void **)__kmp_allocate( + sizeof(void *) * __kmp_tp_capacity + + sizeof(kmp_cached_addr_t));); + // No need to zero the allocated memory; __kmp_allocate does that. + KC_TRACE(50, ("__kmpc_threadprivate_cached: T#%d allocated cache at " + "address %p\n", + global_tid, my_cache)); + /* TODO: free all this memory in __kmp_common_destroy using + * __kmp_threadpriv_cache_list */ + /* Add address of mycache to linked list for cleanup later */ + tp_cache_addr = (kmp_cached_addr_t *)&my_cache[__kmp_tp_capacity]; + tp_cache_addr->addr = my_cache; + tp_cache_addr->data = data; + tp_cache_addr->compiler_cache = cache; + tp_cache_addr->next = __kmp_threadpriv_cache_list; + __kmp_threadpriv_cache_list = tp_cache_addr; + } else { // A cache was already created; use it + my_cache = tp_cache_addr->addr; + tp_cache_addr->compiler_cache = cache; + } KMP_MB(); TCW_PTR(*cache, my_cache); + __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock); KMP_MB(); } - __kmp_release_lock(&__kmp_global_lock, global_tid); } @@ -661,10 +673,68 @@ __kmpc_threadprivate_cached(ident_t *loc, KC_TRACE(10, ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n", global_tid, ret)); - return ret; } +// This function should only be called when both __kmp_tp_cached_lock and +// kmp_forkjoin_lock are held. +void __kmp_threadprivate_resize_cache(int newCapacity) { + KC_TRACE(10, ("__kmp_threadprivate_resize_cache: called with size: %d\n", + newCapacity)); + + kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list; + + while (ptr) { + if (ptr->data) { // this location has an active cache; resize it + void **my_cache; + KMP_ITT_IGNORE(my_cache = + (void **)__kmp_allocate(sizeof(void *) * newCapacity + + sizeof(kmp_cached_addr_t));); + // No need to zero the allocated memory; __kmp_allocate does that. + KC_TRACE(50, ("__kmp_threadprivate_resize_cache: allocated cache at %p\n", + my_cache)); + // Now copy old cache into new cache + void **old_cache = ptr->addr; + for (int i = 0; i < __kmp_tp_capacity; ++i) { + my_cache[i] = old_cache[i]; + } + + // Add address of new my_cache to linked list for cleanup later + kmp_cached_addr_t *tp_cache_addr; + tp_cache_addr = (kmp_cached_addr_t *)&my_cache[newCapacity]; + tp_cache_addr->addr = my_cache; + tp_cache_addr->data = ptr->data; + tp_cache_addr->compiler_cache = ptr->compiler_cache; + tp_cache_addr->next = __kmp_threadpriv_cache_list; + __kmp_threadpriv_cache_list = tp_cache_addr; + + // Copy new cache to compiler's location: We can copy directly + // to (*compiler_cache) if compiler guarantees it will keep + // using the same location for the cache. This is not yet true + // for some compilers, in which case we have to check if + // compiler_cache is still pointing at old cache, and if so, we + // can point it at the new cache with an atomic compare&swap + // operation. (Old method will always work, but we should shift + // to new method (commented line below) when Intel and Clang + // compilers use new method.) + (void)KMP_COMPARE_AND_STORE_PTR(tp_cache_addr->compiler_cache, old_cache, + my_cache); + //TCW_PTR(*(tp_cache_addr->compiler_cache), my_cache); + + // If the store doesn't happen here, the compiler's old behavior will + // inevitably call __kmpc_threadprivate_cache with a new location for the + // cache, and that function will store the resized cache there at that + // point. + + // Nullify old cache's data pointer so we skip it next time + ptr->data = NULL; + } + ptr = ptr->next; + } + // After all caches are resized, update __kmp_tp_capacity to the new size + *(volatile int *)&__kmp_tp_capacity = newCapacity; +} + /*! @ingroup THREADPRIVATE @param loc source location information @@ -701,14 +771,30 @@ void __kmpc_threadprivate_register_vec(ident_t *loc, void *data, d_tn->dt.dtorv = dtor; d_tn->is_vec = TRUE; d_tn->vec_len = (size_t)vector_length; - /* - d_tn->obj_init = 0; // AC: commented out because __kmp_allocate - zeroes the memory - d_tn->pod_init = 0; - */ + // d_tn->obj_init = 0; // AC: __kmp_allocate zeroes the memory + // d_tn->pod_init = 0; lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]); d_tn->next = *lnk_tn; *lnk_tn = d_tn; } } + +void __kmp_cleanup_threadprivate_caches() { + kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list; + + while (ptr) { + void **cache = ptr->addr; + __kmp_threadpriv_cache_list = ptr->next; + if (*ptr->compiler_cache) + *ptr->compiler_cache = NULL; + ptr->compiler_cache = NULL; + ptr->data = NULL; + ptr->addr = NULL; + ptr->next = NULL; + // Threadprivate data pointed at by cache entries are destroyed at end of + // __kmp_launch_thread with __kmp_common_destroy_gtid. + __kmp_free(cache); // implicitly frees ptr too + ptr = __kmp_threadpriv_cache_list; + } +} |