diff options
author | Petri Savolainen <petri.savolainen@nokia.com> | 2015-12-11 13:30:50 +0200 |
---|---|---|
committer | Maxim Uvarov <maxim.uvarov@linaro.org> | 2015-12-29 14:13:46 +0300 |
commit | 5c30c35b62c880f1807995c7a0b30a30cfdc8263 (patch) | |
tree | d80a8a4fb5e39211482e8e869e1f1d51ad18b20b | |
parent | c6ced247726084d5711d0de7d382f645a9d94f18 (diff) |
api: barrier: added memory barriers
Added new memory barriers. These follow the C11 release/acquire
specification and replace odp_sync_stores(). GCC's
__atomic_thread_fence is used to implement all three barriers.
Signed-off-by: Petri Savolainen <petri.savolainen@nokia.com>
Reviewed-by: Bill Fischofer <bill.fischofer@linaro.org>
Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org>
-rw-r--r-- | include/odp/api/barrier.h | 11 | ||||
-rw-r--r-- | include/odp/api/sync.h | 82 | ||||
-rw-r--r-- | platform/linux-generic/include/odp/sync.h | 28 |
3 files changed, 90 insertions, 31 deletions
diff --git a/include/odp/api/barrier.h b/include/odp/api/barrier.h index 8ca264727..823eae66c 100644 --- a/include/odp/api/barrier.h +++ b/include/odp/api/barrier.h @@ -18,8 +18,15 @@ extern "C" { #endif -/** @defgroup odp_barrier ODP BARRIER - * Thread excution and memory ordering barriers. +/** + * @defgroup odp_barrier ODP BARRIER + * Thread excution and memory ordering barriers. + * + * @details + * <b> Thread execution barrier (odp_barrier_t) </b> + * + * Thread execution barrier synchronizes a group of threads to wait on the + * barrier until the entire group has reached the barrier. * @{ */ diff --git a/include/odp/api/sync.h b/include/odp/api/sync.h index 6477e7475..c6f790c6f 100644 --- a/include/odp/api/sync.h +++ b/include/odp/api/sync.h @@ -8,7 +8,7 @@ /** * @file * - * ODP synchronisation + * ODP memory barriers */ #ifndef ODP_API_SYNC_H_ @@ -18,42 +18,66 @@ extern "C" { #endif -/** @addtogroup odp_barrier +/** + * @addtogroup odp_barrier + * @details + * <b> Memory barriers </b> + * + * Memory barriers enforce ordering of memory load and store operations + * specified before and after the barrier. These barriers may affect both + * compiler optimizations and CPU out-of-order execution. All ODP + * synchronization mechanisms (e.g. execution barriers, locks, queues, etc ) + * include all necessary memory barriers, so these calls are not needed when + * using those. Also ODP atomic operations have memory ordered versions. These + * explicit barriers may be needed when thread synchronization is based on + * a non-ODP defined mechanism. Depending on the HW platform, heavy usage of + * memory barriers may cause significant performance degradation. + * * @{ */ /** - * Synchronise stores + * Memory barrier for release operations * - * Ensures that all CPU store operations that precede the odp_sync_stores() - * call are globally visible before any store operation that follows it. + * This memory barrier has release semantics. 
It synchronizes with a pairing + * barrier for acquire operations. The releasing and acquiring threads + * synchronize through shared memory. The releasing thread must call this + * barrier before signaling the acquiring thread. After the acquiring thread + * receives the signal, it must call odp_mb_acquire() before it reads the + * memory written by the releasing thread. + * + * This call is not needed when using ODP defined synchronization mechanisms. + * + * @see odp_mb_acquire() */ -static inline void odp_sync_stores(void) -{ -#if defined __x86_64__ || defined __i386__ - - __asm__ __volatile__ ("sfence\n" : : : "memory"); - -#elif defined(__arm__) -#if __ARM_ARCH == 6 - __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \ - : : "r" (0) : "memory"); -#elif __ARM_ARCH >= 7 || defined __aarch64__ - - __asm__ __volatile__ ("dmb st" : : : "memory"); -#else - __asm__ __volatile__ ("" : : : "memory"); -#endif - -#elif defined __OCTEON__ - - __asm__ __volatile__ ("syncws\n" : : : "memory"); +void odp_mb_release(void); -#else - __sync_synchronize(); -#endif -} +/** + * Memory barrier for acquire operations + * + * This memory barrier has acquire semantics. It synchronizes with a pairing + * barrier for release operations. The releasing and acquiring threads + * synchronize through shared memory. The releasing thread must call + * odp_mb_release() before signaling the acquiring thread. After the acquiring + * thread receives the signal, it must call this barrier before it reads the + * memory written by the releasing thread. + * + * This call is not needed when using ODP defined synchronization mechanisms. + * + * @see odp_mb_release() + */ +void odp_mb_acquire(void); +/** + * Full memory barrier + * + * This is a full memory barrier. It guarantees that all load and store + * operations specified before it are visible to other threads before + * all load and store operations specified after it. 
+ * + * This call is not needed when using ODP defined synchronization mechanisms. + */ +void odp_mb_full(void); /** * @} diff --git a/platform/linux-generic/include/odp/sync.h b/platform/linux-generic/include/odp/sync.h index bc7308301..bfe67eeb4 100644 --- a/platform/linux-generic/include/odp/sync.h +++ b/platform/linux-generic/include/odp/sync.h @@ -17,6 +17,34 @@ extern "C" { #endif +/** @ingroup odp_barrier + * @{ + */ + +static inline void odp_mb_release(void) +{ + __atomic_thread_fence(__ATOMIC_RELEASE); +} + +static inline void odp_mb_acquire(void) +{ + __atomic_thread_fence(__ATOMIC_ACQUIRE); +} + +static inline void odp_mb_full(void) +{ + __atomic_thread_fence(__ATOMIC_SEQ_CST); +} + +static inline void odp_sync_stores(void) +{ + __atomic_thread_fence(__ATOMIC_RELEASE); +} + +/** + * @} + */ + #include <odp/api/sync.h> #ifdef __cplusplus |