aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPetri Savolainen <petri.savolainen@nokia.com>2015-12-11 13:30:50 +0200
committerMaxim Uvarov <maxim.uvarov@linaro.org>2015-12-29 14:13:46 +0300
commit5c30c35b62c880f1807995c7a0b30a30cfdc8263 (patch)
treed80a8a4fb5e39211482e8e869e1f1d51ad18b20b
parentc6ced247726084d5711d0de7d382f645a9d94f18 (diff)
api: barrier: added memory barriers
Added new memory barriers. These follow the C11 release/acquire specification and replace odp_sync_stores(). GCC __atomic_thread_fence is used to implement all three barriers. Signed-off-by: Petri Savolainen <petri.savolainen@nokia.com> Reviewed-by: Bill Fischofer <bill.fischofer@linaro.org> Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org>
-rw-r--r--include/odp/api/barrier.h11
-rw-r--r--include/odp/api/sync.h82
-rw-r--r--platform/linux-generic/include/odp/sync.h28
3 files changed, 90 insertions, 31 deletions
diff --git a/include/odp/api/barrier.h b/include/odp/api/barrier.h
index 8ca264727..823eae66c 100644
--- a/include/odp/api/barrier.h
+++ b/include/odp/api/barrier.h
@@ -18,8 +18,15 @@
extern "C" {
#endif
-/** @defgroup odp_barrier ODP BARRIER
- * Thread excution and memory ordering barriers.
+/**
+ * @defgroup odp_barrier ODP BARRIER
+ * Thread execution and memory ordering barriers.
+ *
+ * @details
+ * <b> Thread execution barrier (odp_barrier_t) </b>
+ *
+ * Thread execution barrier synchronizes a group of threads to wait on the
+ * barrier until the entire group has reached the barrier.
* @{
*/
diff --git a/include/odp/api/sync.h b/include/odp/api/sync.h
index 6477e7475..c6f790c6f 100644
--- a/include/odp/api/sync.h
+++ b/include/odp/api/sync.h
@@ -8,7 +8,7 @@
/**
* @file
*
- * ODP synchronisation
+ * ODP memory barriers
*/
#ifndef ODP_API_SYNC_H_
@@ -18,42 +18,66 @@
extern "C" {
#endif
-/** @addtogroup odp_barrier
+/**
+ * @addtogroup odp_barrier
+ * @details
+ * <b> Memory barriers </b>
+ *
+ * Memory barriers enforce ordering of memory load and store operations
+ * specified before and after the barrier. These barriers may affect both
+ * compiler optimizations and CPU out-of-order execution. All ODP
+ * synchronization mechanisms (e.g. execution barriers, locks, queues, etc.)
+ * include all necessary memory barriers, so these calls are not needed when
+ * using those. ODP atomic operations also have memory ordered versions. These
+ * explicit barriers may be needed when thread synchronization is based on
+ * a non-ODP defined mechanism. Depending on the HW platform, heavy usage of
+ * memory barriers may cause significant performance degradation.
+ *
* @{
*/
/**
- * Synchronise stores
+ * Memory barrier for release operations
*
- * Ensures that all CPU store operations that precede the odp_sync_stores()
- * call are globally visible before any store operation that follows it.
+ * This memory barrier has release semantics. It synchronizes with a pairing
+ * barrier for acquire operations. The releasing and acquiring threads
+ * synchronize through shared memory. The releasing thread must call this
+ * barrier before signaling the acquiring thread. After the acquiring thread
+ * receives the signal, it must call odp_mb_acquire() before it reads the
+ * memory written by the releasing thread.
+ *
+ * This call is not needed when using ODP defined synchronization mechanisms.
+ *
+ * @see odp_mb_acquire()
*/
-static inline void odp_sync_stores(void)
-{
-#if defined __x86_64__ || defined __i386__
-
- __asm__ __volatile__ ("sfence\n" : : : "memory");
-
-#elif defined(__arm__)
-#if __ARM_ARCH == 6
- __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \
- : : "r" (0) : "memory");
-#elif __ARM_ARCH >= 7 || defined __aarch64__
-
- __asm__ __volatile__ ("dmb st" : : : "memory");
-#else
- __asm__ __volatile__ ("" : : : "memory");
-#endif
-
-#elif defined __OCTEON__
-
- __asm__ __volatile__ ("syncws\n" : : : "memory");
+void odp_mb_release(void);
-#else
- __sync_synchronize();
-#endif
-}
+/**
+ * Memory barrier for acquire operations
+ *
+ * This memory barrier has acquire semantics. It synchronizes with a pairing
+ * barrier for release operations. The releasing and acquiring threads
+ * synchronize through shared memory. The releasing thread must call
+ * odp_mb_release() before signaling the acquiring thread. After the acquiring
+ * thread receives the signal, it must call this barrier before it reads the
+ * memory written by the releasing thread.
+ *
+ * This call is not needed when using ODP defined synchronization mechanisms.
+ *
+ * @see odp_mb_release()
+ */
+void odp_mb_acquire(void);
+/**
+ * Full memory barrier
+ *
+ * This is a full memory barrier. It guarantees that all load and store
+ * operations specified before it are visible to other threads before
+ * all load and store operations specified after it.
+ *
+ * This call is not needed when using ODP defined synchronization mechanisms.
+ */
+void odp_mb_full(void);
/**
* @}
diff --git a/platform/linux-generic/include/odp/sync.h b/platform/linux-generic/include/odp/sync.h
index bc7308301..bfe67eeb4 100644
--- a/platform/linux-generic/include/odp/sync.h
+++ b/platform/linux-generic/include/odp/sync.h
@@ -17,6 +17,34 @@
extern "C" {
#endif
+/** @ingroup odp_barrier
+ * @{
+ */
+
+static inline void odp_mb_release(void)
+{
+ __atomic_thread_fence(__ATOMIC_RELEASE);
+}
+
+static inline void odp_mb_acquire(void)
+{
+ __atomic_thread_fence(__ATOMIC_ACQUIRE);
+}
+
+static inline void odp_mb_full(void)
+{
+ __atomic_thread_fence(__ATOMIC_SEQ_CST);
+}
+
+static inline void odp_sync_stores(void)
+{
+ __atomic_thread_fence(__ATOMIC_RELEASE);
+}
+
+/**
+ * @}
+ */
+
#include <odp/api/sync.h>
#ifdef __cplusplus