diff options
Diffstat (limited to 'test/common_plat/validation/api/barrier/barrier.c')
-rw-r--r-- | test/common_plat/validation/api/barrier/barrier.c | 397 |
1 files changed, 397 insertions, 0 deletions
diff --git a/test/common_plat/validation/api/barrier/barrier.c b/test/common_plat/validation/api/barrier/barrier.c new file mode 100644 index 000000000..d4583884a --- /dev/null +++ b/test/common_plat/validation/api/barrier/barrier.c @@ -0,0 +1,397 @@ +/* Copyright (c) 2014, Linaro Limited + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <malloc.h> +#include <odp_api.h> +#include <CUnit/Basic.h> +#include <odp_cunit_common.h> +#include <unistd.h> +#include "barrier.h" + +#define VERBOSE 0 +#define MAX_ITERATIONS 1000 +#define BARRIER_ITERATIONS 64 + +#define SLOW_BARRIER_DELAY 400 +#define BASE_DELAY 6 + +#define NUM_TEST_BARRIERS BARRIER_ITERATIONS +#define NUM_RESYNC_BARRIERS 100 + +#define BARRIER_DELAY 10 + +#define GLOBAL_SHM_NAME "GlobalLockTest" + +#define UNUSED __attribute__((__unused__)) + +static volatile int temp_result; + +typedef __volatile uint32_t volatile_u32_t; +typedef __volatile uint64_t volatile_u64_t; + +typedef struct { + odp_atomic_u32_t wait_cnt; +} custom_barrier_t; + +typedef struct { + /* Global variables */ + uint32_t g_num_threads; + uint32_t g_iterations; + uint32_t g_verbose; + uint32_t g_max_num_cores; + + odp_barrier_t test_barriers[NUM_TEST_BARRIERS]; + custom_barrier_t custom_barrier1[NUM_TEST_BARRIERS]; + custom_barrier_t custom_barrier2[NUM_TEST_BARRIERS]; + volatile_u32_t slow_thread_num; + volatile_u32_t barrier_cnt1; + volatile_u32_t barrier_cnt2; + odp_barrier_t global_barrier; + +} global_shared_mem_t; + +/* Per-thread memory */ +typedef struct { + global_shared_mem_t *global_mem; + + int thread_id; + int thread_core; + + volatile_u64_t delay_counter; +} per_thread_mem_t; + +static odp_shm_t global_shm; +static global_shared_mem_t *global_mem; + +/* +* Delay a consistent amount of time. Ideally the amount of CPU time taken +* is linearly proportional to "iterations". The goal is to try to do some +* work that the compiler optimizer won't optimize away, and also to +* minimize loads and stores (at least to different memory addresses) +* so as to not affect or be affected by caching issues. This does NOT have to +* correlate to a specific number of cpu cycles or be consistent across +* CPU architectures. +*/ +static void thread_delay(per_thread_mem_t *per_thread_mem, uint32_t iterations) +{ + volatile_u64_t *counter_ptr; + uint32_t cnt; + + counter_ptr = &per_thread_mem->delay_counter; + + for (cnt = 1; cnt <= iterations; cnt++) + (*counter_ptr)++; +} + +/* Initialise per-thread memory */ +static per_thread_mem_t *thread_init(void) +{ + global_shared_mem_t *global_mem; + per_thread_mem_t *per_thread_mem; + odp_shm_t global_shm; + uint32_t per_thread_mem_len; + + per_thread_mem_len = sizeof(per_thread_mem_t); + per_thread_mem = malloc(per_thread_mem_len); + memset(per_thread_mem, 0, per_thread_mem_len); + + per_thread_mem->delay_counter = 1; + + per_thread_mem->thread_id = odp_thread_id(); + per_thread_mem->thread_core = odp_cpu_id(); + + global_shm = odp_shm_lookup(GLOBAL_SHM_NAME); + global_mem = odp_shm_addr(global_shm); + CU_ASSERT_PTR_NOT_NULL(global_mem); + + per_thread_mem->global_mem = global_mem; + + return per_thread_mem; +} + +static void thread_finalize(per_thread_mem_t *per_thread_mem) +{ + free(per_thread_mem); +} + +static void custom_barrier_init(custom_barrier_t *custom_barrier, + uint32_t num_threads) +{ + odp_atomic_init_u32(&custom_barrier->wait_cnt, num_threads); +} + +static void custom_barrier_wait(custom_barrier_t *custom_barrier) +{ + volatile_u64_t counter = 1; + uint32_t delay_cnt, wait_cnt; + + odp_atomic_sub_u32(&custom_barrier->wait_cnt, 1); + + wait_cnt = 1; + while (wait_cnt != 0) { + for (delay_cnt = 1; delay_cnt <= BARRIER_DELAY; delay_cnt++) + counter++; + + wait_cnt = odp_atomic_load_u32(&custom_barrier->wait_cnt); + } +} + +static uint32_t barrier_test(per_thread_mem_t *per_thread_mem, + odp_bool_t no_barrier_test) +{ + global_shared_mem_t *global_mem; + uint32_t barrier_errs, iterations, cnt, i_am_slow_thread; + uint32_t thread_num, slow_thread_num, next_slow_thread, num_threads; + uint32_t lock_owner_delay, barrier_cnt1, barrier_cnt2; + + thread_num = odp_thread_id(); + global_mem = per_thread_mem->global_mem; + num_threads = global_mem->g_num_threads; + iterations = BARRIER_ITERATIONS; + + barrier_errs = 0; + lock_owner_delay = SLOW_BARRIER_DELAY; + + for (cnt = 1; cnt < iterations; cnt++) { + /* Wait here until all of the threads reach this point */ + custom_barrier_wait(&global_mem->custom_barrier1[cnt]); + + barrier_cnt1 = global_mem->barrier_cnt1; + barrier_cnt2 = global_mem->barrier_cnt2; + + if ((barrier_cnt1 != cnt) || (barrier_cnt2 != cnt)) { + printf("thread_num=%" PRIu32 " barrier_cnts of %" PRIu32 + " %" PRIu32 " cnt=%" PRIu32 "\n", + thread_num, barrier_cnt1, barrier_cnt2, cnt); + barrier_errs++; + } + + /* Wait here until all of the threads reach this point */ + custom_barrier_wait(&global_mem->custom_barrier2[cnt]); + + slow_thread_num = global_mem->slow_thread_num; + i_am_slow_thread = thread_num == slow_thread_num; + next_slow_thread = slow_thread_num + 1; + if (num_threads < next_slow_thread) + next_slow_thread = 1; + + /* + * Now run the test, which involves having all but one thread + * immediately calling odp_barrier_wait(), and one thread wait a + * moderate amount of time and then calling odp_barrier_wait(). + * The test fails if any of the first group of threads + * has not waited for the "slow" thread. The "slow" thread is + * responsible for re-initializing the barrier for next trial. + */ + if (i_am_slow_thread) { + thread_delay(per_thread_mem, lock_owner_delay); + lock_owner_delay += BASE_DELAY; + if ((global_mem->barrier_cnt1 != cnt) || + (global_mem->barrier_cnt2 != cnt) || + (global_mem->slow_thread_num + != slow_thread_num)) + barrier_errs++; + } + + if (no_barrier_test == 0) + odp_barrier_wait(&global_mem->test_barriers[cnt]); + + global_mem->barrier_cnt1 = cnt + 1; + odp_mb_full(); + + if (i_am_slow_thread) { + global_mem->slow_thread_num = next_slow_thread; + global_mem->barrier_cnt2 = cnt + 1; + odp_mb_full(); + } else { + while (global_mem->barrier_cnt2 != (cnt + 1)) + thread_delay(per_thread_mem, BASE_DELAY); + } + } + + if ((global_mem->g_verbose) && (barrier_errs != 0)) + printf("\nThread %" PRIu32 " (id=%d core=%d) had %" PRIu32 + " barrier_errs in %" PRIu32 " iterations\n", thread_num, + per_thread_mem->thread_id, + per_thread_mem->thread_core, barrier_errs, iterations); + + return barrier_errs; +} + +static int no_barrier_functional_test(void *arg UNUSED) +{ + per_thread_mem_t *per_thread_mem; + uint32_t barrier_errs; + + per_thread_mem = thread_init(); + barrier_errs = barrier_test(per_thread_mem, 1); + + /* + * Note that the following CU_ASSERT MAY appear incorrect, but for the + * no_barrier test it should see barrier_errs or else there is something + * wrong with the test methodology or the ODP thread implementation. + * So this test PASSES only if it sees barrier_errs or a single + * worker was used. + */ + CU_ASSERT(barrier_errs != 0 || global_mem->g_num_threads == 1); + thread_finalize(per_thread_mem); + + return CU_get_number_of_failures(); +} + +static int barrier_functional_test(void *arg UNUSED) +{ + per_thread_mem_t *per_thread_mem; + uint32_t barrier_errs; + + per_thread_mem = thread_init(); + barrier_errs = barrier_test(per_thread_mem, 0); + + CU_ASSERT(barrier_errs == 0); + thread_finalize(per_thread_mem); + + return CU_get_number_of_failures(); +} + +static void barrier_test_init(void) +{ + uint32_t num_threads, idx; + + num_threads = global_mem->g_num_threads; + + for (idx = 0; idx < NUM_TEST_BARRIERS; idx++) { + odp_barrier_init(&global_mem->test_barriers[idx], num_threads); + custom_barrier_init(&global_mem->custom_barrier1[idx], + num_threads); + custom_barrier_init(&global_mem->custom_barrier2[idx], + num_threads); + } + + global_mem->slow_thread_num = 1; + global_mem->barrier_cnt1 = 1; + global_mem->barrier_cnt2 = 1; +} + +/* Barrier tests */ +void barrier_test_memory_barrier(void) +{ + volatile int a = 0; + volatile int b = 0; + volatile int c = 0; + volatile int d = 0; + + /* Call all memory barriers to verify that those are implemented */ + a = 1; + odp_mb_release(); + b = 1; + odp_mb_acquire(); + c = 1; + odp_mb_full(); + d = 1; + + /* Avoid "variable set but not used" warning */ + temp_result = a + b + c + d; +} + +void barrier_test_no_barrier_functional(void) +{ + pthrd_arg arg; + + arg.numthrds = global_mem->g_num_threads; + barrier_test_init(); + odp_cunit_thread_create(no_barrier_functional_test, &arg); + odp_cunit_thread_exit(&arg); +} + +void barrier_test_barrier_functional(void) +{ + pthrd_arg arg; + + arg.numthrds = global_mem->g_num_threads; + barrier_test_init(); + odp_cunit_thread_create(barrier_functional_test, &arg); + odp_cunit_thread_exit(&arg); +} + +odp_testinfo_t barrier_suite_barrier[] = { + ODP_TEST_INFO(barrier_test_memory_barrier), + ODP_TEST_INFO(barrier_test_no_barrier_functional), + ODP_TEST_INFO(barrier_test_barrier_functional), + ODP_TEST_INFO_NULL +}; + +int barrier_init(odp_instance_t *inst) +{ + uint32_t workers_count, max_threads; + int ret = 0; + odp_cpumask_t mask; + + if (0 != odp_init_global(inst, NULL, NULL)) { + fprintf(stderr, "error: odp_init_global() failed.\n"); + return -1; + } + if (0 != odp_init_local(*inst, ODP_THREAD_CONTROL)) { + fprintf(stderr, "error: odp_init_local() failed.\n"); + return -1; + } + + global_shm = odp_shm_reserve(GLOBAL_SHM_NAME, + sizeof(global_shared_mem_t), 64, + ODP_SHM_SW_ONLY); + if (ODP_SHM_INVALID == global_shm) { + fprintf(stderr, "Unable reserve memory for global_shm\n"); + return -1; + } + + global_mem = odp_shm_addr(global_shm); + memset(global_mem, 0, sizeof(global_shared_mem_t)); + + global_mem->g_num_threads = MAX_WORKERS; + global_mem->g_iterations = MAX_ITERATIONS; + global_mem->g_verbose = VERBOSE; + + workers_count = odp_cpumask_default_worker(&mask, 0); + + max_threads = (workers_count >= MAX_WORKERS) ? + MAX_WORKERS : workers_count; + + if (max_threads < global_mem->g_num_threads) { + printf("Requested num of threads is too large\n"); + printf("reducing from %" PRIu32 " to %" PRIu32 "\n", + global_mem->g_num_threads, + max_threads); + global_mem->g_num_threads = max_threads; + } + + printf("Num of threads used = %" PRIu32 "\n", + global_mem->g_num_threads); + + return ret; +} + +odp_suiteinfo_t barrier_suites[] = { + {"barrier", NULL, NULL, + barrier_suite_barrier}, + ODP_SUITE_INFO_NULL +}; + +int barrier_main(int argc, char *argv[]) +{ + int ret; + + /* parse common options: */ + if (odp_cunit_parse_options(argc, argv)) + return -1; + + odp_cunit_register_global_init(barrier_init); + + ret = odp_cunit_register(barrier_suites); + + if (ret == 0) + ret = odp_cunit_run(); + + return ret; +} |