/* Copyright (c) 2018, Linaro Limited * Copyright (c) 2020-2021, Nokia * All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ #include #include #include #include #include #include #include #include #define MAX_QUEUES (256 * 1024) #define MAX_GROUPS 256 /* Max time to wait for new events in nanoseconds */ #define MAX_SCHED_WAIT_NS (10 * ODP_TIME_SEC_IN_NS) /* Scheduling round interval to check for MAX_SCHED_WAIT_NS */ #define TIME_CHECK_INTERVAL (1024 * 1024) /* Round up 'X' to a multiple of 'NUM' */ #define ROUNDUP(X, NUM) ((NUM) * (((X) + (NUM) - 1) / (NUM))) typedef struct test_options_t { uint32_t num_cpu; uint32_t num_queue; uint32_t num_low; uint32_t num_high; uint32_t num_dummy; uint32_t num_event; uint32_t num_sched; int num_group; uint32_t num_join; uint32_t max_burst; odp_pool_type_t pool_type; int queue_type; int forward; int fairness; uint32_t event_size; uint32_t queue_size; uint32_t tot_queue; uint32_t tot_event; int touch_data; uint32_t rd_words; uint32_t rw_words; uint32_t ctx_size; uint32_t ctx_rd_words; uint32_t ctx_rw_words; uint64_t wait_ns; int verbose; } test_options_t; typedef struct test_stat_t { uint64_t rounds; uint64_t enqueues; uint64_t events; uint64_t nsec; uint64_t cycles; uint64_t waits; uint64_t dummy_sum; uint8_t failed; } test_stat_t; typedef struct thread_arg_t { void *global; int first_group; } thread_arg_t; typedef struct test_global_t { test_options_t test_options; odp_schedule_config_t schedule_config; odp_barrier_t barrier; odp_pool_t pool; odp_cpumask_t cpumask; odp_shm_t ctx_shm; odp_queue_t queue[MAX_QUEUES]; odp_schedule_group_t group[MAX_GROUPS]; odph_thread_t thread_tbl[ODP_THREAD_COUNT_MAX]; test_stat_t stat[ODP_THREAD_COUNT_MAX]; thread_arg_t thread_arg[ODP_THREAD_COUNT_MAX]; odp_atomic_u32_t num_worker; } test_global_t; typedef struct { odp_queue_t next; odp_atomic_u64_t count; } queue_context_t; static void print_usage(void) { printf("\n" "Scheduler performance test\n" "\n" "Usage: odp_sched_perf [options]\n" "\n" " -c, --num_cpu Number of CPUs (worker threads). 0: all available CPUs. Default: 1.\n" " -q, --num_queue Number of queues. Default: 1.\n" " -L, --num_low Number of lowest priority queues out of '--num_queue' queues. Rest of\n" " the queues are default (or highest) priority. Default: 0.\n" " -H, --num_high Number of highest priority queues out of '--num_queue' queues. Rest of\n" " the queues are default (or lowest) priority. Default: 0.\n" " -d, --num_dummy Number of empty queues. Default: 0.\n" " -e, --num_event Number of events per queue. Default: 100.\n" " -s, --num_sched Number of events to schedule per thread. Default: 100 000.\n" " -g, --num_group Number of schedule groups. Round robins threads and queues into groups.\n" " -1: SCHED_GROUP_WORKER\n" " 0: SCHED_GROUP_ALL (default)\n" " -j, --num_join Number of groups a thread joins. Threads are divide evenly into groups,\n" " if num_cpu is multiple of num_group and num_group is multiple of num_join.\n" " 0: join all groups (default)\n" " -b, --burst Maximum number of events per operation. Default: 100.\n" " -t, --type Queue type. 0: parallel, 1: atomic, 2: ordered. Default: 0.\n" " -f, --forward 0: Keep event in the original queue, 1: Forward event to the next queue. Default: 0.\n" " -a, --fairness 0: Don't count events per queue, 1: Count and report events relative to average. Default: 0.\n" " -w, --wait_ns Number of nsec to wait before enqueueing events. Default: 0.\n" " -k, --ctx_rd_words Number of queue context words (uint64_t) to read on every event. Default: 0.\n" " -l, --ctx_rw_words Number of queue context words (uint64_t) to modify on every event. Default: 0.\n" " -n, --rd_words Number of event data words (uint64_t) to read before enqueueing it. Default: 0.\n" " -m, --rw_words Number of event data words (uint64_t) to modify before enqueueing it. Default: 0.\n" " -p, --pool_type Pool type. 0: buffer, 1: packet. Default: 0.\n" " -v, --verbose Verbose output.\n" " -h, --help This help\n" "\n"); } static int parse_options(int argc, char *argv[], test_options_t *test_options) { int opt, long_index, num_group, num_join; int ret = 0; uint32_t ctx_size = 0; int pool_type = 0; static const struct option longopts[] = { {"num_cpu", required_argument, NULL, 'c'}, {"num_queue", required_argument, NULL, 'q'}, {"num_low", required_argument, NULL, 'L'}, {"num_high", required_argument, NULL, 'H'}, {"num_dummy", required_argument, NULL, 'd'}, {"num_event", required_argument, NULL, 'e'}, {"num_sched", required_argument, NULL, 's'}, {"num_group", required_argument, NULL, 'g'}, {"num_join", required_argument, NULL, 'j'}, {"burst", required_argument, NULL, 'b'}, {"type", required_argument, NULL, 't'}, {"forward", required_argument, NULL, 'f'}, {"fairness", required_argument, NULL, 'a'}, {"wait_ns", required_argument, NULL, 'w'}, {"ctx_rd_words", required_argument, NULL, 'k'}, {"ctx_rw_words", required_argument, NULL, 'l'}, {"rd_words", required_argument, NULL, 'n'}, {"rw_words", required_argument, NULL, 'm'}, {"pool_type", required_argument, NULL, 'p'}, {"verbose", no_argument, NULL, 'v'}, {"help", no_argument, NULL, 'h'}, {NULL, 0, NULL, 0} }; static const char *shortopts = "+c:q:L:H:d:e:s:g:j:b:t:f:a:w:k:l:n:m:p:vh"; test_options->num_cpu = 1; test_options->num_queue = 1; test_options->num_low = 0; test_options->num_high = 0; test_options->num_dummy = 0; test_options->num_event = 100; test_options->num_sched = 100000; test_options->num_group = 0; test_options->num_join = 0; test_options->max_burst = 100; test_options->queue_type = 0; test_options->forward = 0; test_options->fairness = 0; test_options->ctx_rd_words = 0; test_options->ctx_rw_words = 0; test_options->rd_words = 0; test_options->rw_words = 0; test_options->wait_ns = 0; test_options->verbose = 0; while (1) { opt = getopt_long(argc, argv, shortopts, longopts, &long_index); if (opt == -1) break; switch (opt) { case 'c': test_options->num_cpu = atoi(optarg); break; case 'q': test_options->num_queue = atoi(optarg); break; case 'L': test_options->num_low = atoi(optarg); break; case 'H': test_options->num_high = atoi(optarg); break; case 'd': test_options->num_dummy = atoi(optarg); break; case 'e': test_options->num_event = atoi(optarg); break; case 's': test_options->num_sched = atoi(optarg); break; case 'g': test_options->num_group = atoi(optarg); break; case 'j': test_options->num_join = atoi(optarg); break; case 'b': test_options->max_burst = atoi(optarg); break; case 't': test_options->queue_type = atoi(optarg); break; case 'f': test_options->forward = atoi(optarg); break; case 'a': test_options->fairness = atoi(optarg); break; case 'k': test_options->ctx_rd_words = atoi(optarg); break; case 'l': test_options->ctx_rw_words = atoi(optarg); break; case 'n': test_options->rd_words = atoi(optarg); break; case 'm': test_options->rw_words = atoi(optarg); break; case 'p': pool_type = atoi(optarg); break; case 'w': test_options->wait_ns = atoll(optarg); break; case 'v': test_options->verbose = 1; break; case 'h': /* fall through */ default: print_usage(); ret = -1; break; } } if (pool_type == 0) { test_options->pool_type = ODP_POOL_BUFFER; } else if (pool_type == 1) { test_options->pool_type = ODP_POOL_PACKET; } else { ODPH_ERR("Invalid pool type: %d.\n", pool_type); ret = -1; } test_options->touch_data = test_options->rd_words || test_options->rw_words; if ((test_options->num_queue + test_options->num_dummy) > MAX_QUEUES) { ODPH_ERR("Too many queues. Max supported %i.\n", MAX_QUEUES); ret = -1; } if ((test_options->num_low + test_options->num_high) > test_options->num_queue) { ODPH_ERR("Number of low/high prio %u/%u exceed number of queues %u.\n", test_options->num_low, test_options->num_high, test_options->num_queue); ret = -1; } num_group = test_options->num_group; num_join = test_options->num_join; if (num_group > MAX_GROUPS) { ODPH_ERR("Too many groups. Max supported %i.\n", MAX_GROUPS); ret = -1; } if (num_group > 0 && num_join > num_group) { ODPH_ERR("num_join (%i) larger than num_group (%i).\n", num_join, num_group); ret = -1; } if (num_join && num_group > (int)(test_options->num_cpu * num_join)) { printf("WARNING: Too many groups (%i). Some groups (%i) are not served.\n\n", num_group, num_group - (test_options->num_cpu * num_join)); if (test_options->forward) { printf("Error: Cannot forward when some queues are not served.\n"); ret = -1; } } test_options->tot_queue = test_options->num_queue + test_options->num_dummy; test_options->tot_event = test_options->num_queue * test_options->num_event; test_options->queue_size = test_options->num_event; if (test_options->forward) { /* When forwarding, all events may end up into * a single queue */ test_options->queue_size = test_options->tot_event; } if (test_options->forward || test_options->fairness) ctx_size = sizeof(queue_context_t); if (test_options->ctx_rd_words || test_options->ctx_rw_words) { /* Round up queue handle size to a multiple of 8 for correct * context data alignment */ ctx_size = ROUNDUP(ctx_size, 8); ctx_size += 8 * test_options->ctx_rd_words; ctx_size += 8 * test_options->ctx_rw_words; } /* When context data is modified, round up to cache line size to avoid * false sharing */ if (test_options->fairness || test_options->ctx_rw_words) ctx_size = ROUNDUP(ctx_size, ODP_CACHE_LINE_SIZE); test_options->ctx_size = ctx_size; return ret; } static int set_num_cpu(test_global_t *global) { int ret; test_options_t *test_options = &global->test_options; int num_cpu = test_options->num_cpu; /* One thread used for the main thread */ if (num_cpu > ODP_THREAD_COUNT_MAX - 1) { printf("Error: Too many workers. Maximum is %i.\n", ODP_THREAD_COUNT_MAX - 1); return -1; } ret = odp_cpumask_default_worker(&global->cpumask, num_cpu); if (num_cpu && ret != num_cpu) { printf("Error: Too many workers. Max supported %i\n.", ret); return -1; } /* Zero: all available workers */ if (num_cpu == 0) { num_cpu = ret; test_options->num_cpu = num_cpu; } odp_barrier_init(&global->barrier, num_cpu); return 0; } static int create_pool(test_global_t *global) { odp_pool_capability_t pool_capa; odp_pool_param_t pool_param; odp_pool_t pool; uint32_t max_num, max_size; test_options_t *test_options = &global->test_options; uint32_t num_cpu = test_options->num_cpu; uint32_t num_queue = test_options->num_queue; uint32_t num_dummy = test_options->num_dummy; uint32_t num_event = test_options->num_event; uint32_t num_sched = test_options->num_sched; uint32_t max_burst = test_options->max_burst; uint32_t tot_queue = test_options->tot_queue; uint32_t tot_event = test_options->tot_event; uint32_t queue_size = test_options->queue_size; int num_group = test_options->num_group; uint32_t num_join = test_options->num_join; int forward = test_options->forward; uint64_t wait_ns = test_options->wait_ns; uint32_t event_size = 16; int touch_data = test_options->touch_data; uint32_t ctx_size = test_options->ctx_size; if (touch_data) { event_size = test_options->rd_words + test_options->rw_words; event_size = 8 * event_size; } test_options->event_size = event_size; printf("\nScheduler performance test\n"); printf(" num sched %u\n", num_sched); printf(" num cpu %u\n", num_cpu); printf(" num queues %u\n", num_queue); printf(" num lowest prio queues %u\n", test_options->num_low); printf(" num highest prio queues %u\n", test_options->num_high); printf(" num empty queues %u\n", num_dummy); printf(" total queues %u\n", tot_queue); printf(" num groups %i", num_group); if (num_group == -1) printf(" (ODP_SCHED_GROUP_WORKER)\n"); else if (num_group == 0) printf(" (ODP_SCHED_GROUP_ALL)\n"); else printf("\n"); printf(" num join %u\n", num_join); printf(" forward events %i\n", forward ? 1 : 0); printf(" wait nsec %" PRIu64 "\n", wait_ns); printf(" events per queue %u\n", num_event); printf(" queue size %u\n", queue_size); printf(" max burst size %u\n", max_burst); printf(" total events %u\n", tot_event); printf(" event size %u bytes", event_size); if (touch_data) { printf(" (rd: %u, rw: %u)\n", 8 * test_options->rd_words, 8 * test_options->rw_words); } else { printf("\n"); } printf(" context size %u bytes", ctx_size); if (test_options->ctx_rd_words || test_options->ctx_rw_words) { printf(" (rd: %u, rw: %u)\n", 8 * test_options->ctx_rd_words, 8 * test_options->ctx_rw_words); } else { printf("\n"); } if (odp_pool_capability(&pool_capa)) { ODPH_ERR("Error: pool capa failed\n"); return -1; } if (test_options->pool_type == ODP_POOL_BUFFER) { printf(" pool type buffer\n"); max_num = pool_capa.buf.max_num; max_size = pool_capa.buf.max_size; } else { printf(" pool type packet\n"); max_num = pool_capa.pkt.max_num; max_size = pool_capa.pkt.max_seg_len; } if (max_num && tot_event > max_num) { ODPH_ERR("Error: max events supported %u\n", max_num); return -1; } if (max_size && event_size > max_size) { ODPH_ERR("Error: max supported event size %u\n", max_size); return -1; } odp_pool_param_init(&pool_param); if (test_options->pool_type == ODP_POOL_BUFFER) { pool_param.type = ODP_POOL_BUFFER; pool_param.buf.num = tot_event; pool_param.buf.size = event_size; pool_param.buf.align = 8; } else { pool_param.type = ODP_POOL_PACKET; pool_param.pkt.num = tot_event; pool_param.pkt.len = event_size; pool_param.pkt.seg_len = event_size; pool_param.pkt.align = 8; } pool = odp_pool_create("sched perf", &pool_param); if (pool == ODP_POOL_INVALID) { ODPH_ERR("Error: pool create failed\n"); return -1; } global->pool = pool; return 0; } static int create_groups(test_global_t *global) { odp_schedule_capability_t sched_capa; odp_thrmask_t thrmask; uint32_t i; test_options_t *test_options = &global->test_options; uint32_t num_group = test_options->num_group; if (test_options->num_group <= 0) return 0; if (odp_schedule_capability(&sched_capa)) { printf("Error: schedule capability failed\n"); return -1; } if (num_group > sched_capa.max_groups) { printf("Error: Too many sched groups (max_groups capa %u)\n", sched_capa.max_groups); return -1; } odp_thrmask_zero(&thrmask); for (i = 0; i < num_group; i++) { odp_schedule_group_t group; group = odp_schedule_group_create("test_group", &thrmask); if (group == ODP_SCHED_GROUP_INVALID) { printf("Error: Group create failed %u\n", i); return -1; } global->group[i] = group; } return 0; } static int create_queues(test_global_t *global) { odp_queue_param_t queue_param; odp_queue_t queue; odp_schedule_sync_t sync; odp_schedule_prio_t prio; const char *type_str; uint32_t i, j, first; test_options_t *test_options = &global->test_options; uint32_t event_size = test_options->event_size; uint32_t num_event = test_options->num_event; uint32_t queue_size = test_options->queue_size; uint32_t tot_queue = test_options->tot_queue; uint32_t num_low = test_options->num_low; uint32_t num_high = test_options->num_high; uint32_t num_default = test_options->num_queue - num_low - num_high; int num_group = test_options->num_group; int type = test_options->queue_type; odp_pool_t pool = global->pool; uint8_t *ctx = NULL; uint32_t ctx_size = test_options->ctx_size; if (type == 0) { type_str = "parallel"; sync = ODP_SCHED_SYNC_PARALLEL; } else if (type == 1) { type_str = "atomic"; sync = ODP_SCHED_SYNC_ATOMIC; } else { type_str = "ordered"; sync = ODP_SCHED_SYNC_ORDERED; } printf(" queue type %s\n\n", type_str); if (tot_queue > global->schedule_config.num_queues) { printf("Max queues supported %u\n", global->schedule_config.num_queues); return -1; } if (global->schedule_config.queue_size && queue_size > global->schedule_config.queue_size) { printf("Max queue size %u\n", global->schedule_config.queue_size); return -1; } if (ctx_size) { ctx = odp_shm_addr(global->ctx_shm); if (ctx == NULL) { printf("Bad queue context\n"); return -1; } } odp_queue_param_init(&queue_param); queue_param.type = ODP_QUEUE_TYPE_SCHED; queue_param.sched.sync = sync; queue_param.size = queue_size; if (num_group == -1) queue_param.sched.group = ODP_SCHED_GROUP_WORKER; else queue_param.sched.group = ODP_SCHED_GROUP_ALL; first = test_options->num_dummy; for (i = 0; i < tot_queue; i++) { if (num_group > 0) { odp_schedule_group_t group; /* Divide all queues evenly into groups */ group = global->group[i % num_group]; queue_param.sched.group = group; } /* Create low, high and default queues in a mixed order. Dummy queues are created * first and with default priority. */ prio = ODP_SCHED_PRIO_DEFAULT; if (i >= first) { switch (i % 3) { case 0: if (num_low) { num_low--; prio = ODP_SCHED_PRIO_LOWEST; } else if (num_high) { num_high--; prio = ODP_SCHED_PRIO_HIGHEST; } else { num_default--; } break; case 1: if (num_high) { num_high--; prio = ODP_SCHED_PRIO_HIGHEST; } else if (num_low) { num_low--; prio = ODP_SCHED_PRIO_LOWEST; } else { num_default--; } break; default: if (num_default) { num_default--; } else if (num_high) { num_high--; prio = ODP_SCHED_PRIO_HIGHEST; } else { num_low--; prio = ODP_SCHED_PRIO_LOWEST; } break; } } queue_param.sched.prio = prio; queue = odp_queue_create(NULL, &queue_param); global->queue[i] = queue; if (queue == ODP_QUEUE_INVALID) { printf("Error: Queue create failed %u\n", i); return -1; } } /* Store events into queues. Dummy queues are allocated from * the beginning of the array, so that usage of those affect allocation * of active queues. Dummy queues are left empty. */ for (i = first; i < tot_queue; i++) { queue = global->queue[i]; if (ctx_size) { /* * Cast increases alignment, but it's ok, since ctx and * ctx_size are both cache line aligned. */ queue_context_t *qc = (queue_context_t *)(uintptr_t)ctx; if (test_options->forward) { uint32_t next = i + 1; if (next == tot_queue) next = first; qc->next = global->queue[next]; } if (test_options->fairness) odp_atomic_init_u64(&qc->count, 0); if (odp_queue_context_set(queue, ctx, ctx_size)) { printf("Error: Context set failed %u\n", i); return -1; } ctx += ctx_size; } for (j = 0; j < num_event; j++) { odp_event_t ev; if (test_options->pool_type == ODP_POOL_BUFFER) { odp_buffer_t buf = odp_buffer_alloc(pool); if (buf == ODP_BUFFER_INVALID) { ODPH_ERR("Error: alloc failed %u/%u\n", i, j); return -1; } ev = odp_buffer_to_event(buf); } else { odp_packet_t pkt = odp_packet_alloc(pool, event_size); if (pkt == ODP_PACKET_INVALID) { ODPH_ERR("Error: alloc failed %u/%u\n", i, j); return -1; } ev = odp_packet_to_event(pkt); } if (odp_queue_enq(queue, ev)) { ODPH_ERR("Error: enqueue failed %u/%u\n", i, j); return -1; } } } return 0; } static int join_group(test_global_t *global, int grp_index, int thr) { odp_thrmask_t thrmask; odp_schedule_group_t group; odp_thrmask_zero(&thrmask); odp_thrmask_set(&thrmask, thr); group = global->group[grp_index]; if (odp_schedule_group_join(group, &thrmask)) { printf("Error: Group %i join failed (thr %i)\n", grp_index, thr); return -1; } return 0; } static int join_all_groups(test_global_t *global, int thr) { int i; test_options_t *test_options = &global->test_options; int num_group = test_options->num_group; if (num_group <= 0) return 0; for (i = 0; i < num_group; i++) { if (join_group(global, i, thr)) { printf("Error: Group %u join failed (thr %i)\n", i, thr); return -1; } } return 0; } static void print_queue_fairness(test_global_t *global) { uint32_t i; queue_context_t *ctx; test_options_t *test_options = &global->test_options; uint32_t first = test_options->num_dummy; uint32_t num_queue = test_options->num_queue; uint32_t tot_queue = test_options->tot_queue; uint64_t total = 0; double average; if (!test_options->fairness) return; for (i = first; i < tot_queue; i++) { ctx = odp_queue_context(global->queue[i]); total += odp_atomic_load_u64(&ctx->count); } average = (double)total / (double)num_queue; printf("\n"); printf("RESULTS - events per queue (percent of average):\n"); printf("------------------------------------------------\n"); printf(" 1 2 3 4 5 6 7 8 9 10"); for (i = first; i < tot_queue; i++) { ctx = odp_queue_context(global->queue[i]); if ((i % 10) == 0) printf("\n "); printf("%6.1f ", (double)odp_atomic_load_u64(&ctx->count) / average * 100.0); } printf("\n"); } static int destroy_queues(test_global_t *global) { uint32_t i; odp_event_t ev; uint64_t wait; test_options_t *test_options = &global->test_options; uint32_t tot_queue = test_options->tot_queue; int thr = odp_thread_id(); if (join_all_groups(global, thr)) return -1; wait = odp_schedule_wait_time(200 * ODP_TIME_MSEC_IN_NS); while ((ev = odp_schedule(NULL, wait)) != ODP_EVENT_INVALID) odp_event_free(ev); for (i = 0; i < tot_queue; i++) { if (global->queue[i] != ODP_QUEUE_INVALID) { if (odp_queue_destroy(global->queue[i])) { printf("Error: Queue destroy failed %u\n", i); return -1; } } } return 0; } static int destroy_groups(test_global_t *global) { int i; test_options_t *test_options = &global->test_options; int num_group = test_options->num_group; if (num_group <= 0) return 0; for (i = 0; i < num_group; i++) { odp_schedule_group_t group = global->group[i]; if (odp_schedule_group_destroy(group)) { printf("Error: Group destroy failed %u\n", i); return -1; } } return 0; } static inline uint64_t rw_ctx_data(void *ctx, uint32_t offset, uint32_t rd_words, uint32_t rw_words) { uint64_t *data; uint32_t i; uint64_t sum = 0; data = (uint64_t *)(uintptr_t)((uint8_t *)ctx + offset); for (i = 0; i < rd_words; i++) sum += data[i]; for (; i < rd_words + rw_words; i++) { sum += data[i]; data[i] += 1; } return sum; } static uint64_t rw_data(odp_event_t ev[], int num, uint32_t rd_words, uint32_t rw_words, odp_pool_type_t pool_type) { uint64_t *data; int i; uint32_t j; uint64_t sum = 0; for (i = 0; i < num; i++) { if (pool_type == ODP_POOL_BUFFER) data = odp_buffer_addr(odp_buffer_from_event(ev[i])); else data = odp_packet_data(odp_packet_from_event(ev[i])); for (j = 0; j < rd_words; j++) sum += data[j]; for (; j < rd_words + rw_words; j++) { sum += data[j]; data[j] += 1; } } return sum; } static int test_sched(void *arg) { int num, num_enq, ret, thr; uint32_t i, rounds; uint64_t c1, c2, cycles, nsec; uint64_t events, enqueues, waits, events_prev; odp_time_t t1, t2, last_retry_ts; odp_queue_t queue; thread_arg_t *thread_arg = arg; test_global_t *global = thread_arg->global; test_options_t *test_options = &global->test_options; uint32_t num_sched = test_options->num_sched; uint32_t max_burst = test_options->max_burst; int num_group = test_options->num_group; int forward = test_options->forward; int fairness = test_options->fairness; int touch_data = test_options->touch_data; uint32_t rd_words = test_options->rd_words; uint32_t rw_words = test_options->rw_words; uint32_t ctx_size = test_options->ctx_size; uint32_t ctx_rd_words = test_options->ctx_rd_words; uint32_t ctx_rw_words = test_options->ctx_rw_words; odp_pool_type_t pool_type = test_options->pool_type; int touch_ctx = ctx_rd_words || ctx_rw_words; uint32_t ctx_offset = 0; uint32_t sched_retries = 0; uint64_t data_sum = 0; uint64_t ctx_sum = 0; uint64_t wait_ns = test_options->wait_ns; odp_event_t ev[max_burst]; thr = odp_thread_id(); if (forward || fairness) ctx_offset = ROUNDUP(sizeof(queue_context_t), 8); if (num_group > 0) { uint32_t num_join = test_options->num_join; if (num_join) { int pos = 0; int n = 512; char str[n]; int group_index = thread_arg->first_group; pos += snprintf(&str[pos], n - pos, "Thread %i joined groups:", thr); for (i = 0; i < num_join; i++) { if (join_group(global, group_index, thr)) return -1; pos += snprintf(&str[pos], n - pos, " %i", group_index); group_index = (group_index + 1) % num_group; } printf("%s\n", str); } else { if (join_all_groups(global, thr)) return -1; } } for (i = 0; i < max_burst; i++) ev[i] = ODP_EVENT_INVALID; enqueues = 0; events = 0; events_prev = 0; waits = 0; ret = 0; /* Start all workers at the same time */ odp_barrier_wait(&global->barrier); t1 = odp_time_local(); c1 = odp_cpu_cycles(); last_retry_ts = t1; for (rounds = 0; events < num_sched; rounds++) { num = odp_schedule_multi(&queue, ODP_SCHED_NO_WAIT, ev, max_burst); if (odp_likely(num > 0)) { sched_retries = 0; events += num; i = 0; if (odp_unlikely(ctx_size)) { queue_context_t *ctx = odp_queue_context(queue); if (forward) queue = ctx->next; if (fairness) odp_atomic_add_u64(&ctx->count, num); if (odp_unlikely(touch_ctx)) ctx_sum += rw_ctx_data(ctx, ctx_offset, ctx_rd_words, ctx_rw_words); } if (odp_unlikely(touch_data)) data_sum += rw_data(ev, num, rd_words, rw_words, pool_type); if (odp_unlikely(wait_ns)) { waits++; odp_time_wait_ns(wait_ns); } while (num) { num_enq = odp_queue_enq_multi(queue, &ev[i], num); if (num_enq < 0) { printf("Error: Enqueue failed. Round %u\n", rounds); odp_event_free_multi(&ev[i], num); ret = -1; break; } num -= num_enq; i += num_enq; enqueues++; } if (odp_unlikely(ret)) break; continue; } else if (num == 0) { sched_retries++; if (odp_unlikely(sched_retries > TIME_CHECK_INTERVAL)) { odp_time_t cur_time = odp_time_local(); /* Measure time from the last received event and * break if MAX_SCHED_WAIT_NS is exceeded */ sched_retries = 0; if (events_prev != events) { events_prev = events; last_retry_ts = cur_time; } else if (odp_time_diff_ns(cur_time, last_retry_ts) > MAX_SCHED_WAIT_NS) { printf("Error: scheduling timed out\n"); ret = -1; break; } } } /* <0 not specified as an error but checking anyway */ if (num < 0) { printf("Error: Sched failed. Round %u\n", rounds); ret = -1; break; } } c2 = odp_cpu_cycles(); t2 = odp_time_local(); nsec = odp_time_diff_ns(t2, t1); cycles = odp_cpu_cycles_diff(c2, c1); /* Update stats*/ global->stat[thr].rounds = rounds; global->stat[thr].enqueues = enqueues; global->stat[thr].events = events; global->stat[thr].nsec = nsec; global->stat[thr].cycles = cycles; global->stat[thr].waits = waits; global->stat[thr].dummy_sum = data_sum + ctx_sum; global->stat[thr].failed = ret; if (odp_atomic_fetch_dec_u32(&global->num_worker) == 1) { /* The last worker frees all events. This is needed when the main * thread cannot do the clean up (ODP_SCHED_GROUP_WORKER). */ odp_event_t event; uint64_t sched_wait = odp_schedule_wait_time(200 * ODP_TIME_MSEC_IN_NS); /* Print queue and scheduler status at the end of the test, before any queues * are emptied or destroyed. */ if (test_options->verbose) { odp_queue_print_all(); odp_schedule_print(); } while ((event = odp_schedule(NULL, sched_wait)) != ODP_EVENT_INVALID) odp_event_free(event); } /* Pause scheduling before thread exit */ odp_schedule_pause(); while (1) { ev[0] = odp_schedule(&queue, ODP_SCHED_NO_WAIT); if (ev[0] == ODP_EVENT_INVALID) break; if (odp_unlikely(forward)) queue = ((queue_context_t *)odp_queue_context(queue))->next; if (odp_queue_enq(queue, ev[0])) { printf("Error: Queue enqueue failed\n"); odp_event_free(ev[0]); ret = -1; } } return ret; } static int start_workers(test_global_t *global, odp_instance_t instance) { odph_thread_common_param_t thr_common; int i, ret; test_options_t *test_options = &global->test_options; int num_group = test_options->num_group; uint32_t num_join = test_options->num_join; int num_cpu = test_options->num_cpu; odph_thread_param_t thr_param[num_cpu]; odp_atomic_init_u32(&global->num_worker, num_cpu); memset(global->thread_tbl, 0, sizeof(global->thread_tbl)); odph_thread_common_param_init(&thr_common); thr_common.instance = instance; thr_common.cpumask = &global->cpumask; for (i = 0; i < num_cpu; i++) { odph_thread_param_init(&thr_param[i]); thr_param[i].start = test_sched; thr_param[i].arg = &global->thread_arg[i]; thr_param[i].thr_type = ODP_THREAD_WORKER; global->thread_arg[i].global = global; global->thread_arg[i].first_group = 0; if (num_group > 0 && num_join) { /* Each thread joins only num_join groups, starting * from this group index and wraping around the group * table. */ int first_group = (i * num_join) % num_group; global->thread_arg[i].first_group = first_group; } } ret = odph_thread_create(global->thread_tbl, &thr_common, thr_param, num_cpu); if (ret != num_cpu) { printf("Error: thread create failed %i\n", ret); return -1; } return 0; } static double measure_wait_time_cycles(uint64_t wait_ns) { uint64_t i, c1, c2, diff; uint64_t rounds; double wait_cycles; if (wait_ns == 0) return 0.0; /* Run measurement for 100msec or at least two times, so that effect * from CPU frequency scaling is minimized. */ rounds = (100 * ODP_TIME_MSEC_IN_NS) / wait_ns; if (rounds == 0) rounds = 2; c1 = odp_cpu_cycles(); for (i = 0; i < rounds; i++) odp_time_wait_ns(wait_ns); c2 = odp_cpu_cycles(); diff = odp_cpu_cycles_diff(c2, c1); wait_cycles = (double)diff / rounds; printf("\nMeasured wait cycles: %.3f\n", wait_cycles); return wait_cycles; } static void print_stat(test_global_t *global) { int i, num; double rounds_ave, enqueues_ave, events_ave, nsec_ave, cycles_ave; double waits_ave, wait_cycles, wait_cycles_ave; test_options_t *test_options = &global->test_options; int num_cpu = test_options->num_cpu; uint64_t wait_ns = test_options->wait_ns; uint64_t rounds_sum = 0; uint64_t enqueues_sum = 0; uint64_t events_sum = 0; uint64_t nsec_sum = 0; uint64_t cycles_sum = 0; uint64_t waits_sum = 0; wait_cycles = measure_wait_time_cycles(wait_ns); /* Averages */ for (i = 0; i < ODP_THREAD_COUNT_MAX; i++) { if (global->stat[i].failed) { num_cpu--; continue; } rounds_sum += global->stat[i].rounds; enqueues_sum += global->stat[i].enqueues; events_sum += global->stat[i].events; nsec_sum += global->stat[i].nsec; cycles_sum += global->stat[i].cycles; waits_sum += global->stat[i].waits; } if (rounds_sum == 0 || num_cpu <= 0) { printf("No results.\n"); return; } rounds_ave = rounds_sum / num_cpu; enqueues_ave = enqueues_sum / num_cpu; events_ave = events_sum / num_cpu; nsec_ave = nsec_sum / num_cpu; cycles_ave = cycles_sum / num_cpu; waits_ave = waits_sum / num_cpu; wait_cycles_ave = waits_ave * wait_cycles; num = 0; printf("\n"); printf("RESULTS - per thread (Million events per sec):\n"); printf("----------------------------------------------\n"); printf(" 1 2 3 4 5 6 7 8 9 10"); for (i = 0; i < ODP_THREAD_COUNT_MAX; i++) { if (global->stat[i].rounds) { if ((num % 10) == 0) printf("\n "); if (global->stat[i].failed) printf(" n/a "); else printf("%6.1f ", (1000.0 * global->stat[i].events) / global->stat[i].nsec); num++; } } printf("\n\n"); printf("RESULTS - average over %i threads:\n", num_cpu); printf("----------------------------------\n"); printf(" schedule calls: %.3f\n", rounds_ave); printf(" enqueue calls: %.3f\n", enqueues_ave); printf(" duration: %.3f msec\n", nsec_ave / 1000000); printf(" num cycles: %.3f M\n", cycles_ave / 1000000); printf(" cycles per round: %.3f\n", cycles_ave / rounds_ave); printf(" cycles per event: %.3f\n", cycles_ave / events_ave); if (wait_ns) { printf(" without wait_ns cycles: %.3f\n", (cycles_ave - wait_cycles_ave) / events_ave); } printf(" ave events received: %.3f\n", events_ave / rounds_ave); printf(" rounds per sec: %.3f M\n", (1000.0 * rounds_ave) / nsec_ave); printf(" events per sec: %.3f M\n\n", (1000.0 * events_ave) / nsec_ave); printf("TOTAL events per sec: %.3f M\n\n", (1000.0 * events_sum) / nsec_ave); } int main(int argc, char **argv) { odph_helper_options_t helper_options; odp_instance_t instance; odp_init_t init; odp_shm_t shm; test_global_t *global; /* Let helper collect its own arguments (e.g. --odph_proc) */ argc = odph_parse_options(argc, argv); if (odph_options(&helper_options)) { ODPH_ERR("Error: Reading ODP helper options failed.\n"); exit(EXIT_FAILURE); } /* List features not to be used */ odp_init_param_init(&init); init.not_used.feat.cls = 1; init.not_used.feat.compress = 1; init.not_used.feat.crypto = 1; init.not_used.feat.ipsec = 1; init.not_used.feat.timer = 1; init.not_used.feat.tm = 1; init.mem_model = helper_options.mem_model; /* Init ODP before calling anything else */ if (odp_init_global(&instance, &init, NULL)) { printf("Error: Global init failed.\n"); return -1; } /* Init this thread */ if (odp_init_local(instance, ODP_THREAD_CONTROL)) { printf("Error: Local init failed.\n"); return -1; } shm = odp_shm_reserve("sched_perf_global", sizeof(test_global_t), ODP_CACHE_LINE_SIZE, 0); if (shm == ODP_SHM_INVALID) { ODPH_ERR("Error: SHM reserve failed.\n"); exit(EXIT_FAILURE); } global = odp_shm_addr(shm); if (global == NULL) { ODPH_ERR("Error: SHM alloc failed\n"); exit(EXIT_FAILURE); } memset(global, 0, sizeof(test_global_t)); global->pool = ODP_POOL_INVALID; global->ctx_shm = ODP_SHM_INVALID; if (parse_options(argc, argv, &global->test_options)) return -1; odp_sys_info_print(); if (global->test_options.ctx_size) { uint64_t size = (uint64_t)global->test_options.ctx_size * global->test_options.tot_queue; global->ctx_shm = odp_shm_reserve("queue contexts", size, ODP_CACHE_LINE_SIZE, 0); if (global->ctx_shm == ODP_SHM_INVALID) { printf("Error: SHM reserve %" PRIu64 " bytes failed\n", size); return -1; } } odp_schedule_config_init(&global->schedule_config); odp_schedule_config(&global->schedule_config); if (set_num_cpu(global)) return -1; if (create_pool(global)) return -1; if (create_groups(global)) return -1; if (create_queues(global)) return -1; /* Start workers */ start_workers(global, instance); /* Wait workers to exit */ odph_thread_join(global->thread_tbl, global->test_options.num_cpu); print_queue_fairness(global); if (destroy_queues(global)) return -1; if (destroy_groups(global)) return -1; print_stat(global); if (odp_pool_destroy(global->pool)) { printf("Error: Pool destroy failed.\n"); return -1; } if (global->ctx_shm != ODP_SHM_INVALID) odp_shm_free(global->ctx_shm); if (odp_shm_free(shm)) { ODPH_ERR("Error: SHM free failed.\n"); exit(EXIT_FAILURE); } if (odp_term_local()) { printf("Error: term local failed.\n"); return -1; } if (odp_term_global(instance)) { printf("Error: term global failed.\n"); return -1; } return 0; }