diff options
Diffstat (limited to 'test/performance')
44 files changed, 2688 insertions, 1671 deletions
diff --git a/test/performance/.gitignore b/test/performance/.gitignore index 46d9e9c2c..d5ab7df24 100644 --- a/test/performance/.gitignore +++ b/test/performance/.gitignore @@ -27,7 +27,7 @@ odp_random odp_sched_latency odp_sched_perf odp_sched_pktio -odp_scheduling odp_stash_perf odp_stress +odp_timer_accuracy odp_timer_perf diff --git a/test/performance/Makefile.am b/test/performance/Makefile.am index 356e98a2d..8142d5db9 100644 --- a/test/performance/Makefile.am +++ b/test/performance/Makefile.am @@ -30,7 +30,7 @@ COMPILE_ONLY = odp_cpu_bench \ odp_sched_latency \ odp_sched_perf \ odp_sched_pktio \ - odp_scheduling \ + odp_timer_accuracy \ odp_timer_perf if LIBCONFIG @@ -46,7 +46,7 @@ TESTSCRIPTS = odp_cpu_bench_run.sh \ odp_sched_latency_run.sh \ odp_sched_perf_run.sh \ odp_sched_pktio_run.sh \ - odp_scheduling_run.sh \ + odp_timer_accuracy_run.sh \ odp_timer_perf_run.sh if ODP_PKTIO_PCAP @@ -80,7 +80,6 @@ odp_packet_gen_SOURCES = odp_packet_gen.c odp_pktio_ordered_SOURCES = odp_pktio_ordered.c dummy_crc.h odp_sched_latency_SOURCES = odp_sched_latency.c odp_sched_pktio_SOURCES = odp_sched_pktio.c -odp_scheduling_SOURCES = odp_scheduling.c odp_pktio_perf_SOURCES = odp_pktio_perf.c odp_pool_latency_SOURCES = odp_pool_latency.c odp_pool_perf_SOURCES = odp_pool_perf.c @@ -88,6 +87,7 @@ odp_queue_perf_SOURCES = odp_queue_perf.c odp_random_SOURCES = odp_random.c odp_sched_perf_SOURCES = odp_sched_perf.c odp_stress_SOURCES = odp_stress.c +odp_timer_accuracy_SOURCES = odp_timer_accuracy.c odp_timer_perf_SOURCES = odp_timer_perf.c if LIBCONFIG @@ -95,11 +95,6 @@ odp_ipsecfwd_SOURCES = odp_ipsecfwd.c AM_CFLAGS += $(LIBCONFIG_CFLAGS) endif -# l2fwd test depends on generator example -EXTRA_odp_l2fwd_DEPENDENCIES = $(top_builddir)/example/generator/odp_generator$(EXEEXT) -$(top_builddir)/example/generator/odp_generator$(EXEEXT): - $(MAKE) -C $(top_builddir)/example/generator odp_generator$(EXEEXT) - dist_check_SCRIPTS = $(TESTSCRIPTS) dist_check_DATA = udp64.pcap diff --git a/test/performance/dummy_crc.h b/test/performance/dummy_crc.h index 01e6c2433..8491b8fdc 100644 --- a/test/performance/dummy_crc.h +++ b/test/performance/dummy_crc.h @@ -1,40 +1,8 @@ -/* Copyright (c) 2016-2018, Linaro Limited - * All rights reserved. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2016-2018 Linaro Limited * - * SPDX-License-Identifier: BSD-3-Clause - */ - -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * Copyright(c) 2010-2014 Intel Corporation + * - lib/hash/rte_crc_sw.h */ /** @cond _ODP_HIDE_FROM_DOXYGEN_ */ diff --git a/test/performance/odp_atomic_perf.c b/test/performance/odp_atomic_perf.c index e665081a2..af0a37921 100644 --- a/test/performance/odp_atomic_perf.c +++ b/test/performance/odp_atomic_perf.c @@ -1,8 +1,5 @@ -/* Copyright (c) 2021, Nokia - * - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2021 Nokia */ /** diff --git a/test/performance/odp_bench_buffer.c b/test/performance/odp_bench_buffer.c index ce14ec8b3..838617f78 100644 --- a/test/performance/odp_bench_buffer.c +++ b/test/performance/odp_bench_buffer.c @@ -1,8 +1,6 @@ -/* Copyright (c) 2017-2018, Linaro Limited - * Copyright (c) 2022-2023, Nokia - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2017-2018 Linaro Limited + * Copyright (c) 2022-2023 Nokia */ /** diff --git a/test/performance/odp_bench_misc.c b/test/performance/odp_bench_misc.c index 61afdc398..a0e9476e6 100644 --- a/test/performance/odp_bench_misc.c +++ b/test/performance/odp_bench_misc.c @@ -1,7 +1,5 @@ -/* Copyright (c) 2022-2023, Nokia - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2022-2023 Nokia */ /** diff --git a/test/performance/odp_bench_packet.c b/test/performance/odp_bench_packet.c index cb9e3ca03..a8494bd28 100644 --- a/test/performance/odp_bench_packet.c +++ b/test/performance/odp_bench_packet.c @@ -1,8 +1,6 @@ -/* Copyright (c) 2017-2018, Linaro Limited - * Copyright (c) 2022-2023, Nokia - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2017-2018 Linaro Limited + * Copyright (c) 2022-2023 Nokia */ /** diff --git a/test/performance/odp_bench_pktio_sp.c b/test/performance/odp_bench_pktio_sp.c index 017e7565f..179db129d 100644 --- a/test/performance/odp_bench_pktio_sp.c +++ b/test/performance/odp_bench_pktio_sp.c @@ -824,7 +824,7 @@ static int parse_interface(appl_args_t *appl_args, const char *optarg) ODPH_ERR("Unable to store interface name (MAX_NAME_LEN=%d)\n", MAX_NAME_LEN); return -1; } - strncpy(appl_args->opt.name, optarg, MAX_NAME_LEN); + odph_strcpy(appl_args->opt.name, optarg, MAX_NAME_LEN); return 0; } @@ -849,7 +849,7 @@ static int parse_args(int argc, char *argv[]) static const char *shortopts = "i:m:o:p:q:r:s:t:h"; - strncpy(gbl_args->opt.name, "loop", MAX_NAME_LEN); + odph_strcpy(gbl_args->opt.name, "loop", MAX_NAME_LEN); gbl_args->opt.rounds = ROUNDS; gbl_args->opt.in_mode = ODP_PKTIN_MODE_DIRECT; gbl_args->opt.out_mode = ODP_PKTOUT_MODE_DIRECT; diff --git a/test/performance/odp_bench_timer.c b/test/performance/odp_bench_timer.c index 65c7a9168..ad80367d1 100644 --- a/test/performance/odp_bench_timer.c +++ b/test/performance/odp_bench_timer.c @@ -1,7 +1,5 @@ -/* Copyright (c) 2023, Nokia - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2023 Nokia */ /** diff --git a/test/performance/odp_cpu_bench.c b/test/performance/odp_cpu_bench.c index 39eff620d..674015d8a 100644 --- a/test/performance/odp_cpu_bench.c +++ b/test/performance/odp_cpu_bench.c @@ -1,7 +1,5 @@ -/* Copyright (c) 2018, Linaro Limited - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018 Linaro Limited */ /** @@ -192,7 +190,7 @@ static void sig_handler(int signo ODP_UNUSED) static inline void init_packet(odp_packet_t pkt, uint32_t seq, uint16_t group) { - uint32_t *payload; + odp_una_u32_t *payload; test_hdr_t *hdr; odp_packet_parse_param_t param; @@ -224,7 +222,7 @@ static inline odp_queue_t work_on_event(odp_event_t event) odph_udphdr_t *udp_hdr; test_hdr_t *hdr; lookup_entry_t *lookup_entry; - uint32_t *payload; + odp_una_u32_t *payload; uint32_t crc; uint32_t pkt_len; uint8_t *data; diff --git a/test/performance/odp_cpu_bench_run.sh b/test/performance/odp_cpu_bench_run.sh index c33e0b38e..15be2e729 100755 --- a/test/performance/odp_cpu_bench_run.sh +++ b/test/performance/odp_cpu_bench_run.sh @@ -1,9 +1,8 @@ #!/bin/sh # -# Copyright (c) 2022, Nokia -# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2022 Nokia # -# SPDX-License-Identifier: BSD-3-Clause TEST_DIR="${TEST_DIR:-$(dirname $0)}" diff --git a/test/performance/odp_crc.c b/test/performance/odp_crc.c index 89e2e971f..1b631c691 100644 --- a/test/performance/odp_crc.c +++ b/test/performance/odp_crc.c @@ -1,8 +1,5 @@ -/* Copyright (c) 2021, Nokia - * - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2021 Nokia */ /** diff --git a/test/performance/odp_crypto.c b/test/performance/odp_crypto.c index a644da5e1..380e798c9 100644 --- a/test/performance/odp_crypto.c +++ b/test/performance/odp_crypto.c @@ -1,8 +1,6 @@ -/* Copyright (c) 2015-2018, Linaro Limited - * Copyright (c) 2023, Nokia - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2015-2018 Linaro Limited + * Copyright (c) 2023 Nokia */ /** diff --git a/test/performance/odp_crypto_run.sh b/test/performance/odp_crypto_run.sh index f50311ae0..fcb7435fd 100755 --- a/test/performance/odp_crypto_run.sh +++ b/test/performance/odp_crypto_run.sh @@ -1,9 +1,8 @@ #!/bin/sh # -# Copyright (c) 2022, Nokia -# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2022 Nokia # -# SPDX-License-Identifier: BSD-3-Clause TEST_DIR="${TEST_DIR:-$(dirname $0)}" diff --git a/test/performance/odp_dma_perf_run.sh b/test/performance/odp_dma_perf_run.sh index 31948e40a..fb7b2bb34 100755 --- a/test/performance/odp_dma_perf_run.sh +++ b/test/performance/odp_dma_perf_run.sh @@ -1,9 +1,8 @@ #!/bin/sh # -# Copyright (c) 2022-2023, Nokia -# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2022-2023 Nokia # -# SPDX-License-Identifier: BSD-3-Clause TEST_DIR="${TEST_DIR:-$(dirname $0)}" BIN_NAME=odp_dma_perf diff --git a/test/performance/odp_dmafwd_run.sh b/test/performance/odp_dmafwd_run.sh index ebb9b153a..38fcc8dc2 100755 --- a/test/performance/odp_dmafwd_run.sh +++ b/test/performance/odp_dmafwd_run.sh @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 Nokia +# TEST_DIR="${TEST_DIR:-$PWD}" TEST_SRC_DIR=$(dirname $0) diff --git a/test/performance/odp_ipsec.c b/test/performance/odp_ipsec.c index 3ea93ec96..58be03dad 100644 --- a/test/performance/odp_ipsec.c +++ b/test/performance/odp_ipsec.c @@ -1,9 +1,7 @@ -/* Copyright (c) 2018, Linaro Limited - * Copyright (c) 2022, Marvell - * Copyright (c) 2022, Nokia - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018 Linaro Limited + * Copyright (c) 2022 Marvell + * Copyright (c) 2022 Nokia */ /** diff --git a/test/performance/odp_ipsec_run.sh b/test/performance/odp_ipsec_run.sh index 2ddb48d07..f050cb8e0 100755 --- a/test/performance/odp_ipsec_run.sh +++ b/test/performance/odp_ipsec_run.sh @@ -1,9 +1,8 @@ #!/bin/sh # -# Copyright (c) 2022, Nokia -# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2022 Nokia # -# SPDX-License-Identifier: BSD-3-Clause TEST_DIR="${TEST_DIR:-$(dirname $0)}" diff --git a/test/performance/odp_l2fwd.c b/test/performance/odp_l2fwd.c index b993de4cb..5f3efd464 100644 --- a/test/performance/odp_l2fwd.c +++ b/test/performance/odp_l2fwd.c @@ -1,15 +1,19 @@ -/* Copyright (c) 2014-2018, Linaro Limited - * Copyright (c) 2019-2024, Nokia - * Copyright (c) 2020-2021, Marvell - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2014-2018 Linaro Limited + * Copyright (c) 2019-2024 Nokia + * Copyright (c) 2020-2021 Marvell */ /** * @example odp_l2fwd.c * - * L2 forwarding example application + * This L2 forwarding application can be used as example as well as performance + * test for different ODP packet I/O modes (direct, queue or scheduled). + * + * Note that this example is tuned for performance. As a result, when using + * scheduled packet input mode with direct or queued output mode and multiple + * output queues, packet order is not guaranteed. To maintain packet order, + * use a single worker thread or output interfaces with one output queue. * * @cond _ODP_HIDE_FROM_DOXYGEN_ */ @@ -91,6 +95,9 @@ typedef struct { /* Some extra features (e.g. error checks) have been enabled */ uint8_t extra_feat; + /* Has some state that needs to be maintained across tx and/or rx */ + uint8_t has_state; + /* Prefetch packet data */ uint8_t prefetch; @@ -133,6 +140,9 @@ typedef struct { int rx_queues; /* RX queues per interface */ int pool_per_if; /* Create pool per interface */ uint32_t num_pkt; /* Number of packets per pool */ + int flow_control; /* Flow control mode */ + bool pause_rx; /* Reception of pause frames enabled */ + bool pause_tx; /* Transmission of pause frames enabled */ bool vector_mode; /* Vector mode enabled */ uint32_t num_vec; /* Number of vectors per pool */ uint64_t vec_tmo_ns; /* Vector formation timeout in ns */ @@ -144,7 +154,17 @@ typedef struct { int flow_aware; /* Flow aware scheduling enabled */ uint8_t input_ts; /* Packet input timestamping enabled */ int mtu; /* Interface MTU */ + int num_om; int num_prio; + + struct { + odp_packet_tx_compl_mode_t mode; + uint32_t nth; + uint32_t thr_compl_id; + uint32_t tot_compl_id; + } tx_compl; + + char *output_map[MAX_PKTIOS]; /* Destination port mappings for interfaces */ odp_schedule_prio_t prio[MAX_PKTIOS]; /* Priority of input queues of an interface */ } appl_args_t; @@ -158,6 +178,10 @@ typedef union ODP_ALIGNED_CACHE { uint64_t rx_drops; /* Packets dropped due to transmit error */ uint64_t tx_drops; + /* Number of transmit completion start misses (previous incomplete) */ + uint64_t tx_c_misses; + /* Number of transmit completion start failures */ + uint64_t tx_c_fails; /* Number of failed packet copies */ uint64_t copy_fails; /* Dummy sum of packet data */ @@ -167,9 +191,37 @@ typedef union ODP_ALIGNED_CACHE { uint8_t padding[ODP_CACHE_LINE_SIZE]; } stats_t; +/* Transmit completion specific state data */ +typedef struct { + /* Options that are passed to transmit completion requests */ + odp_packet_tx_compl_opt_t opt; + /* Thread specific initial value for transmit completion IDs */ + uint32_t init; + /* Thread specific maximum value for transmit completion IDs */ + uint32_t max; + /* Next free completion ID to be used for a transmit completion request */ + uint32_t free_head; + /* Next completion ID to be polled for transmit completion readiness */ + uint32_t poll_head; + /* Number of active requests */ + uint32_t num_act; + /* Maximum number of active requests */ + uint32_t max_act; + /* Transmit completion request interval for packets */ + int interval; + /* Next packet in a send burst for which to request transmit completion */ + int next_req; +} tx_compl_t; + +/* Thread specific state data */ +typedef struct { + tx_compl_t tx_compl; +} state_t; + /* Thread specific data */ typedef struct thread_args_t { stats_t stats; + state_t state; struct { odp_pktin_queue_t pktin; @@ -217,6 +269,7 @@ typedef struct { odp_pktout_queue_t pktout[MAX_QUEUES]; odp_queue_t rx_q[MAX_QUEUES]; odp_queue_t tx_q[MAX_QUEUES]; + odp_queue_t compl_q; int num_rx_thr; int num_tx_thr; int num_rx_queue; @@ -251,6 +304,16 @@ static void sig_handler(int signo ODP_UNUSED) odp_atomic_store_u32(&gbl_args->exit_threads, 1); } +static int setup_sig_handler(void) +{ + struct sigaction action = { .sa_handler = sig_handler }; + + if (sigemptyset(&action.sa_mask) || sigaction(SIGINT, &action, NULL)) + return -1; + + return 0; +} + /* * Drop packets which input parsing marked as containing errors. * @@ -471,16 +534,122 @@ static inline int process_extra_features(const appl_args_t *appl_args, odp_packe return pkts; } +static inline void handle_tx_event_compl(tx_compl_t *tx_c, odp_packet_t pkts[], int num, + int tx_idx, stats_t *stats) +{ + odp_packet_t pkt; + int next_req = tx_c->next_req; + const int interval = tx_c->interval; + + tx_c->opt.queue = gbl_args->pktios[tx_idx].compl_q; + + while (next_req <= num) { + pkt = pkts[next_req - 1]; + + if (odp_packet_tx_compl_request(pkt, &tx_c->opt) < 0) { + stats->s.tx_c_fails++; + /* Missed one, try requesting for the first packet of next burst. */ + next_req = num + 1; + break; + } + + next_req += interval; + } + + tx_c->next_req = next_req - num; +} + +static inline void handle_tx_poll_compl(tx_compl_t *tx_c, odp_packet_t pkts[], int num, int tx_idx, + stats_t *stats) +{ + uint32_t num_act = tx_c->num_act, poll_head = tx_c->poll_head, free_head = tx_c->free_head; + const uint32_t max = tx_c->max, init = tx_c->init, max_act = tx_c->max_act; + odp_pktio_t pktio = gbl_args->pktios[tx_idx].pktio; + int next_req = tx_c->next_req; + odp_packet_t pkt; + const int interval = tx_c->interval; + + while (num_act > 0) { + if (odp_packet_tx_compl_done(pktio, poll_head) < 1) + break; + + --num_act; + + if (++poll_head > max) + poll_head = init; + } + + while (next_req <= num) { + pkt = pkts[next_req - 1]; + + if (num_act == max_act) { + stats->s.tx_c_misses++; + /* Missed one, try requesting for the first packet of next burst. */ + next_req = num + 1; + break; + } + + tx_c->opt.compl_id = free_head; + + if (odp_packet_tx_compl_request(pkt, &tx_c->opt) < 0) { + stats->s.tx_c_fails++; + /* Missed one, try requesting for the first packet of next burst. */ + next_req = num + 1; + break; + } + + if (++free_head > max) + free_head = init; + + ++num_act; + next_req += interval; + } + + tx_c->free_head = free_head; + tx_c->poll_head = poll_head; + tx_c->num_act = num_act; + tx_c->next_req = next_req - num; +} + +static inline void handle_tx_state(state_t *state, odp_packet_t pkts[], int num, int tx_idx, + stats_t *stats) +{ + tx_compl_t *tx_c = &state->tx_compl; + + if (tx_c->opt.mode == ODP_PACKET_TX_COMPL_EVENT) + handle_tx_event_compl(tx_c, pkts, num, tx_idx, stats); + else if (tx_c->opt.mode == ODP_PACKET_TX_COMPL_POLL) + handle_tx_poll_compl(tx_c, pkts, num, tx_idx, stats); +} + +static inline void handle_state_failure(state_t *state, odp_packet_t packet) +{ + if (odp_packet_has_tx_compl_request(packet) != 0) { + --state->tx_compl.num_act; + --state->tx_compl.free_head; + + if (state->tx_compl.free_head == UINT32_MAX || + state->tx_compl.free_head < state->tx_compl.init) + state->tx_compl.free_head = state->tx_compl.max; + } +} + static inline void send_packets(odp_packet_t *pkt_tbl, int pkts, int use_event_queue, + int tx_idx, odp_queue_t tx_queue, odp_pktout_queue_t pktout_queue, + state_t *state, stats_t *stats) { int sent; unsigned int tx_drops; int i; + odp_packet_t pkt; + + if (odp_unlikely(state != NULL)) + handle_tx_state(state, pkt_tbl, pkts, tx_idx, stats); if (odp_unlikely(use_event_queue)) sent = event_queue_send(tx_queue, pkt_tbl, pkts); @@ -494,13 +663,27 @@ static inline void send_packets(odp_packet_t *pkt_tbl, stats->s.tx_drops += tx_drops; /* Drop rejected packets */ - for (i = sent; i < pkts; i++) - odp_packet_free(pkt_tbl[i]); + for (i = sent; i < pkts; i++) { + pkt = pkt_tbl[i]; + handle_state_failure(state, pkt); + odp_packet_free(pkt); + } } stats->s.packets += pkts; } +static int handle_rx_state(state_t *state, odp_event_t evs[], int num) +{ + if (state->tx_compl.opt.mode != ODP_PACKET_TX_COMPL_EVENT || + odp_event_type(evs[0]) != ODP_EVENT_PACKET_TX_COMPL) + return num; + + odp_event_free_multi(evs, num); + + return 0; +} + /* * Packet IO worker thread using scheduled queues and vector mode. * @@ -518,6 +701,7 @@ static int run_worker_sched_mode_vector(void *arg) thread_args_t *thr_args = arg; stats_t *stats = &thr_args->stats; const appl_args_t *appl_args = &gbl_args->appl; + state_t *state = appl_args->has_state ? &thr_args->state : NULL; int use_event_queue = gbl_args->appl.out_mode; pktin_mode_t in_mode = gbl_args->appl.in_mode; @@ -566,19 +750,23 @@ static int run_worker_sched_mode_vector(void *arg) for (i = 0; i < events; i++) { odp_packet_vector_t pkt_vec = ODP_PACKET_VECTOR_INVALID; - odp_packet_t *pkt_tbl; + odp_packet_t *pkt_tbl = NULL; odp_packet_t pkt; int src_idx, dst_idx; - int pkts; + int pkts = 0; if (odp_event_type(ev_tbl[i]) == ODP_EVENT_PACKET) { pkt = odp_packet_from_event(ev_tbl[i]); pkt_tbl = &pkt; pkts = 1; - } else { - ODPH_ASSERT(odp_event_type(ev_tbl[i]) == ODP_EVENT_PACKET_VECTOR); + } else if (odp_event_type(ev_tbl[i]) == ODP_EVENT_PACKET_VECTOR) { pkt_vec = odp_packet_vector_from_event(ev_tbl[i]); pkts = odp_packet_vector_tbl(pkt_vec, &pkt_tbl); + } else if (state != NULL) { + pkts = handle_rx_state(state, ev_tbl, events); + + if (pkts <= 0) + continue; } prefetch_data(appl_args->prefetch, pkt_tbl, pkts); @@ -597,11 +785,8 @@ static int run_worker_sched_mode_vector(void *arg) dst_idx = gbl_args->dst_port_from_idx[src_idx]; fill_eth_addrs(pkt_tbl, pkts, dst_idx); - send_packets(pkt_tbl, pkts, - use_event_queue, - tx_queue[dst_idx], - pktout[dst_idx], - stats); + send_packets(pkt_tbl, pkts, use_event_queue, dst_idx, tx_queue[dst_idx], + pktout[dst_idx], state, stats); if (pkt_vec != ODP_PACKET_VECTOR_INVALID) odp_packet_vector_free(pkt_vec); @@ -668,6 +853,7 @@ static int run_worker_sched_mode(void *arg) thread_args_t *thr_args = arg; stats_t *stats = &thr_args->stats; const appl_args_t *appl_args = &gbl_args->appl; + state_t *state = appl_args->has_state ? &thr_args->state : NULL; int use_event_queue = gbl_args->appl.out_mode; pktin_mode_t in_mode = gbl_args->appl.in_mode; @@ -729,6 +915,13 @@ static int run_worker_sched_mode(void *arg) if (pkts <= 0) continue; + if (odp_unlikely(state != NULL)) { + pkts = handle_rx_state(state, ev_tbl, pkts); + + if (pkts <= 0) + continue; + } + odp_packet_from_event_multi(pkt_tbl, ev_tbl, pkts); prefetch_data(appl_args->prefetch, pkt_tbl, pkts); @@ -744,11 +937,8 @@ static int run_worker_sched_mode(void *arg) dst_idx = gbl_args->dst_port_from_idx[src_idx]; fill_eth_addrs(pkt_tbl, pkts, dst_idx); - send_packets(pkt_tbl, pkts, - use_event_queue, - tx_queue[dst_idx], - pktout[dst_idx], - stats); + send_packets(pkt_tbl, pkts, use_event_queue, dst_idx, tx_queue[dst_idx], + pktout[dst_idx], state, stats); } /* @@ -809,6 +999,7 @@ static int run_worker_plain_queue_mode(void *arg) thread_args_t *thr_args = arg; stats_t *stats = &thr_args->stats; const appl_args_t *appl_args = &gbl_args->appl; + state_t *state = appl_args->has_state ? &thr_args->state : NULL; int use_event_queue = gbl_args->appl.out_mode; int i; @@ -857,10 +1048,7 @@ static int run_worker_plain_queue_mode(void *arg) fill_eth_addrs(pkt_tbl, pkts, dst_idx); - send_packets(pkt_tbl, pkts, - use_event_queue, - tx_queue, - pktout, + send_packets(pkt_tbl, pkts, use_event_queue, dst_idx, tx_queue, pktout, state, stats); } @@ -910,6 +1098,7 @@ static int run_worker_direct_mode(void *arg) thread_args_t *thr_args = arg; stats_t *stats = &thr_args->stats; const appl_args_t *appl_args = &gbl_args->appl; + state_t *state = appl_args->has_state ? &thr_args->state : NULL; int use_event_queue = gbl_args->appl.out_mode; thr = odp_thread_id(); @@ -953,10 +1142,7 @@ static int run_worker_direct_mode(void *arg) fill_eth_addrs(pkt_tbl, pkts, dst_idx); - send_packets(pkt_tbl, pkts, - use_event_queue, - tx_queue, - pktout, + send_packets(pkt_tbl, pkts, use_event_queue, dst_idx, tx_queue, pktout, state, stats); } @@ -1036,6 +1222,7 @@ static int create_pktio(const char *dev, int idx, int num_rx, int num_tx, odp_po odp_pktio_config_t config; odp_pktin_queue_param_t pktin_param; odp_pktout_queue_param_t pktout_param; + odp_queue_param_t compl_queue; odp_pktio_op_mode_t mode_rx; odp_pktio_op_mode_t mode_tx; pktin_mode_t in_mode = gbl_args->appl.in_mode; @@ -1052,6 +1239,12 @@ static int create_pktio(const char *dev, int idx, int num_rx, int num_tx, odp_po if (gbl_args->appl.out_mode != PKTOUT_DIRECT) pktio_param.out_mode = ODP_PKTOUT_MODE_QUEUE; + if (num_rx == 0) + pktio_param.in_mode = ODP_PKTIN_MODE_DISABLED; + + if (num_tx == 0) + pktio_param.out_mode = ODP_PKTOUT_MODE_DISABLED; + pktio = odp_pktio_open(dev, pool, &pktio_param); if (pktio == ODP_PKTIO_INVALID) { ODPH_ERR("Pktio open failed: %s\n", dev); @@ -1063,9 +1256,6 @@ static int create_pktio(const char *dev, int idx, int num_rx, int num_tx, odp_po return -1; } - if (gbl_args->appl.verbose) - odp_pktio_print(pktio); - if (odp_pktio_capability(pktio, &pktio_capa)) { ODPH_ERR("Pktio capability query failed: %s\n", dev); return -1; @@ -1091,9 +1281,48 @@ static int create_pktio(const char *dev, int idx, int num_rx, int num_tx, odp_po config.pktout.bit.tcp_chksum_ena = 1; } + if (gbl_args->appl.tx_compl.mode != ODP_PACKET_TX_COMPL_DISABLED) { + if (gbl_args->appl.tx_compl.mode == ODP_PACKET_TX_COMPL_EVENT && + !(pktio_capa.tx_compl.mode_event && pktio_capa.tx_compl.queue_type_sched)) { + ODPH_ERR("Transmit event completion not supported: %s\n", dev); + return -1; + } + + if (gbl_args->appl.tx_compl.mode == ODP_PACKET_TX_COMPL_POLL && + !(pktio_capa.tx_compl.mode_poll && + pktio_capa.tx_compl.max_compl_id >= gbl_args->appl.tx_compl.tot_compl_id)) { + ODPH_ERR("Transmit poll completion not supported: %s\n", dev); + return -1; + } + + if (gbl_args->appl.tx_compl.mode == ODP_PACKET_TX_COMPL_EVENT) + config.tx_compl.mode_event = 1; + + if (gbl_args->appl.tx_compl.mode == ODP_PACKET_TX_COMPL_POLL) { + config.tx_compl.mode_poll = 1; + config.tx_compl.max_compl_id = gbl_args->appl.tx_compl.tot_compl_id; + } + } + /* Provide hint to pktio that packet references are not used */ config.pktout.bit.no_packet_refs = 1; + if (gbl_args->appl.pause_rx) { + if (!pktio_capa.flow_control.pause_rx) { + ODPH_ERR("Reception of pause frames not supported: %s\n", dev); + return -1; + } + config.flow_control.pause_rx = ODP_PKTIO_LINK_PAUSE_ON; + } + + if (gbl_args->appl.pause_tx) { + if (!pktio_capa.flow_control.pause_tx) { + ODPH_ERR("Transmission of pause frames not supported: %s\n", dev); + return -1; + } + config.flow_control.pause_tx = ODP_PKTIO_LINK_PAUSE_ON; + } + odp_pktio_config(pktio, &config); if (gbl_args->appl.promisc_mode && odp_pktio_promisc_mode(pktio) != 1) { @@ -1169,6 +1398,20 @@ static int create_pktio(const char *dev, int idx, int num_rx, int num_tx, odp_po pktin_param.queue_param.sched.prio = prio; pktin_param.queue_param.sched.sync = sync_mode; pktin_param.queue_param.sched.group = group; + + if (gbl_args->appl.tx_compl.mode == ODP_PACKET_TX_COMPL_EVENT) { + odp_queue_param_init(&compl_queue); + compl_queue.type = ODP_QUEUE_TYPE_SCHED; + compl_queue.sched.prio = prio; + compl_queue.sched.sync = ODP_SCHED_SYNC_PARALLEL; + compl_queue.sched.group = group; + gbl_args->pktios[idx].compl_q = odp_queue_create(NULL, &compl_queue); + + if (gbl_args->pktios[idx].compl_q == ODP_QUEUE_INVALID) { + ODPH_ERR("Creating completion queue failed: %s\n", dev); + return -1; + } + } } if (num_rx > (int)pktio_capa.max_input_queues) { @@ -1205,37 +1448,45 @@ static int create_pktio(const char *dev, int idx, int num_rx, int num_tx, odp_po return -1; } - if (odp_pktin_queue_config(pktio, &pktin_param)) { + if (num_rx > 0 && odp_pktin_queue_config(pktio, &pktin_param)) { ODPH_ERR("Input queue config failed: %s\n", dev); return -1; } - if (odp_pktout_queue_config(pktio, &pktout_param)) { + if (num_tx > 0 && odp_pktout_queue_config(pktio, &pktout_param)) { ODPH_ERR("Output queue config failed: %s\n", dev); return -1; } - if (gbl_args->appl.in_mode == DIRECT_RECV) { - if (odp_pktin_queue(pktio, gbl_args->pktios[idx].pktin, num_rx) != num_rx) { - ODPH_ERR("Pktin queue query failed: %s\n", dev); - return -1; - } - } else { - if (odp_pktin_event_queue(pktio, gbl_args->pktios[idx].rx_q, num_rx) != num_rx) { - ODPH_ERR("Pktin event queue query failed: %s\n", dev); - return -1; + if (num_rx > 0) { + if (gbl_args->appl.in_mode == DIRECT_RECV) { + if (odp_pktin_queue(pktio, gbl_args->pktios[idx].pktin, num_rx) + != num_rx) { + ODPH_ERR("Pktin queue query failed: %s\n", dev); + return -1; + } + } else { + if (odp_pktin_event_queue(pktio, gbl_args->pktios[idx].rx_q, num_rx) + != num_rx) { + ODPH_ERR("Pktin event queue query failed: %s\n", dev); + return -1; + } } } - if (gbl_args->appl.out_mode == PKTOUT_DIRECT) { - if (odp_pktout_queue(pktio, gbl_args->pktios[idx].pktout, num_tx) != num_tx) { - ODPH_ERR("Pktout queue query failed: %s\n", dev); - return -1; - } - } else { - if (odp_pktout_event_queue(pktio, gbl_args->pktios[idx].tx_q, num_tx) != num_tx) { - ODPH_ERR("Event queue query failed: %s\n", dev); - return -1; + if (num_tx > 0) { + if (gbl_args->appl.out_mode == PKTOUT_DIRECT) { + if (odp_pktout_queue(pktio, gbl_args->pktios[idx].pktout, num_tx) + != num_tx) { + ODPH_ERR("Pktout queue query failed: %s\n", dev); + return -1; + } + } else { + if (odp_pktout_event_queue(pktio, gbl_args->pktios[idx].tx_q, num_tx) + != num_tx) { + ODPH_ERR("Event queue query failed: %s\n", dev); + return -1; + } } } @@ -1250,6 +1501,9 @@ static int create_pktio(const char *dev, int idx, int num_rx, int num_tx, odp_po "%02x:%02x:%02x:%02x:%02x:%02x\n", dev, info.drv_name, num_rx, num_tx, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); + if (gbl_args->appl.verbose) + odp_pktio_print(pktio); + gbl_args->pktios[idx].num_rx_queue = num_rx; gbl_args->pktios[idx].num_tx_queue = num_tx; gbl_args->pktios[idx].pktio = pktio; @@ -1271,7 +1525,7 @@ static int print_speed_stats(int num_workers, stats_t **thr_stats, uint64_t pkts = 0; uint64_t pkts_prev = 0; uint64_t pps; - uint64_t rx_drops, tx_drops, copy_fails; + uint64_t rx_drops, tx_drops, tx_c_misses, tx_c_fails, copy_fails; uint64_t maximum_pps = 0; int i; int elapsed = 0; @@ -1289,6 +1543,8 @@ static int print_speed_stats(int num_workers, stats_t **thr_stats, pkts = 0; rx_drops = 0; tx_drops = 0; + tx_c_misses = 0; + tx_c_fails = 0; copy_fails = 0; sleep(timeout); @@ -1297,6 +1553,8 @@ static int print_speed_stats(int num_workers, stats_t **thr_stats, pkts += thr_stats[i]->s.packets; rx_drops += thr_stats[i]->s.rx_drops; tx_drops += thr_stats[i]->s.tx_drops; + tx_c_misses += thr_stats[i]->s.tx_c_misses; + tx_c_fails += thr_stats[i]->s.tx_c_fails; copy_fails += thr_stats[i]->s.copy_fails; } if (stats_enabled) { @@ -1309,6 +1567,10 @@ static int print_speed_stats(int num_workers, stats_t **thr_stats, if (gbl_args->appl.packet_copy) printf("%" PRIu64 " copy fails, ", copy_fails); + if (gbl_args->appl.tx_compl.mode != ODP_PACKET_TX_COMPL_DISABLED) + printf("%" PRIu64 " tx compl misses, %" PRIu64 " tx compl fails, ", + tx_c_misses, tx_c_fails); + printf("%" PRIu64 " rx drops, %" PRIu64 " tx drops\n", rx_drops, tx_drops); @@ -1358,6 +1620,14 @@ static void print_port_mapping(void) */ static int find_dest_port(int port) { + const char *output = gbl_args->appl.output_map[port]; + + /* Check output mappings first */ + if (output != NULL) + for (int i = 0; i < gbl_args->appl.if_count; i++) + if (strcmp(output, gbl_args->appl.if_names[i]) == 0) + return i; + /* Even number of ports */ if (gbl_args->appl.if_count % 2 == 0) return (port % 2 == 0) ? port + 1 : port - 1; @@ -1522,6 +1792,21 @@ static void bind_queues(void) printf("\n"); } +static void init_state(const appl_args_t *args, state_t *state, int thr_idx) +{ + const uint32_t cnt = args->tx_compl.thr_compl_id + 1; + + state->tx_compl.opt.mode = args->tx_compl.mode; + state->tx_compl.init = thr_idx * cnt; + state->tx_compl.max = state->tx_compl.init + cnt - 1; + state->tx_compl.free_head = state->tx_compl.init; + state->tx_compl.poll_head = state->tx_compl.init; + state->tx_compl.num_act = 0; + state->tx_compl.max_act = state->tx_compl.max - state->tx_compl.init + 1; + state->tx_compl.interval = args->tx_compl.nth; + state->tx_compl.next_req = state->tx_compl.interval; +} + static void init_port_lookup_tbl(void) { int rx_idx, if_count; @@ -1560,88 +1845,119 @@ static void usage(char *progname) " eth2 will send pkts to eth3 and vice versa\n" "\n" "Mandatory OPTIONS:\n" - " -i, --interface <name> Eth interfaces (comma-separated, no spaces)\n" - " Interface count min 1, max %i\n" + " -i, --interface <name> Eth interfaces (comma-separated, no spaces)\n" + " Interface count min 1, max %i\n" "\n" "Optional OPTIONS:\n" - " -m, --mode <arg> Packet input mode\n" - " 0: Direct mode: PKTIN_MODE_DIRECT (default)\n" - " 1: Scheduler mode with parallel queues:\n" - " PKTIN_MODE_SCHED + SCHED_SYNC_PARALLEL\n" - " 2: Scheduler mode with atomic queues:\n" - " PKTIN_MODE_SCHED + SCHED_SYNC_ATOMIC\n" - " 3: Scheduler mode with ordered queues:\n" - " PKTIN_MODE_SCHED + SCHED_SYNC_ORDERED\n" - " 4: Plain queue mode: PKTIN_MODE_QUEUE\n" - " -o, --out_mode <arg> Packet output mode\n" - " 0: Direct mode: PKTOUT_MODE_DIRECT (default)\n" - " 1: Queue mode: PKTOUT_MODE_QUEUE\n" - " -c, --count <num> CPU count, 0=all available, default=1\n" - " -t, --time <sec> Time in seconds to run.\n" - " -a, --accuracy <sec> Time in seconds get print statistics\n" - " (default is 1 second).\n" - " -d, --dst_change <arg> 0: Don't change packets' dst eth addresses\n" - " 1: Change packets' dst eth addresses (default)\n" - " -s, --src_change <arg> 0: Don't change packets' src eth addresses\n" - " 1: Change packets' src eth addresses (default)\n" - " -r, --dst_addr <addr> Destination addresses (comma-separated, no spaces)\n" - " Requires also the -d flag to be set\n" - " -e, --error_check <arg> 0: Don't check packet errors (default)\n" - " 1: Check packet errors\n" - " -k, --chksum <arg> 0: Don't use checksum offload (default)\n" - " 1: Use checksum offload\n", + " -m, --mode <arg> Packet input mode\n" + " 0: Direct mode: PKTIN_MODE_DIRECT (default)\n" + " 1: Scheduler mode with parallel queues:\n" + " PKTIN_MODE_SCHED + SCHED_SYNC_PARALLEL\n" + " 2: Scheduler mode with atomic queues:\n" + " PKTIN_MODE_SCHED + SCHED_SYNC_ATOMIC\n" + " 3: Scheduler mode with ordered queues:\n" + " PKTIN_MODE_SCHED + SCHED_SYNC_ORDERED\n" + " 4: Plain queue mode: PKTIN_MODE_QUEUE\n" + " -o, --out_mode <arg> Packet output mode\n" + " 0: Direct mode: PKTOUT_MODE_DIRECT (default)\n" + " 1: Queue mode: PKTOUT_MODE_QUEUE\n" + " -O, --output_map <list> List of destination ports for passed interfaces\n" + " (comma-separated, no spaces). Ordering follows\n" + " the '--interface' option, e.g. passing\n" + " '-i eth0,eth1' and '-O eth0,eth1' would result\n" + " in eth0 and eth1 looping packets back.\n" + " -c, --count <num> CPU count, 0=all available, default=1\n" + " -t, --time <sec> Time in seconds to run.\n" + " -a, --accuracy <sec> Time in seconds get print statistics\n" + " (default is 1 second).\n" + " -d, --dst_change <arg> 0: Don't change packets' dst eth addresses\n" + " 1: Change packets' dst eth addresses (default)\n" + " -s, --src_change <arg> 0: Don't change packets' src eth addresses\n" + " 1: Change packets' src eth addresses (default)\n" + " -r, --dst_addr <addr> Destination addresses (comma-separated, no\n" + " spaces) Requires also the -d flag to be set\n" + " -e, --error_check <arg> 0: Don't check packet errors (default)\n" + " 1: Check packet errors\n" + " -k, --chksum <arg> 0: Don't use checksum offload (default)\n" + " 1: Use checksum offload\n", NO_PATH(progname), NO_PATH(progname), MAX_PKTIOS); - printf(" -g, --groups <num> Number of new groups to create (1 ... num). Interfaces\n" - " are placed into the groups in round robin.\n" - " 0: Use SCHED_GROUP_ALL (default)\n" - " -1: Use SCHED_GROUP_WORKER\n" - " -G, --group_mode <arg> Select how threads join new groups (when -g > 0)\n" - " 0: All threads join all created groups (default)\n" - " 1: All threads join first N created groups.\n" - " N is number of interfaces (== active groups).\n" - " 2: Each thread joins a part of the first N groups\n" - " (in round robin).\n" - " -I, --prio <prio list> Schedule priority of packet input queues.\n" - " Comma separated list of priorities (no spaces). A value\n" - " per interface. All queues of an interface have the same\n" - " priority. Values must be between odp_schedule_min_prio\n" - " and odp_schedule_max_prio. odp_schedule_default_prio is\n" - " used by default.\n" - " -b, --burst_rx <num> 0: Use max burst size (default)\n" - " num: Max number of packets per receive call\n" - " -q, --rx_queues <num> Number of RX queues per interface in scheduler mode\n" - " 0: RX queue per worker CPU (default)\n" - " -p, --packet_copy 0: Don't copy packet (default)\n" - " 1: Create and send copy of the received packet.\n" - " Free the original packet.\n" - " -R, --data_rd <num> Number of packet data words (uint64_t) to read from\n" - " every received packet. Number of words is rounded down\n" - " to fit into the first segment of a packet. Default\n" - " is 0.\n" - " -y, --pool_per_if Create a packet (and packet vector) pool per interface.\n" - " 0: Share a single pool between all interfaces (default)\n" - " 1: Create a pool per interface\n" - " -n, --num_pkt <num> Number of packets per pool. Default is 16k or\n" - " the maximum capability. Use 0 for the default.\n" - " -u, --vector_mode Enable vector mode.\n" - " Supported only with scheduler packet input modes (1-3).\n" - " -w, --num_vec <num> Number of vectors per pool.\n" - " Default is num_pkts divided by vec_size.\n" - " -x, --vec_size <num> Vector size (default %i).\n" - " -z, --vec_tmo_ns <ns> Vector timeout in ns (default %llu ns).\n" - " -M, --mtu <len> Interface MTU in bytes.\n" - " -P, --promisc_mode Enable promiscuous mode.\n" - " -l, --packet_len <len> Maximum length of packets supported (default %d).\n" - " -L, --seg_len <len> Packet pool segment length\n" - " (default equal to packet length).\n" - " -F, --prefetch <num> Prefetch packet data in 64 byte multiples (default 1).\n" - " -f, --flow_aware Enable flow aware scheduling.\n" - " -T, --input_ts Enable packet input timestamping.\n" - " -v, --verbose Verbose output.\n" - " -V, --verbose_pkt Print debug information on every received packet.\n" - " -h, --help Display help and exit.\n\n" - "\n", DEFAULT_VEC_SIZE, DEFAULT_VEC_TMO, POOL_PKT_LEN); + printf(" -g, --groups <num> Number of new groups to create (1 ... num).\n" + " Interfaces are placed into the groups in round\n" + " robin.\n" + " 0: Use SCHED_GROUP_ALL (default)\n" + " -1: Use SCHED_GROUP_WORKER\n" + " -G, --group_mode <arg> Select how threads join new groups\n" + " (when -g > 0)\n" + " 0: All threads join all created groups\n" + " (default)\n" + " 1: All threads join first N created groups.\n" + " N is number of interfaces (== active\n" + " groups).\n" + " 2: Each thread joins a part of the first N\n" + " groups (in round robin).\n" + " -I, --prio <prio list> Schedule priority of packet input queues.\n" + " Comma separated list of priorities (no spaces).\n" + " A value per interface. All queues of an\n" + " interface have the same priority. Values must\n" + " be between odp_schedule_min_prio and\n" + " odp_schedule_max_prio.\n" + " odp_schedule_default_prio is used by default.\n" + " -b, --burst_rx <num> 0: Use max burst size (default)\n" + " num: Max number of packets per receive call\n" + " -q, --rx_queues <num> Number of RX queues per interface in scheduler\n" + " mode\n" + " 0: RX queue per worker CPU (default)\n" + " -p, --packet_copy 0: Don't copy packet (default)\n" + " 1: Create and send copy of the received packet.\n" + " Free the original packet.\n" + " -R, --data_rd <num> Number of packet data words (uint64_t) to read\n" + " from every received packet. Number of words is\n" + " rounded down to fit into the first segment of a\n" + " packet. Default is 0.\n" + " -y, --pool_per_if Create a packet (and packet vector) pool per\n" + " interface.\n" + " 0: Share a single pool between all interfaces\n" + " (default)\n" + " 1: Create a pool per interface\n" + " -n, --num_pkt <num> Number of packets per pool. Default is 16k or\n" + " the maximum capability. Use 0 for the default.\n" + " -u, --vector_mode Enable vector mode.\n" + " Supported only with scheduler packet input\n" + " modes (1-3).\n" + " -w, --num_vec <num> Number of vectors per pool.\n" + " Default is num_pkts divided by vec_size.\n" + " -x, --vec_size <num> Vector size (default %i).\n" + " -z, --vec_tmo_ns <ns> Vector timeout in ns (default %llu ns).\n" + " -M, --mtu <len> Interface MTU in bytes.\n" + " -P, --promisc_mode Enable promiscuous mode.\n" + " -l, --packet_len <len> Maximum length of packets supported\n" + " (default %d).\n" + " -L, --seg_len <len> Packet pool segment length\n" + " (default equal to packet length).\n" + " -F, --prefetch <num> Prefetch packet data in 64 byte multiples\n" + " (default 1).\n" + " -f, --flow_aware Enable flow aware scheduling.\n" + " -T, --input_ts Enable packet input timestamping.\n", + DEFAULT_VEC_SIZE, DEFAULT_VEC_TMO, POOL_PKT_LEN); + + printf(" -C, --tx_compl <mode,n,max_id> Enable transmit completion with a specified\n" + " completion mode for nth packet, with maximum\n" + " completion ID per worker thread in case of poll\n" + " completion (comma-separated, no spaces).\n" + " 0: Event completion mode\n" + " 1: Poll completion mode\n" + " -X, --flow_control <mode> Ethernet flow control mode.\n" + " 0: Flow control disabled (default)\n" + " 1: Enable reception of pause frames\n" + " 2: Enable transmission of pause frames\n" + " 3: Enable reception and transmission of pause\n" + " frames\n" + " -v, --verbose Verbose output.\n" + " -V, --verbose_pkt Print debug information on every received\n" + " packet.\n" + " -h, --help Display help and exit.\n\n" + "\n"); } /* @@ -1656,7 +1972,7 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) int opt; int long_index; char *token; - char *tmp_str; + char *tmp_str, *tmp; size_t str_len, len; int i; static const struct option longopts[] = { @@ -1666,6 +1982,7 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) {"interface", required_argument, NULL, 'i'}, {"mode", required_argument, NULL, 'm'}, {"out_mode", required_argument, NULL, 'o'}, + {"output_map", required_argument, NULL, 'O'}, {"dst_addr", required_argument, NULL, 'r'}, {"dst_change", required_argument, NULL, 'd'}, {"src_change", required_argument, NULL, 's'}, @@ -1691,14 +2008,16 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) {"prefetch", required_argument, NULL, 'F'}, {"flow_aware", no_argument, NULL, 'f'}, {"input_ts", no_argument, NULL, 'T'}, + {"tx_compl", required_argument, NULL, 'C'}, + {"flow_control", required_argument, NULL, 'X'}, {"verbose", no_argument, NULL, 'v'}, {"verbose_pkt", no_argument, NULL, 'V'}, {"help", no_argument, NULL, 'h'}, {NULL, 0, NULL, 0} }; - static const char *shortopts = "+c:t:a:i:m:o:r:d:s:e:k:g:G:I:" - "b:q:p:R:y:n:l:L:w:x:z:M:F:uPfTvVh"; + static const char *shortopts = "+c:t:a:i:m:o:O:r:d:s:e:k:g:G:I:" + "b:q:p:R:y:n:l:L:w:x:X:z:M:F:uPfTC:vVh"; appl_args->time = 0; /* loop forever if time to run is 0 */ appl_args->accuracy = 1; /* get and print pps stats second */ @@ -1729,6 +2048,7 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) appl_args->num_prio = 0; appl_args->prefetch = 1; appl_args->data_rd = 0; + appl_args->flow_control = 0; while (1) { opt = getopt_long(argc, argv, shortopts, longopts, &long_index); @@ -1838,6 +2158,40 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) if (i != 0) appl_args->out_mode = PKTOUT_QUEUE; break; + case 'O': + if (strlen(optarg) == 0) { + ODPH_ERR("Bad output map string\n"); + exit(EXIT_FAILURE); + } + + tmp_str = strdup(optarg); + + if (tmp_str == NULL) { + ODPH_ERR("Output map string duplication failed\n"); + exit(EXIT_FAILURE); + } + + token = strtok(tmp_str, ","); + + while (token) { + if (appl_args->num_om >= MAX_PKTIOS) { + ODPH_ERR("Bad output map element count\n"); + exit(EXIT_FAILURE); + } + + appl_args->output_map[appl_args->num_om] = strdup(token); + + if (appl_args->output_map[appl_args->num_om] == NULL) { + ODPH_ERR("Output map element duplication failed\n"); + exit(EXIT_FAILURE); + } + + appl_args->num_om++; + token = strtok(NULL, ","); + } + + free(tmp_str); + break; case 'd': appl_args->dst_change = atoi(optarg); break; @@ -1930,6 +2284,13 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) case 'x': appl_args->vec_size = atoi(optarg); break; + case 'X': + appl_args->flow_control = atoi(optarg); + if (appl_args->flow_control == 1 || appl_args->flow_control == 3) + appl_args->pause_rx = true; + if (appl_args->flow_control == 2 || appl_args->flow_control == 3) + appl_args->pause_tx = true; + break; case 'z': appl_args->vec_tmo_ns = atoi(optarg); break; @@ -1942,6 +2303,56 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) case 'T': appl_args->input_ts = 1; break; + case 'C': + if (strlen(optarg) == 0) { + ODPH_ERR("Bad transmit completion parameter string\n"); + exit(EXIT_FAILURE); + } + + tmp_str = strdup(optarg); + + if (tmp_str == NULL) { + ODPH_ERR("Transmit completion parameter string duplication" + " failed\n"); + exit(EXIT_FAILURE); + } + + tmp = strtok(tmp_str, ","); + + if (tmp == NULL) { + ODPH_ERR("Invalid transmit completion parameter format\n"); + exit(EXIT_FAILURE); + } + + i = atoi(tmp); + + if (i == 0) + appl_args->tx_compl.mode = ODP_PACKET_TX_COMPL_EVENT; + else if (i == 1) + appl_args->tx_compl.mode = ODP_PACKET_TX_COMPL_POLL; + + tmp = strtok(NULL, ","); + + if (tmp == NULL) { + ODPH_ERR("Invalid transmit completion parameter format\n"); + exit(EXIT_FAILURE); + } + + appl_args->tx_compl.nth = atoi(tmp); + + if (appl_args->tx_compl.mode == ODP_PACKET_TX_COMPL_POLL) { + tmp = strtok(NULL, ","); + + if (tmp == NULL) { + ODPH_ERR("Invalid transmit completion parameter format\n"); + exit(EXIT_FAILURE); + } + + appl_args->tx_compl.thr_compl_id = atoi(tmp); + } + + free(tmp_str); + break; case 'v': appl_args->verbose = 1; break; @@ -1962,6 +2373,11 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) exit(EXIT_FAILURE); } + if (appl_args->num_om && appl_args->num_om != appl_args->if_count) { + ODPH_ERR("Different number of output mappings and pktio interfaces\n"); + exit(EXIT_FAILURE); + } + if (appl_args->num_prio && appl_args->num_prio != appl_args->if_count) { ODPH_ERR("Different number of priorities and pktio interfaces\n"); exit(EXIT_FAILURE); @@ -1978,6 +2394,23 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) exit(EXIT_FAILURE); } + if (appl_args->tx_compl.mode != ODP_PACKET_TX_COMPL_DISABLED && + appl_args->tx_compl.nth == 0) { + ODPH_ERR("Invalid packet interval for transmit completion: %u\n", + appl_args->tx_compl.nth); + exit(EXIT_FAILURE); + } + + if (appl_args->tx_compl.mode == ODP_PACKET_TX_COMPL_EVENT && + (appl_args->in_mode == PLAIN_QUEUE || appl_args->in_mode == DIRECT_RECV)) { + ODPH_ERR("Transmit event completion mode not supported with plain queue or direct " + "input modes\n"); + exit(EXIT_FAILURE); + } + + appl_args->tx_compl.tot_compl_id = (appl_args->tx_compl.thr_compl_id + 1) * + appl_args->cpu_count - 1; + if (appl_args->burst_rx == 0) appl_args->burst_rx = MAX_PKT_BURST; @@ -1986,6 +2419,10 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) appl_args->packet_copy || appl_args->data_rd || appl_args->verbose_pkt) appl_args->extra_feat = 1; + appl_args->has_state = 0; + if (appl_args->tx_compl.mode != ODP_PACKET_TX_COMPL_DISABLED) + appl_args->has_state = 1; + optind = 1; /* reset 'extern optind' from the getopt lib */ } @@ -2020,6 +2457,15 @@ static void print_options(void) else printf("PKTOUT_DIRECT\n"); + if (appl_args->num_om > 0) { + printf("Output mappings: "); + + for (i = 0; i < appl_args->num_om; ++i) + printf(" %s", appl_args->output_map[i]); + + printf("\n"); + } + printf("MTU: "); if (appl_args->mtu) printf("%i bytes\n", appl_args->mtu); @@ -2027,6 +2473,10 @@ static void print_options(void) printf("interface default\n"); printf("Promisc mode: %s\n", appl_args->promisc_mode ? "enabled" : "disabled"); + if (appl_args->flow_control) + printf("Flow control: %s%s\n", + appl_args->pause_rx ? "rx " : "", + appl_args->pause_tx ? "tx" : ""); printf("Flow aware: %s\n", appl_args->flow_aware ? "yes" : "no"); printf("Input TS: %s\n", appl_args->input_ts ? "yes" : "no"); @@ -2035,12 +2485,13 @@ static void print_options(void) printf("Number of pools: %i\n", appl_args->pool_per_if ? appl_args->if_count : 1); - if (appl_args->extra_feat) { - printf("Extra features: %s%s%s%s%s\n", + if (appl_args->extra_feat || appl_args->has_state) { + printf("Extra features: %s%s%s%s%s%s\n", appl_args->error_check ? "error_check " : "", appl_args->chksum ? "chksum " : "", appl_args->packet_copy ? "packet_copy " : "", appl_args->data_rd ? "data_rd" : "", + appl_args->tx_compl.mode != ODP_PACKET_TX_COMPL_DISABLED ? "tx_compl" : "", appl_args->verbose_pkt ? "verbose_pkt" : ""); } @@ -2082,7 +2533,11 @@ static void gbl_args_init(args_t *args) for (queue = 0; queue < MAX_QUEUES; queue++) args->pktios[pktio].rx_q[queue] = ODP_QUEUE_INVALID; + + args->pktios[pktio].compl_q = ODP_QUEUE_INVALID; } + + args->appl.tx_compl.mode = ODP_PACKET_TX_COMPL_DISABLED; } static void create_groups(int num, odp_schedule_group_t *group) @@ -2199,9 +2654,10 @@ int main(int argc, char *argv[]) init.mem_model = helper_options.mem_model; - /* Signal handler has to be registered before global init in case ODP - * implementation creates internal threads/processes. */ - signal(SIGINT, sig_handler); + if (setup_sig_handler()) { + ODPH_ERR("Signal handler setup failed\n"); + exit(EXIT_FAILURE); + } /* Init ODP before calling anything else */ if (odp_init_global(&instance, &init, NULL)) { @@ -2518,6 +2974,7 @@ int main(int argc, char *argv[]) int num_join; int mode = gbl_args->appl.group_mode; + init_state(&gbl_args->appl, &gbl_args->thread_args[i].state, i); odph_thread_param_init(&thr_param[i]); thr_param[i].start = thr_run_func; thr_param[i].arg = &gbl_args->thread_args[i]; @@ -2584,11 +3041,20 @@ int main(int argc, char *argv[]) if (gbl_args->appl.in_mode != DIRECT_RECV) odp_barrier_wait(&gbl_args->term_barrier); + odph_thread_join_result_t res[num_workers]; + /* Master thread waits for other threads to exit */ - num_thr = odph_thread_join(gbl_args->thread_tbl, num_workers); - if (num_thr != num_workers) { - ODPH_ERR("Worker join failed: %i\n", num_thr); - exit(EXIT_FAILURE); + if (odph_thread_join_result(gbl_args->thread_tbl, res, num_workers) != num_workers) { + ODPH_ERR("Worker join failed\n"); + exit(EXIT_FAILURE); + } + + for (i = 0; i < num_workers; i++) { + if (res[i].is_sig || res[i].ret != 0) { + ODPH_ERR("Worker thread failure%s: %d\n", res[i].is_sig ? + " (signaled)" : "", res[i].ret); + exit(EXIT_FAILURE); + } } for (i = 0; i < if_count; ++i) { @@ -2599,6 +3065,9 @@ int main(int argc, char *argv[]) odp_pktio_extra_stats_print(pktio); } + if (gbl_args->pktios[i].compl_q != ODP_QUEUE_INVALID) + (void)odp_queue_destroy(gbl_args->pktios[i].compl_q); + if (odp_pktio_close(pktio)) { ODPH_ERR("Pktio close failed: %s\n", gbl_args->appl.if_names[i]); exit(EXIT_FAILURE); @@ -2607,6 +3076,10 @@ int main(int argc, char *argv[]) free(gbl_args->appl.if_names); free(gbl_args->appl.if_str); + + for (i = 0; i < gbl_args->appl.num_om; i++) + free(gbl_args->appl.output_map[i]); + gbl_args = NULL; odp_mb_full(); diff --git a/test/performance/odp_l2fwd_run.sh b/test/performance/odp_l2fwd_run.sh index cd750ca35..626b6da72 100755 --- a/test/performance/odp_l2fwd_run.sh +++ b/test/performance/odp_l2fwd_run.sh @@ -1,9 +1,7 @@ #!/bin/bash # -# Copyright (c) 2015-2018, Linaro Limited -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2015-2018 Linaro Limited # # TEST_DIR is set by Makefile, when we add a rule to Makefile for odp_l2fwd_run @@ -25,7 +23,7 @@ TEST_DIR="${TEST_DIR:-$PWD}" # directory where test sources are, including scripts TEST_SRC_DIR=$(dirname $0) -PATH=$TEST_DIR:$TEST_DIR/../../example/generator:$PATH +PATH=$TEST_DIR:$PATH # exit codes expected by automake for skipped tests TEST_SKIPPED=77 @@ -33,8 +31,6 @@ TEST_SKIPPED=77 VALIDATION_TESTDIR=platform/$ODP_PLATFORM/test/validation PLATFORM_VALIDATION=${TEST_SRC_DIR}/../../$VALIDATION_TESTDIR -FLOOD_MODE=0 - # Use installed pktio env or for make check take it from platform directory if [ -f "./pktio_env" ]; then . ./pktio_env @@ -60,40 +56,33 @@ run_l2fwd() exit $TEST_SKIPPED fi - type odp_generator > /dev/null + type odp_packet_gen > /dev/null if [ $? -ne 0 ]; then - echo "odp_generator not installed. Aborting." + echo "odp_packet_gen not installed. Aborting." cleanup_pktio_env exit 1 fi - export ODP_PLATFORM_PARAMS="-m 256 --file-prefix="gen" \ + export ODP_PLATFORM_PARAMS="-m 512 --file-prefix="gen" \ --proc-type auto --no-pci \ --vdev net_pcap0,iface=$IF0" - # Run generator with one worker - (odp_generator${EXEEXT} --interval $FLOOD_MODE -I 0 \ - --srcip 192.168.0.1 --dstip 192.168.0.2 \ - -m u -w 1 2>&1 > /dev/null) \ + # Run odp_packet_gen with one tx thread + (odp_packet_gen${EXEEXT} --gap 0 -i 0 \ + --ipv4_src 192.168.0.1 --ipv4_dst 192.168.0.2 \ + -r 0 -t 1 2>&1 > /dev/null) \ 2>&1 > /dev/null & GEN_PID=$! - # this just turns off output buffering so that you still get periodic - # output while piping to tee, as long as stdbuf is available. - if [ "$(which stdbuf)" != "" ]; then - STDBUF="stdbuf -o 0" - else - STDBUF= - fi LOG=odp_l2fwd_tmp.log - export ODP_PLATFORM_PARAMS="-m 256 --file-prefix="l2fwd" \ + export ODP_PLATFORM_PARAMS="-m 512 --file-prefix="l2fwd" \ --proc-type auto --no-pci --vdev net_pcap1,iface=$IF1 \ --vdev net_pcap2,iface=$IF2" # Max 2 workers - $STDBUF odp_l2fwd${EXEEXT} -i 0,1 -m 0 -t 5 -c 2 | tee $LOG + odp_l2fwd${EXEEXT} -i 0,1 -m 0 -t 5 -c 2 | tee $LOG ret=${PIPESTATUS[0]} kill -2 ${GEN_PID} diff --git a/test/performance/odp_lock_perf.c b/test/performance/odp_lock_perf.c index 0f78db3b8..43dea0728 100644 --- a/test/performance/odp_lock_perf.c +++ b/test/performance/odp_lock_perf.c @@ -1,8 +1,5 @@ -/* Copyright (c) 2021, Nokia - * - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2021 Nokia */ /** diff --git a/test/performance/odp_mem_perf.c b/test/performance/odp_mem_perf.c index 241128b1f..5a7642a10 100644 --- a/test/performance/odp_mem_perf.c +++ b/test/performance/odp_mem_perf.c @@ -1,8 +1,5 @@ -/* Copyright (c) 2021, Nokia - * - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2021 Nokia */ /** diff --git a/test/performance/odp_packet_gen.c b/test/performance/odp_packet_gen.c index c88535791..7954a08bb 100644 --- a/test/performance/odp_packet_gen.c +++ b/test/performance/odp_packet_gen.c @@ -1,7 +1,5 @@ -/* Copyright (c) 2020-2023, Nokia - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2020-2024 Nokia */ /** @@ -38,8 +36,8 @@ #define MAX_WORKERS (MAX_THREADS - 1) -/* At least one control and two worker threads */ -ODP_STATIC_ASSERT(MAX_WORKERS >= 2, "Too few threads"); +/* At least one control and one worker thread */ +ODP_STATIC_ASSERT(MAX_WORKERS >= 1, "Too few threads"); /* Maximum number of packet IO interfaces */ #define MAX_PKTIOS 16 @@ -57,10 +55,12 @@ ODP_STATIC_ASSERT(MAX_WORKERS >= 2, "Too few threads"); /* Max retries to generate random data */ #define MAX_RAND_RETRIES 1000 -/* Used don't free */ +/* Use don't free */ #define TX_MODE_DF 0 /* Use static references */ #define TX_MODE_REF 1 +/* Use packet copy */ +#define TX_MODE_COPY 2 /* Minimum number of packets to receive in CI test */ #define MIN_RX_PACKETS_CI 800 @@ -68,6 +68,11 @@ ODP_STATIC_ASSERT(MAX_WORKERS >= 2, "Too few threads"); /* Identifier for payload-timestamped packets */ #define TS_MAGIC 0xff88ee99ddaaccbb +enum { + L4_PROTO_UDP = 0, + L4_PROTO_TCP +}; + ODP_STATIC_ASSERT(MAX_PKTIOS <= UINT8_MAX, "Interface index must fit into uint8_t\n"); typedef struct test_options_t { @@ -91,11 +96,12 @@ typedef struct test_options_t { uint32_t num_vlan; uint32_t ipv4_src; uint32_t ipv4_dst; - uint16_t udp_src; - uint16_t udp_dst; + uint16_t src_port; + uint16_t dst_port; uint32_t wait_sec; uint32_t wait_start_sec; uint32_t mtu; + uint8_t l4_proto; int tx_mode; odp_bool_t promisc_mode; odp_bool_t calc_latency; @@ -108,8 +114,8 @@ typedef struct test_options_t { } vlan[MAX_VLANS]; struct { - uint32_t udp_src; - uint32_t udp_dst; + uint32_t src_port; + uint32_t dst_port; } c_mode; char pktio_name[MAX_PKTIOS][MAX_PKTIO_NAME + 1]; @@ -252,15 +258,18 @@ static void print_usage(void) " num_tx * burst_size * bursts * (10^9 / gap)\n" " -s, --ipv4_src IPv4 source address. Default: 192.168.0.1\n" " -d, --ipv4_dst IPv4 destination address. Default: 192.168.0.2\n" - " -o, --udp_src UDP source port. Default: 10000\n" - " -p, --udp_dst UDP destination port. Default: 20000\n" + " -o, --src_port UDP/TCP source port. Default: 10000\n" + " -p, --dst_port UDP/TCP destination port. Default: 20000\n" + " -N, --proto L4 protocol. Default: 0\n" + " 0: UDP\n" + " 1: TCP\n" " -P, --promisc_mode Enable promiscuous mode.\n" " -a, --latency Calculate latency. Cannot be used with packet\n" " references (see \"--tx_mode\").\n" - " -c, --c_mode <counts> Counter mode for incrementing UDP port numbers.\n" + " -c, --c_mode <counts> Counter mode for incrementing UDP/TCP port numbers.\n" " Specify the number of port numbers used starting from\n" - " udp_src/udp_dst. Comma-separated (no spaces) list of\n" - " count values: <udp_src count>,<udp_dst count>\n" + " src_port/dst_port. Comma-separated (no spaces) list of\n" + " count values: <src_port count>,<dst_port count>\n" " Default value: 0,0\n" " -C, --no_udp_checksum Do not calculate UDP checksum. Instead, set it to\n" " zero in every packet.\n" @@ -354,7 +363,7 @@ static int init_bins(test_global_t *global) static int parse_options(int argc, char *argv[], test_global_t *global) { - int opt, i, len, str_len, long_index, udp_port; + int opt, i, len, str_len, long_index, port; unsigned long int count; uint32_t min_packets, num_tx_pkt, num_tx_alloc, pkt_len, val, bins; char *name, *str, *end; @@ -368,6 +377,7 @@ static int parse_options(int argc, char *argv[], test_global_t *global) {"num_rx", required_argument, NULL, 'r'}, {"num_tx", required_argument, NULL, 't'}, {"num_pkt", required_argument, NULL, 'n'}, + {"proto", required_argument, NULL, 'N'}, {"len", required_argument, NULL, 'l'}, {"len_range", required_argument, NULL, 'L'}, {"direct_rx", required_argument, NULL, 'D'}, @@ -378,8 +388,8 @@ static int parse_options(int argc, char *argv[], test_global_t *global) {"vlan", required_argument, NULL, 'v'}, {"ipv4_src", required_argument, NULL, 's'}, {"ipv4_dst", required_argument, NULL, 'd'}, - {"udp_src", required_argument, NULL, 'o'}, - {"udp_dst", required_argument, NULL, 'p'}, + {"src_port", required_argument, NULL, 'o'}, + {"dst_port", required_argument, NULL, 'p'}, {"promisc_mode", no_argument, NULL, 'P'}, {"latency", no_argument, NULL, 'a'}, {"c_mode", required_argument, NULL, 'c'}, @@ -394,7 +404,7 @@ static int parse_options(int argc, char *argv[], test_global_t *global) {NULL, 0, NULL, 0} }; - static const char *shortopts = "+i:e:r:t:n:l:L:D:m:M:b:x:g:v:s:d:o:p:c:CAq:u:w:W:Pah"; + static const char *shortopts = "+i:e:r:t:n:N:l:L:D:m:M:b:x:g:v:s:d:o:p:c:CAq:u:w:W:Pah"; test_options->num_pktio = 0; test_options->num_rx = 1; @@ -412,10 +422,10 @@ static int parse_options(int argc, char *argv[], test_global_t *global) test_options->calc_latency = 0; test_options->calc_cs = 1; test_options->fill_pl = 1; - strncpy(test_options->ipv4_src_s, "192.168.0.1", - sizeof(test_options->ipv4_src_s) - 1); - strncpy(test_options->ipv4_dst_s, "192.168.0.2", - sizeof(test_options->ipv4_dst_s) - 1); + odph_strcpy(test_options->ipv4_src_s, "192.168.0.1", + sizeof(test_options->ipv4_src_s)); + odph_strcpy(test_options->ipv4_dst_s, "192.168.0.2", + sizeof(test_options->ipv4_dst_s)); if (odph_ipv4_addr_parse(&test_options->ipv4_src, test_options->ipv4_src_s)) { ODPH_ERR("Address parse failed\n"); return -1; @@ -424,15 +434,16 @@ static int parse_options(int argc, char *argv[], test_global_t *global) ODPH_ERR("Address parse failed\n"); return -1; } - test_options->udp_src = 10000; - test_options->udp_dst = 20000; - test_options->c_mode.udp_src = 0; - test_options->c_mode.udp_dst = 0; + test_options->src_port = 10000; + test_options->dst_port = 20000; + test_options->c_mode.src_port = 0; + test_options->c_mode.dst_port = 0; test_options->quit = 0; test_options->update_msec = 0; test_options->wait_sec = 0; test_options->wait_start_sec = 0; test_options->mtu = 0; + test_options->l4_proto = L4_PROTO_UDP; for (i = 0; i < MAX_PKTIOS; i++) { memcpy(global->pktio[i].eth_dst.addr, default_eth_dst, 6); @@ -504,22 +515,22 @@ static int parse_options(int argc, char *argv[], test_global_t *global) } break; case 'o': - udp_port = atoi(optarg); - if (udp_port < 0 || udp_port > UINT16_MAX) { - ODPH_ERR("Error: Bad UDP source port: %d\n", udp_port); + port = atoi(optarg); + if (port < 0 || port > UINT16_MAX) { + ODPH_ERR("Error: Bad source port: %d\n", port); ret = -1; break; } - test_options->udp_src = udp_port; + test_options->src_port = port; break; case 'p': - udp_port = atoi(optarg); - if (udp_port < 0 || udp_port > UINT16_MAX) { - ODPH_ERR("Error: Bad UDP destination port: %d\n", udp_port); + port = atoi(optarg); + if (port < 0 || port > UINT16_MAX) { + ODPH_ERR("Error: Bad destination port: %d\n", port); ret = -1; break; } - test_options->udp_dst = udp_port; + test_options->dst_port = port; break; case 'P': test_options->promisc_mode = 1; @@ -536,6 +547,9 @@ static int parse_options(int argc, char *argv[], test_global_t *global) case 'n': test_options->num_pkt = atoi(optarg); break; + case 'N': + test_options->l4_proto = atoi(optarg); + break; case 'l': test_options->pkt_len = atoi(optarg); break; @@ -581,8 +595,8 @@ static int parse_options(int argc, char *argv[], test_global_t *global) ODPH_ERR("Error: Bad IPv4 source address: %s\n", optarg); ret = -1; } - strncpy(test_options->ipv4_src_s, optarg, - sizeof(test_options->ipv4_src_s) - 1); + odph_strcpy(test_options->ipv4_src_s, optarg, + sizeof(test_options->ipv4_src_s)); break; case 'd': if (odph_ipv4_addr_parse(&test_options->ipv4_dst, @@ -590,16 +604,16 @@ static int parse_options(int argc, char *argv[], test_global_t *global) ODPH_ERR("Error: Bad IPv4 destination address: %s\n", optarg); ret = -1; } - strncpy(test_options->ipv4_dst_s, optarg, - sizeof(test_options->ipv4_dst_s) - 1); + odph_strcpy(test_options->ipv4_dst_s, optarg, + sizeof(test_options->ipv4_dst_s)); break; case 'c': count = strtoul(optarg, &end, 0); - test_options->c_mode.udp_src = count; + test_options->c_mode.src_port = count; end++; count = strtoul(end, NULL, 0); - test_options->c_mode.udp_dst = count; + test_options->c_mode.dst_port = count; break; case 'C': test_options->calc_cs = 0; @@ -637,8 +651,8 @@ static int parse_options(int argc, char *argv[], test_global_t *global) return -1; } - if (test_options->num_rx < 1 || test_options->num_tx < 1) { - ODPH_ERR("Error: At least one rx and tx thread needed.\n"); + if (test_options->num_rx < 1 && test_options->num_tx < 1) { + ODPH_ERR("Error: At least one rx or tx thread needed.\n"); return -1; } @@ -684,6 +698,10 @@ static int parse_options(int argc, char *argv[], test_global_t *global) ODPH_ERR("Error: Latency test is not supported with packet references (--tx_mode 1)\n"); return -1; } + if (test_options->calc_latency && (test_options->num_rx < 1 || test_options->num_tx < 1)) { + ODPH_ERR("Error: Latency test requires both rx and tx threads\n"); + return -1; + } if (test_options->gap_nsec) { double gap_hz = 1000000000.0 / test_options->gap_nsec; @@ -702,17 +720,25 @@ static int parse_options(int argc, char *argv[], test_global_t *global) ODPH_ERR("\nWARNING: Not enough packets for every packet length bin.\n\n"); } - if (test_options->c_mode.udp_dst && - num_tx_pkt % test_options->c_mode.udp_dst) - ODPH_ERR("\nWARNING: Transmit packet count is not evenly divisible by UDP destination port count.\n\n"); + if (test_options->c_mode.dst_port && num_tx_pkt % test_options->c_mode.dst_port) + ODPH_ERR("\nWARNING: Transmit packet count is not evenly divisible by destination port count.\n\n"); + + if (test_options->c_mode.src_port && num_tx_pkt % test_options->c_mode.src_port) + ODPH_ERR("\nWARNING: Transmit packet count is not evenly divisible by source port count.\n\n"); - if (test_options->c_mode.udp_src && - num_tx_pkt % test_options->c_mode.udp_src) - ODPH_ERR("\nWARNING: Transmit packet count is not evenly divisible by UDP source port count.\n\n"); + if (test_options->l4_proto != L4_PROTO_TCP && test_options->l4_proto != L4_PROTO_UDP) { + ODPH_ERR("Error: Invalid L4 protocol: %" PRIu8 "\n", test_options->l4_proto); + return -1; + } + if (test_options->l4_proto == L4_PROTO_TCP && test_options->tx_mode != TX_MODE_COPY) { + ODPH_ERR("Error: TCP protocol supported only with copy transmit mode\n"); + return -1; + } - test_options->hdr_len = ODPH_ETHHDR_LEN + - (test_options->num_vlan * ODPH_VLANHDR_LEN) + - ODPH_IPV4HDR_LEN + ODPH_UDPHDR_LEN; + test_options->hdr_len = ODPH_ETHHDR_LEN + (test_options->num_vlan * ODPH_VLANHDR_LEN) + + ODPH_IPV4HDR_LEN; + test_options->hdr_len += test_options->l4_proto == L4_PROTO_UDP ? + ODPH_UDPHDR_LEN : ODPH_TCPHDR_LEN; pkt_len = test_options->use_rand_pkt_len ? test_options->rand_pkt_len_min : test_options->pkt_len; @@ -784,8 +810,6 @@ static int open_pktios(test_global_t *global) uint32_t num_pkt = test_options->num_pkt; uint32_t pkt_len = test_options->use_rand_pkt_len ? test_options->rand_pkt_len_max : test_options->pkt_len; - odp_pktout_queue_t pktout[num_tx]; - odp_pktin_queue_t pktin[num_rx]; printf("\nODP packet generator\n"); printf(" quit test after %" PRIu64 " rounds\n", @@ -822,10 +846,12 @@ static int open_pktios(test_global_t *global) } printf(" IPv4 source %s\n", test_options->ipv4_src_s); printf(" IPv4 destination %s\n", test_options->ipv4_dst_s); - printf(" UDP source %u\n", test_options->udp_src); - printf(" UDP destination %u\n", test_options->udp_dst); - printf(" UDP src count %u\n", test_options->c_mode.udp_src); - printf(" UDP dst count %u\n", test_options->c_mode.udp_dst); + printf(" L4 protocol: %s\n", + test_options->l4_proto == L4_PROTO_UDP ? "UDP" : "TCP"); + printf(" source port %u\n", test_options->src_port); + printf(" destination port %u\n", test_options->dst_port); + printf(" src port count %u\n", test_options->c_mode.src_port); + printf(" dst port count %u\n", test_options->c_mode.dst_port); printf(" num pktio %u\n", num_pktio); printf(" interfaces names: "); @@ -891,12 +917,11 @@ static int open_pktios(test_global_t *global) odp_pktio_param_init(&pktio_param); - if (test_options->direct_rx) - pktio_param.in_mode = ODP_PKTIN_MODE_DIRECT; - else - pktio_param.in_mode = ODP_PKTIN_MODE_SCHED; + pktio_param.in_mode = num_rx ? (test_options->direct_rx ? + ODP_PKTIN_MODE_DIRECT : ODP_PKTIN_MODE_SCHED) : + ODP_PKTIN_MODE_DISABLED; - pktio_param.out_mode = ODP_PKTOUT_MODE_DIRECT; + pktio_param.out_mode = num_tx ? ODP_PKTOUT_MODE_DIRECT : ODP_PKTOUT_MODE_DISABLED; for (i = 0; i < num_pktio; i++) global->pktio[i].pktio = ODP_PKTIO_INVALID; @@ -1034,15 +1059,21 @@ static int open_pktios(test_global_t *global) return -1; } - if (odp_pktout_queue(pktio, pktout, num_tx) != num_tx) { - ODPH_ERR("Error (%s): Pktout queue request failed.\n", name); - return -1; + if (num_tx > 0) { + odp_pktout_queue_t pktout[MAX_THREADS]; + + if (odp_pktout_queue(pktio, pktout, num_tx) != num_tx) { + ODPH_ERR("Error (%s): Pktout queue request failed.\n", name); + return -1; + } + + for (j = 0; j < num_tx; j++) + global->pktio[i].pktout[j] = pktout[j]; } - for (j = 0; j < num_tx; j++) - global->pktio[i].pktout[j] = pktout[j]; + if (num_rx > 0 && test_options->direct_rx) { + odp_pktin_queue_t pktin[MAX_THREADS]; - if (test_options->direct_rx) { if (odp_pktin_queue(pktio, pktin, num_rx) != num_rx) { ODPH_ERR("Error (%s): Pktin queue request failed.\n", name); return -1; @@ -1082,6 +1113,7 @@ static int print_link_info(odp_pktio_t pktio) return 0; } + static int start_pktios(test_global_t *global) { uint32_t i; @@ -1386,15 +1418,16 @@ static int init_packets(test_global_t *global, int pktio, uint8_t *u8; odph_ethhdr_t *eth; odph_ipv4hdr_t *ip; - odph_udphdr_t *udp; uint16_t tpid; test_options_t *test_options = &global->test_options; + const odp_bool_t use_tcp = test_options->l4_proto == L4_PROTO_TCP; uint32_t num_vlan = test_options->num_vlan; uint32_t hdr_len = test_options->hdr_len; - uint16_t udp_src = test_options->udp_src; - uint16_t udp_dst = test_options->udp_dst; - uint32_t udp_src_cnt = 0; - uint32_t udp_dst_cnt = 0; + uint16_t src_port = test_options->src_port; + uint16_t dst_port = test_options->dst_port; + uint32_t src_cnt = 0; + uint32_t dst_cnt = 0; + uint32_t tcp_seqnum = 0x1234; odph_vlanhdr_t *vlan = NULL; /* Fixes bogus compiler warning */ if (num_vlan > MAX_VLANS) @@ -1446,56 +1479,75 @@ static int init_packets(test_global_t *global, int pktio, ip->tot_len = odp_cpu_to_be_16(pkt_len - l2_len); ip->id = odp_cpu_to_be_16(seq + i); ip->ttl = 64; - ip->proto = ODPH_IPPROTO_UDP; + ip->proto = use_tcp ? ODPH_IPPROTO_TCP : ODPH_IPPROTO_UDP; ip->src_addr = odp_cpu_to_be_32(test_options->ipv4_src); ip->dst_addr = odp_cpu_to_be_32(test_options->ipv4_dst); ip->chksum = ~odp_chksum_ones_comp16(ip, ODPH_IPV4HDR_LEN); - /* UDP */ - udp = (odph_udphdr_t *)((uint8_t *)data + l2_len + - ODPH_IPV4HDR_LEN); - memset(udp, 0, ODPH_UDPHDR_LEN); - udp->src_port = odp_cpu_to_be_16(udp_src); - udp->dst_port = odp_cpu_to_be_16(udp_dst); - udp->length = odp_cpu_to_be_16(payload_len + ODPH_UDPHDR_LEN); - udp->chksum = 0; + u8 = ((uint8_t *)data + l2_len + ODPH_IPV4HDR_LEN); + + if (use_tcp) { + odph_tcphdr_t *tcp = (odph_tcphdr_t *)u8; + + memset(tcp, 0, ODPH_TCPHDR_LEN); + tcp->src_port = odp_cpu_to_be_16(src_port); + tcp->dst_port = odp_cpu_to_be_16(dst_port); + tcp->seq_no = odp_cpu_to_be_32(tcp_seqnum); + tcp->ack_no = odp_cpu_to_be_32(0x12345678); + tcp->window = odp_cpu_to_be_16(0x4000); + tcp->hl = 5; + tcp->ack = 1; + tcp_seqnum += payload_len; + } else { + odph_udphdr_t *udp = (odph_udphdr_t *)u8; + + memset(udp, 0, ODPH_UDPHDR_LEN); + udp->src_port = odp_cpu_to_be_16(src_port); + udp->dst_port = odp_cpu_to_be_16(dst_port); + udp->length = odp_cpu_to_be_16(payload_len + ODPH_UDPHDR_LEN); + udp->chksum = 0; + } u8 = data; u8 += hdr_len; if (test_options->fill_pl) { - /* Init UDP payload until the end of the first segment */ + /* Init payload until the end of the first segment */ for (j = 0; j < seg_len - hdr_len; j++) u8[j] = j; } - /* Insert UDP checksum */ + /* Insert checksum */ odp_packet_l3_offset_set(pkt, l2_len); odp_packet_l4_offset_set(pkt, l2_len + ODPH_IPV4HDR_LEN); odp_packet_has_eth_set(pkt, 1); odp_packet_has_ipv4_set(pkt, 1); - odp_packet_has_udp_set(pkt, 1); - - udp->chksum = !test_options->calc_latency && test_options->calc_cs ? - odph_ipv4_udp_chksum(pkt) : 0; + if (use_tcp) { + odp_packet_has_tcp_set(pkt, 1); + /* TCP checksum is always updated before TX */ + } else { + odp_packet_has_udp_set(pkt, 1); + if (!test_options->calc_latency && test_options->calc_cs) + odph_udp_chksum_set(pkt); + } /* Increment port numbers */ - if (test_options->c_mode.udp_src) { - udp_src_cnt++; - if (udp_src_cnt < test_options->c_mode.udp_src) { - udp_src++; + if (test_options->c_mode.src_port) { + src_cnt++; + if (src_cnt < test_options->c_mode.src_port) { + src_port++; } else { - udp_src = test_options->udp_src; - udp_src_cnt = 0; + src_port = test_options->src_port; + src_cnt = 0; } } - if (test_options->c_mode.udp_dst) { - udp_dst_cnt++; - if (udp_dst_cnt < test_options->c_mode.udp_dst) { - udp_dst++; + if (test_options->c_mode.dst_port) { + dst_cnt++; + if (dst_cnt < test_options->c_mode.dst_port) { + dst_port++; } else { - udp_dst = test_options->udp_dst; - udp_dst_cnt = 0; + dst_port = test_options->dst_port; + dst_cnt = 0; } } } @@ -1503,6 +1555,20 @@ static int init_packets(test_global_t *global, int pktio, return 0; } +static inline void update_tcp_hdr(odp_packet_t pkt, odp_packet_t base_pkt, uint32_t hdr_len) +{ + odph_tcphdr_t *tcp = odp_packet_l4_ptr(pkt, NULL); + odph_tcphdr_t *tcp_base = odp_packet_l4_ptr(base_pkt, NULL); + uint32_t prev_seqnum = odp_be_to_cpu_32(tcp_base->seq_no); + + tcp->seq_no = odp_cpu_to_be_32(prev_seqnum + (odp_packet_len(pkt) - hdr_len)); + + /* Last used sequence number is stored in the base packet */ + tcp_base->seq_no = tcp->seq_no; + + odph_tcp_chksum_set(pkt); +} + static inline int update_rand_data(uint8_t *data, uint32_t data_len) { uint32_t generated = 0; @@ -1528,13 +1594,11 @@ static inline int update_rand_data(uint8_t *data, uint32_t data_len) return 0; } -static inline void set_timestamp(odp_packet_t pkt, uint32_t ts_off, odp_bool_t calc_cs) +static inline void set_timestamp(odp_packet_t pkt, uint32_t ts_off) { const ts_data_t ts_data = { .magic = TS_MAGIC, .tx_ts = odp_time_global_ns() }; - odph_udphdr_t *udp = odp_packet_l4_ptr(pkt, NULL); (void)odp_packet_copy_from_mem(pkt, ts_off, sizeof(ts_data), &ts_data); - udp->chksum = calc_cs ? odph_ipv4_udp_chksum(pkt) : 0; } static int alloc_packets(odp_pool_t pool, odp_packet_t *pkt_tbl, uint32_t num, @@ -1570,8 +1634,8 @@ static int alloc_packets(odp_pool_t pool, odp_packet_t *pkt_tbl, uint32_t num, static inline uint32_t form_burst(odp_packet_t out_pkt[], uint32_t burst_size, uint32_t num_bins, uint32_t burst, odp_packet_t *pkt_tbl, odp_pool_t pool, - int tx_mode, uint32_t ts_off, odp_bool_t calc_cs, - uint64_t *total_bytes) + int tx_mode, odp_bool_t calc_latency, uint32_t hdr_len, + odp_bool_t calc_udp_cs, uint64_t *total_bytes, uint8_t l4_proto) { uint32_t i, idx; odp_packet_t pkt; @@ -1615,8 +1679,13 @@ static inline uint32_t form_burst(odp_packet_t out_pkt[], uint32_t burst_size, u if (odp_unlikely(out_pkt[i] == ODP_PACKET_INVALID)) break; - if (ts_off) - set_timestamp(out_pkt[i], ts_off, calc_cs); + if (calc_latency) + set_timestamp(out_pkt[i], hdr_len); + + if (l4_proto == L4_PROTO_TCP) + update_tcp_hdr(out_pkt[i], pkt, hdr_len); + else if (calc_latency && calc_udp_cs) + odph_udp_chksum_set(out_pkt[i]); } bytes += odp_packet_len(out_pkt[i]); @@ -1675,16 +1744,19 @@ static int tx_thread(void *arg) uint64_t tx_packets = 0; uint64_t tx_drops = 0; int ret = 0; + const uint32_t hdr_len = test_options->hdr_len; const uint32_t burst_size = test_options->burst_size; const uint32_t bursts = test_options->bursts; const uint32_t num_tx = test_options->num_tx; + const uint8_t l4_proto = test_options->l4_proto; const int tx_mode = test_options->tx_mode; - odp_bool_t calc_cs = test_options->calc_cs; + const odp_bool_t calc_cs = test_options->calc_cs; + const odp_bool_t calc_latency = test_options->calc_latency; int num_pktio = test_options->num_pktio; odp_pktout_queue_t pktout[num_pktio]; - uint32_t ts_off = test_options->calc_latency ? test_options->hdr_len : 0; uint32_t tot_packets = 0; uint32_t num_bins = global->num_bins; + thr = odp_thread_id(); tx_thr = thread_arg->tx_thr; global->stat[thr].thread_type = TX_THREAD; @@ -1754,7 +1826,8 @@ static int tx_thread(void *arg) for (j = 0; j < bursts; j++) { num = form_burst(pkt, burst_size, num_bins, j, pkt_tbl, pool, - tx_mode, ts_off, calc_cs, &total_bytes); + tx_mode, calc_latency, hdr_len, calc_cs, + &total_bytes, l4_proto); if (odp_unlikely(num == 0)) { ret = -1; @@ -1777,7 +1850,6 @@ static int tx_thread(void *arg) if (odp_unlikely(periodic_stat)) global->stat[thr].pktio[i].tx_packets += sent; - } } } @@ -1884,16 +1956,19 @@ static void print_periodic_stat(test_global_t *global, uint64_t nsec) num_tx[i] += global->stat[j].pktio[i].tx_packets; } } + if (global->test_options.num_tx) { + printf(" TX: %12.6fs", sec); + for (i = 0; i < num_pktio; i++) + printf(" %10" PRIu64 "", num_tx[i]); + printf("\n"); + } - printf(" TX: %12.6fs", sec); - for (i = 0; i < num_pktio; i++) - printf(" %10" PRIu64 "", num_tx[i]); - - printf("\n RX: %12.6fs", sec); - for (i = 0; i < num_pktio; i++) - printf(" %10" PRIu64 "", num_rx[i]); - - printf("\n"); + if (global->test_options.num_rx) { + printf(" RX: %12.6fs", sec); + for (i = 0; i < num_pktio; i++) + printf(" %10" PRIu64 "", num_rx[i]); + printf("\n"); + } } static void periodic_print_loop(test_global_t *global) @@ -1948,7 +2023,7 @@ static void print_humanised_latency(double lat_nsec, double lat_min_nsec, double static int print_final_stat(test_global_t *global) { int i, num_thr; - double rx_pkt_ave, rx_mbit_per_sec, tx_mbit_per_sec; + double rx_mbit_per_sec, tx_mbit_per_sec; test_options_t *test_options = &global->test_options; int num_rx = test_options->num_rx; int num_tx = test_options->num_tx; @@ -1965,6 +2040,7 @@ static int print_final_stat(test_global_t *global) uint64_t tx_byte_sum = 0; uint64_t tx_drop_sum = 0; uint64_t tx_tmo_sum = 0; + double rx_pkt_ave = 0.0; double rx_pkt_per_sec = 0.0; double rx_byte_per_sec = 0.0; double rx_pkt_len = 0.0; @@ -2036,7 +2112,8 @@ static int print_final_stat(test_global_t *global) } } - rx_pkt_ave = (double)rx_pkt_sum / num_rx; + if (num_rx) + rx_pkt_ave = (double)rx_pkt_sum / num_rx; rx_sec = rx_nsec_sum / 1000000000.0; tx_sec = tx_nsec_sum / 1000000000.0; diff --git a/test/performance/odp_packet_gen_run.sh b/test/performance/odp_packet_gen_run.sh index af272f619..437513d47 100755 --- a/test/performance/odp_packet_gen_run.sh +++ b/test/performance/odp_packet_gen_run.sh @@ -1,9 +1,7 @@ #!/bin/sh # -# Copyright (c) 2020, Nokia -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2020 Nokia # # directory where test binaries have been built diff --git a/test/performance/odp_pktio_ordered.c b/test/performance/odp_pktio_ordered.c index 6177a8160..18845a5df 100644 --- a/test/performance/odp_pktio_ordered.c +++ b/test/performance/odp_pktio_ordered.c @@ -1,7 +1,5 @@ -/* Copyright (c) 2016-2018, Linaro Limited - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2016-2018 Linaro Limited */ /** @@ -190,13 +188,13 @@ typedef union ODP_ALIGNED_CACHE { * IPv4 5-tuple */ typedef struct { - int32_t src_ip; - int32_t dst_ip; - int16_t src_port; - int16_t dst_port; - int8_t proto; - int8_t pad0; - int16_t pad1; + uint32_t src_ip; + uint32_t dst_ip; + uint16_t src_port; + uint16_t dst_port; + uint8_t proto; + uint8_t pad0; + uint16_t pad1; } ipv4_tuple5_t; /** @@ -335,7 +333,7 @@ static inline uint64_t calc_ipv4_5tuple_hash(ipv4_tuple5_t *tuple) mix(a, b, c); - a += (tuple->src_port << 16) + tuple->dst_port + JHASH_GOLDEN_RATIO; + a += ((uint32_t)tuple->src_port << 16) + tuple->dst_port + JHASH_GOLDEN_RATIO; final(a, b, c); return c; diff --git a/test/performance/odp_pktio_ordered_run.sh b/test/performance/odp_pktio_ordered_run.sh index b4584753f..4c573731b 100755 --- a/test/performance/odp_pktio_ordered_run.sh +++ b/test/performance/odp_pktio_ordered_run.sh @@ -1,10 +1,9 @@ #!/bin/bash # -# Copyright (c) 2016-2018, Linaro Limited -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2016-2018 Linaro Limited # + TEST_SRC_DIR=$(dirname $0) TEST_DIR="${TEST_DIR:-$(dirname $0)}" @@ -20,19 +19,11 @@ if [ ! -f ${PCAP_IN} ]; then exit 1 fi -# This just turns off output buffering so that you still get periodic -# output while piping to tee, as long as stdbuf is available. -if [ "$(which stdbuf)" != "" ]; then - STDBUF="stdbuf -o 0" -else - STDBUF= -fi - export ODP_PLATFORM_PARAMS="--no-pci \ --vdev net_pcap0,rx_pcap=${PCAP_IN},tx_pcap=${PCAP_OUT} \ --vdev net_pcap1,rx_pcap=${PCAP_IN},tx_pcap=${PCAP_OUT}" -$STDBUF ${TEST_DIR}/odp_pktio_ordered${EXEEXT} \ +${TEST_DIR}/odp_pktio_ordered${EXEEXT} \ -i 0,1 \ -t $DURATION | tee $LOG ret=${PIPESTATUS[0]} diff --git a/test/performance/odp_pktio_perf.c b/test/performance/odp_pktio_perf.c index 4cfeb50cf..8ca9d076e 100644 --- a/test/performance/odp_pktio_perf.c +++ b/test/performance/odp_pktio_perf.c @@ -1,7 +1,5 @@ -/* Copyright (c) 2015-2018, Linaro Limited - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2015-2018 Linaro Limited */ /** diff --git a/test/performance/odp_pool_latency.c b/test/performance/odp_pool_latency.c index 6b964e773..0afe2f317 100644 --- a/test/performance/odp_pool_latency.c +++ b/test/performance/odp_pool_latency.c @@ -16,6 +16,7 @@ #endif #include <inttypes.h> +#include <signal.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -107,6 +108,7 @@ typedef struct { uint64_t reallocs; uint64_t alloc_errs; uint64_t pattern_errs; + uint64_t act_num_rounds; uint8_t max_alloc_pt; uint8_t min_alloc_pt; uint8_t max_uarea_pt; @@ -150,13 +152,14 @@ typedef struct prog_config_s { alloc_fn_t alloc_fn; free_fn_t free_fn; int64_t cache_size; + uint64_t num_rounds; + uint64_t num_ignore; + odp_atomic_u32_t is_running; uint32_t num_data_elems; uint32_t seg_len; uint32_t handle_size; uint32_t num_evs; uint32_t data_size; - uint32_t num_rounds; - uint32_t num_ignore; uint32_t num_workers; uint32_t uarea_size; uint8_t num_elems; @@ -166,6 +169,11 @@ typedef struct prog_config_s { static prog_config_t *prog_conf; +static void terminate(int signal ODP_UNUSED) +{ + odp_atomic_store_u32(&prog_conf->is_running, 0U); +} + static void init_config(prog_config_t *config) { alloc_elem_t *alloc_elem; @@ -298,7 +306,8 @@ static void print_usage(const dynamic_defs_t *dyn_defs) " Policies:\n" " 0: One pool shared by workers\n" " 1: One pool per worker\n" - " -r, --round_count Number of rounds to run. %u by default.\n" + " -r, --round_count Number of rounds to run. Use 0 to run indefinitely. %u by\n" + " default.\n" " -i, --ignore_rounds Ignore an amount of initial rounds. %u by default.\n" " -c, --worker_count Number of workers. %u by default.\n" " -C, --cache_size Maximum cache size for pools. Defaults:\n" @@ -547,14 +556,9 @@ static parse_result_t check_options(prog_config_t *config) return PRS_NOK; } - if (config->num_rounds == 0U) { - ODPH_ERR("Invalid round count: %u (min: 1)\n", config->num_rounds); - return PRS_NOK; - } - - if (config->num_ignore >= config->num_rounds) { - ODPH_ERR("Invalid round ignorance count: %u (max: %u)\n", config->num_ignore, - config->num_rounds - 1U); + if (config->num_rounds > 0U && config->num_ignore >= config->num_rounds) { + ODPH_ERR("Invalid round ignore count: %" PRIu64 " (max: %" PRIu64 ")\n", + config->num_ignore, config->num_rounds - 1U); return PRS_NOK; } @@ -607,10 +611,10 @@ static parse_result_t parse_options(int argc, char **argv, prog_config_t *config config->policy = atoi(optarg); break; case 'r': - config->num_rounds = atoi(optarg); + config->num_rounds = atoll(optarg); break; case 'i': - config->num_ignore = atoi(optarg); + config->num_ignore = atoll(optarg); break; case 'c': config->num_workers = atoi(optarg); @@ -634,6 +638,21 @@ static parse_result_t parse_options(int argc, char **argv, prog_config_t *config return check_options(config); } +static parse_result_t setup_program(int argc, char **argv, prog_config_t *config) +{ + struct sigaction action = { .sa_handler = terminate }; + + if (sigemptyset(&action.sa_mask) == -1 || sigaddset(&action.sa_mask, SIGINT) == -1 || + sigaddset(&action.sa_mask, SIGTERM) == -1 || + sigaddset(&action.sa_mask, SIGHUP) == -1 || sigaction(SIGINT, &action, NULL) == -1 || + sigaction(SIGTERM, &action, NULL) == -1 || sigaction(SIGHUP, &action, NULL) == -1) { + ODPH_ERR("Error installing signal handler\n"); + return PRS_NOK; + } + + return parse_options(argc, argv, config); +} + static inline void save_alloc_stats(odp_time_t t1, odp_time_t t2, uint32_t num_alloc, uint64_t round, uint8_t pattern, stats_t *stats) { @@ -1040,8 +1059,10 @@ static int run_test(void *args) { worker_config_t *config = args; odp_time_t t1, t2; - uint32_t head_idx, cur_idx, num_ignore = config->prog_config->num_ignore, val, num_alloc, - idx; + uint64_t i, num_ignore = config->prog_config->num_ignore; + const uint64_t num_rnds = config->prog_config->num_rounds; + odp_atomic_u32_t *is_running = &config->prog_config->is_running; + uint32_t head_idx, cur_idx, val, num_alloc, idx; odp_bool_t is_saved; const uint8_t num_elems = config->prog_config->num_elems; const alloc_elem_t *elems = config->prog_config->alloc_elems, *elem; @@ -1054,7 +1075,7 @@ static int run_test(void *args) odp_barrier_wait(&config->prog_config->init_barrier); t1 = odp_time_local_strict(); - for (uint32_t i = 0U; i < config->prog_config->num_rounds; ++i) { + for (i = 0U; (i < num_rnds || num_rnds == 0U) && odp_atomic_load_u32(is_running); ++i) { head_idx = 0U; cur_idx = head_idx; is_saved = (num_ignore > 0U ? num_ignore-- : num_ignore) == 0U; @@ -1093,6 +1114,7 @@ static int run_test(void *args) t2 = odp_time_local_strict(); stats->tot_tm = odp_time_diff_ns(t2, t1); + stats->act_num_rounds = i; odp_barrier_wait(&config->prog_config->term_barrier); return 0; @@ -1151,20 +1173,21 @@ static void print_stats(const prog_config_t *config) printf("\n==================\n\n" "Pool latency test done\n\n" - " type: %s\n" - " event count: %u\n", config->type == BUFFER ? "buffer" : + " type: %s\n" + " event count: %u\n", config->type == BUFFER ? "buffer" : config->type == PACKET ? "packet" : config->type == TMO ? "timeout" : "vector", config->num_evs); if (config->type != TMO) - printf(" %s %u\n", config->type != VECTOR ? "data size: " : "vector size:", + printf(" %s %u\n", + config->type != VECTOR ? "data size: " : "vector size: ", config->data_size); - printf(" pool policy: %s\n" - " round count: %u\n" - " ignore count: %u\n" - " cache size: %" PRIi64 "\n" - " user area: %u (B)\n" + printf(" pool policy: %s\n" + " target round count: %" PRIu64 "\n" + " ignore count: %" PRIu64 "\n" + " cache size: %" PRIi64 "\n" + " user area: %u (B)\n" " burst pattern:\n", config->policy == SINGLE ? "shared" : "per-worker", config->num_rounds, config->num_ignore, config->cache_size, config->uarea_size); @@ -1194,6 +1217,7 @@ static void print_stats(const prog_config_t *config) ave_free_tm = stats->alloc_cnt > 0U ? stats->free_tm / stats->alloc_cnt : 0U; printf(" worker %d:\n" + " actual round count: %" PRIu64 "\n" " significant events allocated/freed: %" PRIu64 "\n" " allocation retries: %" PRIu64 "\n" " allocation errors: %" PRIu64 "\n" @@ -1208,9 +1232,9 @@ static void print_stats(const prog_config_t *config) " per free burst: %" PRIu64 " (min: %" PRIu64 " (round: %" PRIu64 ", pattern: %u), max: %" PRIu64 " (round: %" PRIu64 ", pattern: %u))" "\n" - " per free: %" PRIu64 "\n", i, stats->alloc_cnt, - stats->reallocs, stats->alloc_errs, stats->pattern_errs, stats->tot_tm, - ev_rate, ave_b_alloc_tm, b_alloc_min, stats->min_alloc_rnd, + " per free: %" PRIu64 "\n", i, stats->act_num_rounds, + stats->alloc_cnt, stats->reallocs, stats->alloc_errs, stats->pattern_errs, + stats->tot_tm, ev_rate, ave_b_alloc_tm, b_alloc_min, stats->min_alloc_rnd, stats->min_alloc_pt, b_alloc_max, stats->max_alloc_rnd, stats->max_alloc_pt, ave_alloc_tm, ave_b_free_tm, b_free_min, stats->min_free_rnd, stats->min_free_pt, b_free_max, stats->max_free_rnd, stats->max_free_pt, @@ -1339,7 +1363,7 @@ int main(int argc, char **argv) goto out; } - parse_res = parse_options(argc, argv, prog_conf); + parse_res = setup_program(argc, argv, prog_conf); if (parse_res == PRS_NOK) { ret = EXIT_FAILURE; @@ -1352,6 +1376,7 @@ int main(int argc, char **argv) } prog_conf->odp_instance = odp_instance; + odp_atomic_init_u32(&prog_conf->is_running, 1U); if (!setup_test(prog_conf)) { ret = EXIT_FAILURE; diff --git a/test/performance/odp_pool_perf.c b/test/performance/odp_pool_perf.c index 43a39a21e..c79465e53 100644 --- a/test/performance/odp_pool_perf.c +++ b/test/performance/odp_pool_perf.c @@ -1,9 +1,6 @@ -/* Copyright (c) 2018, Linaro Limited - * Copyright (c) 2019-2022, Nokia - * - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018 Linaro Limited + * Copyright (c) 2019-2022 Nokia */ /** diff --git a/test/performance/odp_queue_perf.c b/test/performance/odp_queue_perf.c index 7d4612cb8..153f87d10 100644 --- a/test/performance/odp_queue_perf.c +++ b/test/performance/odp_queue_perf.c @@ -1,8 +1,6 @@ -/* Copyright (c) 2018, Linaro Limited - * Copyright (c) 2021-2023, Nokia - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018 Linaro Limited + * Copyright (c) 2021-2023 Nokia */ /** diff --git a/test/performance/odp_random.c b/test/performance/odp_random.c index 99714d7b3..4a689e440 100644 --- a/test/performance/odp_random.c +++ b/test/performance/odp_random.c @@ -1,8 +1,5 @@ -/* Copyright (c) 2021-2022, Nokia - * - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2021-2024 Nokia */ /** @@ -379,11 +376,20 @@ static void test_type(odp_instance_t instance, test_global_t *global, odp_random exit(EXIT_FAILURE); } - if (odph_thread_join(thr_worker, num_threads) != num_threads) { + odph_thread_join_result_t res[num_threads]; + + if (odph_thread_join_result(thr_worker, res, num_threads) != num_threads) { ODPH_ERR("Failed to join worker threads.\n"); exit(EXIT_FAILURE); } + for (i = 0; i < num_threads; i++) { + if (res[i].ret != 0) { + ODPH_ERR("Worker thread failure: %d.\n", res[i].ret); + exit(EXIT_FAILURE); + } + } + double mb, seconds, nsec = 0; for (i = 0; i < num_threads; i++) diff --git a/test/performance/odp_sched_latency.c b/test/performance/odp_sched_latency.c index 0fec49fb9..f3230cc17 100644 --- a/test/performance/odp_sched_latency.c +++ b/test/performance/odp_sched_latency.c @@ -1,8 +1,6 @@ -/* Copyright (c) 2016-2018, Linaro Limited - * Copyright (c) 2020-2022, Nokia - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2016-2018 Linaro Limited + * Copyright (c) 2020-2022 Nokia */ /** diff --git a/test/performance/odp_sched_latency_run.sh b/test/performance/odp_sched_latency_run.sh index b051c1a4e..8cd6dd480 100755 --- a/test/performance/odp_sched_latency_run.sh +++ b/test/performance/odp_sched_latency_run.sh @@ -1,9 +1,7 @@ #!/bin/sh # -# Copyright (c) 2016-2018, Linaro Limited -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2016-2018 Linaro Limited # # Script that passes command line arguments to odp_sched_latency test when # launched by 'make check' diff --git a/test/performance/odp_sched_perf.c b/test/performance/odp_sched_perf.c index 47f703338..85a158c9e 100644 --- a/test/performance/odp_sched_perf.c +++ b/test/performance/odp_sched_perf.c @@ -1,8 +1,6 @@ -/* Copyright (c) 2018, Linaro Limited - * Copyright (c) 2020-2024, Nokia - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018 Linaro Limited + * Copyright (c) 2020-2024 Nokia */ /** @@ -31,6 +29,9 @@ #define MAX_QUEUES (256 * 1024) #define MAX_GROUPS 256 +/* Limit data values to 16 bits. Large data values are costly on square root calculation. */ +#define DATA_MASK 0xffff + /* Max time to wait for new events in nanoseconds */ #define MAX_SCHED_WAIT_NS (10 * ODP_TIME_SEC_IN_NS) @@ -60,6 +61,7 @@ typedef struct test_options_t { uint32_t tot_queue; uint32_t tot_event; int touch_data; + uint32_t stress; uint32_t rd_words; uint32_t rw_words; uint32_t ctx_size; @@ -156,8 +158,15 @@ static void print_usage(void) " -b, --burst Maximum number of events per operation. Default: 100.\n" " -t, --type Queue type. 0: parallel, 1: atomic, 2: ordered. Default: 0.\n" " -f, --forward 0: Keep event in the original queue, 1: Forward event to the next queue. Default: 0.\n" - " -a, --fairness 0: Don't count events per queue, 1: Count and report events relative to average. Default: 0.\n" + " -F, --fairness 0: Don't count events per queue, 1: Count and report events relative to average. Default: 0.\n" " -w, --wait_ns Number of nsec to wait before enqueueing events. Default: 0.\n" + " -S, --stress CPU stress function(s) to be called for each event data word (requires -n or -m).\n" + " Data is processed as uint32_t words. Multiple flags may be selected.\n" + " 0: No extra data processing (default)\n" + " 0x1: Calculate square of each uint32_t\n" + " 0x2: Calculate log2 of each uint32_t\n" + " 0x4: Calculate square root of each uint32_t\n" + " 0x8: Calculate square root of each uint32_t in floating point\n" " -k, --ctx_rd_words Number of queue context words (uint64_t) to read on every event. Default: 0.\n" " -l, --ctx_rw_words Number of queue context words (uint64_t) to modify on every event. Default: 0.\n" " -n, --rd_words Number of event data words (uint64_t) to read before enqueueing it. Default: 0.\n" @@ -190,8 +199,9 @@ static int parse_options(int argc, char *argv[], test_options_t *test_options) {"burst", required_argument, NULL, 'b'}, {"type", required_argument, NULL, 't'}, {"forward", required_argument, NULL, 'f'}, - {"fairness", required_argument, NULL, 'a'}, + {"fairness", required_argument, NULL, 'F'}, {"wait_ns", required_argument, NULL, 'w'}, + {"stress", required_argument, NULL, 'S'}, {"ctx_rd_words", required_argument, NULL, 'k'}, {"ctx_rw_words", required_argument, NULL, 'l'}, {"rd_words", required_argument, NULL, 'n'}, @@ -204,7 +214,7 @@ static int parse_options(int argc, char *argv[], test_options_t *test_options) {NULL, 0, NULL, 0} }; - static const char *shortopts = "+c:q:L:H:d:e:s:g:j:b:t:f:a:w:k:l:n:m:p:u:U:vh"; + static const char *shortopts = "+c:q:L:H:d:e:s:g:j:b:t:f:F:w:S:k:l:n:m:p:u:U:vh"; test_options->num_cpu = 1; test_options->num_queue = 1; @@ -219,6 +229,7 @@ static int parse_options(int argc, char *argv[], test_options_t *test_options) test_options->queue_type = 0; test_options->forward = 0; test_options->fairness = 0; + test_options->stress = 0; test_options->ctx_rd_words = 0; test_options->ctx_rw_words = 0; test_options->rd_words = 0; @@ -271,9 +282,12 @@ static int parse_options(int argc, char *argv[], test_options_t *test_options) case 'f': test_options->forward = atoi(optarg); break; - case 'a': + case 'F': test_options->fairness = atoi(optarg); break; + case 'S': + test_options->stress = strtoul(optarg, NULL, 0); + break; case 'k': test_options->ctx_rd_words = atoi(optarg); break; @@ -321,6 +335,11 @@ static int parse_options(int argc, char *argv[], test_options_t *test_options) test_options->touch_data = test_options->rd_words || test_options->rw_words; + if (test_options->stress && test_options->touch_data == 0) { + ODPH_ERR("Use -n or/and -m to select event data size with a stress function\n"); + ret = -1; + } + if ((test_options->num_queue + test_options->num_dummy) > MAX_QUEUES) { ODPH_ERR("Too many queues. Max supported %i.\n", MAX_QUEUES); ret = -1; @@ -420,6 +439,19 @@ static int set_num_cpu(test_global_t *global) return 0; } +static uint64_t init_data(uint64_t init, uint64_t *data, uint32_t words) +{ + uint32_t i; + uint64_t val = init; + + for (i = 0; i < words; i++) { + data[i] = val; + val = (val + 1) & DATA_MASK; + } + + return val; +} + static int create_pool(test_global_t *global) { odp_pool_capability_t pool_capa; @@ -474,6 +506,7 @@ static int create_pool(test_global_t *global) printf(" queue size %u\n", queue_size); printf(" max burst size %u\n", max_burst); printf(" total events %u\n", tot_event); + printf(" stress 0x%x\n", test_options->stress); printf(" event size %u bytes", event_size); if (touch_data) printf(" (rd: %u, rw: %u)", 8 * test_options->rd_words, 8 * test_options->rw_words); @@ -612,6 +645,7 @@ static int create_queues(test_global_t *global) odp_pool_t pool = global->pool; uint8_t *ctx = NULL; uint32_t ctx_size = test_options->ctx_size; + uint64_t init_val = 0; if (type == 0) { type_str = "parallel"; @@ -755,6 +789,8 @@ static int create_queues(test_global_t *global) for (j = 0; j < num_event; j++) { odp_event_t ev; + uint64_t *data; + uint32_t words; if (test_options->pool_type == ODP_POOL_BUFFER) { odp_buffer_t buf = odp_buffer_alloc(pool); @@ -764,6 +800,9 @@ static int create_queues(test_global_t *global) return -1; } ev = odp_buffer_to_event(buf); + + data = odp_buffer_addr(buf); + words = odp_buffer_size(buf) / 8; } else { odp_packet_t pkt = odp_packet_alloc(pool, event_size); @@ -772,7 +811,13 @@ static int create_queues(test_global_t *global) return -1; } ev = odp_packet_to_event(pkt); + + data = odp_packet_data(pkt); + words = odp_packet_seg_len(pkt) / 8; } + + init_val = init_data(init_val, data, words); + if (odp_queue_enq(queue, ev)) { ODPH_ERR("Error: enqueue failed %u/%u\n", i, j); return -1; @@ -952,15 +997,14 @@ static inline uint64_t rw_ctx_data(void *ctx, uint32_t offset, return sum; } -static uint64_t rw_data(odp_event_t ev[], int num, - uint32_t rd_words, uint32_t rw_words, odp_pool_type_t pool_type) +static uint64_t rw_data(odp_event_t ev[], int num, uint32_t rd_words, uint32_t rw_words, + odp_pool_type_t pool_type) { uint64_t *data; - int i; uint32_t j; uint64_t sum = 0; - for (i = 0; i < num; i++) { + for (int i = 0; i < num; i++) { if (pool_type == ODP_POOL_BUFFER) data = odp_buffer_addr(odp_buffer_from_event(ev[i])); else @@ -978,6 +1022,40 @@ static uint64_t rw_data(odp_event_t ev[], int num, return sum; } +static uint64_t rw_data_stress(odp_event_t ev[], int num, uint32_t rd_words, uint32_t rw_words, + uint32_t stress, odp_pool_type_t pool_type) +{ + uint64_t *data; + uint64_t word; + uint32_t j; + uint64_t sum = 0; + + for (int i = 0; i < num; i++) { + if (pool_type == ODP_POOL_BUFFER) + data = odp_buffer_addr(odp_buffer_from_event(ev[i])); + else + data = odp_packet_data(odp_packet_from_event(ev[i])); + + for (j = 0; j < rd_words + rw_words; j++) { + word = data[j]; + + if (stress & 0x1) + sum += odph_stress_pow2_u32(word); + if (stress & 0x2) + sum += odph_stress_log2_u32(word); + if (stress & 0x4) + sum += odph_stress_sqrt_u32(word); + if (stress & 0x8) + sum += odph_stress_sqrt_f32(word); + + if (j >= rd_words) + data[j] = (word + 1) & DATA_MASK; + } + } + + return sum; +} + static int test_sched(void *arg) { int num, num_enq, ret, thr; @@ -994,16 +1072,17 @@ static int test_sched(void *arg) int num_group = test_options->num_group; int forward = test_options->forward; int fairness = test_options->fairness; - int touch_data = test_options->touch_data; - uint32_t rd_words = test_options->rd_words; - uint32_t rw_words = test_options->rw_words; + const int touch_data = test_options->touch_data; + const uint32_t stress = test_options->stress; + const uint32_t rd_words = test_options->rd_words; + const uint32_t rw_words = test_options->rw_words; uint32_t ctx_size = test_options->ctx_size; uint32_t ctx_rd_words = test_options->ctx_rd_words; uint32_t ctx_rw_words = test_options->ctx_rw_words; const uint32_t uarea_size = test_options->uarea_size; const uint32_t uarea_rd = test_options->uarea_rd; const uint32_t uarea_rw = test_options->uarea_rw; - odp_pool_type_t pool_type = test_options->pool_type; + const odp_pool_type_t pool_type = test_options->pool_type; int touch_ctx = ctx_rd_words || ctx_rw_words; odp_atomic_u32_t *exit_threads = &global->exit_threads; uint32_t ctx_offset = 0; @@ -1095,9 +1174,14 @@ static int test_sched(void *arg) ctx_rw_words); } - if (odp_unlikely(touch_data)) - data_sum += rw_data(ev, num, rd_words, - rw_words, pool_type); + if (odp_unlikely(touch_data)) { + if (stress) { + data_sum += rw_data_stress(ev, num, rd_words, rw_words, + stress, pool_type); + } else { + data_sum += rw_data(ev, num, rd_words, rw_words, pool_type); + } + } if (odp_unlikely(wait_ns)) { waits++; diff --git a/test/performance/odp_sched_perf_run.sh b/test/performance/odp_sched_perf_run.sh index 8e7911290..d4c8ebf6e 100755 --- a/test/performance/odp_sched_perf_run.sh +++ b/test/performance/odp_sched_perf_run.sh @@ -1,33 +1,45 @@ #!/bin/sh # -# Copyright (c) 2021, Nokia -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2021-2024 Nokia # TEST_DIR="${TEST_DIR:-$(dirname $0)}" -echo odp_sched_perf: buffer pool -echo =============================================== - -$TEST_DIR/odp_sched_perf${EXEEXT} -p 0 - -RET_VAL=$? -if [ $RET_VAL -ne 0 ]; then - echo odp_sched_perf -p 0: FAILED - exit $RET_VAL -fi - -echo odp_sched_perf: packet pool -echo =============================================== - -$TEST_DIR/odp_sched_perf${EXEEXT} -p 1 - -RET_VAL=$? -if [ $RET_VAL -ne 0 ]; then - echo odp_sched_perf -p 1: FAILED - exit $RET_VAL -fi +run() +{ + # Maximum number of workers may be less than the number of available processors. One worker + # should be always available. + MAX_WORKERS=$(($(nproc) - 2)) + if [ $MAX_WORKERS -lt 1 ]; then + MAX_WORKERS=1 + fi + + if [ $MAX_WORKERS -lt $1 ]; then + echo "Not enough CPU cores (requested $1, available $MAX_WORKERS). Skipping test." + else + echo odp_sched_perf -p 0 -c $1 + echo =============================================== + $TEST_DIR/odp_sched_perf${EXEEXT} -p 0 -c $1 + RET_VAL=$? + if [ $RET_VAL -ne 0 ]; then + echo odp_sched_perf FAILED + exit $RET_VAL + fi + + echo odp_sched_perf -p 1 -c $1 + echo =============================================== + $TEST_DIR/odp_sched_perf${EXEEXT} -p 1 -c $1 + RET_VAL=$? + if [ $RET_VAL -ne 0 ]; then + echo odp_sched_perf FAILED + exit $RET_VAL + fi + fi +} + +run 1 +run 2 +run 6 exit 0 diff --git a/test/performance/odp_sched_pktio.c b/test/performance/odp_sched_pktio.c index d8ab1b279..eb79b6b69 100644 --- a/test/performance/odp_sched_pktio.c +++ b/test/performance/odp_sched_pktio.c @@ -1,7 +1,5 @@ -/* Copyright (c) 2018, Linaro Limited - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018 Linaro Limited */ /** diff --git a/test/performance/odp_sched_pktio_run.sh b/test/performance/odp_sched_pktio_run.sh index dd332c191..828a83029 100755 --- a/test/performance/odp_sched_pktio_run.sh +++ b/test/performance/odp_sched_pktio_run.sh @@ -1,9 +1,7 @@ #!/bin/sh # -# Copyright (c) 2018, Linaro Limited -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2018 Linaro Limited # # directory where test binaries have been built @@ -11,7 +9,7 @@ TEST_DIR="${TEST_DIR:-$PWD}" # directory where test sources are, including scripts TEST_SRC_DIR=$(dirname $0) -PATH=$TEST_DIR:$TEST_DIR/../../example/generator:$PATH +PATH=$TEST_DIR:$PATH # exit codes expected by automake for skipped tests TEST_SKIPPED=77 @@ -19,8 +17,6 @@ TEST_SKIPPED=77 VALIDATION_TESTDIR=platform/$ODP_PLATFORM/test/validation PLATFORM_VALIDATION=${TEST_SRC_DIR}/../../$VALIDATION_TESTDIR -FLOOD_MODE=0 - # Use installed pktio env or for make check take it from platform directory if [ -f "./pktio_env" ]; then . ./pktio_env @@ -47,9 +43,9 @@ run_sched_pktio() exit $TEST_SKIPPED fi - type odp_generator > /dev/null + type odp_packet_gen > /dev/null if [ $? -ne 0 ]; then - echo "odp_generator not installed. Aborting." + echo "odp_packet_gen not installed. Aborting." cleanup_pktio_env exit 1 fi @@ -65,14 +61,14 @@ run_sched_pktio() sleep 1 - # Run generator with one worker - export ODP_PLATFORM_PARAMS="-m 256 --file-prefix="gen" \ + # Run odp_packet_gen with one tx thread + export ODP_PLATFORM_PARAMS="-m 512 --file-prefix="gen" \ --proc-type auto --no-pci \ --vdev net_pcap0,iface=$IF0" - (odp_generator${EXEEXT} --interval $FLOOD_MODE -I 0 \ - --srcip 192.168.0.1 --dstip 192.168.0.2 \ - -m u -w 1 2>&1 > /dev/null) \ + (odp_packet_gen${EXEEXT} --gap 0 -i 0 \ + --ipv4_src 192.168.0.1 --ipv4_dst 192.168.0.2 \ + -r 0 -t 1 2>&1 > /dev/null) \ 2>&1 > /dev/null & GEN_PID=$! diff --git a/test/performance/odp_scheduling.c b/test/performance/odp_scheduling.c deleted file mode 100644 index c9f3eb89f..000000000 --- a/test/performance/odp_scheduling.c +++ /dev/null @@ -1,1042 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright (c) 2013-2018 Linaro Limited - * Copyright (c) 2019-2023 Nokia - */ - -/** - * @example odp_scheduling.c - * - * Performance test application for miscellaneous scheduling operations - * - * @cond _ODP_HIDE_FROM_DOXYGEN_ - */ - -#include <string.h> -#include <stdlib.h> -#include <inttypes.h> - -/* ODP main header */ -#include <odp_api.h> - -/* ODP helper for Linux apps */ -#include <odp/helper/odph_api.h> - -/* Needs librt*/ -#include <time.h> - -/* GNU lib C */ -#include <getopt.h> - -#define MAX_BUF (512 * 1024) /**< Maximum pool size */ -#define MAX_ALLOCS 32 /**< Alloc burst size */ -#define QUEUES_PER_PRIO 64 /**< Queue per priority */ -#define NUM_PRIOS 2 /**< Number of tested priorities */ -#define QUEUE_ROUNDS (512 * 1024) /**< Queue test rounds */ -#define ALLOC_ROUNDS (1024 * 1024) /**< Alloc test rounds */ -#define MULTI_BUFS_MAX 4 /**< Buffer burst size */ -#define TEST_SEC 2 /**< Time test duration in sec */ -#define STATS_PER_LINE 8 /**< Stats per printed line */ - -/** Dummy message */ -typedef struct { - int msg_id; /**< Message ID */ - int seq; /**< Sequence number */ -} test_message_t; - -#define MSG_HELLO 1 /**< Hello */ -#define MSG_ACK 2 /**< Ack */ - -/** Test arguments */ -typedef struct { - double test_sec; /**< CPU frequency test duration in seconds */ - unsigned int cpu_count; /**< CPU count */ - int fairness; /**< Check fairness */ -} test_args_t; - -typedef struct ODP_ALIGNED_CACHE { - uint64_t num_ev; -} queue_context_t; - -/** Test global variables */ -typedef struct { - odp_barrier_t barrier; - odp_spinlock_t lock; - odp_pool_t pool; - int first_thr; - int queues_per_prio; - test_args_t args; - odp_queue_t queue[NUM_PRIOS][QUEUES_PER_PRIO]; - queue_context_t queue_ctx[NUM_PRIOS][QUEUES_PER_PRIO]; -} test_globals_t; - -/* Prints and initializes queue statistics */ -static void print_stats(int prio, test_globals_t *globals) -{ - int i, j, k; - - if (prio == odp_schedule_max_prio()) - i = 0; - else - i = 1; - - printf("\nQueue fairness\n-----+--------\n"); - - for (j = 0; j < globals->queues_per_prio;) { - printf(" %2i | ", j); - - for (k = 0; k < STATS_PER_LINE - 1; k++) { - printf(" %8" PRIu64, - globals->queue_ctx[i][j].num_ev); - globals->queue_ctx[i][j++].num_ev = 0; - } - - printf(" %8" PRIu64 "\n", globals->queue_ctx[i][j].num_ev); - globals->queue_ctx[i][j++].num_ev = 0; - } - - printf("\n"); -} - -/** - * @internal Clear all scheduled queues. Retry to be sure that all - * buffers have been scheduled. - */ -static void clear_sched_queues(void) -{ - odp_event_t ev; - - while (1) { - ev = odp_schedule(NULL, ODP_SCHED_NO_WAIT); - - if (ev == ODP_EVENT_INVALID) - break; - - odp_event_free(ev); - } -} - -/** - * @internal Enqueue events into queues - * - * @param thr Thread - * @param prio Queue priority - * @param num_queues Number of queues - * @param num_events Number of events - * @param globals Test shared data - * - * @return 0 if successful - */ -static int enqueue_events(int thr, int prio, int num_queues, int num_events, - test_globals_t *globals) -{ - odp_buffer_t buf[num_events]; - odp_event_t ev[num_events]; - odp_queue_t queue; - int i, j, k, ret; - - if (prio == odp_schedule_max_prio()) - i = 0; - else - i = 1; - - /* Alloc and enqueue a buffer per queue */ - for (j = 0; j < num_queues; j++) { - queue = globals->queue[i][j]; - - ret = odp_buffer_alloc_multi(globals->pool, buf, num_events); - if (ret != num_events) { - ODPH_ERR(" [%i] buffer alloc failed\n", thr); - ret = ret < 0 ? 0 : ret; - ret = ret > num_events ? num_events : ret; /* GCC-9 -O3 workaround */ - odp_buffer_free_multi(buf, ret); - return -1; - } - for (k = 0; k < num_events; k++) { - if (!odp_buffer_is_valid(buf[k])) { - ODPH_ERR(" [%i] buffer alloc failed\n", thr); - odp_buffer_free_multi(buf, num_events); - return -1; - } - ev[k] = odp_buffer_to_event(buf[k]); - } - - ret = odp_queue_enq_multi(queue, ev, num_events); - if (ret != num_events) { - ODPH_ERR(" [%i] Queue enqueue failed.\n", thr); - ret = ret < 0 ? 0 : ret; - odp_buffer_free_multi(&buf[ret], num_events - ret); - return -1; - } - } - - return 0; -} - -/** - * @internal Test single buffer alloc and free - * - * @param thr Thread - * @param globals Test shared data - * - * @return 0 if successful - */ -static int test_alloc_single(int thr, test_globals_t *globals) -{ - int i; - odp_buffer_t temp_buf; - uint64_t c1, c2, cycles; - - c1 = odp_cpu_cycles(); - - for (i = 0; i < ALLOC_ROUNDS; i++) { - temp_buf = odp_buffer_alloc(globals->pool); - - if (!odp_buffer_is_valid(temp_buf)) { - ODPH_ERR(" [%i] alloc_single failed\n", thr); - return -1; - } - - odp_buffer_free(temp_buf); - } - - c2 = odp_cpu_cycles(); - cycles = odp_cpu_cycles_diff(c2, c1); - cycles = cycles / ALLOC_ROUNDS; - - printf(" [%i] alloc_sng alloc+free %6" PRIu64 " CPU cycles\n", - thr, cycles); - - return 0; -} - -/** - * @internal Test multiple buffers alloc and free - * - * @param thr Thread - * @param globals Test shared data - * - * @return 0 if successful - */ -static int test_alloc_multi(int thr, test_globals_t *globals) -{ - int i, j, ret; - const int num_alloc = MAX_ALLOCS; - odp_buffer_t temp_buf[num_alloc]; - uint64_t c1, c2, cycles; - - c1 = odp_cpu_cycles(); - - for (i = 0; i < ALLOC_ROUNDS; i++) { - ret = odp_buffer_alloc_multi(globals->pool, temp_buf, num_alloc); - if (ret != num_alloc) { - ODPH_ERR(" [%i] buffer alloc failed\n", thr); - ret = ret < 0 ? 0 : ret; - odp_buffer_free_multi(temp_buf, ret); - return -1; - } - - for (j = 0; j < num_alloc; j++) { - if (!odp_buffer_is_valid(temp_buf[j])) { - ODPH_ERR(" [%i] alloc_multi failed\n", thr); - odp_buffer_free_multi(temp_buf, num_alloc); - return -1; - } - } - odp_buffer_free_multi(temp_buf, num_alloc); - } - - c2 = odp_cpu_cycles(); - cycles = odp_cpu_cycles_diff(c2, c1); - cycles = cycles / (ALLOC_ROUNDS * num_alloc); - - printf(" [%i] alloc_multi alloc+free %6" PRIu64 " CPU cycles\n", - thr, cycles); - - return 0; -} - -/** - * @internal Test plain queues - * - * Enqueue to and dequeue to/from a single shared queue. - * - * @param thr Thread - * @param globals Test shared data - * - * @return 0 if successful - */ -static int test_plain_queue(int thr, test_globals_t *globals) -{ - odp_event_t ev; - odp_buffer_t buf; - test_message_t *t_msg; - odp_queue_t queue; - uint64_t c1, c2, cycles; - int i, j; - - /* Alloc test message */ - buf = odp_buffer_alloc(globals->pool); - - if (!odp_buffer_is_valid(buf)) { - ODPH_ERR(" [%i] buffer alloc failed\n", thr); - return -1; - } - - /* odp_buffer_print(buf); */ - - t_msg = odp_buffer_addr(buf); - t_msg->msg_id = MSG_HELLO; - t_msg->seq = 0; - - queue = odp_queue_lookup("plain_queue"); - - if (queue == ODP_QUEUE_INVALID) { - printf(" [%i] Queue lookup failed.\n", thr); - return -1; - } - - c1 = odp_cpu_cycles(); - - for (i = 0; i < QUEUE_ROUNDS; i++) { - ev = odp_buffer_to_event(buf); - - if (odp_queue_enq(queue, ev)) { - ODPH_ERR(" [%i] Queue enqueue failed.\n", thr); - odp_buffer_free(buf); - return -1; - } - - /* When enqueue and dequeue are decoupled (e.g. not using a - * common lock), an enqueued event may not be immediately - * visible to dequeue. So we just try again for a while. */ - for (j = 0; j < 100; j++) { - ev = odp_queue_deq(queue); - if (ev != ODP_EVENT_INVALID) - break; - odp_cpu_pause(); - } - - buf = odp_buffer_from_event(ev); - - if (!odp_buffer_is_valid(buf)) { - ODPH_ERR(" [%i] Queue empty.\n", thr); - return -1; - } - } - - c2 = odp_cpu_cycles(); - cycles = odp_cpu_cycles_diff(c2, c1); - cycles = cycles / QUEUE_ROUNDS; - - printf(" [%i] plain_queue enq+deq %6" PRIu64 " CPU cycles\n", - thr, cycles); - - odp_buffer_free(buf); - return 0; -} - -/** - * @internal Test scheduling of a single queue - with odp_schedule() - * - * Enqueue a buffer to the shared queue. Schedule and enqueue the received - * buffer back into the queue. - * - * @param str Test case name string - * @param thr Thread - * @param prio Priority - * @param globals Test shared data - * - * @return 0 if successful - */ -static int test_schedule_single(const char *str, int thr, - int prio, test_globals_t *globals) -{ - odp_event_t ev; - odp_queue_t queue; - uint64_t c1, c2, cycles; - uint32_t i; - uint32_t tot; - - if (enqueue_events(thr, prio, 1, 1, globals)) - return -1; - - c1 = odp_cpu_cycles(); - - for (i = 0; i < QUEUE_ROUNDS; i++) { - ev = odp_schedule(&queue, ODP_SCHED_WAIT); - - if (odp_queue_enq(queue, ev)) { - ODPH_ERR(" [%i] Queue enqueue failed.\n", thr); - odp_event_free(ev); - return -1; - } - } - - /* Clear possible locally stored buffers */ - odp_schedule_pause(); - - tot = i; - - while (1) { - ev = odp_schedule(&queue, ODP_SCHED_NO_WAIT); - - if (ev == ODP_EVENT_INVALID) - break; - - tot++; - - if (odp_queue_enq(queue, ev)) { - ODPH_ERR(" [%i] Queue enqueue failed.\n", thr); - odp_event_free(ev); - return -1; - } - } - - c2 = odp_cpu_cycles(); - cycles = odp_cpu_cycles_diff(c2, c1); - - odp_barrier_wait(&globals->barrier); - - odp_schedule_resume(); - - clear_sched_queues(); - - cycles = cycles / tot; - - printf(" [%i] %s enq+deq %6" PRIu64 " CPU cycles\n", thr, str, cycles); - - return 0; -} - -/** - * @internal Test scheduling of multiple queues - with odp_schedule() - * - * Enqueue a buffer to each queue. Schedule and enqueue the received - * buffer back into the queue it came from. - * - * @param str Test case name string - * @param thr Thread - * @param prio Priority - * @param globals Test shared data - * - * @return 0 if successful - */ -static int test_schedule_many(const char *str, int thr, - int prio, test_globals_t *globals) -{ - odp_event_t ev; - odp_queue_t queue; - uint64_t c1, c2, cycles; - uint32_t i; - uint32_t tot; - - if (enqueue_events(thr, prio, globals->queues_per_prio, 1, globals)) - return -1; - - /* Start sched-enq loop */ - c1 = odp_cpu_cycles(); - - for (i = 0; i < QUEUE_ROUNDS; i++) { - ev = odp_schedule(&queue, ODP_SCHED_WAIT); - - if (odp_queue_enq(queue, ev)) { - ODPH_ERR(" [%i] Queue enqueue failed.\n", thr); - odp_event_free(ev); - return -1; - } - } - - /* Clear possible locally stored buffers */ - odp_schedule_pause(); - - tot = i; - - while (1) { - ev = odp_schedule(&queue, ODP_SCHED_NO_WAIT); - - if (ev == ODP_EVENT_INVALID) - break; - - tot++; - - if (odp_queue_enq(queue, ev)) { - ODPH_ERR(" [%i] Queue enqueue failed.\n", thr); - odp_event_free(ev); - return -1; - } - } - - c2 = odp_cpu_cycles(); - cycles = odp_cpu_cycles_diff(c2, c1); - - odp_barrier_wait(&globals->barrier); - - odp_schedule_resume(); - - clear_sched_queues(); - - cycles = cycles / tot; - - printf(" [%i] %s enq+deq %6" PRIu64 " CPU cycles\n", thr, str, cycles); - - return 0; -} - -/** - * @internal Test scheduling of multiple queues with multi_sched and multi_enq - * - * @param str Test case name string - * @param thr Thread - * @param prio Priority - * @param globals Test shared data - * - * @return 0 if successful - */ -static int test_schedule_multi(const char *str, int thr, - int prio, test_globals_t *globals) -{ - odp_event_t ev[MULTI_BUFS_MAX]; - odp_queue_t queue; - uint64_t c1, c2, cycles; - int i; - int num; - uint32_t tot = 0; - - if (enqueue_events(thr, prio, globals->queues_per_prio, MULTI_BUFS_MAX, - globals)) - return -1; - - /* Start sched-enq loop */ - c1 = odp_cpu_cycles(); - - for (i = 0; i < QUEUE_ROUNDS; i++) { - num = odp_schedule_multi(&queue, ODP_SCHED_WAIT, ev, - MULTI_BUFS_MAX); - - tot += num; - - if (globals->args.fairness) { - queue_context_t *queue_ctx; - - queue_ctx = odp_queue_context(queue); - queue_ctx->num_ev += num; - } - - /* Assume we can enqueue all events */ - if (odp_queue_enq_multi(queue, ev, num) != num) { - ODPH_ERR(" [%i] Queue enqueue failed.\n", thr); - return -1; - } - } - - /* Clear possible locally stored events */ - odp_schedule_pause(); - - while (1) { - num = odp_schedule_multi(&queue, ODP_SCHED_NO_WAIT, ev, - MULTI_BUFS_MAX); - - if (num == 0) - break; - - tot += num; - - if (globals->args.fairness) { - queue_context_t *queue_ctx; - - queue_ctx = odp_queue_context(queue); - queue_ctx->num_ev += num; - } - - /* Assume we can enqueue all events */ - if (odp_queue_enq_multi(queue, ev, num) != num) { - ODPH_ERR(" [%i] Queue enqueue failed.\n", thr); - return -1; - } - } - - c2 = odp_cpu_cycles(); - cycles = odp_cpu_cycles_diff(c2, c1); - - odp_barrier_wait(&globals->barrier); - - odp_schedule_resume(); - - clear_sched_queues(); - - if (tot) - cycles = cycles / tot; - else - cycles = 0; - - printf(" [%i] %s enq+deq %6" PRIu64 " CPU cycles\n", thr, str, cycles); - - odp_barrier_wait(&globals->barrier); - - if (globals->args.fairness && globals->first_thr == thr) - print_stats(prio, globals); - - return 0; -} - -/** - * @internal Worker thread - * - * @param arg Arguments - * - * @return non zero on failure - */ -static int run_thread(void *arg ODP_UNUSED) -{ - int thr; - odp_shm_t shm; - test_globals_t *globals; - odp_barrier_t *barrier; - - thr = odp_thread_id(); - - printf("Thread %i starts on CPU %i\n", thr, odp_cpu_id()); - - shm = odp_shm_lookup("test_globals"); - globals = odp_shm_addr(shm); - - if (globals == NULL) { - ODPH_ERR("Shared mem lookup failed\n"); - return -1; - } - - barrier = &globals->barrier; - - /* - * Test barriers back-to-back - */ - odp_barrier_wait(barrier); - odp_barrier_wait(barrier); - odp_barrier_wait(barrier); - odp_barrier_wait(barrier); - odp_barrier_wait(barrier); - - /* Select which thread is the first_thr */ - while (globals->first_thr < 0) { - if (odp_spinlock_trylock(&globals->lock)) { - globals->first_thr = thr; - odp_spinlock_unlock(&globals->lock); - } - } - - odp_barrier_wait(barrier); - - if (test_alloc_single(thr, globals)) - return -1; - - odp_barrier_wait(barrier); - - if (test_alloc_multi(thr, globals)) - return -1; - - odp_barrier_wait(barrier); - - if (test_plain_queue(thr, globals)) - return -1; - - /* Low prio */ - - odp_barrier_wait(barrier); - - if (test_schedule_single("sched_____s_lo", thr, - odp_schedule_min_prio(), globals)) - return -1; - - odp_barrier_wait(barrier); - - if (test_schedule_many("sched_____m_lo", thr, - odp_schedule_min_prio(), globals)) - return -1; - - odp_barrier_wait(barrier); - - if (test_schedule_multi("sched_multi_lo", thr, - odp_schedule_min_prio(), globals)) - return -1; - - /* High prio */ - - odp_barrier_wait(barrier); - - if (test_schedule_single("sched_____s_hi", thr, - odp_schedule_max_prio(), globals)) - return -1; - - odp_barrier_wait(barrier); - - if (test_schedule_many("sched_____m_hi", thr, - odp_schedule_max_prio(), globals)) - return -1; - - odp_barrier_wait(barrier); - - if (test_schedule_multi("sched_multi_hi", thr, - odp_schedule_max_prio(), globals)) - return -1; - - printf("Thread %i exits\n", thr); - fflush(NULL); - return 0; -} - -/** - * @internal Test cycle counter frequency - */ -static void test_cpu_freq(double test_sec) -{ - odp_time_t cur_time, test_time, start_time, end_time; - uint64_t c1, c2, cycles; - uint64_t nsec; - double diff_max_hz, max_cycles; - - printf("\nCPU cycle count frequency test (runs about %f sec)\n", - test_sec); - - test_time = odp_time_local_from_ns(test_sec * ODP_TIME_SEC_IN_NS); - start_time = odp_time_local(); - end_time = odp_time_sum(start_time, test_time); - - /* Start the measurement */ - c1 = odp_cpu_cycles(); - - do { - cur_time = odp_time_local(); - } while (odp_time_cmp(end_time, cur_time) > 0); - - c2 = odp_cpu_cycles(); - - test_time = odp_time_diff(cur_time, start_time); - nsec = odp_time_to_ns(test_time); - - cycles = odp_cpu_cycles_diff(c2, c1); - max_cycles = (nsec * odp_cpu_hz_max()) / 1000000000.0; - - /* Compare measured CPU cycles to maximum theoretical CPU cycle count */ - diff_max_hz = ((double)(cycles) - max_cycles) / max_cycles; - - printf("odp_time %" PRIu64 " ns\n", nsec); - printf("odp_cpu_cycles %" PRIu64 " CPU cycles\n", cycles); - printf("odp_sys_cpu_hz %" PRIu64 " hz\n", odp_cpu_hz_max()); - printf("Diff from max CPU freq %f%%\n", diff_max_hz * 100.0); - - printf("\n"); -} - -/** - * @internal Print help - */ -static void print_usage(void) -{ - printf("\n\nUsage: ./odp_example [options]\n"); - printf("Options:\n"); - printf(" -t, --time <number> test duration, default=%.1f\n", (double)TEST_SEC); - printf(" -c, --count <number> CPU count, 0=all available, default=1\n"); - printf(" -h, --help this help\n"); - printf(" -f, --fair collect fairness statistics\n"); - printf("\n\n"); -} - -/** - * @internal Parse arguments - * - * @param argc Argument count - * @param argv Argument vector - * @param args Test arguments - */ -static void parse_args(int argc, char *argv[], test_args_t *args) -{ - int opt; - int long_index; - - static const struct option longopts[] = { - {"time", required_argument, NULL, 't'}, - {"count", required_argument, NULL, 'c'}, - {"fair", no_argument, NULL, 'f'}, - {"help", no_argument, NULL, 'h'}, - {NULL, 0, NULL, 0} - }; - - static const char *shortopts = "+t:c:fh"; - - args->cpu_count = 1; /* use one worker by default */ - args->test_sec = TEST_SEC; - - while (1) { - opt = getopt_long(argc, argv, shortopts, longopts, &long_index); - - if (opt == -1) - break; /* No more options */ - - switch (opt) { - case 'f': - args->fairness = 1; - break; - - case 't': - args->test_sec = atof(optarg); - break; - - case 'c': - args->cpu_count = atoi(optarg); - break; - - case 'h': - print_usage(); - exit(EXIT_SUCCESS); - break; - - default: - break; - } - } -} - -/** - * Test main function - */ -int main(int argc, char *argv[]) -{ - odph_helper_options_t helper_options; - odph_thread_t *thread_tbl; - test_args_t args; - int num_workers; - odp_cpumask_t cpumask; - odp_pool_t pool; - odp_queue_t plain_queue; - int i, j; - odp_shm_t shm; - test_globals_t *globals; - char cpumaskstr[ODP_CPUMASK_STR_SIZE]; - odp_pool_param_t params; - int ret = 0; - odp_instance_t instance; - odp_init_t init_param; - odph_thread_common_param_t thr_common; - odph_thread_param_t thr_param; - odp_queue_capability_t capa; - odp_pool_capability_t pool_capa; - odp_schedule_config_t schedule_config; - uint32_t num_queues, num_buf; - - printf("\nODP example starts\n\n"); - - /* Let helper collect its own arguments (e.g. --odph_proc) */ - argc = odph_parse_options(argc, argv); - if (odph_options(&helper_options)) { - ODPH_ERR("Error: reading ODP helper options failed.\n"); - exit(EXIT_FAILURE); - } - - odp_init_param_init(&init_param); - init_param.mem_model = helper_options.mem_model; - - memset(&args, 0, sizeof(args)); - parse_args(argc, argv, &args); - - /* ODP global init */ - if (odp_init_global(&instance, &init_param, NULL)) { - ODPH_ERR("ODP global init failed.\n"); - return -1; - } - - /* - * Init this thread. It makes also ODP calls when - * setting up resources for worker threads. - */ - if (odp_init_local(instance, ODP_THREAD_CONTROL)) { - ODPH_ERR("ODP global init failed.\n"); - return -1; - } - - printf("\n"); - odp_sys_info_print(); - - /* Get default worker cpumask */ - num_workers = odp_cpumask_default_worker(&cpumask, args.cpu_count); - (void)odp_cpumask_to_str(&cpumask, cpumaskstr, sizeof(cpumaskstr)); - - printf("num worker threads: %i\n", num_workers); - printf("first CPU: %i\n", odp_cpumask_first(&cpumask)); - printf("cpu mask: %s\n", cpumaskstr); - - thread_tbl = calloc(num_workers, sizeof(odph_thread_t)); - if (!thread_tbl) { - ODPH_ERR("no memory for thread_tbl\n"); - return -1; - } - - /* Test cycle count frequency */ - test_cpu_freq(args.test_sec); - - shm = odp_shm_reserve("test_globals", - sizeof(test_globals_t), ODP_CACHE_LINE_SIZE, 0); - if (shm == ODP_SHM_INVALID) { - ODPH_ERR("Shared memory reserve failed.\n"); - return -1; - } - - globals = odp_shm_addr(shm); - memset(globals, 0, sizeof(test_globals_t)); - memcpy(&globals->args, &args, sizeof(test_args_t)); - - /* - * Create message pool - */ - if (odp_pool_capability(&pool_capa)) { - ODPH_ERR("Pool capabilities failed.\n"); - return -1; - } - - num_buf = MAX_BUF; - if (pool_capa.buf.max_num && pool_capa.buf.max_num < MAX_BUF) - num_buf = pool_capa.buf.max_num; - - odp_pool_param_init(¶ms); - params.buf.size = sizeof(test_message_t); - params.buf.align = 0; - params.buf.num = num_buf; - params.type = ODP_POOL_BUFFER; - - pool = odp_pool_create("msg_pool", ¶ms); - - if (pool == ODP_POOL_INVALID) { - ODPH_ERR("Pool create failed.\n"); - return -1; - } - - globals->pool = pool; - - if (odp_queue_capability(&capa)) { - ODPH_ERR("Fetching queue capabilities failed.\n"); - return -1; - } - - odp_schedule_config_init(&schedule_config); - odp_schedule_config(&schedule_config); - - globals->queues_per_prio = QUEUES_PER_PRIO; - num_queues = globals->queues_per_prio * NUM_PRIOS; - if (schedule_config.num_queues && - num_queues > schedule_config.num_queues) - globals->queues_per_prio = schedule_config.num_queues / - NUM_PRIOS; - - /* One plain queue is also used */ - num_queues = (globals->queues_per_prio * NUM_PRIOS) + 1; - if (num_queues > capa.max_queues) - globals->queues_per_prio--; - - if (globals->queues_per_prio <= 0) { - ODPH_ERR("Not enough queues. At least 1 plain and %d scheduled " - "queues required.\n", NUM_PRIOS); - return -1; - } - - /* - * Create a queue for plain queue test - */ - plain_queue = odp_queue_create("plain_queue", NULL); - - if (plain_queue == ODP_QUEUE_INVALID) { - ODPH_ERR("Plain queue create failed.\n"); - return -1; - } - - /* - * Create queues for schedule test. - */ - for (i = 0; i < NUM_PRIOS; i++) { - char name[] = "sched_XX_YY"; - odp_queue_t queue; - odp_queue_param_t param; - int prio; - - if (i == 0) - prio = odp_schedule_max_prio(); - else - prio = odp_schedule_min_prio(); - - name[6] = '0' + (prio / 10); - name[7] = '0' + prio - (10 * (prio / 10)); - - odp_queue_param_init(¶m); - param.type = ODP_QUEUE_TYPE_SCHED; - param.sched.prio = prio; - param.sched.sync = ODP_SCHED_SYNC_ATOMIC; - param.sched.group = ODP_SCHED_GROUP_ALL; - - for (j = 0; j < globals->queues_per_prio; j++) { - name[9] = '0' + j / 10; - name[10] = '0' + j - 10 * (j / 10); - - queue = odp_queue_create(name, ¶m); - - if (queue == ODP_QUEUE_INVALID) { - ODPH_ERR("Schedule queue create failed.\n"); - return -1; - } - - globals->queue[i][j] = queue; - - if (odp_queue_context_set(queue, - &globals->queue_ctx[i][j], - sizeof(queue_context_t)) - < 0) { - ODPH_ERR("Queue context set failed.\n"); - return -1; - } - } - } - - odp_shm_print_all(); - - odp_pool_print(pool); - - /* Barrier to sync test case execution */ - odp_barrier_init(&globals->barrier, num_workers); - - odp_spinlock_init(&globals->lock); - globals->first_thr = -1; - - /* Create and launch worker threads */ - - odph_thread_common_param_init(&thr_common); - thr_common.instance = instance; - thr_common.cpumask = &cpumask; - thr_common.share_param = 1; - - odph_thread_param_init(&thr_param); - thr_param.thr_type = ODP_THREAD_WORKER; - thr_param.start = run_thread; - thr_param.arg = NULL; - - odph_thread_create(thread_tbl, &thr_common, &thr_param, num_workers); - - /* Wait for worker threads to terminate */ - odph_thread_join(thread_tbl, num_workers); - free(thread_tbl); - - printf("ODP example complete\n\n"); - - for (i = 0; i < NUM_PRIOS; i++) { - odp_queue_t queue; - - for (j = 0; j < globals->queues_per_prio; j++) { - queue = globals->queue[i][j]; - ret += odp_queue_destroy(queue); - } - } - - ret += odp_shm_free(shm); - ret += odp_queue_destroy(plain_queue); - ret += odp_pool_destroy(pool); - ret += odp_term_local(); - ret += odp_term_global(instance); - - return ret; -} diff --git a/test/performance/odp_scheduling_run.sh b/test/performance/odp_scheduling_run.sh deleted file mode 100755 index 4e004264e..000000000 --- a/test/performance/odp_scheduling_run.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/sh -# -# Copyright (c) 2015-2018, Linaro Limited -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -# -# Script that passes command line arguments to odp_scheduling test when -# launched by 'make check' - -TEST_DIR="${TEST_DIR:-$(dirname $0)}" -ALL=0 - -run() -{ - echo odp_scheduling_run starts requesting $1 worker threads - echo ====================================================== - - if [ $(nproc) -lt $1 ]; then - echo "Not enough CPU cores. Skipping test." - else - $TEST_DIR/odp_scheduling${EXEEXT} -c $1 -t 0.1 - RET_VAL=$? - if [ $RET_VAL -ne 0 ]; then - echo odp_scheduling FAILED - exit $RET_VAL - fi - fi -} - -run 1 -run 5 -run 8 -run 11 -run $ALL - -exit 0 diff --git a/test/performance/odp_stress.c b/test/performance/odp_stress.c index 3ec01df33..1f768b353 100644 --- a/test/performance/odp_stress.c +++ b/test/performance/odp_stress.c @@ -1,7 +1,5 @@ -/* Copyright (c) 2022, Nokia - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2022-2024 Nokia */ /** @@ -23,6 +21,11 @@ #include <odp_api.h> #include <odp/helper/odph_api.h> +#define MODE_MEMCPY 0x1 +#define MODE_COPY_U32 0x2 +#define MODE_SQRT_U32 0x4 +#define MODE_SQRT_F32 0x8 + typedef struct test_options_t { uint32_t num_cpu; uint64_t period_ns; @@ -37,6 +40,7 @@ typedef struct test_stat_t { uint64_t rounds; uint64_t tot_nsec; uint64_t work_nsec; + uint64_t dummy_sum; } test_stat_t; @@ -61,7 +65,7 @@ typedef struct test_global_t { odp_timer_pool_t timer_pool; odp_pool_t tmo_pool; uint64_t period_ticks; - uint8_t *worker_mem; + void *worker_mem; odp_timer_t timer[ODP_THREAD_COUNT_MAX]; odp_queue_t tmo_queue[ODP_THREAD_COUNT_MAX]; odp_schedule_group_t group[ODP_THREAD_COUNT_MAX]; @@ -75,6 +79,35 @@ typedef struct test_global_t { test_global_t *test_global; +/* 250 random numbers: values between 100 and 20000 */ +static const uint32_t pseudo_rand[] = { + 14917, 9914, 5313, 4092, 16041, 7757, 17247, 14804, 3255, 7675, + 13149, 7288, 5665, 7095, 9594, 1296, 2058, 6013, 17779, 11788, + 14855, 760, 16891, 2483, 10937, 16385, 13593, 10674, 4080, 2392, + 12218, 11475, 6009, 5798, 7582, 8358, 4520, 14655, 10555, 6598, + 10598, 16097, 16634, 17102, 16296, 17142, 5748, 11079, 14569, 10961, + 16693, 17775, 19155, 14102, 16132, 19561, 8746, 4521, 8280, 355, + 10655, 14539, 5641, 2343, 19213, 9187, 570, 15096, 780, 1711, + 8007, 8128, 17416, 14123, 4713, 13774, 11450, 9031, 1194, 16531, + 9349, 3496, 19130, 19458, 12412, 9168, 9508, 10607, 5952, 19375, + 14934, 18276, 12116, 510, 14272, 10362, 4095, 6789, 1600, 18509, + 9274, 2815, 3175, 1122, 6495, 7991, 18831, 17550, 7056, 16185, + 18594, 19178, 10028, 1182, 13410, 16173, 3548, 8013, 6099, 2619, + 7359, 6889, 15227, 4910, 12341, 18904, 671, 5851, 9836, 18105, + 13624, 8138, 5751, 15590, 17415, 15330, 697, 11439, 7008, 10676, + 9863, 17163, 10885, 5581, 8078, 4689, 9870, 18370, 19323, 8831, + 11444, 3602, 10125, 6244, 13171, 19335, 15635, 19684, 17581, 9513, + 8444, 13724, 5243, 9987, 19886, 5087, 17292, 16294, 19627, 14985, + 1999, 9889, 1311, 5589, 10084, 911, 301, 2260, 15305, 8265, + 409, 1732, 1463, 17680, 15038, 2440, 4239, 9554, 14045, 924, + 13997, 3472, 18304, 4848, 10601, 18604, 6459, 19394, 2962, 11218, + 5405, 9869, 133, 2512, 13440, 4350, 625, 6580, 5082, 12908, + 11517, 8919, 354, 14216, 3190, 15515, 1277, 1028, 507, 9525, + 10115, 811, 1268, 17587, 5192, 7240, 17371, 4902, 19908, 1027, + 3475, 8658, 11782, 13701, 13034, 154, 4940, 12679, 14067, 2707, + 10180, 4669, 17756, 6602, 6727, 818, 8644, 580, 16988, 19127 +}; + static void print_usage(void) { printf("\n" @@ -83,9 +116,12 @@ static void print_usage(void) " -c, --num_cpu Number of CPUs (worker threads). 0: all available CPUs. Default: 1\n" " -p, --period_ns Timeout period in nsec. Default: 100 ms\n" " -r, --rounds Number of timeout rounds. Default: 2\n" - " -m, --mode Select test mode. Default: 1\n" - " 0: No stress, just wait for timeouts\n" - " 1: Memcpy\n" + " -m, --mode Test mode flags, multiple may be selected. Default: 0x1\n" + " 0: No stress, just wait for timeouts\n" + " 0x1: memcpy()\n" + " 0x2: Memory copy loop\n" + " 0x4: Integer square root\n" + " 0x8: Floating point square root\n" " -s, --mem_size Memory size per worker in bytes. Default: 2048\n" " -g, --group_mode Select schedule group mode: Default: 1\n" " 0: Use GROUP_ALL group. Scheduler load balances timeout events.\n" @@ -116,7 +152,7 @@ static int parse_options(int argc, char *argv[], test_options_t *test_options) test_options->num_cpu = 1; test_options->period_ns = 100 * ODP_TIME_MSEC_IN_NS; test_options->rounds = 2; - test_options->mode = 1; + test_options->mode = MODE_MEMCPY; test_options->mem_size = 2048; test_options->group_mode = 1; @@ -137,7 +173,7 @@ static int parse_options(int argc, char *argv[], test_options_t *test_options) test_options->rounds = atoll(optarg); break; case 'm': - test_options->mode = atoi(optarg); + test_options->mode = strtoul(optarg, NULL, 0); break; case 's': test_options->mem_size = atoll(optarg); @@ -155,8 +191,9 @@ static int parse_options(int argc, char *argv[], test_options_t *test_options) } if (test_options->mode) { - if (test_options->mem_size < 2) { - ODPH_ERR("Too small memory size\n"); + if (test_options->mem_size < sizeof(uint32_t)) { + ODPH_ERR("Too small memory size. Minimum is %zu bytes.\n", + sizeof(uint32_t)); return -1; } } @@ -218,20 +255,25 @@ static int worker_thread(void *arg) odp_event_t ev; odp_timeout_t tmo; odp_timer_t timer; - uint64_t tot_nsec, work_sum, max_nsec; + uint64_t tot_nsec, work_sum, max_nsec, i; odp_timer_start_t start_param; odp_time_t t1, t2, max_time; odp_time_t work_t1, work_t2; uint8_t *src = NULL, *dst = NULL; + uint32_t *src_u32 = NULL, *dst_u32 = NULL; thread_arg_t *thread_arg = arg; int worker_idx = thread_arg->worker_idx; test_global_t *global = thread_arg->global; test_options_t *test_options = &global->test_options; - int mode = test_options->mode; - int group_mode = test_options->group_mode; - uint64_t mem_size = test_options->mem_size; - uint64_t copy_size = mem_size / 2; + const int group_mode = test_options->group_mode; + const int mode = test_options->mode; + const int data_mode = mode & (MODE_SQRT_U32 | MODE_SQRT_F32); + const uint64_t mem_size = test_options->mem_size; + const uint64_t copy_size = mem_size / 2; + const uint64_t num_words = mem_size / sizeof(uint32_t); + const uint64_t copy_words = num_words / 2; uint64_t rounds = 0; + uint64_t dummy_sum = 0; int ret = 0; uint32_t done = 0; uint64_t wait = ODP_SCHED_WAIT; @@ -255,8 +297,10 @@ static int worker_thread(void *arg) } if (mode) { - src = global->worker_mem + worker_idx * mem_size; + src = (uint8_t *)global->worker_mem + worker_idx * mem_size; dst = src + copy_size; + src_u32 = (uint32_t *)(uintptr_t)src; + dst_u32 = (uint32_t *)(uintptr_t)dst; } start_param.tick_type = ODP_TIMER_TICK_REL; @@ -316,7 +360,22 @@ static int worker_thread(void *arg) if (mode) { work_t1 = odp_time_local(); - memcpy(dst, src, copy_size); + if (mode & MODE_MEMCPY) + memcpy(dst, src, copy_size); + + if (mode & MODE_COPY_U32) + for (i = 0; i < copy_words; i++) + dst_u32[i] = src_u32[i]; + + if (data_mode) { + for (i = 0; i < num_words; i++) { + if (mode & MODE_SQRT_U32) + dummy_sum += odph_stress_sqrt_u32(src_u32[i]); + + if (mode & MODE_SQRT_F32) + dummy_sum += odph_stress_sqrt_f32(src_u32[i]); + } + } work_t2 = odp_time_local(); work_sum += odp_time_diff_ns(work_t2, work_t1); @@ -336,6 +395,7 @@ static int worker_thread(void *arg) global->stat[thr].rounds = rounds; global->stat[thr].tot_nsec = tot_nsec; global->stat[thr].work_nsec = work_sum; + global->stat[thr].dummy_sum = dummy_sum; return ret; } @@ -656,8 +716,8 @@ static void print_stat(test_global_t *global) test_stat_sum_t *sum = &global->stat_sum; double sec_ave, work_ave, perc; double round_ave = 0.0; - double copy_ave = 0.0; - double copy_tot = 0.0; + double rate_ave = 0.0; + double rate_tot = 0.0; double cpu_load = 0.0; const double mega = 1000000.0; const double giga = 1000000000.0; @@ -692,10 +752,16 @@ static void print_stat(test_global_t *global) cpu_load = 100.0 * (work_ave / sec_ave); if (mode) { - uint64_t copy_bytes = sum->rounds * test_options->mem_size / 2; + uint64_t data_bytes; + + if (mode == MODE_MEMCPY || mode == MODE_COPY_U32 || + mode == (MODE_COPY_U32 | MODE_MEMCPY)) + data_bytes = sum->rounds * test_options->mem_size / 2; + else + data_bytes = sum->rounds * test_options->mem_size; - copy_ave = copy_bytes / (sum->work_nsec / giga); - copy_tot = copy_ave * num_cpu; + rate_ave = data_bytes / (sum->work_nsec / giga); + rate_tot = rate_ave * num_cpu; } } @@ -705,8 +771,8 @@ static void print_stat(test_global_t *global) printf(" ave work: %.2f sec\n", work_ave); printf(" ave CPU load: %.2f\n", cpu_load); printf(" ave rounds per sec: %.2f\n", round_ave / sec_ave); - printf(" ave copy speed: %.2f MB/sec\n", copy_ave / mega); - printf(" total copy speed: %.2f MB/sec\n", copy_tot / mega); + printf(" ave data rate: %.2f MB/sec\n", rate_ave / mega); + printf(" total data rate: %.2f MB/sec\n", rate_tot / mega); printf("\n"); } @@ -798,6 +864,10 @@ int main(int argc, char **argv) /* Memory for workers */ if (mode) { + uint64_t num_words; + uint32_t *word; + uint32_t num_rand = ODPH_ARRAY_SIZE(pseudo_rand); + mem_size = test_options->mem_size * num_cpu; shm = odp_shm_reserve("Test memory", mem_size, ODP_CACHE_LINE_SIZE, 0); @@ -813,13 +883,18 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } - memset(global->worker_mem, 0, mem_size); + num_words = mem_size / sizeof(uint32_t); + word = (uint32_t *)global->worker_mem; + + for (uint64_t j = 0; j < num_words; j++) + word[j] = pseudo_rand[j % num_rand]; + } printf("\n"); printf("Test parameters\n"); printf(" num workers %u\n", num_cpu); - printf(" mode %i\n", mode); + printf(" mode 0x%x\n", mode); printf(" group mode %i\n", test_options->group_mode); printf(" mem size per worker %" PRIu64 " bytes\n", test_options->mem_size); diff --git a/test/performance/odp_timer_accuracy.c b/test/performance/odp_timer_accuracy.c new file mode 100644 index 000000000..a663c894a --- /dev/null +++ b/test/performance/odp_timer_accuracy.c @@ -0,0 +1,1438 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018 Linaro Limited + * Copyright (c) 2019-2023 Nokia + */ + +/** + * @example odp_timer_accuracy.c + * + * ODP timer accuracy test application + * + * @cond _ODP_HIDE_FROM_DOXYGEN_ + */ + +#include <errno.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <stdlib.h> + +#include <unistd.h> +#include <getopt.h> + +#include <odp_api.h> +#include <odp/helper/odph_api.h> + +#define MAX_WORKERS (ODP_THREAD_COUNT_MAX - 1) +#define MAX_QUEUES 1024 +#define MAX_FILENAME 128 + +enum mode_e { + MODE_ONESHOT = 0, + MODE_RESTART_ABS, + MODE_RESTART_REL, + MODE_PERIODIC, +}; + +typedef struct test_opt_t { + int cpu_count; + unsigned long long period_ns; + long long res_ns; + unsigned long long res_hz; + unsigned long long offset_ns; + unsigned long long max_tmo_ns; + unsigned long long num; + unsigned long long num_warmup; + unsigned long long burst; + unsigned long long burst_gap; + odp_fract_u64_t freq; + unsigned long long max_multiplier; + unsigned long long multiplier; + enum mode_e mode; + int clk_src; + odp_queue_type_t queue_type; + int num_queue; + int groups; + int init; + int output; + int early_retry; + uint64_t warmup_timers; + uint64_t tot_timers; + uint64_t alloc_timers; + char filename[MAX_FILENAME]; +} test_opt_t; + +typedef struct timer_ctx_t { + odp_timer_t timer; + odp_event_t event; + uint64_t nsec; + uint64_t count; + uint64_t first_period; + int tmo_tick; + int64_t first_tmo_diff; + int64_t nsec_final; + +} timer_ctx_t; + +typedef struct { + uint64_t nsec_before_sum; + uint64_t nsec_before_min; + uint64_t nsec_before_min_idx; + uint64_t nsec_before_max; + uint64_t nsec_before_max_idx; + + uint64_t nsec_after_sum; + uint64_t nsec_after_min; + uint64_t nsec_after_min_idx; + uint64_t nsec_after_max; + uint64_t nsec_after_max_idx; + + uint64_t num_before; + uint64_t num_exact; + uint64_t num_after; + + uint64_t num_too_near; + +} test_stat_t; + +typedef struct test_log_t { + uint64_t tmo_ns; + int64_t diff_ns; + int tid; + +} test_log_t; + +typedef struct test_global_t { + test_opt_t opt; + + test_stat_t stat[MAX_WORKERS]; + + odp_queue_t queue[MAX_QUEUES]; + odp_schedule_group_t group[MAX_WORKERS]; + odp_timer_pool_t timer_pool; + odp_pool_t timeout_pool; + timer_ctx_t *timer_ctx; + double res_ns; + uint64_t start_tick; + uint64_t start_ns; + uint64_t period_tick; + double period_dbl; + odp_fract_u64_t base_freq; + test_log_t *log; + FILE *file; + odp_barrier_t barrier; + odp_atomic_u64_t events; + odp_atomic_u64_t last_events; + +} test_global_t; + +static void print_usage(void) +{ + printf("\n" + "Timer accuracy test application.\n" + "\n" + "OPTIONS:\n" + " -c, --count <num> CPU count, 0=all available, default=1\n" + " -p, --period <nsec> Timeout period in nsec. Not used in periodic mode. Default: 200 msec\n" + " -r, --res_ns <nsec> Timeout resolution in nsec. Default value is 0. Special values:\n" + " 0: Use period / 10 as the resolution\n" + " -1: In periodic mode, use resolution from capabilities\n" + " -R, --res_hz <hertz> Timeout resolution in hertz. Set resolution either with -r (nsec) or -R (hertz),\n" + " and leave other to 0. Default: 0 (not used)\n" + " -f, --first <nsec> First timer offset in nsec. Default: 0 for periodic mode, otherwise 300 msec\n" + " -x, --max_tmo <nsec> Maximum timeout in nsec. Not used in periodic mode.\n" + " When 0, max tmo is calculated from other options. Default: 0\n" + " -n, --num <number> Number of timeout periods. Default: 50\n" + " -w, --warmup <number> Number of warmup periods. Default: 0\n" + " -b, --burst <number> Number of timers per a timeout period. Default: 1\n" + " -g, --burst_gap <nsec> Gap (in nsec) between timers within a burst. Default: 0\n" + " In periodic mode, first + burst * burst_gap must be less than period length.\n" + " -m, --mode <number> Test mode select (default: 0):\n" + " 0: One-shot. Start all timers at init phase.\n" + " 1: One-shot. Each period, restart timers with absolute time.\n" + " 2: One-shot. Each period, restart timers with relative time.\n" + " 3: Periodic.\n" + " -P, --periodic <freq_integer:freq_numer:freq_denom:max_multiplier>\n" + " Periodic timer pool parameters. Default: 5:0:0:1 (5 Hz)\n" + " -M, --multiplier Periodic timer multiplier. Default: 1\n" + " -o, --output <file> Output file for measurement logs\n" + " -e, --early_retry <num> When timer restart fails due to ODP_TIMER_TOO_NEAR, retry this many times\n" + " with expiration time incremented by the period. Default: 0\n" + " -s, --clk_src Clock source select (default 0):\n" + " 0: ODP_CLOCK_DEFAULT\n" + " 1: ODP_CLOCK_SRC_1, ...\n" + " -t, --queue_type Queue sync type. Default is 0 (PARALLEL).\n" + " 0: PARALLEL\n" + " 1: ATOMIC\n" + " 2: ORDERED\n" + " -q, --num_queue Number of queues. Default is 1.\n" + " -G, --sched_groups Use dedicated schedule group for each worker.\n" + " -i, --init Set global init parameters. Default: init params not set.\n" + " -h, --help Display help and exit.\n\n"); +} + +static int parse_options(int argc, char *argv[], test_opt_t *test_opt) +{ + int opt, long_index; + const struct option longopts[] = { + {"count", required_argument, NULL, 'c'}, + {"period", required_argument, NULL, 'p'}, + {"res_ns", required_argument, NULL, 'r'}, + {"res_hz", required_argument, NULL, 'R'}, + {"first", required_argument, NULL, 'f'}, + {"max_tmo", required_argument, NULL, 'x'}, + {"num", required_argument, NULL, 'n'}, + {"warmup", required_argument, NULL, 'w'}, + {"burst", required_argument, NULL, 'b'}, + {"burst_gap", required_argument, NULL, 'g'}, + {"mode", required_argument, NULL, 'm'}, + {"periodic", required_argument, NULL, 'P'}, + {"multiplier", required_argument, NULL, 'M'}, + {"output", required_argument, NULL, 'o'}, + {"early_retry", required_argument, NULL, 'e'}, + {"clk_src", required_argument, NULL, 's'}, + {"queue_type", required_argument, NULL, 't'}, + {"num_queue", required_argument, NULL, 'q'}, + {"sched_groups", no_argument, NULL, 'G'}, + {"init", no_argument, NULL, 'i'}, + {"help", no_argument, NULL, 'h'}, + {NULL, 0, NULL, 0} + }; + const char *shortopts = "+c:p:r:R:f:x:n:w:b:g:m:P:M:o:e:s:t:q:Gih"; + int ret = 0; + + memset(test_opt, 0, sizeof(*test_opt)); + + test_opt->cpu_count = 1; + test_opt->period_ns = 200 * ODP_TIME_MSEC_IN_NS; + test_opt->res_ns = 0; + test_opt->res_hz = 0; + test_opt->offset_ns = UINT64_MAX; + test_opt->max_tmo_ns = 0; + test_opt->num = 50; + test_opt->num_warmup = 0; + test_opt->burst = 1; + test_opt->burst_gap = 0; + test_opt->mode = MODE_ONESHOT; + test_opt->freq.integer = ODP_TIME_SEC_IN_NS / test_opt->period_ns; + test_opt->freq.numer = 0; + test_opt->freq.denom = 0; + test_opt->max_multiplier = 1; + test_opt->multiplier = 1; + test_opt->clk_src = ODP_CLOCK_DEFAULT; + test_opt->queue_type = ODP_SCHED_SYNC_PARALLEL; + test_opt->groups = 0; + test_opt->num_queue = 1; + test_opt->init = 0; + test_opt->output = 0; + test_opt->early_retry = 0; + + while (1) { + opt = getopt_long(argc, argv, shortopts, longopts, &long_index); + + if (opt == -1) + break; /* No more options */ + + switch (opt) { + case 'c': + test_opt->cpu_count = atoi(optarg); + break; + case 'p': + test_opt->period_ns = strtoull(optarg, NULL, 0); + break; + case 'r': + test_opt->res_ns = strtoll(optarg, NULL, 0); + break; + case 'R': + test_opt->res_hz = strtoull(optarg, NULL, 0); + break; + case 'f': + test_opt->offset_ns = strtoull(optarg, NULL, 0); + break; + case 'x': + test_opt->max_tmo_ns = strtoull(optarg, NULL, 0); + break; + case 'n': + test_opt->num = strtoull(optarg, NULL, 0); + break; + case 'w': + test_opt->num_warmup = strtoull(optarg, NULL, 0); + break; + case 'b': + test_opt->burst = strtoull(optarg, NULL, 0); + break; + case 'g': + test_opt->burst_gap = strtoull(optarg, NULL, 0); + break; + case 'm': + test_opt->mode = atoi(optarg); + break; + case 'P': + sscanf(optarg, "%" SCNu64 ":%" SCNu64 ":%" SCNu64 ":%llu", + &test_opt->freq.integer, &test_opt->freq.numer, + &test_opt->freq.denom, &test_opt->max_multiplier); + break; + case 'M': + test_opt->multiplier = strtoull(optarg, NULL, 0); + break; + case 'o': + test_opt->output = 1; + if (strlen(optarg) >= MAX_FILENAME) { + printf("Filename too long\n"); + return -1; + } + odph_strcpy(test_opt->filename, optarg, MAX_FILENAME); + break; + case 'e': + test_opt->early_retry = atoi(optarg); + break; + case 's': + test_opt->clk_src = atoi(optarg); + break; + case 't': + switch (atoi(optarg)) { + case 1: + test_opt->queue_type = ODP_SCHED_SYNC_ATOMIC; + break; + case 2: + test_opt->queue_type = ODP_SCHED_SYNC_ORDERED; + break; + default: + test_opt->queue_type = ODP_SCHED_SYNC_PARALLEL; + break; + } + break; + case 'q': + test_opt->num_queue = atoi(optarg); + break; + case 'G': + test_opt->groups = 1; + break; + case 'i': + test_opt->init = 1; + break; + case 'h': + print_usage(); + ret = -1; + break; + default: + print_usage(); + ret = -1; + break; + } + } + + if (test_opt->mode == MODE_PERIODIC) { + if ((test_opt->freq.integer == 0 && test_opt->freq.numer == 0) || + (test_opt->freq.numer != 0 && test_opt->freq.denom == 0)) { + printf("Bad frequency\n"); + return -1; + } + + test_opt->period_ns = + ODP_TIME_SEC_IN_NS / odp_fract_u64_to_dbl(&test_opt->freq); + + if (test_opt->offset_ns == UINT64_MAX) + test_opt->offset_ns = 0; + } else { + if (test_opt->res_ns < 0) { + printf("Resolution (res_ns) must be >= 0 with single shot timer\n"); + return -1; + } + + if (test_opt->offset_ns == UINT64_MAX) + test_opt->offset_ns = 300 * ODP_TIME_MSEC_IN_NS; + } + + test_opt->warmup_timers = test_opt->num_warmup * test_opt->burst; + test_opt->tot_timers = + test_opt->warmup_timers + test_opt->num * test_opt->burst; + + if (test_opt->mode == MODE_ONESHOT) + test_opt->alloc_timers = test_opt->tot_timers; + else + test_opt->alloc_timers = test_opt->burst; + + return ret; +} + +static int single_shot_params(test_global_t *test_global, odp_timer_pool_param_t *timer_param, + odp_timer_capability_t *timer_capa) +{ + uint64_t res_ns, res_hz; + uint64_t max_res_ns, max_res_hz; + uint64_t period_ns = test_global->opt.period_ns; + uint64_t num_tmo = test_global->opt.num + test_global->opt.num_warmup; + uint64_t offset_ns = test_global->opt.offset_ns; + enum mode_e mode = test_global->opt.mode; + + max_res_ns = timer_capa->max_res.res_ns; + max_res_hz = timer_capa->max_res.res_hz; + + /* Default resolution */ + if (test_global->opt.res_ns == 0 && test_global->opt.res_hz == 0) { + res_ns = test_global->opt.period_ns / 10; + res_hz = 0; + } else if (test_global->opt.res_ns) { + res_ns = test_global->opt.res_ns; + res_hz = 0; + } else { + res_ns = 0; + res_hz = test_global->opt.res_hz; + } + + if (res_ns && res_ns < max_res_ns) { + printf("Resolution %" PRIu64 " nsec too high. Highest resolution %" PRIu64 " nsec. " + "Default resolution is period / 10.\n\n", + res_ns, max_res_ns); + return -1; + } + + if (res_hz && res_hz > max_res_hz) { + printf("Resolution %" PRIu64 " hz too high. Highest resolution %" PRIu64 " hz. " + "Default resolution is period / 10.\n\n", + res_hz, max_res_hz); + return -1; + } + + if (res_ns) + timer_param->res_ns = res_ns; + else + timer_param->res_hz = res_hz; + + if (mode == MODE_ONESHOT) { + timer_param->min_tmo = offset_ns / 2; + timer_param->max_tmo = offset_ns + ((num_tmo + 1) * period_ns); + } else { + timer_param->min_tmo = period_ns / 10; + timer_param->max_tmo = offset_ns + (2 * period_ns); + } + + if (test_global->opt.max_tmo_ns) { + if (test_global->opt.max_tmo_ns < timer_param->max_tmo) { + printf("Max tmo is too small. Must be at least %" PRIu64 " nsec.\n", + timer_param->max_tmo); + return -1; + } + + timer_param->max_tmo = test_global->opt.max_tmo_ns; + } + + printf(" period: %" PRIu64 " nsec\n", period_ns); + printf(" max res nsec: %" PRIu64 "\n", max_res_ns); + printf(" max res hertz: %" PRIu64 "\n", max_res_hz); + + test_global->period_dbl = period_ns; + + return 0; +} + +static int periodic_params(test_global_t *test_global, odp_timer_pool_param_t *timer_param, + odp_timer_capability_t *timer_capa) +{ + int ret; + uint64_t res_ns; + odp_timer_periodic_capability_t capa; + double freq_dbl, min_freq, max_freq; + double opt_freq = odp_fract_u64_to_dbl(&test_global->opt.freq); + odp_fract_u64_t freq = test_global->opt.freq; + uint64_t res_hz = test_global->opt.res_hz; + uint64_t max_multiplier = test_global->opt.max_multiplier; + uint64_t multiplier = test_global->opt.multiplier; + + if (res_hz) { + res_ns = ODP_TIME_SEC_IN_NS / res_hz; + } else { + res_ns = test_global->opt.res_ns; + + /* Default resolution */ + if (res_ns == 0) + res_ns = ODP_TIME_SEC_IN_NS / (10 * multiplier * opt_freq); + } + + if (res_ns == 0) { + printf("Too high resolution\n"); + return -1; + } + + /* Resolution from capa */ + if (test_global->opt.res_ns < 0) + res_ns = 0; + + min_freq = odp_fract_u64_to_dbl(&timer_capa->periodic.min_base_freq_hz); + max_freq = odp_fract_u64_to_dbl(&timer_capa->periodic.max_base_freq_hz); + + capa.base_freq_hz = freq; + capa.max_multiplier = max_multiplier; + capa.res_ns = res_ns; + + ret = odp_timer_periodic_capability(test_global->opt.clk_src, &capa); + + if (ret < 0) { + printf("Requested periodic timer capabilities are not supported.\n" + "Capabilities: min base freq %g Hz, max base freq %g Hz, " + "max res %" PRIu64 " Hz\n", min_freq, max_freq, timer_capa->max_res.res_hz); + return -1; + } + + if (ret == 0) { + printf("Requested base frequency is not met. Using %.2f Hz instead of %.2f Hz.\n", + odp_fract_u64_to_dbl(&capa.base_freq_hz), opt_freq); + + freq = capa.base_freq_hz; + } + + if (res_ns == 0) + res_ns = capa.res_ns; + + freq_dbl = odp_fract_u64_to_dbl(&freq); + test_global->base_freq = freq; + test_global->period_dbl = ODP_TIME_SEC_IN_NS / (multiplier * freq_dbl); + + /* Min/max tmo are ignored, leave those to default values */ + timer_param->timer_type = ODP_TIMER_TYPE_PERIODIC; + timer_param->periodic.base_freq_hz = freq; + timer_param->periodic.max_multiplier = max_multiplier; + + if (res_hz) + timer_param->res_hz = res_hz; + else + timer_param->res_ns = res_ns; + + printf(" min freq capa: %.2f hz\n", min_freq); + printf(" max freq capa: %.2f hz\n", max_freq); + printf(" freq option: %.2f hz\n", opt_freq); + printf(" freq: %.2f hz\n", freq_dbl); + printf(" freq integer: %" PRIu64 "\n", freq.integer); + printf(" freq numer: %" PRIu64 "\n", freq.numer); + printf(" freq denom: %" PRIu64 "\n", freq.denom); + printf(" max_multiplier: %" PRIu64 "\n", max_multiplier); + printf(" multiplier: %" PRIu64 "\n", multiplier); + printf(" timer freq: %.2f hz\n", multiplier * freq_dbl); + printf(" timer period: %.2f nsec\n", test_global->period_dbl); + printf(" resolution capa: %" PRIu64 " nsec\n", capa.res_ns); + + return 0; +} + +static int create_timers(test_global_t *test_global) +{ + odp_pool_t pool; + odp_pool_param_t pool_param; + odp_timer_pool_t timer_pool; + odp_timer_pool_param_t timer_param; + odp_timer_capability_t timer_capa; + odp_timer_t timer; + odp_queue_t *queue; + odp_schedule_group_t *group; + odp_queue_param_t queue_param; + uint64_t offset_ns; + uint32_t max_timers; + odp_event_t event; + odp_timeout_t timeout; + uint64_t i, num_tmo, num_warmup, burst, burst_gap; + uint64_t tot_timers, alloc_timers; + enum mode_e mode; + odp_timer_clk_src_t clk_src; + int ret; + + mode = test_global->opt.mode; + alloc_timers = test_global->opt.alloc_timers; + tot_timers = test_global->opt.tot_timers; + num_warmup = test_global->opt.num_warmup; + num_tmo = num_warmup + test_global->opt.num; + burst = test_global->opt.burst; + burst_gap = test_global->opt.burst_gap; + offset_ns = test_global->opt.offset_ns; + queue = test_global->queue; + group = test_global->group; + + /* Always init globals for destroy calls */ + test_global->timer_pool = ODP_TIMER_POOL_INVALID; + test_global->timeout_pool = ODP_POOL_INVALID; + + for (i = 0; i < alloc_timers; i++) { + test_global->timer_ctx[i].timer = ODP_TIMER_INVALID; + test_global->timer_ctx[i].event = ODP_EVENT_INVALID; + } + + if (test_global->opt.groups) { + /* Create groups */ + + odp_thrmask_t zero; + + odp_thrmask_zero(&zero); + + for (i = 0; i < (uint64_t)test_global->opt.cpu_count; i++) { + group[i] = odp_schedule_group_create(NULL, &zero); + + if (group[i] == ODP_SCHED_GROUP_INVALID) { + printf("Group create failed.\n"); + return -1; + } + } + } + + odp_queue_param_init(&queue_param); + queue_param.type = ODP_QUEUE_TYPE_SCHED; + queue_param.sched.prio = odp_schedule_default_prio(); + queue_param.sched.sync = test_global->opt.queue_type; + queue_param.sched.group = ODP_SCHED_GROUP_ALL; + + for (i = 0; i < (uint64_t)test_global->opt.num_queue; i++) { + if (test_global->opt.groups) + queue_param.sched.group = group[i % test_global->opt.cpu_count]; + + queue[i] = odp_queue_create(NULL, &queue_param); + if (queue[i] == ODP_QUEUE_INVALID) { + printf("Queue create failed.\n"); + return -1; + } + } + + odp_pool_param_init(&pool_param); + pool_param.type = ODP_POOL_TIMEOUT; + pool_param.tmo.num = alloc_timers; + + pool = odp_pool_create("timeout pool", &pool_param); + + if (pool == ODP_POOL_INVALID) { + printf("Timeout pool create failed.\n"); + return -1; + } + + test_global->timeout_pool = pool; + clk_src = test_global->opt.clk_src; + + if (odp_timer_capability(clk_src, &timer_capa)) { + printf("Timer capa failed\n"); + return -1; + } + + max_timers = timer_capa.max_timers; + + if (mode == MODE_PERIODIC) { + if (timer_capa.periodic.max_pools < 1) { + printf("Error: Periodic timers not supported.\n"); + return -1; + } + max_timers = timer_capa.periodic.max_timers; + } + + if (max_timers && test_global->opt.alloc_timers > max_timers) { + printf("Error: Too many timers: %" PRIu64 ".\n" + " Max timers: %u\n", + test_global->opt.alloc_timers, max_timers); + return -1; + } + + printf("\nTest parameters:\n"); + printf(" clock source: %i\n", clk_src); + printf(" max timers capa: %" PRIu32 "\n", max_timers); + printf(" mode: %i\n", mode); + printf(" queue type: %i\n", test_global->opt.queue_type); + printf(" num queue: %i\n", test_global->opt.num_queue); + printf(" sched groups: %s\n", test_global->opt.groups ? "yes" : "no"); + + odp_timer_pool_param_init(&timer_param); + + if (mode == MODE_PERIODIC) + ret = periodic_params(test_global, &timer_param, &timer_capa); + else + ret = single_shot_params(test_global, &timer_param, &timer_capa); + + if (ret) + return ret; + + if (timer_param.res_hz) { + test_global->res_ns = 1000000000.0 / timer_param.res_hz; + printf(" resolution: %" PRIu64 " Hz\n", timer_param.res_hz); + } else { + test_global->res_ns = timer_param.res_ns; + printf(" resolution: %" PRIu64 " nsec\n", timer_param.res_ns); + } + + timer_param.num_timers = alloc_timers; + timer_param.clk_src = clk_src; + + printf(" restart retries: %i\n", test_global->opt.early_retry); + if (test_global->opt.output) + printf(" log file: %s\n", test_global->opt.filename); + printf(" start offset: %" PRIu64 " nsec\n", offset_ns); + printf(" min timeout: %" PRIu64 " nsec\n", timer_param.min_tmo); + printf(" max timeout: %" PRIu64 " nsec\n", timer_param.max_tmo); + printf(" num timeout: %" PRIu64 "\n", num_tmo); + printf(" num warmup: %" PRIu64 "\n", num_warmup); + printf(" burst size: %" PRIu64 "\n", burst); + printf(" burst gap: %" PRIu64 "\n", burst_gap); + printf(" total timers: %" PRIu64 "\n", tot_timers); + printf(" warmup timers: %" PRIu64 "\n", test_global->opt.warmup_timers); + printf(" alloc timers: %" PRIu64 "\n", alloc_timers); + printf(" warmup time: %.2f sec\n", + (offset_ns + (num_warmup * test_global->period_dbl)) / 1000000000.0); + printf(" test run time: %.2f sec\n\n", + (offset_ns + (num_tmo * test_global->period_dbl)) / 1000000000.0); + + timer_pool = odp_timer_pool_create("timer_accuracy", &timer_param); + + if (timer_pool == ODP_TIMER_POOL_INVALID) { + printf("Timer pool create failed\n"); + return -1; + } + + if (odp_timer_pool_start_multi(&timer_pool, 1) != 1) { + ODPH_ERR("Timer pool start failed\n"); + return -1; + } + + odp_timer_pool_print(timer_pool); + + /* Spend some time so that current tick would not be zero */ + odp_time_wait_ns(100 * ODP_TIME_MSEC_IN_NS); + + test_global->timer_pool = timer_pool; + + for (i = 0; i < alloc_timers; i++) { + timer_ctx_t *ctx = &test_global->timer_ctx[i]; + + timer = odp_timer_alloc(timer_pool, queue[i % test_global->opt.num_queue], ctx); + + if (timer == ODP_TIMER_INVALID) { + printf("Timer alloc failed.\n"); + return -1; + } + + ctx->timer = timer; + + timeout = odp_timeout_alloc(pool); + if (timeout == ODP_TIMEOUT_INVALID) { + printf("Timeout alloc failed\n"); + return -1; + } + + ctx->event = odp_timeout_to_event(timeout); + } + + /* Run scheduler few times to ensure that (software) timer is active */ + for (i = 0; i < 1000; i++) { + event = odp_schedule(NULL, ODP_SCHED_NO_WAIT); + + if (event != ODP_EVENT_INVALID) { + printf("Spurious event received\n"); + odp_event_free(event); + return -1; + } + } + + return 0; +} + +static int start_timers(test_global_t *test_global) +{ + odp_timer_pool_t timer_pool; + uint64_t start_tick; + uint64_t period_ns, start_ns, nsec, offset_ns; + odp_time_t time; + uint64_t i, j, idx, num_tmo, num_warmup, burst, burst_gap; + enum mode_e mode; + + mode = test_global->opt.mode; + num_warmup = test_global->opt.num_warmup; + num_tmo = num_warmup + test_global->opt.num; + burst = test_global->opt.burst; + burst_gap = test_global->opt.burst_gap; + period_ns = test_global->opt.period_ns; + offset_ns = test_global->opt.offset_ns; + timer_pool = test_global->timer_pool; + idx = 0; + + /* Record test start time and tick. Memory barriers forbid compiler and out-of-order + * CPU to move samples apart. */ + odp_mb_full(); + start_tick = odp_timer_current_tick(timer_pool); + time = odp_time_global(); + odp_mb_full(); + + start_ns = odp_time_to_ns(time); + test_global->start_tick = start_tick; + test_global->start_ns = start_ns; + test_global->period_tick = odp_timer_ns_to_tick(timer_pool, period_ns); + + /* When mode is not one-shot, set only one burst of timers initially */ + if (mode != MODE_ONESHOT) + num_tmo = 1; + + for (i = 0; i < num_tmo; i++) { + odp_timer_retval_t retval; + + for (j = 0; j < burst; j++) { + timer_ctx_t *ctx = &test_global->timer_ctx[idx]; + odp_timer_start_t start_param; + + if (mode == MODE_PERIODIC) { + odp_timer_periodic_start_t periodic_start; + + nsec = offset_ns + (j * burst_gap); + + /* By default, timer starts one period after current time. Round + * floating point to closest integer number. */ + ctx->nsec = start_ns + test_global->period_dbl + 0.5; + if (nsec) + ctx->nsec = start_ns + nsec; + + ctx->count = 0; + ctx->first_period = start_tick + + odp_timer_ns_to_tick(timer_pool, + test_global->period_dbl + 0.5); + periodic_start.freq_multiplier = test_global->opt.multiplier; + periodic_start.first_tick = 0; + if (nsec) + periodic_start.first_tick = + start_tick + odp_timer_ns_to_tick(timer_pool, nsec); + periodic_start.tmo_ev = ctx->event; + retval = odp_timer_periodic_start(ctx->timer, &periodic_start); + } else { + nsec = offset_ns + (i * period_ns) + (j * burst_gap); + ctx->nsec = start_ns + nsec; + start_param.tick_type = ODP_TIMER_TICK_ABS; + start_param.tick = + start_tick + odp_timer_ns_to_tick(timer_pool, nsec); + start_param.tmo_ev = ctx->event; + retval = odp_timer_start(ctx->timer, &start_param); + } + + if (retval != ODP_TIMER_SUCCESS) { + printf("Timer[%" PRIu64 "] set failed: %i\n", idx, retval); + return -1; + } + + idx++; + } + } + + printf("\nStarting timers took %" PRIu64 " nsec\n", odp_time_global_ns() - start_ns); + + return 0; +} + +static int destroy_timers(test_global_t *test_global) +{ + uint64_t i, alloc_timers; + odp_timer_t timer; + int ret = 0; + + alloc_timers = test_global->opt.alloc_timers; + + for (i = 0; i < alloc_timers; i++) { + timer = test_global->timer_ctx[i].timer; + + if (timer == ODP_TIMER_INVALID) + break; + + if (odp_timer_free(timer)) { + printf("Timer free failed: %" PRIu64 "\n", i); + ret = -1; + } + } + + if (test_global->timer_pool != ODP_TIMER_POOL_INVALID) + odp_timer_pool_destroy(test_global->timer_pool); + + if (test_global->timeout_pool != ODP_POOL_INVALID) { + if (odp_pool_destroy(test_global->timeout_pool)) { + printf("Pool destroy failed.\n"); + ret = -1; + } + } + + for (i = 0; i < (uint64_t)test_global->opt.num_queue; i++) { + if (odp_queue_destroy(test_global->queue[i])) { + printf("Queue destroy failed.\n"); + ret = -1; + } + } + + if (test_global->opt.groups) { + for (i = 0; i < (uint64_t)test_global->opt.cpu_count; i++) { + if (odp_schedule_group_destroy(test_global->group[i])) { + printf("Group destroy failed.\n"); + ret = -1; + } + } + } + + return ret; +} + +static void print_nsec_error(const char *str, int64_t nsec, double res_ns, + int tid, int idx) +{ + printf(" %s: %12" PRIi64 " / %.3fx resolution", + str, nsec, (double)nsec / res_ns); + if (tid >= 0) + printf(", thread %d", tid); + if (idx >= 0) + printf(", event %d", idx); + printf("\n"); +} + +static void print_stat(test_global_t *test_global) +{ + test_stat_t test_stat; + test_stat_t *stat = &test_stat; + uint64_t tot_timers; + test_stat_t *s = test_global->stat; + test_log_t *log = test_global->log; + double res_ns = test_global->res_ns; + uint64_t ave_after = 0; + uint64_t ave_before = 0; + uint64_t nsec_before_min_tid = 0; + uint64_t nsec_before_max_tid = 0; + uint64_t nsec_after_min_tid = 0; + uint64_t nsec_after_max_tid = 0; + + memset(stat, 0, sizeof(*stat)); + stat->nsec_before_min = UINT64_MAX; + stat->nsec_after_min = UINT64_MAX; + + for (int i = 1; i < test_global->opt.cpu_count + 1; i++) { + stat->nsec_before_sum += s[i].nsec_before_sum; + stat->nsec_after_sum += s[i].nsec_after_sum; + stat->num_before += s[i].num_before; + stat->num_exact += s[i].num_exact; + stat->num_after += s[i].num_after; + stat->num_too_near += s[i].num_too_near; + + if (s[i].nsec_before_min < stat->nsec_before_min) { + stat->nsec_before_min = s[i].nsec_before_min; + stat->nsec_before_min_idx = s[i].nsec_before_min_idx; + nsec_before_min_tid = i; + } + + if (s[i].nsec_after_min < stat->nsec_after_min) { + stat->nsec_after_min = s[i].nsec_after_min; + stat->nsec_after_min_idx = s[i].nsec_after_min_idx; + nsec_after_min_tid = i; + } + + if (s[i].nsec_before_max > stat->nsec_before_max) { + stat->nsec_before_max = s[i].nsec_before_max; + stat->nsec_before_max_idx = s[i].nsec_before_max_idx; + nsec_before_max_tid = i; + } + + if (s[i].nsec_after_max > stat->nsec_after_max) { + stat->nsec_after_max = s[i].nsec_after_max; + stat->nsec_after_max_idx = s[i].nsec_after_max_idx; + nsec_after_max_tid = i; + } + } + + if (stat->num_after) + ave_after = stat->nsec_after_sum / stat->num_after; + else + stat->nsec_after_min = 0; + + if (stat->num_before) + ave_before = stat->nsec_before_sum / stat->num_before; + else + stat->nsec_before_min = 0; + + tot_timers = stat->num_before + stat->num_after + stat->num_exact; + + if (log) { + FILE *file = test_global->file; + + fprintf(file, " Timer thread tmo(ns) diff(ns)\n"); + + for (uint64_t i = 0; i < tot_timers; i++) { + fprintf(file, "%8" PRIu64 " %7u %12" PRIu64 " %10" + PRIi64 "\n", i, log[i].tid, log[i].tmo_ns, log[i].diff_ns); + } + + fprintf(file, "\n"); + } + + printf("\nTest results:\n"); + printf(" num after: %12" PRIu64 " / %.2f%%\n", + stat->num_after, 100.0 * stat->num_after / tot_timers); + printf(" num before: %12" PRIu64 " / %.2f%%\n", + stat->num_before, 100.0 * stat->num_before / tot_timers); + printf(" num exact: %12" PRIu64 " / %.2f%%\n", + stat->num_exact, 100.0 * stat->num_exact / tot_timers); + printf(" num retry: %12" PRIu64 " / %.2f%%\n", + stat->num_too_near, 100.0 * stat->num_too_near / tot_timers); + printf(" error after (nsec):\n"); + print_nsec_error("min", stat->nsec_after_min, res_ns, nsec_after_min_tid, + stat->nsec_after_min_idx); + print_nsec_error("max", stat->nsec_after_max, res_ns, nsec_after_max_tid, + stat->nsec_after_max_idx); + print_nsec_error("ave", ave_after, res_ns, -1, -1); + printf(" error before (nsec):\n"); + print_nsec_error("min", stat->nsec_before_min, res_ns, nsec_before_min_tid, + stat->nsec_before_min_idx); + print_nsec_error("max", stat->nsec_before_max, res_ns, nsec_before_max_tid, + stat->nsec_before_max_idx); + print_nsec_error("ave", ave_before, res_ns, -1, -1); + + if (test_global->opt.mode == MODE_PERIODIC && !test_global->opt.offset_ns) { + int idx = 0; + int64_t max = 0; + + for (int i = 0; i < (int)test_global->opt.alloc_timers; i++) { + timer_ctx_t *t = &test_global->timer_ctx[i]; + int64_t v = t->first_tmo_diff; + + if (ODPH_ABS(v) > ODPH_ABS(max)) { + max = v; + idx = i; + } + } + + printf(" first timeout difference to one period, based on %s (nsec):\n", + test_global->timer_ctx[idx].tmo_tick ? "timeout tick" : "time"); + print_nsec_error("max", max, res_ns, -1, -1); + } + + int64_t max = 0; + + for (int i = 0; i < (int)test_global->opt.alloc_timers; i++) { + timer_ctx_t *t = &test_global->timer_ctx[i]; + int64_t v = t->nsec_final; + + if (ODPH_ABS(v) > ODPH_ABS(max)) + max = v; + } + + printf(" final timeout error (nsec):\n"); + print_nsec_error("max", max, res_ns, -1, -1); + + printf("\n"); +} + +static void cancel_periodic_timers(test_global_t *test_global) +{ + uint64_t i, alloc_timers; + odp_timer_t timer; + + alloc_timers = test_global->opt.alloc_timers; + + for (i = 0; i < alloc_timers; i++) { + timer = test_global->timer_ctx[i].timer; + + if (timer == ODP_TIMER_INVALID) + break; + + if (odp_timer_periodic_cancel(timer)) + printf("Failed to cancel periodic timer.\n"); + } +} + +static int run_test(void *arg) +{ + test_global_t *test_global = (test_global_t *)arg; + odp_event_t ev; + odp_time_t time; + uint64_t time_ns, diff_ns; + odp_timeout_t tmo; + uint64_t tmo_ns; + timer_ctx_t *ctx; + odp_thrmask_t mask; + uint64_t wait = odp_schedule_wait_time(10 * ODP_TIME_MSEC_IN_NS); + odp_schedule_group_t group = ODP_SCHED_GROUP_INVALID; + test_log_t *log = test_global->log; + enum mode_e mode = test_global->opt.mode; + uint64_t tot_timers = test_global->opt.tot_timers; + double period_dbl = test_global->period_dbl; + odp_timer_pool_t tp = test_global->timer_pool; + int tid = odp_thread_id(); + + if (tid > test_global->opt.cpu_count) { + printf("Error: tid %d is larger than cpu_count %d.\n", tid, + test_global->opt.cpu_count); + return 0; + } + + test_stat_t *stat = &test_global->stat[tid]; + + memset(stat, 0, sizeof(*stat)); + stat->nsec_before_min = UINT64_MAX; + stat->nsec_after_min = UINT64_MAX; + + if (test_global->opt.groups) { + odp_thrmask_zero(&mask); + odp_thrmask_set(&mask, tid); + group = test_global->group[tid - 1]; + + if (odp_schedule_group_join(group, &mask)) { + printf("odp_schedule_group_join() failed\n"); + return 0; + } + } + + odp_barrier_wait(&test_global->barrier); + + while (1) { + ev = odp_schedule(NULL, wait); + time = odp_time_global_strict(); + + if (ev == ODP_EVENT_INVALID) { + if (mode == MODE_PERIODIC) { + if (odp_atomic_load_u64(&test_global->last_events) >= + test_global->opt.alloc_timers) + break; + + } else if (odp_atomic_load_u64(&test_global->events) >= tot_timers) { + break; + } + + continue; + } + + time_ns = odp_time_to_ns(time); + tmo = odp_timeout_from_event(ev); + ctx = odp_timeout_user_ptr(tmo); + tmo_ns = ctx->nsec; + + if (mode == MODE_PERIODIC) { + if (!ctx->count && !test_global->opt.offset_ns) { + /* + * If first_tick is zero, the API allows the implementation to + * place the timer where it can, so we have to adjust our + * expectation of the timeout time. + */ + + uint64_t tmo_tick = odp_timeout_tick(tmo); + + if (tmo_tick) { + /* + * Adjust by the difference between one period after start + * time and the timeout tick. + */ + ctx->tmo_tick = 1; + ctx->first_tmo_diff = + (int64_t)odp_timer_tick_to_ns(tp, tmo_tick) - + (int64_t)odp_timer_tick_to_ns(tp, ctx->first_period); + tmo_ns += ctx->first_tmo_diff; + } else { + /* + * Timeout tick is not provided, so the best we can do is + * to just take the current time as a baseline. + */ + ctx->first_tmo_diff = (int64_t)time_ns - (int64_t)tmo_ns; + tmo_ns = ctx->nsec = time_ns; + } + + ctx->nsec = tmo_ns; + } + + /* round to closest integer number */ + tmo_ns += ctx->count * period_dbl + 0.5; + ctx->count++; + } + + uint64_t events = odp_atomic_fetch_inc_u64(&test_global->events); + + if (events >= test_global->opt.warmup_timers && events < tot_timers) { + uint64_t i = events - test_global->opt.warmup_timers; + + ctx->nsec_final = (int64_t)time_ns - (int64_t)tmo_ns; + + if (log) { + log[i].tmo_ns = tmo_ns; + log[i].tid = tid; + } + + if (time_ns > tmo_ns) { + diff_ns = time_ns - tmo_ns; + stat->num_after++; + stat->nsec_after_sum += diff_ns; + if (diff_ns < stat->nsec_after_min) { + stat->nsec_after_min = diff_ns; + stat->nsec_after_min_idx = i; + } + if (diff_ns > stat->nsec_after_max) { + stat->nsec_after_max = diff_ns; + stat->nsec_after_max_idx = i; + } + if (log) + log[i].diff_ns = diff_ns; + + } else if (time_ns < tmo_ns) { + diff_ns = tmo_ns - time_ns; + stat->num_before++; + stat->nsec_before_sum += diff_ns; + if (diff_ns < stat->nsec_before_min) { + stat->nsec_before_min = diff_ns; + stat->nsec_before_min_idx = i; + } + if (diff_ns > stat->nsec_before_max) { + stat->nsec_before_max = diff_ns; + stat->nsec_before_max_idx = i; + } + if (log) + log[i].diff_ns = -diff_ns; + } else { + stat->num_exact++; + } + } + + if ((mode == MODE_RESTART_ABS || mode == MODE_RESTART_REL) && + events < tot_timers - 1) { + /* Reset timer for next period */ + odp_timer_t tim; + uint64_t nsec, tick; + odp_timer_retval_t ret; + unsigned int j; + unsigned int retries = test_global->opt.early_retry; + uint64_t start_ns = test_global->start_ns; + uint64_t period_ns = test_global->opt.period_ns; + odp_timer_start_t start_param; + + tim = ctx->timer; + + /* Depending on the option, retry when expiration + * time is too early */ + for (j = 0; j < retries + 1; j++) { + if (mode == MODE_RESTART_ABS) { + /* Absolute time */ + ctx->nsec += period_ns; + nsec = ctx->nsec - start_ns; + tick = test_global->start_tick + + odp_timer_ns_to_tick(tp, nsec); + start_param.tick_type = ODP_TIMER_TICK_ABS; + } else { + /* Relative time */ + tick = test_global->period_tick; + time = odp_time_local(); + time_ns = odp_time_to_ns(time); + ctx->nsec = time_ns + period_ns; + start_param.tick_type = ODP_TIMER_TICK_REL; + } + + start_param.tmo_ev = ev; + start_param.tick = tick; + + ret = odp_timer_start(tim, &start_param); + if (ret == ODP_TIMER_TOO_NEAR) { + if (events >= test_global->opt.warmup_timers) + stat->num_too_near++; + } else { + break; + } + } + + if (ret != ODP_TIMER_SUCCESS) { + printf("Timer set failed: %i. Timeout nsec " + "%" PRIu64 "\n", ret, ctx->nsec); + return 0; + } + } else if (mode == MODE_PERIODIC) { + int ret = odp_timer_periodic_ack(ctx->timer, ev); + + if (ret < 0) + printf("Failed to ack a periodic timer.\n"); + + if (ret == 2) + odp_atomic_inc_u64(&test_global->last_events); + + if (ret == 2 || ret < 0) + odp_event_free(ev); + } else { + odp_event_free(ev); + } + } + + if (test_global->opt.groups) { + if (odp_schedule_group_leave(group, &mask)) + printf("odp_schedule_group_leave() failed\n"); + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + odp_instance_t instance; + odp_init_t init; + test_opt_t test_opt; + test_global_t *test_global; + odph_helper_options_t helper_options; + odp_init_t *init_ptr = NULL; + int ret = 0; + + /* Let helper collect its own arguments (e.g. --odph_proc) */ + argc = odph_parse_options(argc, argv); + if (odph_options(&helper_options)) { + ODPH_ERR("Reading ODP helper options failed.\n"); + exit(EXIT_FAILURE); + } + + if (parse_options(argc, argv, &test_opt)) + return -1; + + /* List features not to be used (may optimize performance) */ + odp_init_param_init(&init); + init.not_used.feat.cls = 1; + init.not_used.feat.compress = 1; + init.not_used.feat.crypto = 1; + init.not_used.feat.ipsec = 1; + init.not_used.feat.tm = 1; + + init.mem_model = helper_options.mem_model; + + if (test_opt.init) + init_ptr = &init; + + /* Init ODP before calling anything else */ + if (odp_init_global(&instance, init_ptr, NULL)) { + printf("Global init failed.\n"); + return -1; + } + + /* Init this thread */ + if (odp_init_local(instance, ODP_THREAD_CONTROL)) { + printf("Local init failed.\n"); + return -1; + } + + odp_sys_info_print(); + + /* Configure scheduler */ + odp_schedule_config(NULL); + + odp_shm_t shm = ODP_SHM_INVALID, shm_ctx = ODP_SHM_INVALID, shm_log = ODP_SHM_INVALID; + uint64_t size = sizeof(test_global_t); + + shm = odp_shm_reserve("timer_accuracy", size, + ODP_CACHE_LINE_SIZE, ODP_SHM_SINGLE_VA); + + if (shm == ODP_SHM_INVALID) { + printf("Shm alloc failed.\n"); + return -1; + } + + test_global = odp_shm_addr(shm); + memset(test_global, 0, size); + memcpy(&test_global->opt, &test_opt, sizeof(test_opt_t)); + + size = test_global->opt.alloc_timers * sizeof(timer_ctx_t); + shm_ctx = odp_shm_reserve("timer_accuracy_ctx", size, + ODP_CACHE_LINE_SIZE, ODP_SHM_SINGLE_VA); + + if (shm_ctx == ODP_SHM_INVALID) { + printf("Timer context alloc failed.\n"); + ret = -1; + goto quit; + } + + test_global->timer_ctx = odp_shm_addr(shm_ctx); + memset(test_global->timer_ctx, 0, size); + + if (test_global->opt.output) { + test_global->file = fopen(test_global->opt.filename, "w"); + if (test_global->file == NULL) { + printf("Failed to open output file %s: %s\n", + test_global->opt.filename, strerror(errno)); + ret = -1; + goto quit; + } + + size = (test_global->opt.tot_timers - test_global->opt.warmup_timers) * + sizeof(test_log_t); + shm_log = odp_shm_reserve("timer_accuracy_log", size, sizeof(test_log_t), + ODP_SHM_SINGLE_VA); + + if (shm_log == ODP_SHM_INVALID) { + printf("Test log alloc failed.\n"); + ret = -1; + goto quit; + } + + test_global->log = odp_shm_addr(shm_log); + memset(test_global->log, 0, size); + } + + odph_thread_t thread_tbl[MAX_WORKERS]; + int num_workers; + odp_cpumask_t cpumask; + char cpumaskstr[ODP_CPUMASK_STR_SIZE]; + odph_thread_common_param_t thr_common; + odph_thread_param_t thr_param; + + memset(thread_tbl, 0, sizeof(thread_tbl)); + + num_workers = MAX_WORKERS; + if (test_global->opt.cpu_count && test_global->opt.cpu_count < MAX_WORKERS) + num_workers = test_global->opt.cpu_count; + num_workers = odp_cpumask_default_worker(&cpumask, num_workers); + test_global->opt.cpu_count = num_workers; + odp_cpumask_to_str(&cpumask, cpumaskstr, sizeof(cpumaskstr)); + + printf("num worker threads: %i\n", num_workers); + printf("first CPU: %i\n", odp_cpumask_first(&cpumask)); + printf("cpu mask: %s\n", cpumaskstr); + + ret = create_timers(test_global); + if (ret) + goto quit; + + odp_barrier_init(&test_global->barrier, num_workers + 1); + odp_atomic_init_u64(&test_global->events, 0); + odp_atomic_init_u64(&test_global->last_events, 0); + + odph_thread_param_init(&thr_param); + thr_param.start = run_test; + thr_param.arg = (void *)test_global; + thr_param.thr_type = ODP_THREAD_WORKER; + + odph_thread_common_param_init(&thr_common); + thr_common.instance = instance; + thr_common.cpumask = &cpumask; + thr_common.share_param = 1; + + odph_thread_create(thread_tbl, &thr_common, &thr_param, num_workers); + odp_barrier_wait(&test_global->barrier); + + ret = start_timers(test_global); + if (ret) + goto quit; + + if (test_global->opt.mode == MODE_PERIODIC) { + while (odp_atomic_load_u64(&test_global->events) < test_global->opt.tot_timers) + odp_time_wait_ns(10 * ODP_TIME_MSEC_IN_NS); + + cancel_periodic_timers(test_global); + } + + odph_thread_join(thread_tbl, num_workers); + print_stat(test_global); + +quit: + if (test_global->file) + fclose(test_global->file); + + if (destroy_timers(test_global)) + ret = -1; + + if (shm_log != ODP_SHM_INVALID && odp_shm_free(shm_log)) + ret = -1; + + if (shm_ctx != ODP_SHM_INVALID && odp_shm_free(shm_ctx)) + ret = -1; + + if (odp_shm_free(shm)) + ret = -1; + + if (odp_term_local()) { + printf("Term local failed.\n"); + ret = -1; + } + + if (odp_term_global(instance)) { + printf("Term global failed.\n"); + ret = -1; + } + + return ret; +} diff --git a/test/performance/odp_timer_accuracy_run.sh b/test/performance/odp_timer_accuracy_run.sh new file mode 100755 index 000000000..84ad2a573 --- /dev/null +++ b/test/performance/odp_timer_accuracy_run.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2022-2024 Nokia +# + +TEST_DIR="${TEST_DIR:-$(dirname $0)}" + +$TEST_DIR/odp_timer_accuracy${EXEEXT} -p 100000000 -n 10 + +RET_VAL=$? +if [ $RET_VAL -ne 0 ] ; then + echo odp_timer_accuracy FAILED + exit $RET_VAL +fi + +exit 0 diff --git a/test/performance/odp_timer_perf.c b/test/performance/odp_timer_perf.c index 918267a1b..6da5f2296 100644 --- a/test/performance/odp_timer_perf.c +++ b/test/performance/odp_timer_perf.c @@ -1,7 +1,5 @@ -/* Copyright (c) 2019-2023, Nokia - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2019-2023 Nokia */ /** diff --git a/test/performance/odp_timer_perf_run.sh b/test/performance/odp_timer_perf_run.sh index 7738ca91b..aa8890e8e 100755 --- a/test/performance/odp_timer_perf_run.sh +++ b/test/performance/odp_timer_perf_run.sh @@ -1,9 +1,7 @@ #!/bin/sh # -# Copyright (c) 2020, Nokia -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2020 Nokia # TEST_DIR="${TEST_DIR:-$(dirname $0)}" |