diff options
author | Matias Elo <matias.elo@nokia.com> | 2023-08-08 11:02:27 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-08-08 11:02:27 +0300 |
commit | de97121a2e3afa072f7c51a0570f4b3bed0236c2 (patch) | |
tree | 1f34d2767951f54d11f0b9c8d48b0db04490d2c1 /platform/linux-generic/pktio/socket_xdp.c | |
parent | 2b359fc1759726826cf4e2afddbd0b7e39fab4c7 (diff) | |
parent | 1200684b94bf18ae98ba63fb49e9cda546b4832a (diff) |
Merge ODP v1.41.1.0 (tag: v1.41.1.0_DPDK_22.11)
Merge ODP linux-generic v1.41.1.0 into linux-dpdk.
Diffstat (limited to 'platform/linux-generic/pktio/socket_xdp.c')
-rw-r--r-- | platform/linux-generic/pktio/socket_xdp.c | 612 |
1 file changed, 404 insertions, 208 deletions
diff --git a/platform/linux-generic/pktio/socket_xdp.c b/platform/linux-generic/pktio/socket_xdp.c index be79ca267..867483f76 100644 --- a/platform/linux-generic/pktio/socket_xdp.c +++ b/platform/linux-generic/pktio/socket_xdp.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2022, Nokia +/* Copyright (c) 2022-2023, Nokia * All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause @@ -9,38 +9,42 @@ #ifdef _ODP_PKTIO_XDP #include <odp_posix_extensions.h> +#include <odp/api/cpu.h> #include <odp/api/debug.h> #include <odp/api/hints.h> +#include <odp/api/packet_io_stats.h> #include <odp/api/system_info.h> #include <odp/api/ticketlock.h> -#include <odp/api/packet_io_stats.h> #include <odp_classification_internal.h> #include <odp_debug_internal.h> #include <odp_libconfig_internal.h> #include <odp_macros_internal.h> -#include <odp_packet_io_internal.h> #include <odp_packet_internal.h> +#include <odp_packet_io_internal.h> #include <odp_parse_internal.h> #include <odp_pool_internal.h> #include <odp_socket_common.h> -#include <string.h> #include <errno.h> -#include <sys/socket.h> -#include <unistd.h> -#include <poll.h> -#include <sys/ioctl.h> #include <linux/ethtool.h> +#include <linux/if_xdp.h> #include <linux/sockios.h> #include <net/if.h> -#include <linux/if_xdp.h> +#include <poll.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <unistd.h> #include <xdp/xsk.h> #define NUM_DESCS_DEFAULT 1024U #define MIN_FRAME_SIZE 2048U +#define MAX_QUEUES (ODP_PKTIN_MAX_QUEUES > ODP_PKTOUT_MAX_QUEUES ? 
\ + ODP_PKTIN_MAX_QUEUES : ODP_PKTOUT_MAX_QUEUES) + #define IF_DELIM " " #define Q_DELIM ':' #define CONF_BASE_STR "pktio_xdp" @@ -63,6 +67,10 @@ static const char * const internal_stats_strs[] = { #define MAX_INTERNAL_STATS _ODP_ARRAY_SIZE(internal_stats_strs) +static const char * const shadow_q_driver_strs[] = { + "mlx", +}; + typedef struct { uint64_t rx_dropped; uint64_t rx_inv_descs; @@ -94,9 +102,29 @@ typedef struct { } xdp_umem_info_t; typedef struct { - xdp_sock_t qs[ODP_PKTOUT_MAX_QUEUES]; + uint32_t rx; + uint32_t tx; + uint32_t other; + uint32_t combined; +} drv_channels_t; + +typedef struct { + /* Queue counts for getting/setting driver's ethtool queue configuration. */ + drv_channels_t drv_channels; + /* Packet I/O level requested input queue count. */ + uint32_t num_in_conf_qs; + /* Packet I/O level requested output queue count. */ + uint32_t num_out_conf_qs; + /* Actual internal queue count. */ + uint32_t num_qs; + /* Length of driver's ethtool RSS indirection table. */ + uint32_t drv_num_rss; +} q_num_conf_t; + +typedef struct { + xdp_sock_t qs[MAX_QUEUES]; xdp_umem_info_t *umem_info; - uint32_t num_q; + q_num_conf_t q_num_conf; int pktio_idx; int helper_sock; uint32_t mtu; @@ -104,6 +132,7 @@ typedef struct { uint32_t bind_q; odp_bool_t lockless_rx; odp_bool_t lockless_tx; + odp_bool_t is_shadow_q; } xdp_sock_info_t; typedef struct { @@ -137,13 +166,88 @@ static inline xdp_sock_info_t *pkt_priv(pktio_entry_t *pktio_entry) return (xdp_sock_info_t *)(uintptr_t)(pktio_entry->pkt_priv); } +static odp_bool_t get_nic_queue_count(int fd, const char *devname, drv_channels_t *cur_channels) +{ + struct ethtool_channels channels; + struct ifreq ifr; + int ret; + + memset(&channels, 0, sizeof(struct ethtool_channels)); + channels.cmd = ETHTOOL_GCHANNELS; + snprintf(ifr.ifr_name, IF_NAMESIZE, "%s", devname); + ifr.ifr_data = (char *)&channels; + ret = ioctl(fd, SIOCETHTOOL, &ifr); + + if (ret == -1) { + _ODP_DBG("Unable to query NIC queue capabilities: %s\n", 
strerror(errno)); + return false; + } + + cur_channels->rx = channels.rx_count; + cur_channels->tx = channels.tx_count; + cur_channels->other = channels.other_count; + cur_channels->combined = channels.combined_count; + + return true; +} + +static odp_bool_t get_nic_rss_indir_count(int fd, const char *devname, uint32_t *drv_num_rss) +{ + struct ethtool_rxfh indir; + struct ifreq ifr; + int ret; + + memset(&indir, 0, sizeof(struct ethtool_rxfh)); + indir.cmd = ETHTOOL_GRSSH; + snprintf(ifr.ifr_name, IF_NAMESIZE, "%s", devname); + ifr.ifr_data = (char *)&indir; + ret = ioctl(fd, SIOCETHTOOL, &ifr); + + if (ret == -1) { + _ODP_DBG("Unable to query NIC RSS indirection table size: %s\n", strerror(errno)); + return false; + } + + *drv_num_rss = indir.indir_size; + + return true; +} + +static odp_bool_t is_shadow_q_driver(int fd, const char *devname) +{ + struct ethtool_drvinfo info; + struct ifreq ifr; + int ret; + + memset(&info, 0, sizeof(struct ethtool_drvinfo)); + info.cmd = ETHTOOL_GDRVINFO; + snprintf(ifr.ifr_name, IF_NAMESIZE, "%s", devname); + ifr.ifr_data = (char *)&info; + ret = ioctl(fd, SIOCETHTOOL, &ifr); + + if (ret == -1) { + _ODP_DBG("Unable to query NIC driver information: %s\n", strerror(errno)); + return false; + } + + for (uint32_t i = 0U; i < _ODP_ARRAY_SIZE(shadow_q_driver_strs); ++i) { + if (strstr(info.driver, shadow_q_driver_strs[i]) != NULL) { + _ODP_PRINT("Driver with XDP shadow queues in use: %s, manual RSS" + " configuration likely required\n", info.driver); + return true; + } + } + + return false; +} + static void parse_options(xdp_umem_info_t *umem_info) { if (!_odp_libconfig_lookup_ext_int(CONF_BASE_STR, NULL, RX_DESCS_STR, &umem_info->num_rx_desc) || !_odp_libconfig_lookup_ext_int(CONF_BASE_STR, NULL, TX_DESCS_STR, &umem_info->num_tx_desc)) { - _ODP_ERR("Unable to parse xdp descriptor configuration, using defaults (%d).\n", + _ODP_ERR("Unable to parse xdp descriptor configuration, using defaults (%d)\n", NUM_DESCS_DEFAULT); goto defaults; 
} @@ -151,7 +255,7 @@ static void parse_options(xdp_umem_info_t *umem_info) if (umem_info->num_rx_desc <= 0 || umem_info->num_tx_desc <= 0 || !_ODP_CHECK_IS_POWER2(umem_info->num_rx_desc) || !_ODP_CHECK_IS_POWER2(umem_info->num_tx_desc)) { - _ODP_ERR("Invalid xdp descriptor configuration, using defaults (%d).\n", + _ODP_ERR("Invalid xdp descriptor configuration, using defaults (%d)\n", NUM_DESCS_DEFAULT); goto defaults; } @@ -163,150 +267,317 @@ defaults: umem_info->num_tx_desc = NUM_DESCS_DEFAULT; } -static int umem_create(xdp_umem_info_t *umem_info, pool_t *pool) +static int sock_xdp_open(odp_pktio_t pktio, pktio_entry_t *pktio_entry, const char *devname, + odp_pool_t pool_hdl) +{ + xdp_sock_info_t *priv; + pool_t *pool; + int ret; + + if (disable_pktio) + return -1; + + priv = pkt_priv(pktio_entry); + memset(priv, 0, sizeof(xdp_sock_info_t)); + pool = _odp_pool_entry(pool_hdl); + priv->umem_info = (xdp_umem_info_t *)pool->mem_src_data; + priv->umem_info->pool = pool; + /* Mark transitory kernel-owned packets with the pktio index, so that they can be freed on + * close. */ + priv->pktio_idx = 1 + odp_pktio_index(pktio); + /* Querying with ioctl() via AF_XDP socket doesn't seem to work, so + * create a helper socket for this. 
*/ + ret = socket(AF_INET, SOCK_DGRAM, 0); + + if (ret == -1) { + _ODP_ERR("Error creating helper socket for xdp: %s\n", strerror(errno)); + return -1; + } + + priv->helper_sock = ret; + priv->mtu = _odp_mtu_get_fd(priv->helper_sock, devname); + + if (priv->mtu == 0U) + goto mtu_err; + + priv->max_mtu = pool->seg_len; + + for (int i = 0; i < MAX_QUEUES; ++i) { + odp_ticketlock_init(&priv->qs[i].rx_lock); + odp_ticketlock_init(&priv->qs[i].tx_lock); + } + + if (!get_nic_queue_count(priv->helper_sock, devname, &priv->q_num_conf.drv_channels) || + !get_nic_rss_indir_count(priv->helper_sock, devname, &priv->q_num_conf.drv_num_rss)) + _ODP_PRINT("Warning: Unable to query NIC queue count/RSS, manual cleanup" + " required\n"); + + priv->is_shadow_q = is_shadow_q_driver(priv->helper_sock, pktio_entry->name); + parse_options(priv->umem_info); + _ODP_DBG("Socket xdp interface (%s):\n", pktio_entry->name); + _ODP_DBG(" num_rx_desc: %d\n", priv->umem_info->num_rx_desc); + _ODP_DBG(" num_tx_desc: %d\n", priv->umem_info->num_tx_desc); + + return 0; + +mtu_err: + close(priv->helper_sock); + + return -1; +} + +static odp_bool_t set_nic_queue_count(int fd, const char *devname, drv_channels_t *new_channels) +{ + struct ethtool_channels channels; + struct ifreq ifr; + int ret; + + memset(&channels, 0, sizeof(struct ethtool_channels)); + channels.cmd = ETHTOOL_SCHANNELS; + channels.rx_count = new_channels->rx; + channels.tx_count = new_channels->tx; + channels.other_count = new_channels->other; + channels.combined_count = new_channels->combined; + snprintf(ifr.ifr_name, IF_NAMESIZE, "%s", devname); + ifr.ifr_data = (char *)&channels; + ret = ioctl(fd, SIOCETHTOOL, &ifr); + + if (ret == -1) { + _ODP_DBG("Unable to set NIC queue count: %s\n", strerror(errno)); + return false; + } + + return true; +} + +static odp_bool_t set_nic_rss_indir(int fd, const char *devname, struct ethtool_rxfh *indir) +{ + struct ifreq ifr; + int ret; + + indir->cmd = ETHTOOL_SRSSH; + snprintf(ifr.ifr_name, 
IF_NAMESIZE, "%s", devname); + ifr.ifr_data = (char *)indir; + ret = ioctl(fd, SIOCETHTOOL, &ifr); + + if (ret == -1) { + _ODP_DBG("Unable to set NIC RSS indirection table: %s\n", strerror(errno)); + return false; + } + + return true; +} + +static int sock_xdp_close(pktio_entry_t *pktio_entry) +{ + xdp_sock_info_t *priv = pkt_priv(pktio_entry); + struct ethtool_rxfh indir; + + memset(&indir, 0, sizeof(struct ethtool_rxfh)); + + if (priv->q_num_conf.num_qs != 0U) + (void)set_nic_queue_count(priv->helper_sock, pktio_entry->name, + &priv->q_num_conf.drv_channels); + + if (priv->q_num_conf.drv_num_rss != 0U && !priv->is_shadow_q) + (void)set_nic_rss_indir(priv->helper_sock, pktio_entry->name, &indir); + + close(priv->helper_sock); + + return 0; +} + +static int umem_create(xdp_umem_info_t *umem_info) { struct xsk_umem_config cfg; if (umem_info->ref_cnt++ > 0U) return 0; - parse_options(umem_info); - umem_info->pool = pool; /* Fill queue size is recommended to be >= HW RX ring size + AF_XDP RX - * ring size, so use size twice the size of AF_XDP RX ring. */ + * ring size, so use size twice the size of AF_XDP RX ring. 
*/ cfg.fill_size = umem_info->num_rx_desc * 2U; cfg.comp_size = umem_info->num_tx_desc; - cfg.frame_size = pool->block_size; - cfg.frame_headroom = sizeof(odp_packet_hdr_t) + pool->headroom; + cfg.frame_size = umem_info->pool->block_size; + cfg.frame_headroom = sizeof(odp_packet_hdr_t) + umem_info->pool->headroom; cfg.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG; - return xsk_umem__create(&umem_info->umem, pool->base_addr, pool->shm_size, - &umem_info->fill_q, &umem_info->compl_q, &cfg); + return xsk_umem__create(&umem_info->umem, umem_info->pool->base_addr, + umem_info->pool->shm_size, &umem_info->fill_q, &umem_info->compl_q, + &cfg); } -static void umem_delete(xdp_umem_info_t *umem_info) +static void fill_socket_config(struct xsk_socket_config *config, xdp_umem_info_t *umem_info) { - if (umem_info->ref_cnt-- != 1U) - return; - - while (xsk_umem__delete(umem_info->umem) == -EBUSY) - continue; + config->rx_size = umem_info->num_rx_desc * 2U; + config->tx_size = umem_info->num_tx_desc; + config->libxdp_flags = 0U; + config->xdp_flags = 0U; + config->bind_flags = XDP_ZEROCOPY; } -static uint32_t get_bind_queue_index(const char *devname) +static odp_bool_t reserve_fill_queue_elements(xdp_sock_info_t *sock_info, xdp_sock_t *sock, + int num) { - const char *param = getenv("ODP_PKTIO_XDP_PARAMS"); - char *tmp_str; - char *tmp; - char *if_str; - int idx = 0; + pool_t *pool; + odp_packet_t packets[num]; + int count; + struct xsk_ring_prod *fill_q; + uint32_t start_idx; + int pktio_idx; + uint32_t block_size; + odp_packet_hdr_t *pkt_hdr; + + pool = sock_info->umem_info->pool; + count = odp_packet_alloc_multi(_odp_pool_handle(pool), sock_info->mtu, packets, num); + + if (count <= 0) { + ++sock->i_stats[RX_PKT_ALLOC_ERR]; + return false; + } - if (param == NULL) - goto out; + fill_q = &sock->fill_q; - tmp_str = strdup(param); + if (xsk_ring_prod__reserve(fill_q, count, &start_idx) == 0U) { + odp_packet_free_multi(packets, count); + ++sock->i_stats[RX_DESC_RSV_ERR]; + return false; + 
} - if (tmp_str == NULL) - goto out; + pktio_idx = sock_info->pktio_idx; + block_size = pool->block_size; - tmp = strtok(tmp_str, IF_DELIM); + for (int i = 0; i < count; ++i) { + pkt_hdr = packet_hdr(packets[i]); + pkt_hdr->ms_pktio_idx = pktio_idx; + *xsk_ring_prod__fill_addr(fill_q, start_idx++) = + pkt_hdr->event_hdr.index.event * block_size; + } - if (tmp == NULL) - goto out_str; + xsk_ring_prod__submit(&sock->fill_q, count); - while (tmp) { - if_str = strchr(tmp, Q_DELIM); + return true; +} - if (if_str != NULL && if_str != &tmp[strlen(tmp) - 1U]) { - if (strncmp(devname, tmp, (uint64_t)(uintptr_t)(if_str - tmp)) == 0) { - idx = _ODP_MAX(atoi(++if_str), 0); - break; - } +static odp_bool_t create_sockets(xdp_sock_info_t *sock_info, const char *devname) +{ + struct xsk_socket_config config; + uint32_t bind_q, i; + struct xsk_umem *umem; + xdp_sock_t *sock; + int ret; + + bind_q = sock_info->bind_q; + umem = sock_info->umem_info->umem; + + for (i = 0U; i < sock_info->q_num_conf.num_qs;) { + sock = &sock_info->qs[i]; + fill_socket_config(&config, sock_info->umem_info); + ret = xsk_socket__create_shared(&sock->xsk, devname, bind_q, umem, &sock->rx, + &sock->tx, &sock->fill_q, &sock->compl_q, &config); + + if (ret) { + _ODP_ERR("Error creating xdp socket for bind queue %u: %d\n", bind_q, ret); + goto err; + } + + ++i; + + if (!reserve_fill_queue_elements(sock_info, sock, config.rx_size)) { + _ODP_ERR("Unable to reserve fill queue descriptors for queue: %u\n", + bind_q); + goto err; } - tmp = strtok(NULL, IF_DELIM); + ++bind_q; } -out_str: - free(tmp_str); + /* Ring setup/clean up routines seem to be asynchronous with some drivers and might not be + * ready yet after xsk_socket__create_shared(). 
*/ + sleep(1U); + + return true; -out: - return idx; +err: + for (uint32_t j = 0U; j < i; ++j) { + xsk_socket__delete(sock_info->qs[j].xsk); + sock_info->qs[j].xsk = NULL; + } + + return false; } -static int sock_xdp_open(odp_pktio_t pktio, pktio_entry_t *pktio_entry, const char *devname, - odp_pool_t pool_hdl) +static void umem_delete(xdp_umem_info_t *umem_info) { - xdp_sock_info_t *priv; - pool_t *pool; + if (umem_info->ref_cnt-- != 1U) + return; + + while (xsk_umem__delete(umem_info->umem) == -EBUSY) + continue; +} + +static int sock_xdp_start(pktio_entry_t *pktio_entry) +{ + xdp_sock_info_t *priv = pkt_priv(pktio_entry); int ret; + drv_channels_t channels = priv->q_num_conf.drv_channels; + struct ethtool_rxfh *indir = calloc(1U, sizeof(struct ethtool_rxfh) + + sizeof(((struct ethtool_rxfh *)0)->rss_config[0U]) + * priv->q_num_conf.drv_num_rss); - if (disable_pktio) + if (indir == NULL) { + _ODP_ERR("Error allocating NIC RSS table\n"); return -1; + } - priv = pkt_priv(pktio_entry); - memset(priv, 0, sizeof(xdp_sock_info_t)); - pool = _odp_pool_entry(pool_hdl); - priv->umem_info = (xdp_umem_info_t *)pool->mem_src_data; - ret = umem_create(priv->umem_info, pool); + ret = umem_create(priv->umem_info); if (ret) { _ODP_ERR("Error creating UMEM pool for xdp: %d\n", ret); - return -1; + goto err; } - /* Mark transitory kernel-owned packets with the pktio index, so that they can be freed on - * close. */ - priv->pktio_idx = 1 + odp_pktio_index(pktio); - /* Querying with ioctl() via AF_XDP socket doesn't seem to work, so - * create a helper socket for this. */ - ret = socket(AF_INET, SOCK_DGRAM, 0); - - if (ret == -1) { - _ODP_ERR("Error creating helper socket for xdp: %s\n", strerror(errno)); - goto sock_err; - } + priv->q_num_conf.num_qs = _ODP_MAX(priv->q_num_conf.num_in_conf_qs, + priv->q_num_conf.num_out_conf_qs); + priv->bind_q = priv->is_shadow_q ? 
priv->q_num_conf.num_qs : 0U; + channels.combined = priv->q_num_conf.num_qs; - priv->helper_sock = ret; - priv->mtu = _odp_mtu_get_fd(priv->helper_sock, devname); + if (!set_nic_queue_count(priv->helper_sock, pktio_entry->name, &channels)) + _ODP_PRINT("Warning: Unable to configure NIC queue count, manual configuration" + " required\n"); - if (priv->mtu == 0U) - goto mtu_err; + if (priv->q_num_conf.num_in_conf_qs > 0U && !priv->is_shadow_q) { + indir->indir_size = priv->q_num_conf.drv_num_rss; - priv->max_mtu = pool->seg_len; + for (uint32_t i = 0U; i < indir->indir_size; ++i) + indir->rss_config[i] = (i % priv->q_num_conf.num_in_conf_qs); - for (int i = 0; i < ODP_PKTOUT_MAX_QUEUES; ++i) { - odp_ticketlock_init(&priv->qs[i].rx_lock); - odp_ticketlock_init(&priv->qs[i].tx_lock); + if (!set_nic_rss_indir(priv->helper_sock, pktio_entry->name, indir)) + _ODP_PRINT("Warning: Unable to configure NIC RSS, manual configuration" + " required\n"); } - priv->bind_q = get_bind_queue_index(pktio_entry->name); - - _ODP_DBG("Socket xdp interface (%s):\n", pktio_entry->name); - _ODP_DBG(" num_rx_desc: %d\n", priv->umem_info->num_rx_desc); - _ODP_DBG(" num_tx_desc: %d\n", priv->umem_info->num_tx_desc); - _ODP_DBG(" starting bind queue: %u\n", priv->bind_q); + if (!create_sockets(priv, pktio_entry->name)) + goto sock_err; return 0; -mtu_err: - close(priv->helper_sock); - sock_err: umem_delete(priv->umem_info); +err: + free(indir); + return -1; } -static int sock_xdp_close(pktio_entry_t *pktio_entry) +static int sock_xdp_stop(pktio_entry_t *pktio_entry) { xdp_sock_info_t *priv = pkt_priv(pktio_entry); pool_t *pool = priv->umem_info->pool; odp_packet_hdr_t *pkt_hdr; - close(priv->helper_sock); - - for (uint32_t i = 0U; i < priv->num_q; ++i) { + for (uint32_t i = 0U; i < priv->q_num_conf.num_qs; ++i) { if (priv->qs[i].xsk != NULL) { xsk_socket__delete(priv->qs[i].xsk); priv->qs[i].xsk = NULL; @@ -342,7 +613,7 @@ static int sock_xdp_stats(pktio_entry_t *pktio_entry, odp_pktio_stats_t 
*stats) memset(stats, 0, sizeof(odp_pktio_stats_t)); - for (uint32_t i = 0U; i < priv->num_q; ++i) { + for (uint32_t i = 0U; i < priv->q_num_conf.num_qs; ++i) { sock = &priv->qs[i]; qi_stats = sock->qi_stats; qo_stats = sock->qo_stats; @@ -372,7 +643,7 @@ static int sock_xdp_stats_reset(pktio_entry_t *pktio_entry) struct xdp_statistics xdp_stats; socklen_t optlen = sizeof(struct xdp_statistics); - for (uint32_t i = 0U; i < priv->num_q; ++i) { + for (uint32_t i = 0U; i < priv->q_num_conf.num_qs; ++i) { sock = &priv->qs[i]; memset(&sock->qi_stats, 0, sizeof(odp_pktin_queue_stats_t)); memset(&sock->qo_stats, 0, sizeof(odp_pktout_queue_stats_t)); @@ -431,7 +702,7 @@ static int sock_xdp_extra_stat_info(pktio_entry_t *pktio_entry, odp_pktio_extra_ int num) { xdp_sock_info_t *priv = pkt_priv(pktio_entry); - const int total_stats = MAX_INTERNAL_STATS * priv->num_q; + const int total_stats = MAX_INTERNAL_STATS * priv->q_num_conf.num_qs; if (info != NULL && num > 0) { for (int i = 0; i < _ODP_MIN(num, total_stats); ++i) @@ -446,7 +717,7 @@ static int sock_xdp_extra_stat_info(pktio_entry_t *pktio_entry, odp_pktio_extra_ static int sock_xdp_extra_stats(pktio_entry_t *pktio_entry, uint64_t stats[], int num) { xdp_sock_info_t *priv = pkt_priv(pktio_entry); - const int total_stats = MAX_INTERNAL_STATS * priv->num_q; + const int total_stats = MAX_INTERNAL_STATS * priv->q_num_conf.num_qs; uint64_t *i_stats; if (stats != NULL && num > 0) { @@ -462,7 +733,7 @@ static int sock_xdp_extra_stats(pktio_entry_t *pktio_entry, uint64_t stats[], in static int sock_xdp_extra_stat_counter(pktio_entry_t *pktio_entry, uint32_t id, uint64_t *stat) { xdp_sock_info_t *priv = pkt_priv(pktio_entry); - const uint32_t total_stats = MAX_INTERNAL_STATS * priv->num_q; + const uint32_t total_stats = MAX_INTERNAL_STATS * priv->q_num_conf.num_qs; if (id >= total_stats) { _ODP_ERR("Invalid counter id: %u (allowed range: 0-%u)\n", id, total_stats - 1U); @@ -506,6 +777,9 @@ static uint32_t 
process_received(pktio_entry_t *pktio_entry, xdp_sock_t *sock, p uint64_t errors = 0U, octets = 0U; odp_pktio_t pktio_hdl = pktio_entry->handle; uint32_t num_rx = 0U; + uint32_t num_cls = 0U; + uint32_t num_pkts = 0U; + const int cls_enabled = pktio_cls_enabled(pktio_entry); for (int i = 0; i < num; ++i) { extract_data(xsk_ring_cons__rx_desc(rx, start_idx++), base_addr, &pkt_data); @@ -527,7 +801,7 @@ static uint32_t process_received(pktio_entry_t *pktio_entry, xdp_sock_t *sock, p continue; } - if (pktio_cls_enabled(pktio_entry)) { + if (cls_enabled) { odp_pool_t new_pool; ret = _odp_cls_classify_packet(pktio_entry, pkt_data.data, @@ -546,60 +820,29 @@ static uint32_t process_received(pktio_entry_t *pktio_entry, xdp_sock_t *sock, p } pkt_data.pkt_hdr->input = pktio_hdl; - packets[num_rx++] = pkt_data.pkt; + num_pkts++; octets += pkt_data.len; + + if (cls_enabled) { + /* Enqueue packets directly to classifier destination queue */ + packets[num_cls++] = pkt_data.pkt; + num_cls = _odp_cls_enq(packets, num_cls, (i + 1 == num)); + } else { + packets[num_rx++] = pkt_data.pkt; + } } + /* Enqueue remaining classified packets */ + if (odp_unlikely(num_cls)) + _odp_cls_enq(packets, num_cls, true); + sock->qi_stats.octets += octets; - sock->qi_stats.packets += num_rx; + sock->qi_stats.packets += num_pkts; sock->qi_stats.errors += errors; return num_rx; } -static odp_bool_t reserve_fill_queue_elements(xdp_sock_info_t *sock_info, xdp_sock_t *sock, - int num) -{ - pool_t *pool; - odp_packet_t packets[num]; - int count; - struct xsk_ring_prod *fill_q; - uint32_t start_idx; - int pktio_idx; - uint32_t block_size; - odp_packet_hdr_t *pkt_hdr; - - pool = sock_info->umem_info->pool; - count = odp_packet_alloc_multi(_odp_pool_handle(pool), sock_info->mtu, packets, num); - - if (count <= 0) { - ++sock->i_stats[RX_PKT_ALLOC_ERR]; - return false; - } - - fill_q = &sock->fill_q; - - if (xsk_ring_prod__reserve(fill_q, count, &start_idx) == 0U) { - odp_packet_free_multi(packets, count); - 
++sock->i_stats[RX_DESC_RSV_ERR]; - return false; - } - - pktio_idx = sock_info->pktio_idx; - block_size = pool->block_size; - - for (int i = 0; i < count; ++i) { - pkt_hdr = packet_hdr(packets[i]); - pkt_hdr->ms_pktio_idx = pktio_idx; - *xsk_ring_prod__fill_addr(fill_q, start_idx++) = - pkt_hdr->event_hdr.index.event * block_size; - } - - xsk_ring_prod__submit(&sock->fill_q, count); - - return true; -} - static int sock_xdp_recv(pktio_entry_t *pktio_entry, int index, odp_packet_t packets[], int num) { xdp_sock_info_t *priv; @@ -608,6 +851,7 @@ static int sock_xdp_recv(pktio_entry_t *pktio_entry, int index, odp_packet_t pac uint32_t start_idx = 0U, recvd, procd; priv = pkt_priv(pktio_entry); + _ODP_ASSERT((uint32_t)index < priv->q_num_conf.num_in_conf_qs); sock = &priv->qs[index]; if (!priv->lockless_rx) @@ -719,6 +963,7 @@ static int sock_xdp_send(pktio_entry_t *pktio_entry, int index, const odp_packet return 0; priv = pkt_priv(pktio_entry); + _ODP_ASSERT((uint32_t)index < priv->q_num_conf.num_out_conf_qs); sock = &priv->qs[index]; if (!priv->lockless_tx) @@ -830,12 +1075,13 @@ static int sock_xdp_link_info(pktio_entry_t *pktio_entry, odp_pktio_link_info_t pktio_entry->name, info); } -static int set_queue_capability(int fd, const char *devname, odp_pktio_capability_t *capa) +static int get_nic_queue_capability(int fd, const char *devname, odp_pktio_capability_t *capa) { - struct ifreq ifr; struct ethtool_channels channels; - uint32_t max_channels; + struct ifreq ifr; int ret; + const uint32_t cc = odp_cpu_count(); + uint32_t max_channels; memset(&channels, 0, sizeof(struct ethtool_channels)); channels.cmd = ETHTOOL_GCHANNELS; @@ -845,16 +1091,16 @@ static int set_queue_capability(int fd, const char *devname, odp_pktio_capabilit if (ret == -1 || channels.max_combined == 0U) { if (ret == -1 && errno != EOPNOTSUPP) { - _ODP_ERR("Unable to query NIC channel capabilities: %s\n", strerror(errno)); + _ODP_ERR("Unable to query NIC queue capabilities: %s\n", 
strerror(errno)); return -1; } channels.max_combined = 1U; } - max_channels = _ODP_MIN((uint32_t)ODP_PKTOUT_MAX_QUEUES, channels.max_combined); + max_channels = _ODP_MIN(cc, channels.max_combined); capa->max_input_queues = _ODP_MIN((uint32_t)ODP_PKTIN_MAX_QUEUES, max_channels); - capa->max_output_queues = max_channels; + capa->max_output_queues = _ODP_MIN((uint32_t)ODP_PKTOUT_MAX_QUEUES, max_channels); return 0; } @@ -865,7 +1111,7 @@ static int sock_xdp_capability(pktio_entry_t *pktio_entry, odp_pktio_capability_ memset(capa, 0, sizeof(odp_pktio_capability_t)); - if (set_queue_capability(priv->helper_sock, pktio_entry->name, capa)) + if (get_nic_queue_capability(priv->helper_sock, pktio_entry->name, capa)) return -1; capa->set_op.op.promisc_mode = 1U; @@ -903,72 +1149,22 @@ static int sock_xdp_input_queues_config(pktio_entry_t *pktio_entry, { xdp_sock_info_t *priv = pkt_priv(pktio_entry); + priv->q_num_conf.num_in_conf_qs = param->num_queues; priv->lockless_rx = pktio_entry->param.in_mode == ODP_PKTIN_MODE_SCHED || param->op_mode == ODP_PKTIO_OP_MT_UNSAFE; return 0; } -static void fill_socket_config(struct xsk_socket_config *config, xdp_umem_info_t *umem_info) -{ - config->rx_size = umem_info->num_rx_desc; - config->tx_size = umem_info->num_tx_desc; - config->libxdp_flags = 0U; - config->xdp_flags = 0U; - config->bind_flags = XDP_ZEROCOPY; /* TODO: XDP_COPY */ -} - static int sock_xdp_output_queues_config(pktio_entry_t *pktio_entry, const odp_pktout_queue_param_t *param) { xdp_sock_info_t *priv = pkt_priv(pktio_entry); - struct xsk_socket_config config; - const char *devname = pktio_entry->name; - uint32_t bind_q, i; - struct xsk_umem *umem; - xdp_sock_t *sock; - int ret; + priv->q_num_conf.num_out_conf_qs = param->num_queues; priv->lockless_tx = param->op_mode == ODP_PKTIO_OP_MT_UNSAFE; - fill_socket_config(&config, priv->umem_info); - bind_q = priv->bind_q; - umem = priv->umem_info->umem; - - for (i = 0U; i < param->num_queues;) { - sock = &priv->qs[i]; - ret = 
xsk_socket__create_shared(&sock->xsk, devname, bind_q, umem, &sock->rx, - &sock->tx, &sock->fill_q, &sock->compl_q, &config); - - if (ret) { - _ODP_ERR("Error creating xdp socket for bind queue %u: %d\n", bind_q, ret); - goto err; - } - - ++i; - - if (!reserve_fill_queue_elements(priv, sock, config.rx_size)) { - _ODP_ERR("Unable to reserve fill queue descriptors for queue: %u.\n", - bind_q); - goto err; - } - - ++bind_q; - } - - priv->num_q = i; - /* Ring setup/clean up routines seem to be asynchronous with some drivers and might not be - * ready yet after xsk_socket__create_shared(). */ - sleep(1U); return 0; - -err: - for (uint32_t j = 0U; j < i; ++j) { - xsk_socket__delete(priv->qs[j].xsk); - priv->qs[j].xsk = NULL; - } - - return -1; } const pktio_if_ops_t _odp_sock_xdp_pktio_ops = { @@ -979,8 +1175,8 @@ const pktio_if_ops_t _odp_sock_xdp_pktio_ops = { .term = NULL, .open = sock_xdp_open, .close = sock_xdp_close, - .start = NULL, - .stop = NULL, + .start = sock_xdp_start, + .stop = sock_xdp_stop, .stats = sock_xdp_stats, .stats_reset = sock_xdp_stats_reset, .pktin_queue_stats = sock_xdp_pktin_queue_stats, |