Diffstat (limited to 'platform/linux-generic/pktio')
-rw-r--r--  platform/linux-generic/pktio/dpdk.c        | 116
-rw-r--r--  platform/linux-generic/pktio/io_ops.c      |   3
-rw-r--r--  platform/linux-generic/pktio/ipc.c         |  12
-rw-r--r--  platform/linux-generic/pktio/loop.c        |  15
-rw-r--r--  platform/linux-generic/pktio/netmap.c      |  12
-rw-r--r--  platform/linux-generic/pktio/socket.c      |   2
-rw-r--r--  platform/linux-generic/pktio/socket_mmap.c |   4
-rw-r--r--  platform/linux-generic/pktio/socket_xdp.c  | 688
8 files changed, 779 insertions(+), 73 deletions(-)
diff --git a/platform/linux-generic/pktio/dpdk.c b/platform/linux-generic/pktio/dpdk.c
index 030560b0d..006344b48 100644
--- a/platform/linux-generic/pktio/dpdk.c
+++ b/platform/linux-generic/pktio/dpdk.c
@@ -23,14 +23,15 @@
#include <odp/api/time.h>
#include <odp/api/plat/time_inlines.h>
-#include <odp_align_internal.h>
#include <odp_packet_io_internal.h>
+#include <odp_pool_internal.h>
#include <odp_classification_internal.h>
#include <odp_socket_common.h>
#include <odp_packet_dpdk.h>
#include <odp_debug_internal.h>
#include <odp_libconfig_internal.h>
#include <odp_errno_define.h>
+#include <odp_macros_internal.h>
#include <protocols/eth.h>
#include <protocols/udp.h>
@@ -120,7 +121,7 @@ struct pkt_cache_t {
typedef union ODP_ALIGNED_CACHE {
struct pkt_cache_t s;
- uint8_t pad[ROUNDUP_CACHE_LINE(sizeof(struct pkt_cache_t))];
+ uint8_t pad[_ODP_ROUNDUP_CACHE_LINE(sizeof(struct pkt_cache_t))];
} pkt_cache_t;
/** Packet IO using DPDK interface */
@@ -153,6 +154,15 @@ typedef struct ODP_ALIGNED_CACHE {
ODP_STATIC_ASSERT(PKTIO_PRIVATE_SIZE >= sizeof(pkt_dpdk_t),
"PKTIO_PRIVATE_SIZE too small");
+typedef struct {
+ uint32_t dpdk_elt_size;
+ uint8_t pool_in_use;
+ struct rte_mempool *pkt_pool;
+} mem_src_data_t;
+
+ODP_STATIC_ASSERT(_ODP_POOL_MEM_SRC_DATA_SIZE >= sizeof(mem_src_data_t),
+ "_ODP_POOL_MEM_SRC_DATA_SIZE too small");
+
static inline struct rte_mbuf *mbuf_from_pkt_hdr(odp_packet_hdr_t *pkt_hdr)
{
return ((struct rte_mbuf *)pkt_hdr) - 1;
@@ -168,6 +178,11 @@ static inline pkt_dpdk_t *pkt_priv(pktio_entry_t *pktio_entry)
return (pkt_dpdk_t *)(uintptr_t)(pktio_entry->s.pkt_priv);
}
+static inline mem_src_data_t *mem_src_priv(uint8_t *data)
+{
+ return (mem_src_data_t *)data;
+}
+
static int disable_pktio; /** 0: enabled, non-zero: this pktio disabled */
static int dpdk_pktio_init(void);
@@ -320,13 +335,14 @@ static void pktmbuf_init(struct rte_mempool *mp, void *opaque_arg ODP_UNUSED,
* Create custom DPDK packet pool
*/
static struct rte_mempool *mbuf_pool_create(const char *name,
- pool_t *pool_entry)
+ pool_t *pool_entry,
+ uint32_t dpdk_elt_size)
{
odp_shm_info_t shm_info;
struct rte_mempool *mp = NULL;
struct rte_pktmbuf_pool_private mbp_priv;
struct rte_mempool_objsz sz;
- unsigned int elt_size = pool_entry->dpdk_elt_size;
+ unsigned int elt_size = dpdk_elt_size;
unsigned int num = pool_entry->num, populated = 0;
uint32_t total_size;
uint64_t page_size, offset = 0, remainder = 0;
@@ -423,9 +439,10 @@ static int pool_enqueue(struct rte_mempool *mp,
{
odp_packet_t pkt_tbl[num];
pool_t *pool_entry = (pool_t *)mp->pool_config;
+ mem_src_data_t *mem_src_data = mem_src_priv(pool_entry->mem_src_data);
unsigned i;
- if (odp_unlikely(num == 0 || !pool_entry->pool_in_use))
+ if (odp_unlikely(num == 0 || !mem_src_data->pool_in_use))
return 0;
for (i = 0; i < num; i++) {
@@ -497,67 +514,72 @@ static void pool_free(struct rte_mempool *mp)
}
}
-static void pool_destroy(void *pool)
+static void pool_destroy(uint8_t *data)
{
- struct rte_mempool *mp = (struct rte_mempool *)pool;
+ mem_src_data_t *mem_src_data = mem_src_priv(data);
- if (mp != NULL) {
- pool_t *pool_entry = (pool_t *)mp->pool_config;
-
- pool_entry->pool_in_use = 0;
- rte_mempool_free(mp);
+ if (mem_src_data->pkt_pool != NULL) {
+ mem_src_data->pool_in_use = 0;
+ rte_mempool_free(mem_src_data->pkt_pool);
}
+
+ mem_src_data->pkt_pool = NULL;
}
-int _odp_dpdk_pool_create(pool_t *pool)
+static int pool_create(uint8_t *data, pool_t *pool)
{
struct rte_mempool *pkt_pool;
char pool_name[RTE_MEMPOOL_NAMESIZE];
+ mem_src_data_t *mem_src_data = mem_src_priv(data);
+
+ mem_src_data->pkt_pool = NULL;
if (!_ODP_DPDK_ZERO_COPY)
return 0;
- pool->pool_in_use = 0;
-
+ mem_src_data->pool_in_use = 0;
snprintf(pool_name, sizeof(pool_name),
"dpdk_pktpool_%" PRIu32 "_%" PRIu32 "", odp_global_ro.main_pid,
pool->pool_idx);
- pkt_pool = mbuf_pool_create(pool_name, pool);
+ pkt_pool = mbuf_pool_create(pool_name, pool, mem_src_data->dpdk_elt_size);
if (pkt_pool == NULL) {
ODP_ERR("Creating external DPDK pool failed\n");
return -1;
}
- pool->ext_desc = pkt_pool;
- pool->ext_destroy = pool_destroy;
- pool->pool_in_use = 1;
+ mem_src_data->pkt_pool = pkt_pool;
+ mem_src_data->pool_in_use = 1;
return 0;
}
-uint32_t _odp_dpdk_pool_obj_size(pool_t *pool, uint32_t block_size)
+static void pool_obj_size(uint8_t *data, uint32_t *block_size, uint32_t *block_offset,
+ uint32_t *flags)
{
struct rte_mempool_objsz sz;
+ uint32_t size;
uint32_t total_size;
+ mem_src_data_t *mem_src_data = mem_src_priv(data);
if (!_ODP_DPDK_ZERO_COPY)
- return block_size;
+ return;
if (odp_global_rw->dpdk_initialized == 0) {
if (dpdk_pktio_init()) {
ODP_ERR("Initializing DPDK failed\n");
- return 0;
+ *block_size = 0;
+ return;
}
odp_global_rw->dpdk_initialized = 1;
}
- block_size += sizeof(struct rte_mbuf);
- total_size = rte_mempool_calc_obj_size(block_size, MEMPOOL_FLAGS, &sz);
- pool->dpdk_elt_size = sz.elt_size;
- pool->block_offset = sz.header_size + sizeof(struct rte_mbuf);
-
- return total_size;
+ *flags |= ODP_SHM_HP;
+ size = *block_size + sizeof(struct rte_mbuf);
+ total_size = rte_mempool_calc_obj_size(size, MEMPOOL_FLAGS, &sz);
+ mem_src_data->dpdk_elt_size = sz.elt_size;
+ *block_size = total_size;
+ *block_offset = sz.header_size + sizeof(struct rte_mbuf);
}
static struct rte_mempool_ops odp_pool_ops = {
@@ -1735,7 +1757,9 @@ static int dpdk_open(odp_pktio_t id ODP_UNUSED,
pkt_dpdk->min_rx_burst = 0;
if (_ODP_DPDK_ZERO_COPY) {
- pkt_pool = (struct rte_mempool *)pool_entry->ext_desc;
+ mem_src_data_t *mem_src_data = mem_src_priv(pool_entry->mem_src_data);
+
+ pkt_pool = mem_src_data->pkt_pool;
} else {
snprintf(pool_name, sizeof(pool_name), "pktpool_%s", netdev);
/* Check if the pool exists already */
@@ -2406,27 +2430,27 @@ const pktio_if_ops_t _odp_dpdk_pktio_ops = {
.output_queues_config = dpdk_output_queues_config
};
-#else
-
-#include <stdint.h>
-
-#include <odp/api/hints.h>
-
-#include <odp_packet_dpdk.h>
-#include <odp_pool_internal.h>
-
-/*
- * Dummy functions for pool_create()
- */
-
-uint32_t _odp_dpdk_pool_obj_size(pool_t *pool ODP_UNUSED, uint32_t block_size)
+static odp_bool_t is_mem_src_active(void)
{
- return block_size;
+ return !disable_pktio && _ODP_DPDK_ZERO_COPY;
}
-int _odp_dpdk_pool_create(pool_t *pool ODP_UNUSED)
+static void force_mem_src_disable(void)
{
- return 0;
+ if (_ODP_DPDK_ZERO_COPY)
+ disable_pktio = 1;
}
+const _odp_pool_mem_src_ops_t _odp_pool_dpdk_mem_src_ops = {
+ .name = "dpdk_zc",
+ .is_active = is_mem_src_active,
+ .force_disable = force_mem_src_disable,
+ .adjust_size = pool_obj_size,
+ .bind = pool_create,
+ .unbind = pool_destroy
+};
+
+#else
+/* Avoid warning about empty translation unit */
+typedef int _odp_dummy;
#endif /* _ODP_PKTIO_DPDK */
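
The exported _odp_dpdk_pool_create()/_odp_dpdk_pool_obj_size() entry points and
their dummy fallbacks are folded into a generic _odp_pool_mem_src_ops_t hook
table that the pool code can drive for any memory source. A minimal sketch of
that driving sequence, assuming the internal odp_pool_internal.h types and a
hypothetical helper name (the actual odp_pool.c wiring may differ):

	static int mem_src_setup(pool_t *pool, const _odp_pool_mem_src_ops_t *ops,
				 uint32_t *block_size, uint32_t *block_offset,
				 uint32_t *shm_flags)
	{
		if (ops == NULL || !ops->is_active())
			return 0; /* no memory source, plain SHM pool */

		/* Let the module grow the block, set a data offset and request
		 * e.g. ODP_SHM_HP before the pool SHM is reserved. */
		ops->adjust_size(pool->mem_src_data, block_size, block_offset,
				 shm_flags);

		if (*block_size == 0U)
			return -1; /* module rejected the pool geometry */

		/* ... reserve SHM with *shm_flags, carve out blocks ... */

		return ops->bind(pool->mem_src_data, pool); /* unbind() at destroy */
	}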
diff --git a/platform/linux-generic/pktio/io_ops.c b/platform/linux-generic/pktio/io_ops.c
index b5a08b58a..f9ea89f71 100644
--- a/platform/linux-generic/pktio/io_ops.c
+++ b/platform/linux-generic/pktio/io_ops.c
@@ -16,6 +16,9 @@ const pktio_if_ops_t * const _odp_pktio_if_ops[] = {
#ifdef _ODP_PKTIO_DPDK
&_odp_dpdk_pktio_ops,
#endif
+#ifdef _ODP_PKTIO_XDP
+ &_odp_sock_xdp_pktio_ops,
+#endif
#ifdef _ODP_PKTIO_NETMAP
&_odp_netmap_pktio_ops,
#endif
diff --git a/platform/linux-generic/pktio/ipc.c b/platform/linux-generic/pktio/ipc.c
index 81938a983..455243159 100644
--- a/platform/linux-generic/pktio/ipc.c
+++ b/platform/linux-generic/pktio/ipc.c
@@ -5,10 +5,12 @@
* SPDX-License-Identifier: BSD-3-Clause
*/
+#include <odp/api/system_info.h>
+
#include <odp_debug_internal.h>
#include <odp_packet_io_internal.h>
#include <odp_errno_define.h>
-#include <odp/api/system_info.h>
+#include <odp_macros_internal.h>
#include <odp_shm_internal.h>
#include <odp_ring_ptr_internal.h>
#include <odp_global_data.h>
@@ -124,7 +126,7 @@ static ring_ptr_t *_ring_create(const char *name, uint32_t count,
shm_flags |= ODP_SHM_SINGLE_VA;
/* count must be a power of 2 */
- if (!CHECK_IS_POWER2(count)) {
+ if (!_ODP_CHECK_IS_POWER2(count)) {
ODP_ERR("Requested size is invalid, must be a power of 2\n");
_odp_errno = EINVAL;
return NULL;
@@ -234,17 +236,17 @@ static int _ipc_init_master(pktio_entry_t *pktio_entry,
uint32_t ring_size;
uint32_t ring_mask;
- if ((uint64_t)ROUNDUP_POWER2_U32(pool->num + 1) > UINT32_MAX) {
+ if ((uint64_t)_ODP_ROUNDUP_POWER2_U32(pool->num + 1) > UINT32_MAX) {
ODP_ERR("Too large packet pool\n");
return -1;
}
/* Ring must be able to store all packets in the pool */
- ring_size = ROUNDUP_POWER2_U32(pool->num + 1);
+ ring_size = _ODP_ROUNDUP_POWER2_U32(pool->num + 1);
/* Ring size has to be larger than burst size */
if (ring_size <= IPC_BURST_SIZE)
- ring_size = ROUNDUP_POWER2_U32(IPC_BURST_SIZE + 1);
+ ring_size = _ODP_ROUNDUP_POWER2_U32(IPC_BURST_SIZE + 1);
ring_mask = ring_size - 1;
pktio_ipc->ring_size = ring_size;
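
The renamed macros leave the ring-sizing logic intact: the ring must hold every
pool packet plus one free slot, rounded up to a power of two so a simple mask
can wrap indices. A worked example with an assumed pool of 1000 packets:

	uint32_t ring_size = _ODP_ROUNDUP_POWER2_U32(1000 + 1); /* -> 1024 */
	uint32_t ring_mask = ring_size - 1;                     /* -> 0x3FF */
	uint32_t slot      = index & ring_mask; /* cheap modulo of a running index */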
diff --git a/platform/linux-generic/pktio/loop.c b/platform/linux-generic/pktio/loop.c
index 3e21efecd..c702f9ded 100644
--- a/platform/linux-generic/pktio/loop.c
+++ b/platform/linux-generic/pktio/loop.c
@@ -197,7 +197,7 @@ static int loopback_recv(pktio_entry_t *pktio_entry, int index ODP_UNUSED,
* parser in the case of a segmented packet. */
if (odp_unlikely(seg_len < PARSE_BYTES &&
pkt_len > seg_len)) {
- seg_len = MIN(pkt_len, PARSE_BYTES);
+ seg_len = _ODP_MIN(pkt_len, PARSE_BYTES);
odp_packet_copy_to_mem(pkt, 0, seg_len, buf);
pkt_addr = buf;
} else {
@@ -405,20 +405,9 @@ static int loopback_send(pktio_entry_t *pktio_entry, int index ODP_UNUSED,
}
for (i = 0; i < nb_tx; ++i) {
- odp_ipsec_packet_result_t result;
-
- if (odp_packet_subtype(pkt_tbl[i]) ==
- ODP_EVENT_PACKET_IPSEC &&
- pktio_entry->s.config.outbound_ipsec) {
-
- /* Possibly postprocessing packet */
- odp_ipsec_result(&result, pkt_tbl[i]);
- }
packet_subtype_set(pkt_tbl[i], ODP_EVENT_PACKET_BASIC);
- }
-
- for (i = 0; i < nb_tx; ++i)
loopback_fix_checksums(pkt_tbl[i], pktout_cfg, pktout_capa);
+ }
odp_ticketlock_lock(&pktio_entry->s.txl);
diff --git a/platform/linux-generic/pktio/netmap.c b/platform/linux-generic/pktio/netmap.c
index 94b88e21e..342f38431 100644
--- a/platform/linux-generic/pktio/netmap.c
+++ b/platform/linux-generic/pktio/netmap.c
@@ -24,17 +24,17 @@
#include <odp_socket_common.h>
#include <odp_debug_internal.h>
#include <odp_errno_define.h>
-#include <protocols/eth.h>
+#include <odp_classification_datamodel.h>
+#include <odp_classification_internal.h>
+#include <odp_libconfig_internal.h>
+#include <odp_macros_internal.h>
+#include <protocols/eth.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <poll.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>
-#include <odp_classification_datamodel.h>
-#include <odp_classification_internal.h>
-#include <odp_libconfig_internal.h>
-
#include <inttypes.h>
/* Disable netmap debug prints */
@@ -73,7 +73,7 @@ struct netmap_ring_t {
typedef union ODP_ALIGNED_CACHE {
struct netmap_ring_t s;
- uint8_t pad[ROUNDUP_CACHE_LINE(sizeof(struct netmap_ring_t))];
+ uint8_t pad[_ODP_ROUNDUP_CACHE_LINE(sizeof(struct netmap_ring_t))];
} netmap_ring_t;
/** Netmap ring slot */
diff --git a/platform/linux-generic/pktio/socket.c b/platform/linux-generic/pktio/socket.c
index 9d1bbe545..0d756c4e1 100644
--- a/platform/linux-generic/pktio/socket.c
+++ b/platform/linux-generic/pktio/socket.c
@@ -289,7 +289,7 @@ static int sock_mmsg_recv(pktio_entry_t *pktio_entry, int index ODP_UNUSED,
/* Make sure there is enough data for the packet
* parser in the case of a segmented packet. */
if (odp_unlikely(seg_len < PARSE_BYTES && pkt_len > seg_len)) {
- seg_len = MIN(pkt_len, PARSE_BYTES);
+ seg_len = _ODP_MIN(pkt_len, PARSE_BYTES);
odp_packet_copy_to_mem(pkt, 0, seg_len, buf);
base = buf;
}
diff --git a/platform/linux-generic/pktio/socket_mmap.c b/platform/linux-generic/pktio/socket_mmap.c
index 7824b0e91..4845b5dab 100644
--- a/platform/linux-generic/pktio/socket_mmap.c
+++ b/platform/linux-generic/pktio/socket_mmap.c
@@ -25,6 +25,7 @@
#include <odp_classification_datamodel.h>
#include <odp_classification_internal.h>
#include <odp_global_data.h>
+#include <odp_macros_internal.h>
#include <protocols/eth.h>
#include <protocols/ip.h>
@@ -433,8 +434,7 @@ static int mmap_setup_ring(pkt_sock_mmap_t *pkt_sock, struct ring *ring,
ring->type = type;
ring->version = TPACKET_V2;
- frame_size = ROUNDUP_POWER2_U32(mtu + TPACKET_HDRLEN
- + TPACKET_ALIGNMENT);
+ frame_size = _ODP_ROUNDUP_POWER2_U32(mtu + TPACKET_HDRLEN + TPACKET_ALIGNMENT);
block_size = BLOCK_SIZE;
if (frame_size > block_size)
block_size = frame_size;
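
Again only the macro prefix changes; the sizing rule itself stays: a TPACKET_V2
frame must fit the MTU plus kernel header overhead, rounded up to a power of
two. With an assumed 1500-byte MTU (TPACKET_HDRLEN adds a few tens of bytes):

	frame_size = _ODP_ROUNDUP_POWER2_U32(1500 + TPACKET_HDRLEN + TPACKET_ALIGNMENT);
	/* a bit over 1.5 kB rounds up to 2048 */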
diff --git a/platform/linux-generic/pktio/socket_xdp.c b/platform/linux-generic/pktio/socket_xdp.c
new file mode 100644
index 000000000..e43e4bf89
--- /dev/null
+++ b/platform/linux-generic/pktio/socket_xdp.c
@@ -0,0 +1,688 @@
+/* Copyright (c) 2022, Nokia
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <odp/autoheader_internal.h>
+
+#ifdef _ODP_PKTIO_XDP
+
+#include <odp_posix_extensions.h>
+#include <odp/api/debug.h>
+#include <odp/api/hints.h>
+#include <odp/api/system_info.h>
+#include <odp/api/ticketlock.h>
+
+#include <odp_debug_internal.h>
+#include <odp_macros_internal.h>
+#include <odp_packet_io_internal.h>
+#include <odp_packet_internal.h>
+#include <odp_parse_internal.h>
+#include <odp_classification_internal.h>
+#include <odp_socket_common.h>
+
+#include <string.h>
+#include <errno.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <poll.h>
+
+#include <xdp/xsk.h>
+
+#define NUM_XDP_DESCS 1024U
+#define MIN_FRAME_SIZE 2048U
+#define IF_DELIM " "
+#define Q_DELIM ':'
+
+typedef struct {
+ struct xsk_ring_prod fill_q;
+ struct xsk_ring_cons compl_q;
+ struct xsk_umem *umem;
+ pool_t *pool;
+} xdp_umem_info_t;
+
+typedef struct {
+ struct xsk_ring_cons rx;
+ struct xsk_ring_cons compl_q;
+ struct xsk_ring_prod tx;
+ struct xsk_ring_prod fill_q;
+ xdp_umem_info_t *umem_info;
+ struct xsk_socket *xsk;
+ int pktio_idx;
+ int helper_sock;
+ uint32_t mtu;
+ uint32_t max_mtu;
+} xdp_sock_info_t;
+
+typedef struct {
+ odp_ticketlock_t rx_lock ODP_ALIGNED_CACHE;
+ odp_ticketlock_t tx_lock ODP_ALIGNED_CACHE;
+ xdp_sock_info_t sock_info;
+} pkt_xdp_t;
+
+typedef struct {
+ odp_packet_hdr_t *pkt_hdr;
+ odp_packet_t pkt;
+ uint8_t *data;
+ uint32_t len;
+} pkt_data_t;
+
+ODP_STATIC_ASSERT(PKTIO_PRIVATE_SIZE >= sizeof(pkt_xdp_t),
+ "PKTIO_PRIVATE_SIZE too small");
+
+static odp_bool_t disable_pktio;
+
+static int sock_xdp_init_global(void)
+{
+ if (getenv("ODP_PKTIO_DISABLE_SOCKET_XDP")) {
+ ODP_PRINT("PKTIO: socket xdp skipped,"
+ " enabled export ODP_PKTIO_DISABLE_SOCKET_XDP=1.\n");
+ disable_pktio = true;
+ } else {
+ ODP_PRINT("PKTIO: initialized socket xdp,"
+ " use export ODP_PKTIO_DISABLE_SOCKET_XDP=1 to disable.\n");
+ }
+
+ return 0;
+}
+
+static inline pkt_xdp_t *pkt_priv(pktio_entry_t *pktio_entry)
+{
+ return (pkt_xdp_t *)(uintptr_t)(pktio_entry->s.pkt_priv);
+}
+
+static void fill_socket_config(struct xsk_socket_config *config)
+{
+ config->rx_size = NUM_XDP_DESCS;
+ config->tx_size = NUM_XDP_DESCS;
+ config->libxdp_flags = 0U;
+ config->xdp_flags = 0U;
+ config->bind_flags = XDP_ZEROCOPY; /* TODO: XDP_COPY */
+}
+
+static uint32_t get_bind_queue_index(const char *devname)
+{
+ const char *param = getenv("ODP_PKTIO_XDP_PARAMS");
+ char *tmp_str;
+ char *tmp;
+ char *if_str;
+ int idx = 0;
+
+ if (param == NULL)
+ goto out;
+
+ tmp_str = strdup(param);
+
+ if (tmp_str == NULL)
+ goto out;
+
+ tmp = strtok(tmp_str, IF_DELIM);
+
+ if (tmp == NULL)
+ goto out_str;
+
+ while (tmp) {
+ if_str = strchr(tmp, Q_DELIM);
+
+ if (if_str != NULL && if_str != &tmp[strlen(tmp) - 1U]) {
+ if (strncmp(devname, tmp, (uint64_t)(uintptr_t)(if_str - tmp)) == 0) {
+ idx = _ODP_MAX(atoi(++if_str), 0);
+ break;
+ }
+ }
+
+ tmp = strtok(NULL, IF_DELIM);
+ }
+
+out_str:
+ free(tmp_str);
+
+out:
+ return idx;
+}
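+
+/* For example, ODP_PKTIO_XDP_PARAMS="eth0:3 eth1:1" binds pktio "eth0" to
+ * queue 3 and "eth1" to queue 1; interfaces without a matching
+ * "<if>:<queue>" token fall back to queue 0. */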
+
+static odp_bool_t reserve_fill_queue_elements(xdp_sock_info_t *sock_info, int num)
+{
+ pool_t *pool;
+ odp_packet_t packets[num];
+ int count;
+ struct xsk_ring_prod *fill_q;
+ uint32_t start_idx;
+ int pktio_idx;
+ uint32_t block_size;
+ odp_packet_hdr_t *pkt_hdr;
+
+ pool = sock_info->umem_info->pool;
+ count = odp_packet_alloc_multi(pool->pool_hdl, sock_info->mtu, packets, num);
+
+ if (count <= 0)
+ return false;
+
+ fill_q = &sock_info->fill_q;
+
+ if (xsk_ring_prod__reserve(fill_q, count, &start_idx) == 0U) {
+ odp_packet_free_multi(packets, count);
+ return false;
+ }
+
+ pktio_idx = sock_info->pktio_idx;
+ block_size = pool->block_size;
+
+ for (int i = 0; i < count; ++i) {
+ pkt_hdr = packet_hdr(packets[i]);
+ pkt_hdr->ms_pktio_idx = pktio_idx;
+ *xsk_ring_prod__fill_addr(fill_q, start_idx++) =
+ pkt_hdr->event_hdr.index.event * block_size;
+ }
+
+ xsk_ring_prod__submit(&sock_info->fill_q, count);
+
+ return true;
+}
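+
+/* Each fill queue entry is the UMEM offset of a whole pool block (event
+ * index * block_size), so the kernel writes received frames directly into
+ * pool-owned packet buffers. */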
+
+static int sock_xdp_open(odp_pktio_t pktio, pktio_entry_t *pktio_entry, const char *devname,
+ odp_pool_t pool_hdl)
+{
+ pkt_xdp_t *priv;
+ pool_t *pool;
+ struct xsk_socket_config config;
+ uint32_t bind_q;
+ int ret;
+
+ if (disable_pktio)
+ return -1;
+
+ priv = pkt_priv(pktio_entry);
+ memset(priv, 0, sizeof(pkt_xdp_t));
+ pool = pool_entry_from_hdl(pool_hdl);
+ priv->sock_info.umem_info = (xdp_umem_info_t *)pool->mem_src_data;
+ priv->sock_info.xsk = NULL;
+ /* Mark transitory kernel-owned packets with the pktio index, so that they can be freed on
+ * close. */
+ priv->sock_info.pktio_idx = 1 + odp_pktio_index(pktio);
+ fill_socket_config(&config);
+ bind_q = get_bind_queue_index(devname);
+ /* As only one bind queue index can be passed to
+ * xsk_socket__create_shared(), the NIC in use needs to be configured
+ * accordingly to have only a single combined TX-RX queue; otherwise
+ * traffic may not end up on the socket. For now, always bind to the
+ * first queue (overridable with the environment variable). */
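+ /* E.g. "ethtool -L eth0 combined 1" (interface name assumed) leaves a
+ * single combined RX-TX queue pair, so queue index 0 sees all traffic. */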
+ ret = xsk_socket__create_shared(&priv->sock_info.xsk, devname, bind_q,
+ priv->sock_info.umem_info->umem, &priv->sock_info.rx,
+ &priv->sock_info.tx, &priv->sock_info.fill_q,
+ &priv->sock_info.compl_q, &config);
+
+ if (ret) {
+ ODP_ERR("Error creating xdp socket for bind queue %u: %d\n", bind_q, ret);
+ goto xsk_err;
+ }
+
+ /* Ring setup/clean up routines seem to be asynchronous with some drivers and might not be
+ * ready yet after xsk_socket__create_shared(). */
+ sleep(1U);
+
+ /* Querying with ioctl() via AF_XDP socket doesn't seem to work, so
+ * create a helper socket for this. */
+ priv->sock_info.helper_sock = -1;
+ ret = socket(AF_INET, SOCK_DGRAM, 0);
+
+ if (ret == -1) {
+ ODP_ERR("Error creating helper socket for xdp: %s\n", strerror(errno));
+ goto sock_err;
+ }
+
+ priv->sock_info.helper_sock = ret;
+ priv->sock_info.mtu = _odp_mtu_get_fd(priv->sock_info.helper_sock, devname);
+
+ if (priv->sock_info.mtu == 0U)
+ goto res_err;
+
+ priv->sock_info.max_mtu = pool->seg_len;
+
+ if (!reserve_fill_queue_elements(&priv->sock_info, config.rx_size)) {
+ ODP_ERR("Unable to reserve fill queue descriptors.\n");
+ goto res_err;
+ }
+
+ odp_ticketlock_init(&priv->rx_lock);
+ odp_ticketlock_init(&priv->tx_lock);
+
+ return 0;
+
+res_err:
+ close(priv->sock_info.helper_sock);
+ priv->sock_info.helper_sock = -1;
+
+sock_err:
+ xsk_socket__delete(priv->sock_info.xsk);
+ priv->sock_info.xsk = NULL;
+
+xsk_err:
+ return -1;
+}
+
+static int sock_xdp_close(pktio_entry_t *pktio_entry)
+{
+ pkt_xdp_t *priv = pkt_priv(pktio_entry);
+ pool_t *pool = priv->sock_info.umem_info->pool;
+ odp_packet_hdr_t *pkt_hdr;
+
+ if (priv->sock_info.helper_sock != -1)
+ close(priv->sock_info.helper_sock);
+
+ if (priv->sock_info.xsk != NULL)
+ xsk_socket__delete(priv->sock_info.xsk);
+
+ /* Ring setup/clean up routines seem to be asynchronous with some drivers and might not be
+ * ready yet after xsk_socket__delete(). */
+ sleep(1U);
+
+ /* Free all packets that were in fill or completion queues at the time of closing. */
+ for (uint32_t i = 0U; i < pool->num + pool->skipped_blocks; ++i) {
+ pkt_hdr = packet_hdr(packet_from_event_hdr(event_hdr_from_index(pool, i)));
+
+ if (pkt_hdr->ms_pktio_idx == priv->sock_info.pktio_idx) {
+ pkt_hdr->ms_pktio_idx = 0U;
+ odp_packet_free(packet_handle(pkt_hdr));
+ }
+ }
+
+ return 0;
+}
+
+static inline void extract_data(const struct xdp_desc *rx_desc, uint8_t *pool_base_addr,
+ pkt_data_t *pkt_data)
+{
+ uint64_t frame_off;
+ uint64_t pkt_off;
+
+ /* UMEM "addresses" are offsets from start of a registered UMEM area.
+ * Additionally, the packet data offset (where received packet data
+ * starts within a UMEM frame) is encoded to the UMEM address with
+ * XSK_UNALIGNED_BUF_OFFSET_SHIFT left bitshift when XDP_ZEROCOPY and
+ * XDP_UMEM_UNALIGNED_CHUNK_FLAG are enabled. */
+ frame_off = rx_desc->addr;
+ pkt_off = xsk_umem__add_offset_to_addr(frame_off);
+ frame_off = xsk_umem__extract_addr(frame_off);
+ pkt_data->pkt_hdr = xsk_umem__get_data(pool_base_addr, frame_off);
+ pkt_data->pkt = packet_handle(pkt_data->pkt_hdr);
+ pkt_data->data = xsk_umem__get_data(pool_base_addr, pkt_off);
+ pkt_data->len = rx_desc->len;
+}
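+
+/* Example: with 2 kB blocks, a descriptor address of
+ * (3U * 2048U) | (256UL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) resolves to
+ * frame 3 of the UMEM, with packet data starting 256 B into the frame. */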
+
+static uint32_t process_received(pktio_entry_t *pktio_entry, xdp_sock_info_t *sock_info,
+ uint32_t start_idx, odp_packet_t packets[], int num)
+{
+ pkt_data_t pkt_data;
+ struct xsk_ring_cons *rx = &sock_info->rx;
+ uint8_t *base_addr = sock_info->umem_info->pool->base_addr;
+ const odp_proto_layer_t layer = pktio_entry->s.parse_layer;
+ const odp_proto_chksums_t in_chksums = pktio_entry->s.in_chksums;
+ const odp_pktin_config_opt_t opt = pktio_entry->s.config.pktin;
+ uint64_t l4_part_sum = 0U;
+ odp_pool_t *pool_hdl = &sock_info->umem_info->pool->pool_hdl;
+ odp_pktio_t pktio_hdl = pktio_entry->s.handle;
+ uint32_t num_rx = 0U;
+
+ for (int i = 0; i < num; ++i) {
+ extract_data(xsk_ring_cons__rx_desc(rx, start_idx++), base_addr, &pkt_data);
+ pkt_data.pkt_hdr->ms_pktio_idx = 0U;
+ packet_init(pkt_data.pkt_hdr, pkt_data.len);
+
+ if (layer) {
+ if (_odp_packet_parse_common(&pkt_data.pkt_hdr->p, pkt_data.data,
+ pkt_data.len, pkt_data.len,
+ layer, in_chksums, &l4_part_sum, opt) < 0) {
+ odp_packet_free(pkt_data.pkt);
+ continue;
+ }
+
+ if (pktio_cls_enabled(pktio_entry) &&
+ _odp_cls_classify_packet(pktio_entry, pkt_data.data, pool_hdl,
+ pkt_data.pkt_hdr)) {
+ odp_packet_free(pkt_data.pkt);
+ continue;
+ }
+ }
+
+ pkt_data.pkt_hdr->seg_data = pkt_data.data;
+ pkt_data.pkt_hdr->event_hdr.base_data = pkt_data.data;
+ pkt_data.pkt_hdr->input = pktio_hdl;
+ packets[num_rx++] = pkt_data.pkt;
+ }
+
+ return num_rx;
+}
+
+static int sock_xdp_recv(pktio_entry_t *pktio_entry, int index ODP_UNUSED, odp_packet_t packets[],
+ int num)
+{
+ pkt_xdp_t *priv;
+ struct pollfd fd;
+ uint32_t start_idx = 0U, recvd, procd;
+
+ priv = pkt_priv(pktio_entry);
+ odp_ticketlock_lock(&priv->rx_lock);
+
+ if (odp_unlikely(xsk_ring_prod__needs_wakeup(&priv->sock_info.fill_q))) {
+ fd.fd = xsk_socket__fd(priv->sock_info.xsk);
+ fd.events = POLLIN;
+ (void)poll(&fd, 1U, 0);
+ }
+
+ recvd = xsk_ring_cons__peek(&priv->sock_info.rx, num, &start_idx);
+
+ if (recvd == 0U) {
+ odp_ticketlock_unlock(&priv->rx_lock);
+ return 0;
+ }
+
+ procd = process_received(pktio_entry, &priv->sock_info, start_idx, packets, recvd);
+ xsk_ring_cons__release(&priv->sock_info.rx, recvd);
+ (void)reserve_fill_queue_elements(&priv->sock_info, recvd);
+ odp_ticketlock_unlock(&priv->rx_lock);
+
+ return procd;
+}
+
+static inline void populate_tx_desc(pool_t *pool, odp_packet_hdr_t *pkt_hdr,
+ struct xdp_desc *tx_desc)
+{
+ uint64_t frame_off;
+ uint64_t pkt_off;
+
+ frame_off = pkt_hdr->event_hdr.index.event * pool->block_size;
+ pkt_off = (uint64_t)(uintptr_t)pkt_hdr->event_hdr.base_data
+ - (uint64_t)(uintptr_t)pool->base_addr - frame_off;
+ pkt_off <<= XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+ tx_desc->addr = frame_off | pkt_off;
+ tx_desc->len = pkt_hdr->frame_len;
+}
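+
+/* This is the inverse of extract_data(): the frame offset goes into the low
+ * bits of the descriptor address, the data offset within the frame into the
+ * bits above XSK_UNALIGNED_BUF_OFFSET_SHIFT. */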
+
+static void handle_pending_tx(xdp_sock_info_t *sock_info, int num)
+{
+ struct xsk_ring_cons *compl_q;
+ uint32_t sent;
+ uint8_t *base_addr;
+ uint32_t start_idx;
+ uint64_t frame_off;
+ odp_packet_t pkt;
+
+ if (odp_unlikely(xsk_ring_prod__needs_wakeup(&sock_info->tx)))
+ (void)sendto(xsk_socket__fd(sock_info->xsk), NULL, 0U, MSG_DONTWAIT, NULL, 0U);
+
+ compl_q = &sock_info->compl_q;
+ sent = xsk_ring_cons__peek(compl_q, num, &start_idx);
+ base_addr = sock_info->umem_info->pool->base_addr;
+
+ if (sent) {
+ /* Declare the VLA only when sent > 0: a zero-length VLA is undefined. */
+ odp_packet_t packets[sent];
+
+ for (uint32_t i = 0U; i < sent; ++i) {
+ frame_off = *xsk_ring_cons__comp_addr(compl_q, start_idx++);
+ frame_off = xsk_umem__extract_addr(frame_off);
+ pkt = xsk_umem__get_data(base_addr, frame_off);
+ packets[i] = pkt;
+ packet_hdr(packets[i])->ms_pktio_idx = 0U;
+ }
+
+ odp_packet_free_multi(packets, sent);
+ xsk_ring_cons__release(compl_q, sent);
+ }
+}
+
+static int sock_xdp_send(pktio_entry_t *pktio_entry, int index ODP_UNUSED,
+ const odp_packet_t packets[], int num)
+{
+ pkt_xdp_t *priv;
+ xdp_sock_info_t *sock_info;
+ pool_t *pool;
+ odp_pool_t pool_hdl;
+ int pktio_idx, i;
+ struct xsk_ring_prod *tx;
+ odp_packet_t pkt;
+ odp_packet_hdr_t *pkt_hdr;
+ uint32_t start_idx;
+
+ if (odp_unlikely(num == 0))
+ return 0;
+
+ priv = pkt_priv(pktio_entry);
+ odp_ticketlock_lock(&priv->tx_lock);
+ sock_info = &priv->sock_info;
+ pool = sock_info->umem_info->pool;
+ pool_hdl = pool->pool_hdl;
+ pktio_idx = sock_info->pktio_idx;
+ tx = &sock_info->tx;
+
+ for (i = 0; i < num; ++i) {
+ pkt = ODP_PACKET_INVALID;
+
+ if (odp_unlikely(odp_packet_num_segs(packets[i]) > 1)) {
+ /* TODO: handle segmented packets */
+ ODP_ERR("Only single-segment packets supported\n");
+ break;
+ }
+
+ pkt_hdr = packet_hdr(packets[i]);
+
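+ /* Only frames of the UMEM-backed pool can be referenced by TX
+ * descriptors, so packets from other pools are first copied into it. */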
+ if (pkt_hdr->event_hdr.pool_ptr != pool) {
+ pkt = odp_packet_copy(packets[i], pool_hdl);
+
+ if (odp_unlikely(pkt == ODP_PACKET_INVALID))
+ break;
+
+ pkt_hdr = packet_hdr(pkt);
+ }
+
+ if (xsk_ring_prod__reserve(tx, 1U, &start_idx) == 0U) {
+ handle_pending_tx(sock_info, NUM_XDP_DESCS);
+
+ if (xsk_ring_prod__reserve(tx, 1U, &start_idx) == 0U) {
+ if (pkt != ODP_PACKET_INVALID)
+ odp_packet_free(pkt);
+
+ break;
+ }
+ }
+
+ if (pkt != ODP_PACKET_INVALID)
+ odp_packet_free(packets[i]);
+
+ pkt_hdr->ms_pktio_idx = pktio_idx;
+ populate_tx_desc(pool, pkt_hdr, xsk_ring_prod__tx_desc(tx, start_idx));
+ }
+
+ xsk_ring_prod__submit(tx, i);
+ handle_pending_tx(sock_info, NUM_XDP_DESCS);
+ odp_ticketlock_unlock(&priv->tx_lock);
+
+ return i;
+}
+
+static uint32_t sock_xdp_mtu_get(pktio_entry_t *pktio_entry)
+{
+ return pkt_priv(pktio_entry)->sock_info.mtu;
+}
+
+static int sock_xdp_mtu_set(pktio_entry_t *pktio_entry, uint32_t maxlen_input,
+ uint32_t maxlen_output ODP_UNUSED)
+{
+ pkt_xdp_t *priv = pkt_priv(pktio_entry);
+ int ret;
+
+ ret = _odp_mtu_set_fd(priv->sock_info.helper_sock, pktio_entry->s.name, maxlen_input);
+ if (ret)
+ return ret;
+
+ priv->sock_info.mtu = maxlen_input;
+
+ return 0;
+}
+
+static int sock_xdp_promisc_mode_set(pktio_entry_t *pktio_entry, int enable)
+{
+ return _odp_promisc_mode_set_fd(pkt_priv(pktio_entry)->sock_info.helper_sock,
+ pktio_entry->s.name, enable);
+}
+
+static int sock_xdp_promisc_mode_get(pktio_entry_t *pktio_entry)
+{
+ return _odp_promisc_mode_get_fd(pkt_priv(pktio_entry)->sock_info.helper_sock,
+ pktio_entry->s.name);
+}
+
+static int sock_xdp_mac_addr_get(pktio_entry_t *pktio_entry, void *mac_addr)
+{
+ return _odp_mac_addr_get_fd(pkt_priv(pktio_entry)->sock_info.helper_sock,
+ pktio_entry->s.name, mac_addr) ? -1 : ETH_ALEN;
+}
+
+static int sock_xdp_link_status(pktio_entry_t *pktio_entry)
+{
+ return _odp_link_status_fd(pkt_priv(pktio_entry)->sock_info.helper_sock,
+ pktio_entry->s.name);
+}
+
+static int sock_xdp_link_info(pktio_entry_t *pktio_entry, odp_pktio_link_info_t *info)
+{
+ return _odp_link_info_fd(pkt_priv(pktio_entry)->sock_info.helper_sock,
+ pktio_entry->s.name, info);
+}
+
+static int sock_xdp_capability(pktio_entry_t *pktio_entry, odp_pktio_capability_t *capa)
+{
+ pkt_xdp_t *priv = pkt_priv(pktio_entry);
+
+ memset(capa, 0, sizeof(odp_pktio_capability_t));
+ capa->max_input_queues = 1U;
+ capa->max_output_queues = 1U;
+ capa->set_op.op.promisc_mode = 1U;
+ capa->set_op.op.maxlen = 1U;
+
+ capa->maxlen.equal = true;
+ capa->maxlen.min_input = _ODP_SOCKET_MTU_MIN;
+ capa->maxlen.max_input = priv->sock_info.max_mtu;
+ capa->maxlen.min_output = _ODP_SOCKET_MTU_MIN;
+ capa->maxlen.max_output = priv->sock_info.max_mtu;
+
+ capa->config.parser.layer = ODP_PROTO_LAYER_ALL;
+
+ capa->stats.pktio.all_counters = 0U;
+ capa->stats.pktin_queue.all_counters = 0U;
+ capa->stats.pktout_queue.all_counters = 0U;
+
+ return 0;
+}
+
+const pktio_if_ops_t _odp_sock_xdp_pktio_ops = {
+ /* TODO: at least stats */
+ .name = "socket_xdp",
+ .print = NULL,
+ .init_global = sock_xdp_init_global,
+ .init_local = NULL,
+ .term = NULL,
+ .open = sock_xdp_open,
+ .close = sock_xdp_close,
+ .start = NULL,
+ .stop = NULL,
+ .stats = NULL,
+ .stats_reset = NULL,
+ .pktin_queue_stats = NULL,
+ .pktout_queue_stats = NULL,
+ .extra_stat_info = NULL,
+ .extra_stats = NULL,
+ .extra_stat_counter = NULL,
+ .pktio_ts_res = NULL,
+ .pktio_ts_from_ns = NULL,
+ .pktio_time = NULL,
+ .recv = sock_xdp_recv,
+ .recv_tmo = NULL,
+ .recv_mq_tmo = NULL,
+ .fd_set = NULL,
+ .send = sock_xdp_send,
+ .maxlen_get = sock_xdp_mtu_get,
+ .maxlen_set = sock_xdp_mtu_set,
+ .promisc_mode_set = sock_xdp_promisc_mode_set,
+ .promisc_mode_get = sock_xdp_promisc_mode_get,
+ .mac_get = sock_xdp_mac_addr_get,
+ .mac_set = NULL,
+ .link_status = sock_xdp_link_status,
+ .link_info = sock_xdp_link_info,
+ .capability = sock_xdp_capability,
+ .config = NULL,
+ .input_queues_config = NULL,
+ .output_queues_config = NULL
+};
+
+static odp_bool_t sock_xdp_is_mem_src_active(void)
+{
+ return !disable_pktio;
+}
+
+static void sock_xdp_force_mem_src_disable(void)
+{
+ disable_pktio = true;
+}
+
+static void sock_xdp_adjust_block_size(uint8_t *data ODP_UNUSED, uint32_t *block_size,
+ uint32_t *block_offset ODP_UNUSED, uint32_t *flags)
+{
+ const uint32_t size = *block_size + XDP_PACKET_HEADROOM;
+ const uint64_t ps = odp_sys_page_size();
+ /* AF_XDP requires frames to be between 2kB and page size, so with
+ * XDP_ZEROCOPY, if block size is less than 2kB, adjust it to 2kB, if
+ * it is larger than page size, make pool creation fail. */
+ if (disable_pktio)
+ return;
+
+ if (size > ps) {
+ ODP_ERR("Adjusted pool block size larger than page size: %u > %" PRIu64 "\n",
+ size, ps);
+ *block_size = 0U;
+ return;
+ }
+
+ *flags |= ODP_SHM_HP;
+ *block_size = _ODP_MAX(size, MIN_FRAME_SIZE);
+}
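+
+/* E.g. a requested 1700 B block grows by XDP_PACKET_HEADROOM and is then
+ * rounded up to MIN_FRAME_SIZE (2 kB); a result above the page size
+ * (typically 4 kB) fails pool creation via *block_size = 0. */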
+
+static int sock_xdp_umem_create(uint8_t *data, pool_t *pool)
+{
+ struct xsk_umem_config cfg;
+ xdp_umem_info_t *umem_info = (xdp_umem_info_t *)data;
+
+ umem_info->pool = pool;
+ /* Fill queue size is recommended to be >= HW RX ring size + AF_XDP RX
+ * ring size, so use twice the size of the AF_XDP RX ring. */
+ cfg.fill_size = NUM_XDP_DESCS * 2U; /* TODO: num descs vs pool size */
+ cfg.comp_size = NUM_XDP_DESCS;
+ cfg.frame_size = pool->block_size;
+ cfg.frame_headroom = sizeof(odp_packet_hdr_t) + pool->headroom;
+ cfg.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG;
+
+ return xsk_umem__create(&umem_info->umem, pool->base_addr, pool->shm_size,
+ &umem_info->fill_q, &umem_info->compl_q, &cfg);
+}
+
+static void sock_xdp_umem_delete(uint8_t *data)
+{
+ xdp_umem_info_t *umem_info = (xdp_umem_info_t *)data;
+
+ while (xsk_umem__delete(umem_info->umem) == -EBUSY)
+ continue;
+}
+
+const _odp_pool_mem_src_ops_t _odp_pool_sock_xdp_mem_src_ops = {
+ .name = "xdp_zc",
+ .is_active = sock_xdp_is_mem_src_active,
+ .force_disable = sock_xdp_force_mem_src_disable,
+ .adjust_size = sock_xdp_adjust_block_size,
+ .bind = sock_xdp_umem_create,
+ .unbind = sock_xdp_umem_delete
+};
+
+#else
+/* Avoid warning about empty translation unit */
+typedef int _odp_dummy;
+#endif