diff options
Diffstat (limited to 'platform/linux-generic/pktio/socket_mmap.c')
-rw-r--r-- | platform/linux-generic/pktio/socket_mmap.c | 491 |
1 files changed, 491 insertions, 0 deletions
diff --git a/platform/linux-generic/pktio/socket_mmap.c b/platform/linux-generic/pktio/socket_mmap.c new file mode 100644 index 0000000..b9c2ee4 --- /dev/null +++ b/platform/linux-generic/pktio/socket_mmap.c @@ -0,0 +1,491 @@ +/* Copyright (c) 2013, Linaro Limited + * Copyright (c) 2013, Nokia Solutions and Networks + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <sys/socket.h> +#include <stdio.h> +#include <sys/mman.h> +#include <unistd.h> +#include <bits/wordsize.h> +#include <net/ethernet.h> +#include <netinet/ip.h> +#include <stdint.h> +#include <net/if.h> +#include <poll.h> +#include <sys/ioctl.h> +#include <errno.h> + +#include <odp.h> +#include <odp_packet_socket.h> +#include <odp_packet_internal.h> +#include <odp_debug_internal.h> +#include <odp/hints.h> + +#include <odp/helper/eth.h> +#include <odp/helper/ip.h> + +static int set_pkt_sock_fanout_mmap(pkt_sock_mmap_t *const pkt_sock, + int sock_group_idx) +{ + int sockfd = pkt_sock->sockfd; + int val; + int err; + uint16_t fanout_group; + + fanout_group = (uint16_t)(sock_group_idx & 0xffff); + val = (PACKET_FANOUT_HASH << 16) | fanout_group; + + err = setsockopt(sockfd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val)); + if (err != 0) { + __odp_errno = errno; + ODP_ERR("setsockopt(PACKET_FANOUT): %s\n", strerror(errno)); + return -1; + } + return 0; +} + +union frame_map { + struct { + struct tpacket2_hdr tp_h ODP_ALIGNED(TPACKET_ALIGNMENT); + struct sockaddr_ll s_ll + ODP_ALIGNED(TPACKET_ALIGN(sizeof(struct tpacket2_hdr))); + } *v2; + + void *raw; +}; + +static int mmap_pkt_socket(void) +{ + int ver = TPACKET_V2; + + int ret, sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); + + if (sock == -1) { + __odp_errno = errno; + ODP_ERR("socket(SOCK_RAW): %s\n", strerror(errno)); + return -1; + } + + ret = setsockopt(sock, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver)); + if (ret == -1) { + __odp_errno = errno; + ODP_ERR("setsockopt(PACKET_VERSION): %s\n", strerror(errno)); + close(sock); + return -1; + } + + return sock; +} + +static inline int mmap_rx_kernel_ready(struct tpacket2_hdr *hdr) +{ + return ((hdr->tp_status & TP_STATUS_USER) == TP_STATUS_USER); +} + +static inline void mmap_rx_user_ready(struct tpacket2_hdr *hdr) +{ + hdr->tp_status = TP_STATUS_KERNEL; + __sync_synchronize(); +} + +static inline int mmap_tx_kernel_ready(struct tpacket2_hdr *hdr) +{ + return !(hdr->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)); +} + +static inline void mmap_tx_user_ready(struct tpacket2_hdr *hdr) +{ + hdr->tp_status = TP_STATUS_SEND_REQUEST; + __sync_synchronize(); +} + +static inline unsigned pkt_mmap_v2_rx(int sock, struct ring *ring, + odp_packet_t pkt_table[], unsigned len, + odp_pool_t pool, + unsigned char if_mac[]) +{ + union frame_map ppd; + unsigned frame_num, next_frame_num; + uint8_t *pkt_buf; + int pkt_len; + struct ethhdr *eth_hdr; + unsigned i = 0; + + (void)sock; + + frame_num = ring->frame_num; + + while (i < len) { + if (mmap_rx_kernel_ready(ring->rd[frame_num].iov_base)) { + ppd.raw = ring->rd[frame_num].iov_base; + + next_frame_num = (frame_num + 1) % ring->rd_num; + + pkt_buf = (uint8_t *)ppd.raw + ppd.v2->tp_h.tp_mac; + pkt_len = ppd.v2->tp_h.tp_snaplen; + + /* Don't receive packets sent by ourselves */ + eth_hdr = (struct ethhdr *)pkt_buf; + if (odp_unlikely(ethaddrs_equal(if_mac, + eth_hdr->h_source))) { + mmap_rx_user_ready(ppd.raw); /* drop */ + frame_num = next_frame_num; + continue; + } + + pkt_table[i] = odp_packet_alloc(pool, pkt_len); + if (odp_unlikely(pkt_table[i] == ODP_PACKET_INVALID)) + break; + + if (odp_packet_copydata_in(pkt_table[i], 0, + pkt_len, pkt_buf) != 0) { + odp_packet_free(pkt_table[i]); + break; + } + + mmap_rx_user_ready(ppd.raw); + + /* Parse and set packet header data */ + _odp_packet_reset_parse(pkt_table[i]); + + frame_num = next_frame_num; + i++; + } else { + break; + } + } + + ring->frame_num = frame_num; + + return i; +} + +static inline unsigned pkt_mmap_v2_tx(int sock, struct ring *ring, + odp_packet_t pkt_table[], unsigned len) +{ + union frame_map ppd; + uint32_t pkt_len; + unsigned frame_num, next_frame_num; + int ret; + unsigned i = 0; + + frame_num = ring->frame_num; + + while (i < len) { + if (mmap_tx_kernel_ready(ring->rd[frame_num].iov_base)) { + ppd.raw = ring->rd[frame_num].iov_base; + + next_frame_num = (frame_num + 1) % ring->rd_num; + + pkt_len = odp_packet_len(pkt_table[i]); + ppd.v2->tp_h.tp_snaplen = pkt_len; + ppd.v2->tp_h.tp_len = pkt_len; + + odp_packet_copydata_out(pkt_table[i], 0, pkt_len, + (uint8_t *)ppd.raw + + TPACKET2_HDRLEN - + sizeof(struct sockaddr_ll)); + + mmap_tx_user_ready(ppd.raw); + + odp_packet_free(pkt_table[i]); + frame_num = next_frame_num; + i++; + } else { + break; + } + } + + ring->frame_num = frame_num; + + ret = sendto(sock, NULL, 0, MSG_DONTWAIT, NULL, 0); + if (ret == -1) { + if (errno != EAGAIN) { + __odp_errno = errno; + ODP_ERR("sendto(pkt mmap): %s\n", strerror(errno)); + return -1; + } + } + + return i; +} + +static void mmap_fill_ring(struct ring *ring, odp_pool_t pool_hdl, int fanout) +{ + /*@todo add Huge Pages support*/ + int pz = getpagesize(); + uint32_t pool_id; + pool_entry_t *pool_entry; + + if (pool_hdl == ODP_POOL_INVALID) + ODP_ABORT("Invalid pool handle\n"); + + pool_id = pool_handle_to_index(pool_hdl); + pool_entry = get_pool_entry(pool_id); + + /* Frame has to capture full packet which can fit to the pool block.*/ + ring->req.tp_frame_size = (pool_entry->s.blk_size + + TPACKET_HDRLEN + TPACKET_ALIGNMENT + + + (pz - 1)) & (-pz); + + /* Calculate how many pages do we need to hold all pool packets + * and align size to page boundary. + */ + ring->req.tp_block_size = (ring->req.tp_frame_size * + pool_entry->s.buf_num + (pz - 1)) & (-pz); + + if (!fanout) { + /* Single socket is in use. Use 1 block with buf_num frames. */ + ring->req.tp_block_nr = 1; + } else { + /* Fanout is in use, more likely taffic split accodring to + * number of cpu threads. Use cpu blocks and buf_num frames. */ + ring->req.tp_block_nr = odp_cpu_count(); + } + + ring->req.tp_frame_nr = ring->req.tp_block_size / + ring->req.tp_frame_size * ring->req.tp_block_nr; + + ring->mm_len = ring->req.tp_block_size * ring->req.tp_block_nr; + ring->rd_num = ring->req.tp_frame_nr; + ring->flen = ring->req.tp_frame_size; +} + +static int mmap_set_packet_loss_discard(int sock) +{ + int ret, discard = 1; + + ret = setsockopt(sock, SOL_PACKET, PACKET_LOSS, (void *)&discard, + sizeof(discard)); + if (ret == -1) { + __odp_errno = errno; + ODP_ERR("setsockopt(PACKET_LOSS): %s\n", strerror(errno)); + return -1; + } + + return 0; +} + +static int mmap_setup_ring(int sock, struct ring *ring, int type, + odp_pool_t pool_hdl, int fanout) +{ + int ret = 0; + + ring->sock = sock; + ring->type = type; + ring->version = TPACKET_V2; + + if (type == PACKET_TX_RING) { + ret = mmap_set_packet_loss_discard(sock); + if (ret != 0) + return -1; + } + + mmap_fill_ring(ring, pool_hdl, fanout); + + ret = setsockopt(sock, SOL_PACKET, type, &ring->req, sizeof(ring->req)); + if (ret == -1) { + __odp_errno = errno; + ODP_ERR("setsockopt(pkt mmap): %s\n", strerror(errno)); + return -1; + } + + ring->rd_len = ring->rd_num * sizeof(*ring->rd); + ring->rd = malloc(ring->rd_len); + if (!ring->rd) { + __odp_errno = errno; + ODP_ERR("malloc(): %s\n", strerror(errno)); + return -1; + } + + return 0; +} + +static int mmap_sock(pkt_sock_mmap_t *pkt_sock) +{ + int i; + int sock = pkt_sock->sockfd; + + /* map rx + tx buffer to userspace : they are in this order */ + pkt_sock->mmap_len = + pkt_sock->rx_ring.req.tp_block_size * + pkt_sock->rx_ring.req.tp_block_nr + + pkt_sock->tx_ring.req.tp_block_size * + pkt_sock->tx_ring.req.tp_block_nr; + + pkt_sock->mmap_base = + mmap(NULL, pkt_sock->mmap_len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_LOCKED | MAP_POPULATE, sock, 0); + + if (pkt_sock->mmap_base == MAP_FAILED) { + __odp_errno = errno; + ODP_ERR("mmap rx&tx buffer failed: %s\n", strerror(errno)); + return -1; + } + + pkt_sock->rx_ring.mm_space = pkt_sock->mmap_base; + memset(pkt_sock->rx_ring.rd, 0, pkt_sock->rx_ring.rd_len); + for (i = 0; i < pkt_sock->rx_ring.rd_num; ++i) { + pkt_sock->rx_ring.rd[i].iov_base = + pkt_sock->rx_ring.mm_space + + (i * pkt_sock->rx_ring.flen); + pkt_sock->rx_ring.rd[i].iov_len = pkt_sock->rx_ring.flen; + } + + pkt_sock->tx_ring.mm_space = + pkt_sock->mmap_base + pkt_sock->rx_ring.mm_len; + memset(pkt_sock->tx_ring.rd, 0, pkt_sock->tx_ring.rd_len); + for (i = 0; i < pkt_sock->tx_ring.rd_num; ++i) { + pkt_sock->tx_ring.rd[i].iov_base = + pkt_sock->tx_ring.mm_space + + (i * pkt_sock->tx_ring.flen); + pkt_sock->tx_ring.rd[i].iov_len = pkt_sock->tx_ring.flen; + } + + return 0; +} + +static void mmap_unmap_sock(pkt_sock_mmap_t *pkt_sock) +{ + munmap(pkt_sock->mmap_base, pkt_sock->mmap_len); + free(pkt_sock->rx_ring.rd); + free(pkt_sock->tx_ring.rd); +} + +static int mmap_bind_sock(pkt_sock_mmap_t *pkt_sock, const char *netdev) +{ + int ret; + + pkt_sock->ll.sll_family = PF_PACKET; + pkt_sock->ll.sll_protocol = htons(ETH_P_ALL); + pkt_sock->ll.sll_ifindex = if_nametoindex(netdev); + pkt_sock->ll.sll_hatype = 0; + pkt_sock->ll.sll_pkttype = 0; + pkt_sock->ll.sll_halen = 0; + + ret = bind(pkt_sock->sockfd, (struct sockaddr *)&pkt_sock->ll, + sizeof(pkt_sock->ll)); + if (ret == -1) { + __odp_errno = errno; + ODP_ERR("bind(to IF): %s\n", strerror(errno)); + return -1; + } + + return 0; +} + +static int mmap_store_hw_addr(pkt_sock_mmap_t *const pkt_sock, + const char *netdev) +{ + struct ifreq ethreq; + int ret; + + /* get MAC address */ + memset(ðreq, 0, sizeof(ethreq)); + snprintf(ethreq.ifr_name, IF_NAMESIZE, "%s", netdev); + ret = ioctl(pkt_sock->sockfd, SIOCGIFHWADDR, ðreq); + if (ret != 0) { + __odp_errno = errno; + ODP_ERR("ioctl(SIOCGIFHWADDR): %s: \"%s\".\n", + strerror(errno), + ethreq.ifr_name); + return -1; + } + + ethaddr_copy(pkt_sock->if_mac, + (unsigned char *)ethreq.ifr_ifru.ifru_hwaddr.sa_data); + + return 0; +} + +int setup_pkt_sock_mmap(pkt_sock_mmap_t *const pkt_sock, const char *netdev, + odp_pool_t pool, int fanout) +{ + int if_idx; + int ret = 0; + + memset(pkt_sock, 0, sizeof(*pkt_sock)); + + if (pool == ODP_POOL_INVALID) + return -1; + + /* Store eth buffer offset for pkt buffers from this pool */ + pkt_sock->frame_offset = 0; + + pkt_sock->pool = pool; + pkt_sock->sockfd = mmap_pkt_socket(); + if (pkt_sock->sockfd == -1) + return -1; + + ret = mmap_bind_sock(pkt_sock, netdev); + if (ret != 0) + return -1; + + ret = mmap_setup_ring(pkt_sock->sockfd, &pkt_sock->tx_ring, + PACKET_TX_RING, pool, fanout); + if (ret != 0) + return -1; + + ret = mmap_setup_ring(pkt_sock->sockfd, &pkt_sock->rx_ring, + PACKET_RX_RING, pool, fanout); + if (ret != 0) + return -1; + + ret = mmap_sock(pkt_sock); + if (ret != 0) + return -1; + + ret = mmap_store_hw_addr(pkt_sock, netdev); + if (ret != 0) + return -1; + + if_idx = if_nametoindex(netdev); + if (if_idx == 0) { + __odp_errno = errno; + ODP_ERR("if_nametoindex(): %s\n", strerror(errno)); + return -1; + } + + pkt_sock->fanout = fanout; + if (fanout) { + ret = set_pkt_sock_fanout_mmap(pkt_sock, if_idx); + if (ret != 0) + return -1; + } + + return pkt_sock->sockfd; +} + +int close_pkt_sock_mmap(pkt_sock_mmap_t *const pkt_sock) +{ + mmap_unmap_sock(pkt_sock); + if (pkt_sock->sockfd != -1 && close(pkt_sock->sockfd) != 0) { + __odp_errno = errno; + ODP_ERR("close(sockfd): %s\n", strerror(errno)); + return -1; + } + + return 0; +} + +int recv_pkt_sock_mmap(pkt_sock_mmap_t *const pkt_sock, + odp_packet_t pkt_table[], unsigned len) +{ + return pkt_mmap_v2_rx(pkt_sock->rx_ring.sock, &pkt_sock->rx_ring, + pkt_table, len, pkt_sock->pool, + pkt_sock->if_mac); +} + +int send_pkt_sock_mmap(pkt_sock_mmap_t *const pkt_sock, + odp_packet_t pkt_table[], unsigned len) +{ + return pkt_mmap_v2_tx(pkt_sock->tx_ring.sock, &pkt_sock->tx_ring, + pkt_table, len); +} |