aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Pfaff <blp@nicira.com>2011-11-28 09:29:18 -0800
committerBen Pfaff <blp@nicira.com>2011-11-28 09:29:18 -0800
commit8522ba0996907af86eb63afd4c69e9fd6bb1178a (patch)
tree81a2f5ac9259c324c36b4dbe84a3c1514fc4370f
parent3907401ce6c5a848797507fcd6bc97218d4847e2 (diff)
dpif-linux: Use poll() internally in dpif_linux_recv().
Using poll() internally in dpif_linux_recv(), instead of relying on the results of the main loop poll() call, brings netperf CRR performance back within 1% of par versus the code base before the poll_fd_woke() optimizations were introduced. It also increases the ovs-benchmark results by about 5% versus that baseline, too. My theory is that this is because the main loop takes long enough that a significant number of packets can arrive during the main loop itself, so this reduces the time before OVS gets to those packets.
-rw-r--r--lib/dpif-linux.c52
-rw-r--r--lib/netlink-socket.c13
-rw-r--r--lib/netlink-socket.h1
3 files changed, 56 insertions, 10 deletions
diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index 4b4ac55b..cb5f5ea1 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -28,7 +28,9 @@
#include <linux/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <linux/sockios.h>
+#include <poll.h>
#include <stdlib.h>
+#include <strings.h>
#include <sys/stat.h>
#include <unistd.h>
@@ -63,6 +65,7 @@ BUILD_ASSERT_DECL(IS_POW2(LRU_MAX_PORTS));
enum { N_UPCALL_SOCKS = 16 };
BUILD_ASSERT_DECL(IS_POW2(N_UPCALL_SOCKS));
+BUILD_ASSERT_DECL(N_UPCALL_SOCKS <= 32); /* We use a 32-bit word as a mask. */
/* This ethtool flag was introduced in Linux 2.6.24, so it might be
* missing if we have old headers. */
@@ -135,8 +138,8 @@ struct dpif_linux {
/* Upcall messages. */
struct nl_sock *upcall_socks[N_UPCALL_SOCKS];
- int last_read_upcall;
- unsigned int listen_mask;
+ uint32_t ready_mask; /* 1-bit for each sock with unread messages. */
+ unsigned int listen_mask; /* Mask of DPIF_UC_* bits. */
/* Change notification. */
struct sset changed_ports; /* Ports that have changed. */
@@ -1009,6 +1012,8 @@ dpif_linux_recv_set_mask(struct dpif *dpif_, int listen_mask)
return error;
}
}
+
+ dpif->ready_mask = 0;
}
dpif->listen_mask = listen_mask;
@@ -1088,24 +1093,51 @@ static int
dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall *upcall)
{
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
- int i;
int read_tries = 0;
if (!dpif->listen_mask) {
return EAGAIN;
}
- for (i = 0; i < N_UPCALL_SOCKS; i++) {
- struct nl_sock *upcall_sock;
- int dp_ifindex;
+ if (!dpif->ready_mask) {
+ struct pollfd pfds[N_UPCALL_SOCKS];
+ int retval;
+ int i;
- dpif->last_read_upcall = (dpif->last_read_upcall + 1) &
- (N_UPCALL_SOCKS - 1);
- upcall_sock = dpif->upcall_socks[dpif->last_read_upcall];
+ for (i = 0; i < N_UPCALL_SOCKS; i++) {
+ pfds[i].fd = nl_sock_fd(dpif->upcall_socks[i]);
+ pfds[i].events = POLLIN;
+ pfds[i].revents = 0;
+ }
+ do {
+ retval = poll(pfds, N_UPCALL_SOCKS, 0);
+ } while (retval < 0 && errno == EINTR);
+ if (retval < 0) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+ VLOG_WARN_RL(&rl, "poll failed (%s)", strerror(errno));
+ } else if (!retval) {
+ return EAGAIN;
+ }
+
+ for (i = 0; i < N_UPCALL_SOCKS; i++) {
+ if (pfds[i].revents) {
+ dpif->ready_mask |= 1u << i;
+ }
+ }
+
+ assert(dpif->ready_mask);
+ }
+
+ do {
+ int indx = ffs(dpif->ready_mask) - 1;
+ struct nl_sock *upcall_sock = dpif->upcall_socks[indx];
+
+ dpif->ready_mask &= ~(1u << indx);
for (;;) {
struct ofpbuf *buf;
+ int dp_ifindex;
int error;
if (++read_tries > 50) {
@@ -1131,7 +1163,7 @@ dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall *upcall)
return error;
}
}
- }
+ } while (dpif->ready_mask);
return EAGAIN;
}
diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c
index 3e64de39..7f496261 100644
--- a/lib/netlink-socket.c
+++ b/lib/netlink-socket.c
@@ -854,6 +854,19 @@ nl_sock_wait(const struct nl_sock *sock, short int events)
poll_fd_wait(sock->fd, events);
}
+/* Returns the underlying fd for 'sock', for use in "poll()"-like operations
+ * that can't use nl_sock_wait().
+ *
+ * It's a little tricky to use the returned fd correctly, because nl_sock does
+ * "copy on write" to allow a single nl_sock to be used for notifications,
+ * transactions, and dumps. If 'sock' is used only for notifications and
+ * transactions (and never for dump) then the usage is safe. */
+int
+nl_sock_fd(const struct nl_sock *sock)
+{
+ return sock->fd;
+}
+
/* Returns the PID associated with this socket. */
uint32_t
nl_sock_pid(const struct nl_sock *sock)
diff --git a/lib/netlink-socket.h b/lib/netlink-socket.h
index b6356b9e..f2a5d3fa 100644
--- a/lib/netlink-socket.h
+++ b/lib/netlink-socket.h
@@ -60,6 +60,7 @@ int nl_sock_transact(struct nl_sock *, const struct ofpbuf *request,
int nl_sock_drain(struct nl_sock *);
void nl_sock_wait(const struct nl_sock *, short int events);
+int nl_sock_fd(const struct nl_sock *);
uint32_t nl_sock_pid(const struct nl_sock *);