author     Jesse Gross <jesse@nicira.com>  2010-12-29 22:13:15 -0800
committer  Jesse Gross <jesse@nicira.com>  2011-02-07 13:49:01 -0800
commit     6ce39213456b27257acbaf146398dce26d1466b9 (patch)
tree       3841453cfdde9261c94b343f341c6bc15e257ed3
parent     6e0ce48e27d5a845bdc8b57fb41bcec8a54f336a (diff)
datapath: Use vlan acceleration for vlan operations.
Using the kernel vlan acceleration has a number of benefits: it enables
hardware tagging, allows usage of TSO and checksum offloading, and is
generally easier to manipulate. This switches the vlan actions to use
the skb->vlan_tci field for any necessary changes. In places that do not
support vlan acceleration in a way that we can use (in particular,
kernels before 2.6.37), we perform any necessary conversions, such as
tagging and GSO, before the packet leaves Open vSwitch.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
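For orientation, here is a minimal standalone sketch of the two tagging paths this patch distinguishes. It assumes the helper signatures of this kernel era (pre-3.10 <linux/if_vlan.h>); the function names tag_accelerated() and tag_in_payload() are illustrative only, and the patch itself goes through the compat wrappers in datapath/vlan.h (vlan_set_tci() and friends) rather than touching skb->vlan_tci directly:

    #include <linux/if_vlan.h>
    #include <linux/skbuff.h>

    /* Fast path: record the tag out-of-band in skb->vlan_tci.  Hardware
     * (or a later software pass) inserts it when the packet leaves. */
    static struct sk_buff *tag_accelerated(struct sk_buff *skb, __be16 tci)
    {
            return __vlan_hwaccel_put_tag(skb, ntohs(tci));
    }

    /* Fallback: the consumer cannot see skb->vlan_tci, so write a real
     * 802.1Q header into the packet data.  __vlan_put_tag() frees the
     * skb and returns NULL on failure. */
    static struct sk_buff *tag_in_payload(struct sk_buff *skb)
    {
            if (!vlan_tx_tag_present(skb))
                    return skb;

            skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
            if (likely(skb))
                    skb->vlan_tci = 0;  /* the tag now lives in the payload */
            return skb;
    }

The diff below applies exactly this split: the accelerated form everywhere inside the datapath, and the payload form only at boundaries (userspace queues, tunnels, and pre-2.6.37 device transmit) where the out-of-band tag would otherwise be lost.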
-rw-r--r--  datapath/actions.c             | 152
-rw-r--r--  datapath/datapath.c            |   8
-rw-r--r--  datapath/flow.c                |   8
-rw-r--r--  datapath/tunnel.c              |  92
-rw-r--r--  datapath/vlan.h                |  14
-rw-r--r--  datapath/vport-internal_dev.c  |   8
-rw-r--r--  datapath/vport-netdev.c        |  48
7 files changed, 157 insertions(+), 173 deletions(-)
diff --git a/datapath/actions.c b/datapath/actions.c
index 3223c65b..89393771 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -24,6 +24,7 @@
#include "checksum.h"
#include "datapath.h"
#include "openvswitch/datapath-protocol.h"
+#include "vlan.h"
#include "vport.h"
static int do_execute_actions(struct datapath *, struct sk_buff *,
@@ -52,20 +53,28 @@ static struct sk_buff *make_writable(struct sk_buff *skb, unsigned min_headroom)
return NULL;
}
-static struct sk_buff *vlan_pull_tag(struct sk_buff *skb)
+static struct sk_buff *strip_vlan(struct sk_buff *skb)
{
- struct vlan_ethhdr *vh = vlan_eth_hdr(skb);
struct ethhdr *eh;
- /* Verify we were given a vlan packet */
- if (vh->h_vlan_proto != htons(ETH_P_8021Q) || skb->len < VLAN_ETH_HLEN)
+ if (vlan_tx_tag_present(skb)) {
+ vlan_set_tci(skb, 0);
return skb;
+ }
+
+ if (unlikely(vlan_eth_hdr(skb)->h_vlan_proto != htons(ETH_P_8021Q) ||
+ skb->len < VLAN_ETH_HLEN))
+ return skb;
+
+ skb = make_writable(skb, 0);
+ if (unlikely(!skb))
+ return NULL;
if (get_ip_summed(skb) == OVS_CSUM_COMPLETE)
skb->csum = csum_sub(skb->csum, csum_partial(skb->data
+ ETH_HLEN, VLAN_HLEN, 0));
- memmove(skb->data + VLAN_HLEN, skb->data, 2 * VLAN_ETH_ALEN);
+ memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN);
@@ -80,133 +89,32 @@ static struct sk_buff *modify_vlan_tci(struct datapath *dp, struct sk_buff *skb,
const struct nlattr *a, u32 actions_len)
{
__be16 tci = nla_get_be16(a);
+ struct vlan_ethhdr *vh;
+ __be16 old_tci;
- skb = make_writable(skb, VLAN_HLEN);
- if (!skb)
- return ERR_PTR(-ENOMEM);
+ if (vlan_tx_tag_present(skb) || skb->protocol != htons(ETH_P_8021Q))
+ return __vlan_hwaccel_put_tag(skb, ntohs(tci));
- if (skb->protocol == htons(ETH_P_8021Q)) {
- /* Modify vlan id, but maintain other TCI values */
- struct vlan_ethhdr *vh;
- __be16 old_tci;
-
- if (skb->len < VLAN_ETH_HLEN)
- return skb;
-
- vh = vlan_eth_hdr(skb);
- old_tci = vh->h_vlan_TCI;
-
- vh->h_vlan_TCI = tci;
-
- if (get_ip_summed(skb) == OVS_CSUM_COMPLETE) {
- __be16 diff[] = { ~old_tci, vh->h_vlan_TCI };
-
- skb->csum = ~csum_partial((char *)diff, sizeof(diff),
- ~skb->csum);
- }
- } else {
- int err;
-
- /* Add vlan header */
+ skb = make_writable(skb, 0);
+ if (unlikely(!skb))
+ return NULL;
- /* Set up checksumming pointers for checksum-deferred packets
- * on Xen. Otherwise, dev_queue_xmit() will try to do this
- * when we send the packet out on the wire, and it will fail at
- * that point because skb_checksum_setup() will not look inside
- * an 802.1Q header. */
- err = vswitch_skb_checksum_setup(skb);
- if (unlikely(err)) {
- kfree_skb(skb);
- return ERR_PTR(err);
- }
+ if (unlikely(skb->len < VLAN_ETH_HLEN))
+ return skb;
- /* GSO is not implemented for packets with an 802.1Q header, so
- * we have to do segmentation before we add that header.
- *
- * GSO does work with hardware-accelerated VLAN tagging, but we
- * can't use hardware-accelerated VLAN tagging since it
- * requires the device to have a VLAN group configured (with
- * e.g. vconfig(8)) and we don't do that.
- *
- * Having to do this here may be a performance loss, since we
- * can't take advantage of TSO hardware support, although it
- * does not make a measurable network performance difference
- * for 1G Ethernet. Fixing that would require patching the
- * kernel (either to add GSO support to the VLAN protocol or to
- * support hardware-accelerated VLAN tagging without VLAN
- * groups configured). */
- if (skb_is_gso(skb)) {
- const struct nlattr *actions_left;
- int actions_len_left;
- struct sk_buff *segs;
-
- segs = skb_gso_segment(skb, 0);
- kfree_skb(skb);
- if (IS_ERR(segs))
- return ERR_CAST(segs);
-
- actions_len_left = actions_len;
- actions_left = nla_next(a, &actions_len_left);
-
- do {
- struct sk_buff *nskb = segs->next;
-
- segs->next = NULL;
-
- /* GSO can change the checksum type so update.*/
- compute_ip_summed(segs, true);
-
- segs = __vlan_put_tag(segs, ntohs(tci));
- err = -ENOMEM;
- if (segs) {
- err = do_execute_actions(
- dp, segs, key, actions_left,
- actions_len_left);
- }
-
- if (unlikely(err)) {
- while ((segs = nskb)) {
- nskb = segs->next;
- segs->next = NULL;
- kfree_skb(segs);
- }
- return ERR_PTR(err);
- }
-
- segs = nskb;
- } while (segs->next);
-
- skb = segs;
- compute_ip_summed(skb, true);
- }
+ vh = vlan_eth_hdr(skb);
- /* The hardware-accelerated version of vlan_put_tag() works
- * only for a device that has a VLAN group configured (with
- * e.g. vconfig(8)), so call the software-only version
- * __vlan_put_tag() directly instead.
- */
- skb = __vlan_put_tag(skb, ntohs(tci));
- if (!skb)
- return ERR_PTR(-ENOMEM);
+ old_tci = vh->h_vlan_TCI;
+ vh->h_vlan_TCI = tci;
- /* GSO doesn't fix up the hardware computed checksum so this
- * will only be hit in the non-GSO case. */
- if (get_ip_summed(skb) == OVS_CSUM_COMPLETE)
- skb->csum = csum_add(skb->csum, csum_partial(skb->data
- + ETH_HLEN, VLAN_HLEN, 0));
+ if (get_ip_summed(skb) == OVS_CSUM_COMPLETE) {
+ __be16 diff[] = { ~old_tci, vh->h_vlan_TCI };
+ skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum);
}
return skb;
}
-static struct sk_buff *strip_vlan(struct sk_buff *skb)
-{
- skb = make_writable(skb, 0);
- if (skb)
- vlan_pull_tag(skb);
- return skb;
-}
-
static bool is_ip(struct sk_buff *skb, const struct sw_flow_key *key)
{
return (key->dl_type == htons(ETH_P_IP) &&
@@ -417,8 +325,6 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
case ODP_ACTION_ATTR_SET_DL_TCI:
skb = modify_vlan_tci(dp, skb, key, a, rem);
- if (IS_ERR(skb))
- return PTR_ERR(skb);
break;
case ODP_ACTION_ATTR_STRIP_VLAN:
diff --git a/datapath/datapath.c b/datapath/datapath.c
index ba32e37f..c48dc9df 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -483,6 +483,14 @@ static int queue_control_packets(struct datapath *dp, struct sk_buff *skb,
nskb = skb->next;
skb->next = NULL;
+ if (vlan_tx_tag_present(skb)) {
+ skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+ if (unlikely(!skb)) {
+ err = -ENOMEM;
+ goto err_kfree_skbs;
+ }
+ }
+
len = sizeof(struct odp_header);
len += nla_total_size(skb->len);
len += nla_total_size(FLOW_BUFSIZE);
diff --git a/datapath/flow.c b/datapath/flow.c
index 735e1479..4b0e6cc3 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -34,6 +34,8 @@
#include <net/ipv6.h>
#include <net/ndisc.h>
+#include "vlan.h"
+
static struct kmem_cache *flow_cache;
static unsigned int hash_seed __read_mostly;
@@ -449,8 +451,12 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
/* dl_type, dl_vlan, dl_vlan_pcp. */
__skb_pull(skb, 2 * ETH_ALEN);
- if (eth->h_proto == htons(ETH_P_8021Q))
+
+ if (vlan_tx_tag_present(skb))
+ key->dl_tci = htons(vlan_get_tci(skb));
+ else if (eth->h_proto == htons(ETH_P_8021Q))
parse_vlan(skb, key);
+
key->dl_type = parse_ethertype(skb);
skb_reset_network_header(skb);
__skb_push(skb, skb->data - (unsigned char *)eth);
diff --git a/datapath/tunnel.c b/datapath/tunnel.c
index 95492f76..4cabd93c 100644
--- a/datapath/tunnel.c
+++ b/datapath/tunnel.c
@@ -33,6 +33,7 @@
#include "datapath.h"
#include "table.h"
#include "tunnel.h"
+#include "vlan.h"
#include "vport.h"
#include "vport-generic.h"
#include "vport-internal_dev.h"
@@ -439,6 +440,7 @@ void tnl_rcv(struct vport *vport, struct sk_buff *skb)
ecn_decapsulate(skb);
compute_ip_summed(skb, false);
+ vlan_set_tci(skb, 0);
vport_receive(vport, skb);
}
@@ -682,7 +684,8 @@ bool tnl_frag_needed(struct vport *vport, const struct tnl_mutable_config *mutab
vh->h_vlan_TCI = vlan_eth_hdr(skb)->h_vlan_TCI;
vh->h_vlan_encapsulated_proto = skb->protocol;
- }
+ } else
+ vlan_set_tci(nskb, vlan_get_tci(skb));
skb_reset_mac_header(nskb);
/* Protocol */
@@ -720,17 +723,27 @@ static bool check_mtu(struct sk_buff *skb,
int mtu = 0;
unsigned int packet_length = skb->len - ETH_HLEN;
- if (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
+ /* Allow for one level of tagging in the packet length. */
+ if (!vlan_tx_tag_present(skb) &&
+ eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
packet_length -= VLAN_HLEN;
if (pmtud) {
+ int vlan_header = 0;
+
frag_off = htons(IP_DF);
+ /* The tag needs to go in packet regardless of where it
+ * currently is, so subtract it from the MTU.
+ */
+ if (vlan_tx_tag_present(skb) ||
+ eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
+ vlan_header = VLAN_HLEN;
+
mtu = dst_mtu(&rt_dst(rt))
- ETH_HLEN
- mutable->tunnel_hlen
- - (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ?
- VLAN_HLEN : 0);
+ - vlan_header;
}
if (skb->protocol == htons(ETH_P_IP)) {
@@ -1041,28 +1054,18 @@ static struct sk_buff *handle_offloads(struct sk_buff *skb,
goto error_free;
min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
- + mutable->tunnel_hlen;
+ + mutable->tunnel_hlen
+ + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
+
+ skb = check_headroom(skb, min_headroom);
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb);
+ goto error;
+ }
if (skb_is_gso(skb)) {
struct sk_buff *nskb;
- /*
- * If we are doing GSO on a pskb it is better to make sure that
- * the headroom is correct now. We will only have to copy the
- * portion in the linear data area and GSO will preserve
- * headroom when it creates the segments. This is particularly
- * beneficial on Xen where we get a lot of GSO pskbs.
- * Conversely, we avoid copying if it is just to get our own
- * writable clone because GSO will do the copy for us.
- */
- if (skb_headroom(skb) < min_headroom) {
- skb = check_headroom(skb, min_headroom);
- if (IS_ERR(skb)) {
- err = PTR_ERR(skb);
- goto error;
- }
- }
-
nskb = skb_gso_segment(skb, 0);
kfree_skb(skb);
if (IS_ERR(nskb)) {
@@ -1071,32 +1074,23 @@ static struct sk_buff *handle_offloads(struct sk_buff *skb,
}
skb = nskb;
- } else {
- skb = check_headroom(skb, min_headroom);
- if (IS_ERR(skb)) {
- err = PTR_ERR(skb);
- goto error;
- }
-
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- /*
- * Pages aren't locked and could change at any time.
- * If this happens after we compute the checksum, the
- * checksum will be wrong. We linearize now to avoid
- * this problem.
- */
- if (unlikely(need_linearize(skb))) {
- err = __skb_linearize(skb);
- if (unlikely(err))
- goto error_free;
- }
-
- err = skb_checksum_help(skb);
+ } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ /* Pages aren't locked and could change at any time.
+ * If this happens after we compute the checksum, the
+ * checksum will be wrong. We linearize now to avoid
+ * this problem.
+ */
+ if (unlikely(need_linearize(skb))) {
+ err = __skb_linearize(skb);
if (unlikely(err))
goto error_free;
- } else if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->ip_summed = CHECKSUM_NONE;
- }
+ }
+
+ err = skb_checksum_help(skb);
+ if (unlikely(err))
+ goto error_free;
+ } else if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = CHECKSUM_NONE;
return skb;
@@ -1159,7 +1153,8 @@ int tnl_send(struct vport *vport, struct sk_buff *skb)
u8 tos;
/* Validate the protocol headers before we try to use them. */
- if (skb->protocol == htons(ETH_P_8021Q)) {
+ if (skb->protocol == htons(ETH_P_8021Q) &&
+ !vlan_tx_tag_present(skb)) {
if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
goto error_free;
@@ -1250,6 +1245,9 @@ int tnl_send(struct vport *vport, struct sk_buff *skb)
struct sk_buff *next_skb = skb->next;
skb->next = NULL;
+ if (unlikely(vlan_deaccel_tag(skb)))
+ goto next;
+
if (likely(cache)) {
skb_push(skb, cache->len);
memcpy(skb->data, get_cached_header(cache), cache->len);
diff --git a/datapath/vlan.h b/datapath/vlan.h
index dc900183..02a62909 100644
--- a/datapath/vlan.h
+++ b/datapath/vlan.h
@@ -43,4 +43,18 @@ u16 vlan_tx_tag_get(struct sk_buff *skb);
#define __vlan_hwaccel_put_tag rpl__vlan_hwaccel_put_tag
struct sk_buff *__vlan_hwaccel_put_tag(struct sk_buff *skb, u16 vlan_tci);
#endif /* NEED_VLAN_FIELD */
+
+static inline int vlan_deaccel_tag(struct sk_buff *skb)
+{
+ if (!vlan_tx_tag_present(skb))
+ return 0;
+
+ skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+ if (unlikely(!skb))
+ return -ENOMEM;
+
+ vlan_set_tci(skb, 0);
+ return 0;
+}
+
#endif /* vlan.h */
diff --git a/datapath/vport-internal_dev.c b/datapath/vport-internal_dev.c
index be290745..0b91b345 100644
--- a/datapath/vport-internal_dev.c
+++ b/datapath/vport-internal_dev.c
@@ -6,6 +6,7 @@
* kernel, by Linus Torvalds and others.
*/
+#include <linux/if_vlan.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
@@ -230,8 +231,13 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
struct net_device *netdev = netdev_vport_priv(vport)->dev;
int len;
- skb->dev = netdev;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+ if (unlikely(vlan_deaccel_tag(skb)))
+ return 0;
+#endif
+
len = skb->len;
+ skb->dev = netdev;
skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, netdev);
diff --git a/datapath/vport-netdev.c b/datapath/vport-netdev.c
index 85e0eb9a..3693004c 100644
--- a/datapath/vport-netdev.c
+++ b/datapath/vport-netdev.c
@@ -261,10 +261,56 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
static int netdev_send(struct vport *vport, struct sk_buff *skb)
{
struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
- int len = skb->len;
+ int len;
skb->dev = netdev_vport->dev;
forward_ip_summed(skb);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+ if (vlan_tx_tag_present(skb)) {
+ int err;
+
+ err = vswitch_skb_checksum_setup(skb);
+ if (unlikely(err)) {
+ kfree_skb(skb);
+ return 0;
+ }
+
+ if (skb_is_gso(skb)) {
+ struct sk_buff *nskb;
+
+ nskb = skb_gso_segment(skb, 0);
+ kfree_skb(skb);
+ skb = nskb;
+ if (IS_ERR(skb))
+ return 0;
+
+ len = 0;
+ do {
+ nskb = skb->next;
+ skb->next = NULL;
+
+ skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+ if (likely(skb)) {
+ len += skb->len;
+ vlan_set_tci(skb, 0);
+ dev_queue_xmit(skb);
+ }
+
+ skb = nskb;
+ } while (skb);
+
+ return len;
+ } else {
+ skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+ if (unlikely(!skb))
+ return 0;
+ vlan_set_tci(skb, 0);
+ }
+ }
+#endif
+
+ len = skb->len;
dev_queue_xmit(skb);
return len;
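
For completeness, a hedged usage sketch of the vlan_deaccel_tag() helper added in datapath/vlan.h above: callers such as tnl_send() and internal_dev_recv() invoke it just before handing a packet to a path that parses headers straight from skb->data. legacy_consume() below is a hypothetical stand-in for such a path, not anything in this patch:

    #include <linux/skbuff.h>
    #include "vlan.h"  /* the datapath/vlan.h modified above */

    /* Hypothetical consumer that parses the 802.1Q header out of
     * skb->data and therefore never looks at skb->vlan_tci. */
    static int legacy_consume(struct sk_buff *skb)
    {
            /* Stand-in: a real consumer would parse eth_hdr(skb) here. */
            kfree_skb(skb);
            return 0;
    }

    static int hand_off(struct sk_buff *skb)
    {
            /* No-op if no accelerated tag is set; on allocation failure
             * __vlan_put_tag() has already freed the skb and the helper
             * returns -ENOMEM, so there is nothing left to clean up. */
            if (unlikely(vlan_deaccel_tag(skb)))
                    return -ENOMEM;

            return legacy_consume(skb);
    }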