aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kyle Mestery <kmestery@cisco.com> 2012-10-20 12:15:24 -0700
committer Pravin B Shelar <pshelar@nicira.com> 2012-10-20 12:15:24 -0700
commit 356af50bc2a81305002feb94f04fd0dea9e9eb8f (patch)
tree bea0f83dd5f3ac0d0346532c9e1793c8d13dd064
parent 4206b80f6444ceef47ceb9e1a6b88a40eabacb1f (diff)
datapath: Add support for tun_key to Open vSwitch datapath
This is a first pass at providing a tun_key which can be used as the basis for flow-based tunnelling. The tun_key includes and replaces the tun_id in both struct ovs_skb_cb and struct sw_flow_key. This patch allows all existing tun_id behaviour to still work. Existing users of tun_id are redirected to tun_key->tun_id to retain compatibility. However, when the userspace code is updated to make use of the new tun_key, the old behaviour will be deprecated and removed. NOTE: With these changes, the tunneling code no longer assumes input and output keys are symmetric. If they are not, PMTUD needs to be disabled for tunneling to work. Signed-off-by: Kyle Mestery <kmestery@cisco.com> Signed-off-by: Pravin B Shelar <pshelar@nicira.com> Reviewed-by: Jesse Gross <jesse@nicira.com> Acked-by: Ben Pfaff <blp@nicira.com>
-rw-r--r-- NEWS 3
-rw-r--r-- datapath/actions.c 40
-rw-r--r-- datapath/datapath.c 9
-rw-r--r-- datapath/datapath.h 5
-rw-r--r-- datapath/flow.c 86
-rw-r--r-- datapath/flow.h 12
-rw-r--r-- datapath/tunnel.c 210
-rw-r--r-- datapath/tunnel.h 26
-rw-r--r-- datapath/vport-capwap.c 95
-rw-r--r-- datapath/vport-gre.c 131
-rw-r--r-- datapath/vport.c 2
-rw-r--r-- include/linux/openvswitch.h 18
-rw-r--r-- lib/dpif-netdev.c 1
-rw-r--r-- lib/odp-util.c 15
-rw-r--r-- lib/odp-util.h 3
15 files changed, 486 insertions, 170 deletions
diff --git a/NEWS b/NEWS
index e00deaed..f5d7f9e6 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,8 @@
post-v1.8.0
------------------------
+ - The tunneling code no longer assumes input and output keys are symmetric.
+ If they are not, PMTUD needs to be disabled for tunneling to work. Note
+ this only applies to flow-based keys.
- FreeBSD is now a supported platform, thanks to code contributions from
Gaetano Catalli, Ed Maste, and Giuseppe Lettieri.
- ovs-bugtool: New --ovs option to report only OVS related information.
diff --git a/datapath/actions.c b/datapath/actions.c
index ec9b595c..972f7a21 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -37,7 +37,8 @@
#include "vport.h"
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
- const struct nlattr *attr, int len, bool keep_skb);
+ const struct nlattr *attr, int len,
+ struct ovs_key_ipv4_tunnel *tun_key, bool keep_skb);
static int make_writable(struct sk_buff *skb, int write_len)
{
@@ -308,7 +309,8 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
}
static int sample(struct datapath *dp, struct sk_buff *skb,
- const struct nlattr *attr)
+ const struct nlattr *attr,
+ struct ovs_key_ipv4_tunnel *tun_key)
{
const struct nlattr *acts_list = NULL;
const struct nlattr *a;
@@ -329,11 +331,12 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
}
return do_execute_actions(dp, skb, nla_data(acts_list),
- nla_len(acts_list), true);
+ nla_len(acts_list), tun_key, true);
}
static int execute_set_action(struct sk_buff *skb,
- const struct nlattr *nested_attr)
+ const struct nlattr *nested_attr,
+ struct ovs_key_ipv4_tunnel *tun_key)
{
int err = 0;
@@ -343,7 +346,22 @@ static int execute_set_action(struct sk_buff *skb,
break;
case OVS_KEY_ATTR_TUN_ID:
- OVS_CB(skb)->tun_id = nla_get_be64(nested_attr);
+ if (!OVS_CB(skb)->tun_key) {
+ /* If tun_key is NULL for this skb, assign it to
+ * a value the caller passed in for action processing
+ * and output. This can disappear once we drop support
+ * for setting tun_id outside of tun_key.
+ */
+ memset(tun_key, 0, sizeof(struct ovs_key_ipv4_tunnel));
+ OVS_CB(skb)->tun_key = tun_key;
+ }
+
+ OVS_CB(skb)->tun_key->tun_id = nla_get_be64(nested_attr);
+ OVS_CB(skb)->tun_key->tun_flags |= OVS_FLOW_TNL_F_KEY;
+ break;
+
+ case OVS_KEY_ATTR_IPV4_TUNNEL:
+ OVS_CB(skb)->tun_key = nla_data(nested_attr);
break;
case OVS_KEY_ATTR_ETHERNET:
@@ -368,7 +386,8 @@ static int execute_set_action(struct sk_buff *skb,
/* Execute a list of actions against 'skb'. */
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
- const struct nlattr *attr, int len, bool keep_skb)
+ const struct nlattr *attr, int len,
+ struct ovs_key_ipv4_tunnel *tun_key, bool keep_skb)
{
/* Every output action needs a separate clone of 'skb', but the common
* case is just a single output action, so that doing a clone and
@@ -407,11 +426,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
break;
case OVS_ACTION_ATTR_SET:
- err = execute_set_action(skb, nla_data(a));
+ err = execute_set_action(skb, nla_data(a), tun_key);
break;
case OVS_ACTION_ATTR_SAMPLE:
- err = sample(dp, skb, a);
+ err = sample(dp, skb, a, tun_key);
break;
}
@@ -458,6 +477,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
struct loop_counter *loop;
int error;
+ struct ovs_key_ipv4_tunnel tun_key;
/* Check whether we've looped too much. */
loop = &__get_cpu_var(loop_counters);
@@ -469,9 +489,9 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
goto out_loop;
}
- OVS_CB(skb)->tun_id = 0;
+ OVS_CB(skb)->tun_key = NULL;
error = do_execute_actions(dp, skb, acts->actions,
- acts->actions_len, false);
+ acts->actions_len, &tun_key, false);
/* Check whether sub-actions looped too much. */
if (unlikely(loop->looping))
diff --git a/datapath/datapath.c b/datapath/datapath.c
index a6915fb2..3f963be2 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -587,12 +587,19 @@ static int validate_set(const struct nlattr *a,
switch (key_type) {
const struct ovs_key_ipv4 *ipv4_key;
+ const struct ovs_key_ipv4_tunnel *tun_key;
case OVS_KEY_ATTR_PRIORITY:
case OVS_KEY_ATTR_TUN_ID:
case OVS_KEY_ATTR_ETHERNET:
break;
+ case OVS_KEY_ATTR_IPV4_TUNNEL:
+ tun_key = nla_data(ovs_key);
+ if (!tun_key->ipv4_dst)
+ return -EINVAL;
+ break;
+
case OVS_KEY_ATTR_IPV4:
if (flow_key->eth.type != htons(ETH_P_IP))
return -EINVAL;
@@ -785,7 +792,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority,
&flow->key.phy.in_port,
- &flow->key.phy.tun_id,
+ &flow->key.tun.tun_key,
a[OVS_PACKET_ATTR_KEY]);
if (err)
goto err_flow_put;
diff --git a/datapath/datapath.h b/datapath/datapath.h
index affbf0e0..c5df12d6 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -96,7 +96,8 @@ struct datapath {
/**
* struct ovs_skb_cb - OVS data in skb CB
* @flow: The flow associated with this packet. May be %NULL if no flow.
- * @tun_id: ID of the tunnel that encapsulated this packet. It is 0 if the
+ * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the
+ * packet is not being tunneled.
* @ip_summed: Consistently stores L4 checksumming status across different
* kernel versions.
* @csum_start: Stores the offset from which to start checksumming independent
@@ -107,7 +108,7 @@ struct datapath {
*/
struct ovs_skb_cb {
struct sw_flow *flow;
- __be64 tun_id;
+ struct ovs_key_ipv4_tunnel *tun_key;
#ifdef NEED_CSUM_NORMALIZE
enum csum_type ip_summed;
u16 csum_start;
diff --git a/datapath/flow.c b/datapath/flow.c
index d07337c8..42aff6d8 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -629,7 +629,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
memset(key, 0, sizeof(*key));
key->phy.priority = skb->priority;
- key->phy.tun_id = OVS_CB(skb)->tun_id;
+ if (OVS_CB(skb)->tun_key)
+ memcpy(&key->tun.tun_key, OVS_CB(skb)->tun_key, sizeof(key->tun.tun_key));
key->phy.in_port = in_port;
skb_reset_mac_header(skb);
@@ -847,6 +848,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
/* Not upstream. */
[OVS_KEY_ATTR_TUN_ID] = sizeof(__be64),
+ [OVS_KEY_ATTR_IPV4_TUNNEL] = sizeof(struct ovs_key_ipv4_tunnel),
};
static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
@@ -1022,9 +1024,39 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
swkey->phy.in_port = DP_MAX_PORTS;
}
- if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID)) {
- swkey->phy.tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]);
+ if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID) &&
+ attrs & (1ULL << OVS_KEY_ATTR_IPV4_TUNNEL)) {
+ struct ovs_key_ipv4_tunnel *tun_key;
+ __be64 tun_id;
+
+ tun_key = nla_data(a[OVS_KEY_ATTR_IPV4_TUNNEL]);
+
+ if (!tun_key->ipv4_dst)
+ return -EINVAL;
+ if (!(tun_key->tun_flags & OVS_FLOW_TNL_F_KEY))
+ return -EINVAL;
+
+ tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]);
+ if (tun_id != tun_key->tun_id)
+ return -EINVAL;
+
+ memcpy(&swkey->tun.tun_key, tun_key, sizeof(swkey->tun.tun_key));
+ attrs &= ~(1ULL << OVS_KEY_ATTR_TUN_ID);
+ attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4_TUNNEL);
+ } else if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID)) {
+ swkey->tun.tun_key.tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]);
+ swkey->tun.tun_key.tun_flags |= OVS_FLOW_TNL_F_KEY;
+
attrs &= ~(1ULL << OVS_KEY_ATTR_TUN_ID);
+ } else if (attrs & (1ULL << OVS_KEY_ATTR_IPV4_TUNNEL)) {
+ struct ovs_key_ipv4_tunnel *tun_key;
+ tun_key = nla_data(a[OVS_KEY_ATTR_IPV4_TUNNEL]);
+
+ if (!tun_key->ipv4_dst)
+ return -EINVAL;
+
+ memcpy(&swkey->tun.tun_key, tun_key, sizeof(swkey->tun.tun_key));
+ attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4_TUNNEL);
}
/* Data attributes. */
@@ -1162,14 +1194,16 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
* get the metadata, that is, the parts of the flow key that cannot be
* extracted from the packet itself.
*/
-int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id,
+int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
+ struct ovs_key_ipv4_tunnel *tun_key,
const struct nlattr *attr)
{
const struct nlattr *nla;
int rem;
+ __be64 tun_id;
*in_port = DP_MAX_PORTS;
- *tun_id = 0;
+ memset(tun_key, 0, sizeof(*tun_key));
*priority = 0;
nla_for_each_nested(nla, attr, rem) {
@@ -1185,7 +1219,35 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id,
break;
case OVS_KEY_ATTR_TUN_ID:
- *tun_id = nla_get_be64(nla);
+ tun_id = nla_get_be64(nla);
+
+ if (tun_key->ipv4_dst) {
+ if (!(tun_key->tun_flags & OVS_FLOW_TNL_F_KEY))
+ return -EINVAL;
+ if (tun_key->tun_id != tun_id)
+ return -EINVAL;
+ break;
+ }
+ tun_key->tun_id = tun_id;
+ tun_key->tun_flags |= OVS_FLOW_TNL_F_KEY;
+
+ break;
+
+ case OVS_KEY_ATTR_IPV4_TUNNEL:
+ if (tun_key->tun_flags & OVS_FLOW_TNL_F_KEY) {
+ tun_id = tun_key->tun_id;
+
+ memcpy(tun_key, nla_data(nla), sizeof(*tun_key));
+ if (!(tun_key->tun_flags & OVS_FLOW_TNL_F_KEY))
+ return -EINVAL;
+
+ if (tun_key->tun_id != tun_id)
+ return -EINVAL;
+ } else
+ memcpy(tun_key, nla_data(nla), sizeof(*tun_key));
+
+ if (!tun_key->ipv4_dst)
+ return -EINVAL;
break;
case OVS_KEY_ATTR_IN_PORT:
@@ -1210,8 +1272,16 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority))
goto nla_put_failure;
- if (swkey->phy.tun_id != cpu_to_be64(0) &&
- nla_put_be64(skb, OVS_KEY_ATTR_TUN_ID, swkey->phy.tun_id))
+ if (swkey->tun.tun_key.ipv4_dst) {
+ struct ovs_key_ipv4_tunnel *tun_key;
+ nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4_TUNNEL, sizeof(*tun_key));
+ if (!nla)
+ goto nla_put_failure;
+ tun_key = nla_data(nla);
+ memcpy(tun_key, &swkey->tun.tun_key, sizeof(*tun_key));
+ }
+ if ((swkey->tun.tun_key.tun_flags & OVS_FLOW_TNL_F_KEY) &&
+ nla_put_be64(skb, OVS_KEY_ATTR_TUN_ID, swkey->tun.tun_key.tun_id))
goto nla_put_failure;
if (swkey->phy.in_port != DP_MAX_PORTS &&
diff --git a/datapath/flow.h b/datapath/flow.h
index 02c563a3..c52e029f 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -42,11 +42,13 @@ struct sw_flow_actions {
struct sw_flow_key {
struct {
- __be64 tun_id; /* Encapsulating tunnel ID. */
u32 priority; /* Packet QoS priority. */
u16 in_port; /* Input switch port (or DP_MAX_PORTS). */
} phy;
struct {
+ struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */
+ } tun;
+ struct {
u8 src[ETH_ALEN]; /* Ethernet source address. */
u8 dst[ETH_ALEN]; /* Ethernet destination address. */
__be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
@@ -150,6 +152,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies);
* ------ --- ------ -----
* OVS_KEY_ATTR_PRIORITY 4 -- 4 8
* OVS_KEY_ATTR_TUN_ID 8 -- 4 12
+ * OVS_KEY_ATTR_IPV4_TUNNEL 24 -- 4 28
* OVS_KEY_ATTR_IN_PORT 4 -- 4 8
* OVS_KEY_ATTR_ETHERNET 12 -- 4 16
* OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype)
@@ -160,14 +163,15 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies);
* OVS_KEY_ATTR_ICMPV6 2 2 4 8
* OVS_KEY_ATTR_ND 28 -- 4 32
* -------------------------------------------------
- * total 156
+ * total 184
*/
-#define FLOW_BUFSIZE 156
+#define FLOW_BUFSIZE 184
int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
const struct nlattr *);
-int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id,
+int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
+ struct ovs_key_ipv4_tunnel *tun_key,
const struct nlattr *);
#define MAX_ACTIONS_BUFSIZE (16 * 1024)
diff --git a/datapath/tunnel.c b/datapath/tunnel.c
index d651c118..020d9d4b 100644
--- a/datapath/tunnel.c
+++ b/datapath/tunnel.c
@@ -367,9 +367,9 @@ struct vport *ovs_tnl_find_port(struct net *net, __be32 saddr, __be32 daddr,
return NULL;
}
-static void ecn_decapsulate(struct sk_buff *skb, u8 tos)
+static void ecn_decapsulate(struct sk_buff *skb)
{
- if (unlikely(INET_ECN_is_ce(tos))) {
+ if (unlikely(INET_ECN_is_ce(OVS_CB(skb)->tun_key->ipv4_tos))) {
__be16 protocol = skb->protocol;
skb_set_network_header(skb, ETH_HLEN);
@@ -416,7 +416,7 @@ static void ecn_decapsulate(struct sk_buff *skb, u8 tos)
* - skb->csum does not include the inner Ethernet header.
* - The layer pointers are undefined.
*/
-void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos)
+void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb)
{
struct ethhdr *eh;
@@ -433,7 +433,7 @@ void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos)
skb_clear_rxhash(skb);
secpath_reset(skb);
- ecn_decapsulate(skb, tos);
+ ecn_decapsulate(skb);
vlan_set_tci(skb, 0);
if (unlikely(compute_ip_summed(skb, false))) {
@@ -613,7 +613,7 @@ static void ipv6_build_icmp(struct sk_buff *skb, struct sk_buff *nskb,
bool ovs_tnl_frag_needed(struct vport *vport,
const struct tnl_mutable_config *mutable,
- struct sk_buff *skb, unsigned int mtu, __be64 flow_key)
+ struct sk_buff *skb, unsigned int mtu)
{
unsigned int eth_hdr_len = ETH_HLEN;
unsigned int total_length = 0, header_length = 0, payload_length;
@@ -697,17 +697,6 @@ bool ovs_tnl_frag_needed(struct vport *vport,
ipv6_build_icmp(skb, nskb, mtu, payload_length);
#endif
- /*
- * Assume that flow based keys are symmetric with respect to input
- * and output and use the key that we were going to put on the
- * outgoing packet for the fake received packet. If the keys are
- * not symmetric then PMTUD needs to be disabled since we won't have
- * any way of synthesizing packets.
- */
- if ((mutable->flags & (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION)) ==
- (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION))
- OVS_CB(nskb)->tun_id = flow_key;
-
if (unlikely(compute_ip_summed(nskb, false))) {
kfree_skb(nskb);
return false;
@@ -721,14 +710,26 @@ bool ovs_tnl_frag_needed(struct vport *vport,
static bool check_mtu(struct sk_buff *skb,
struct vport *vport,
const struct tnl_mutable_config *mutable,
- const struct rtable *rt, __be16 *frag_offp)
+ const struct rtable *rt, __be16 *frag_offp,
+ int tunnel_hlen)
{
- bool df_inherit = mutable->flags & TNL_F_DF_INHERIT;
- bool pmtud = mutable->flags & TNL_F_PMTUD;
- __be16 frag_off = mutable->flags & TNL_F_DF_DEFAULT ? htons(IP_DF) : 0;
+ bool df_inherit;
+ bool pmtud;
+ __be16 frag_off;
int mtu = 0;
unsigned int packet_length = skb->len - ETH_HLEN;
+ if (OVS_CB(skb)->tun_key->ipv4_dst) {
+ df_inherit = false;
+ pmtud = false;
+ frag_off = OVS_CB(skb)->tun_key->tun_flags & OVS_FLOW_TNL_F_DONT_FRAGMENT ?
+ htons(IP_DF) : 0;
+ } else {
+ df_inherit = mutable->flags & TNL_F_DF_INHERIT;
+ pmtud = mutable->flags & TNL_F_PMTUD;
+ frag_off = mutable->flags & TNL_F_DF_DEFAULT ? htons(IP_DF) : 0;
+ }
+
/* Allow for one level of tagging in the packet length. */
if (!vlan_tx_tag_present(skb) &&
eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
@@ -746,7 +747,7 @@ static bool check_mtu(struct sk_buff *skb,
mtu = dst_mtu(&rt_dst(rt))
- ETH_HLEN
- - mutable->tunnel_hlen
+ - tunnel_hlen
- vlan_header;
}
@@ -760,8 +761,7 @@ static bool check_mtu(struct sk_buff *skb,
mtu = max(mtu, IP_MIN_MTU);
if (packet_length > mtu &&
- ovs_tnl_frag_needed(vport, mutable, skb, mtu,
- OVS_CB(skb)->tun_id))
+ ovs_tnl_frag_needed(vport, mutable, skb, mtu))
return false;
}
}
@@ -777,8 +777,7 @@ static bool check_mtu(struct sk_buff *skb,
mtu = max(mtu, IPV6_MIN_MTU);
if (packet_length > mtu &&
- ovs_tnl_frag_needed(vport, mutable, skb, mtu,
- OVS_CB(skb)->tun_id))
+ ovs_tnl_frag_needed(vport, mutable, skb, mtu))
return false;
}
}
@@ -790,6 +789,7 @@ static bool check_mtu(struct sk_buff *skb,
static void create_tunnel_header(const struct vport *vport,
const struct tnl_mutable_config *mutable,
+ const struct ovs_key_ipv4_tunnel *tun_key,
const struct rtable *rt, void *header)
{
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
@@ -806,7 +806,7 @@ static void create_tunnel_header(const struct vport *vport,
if (!iph->ttl)
iph->ttl = ip4_dst_hoplimit(&rt_dst(rt));
- tnl_vport->tnl_ops->build_header(vport, mutable, iph + 1);
+ tnl_vport->tnl_ops->build_header(vport, mutable, tun_key, iph + 1);
}
static void *get_cached_header(const struct tnl_cache *cache)
@@ -907,14 +907,22 @@ static struct tnl_cache *build_cache(struct vport *vport,
struct rtable *rt)
{
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+ static const struct ovs_key_ipv4_tunnel tun_key;
struct tnl_cache *cache;
void *cache_data;
int cache_len;
struct hh_cache *hh;
+ int tunnel_hlen;
if (!(mutable->flags & TNL_F_HDR_CACHE))
return NULL;
+ tunnel_hlen = tnl_vport->tnl_ops->hdr_len(mutable, &tun_key);
+ if (tunnel_hlen < 0)
+ return NULL;
+
+ tunnel_hlen += sizeof(struct iphdr);
+
/*
* If there is no entry in the ARP cache or if this device does not
* support hard header caching just fall back to the IP stack.
@@ -937,7 +945,7 @@ static struct tnl_cache *build_cache(struct vport *vport,
else
cache = NULL;
- cache_len = LL_RESERVED_SPACE(rt_dst(rt).dev) + mutable->tunnel_hlen;
+ cache_len = LL_RESERVED_SPACE(rt_dst(rt).dev) + tunnel_hlen;
cache = kzalloc(ALIGN(sizeof(struct tnl_cache), CACHE_DATA_ALIGN) +
cache_len, GFP_ATOMIC);
@@ -946,9 +954,9 @@ static struct tnl_cache *build_cache(struct vport *vport,
create_eth_hdr(cache, hh);
cache_data = get_cached_header(cache) + cache->hh_len;
- cache->len = cache->hh_len + mutable->tunnel_hlen;
+ cache->len = cache->hh_len + tunnel_hlen;
- create_tunnel_header(vport, mutable, rt, cache_data);
+ create_tunnel_header(vport, mutable, &tun_key, rt, cache_data);
cache->mutable_seq = mutable->seq;
cache->rt = rt;
@@ -1000,15 +1008,16 @@ unlock:
}
static struct rtable *__find_route(const struct tnl_mutable_config *mutable,
- u8 ipproto, u8 tos)
+ __be32 saddr, __be32 daddr, u8 ipproto,
+ u8 tos)
{
/* Tunnel configuration keeps DSCP part of TOS bits, But Linux
* router expect RT_TOS bits only. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39)
struct flowi fl = { .nl_u = { .ip4_u = {
- .daddr = mutable->key.daddr,
- .saddr = mutable->key.saddr,
+ .daddr = daddr,
+ .saddr = saddr,
.tos = RT_TOS(tos) } },
.proto = ipproto };
struct rtable *rt;
@@ -1018,8 +1027,8 @@ static struct rtable *__find_route(const struct tnl_mutable_config *mutable,
return rt;
#else
- struct flowi4 fl = { .daddr = mutable->key.daddr,
- .saddr = mutable->key.saddr,
+ struct flowi4 fl = { .daddr = daddr,
+ .saddr = saddr,
.flowi4_tos = RT_TOS(tos),
.flowi4_proto = ipproto };
@@ -1029,7 +1038,8 @@ static struct rtable *__find_route(const struct tnl_mutable_config *mutable,
static struct rtable *find_route(struct vport *vport,
const struct tnl_mutable_config *mutable,
- u8 tos, struct tnl_cache **cache)
+ __be32 saddr, __be32 daddr, u8 tos,
+ struct tnl_cache **cache)
{
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
struct tnl_cache *cur_cache = rcu_dereference(tnl_vport->cache);
@@ -1037,17 +1047,17 @@ static struct rtable *find_route(struct vport *vport,
*cache = NULL;
tos = RT_TOS(tos);
- if (likely(tos == RT_TOS(mutable->tos) &&
- check_cache_valid(cur_cache, mutable))) {
+ if (tos == RT_TOS(mutable->tos) &&
+ check_cache_valid(cur_cache, mutable)) {
*cache = cur_cache;
return cur_cache->rt;
} else {
struct rtable *rt;
- rt = __find_route(mutable, tnl_vport->tnl_ops->ipproto, tos);
+ rt = __find_route(mutable, saddr, daddr,
+ tnl_vport->tnl_ops->ipproto, tos);
if (IS_ERR(rt))
return NULL;
-
if (likely(tos == RT_TOS(mutable->tos)))
*cache = build_cache(vport, mutable, rt);
@@ -1076,13 +1086,14 @@ static bool need_linearize(const struct sk_buff *skb)
static struct sk_buff *handle_offloads(struct sk_buff *skb,
const struct tnl_mutable_config *mutable,
- const struct rtable *rt)
+ const struct rtable *rt,
+ int tunnel_hlen)
{
int min_headroom;
int err;
min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
- + mutable->tunnel_hlen
+ + tunnel_hlen
+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
@@ -1137,14 +1148,14 @@ error:
}
static int send_frags(struct sk_buff *skb,
- const struct tnl_mutable_config *mutable)
+ int tunnel_hlen)
{
int sent_len;
sent_len = 0;
while (skb) {
struct sk_buff *next = skb->next;
- int frag_len = skb->len - mutable->tunnel_hlen;
+ int frag_len = skb->len - tunnel_hlen;
int err;
skb->next = NULL;
@@ -1173,15 +1184,17 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
{
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
const struct tnl_mutable_config *mutable = rcu_dereference(tnl_vport->mutable);
-
enum vport_err_type err = VPORT_E_TX_ERROR;
struct rtable *rt;
struct dst_entry *unattached_dst = NULL;
struct tnl_cache *cache;
+ struct ovs_key_ipv4_tunnel tun_key;
int sent_len = 0;
+ int tunnel_hlen;
__be16 frag_off = 0;
+ __be32 daddr;
+ __be32 saddr;
u8 ttl;
- u8 inner_tos;
u8 tos;
/* Validate the protocol headers before we try to use them. */
@@ -1207,30 +1220,68 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
}
#endif
- /* ToS */
- if (skb->protocol == htons(ETH_P_IP))
- inner_tos = ip_hdr(skb)->tos;
+ /* If OVS_CB(skb)->tun_key is NULL, point it at the local tun_key here,
+ * and zero it out.
+ */
+ if (!OVS_CB(skb)->tun_key) {
+ memset(&tun_key, 0, sizeof(tun_key));
+ OVS_CB(skb)->tun_key = &tun_key;
+ }
+
+ tunnel_hlen = tnl_vport->tnl_ops->hdr_len(mutable, OVS_CB(skb)->tun_key);
+ if (unlikely(tunnel_hlen < 0)) {
+ err = VPORT_E_TX_DROPPED;
+ goto error_free;
+ }
+ tunnel_hlen += sizeof(struct iphdr);
+
+ if (OVS_CB(skb)->tun_key->ipv4_dst) {
+ daddr = OVS_CB(skb)->tun_key->ipv4_dst;
+ saddr = OVS_CB(skb)->tun_key->ipv4_src;
+ tos = OVS_CB(skb)->tun_key->ipv4_tos;
+ ttl = OVS_CB(skb)->tun_key->ipv4_ttl;
+ } else {
+ u8 inner_tos;
+ daddr = mutable->key.daddr;
+ saddr = mutable->key.saddr;
+
+ /* ToS */
+ if (skb->protocol == htons(ETH_P_IP))
+ inner_tos = ip_hdr(skb)->tos;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else if (skb->protocol == htons(ETH_P_IPV6))
- inner_tos = ipv6_get_dsfield(ipv6_hdr(skb));
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ inner_tos = ipv6_get_dsfield(ipv6_hdr(skb));
#endif
- else
- inner_tos = 0;
+ else
+ inner_tos = 0;
- if (mutable->flags & TNL_F_TOS_INHERIT)
- tos = inner_tos;
- else
- tos = mutable->tos;
+ if (mutable->flags & TNL_F_TOS_INHERIT)
+ tos = inner_tos;
+ else
+ tos = mutable->tos;
+
+ tos = INET_ECN_encapsulate(tos, inner_tos);
+
+ /* TTL */
+ ttl = mutable->ttl;
+ if (mutable->flags & TNL_F_TTL_INHERIT) {
+ if (skb->protocol == htons(ETH_P_IP))
+ ttl = ip_hdr(skb)->ttl;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ ttl = ipv6_hdr(skb)->hop_limit;
+#endif
+ }
+
+ }
/* Route lookup */
- rt = find_route(vport, mutable, tos, &cache);
+ rt = find_route(vport, mutable, saddr, daddr, tos, &cache);
if (unlikely(!rt))
goto error_free;
if (unlikely(!cache))
unattached_dst = &rt_dst(rt);
- tos = INET_ECN_encapsulate(tos, inner_tos);
-
/* Reset SKB */
nf_reset(skb);
secpath_reset(skb);
@@ -1238,12 +1289,12 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
skb_clear_rxhash(skb);
/* Offloading */
- skb = handle_offloads(skb, mutable, rt);
+ skb = handle_offloads(skb, mutable, rt, tunnel_hlen);
if (IS_ERR(skb))
goto error;
/* MTU */
- if (unlikely(!check_mtu(skb, vport, mutable, rt, &frag_off))) {
+ if (unlikely(!check_mtu(skb, vport, mutable, rt, &frag_off, tunnel_hlen))) {
err = VPORT_E_TX_DROPPED;
goto error_free;
}
@@ -1252,25 +1303,19 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
* If we are over the MTU, allow the IP stack to handle fragmentation.
* Fragmentation is a slow path anyways.
*/
- if (unlikely(skb->len + mutable->tunnel_hlen > dst_mtu(&rt_dst(rt)) &&
+ if (unlikely(skb->len + tunnel_hlen > dst_mtu(&rt_dst(rt)) &&
cache)) {
unattached_dst = &rt_dst(rt);
dst_hold(unattached_dst);
cache = NULL;
}
- /* TTL */
- ttl = mutable->ttl;
- if (!ttl)
- ttl = ip4_dst_hoplimit(&rt_dst(rt));
-
- if (mutable->flags & TNL_F_TTL_INHERIT) {
- if (skb->protocol == htons(ETH_P_IP))
- ttl = ip_hdr(skb)->ttl;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else if (skb->protocol == htons(ETH_P_IPV6))
- ttl = ipv6_hdr(skb)->hop_limit;
-#endif
+ /* TTL Fixup. */
+ if (!OVS_CB(skb)->tun_key->ipv4_dst) {
+ if (!(mutable->flags & TNL_F_TTL_INHERIT)) {
+ if (!ttl)
+ ttl = ip4_dst_hoplimit(&rt_dst(rt));
+ }
}
while (skb) {
@@ -1288,8 +1333,8 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
skb_set_network_header(skb, cache->hh_len);
} else {
- skb_push(skb, mutable->tunnel_hlen);
- create_tunnel_header(vport, mutable, rt, skb->data);
+ skb_push(skb, tunnel_hlen);
+ create_tunnel_header(vport, mutable, OVS_CB(skb)->tun_key, rt, skb->data);
skb_reset_network_header(skb);
if (next_skb)
@@ -1308,7 +1353,7 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
ip_select_ident(iph, &rt_dst(rt), NULL);
skb = tnl_vport->tnl_ops->update_header(vport, mutable,
- &rt_dst(rt), skb);
+ &rt_dst(rt), skb, tunnel_hlen);
if (unlikely(!skb))
goto next;
@@ -1341,7 +1386,7 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
sent_len += orig_len;
}
} else
- sent_len += send_frags(skb, mutable);
+ sent_len += send_frags(skb, tunnel_hlen);
next:
skb = next_skb;
@@ -1427,12 +1472,6 @@ static int tnl_set_config(struct net *net, struct nlattr *options,
else
mutable->out_key = nla_get_be64(a[OVS_TUNNEL_ATTR_OUT_KEY]);
- mutable->tunnel_hlen = tnl_ops->hdr_len(mutable);
- if (mutable->tunnel_hlen < 0)
- return mutable->tunnel_hlen;
-
- mutable->tunnel_hlen += sizeof(struct iphdr);
-
old_vport = port_table_lookup(&mutable->key, &old_mutable);
if (old_vport && old_vport != cur_vport)
return -EEXIST;
@@ -1442,7 +1481,8 @@ static int tnl_set_config(struct net *net, struct nlattr *options,
struct net_device *dev;
struct rtable *rt;
- rt = __find_route(mutable, tnl_ops->ipproto, mutable->tos);
+ rt = __find_route(mutable, mutable->key.saddr, mutable->key.daddr,
+ tnl_ops->ipproto, mutable->tos);
if (IS_ERR(rt))
return -EADDRNOTAVAIL;
dev = rt_dst(rt).dev;
diff --git a/datapath/tunnel.h b/datapath/tunnel.h
index d2a87f27..951a6f1f 100644
--- a/datapath/tunnel.h
+++ b/datapath/tunnel.h
@@ -109,8 +109,6 @@ struct tnl_mutable_config {
unsigned seq;
- unsigned tunnel_hlen;
-
unsigned char eth_addr[ETH_ALEN];
/* Configured via OVS_TUNNEL_ATTR_* attributes. */
@@ -132,7 +130,8 @@ struct tnl_ops {
* build_header() (i.e. excludes the IP header). Returns a negative
* error code if the configuration is invalid.
*/
- int (*hdr_len)(const struct tnl_mutable_config *);
+ int (*hdr_len)(const struct tnl_mutable_config *,
+ const struct ovs_key_ipv4_tunnel *);
/*
* Builds the static portion of the tunnel header, which is stored in
@@ -143,7 +142,8 @@ struct tnl_ops {
* called for every packet, so try not to make it too slow.
*/
void (*build_header)(const struct vport *,
- const struct tnl_mutable_config *, void *header);
+ const struct tnl_mutable_config *,
+ const struct ovs_key_ipv4_tunnel *, void *header);
/*
* Updates the cached header of a packet to match the actual packet
@@ -155,7 +155,8 @@ struct tnl_ops {
*/
struct sk_buff *(*update_header)(const struct vport *,
const struct tnl_mutable_config *,
- struct dst_entry *, struct sk_buff *);
+ struct dst_entry *, struct sk_buff *,
+ int tunnel_hlen);
};
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
@@ -270,14 +271,14 @@ int ovs_tnl_set_addr(struct vport *vport, const unsigned char *addr);
const char *ovs_tnl_get_name(const struct vport *vport);
const unsigned char *ovs_tnl_get_addr(const struct vport *vport);
int ovs_tnl_send(struct vport *vport, struct sk_buff *skb);
-void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos);
+void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb);
struct vport *ovs_tnl_find_port(struct net *net, __be32 saddr, __be32 daddr,
__be64 key, int tunnel_type,
const struct tnl_mutable_config **mutable);
bool ovs_tnl_frag_needed(struct vport *vport,
const struct tnl_mutable_config *mutable,
- struct sk_buff *skb, unsigned int mtu, __be64 flow_key);
+ struct sk_buff *skb, unsigned int mtu);
void ovs_tnl_free_linked_skbs(struct sk_buff *skb);
int ovs_tnl_init(void);
@@ -287,4 +288,15 @@ static inline struct tnl_vport *tnl_vport_priv(const struct vport *vport)
return vport_priv(vport);
}
+static inline void tnl_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
+ const struct iphdr *iph, __be64 tun_id, u32 tun_flags)
+{
+ tun_key->tun_id = tun_id;
+ tun_key->ipv4_src = iph->saddr;
+ tun_key->ipv4_dst = iph->daddr;
+ tun_key->ipv4_tos = iph->tos;
+ tun_key->ipv4_ttl = iph->ttl;
+ tun_key->tun_flags = tun_flags;
+}
+
#endif /* tunnel.h */
diff --git a/datapath/vport-capwap.c b/datapath/vport-capwap.c
index 05a099d6..8a63416b 100644
--- a/datapath/vport-capwap.c
+++ b/datapath/vport-capwap.c
@@ -155,16 +155,52 @@ static struct inet_frags frag_state = {
.secret_interval = CAPWAP_FRAG_SECRET_INTERVAL,
};
-static int capwap_hdr_len(const struct tnl_mutable_config *mutable)
+static int get_capwap_param(const struct tnl_mutable_config *mutable,
+ const struct ovs_key_ipv4_tunnel *tun_key,
+ u32 *flags, __be64 *out_key)
+{
+ if (tun_key->ipv4_dst) {
+ *flags = 0;
+
+ if (tun_key->tun_flags & OVS_FLOW_TNL_F_KEY)
+ *flags = TNL_F_OUT_KEY_ACTION;
+ if (tun_key->tun_flags & OVS_FLOW_TNL_F_CSUM)
+ *flags |= TNL_F_CSUM;
+ *out_key = tun_key->tun_id;
+ } else {
+ *flags = mutable->flags;
+ if (mutable->flags & TNL_F_OUT_KEY_ACTION) {
+ if (likely(tun_key->tun_flags & OVS_FLOW_TNL_F_KEY)) {
+ *out_key = tun_key->tun_id;
+ } else {
+ *out_key = 0;
+ return -EINVAL;
+ }
+ } else
+ *out_key = mutable->out_key;
+
+ }
+ return 0;
+}
+
+static int capwap_hdr_len(const struct tnl_mutable_config *mutable,
+ const struct ovs_key_ipv4_tunnel *tun_key)
{
int size = CAPWAP_MIN_HLEN;
+ u32 flags;
+ __be64 out_key;
+ int err;
+
+ err = get_capwap_param(mutable, tun_key, &flags, &out_key);
+ if (err)
+ return err;
/* CAPWAP has no checksums. */
- if (mutable->flags & TNL_F_CSUM)
+ if (flags & TNL_F_CSUM)
return -EINVAL;
/* if keys are specified, then add WSI field */
- if (mutable->out_key || (mutable->flags & TNL_F_OUT_KEY_ACTION)) {
+ if (out_key || (flags & TNL_F_OUT_KEY_ACTION)) {
size += sizeof(struct capwaphdr_wsi) +
sizeof(struct capwaphdr_wsi_key);
}
@@ -174,10 +210,15 @@ static int capwap_hdr_len(const struct tnl_mutable_config *mutable)
static void capwap_build_header(const struct vport *vport,
const struct tnl_mutable_config *mutable,
+ const struct ovs_key_ipv4_tunnel *tun_key,
void *header)
{
struct udphdr *udph = header;
struct capwaphdr *cwh = (struct capwaphdr *)(udph + 1);
+ u32 flags;
+ __be64 out_key;
+
+ get_capwap_param(mutable, tun_key, &flags, &out_key);
udph->source = htons(CAPWAP_SRC_PORT);
udph->dest = htons(CAPWAP_DST_PORT);
@@ -186,7 +227,7 @@ static void capwap_build_header(const struct vport *vport,
cwh->frag_id = 0;
cwh->frag_off = 0;
- if (mutable->out_key || (mutable->flags & TNL_F_OUT_KEY_ACTION)) {
+ if (out_key || (flags & TNL_F_OUT_KEY_ACTION)) {
struct capwaphdr_wsi *wsi = (struct capwaphdr_wsi *)(cwh + 1);
cwh->begin = CAPWAP_KEYED;
@@ -197,9 +238,9 @@ static void capwap_build_header(const struct vport *vport,
wsi->flags = CAPWAP_WSI_F_KEY64;
wsi->reserved_padding = 0;
- if (mutable->out_key) {
+ if (out_key) {
struct capwaphdr_wsi_key *opt = (struct capwaphdr_wsi_key *)(wsi + 1);
- opt->key = mutable->out_key;
+ opt->key = out_key;
}
} else {
/* make packet readable by old capwap code */
@@ -210,30 +251,39 @@ static void capwap_build_header(const struct vport *vport,
static struct sk_buff *capwap_update_header(const struct vport *vport,
const struct tnl_mutable_config *mutable,
struct dst_entry *dst,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ int tunnel_hlen)
{
+ const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
struct udphdr *udph = udp_hdr(skb);
+ u32 flags;
+ __be64 out_key;
- if (mutable->flags & TNL_F_OUT_KEY_ACTION) {
+ if (get_capwap_param(mutable, tun_key, &flags, &out_key)) {
+ kfree_skb(skb);
+ return NULL;
+ }
+
+ if (flags & TNL_F_OUT_KEY_ACTION) {
/* first field in WSI is key */
struct capwaphdr *cwh = (struct capwaphdr *)(udph + 1);
struct capwaphdr_wsi *wsi = (struct capwaphdr_wsi *)(cwh + 1);
struct capwaphdr_wsi_key *opt = (struct capwaphdr_wsi_key *)(wsi + 1);
- opt->key = OVS_CB(skb)->tun_id;
+ opt->key = out_key;
}
udph->len = htons(skb->len - skb_transport_offset(skb));
if (unlikely(skb->len - skb_network_offset(skb) > dst_mtu(dst))) {
- unsigned int hlen = skb_transport_offset(skb) + capwap_hdr_len(mutable);
+ unsigned int hlen = skb_transport_offset(skb) + capwap_hdr_len(mutable, tun_key);
skb = fragment(skb, vport, dst, hlen);
}
return skb;
}
-static int process_capwap_wsi(struct sk_buff *skb, __be64 *key)
+static int process_capwap_wsi(struct sk_buff *skb, __be64 *key, bool *key_present)
{
struct capwaphdr *cwh = capwap_hdr(skb);
struct capwaphdr_wsi *wsi;
@@ -270,12 +320,15 @@ static int process_capwap_wsi(struct sk_buff *skb, __be64 *key)
opt = (struct capwaphdr_wsi_key *)(wsi + 1);
*key = opt->key;
+ *key_present = true;
+ } else {
+ *key_present = false;
}
return 0;
}
-static struct sk_buff *process_capwap_proto(struct sk_buff *skb, __be64 *key)
+static struct sk_buff *process_capwap_proto(struct sk_buff *skb, __be64 *key, bool *key_present)
{
struct capwaphdr *cwh = capwap_hdr(skb);
int hdr_len = sizeof(struct udphdr);
@@ -301,7 +354,7 @@ static struct sk_buff *process_capwap_proto(struct sk_buff *skb, __be64 *key)
cwh = capwap_hdr(skb);
}
- if ((cwh->begin & CAPWAP_F_WSI) && process_capwap_wsi(skb, key))
+ if ((cwh->begin & CAPWAP_F_WSI) && process_capwap_wsi(skb, key, key_present))
goto error;
return skb;
@@ -316,12 +369,14 @@ static int capwap_rcv(struct sock *sk, struct sk_buff *skb)
struct vport *vport;
const struct tnl_mutable_config *mutable;
struct iphdr *iph;
+ struct ovs_key_ipv4_tunnel tun_key;
__be64 key = 0;
+ bool key_present = false;
if (unlikely(!pskb_may_pull(skb, CAPWAP_MIN_HLEN + ETH_HLEN)))
goto error;
- skb = process_capwap_proto(skb, &key);
+ skb = process_capwap_proto(skb, &key, &key_present);
if (unlikely(!skb))
goto out;
@@ -333,12 +388,14 @@ static int capwap_rcv(struct sock *sk, struct sk_buff *skb)
goto error;
}
- if (mutable->flags & TNL_F_IN_KEY_MATCH)
- OVS_CB(skb)->tun_id = key;
- else
- OVS_CB(skb)->tun_id = 0;
+ if (key_present && mutable->key.daddr &&
+ !(mutable->flags & TNL_F_IN_KEY_MATCH))
+ key_present = false;
+
+ tnl_tun_key_init(&tun_key, iph, key, key_present ? OVS_FLOW_TNL_F_KEY : 0);
+ OVS_CB(skb)->tun_key = &tun_key;
- ovs_tnl_rcv(vport, skb, iph->tos);
+ ovs_tnl_rcv(vport, skb);
goto out;
error:
diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
index e3a190f5..a25da026 100644
--- a/datapath/vport-gre.c
+++ b/datapath/vport-gre.c
@@ -45,22 +45,61 @@ struct gre_base_hdr {
__be16 protocol;
};
-static int gre_hdr_len(const struct tnl_mutable_config *mutable)
+/*
+ * Work out the effective GRE transmit flags, tunnel type and output key for
+ * a packet.
+ *
+ * If @tun_key carries a destination address, the outputs come from the key
+ * alone: OVS_FLOW_TNL_F_KEY/CSUM are translated to TNL_F_OUT_KEY_ACTION/
+ * TNL_F_CSUM, the type is TNL_T_PROTO_GRE and the key's tun_id is used.
+ * Otherwise flags and type are taken from the port configuration in
+ * @mutable, and the output key is either the port's fixed out_key or, when
+ * TNL_F_OUT_KEY_ACTION is set, the key's tun_id.
+ *
+ * Returns 0 on success.  Returns -EINVAL (with *out_key set to 0) when the
+ * port expects a per-packet key but @tun_key does not have
+ * OVS_FLOW_TNL_F_KEY set.
+ */
+static int get_gre_param(const struct tnl_mutable_config *mutable,
+			 const struct ovs_key_ipv4_tunnel *tun_key,
+			 u32 *flags, u32 *tunnel_type, __be64 *out_key)
+{
+	if (!tun_key->ipv4_dst) {
+		/* No destination in the key: fall back to port settings. */
+		*flags = mutable->flags;
+		*tunnel_type = mutable->key.tunnel_type;
+
+		if (!(mutable->flags & TNL_F_OUT_KEY_ACTION)) {
+			*out_key = mutable->out_key;
+			return 0;
+		}
+
+		if (likely(tun_key->tun_flags & OVS_FLOW_TNL_F_KEY)) {
+			*out_key = tun_key->tun_id;
+			return 0;
+		}
+
+		*out_key = 0;
+		return -EINVAL;
+	}
+
+	/* Destination present: derive everything from the tunnel key. */
+	*flags = 0;
+	if (tun_key->tun_flags & OVS_FLOW_TNL_F_KEY)
+		*flags |= TNL_F_OUT_KEY_ACTION;
+	if (tun_key->tun_flags & OVS_FLOW_TNL_F_CSUM)
+		*flags |= TNL_F_CSUM;
+
+	*tunnel_type = TNL_T_PROTO_GRE;
+	*out_key = tun_key->tun_id;
+	return 0;
+}
+
+static int gre_hdr_len(const struct tnl_mutable_config *mutable,
+ const struct ovs_key_ipv4_tunnel *tun_key)
{
int len;
+ u32 flags;
+ u32 tunnel_type;
+ __be64 out_key;
+ int err;
+
+ err = get_gre_param(mutable, tun_key, &flags, &tunnel_type, &out_key);
+ if (err)
+ return err;
len = GRE_HEADER_SECTION;
- if (mutable->flags & TNL_F_CSUM)
+ if (flags & TNL_F_CSUM)
len += GRE_HEADER_SECTION;
/* Set key for GRE64 tunnels, even when key if is zero. */
- if (mutable->out_key ||
- mutable->key.tunnel_type & TNL_T_PROTO_GRE64 ||
- mutable->flags & TNL_F_OUT_KEY_ACTION) {
+ if (out_key ||
+ tunnel_type & TNL_T_PROTO_GRE64 ||
+ flags & TNL_F_OUT_KEY_ACTION) {
len += GRE_HEADER_SECTION;
- if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64)
+ if (tunnel_type & TNL_T_PROTO_GRE64)
len += GRE_HEADER_SECTION;
}
return len;
@@ -88,32 +127,38 @@ static __be32 be64_get_high32(__be64 x)
static void gre_build_header(const struct vport *vport,
const struct tnl_mutable_config *mutable,
+ const struct ovs_key_ipv4_tunnel *tun_key,
void *header)
{
struct gre_base_hdr *greh = header;
__be32 *options = (__be32 *)(greh + 1);
+ u32 flags;
+ u32 tunnel_type;
+ __be64 out_key;
+
+ get_gre_param(mutable, tun_key, &flags, &tunnel_type, &out_key);
greh->protocol = htons(ETH_P_TEB);
greh->flags = 0;
- if (mutable->flags & TNL_F_CSUM) {
+ if (flags & TNL_F_CSUM) {
greh->flags |= GRE_CSUM;
*options = 0;
options++;
}
- if (mutable->flags & TNL_F_OUT_KEY_ACTION) {
+ if (flags & TNL_F_OUT_KEY_ACTION) {
greh->flags |= GRE_KEY;
- if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64)
+ if (tunnel_type & TNL_T_PROTO_GRE64)
greh->flags |= GRE_SEQ;
- } else if (mutable->out_key ||
- mutable->key.tunnel_type & TNL_T_PROTO_GRE64) {
+ } else if (out_key ||
+ tunnel_type & TNL_T_PROTO_GRE64) {
greh->flags |= GRE_KEY;
- *options = be64_get_low32(mutable->out_key);
- if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64) {
+ *options = be64_get_low32(out_key);
+ if (tunnel_type & TNL_T_PROTO_GRE64) {
options++;
- *options = be64_get_high32(mutable->out_key);
+ *options = be64_get_high32(out_key);
greh->flags |= GRE_SEQ;
}
}
@@ -122,28 +167,37 @@ static void gre_build_header(const struct vport *vport,
static struct sk_buff *gre_update_header(const struct vport *vport,
const struct tnl_mutable_config *mutable,
struct dst_entry *dst,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ int tunnel_hlen)
{
- __be32 *options = (__be32 *)(skb_network_header(skb) + mutable->tunnel_hlen
+ u32 flags;
+ u32 tunnel_type;
+ __be64 out_key;
+ const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
+ __be32 *options = (__be32 *)(skb_network_header(skb) + tunnel_hlen
- GRE_HEADER_SECTION);
+ if (get_gre_param(mutable, tun_key, &flags, &tunnel_type, &out_key)) {
+ kfree_skb(skb);
+ return NULL;
+ }
+
/* Work backwards over the options so the checksum is last. */
- if (mutable->flags & TNL_F_OUT_KEY_ACTION) {
- if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64) {
+ if (flags & TNL_F_OUT_KEY_ACTION) {
+ if (tunnel_type & TNL_T_PROTO_GRE64) {
/* Set higher 32 bits to seq. */
- *options = be64_get_high32(OVS_CB(skb)->tun_id);
+ *options = be64_get_high32(out_key);
options--;
}
- *options = be64_get_low32(OVS_CB(skb)->tun_id);
+ *options = be64_get_low32(out_key);
options--;
- } else if (mutable->out_key ||
- mutable->key.tunnel_type & TNL_T_PROTO_GRE64) {
+ } else if (out_key || tunnel_type & TNL_T_PROTO_GRE64) {
options--;
- if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64)
+ if (tunnel_type & TNL_T_PROTO_GRE64)
options--;
}
- if (mutable->flags & TNL_F_CSUM)
+ if (flags & TNL_F_CSUM)
*(__sum16 *)options = csum_fold(skb_checksum(skb,
skb_transport_offset(skb),
skb->len - skb_transport_offset(skb),
@@ -335,7 +389,7 @@ static void gre_err(struct sk_buff *skb, u32 info)
#endif
__skb_pull(skb, tunnel_hdr_len);
- ovs_tnl_frag_needed(vport, mutable, skb, mtu, key);
+ ovs_tnl_frag_needed(vport, mutable, skb, mtu);
__skb_push(skb, tunnel_hdr_len);
out:
@@ -370,6 +424,24 @@ static bool check_checksum(struct sk_buff *skb)
return (csum == 0);
}
+/*
+ * Translate the flag bits of a received GRE header into OVS_FLOW_TNL_F_*
+ * bits for the packet's tunnel key.
+ *
+ * OVS_FLOW_TNL_F_KEY is reported only when the header has GRE_KEY set and
+ * the port either has no configured daddr or has TNL_F_IN_KEY_MATCH set.
+ * OVS_FLOW_TNL_F_CSUM mirrors GRE_CSUM.
+ */
+static u32 gre_flags_to_tunnel_flags(const struct tnl_mutable_config *mutable,
+				     __be16 gre_flags)
+{
+	u32 result = 0;
+
+	if ((gre_flags & GRE_KEY) &&
+	    (!mutable->key.daddr || (mutable->flags & TNL_F_IN_KEY_MATCH)))
+		result |= OVS_FLOW_TNL_F_KEY;
+
+	if (gre_flags & GRE_CSUM)
+		result |= OVS_FLOW_TNL_F_CSUM;
+
+	return result;
+}
+
/* Called with rcu_read_lock and BH disabled. */
static int gre_rcv(struct sk_buff *skb)
{
@@ -377,6 +449,7 @@ static int gre_rcv(struct sk_buff *skb)
const struct tnl_mutable_config *mutable;
int hdr_len;
struct iphdr *iph;
+ struct ovs_key_ipv4_tunnel tun_key;
__be16 flags;
__be64 key;
u32 tunnel_type;
@@ -401,15 +474,13 @@ static int gre_rcv(struct sk_buff *skb)
goto error;
}
- if (mutable->flags & TNL_F_IN_KEY_MATCH)
- OVS_CB(skb)->tun_id = key;
- else
- OVS_CB(skb)->tun_id = 0;
+ tnl_tun_key_init(&tun_key, iph, key, gre_flags_to_tunnel_flags(mutable, flags));
+ OVS_CB(skb)->tun_key = &tun_key;
__skb_pull(skb, hdr_len);
skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN);
- ovs_tnl_rcv(vport, skb, iph->tos);
+ ovs_tnl_rcv(vport, skb);
return 0;
error:
diff --git a/datapath/vport.c b/datapath/vport.c
index af1c066f..d9c8cfd2 100644
--- a/datapath/vport.c
+++ b/datapath/vport.c
@@ -463,7 +463,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
OVS_CB(skb)->flow = NULL;
if (!(vport->ops->flags & VPORT_F_TUN_ID))
- OVS_CB(skb)->tun_id = 0;
+ OVS_CB(skb)->tun_key = NULL;
ovs_dp_process_received_packet(vport, skb);
}
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index 294f6d08..89feb61f 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -279,7 +279,8 @@ enum ovs_key_attr {
OVS_KEY_ATTR_ICMPV6, /* struct ovs_key_icmpv6 */
OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */
OVS_KEY_ATTR_ND, /* struct ovs_key_nd */
- OVS_KEY_ATTR_TUN_ID = 63, /* be64 tunnel ID */
+ OVS_KEY_ATTR_IPV4_TUNNEL = 62, /* struct ovs_key_ipv4_tunnel */
+ OVS_KEY_ATTR_TUN_ID = 63, /* be64 tunnel ID */
__OVS_KEY_ATTR_MAX
};
@@ -361,6 +362,21 @@ struct ovs_key_nd {
__u8 nd_tll[6];
};
+/* Values for ovs_key_ipv4_tunnel->tun_flags */
+#define OVS_FLOW_TNL_F_DONT_FRAGMENT (1 << 0)
+#define OVS_FLOW_TNL_F_CSUM (1 << 1) /* Mapped to/from TNL_F_CSUM and GRE_CSUM by the tunnel vports. */
+#define OVS_FLOW_TNL_F_KEY (1 << 2) /* tun_id is meaningful for this packet. */
+
+struct ovs_key_ipv4_tunnel {
+ __be64 tun_id; /* Tunnel ID, network byte order. */
+ __u32 tun_flags; /* OVS_FLOW_TNL_F_* bits. */
+ __be32 ipv4_src; /* Outer IPv4 source address. */
+ __be32 ipv4_dst; /* Outer IPv4 destination address. */
+ __u8 ipv4_tos; /* Outer IPv4 TOS. */
+ __u8 ipv4_ttl; /* Outer IPv4 TTL. */
+ __u8 pad[2]; /* Explicit padding; the members total 24 bytes. */
+};
+
/**
* enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
* @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index c9e3210f..797cb06b 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1179,6 +1179,7 @@ execute_set_action(struct ofpbuf *packet, const struct nlattr *a)
case OVS_KEY_ATTR_TUN_ID:
case OVS_KEY_ATTR_PRIORITY:
case OVS_KEY_ATTR_IPV6:
+ case OVS_KEY_ATTR_IPV4_TUNNEL:
/* not implemented */
break;
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 257d7a7a..9ed17ed8 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -93,6 +93,8 @@ ovs_key_attr_to_string(enum ovs_key_attr attr)
case OVS_KEY_ATTR_UNSPEC: return "unspec";
case OVS_KEY_ATTR_ENCAP: return "encap";
case OVS_KEY_ATTR_PRIORITY: return "priority";
+ case OVS_KEY_ATTR_TUN_ID: return "tun_id";
+ case OVS_KEY_ATTR_IPV4_TUNNEL: return "ipv4_tunnel";
case OVS_KEY_ATTR_IN_PORT: return "in_port";
case OVS_KEY_ATTR_ETHERNET: return "eth";
case OVS_KEY_ATTR_VLAN: return "vlan";
@@ -105,7 +107,6 @@ ovs_key_attr_to_string(enum ovs_key_attr attr)
case OVS_KEY_ATTR_ICMPV6: return "icmpv6";
case OVS_KEY_ATTR_ARP: return "arp";
case OVS_KEY_ATTR_ND: return "nd";
- case OVS_KEY_ATTR_TUN_ID: return "tun_id";
case __OVS_KEY_ATTR_MAX:
default:
@@ -602,6 +603,7 @@ odp_flow_key_attr_len(uint16_t type)
case OVS_KEY_ATTR_ENCAP: return -2;
case OVS_KEY_ATTR_PRIORITY: return 4;
case OVS_KEY_ATTR_TUN_ID: return 8;
+ case OVS_KEY_ATTR_IPV4_TUNNEL: return sizeof(struct ovs_key_ipv4_tunnel);
case OVS_KEY_ATTR_IN_PORT: return 4;
case OVS_KEY_ATTR_ETHERNET: return sizeof(struct ovs_key_ethernet);
case OVS_KEY_ATTR_VLAN: return sizeof(ovs_be16);
@@ -668,6 +670,7 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds)
const struct ovs_key_icmpv6 *icmpv6_key;
const struct ovs_key_arp *arp_key;
const struct ovs_key_nd *nd_key;
+ const struct ovs_key_ipv4_tunnel *ipv4_tun_key;
enum ovs_key_attr attr = nl_attr_type(a);
int expected_len;
@@ -698,6 +701,16 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds)
ds_put_format(ds, "(%#"PRIx64")", ntohll(nl_attr_get_be64(a)));
break;
+ case OVS_KEY_ATTR_IPV4_TUNNEL:
+ ipv4_tun_key = nl_attr_get(a);
+ ds_put_format(ds, "(tun_id=0x%"PRIx64",flags=0x%"PRIx32
+ ",src="IP_FMT",dst="IP_FMT",tos=0x%"PRIx8",ttl=%"PRIu8")",
+ ntohll(ipv4_tun_key->tun_id), ipv4_tun_key->tun_flags,
+ IP_ARGS(&ipv4_tun_key->ipv4_src),
+ IP_ARGS(&ipv4_tun_key->ipv4_dst),
+ ipv4_tun_key->ipv4_tos, ipv4_tun_key->ipv4_ttl);
+ break;
+
case OVS_KEY_ATTR_IN_PORT:
ds_put_format(ds, "(%"PRIu32")", nl_attr_get_u32(a));
break;
diff --git a/lib/odp-util.h b/lib/odp-util.h
index 16f2b156..57073bad 100644
--- a/lib/odp-util.h
+++ b/lib/odp-util.h
@@ -80,6 +80,7 @@ int odp_actions_from_string(const char *, const struct simap *port_names,
* ------ --- ------ -----
* OVS_KEY_ATTR_PRIORITY 4 -- 4 8
* OVS_KEY_ATTR_TUN_ID 8 -- 4 12
+ * OVS_KEY_ATTR_IPV4_TUNNEL 24 -- 4 28
* OVS_KEY_ATTR_IN_PORT 4 -- 4 8
* OVS_KEY_ATTR_ETHERNET 12 -- 4 16
* OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype)
@@ -90,7 +91,7 @@ int odp_actions_from_string(const char *, const struct simap *port_names,
* OVS_KEY_ATTR_ICMPV6 2 2 4 8
* OVS_KEY_ATTR_ND 28 -- 4 32
* -------------------------------------------------
- * total 156
+ * total 184
*
* We include some slack space in case the calculation isn't quite right or we
* add another field and forget to adjust this value.