diff options
author | Jesse Gross <jesse@nicira.com> | 2014-06-05 19:07:32 -0700 |
---|---|---|
committer | Jesse Gross <jesse@nicira.com> | 2014-06-20 15:19:35 -0700 |
commit | c1fc1411d204c59608bf9fe36a65bd221b10cbb2 (patch) | |
tree | efafb29b0bf6dbc98dd0e8da168ba1fc78b962cb /datapath/flow_netlink.c | |
parent | 1d2a1b5f5252e4c6ce8bbf8d91ca27aba52496e6 (diff) |
datapath: Add support for Geneve tunneling.
This adds support for Geneve - Generic Network Virtualization
Encapsulation. The protocol is documented at
http://tools.ietf.org/html/draft-gross-geneve-00
The kernel implementation is completely agnostic to the options
that are in use and can handle newly defined options without
further work. It does this by simply matching on a byte array
of options and allowing userspace to set up flows on this array.
Userspace currently implements support for only the basic version of
Geneve. It can work with the base header (including the VNI) and
is capable of parsing options but does not currently support any
particular option definitions. Over time, the intention is to
allow options to be matched through OpenFlow without requiring
explicit support in OVS userspace.
Signed-off-by: Jesse Gross <jesse@nicira.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Diffstat (limited to 'datapath/flow_netlink.c')
-rw-r--r-- | datapath/flow_netlink.c | 143 |
1 files changed, 129 insertions, 14 deletions
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c index c5ca2f490..22ad2d00b 100644 --- a/datapath/flow_netlink.c +++ b/datapath/flow_netlink.c @@ -42,6 +42,7 @@ #include <linux/icmp.h> #include <linux/icmpv6.h> #include <linux/rculist.h> +#include <net/geneve.h> #include <net/ip.h> #include <net/ip_tunnels.h> #include <net/ipv6.h> @@ -89,18 +90,21 @@ static void update_range__(struct sw_flow_match *match, } \ } while (0) -#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ +#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \ do { \ - update_range__(match, offsetof(struct sw_flow_key, field), \ - len, is_mask); \ + update_range__(match, offset, len, is_mask); \ if (is_mask) { \ if ((match)->mask) \ - memcpy(&(match)->mask->key.field, value_p, len);\ + memcpy((u8 *)&(match)->mask->key + offset, value_p, len);\ } else { \ - memcpy(&(match)->key->field, value_p, len); \ + memcpy((u8 *)(match)->key + offset, value_p, len); \ } \ } while (0) +#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ + SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \ + value_p, len, is_mask) + static u16 range_n_bytes(const struct sw_flow_key_range *range) { return range->end - range->start; @@ -348,6 +352,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, [OVS_TUNNEL_KEY_ATTR_OAM] = 0, + [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1, }; if (type > OVS_TUNNEL_KEY_ATTR_MAX) { @@ -356,7 +361,8 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, return -EINVAL; } - if (ovs_tunnel_key_lens[type] != nla_len(a)) { + if (ovs_tunnel_key_lens[type] != nla_len(a) && + ovs_tunnel_key_lens[type] != -1) { OVS_NLERR("IPv4 tunnel attribute type has unexpected " " length (type=%d, length=%d, expected=%d).\n", type, nla_len(a), ovs_tunnel_key_lens[type]); @@ -395,6 +401,56 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, case 
OVS_TUNNEL_KEY_ATTR_OAM: tun_flags |= TUNNEL_OAM; break; + case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: + if (nla_len(a) > sizeof(match->key->tun_opts)) { + OVS_NLERR("Geneve option length exceeds " + "maximum size (len %d, max %zu).\n", + nla_len(a), + sizeof(match->key->tun_opts)); + return -EINVAL; + } + + if (nla_len(a) % 4 != 0) { + OVS_NLERR("Geneve option length is not " + "a multiple of 4 (len %d).\n", + nla_len(a)); + return -EINVAL; + } + + /* We need to record the length of the options passed + * down, otherwise packets with the same format but + * additional options will be silently matched. + */ + if (!is_mask) { + SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a), + false); + } else { + /* This is somewhat unusual because it looks at + * both the key and mask while parsing the + * attributes (and by extension assumes the key + * is parsed first). Normally, we would verify + * that each is the correct length and that the + * attributes line up in the validate function. + * However, that is difficult because this is + * variable length and we won't have the + * information later. 
+ */ + if (match->key->tun_opts_len != nla_len(a)) { + OVS_NLERR("Geneve option key length (%d)" + " is different from mask length (%d).", + match->key->tun_opts_len, nla_len(a)); + return -EINVAL; + } + + SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, + true); + } + + SW_FLOW_KEY_MEMCPY_OFFSET(match, + (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0, + nla_len(a)), + nla_data(a), nla_len(a), is_mask); + break; default: return -EINVAL; } @@ -423,8 +479,9 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, } static int ipv4_tun_to_nlattr(struct sk_buff *skb, - const struct ovs_key_ipv4_tunnel *tun_key, - const struct ovs_key_ipv4_tunnel *output) + const struct ovs_key_ipv4_tunnel *output, + const struct geneve_opt *tun_opts, + int swkey_tun_opts_len) { struct nlattr *nla; @@ -455,6 +512,9 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb, if ((output->tun_flags & TUNNEL_OAM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; + if (tun_opts && + nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, + swkey_tun_opts_len, tun_opts)); nla_nest_end(skb, nla); return 0; @@ -900,7 +960,7 @@ int ovs_nla_get_flow_metadata(struct sw_flow *flow, return 0; } -int ovs_nla_put_flow(const struct sw_flow_key *swkey, +int ovs_nla_put_flow(struct datapath *dp, const struct sw_flow_key *swkey, const struct sw_flow_key *output, struct sk_buff *skb) { struct ovs_key_ethernet *eth_key; @@ -916,9 +976,24 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) goto nla_put_failure; - if ((swkey->tun_key.ipv4_dst || is_mask) && - ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key)) - goto nla_put_failure; + if ((swkey->tun_key.ipv4_dst || is_mask)) { + const struct geneve_opt *opts = NULL; + + if (!is_mask) { + struct vport *in_port; + + in_port = ovs_vport_ovsl_rcu(dp, swkey->phy.in_port); + if (in_port->ops->type == OVS_VPORT_TYPE_GENEVE) + opts = GENEVE_OPTS(output, swkey->tun_opts_len); + } else 
{ + if (output->tun_opts_len) + opts = GENEVE_OPTS(output, swkey->tun_opts_len); + } + + if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts, + swkey->tun_opts_len)) + goto nla_put_failure; + } if (swkey->phy.in_port == DP_MAX_PORTS) { if (is_mask && (output->phy.in_port == 0xffff)) @@ -1309,17 +1384,55 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, if (err) return err; + if (key.tun_opts_len) { + struct geneve_opt *option = GENEVE_OPTS(&key, + key.tun_opts_len); + int opts_len = key.tun_opts_len; + bool crit_opt = false; + + while (opts_len > 0) { + int len; + + if (opts_len < sizeof(*option)) + return -EINVAL; + + len = sizeof(*option) + option->length * 4; + if (len > opts_len) + return -EINVAL; + + crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); + + option = (struct geneve_opt *)((u8 *)option + len); + opts_len -= len; + }; + + key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; + }; + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); if (start < 0) return start; a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, - sizeof(*tun_info)); + sizeof(*tun_info) + key.tun_opts_len); if (IS_ERR(a)) return PTR_ERR(a); tun_info = nla_data(a); tun_info->tunnel = key.tun_key; + tun_info->options_len = key.tun_opts_len; + + if (tun_info->options_len) { + /* We need to store the options in the action itself since + * everything else will go away after flow setup. We can append + * it to tun_info and then point there. + */ + tun_info->options = (struct geneve_opt *)(tun_info + 1); + memcpy(tun_info->options, GENEVE_OPTS(&key, key.tun_opts_len), + key.tun_opts_len); + } else { + tun_info->options = NULL; + } add_nested_action_end(*sfa, start); @@ -1611,7 +1724,9 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) return -EMSGSIZE; err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel, - &tun_info->tunnel); + tun_info->options_len ? 
+ tun_info->options : NULL, + tun_info->options_len); if (err) return err; nla_nest_end(skb, start); |