diff options
author | Pravin B Shelar <pshelar@nicira.com> | 2012-02-16 17:12:36 -0800 |
---|---|---|
committer | Pravin B Shelar <pshelar@nicira.com> | 2012-02-16 17:12:36 -0800 |
commit | 95b1d73a4a6944a44c028b771a339d2971d93071 (patch) | |
tree | 20097313f0c67906bed245952d3221068a157641 /datapath | |
parent | fe0731b10011075ab40960e13eb2ae5f39771e90 (diff) |
datapath: Increase maximum number of datapath ports.
Use a hash table to store the ports of a datapath. Allow 64K ports per switch.
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
Bug #2462
Diffstat (limited to 'datapath')
-rw-r--r-- | datapath/actions.c | 2 | ||||
-rw-r--r-- | datapath/datapath.c | 114 | ||||
-rw-r--r-- | datapath/datapath.h | 33 | ||||
-rw-r--r-- | datapath/dp_sysfs_dp.c | 4 | ||||
-rw-r--r-- | datapath/dp_sysfs_if.c | 2 | ||||
-rw-r--r-- | datapath/flow.c | 11 | ||||
-rw-r--r-- | datapath/flow.h | 3 | ||||
-rw-r--r-- | datapath/vport.c | 1 | ||||
-rw-r--r-- | datapath/vport.h | 2 |
9 files changed, 117 insertions, 55 deletions
diff --git a/datapath/actions.c b/datapath/actions.c index 824791d1..4b076039 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -248,7 +248,7 @@ static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port) if (unlikely(!skb)) return -ENOMEM; - vport = rcu_dereference(dp->ports[out_port]); + vport = ovs_vport_rcu(dp, out_port); if (unlikely(!vport)) { kfree_skb(skb); return -ENODEV; diff --git a/datapath/datapath.c b/datapath/datapath.c index 220c7dd4..23fb1dc8 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -120,7 +120,7 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex) /* Must be called with rcu_read_lock or RTNL lock. */ const char *ovs_dp_name(const struct datapath *dp) { - struct vport *vport = rcu_dereference_rtnl(dp->ports[OVSP_LOCAL]); + struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL); return vport->ops->get_name(vport); } @@ -131,7 +131,7 @@ static int get_dpifindex(struct datapath *dp) rcu_read_lock(); - local = rcu_dereference(dp->ports[OVSP_LOCAL]); + local = ovs_vport_rcu(dp, OVSP_LOCAL); if (local) ifindex = local->ops->get_ifindex(local); else @@ -244,9 +244,30 @@ static void destroy_dp_rcu(struct rcu_head *rcu) ovs_flow_tbl_destroy((__force struct flow_table *)dp->table); free_percpu(dp->stats_percpu); release_net(ovs_dp_get_net(dp)); + kfree(dp->ports); kobject_put(&dp->ifobj); } +static struct hlist_head *vport_hash_bucket(const struct datapath *dp, + u16 port_no) +{ + return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)]; +} + +struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) +{ + struct vport *vport; + struct hlist_node *n; + struct hlist_head *head; + + head = vport_hash_bucket(dp, port_no); + hlist_for_each_entry_rcu(vport, n, head, dp_hash_node) { + if (vport->port_no == port_no) + return vport; + } + return NULL; +} + /* Called with RTNL lock and genl_lock. 
*/ static struct vport *new_vport(const struct vport_parms *parms) { @@ -255,13 +276,11 @@ static struct vport *new_vport(const struct vport_parms *parms) vport = ovs_vport_add(parms); if (!IS_ERR(vport)) { struct datapath *dp = parms->dp; + struct hlist_head *head = vport_hash_bucket(dp, vport->port_no); - rcu_assign_pointer(dp->ports[parms->port_no], vport); - list_add(&vport->node, &dp->port_list); - + hlist_add_head_rcu(&vport->dp_hash_node, head); dp_ifinfo_notify(RTM_NEWLINK, vport); } - return vport; } @@ -272,11 +291,11 @@ void ovs_dp_detach_port(struct vport *p) if (p->port_no != OVSP_LOCAL) ovs_dp_sysfs_del_if(p); + dp_ifinfo_notify(RTM_DELLINK, p); /* First drop references to device. */ - list_del(&p->node); - rcu_assign_pointer(p->dp->ports[p->port_no], NULL); + hlist_del_rcu(&p->dp_hash_node); /* Then destroy it. */ ovs_vport_del(p); @@ -1354,7 +1373,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct vport *vport; struct ovs_net *ovs_net; - int err; + int err, i; err = -EINVAL; if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) @@ -1371,8 +1390,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) if (dp == NULL) goto err_unlock_rtnl; - INIT_LIST_HEAD(&dp->port_list); - /* Initialize kobject for bridge. This will be added as * /sys/class/net/<devname>/brif later, if sysfs is enabled. */ dp->ifobj.kset = NULL; @@ -1391,6 +1408,16 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) } ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); + dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head), + GFP_KERNEL); + if (!dp->ports) { + err = -ENOMEM; + goto err_destroy_percpu; + } + + for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) + INIT_HLIST_HEAD(&dp->ports[i]); + /* Set up our datapath device. 
*/ parms.name = nla_data(a[OVS_DP_ATTR_NAME]); parms.type = OVS_VPORT_TYPE_INTERNAL; @@ -1405,7 +1432,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) if (err == -EBUSY) err = -EEXIST; - goto err_destroy_percpu; + goto err_destroy_ports_array; } reply = ovs_dp_cmd_build_info(dp, info->snd_pid, @@ -1426,7 +1453,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) return 0; err_destroy_local_port: - ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL])); + ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL)); +err_destroy_ports_array: + kfree(dp->ports); err_destroy_percpu: free_percpu(dp->stats_percpu); err_destroy_table: @@ -1442,16 +1471,22 @@ err: /* Called with genl_mutex. */ static void __dp_destroy(struct datapath *dp) { - struct vport *vport, *next_vport; + int i; rtnl_lock(); - list_for_each_entry_safe(vport, next_vport, &dp->port_list, node) - if (vport->port_no != OVSP_LOCAL) - ovs_dp_detach_port(vport); + + for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { + struct vport *vport; + struct hlist_node *node, *n; + + hlist_for_each_entry_safe(vport, node, n, &dp->ports[i], dp_hash_node) + if (vport->port_no != OVSP_LOCAL) + ovs_dp_detach_port(vport); + } ovs_dp_sysfs_del_dp(dp); list_del(&dp->list_node); - ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL])); + ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL)); /* rtnl_unlock() will wait until all the references to devices that * are pending unregistration have been dropped. 
We do it here to @@ -1705,7 +1740,7 @@ static struct vport *lookup_vport(struct net *net, if (!dp) return ERR_PTR(-ENODEV); - vport = rcu_dereference_rtnl(dp->ports[port_no]); + vport = ovs_vport_rtnl_rcu(dp, port_no); if (!vport) return ERR_PTR(-ENOENT); return vport; @@ -1761,7 +1796,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) if (port_no >= DP_MAX_PORTS) goto exit_unlock; - vport = rtnl_dereference(dp->ports[port_no]); + vport = ovs_vport_rtnl(dp, port_no); err = -EBUSY; if (vport) goto exit_unlock; @@ -1771,7 +1806,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) err = -EFBIG; goto exit_unlock; } - vport = rtnl_dereference(dp->ports[port_no]); + vport = ovs_vport_rtnl(dp, port_no); if (!vport) break; } @@ -1936,32 +1971,39 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); struct datapath *dp; - u32 port_no; - int retval; + int bucket = cb->args[0], skip = cb->args[1]; + int i, j = 0; dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); if (!dp) return -ENODEV; rcu_read_lock(); - for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) { + for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) { struct vport *vport; - - vport = rcu_dereference(dp->ports[port_no]); - if (!vport) - continue; - - if (ovs_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - OVS_VPORT_CMD_NEW) < 0) - break; + struct hlist_node *n; + + j = 0; + hlist_for_each_entry_rcu(vport, n, &dp->ports[i], dp_hash_node) { + if (j >= skip && + ovs_vport_cmd_fill_info(vport, skb, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + OVS_VPORT_CMD_NEW) < 0) + goto out; + + j++; + } + skip = 0; } +out: rcu_read_unlock(); - cb->args[0] = port_no; - retval = skb->len; + cb->args[0] = i; + cb->args[1] = j; - return retval; + return skb->len; } static struct genl_ops dp_vport_genl_ops[] 
= { diff --git a/datapath/datapath.h b/datapath/datapath.h index b012a766..18c8598f 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -34,7 +34,9 @@ #include "vlan.h" #include "vport.h" -#define DP_MAX_PORTS 1024 +#define DP_MAX_PORTS USHRT_MAX +#define DP_VPORT_HASH_BUCKETS 1024 + #define SAMPLE_ACTION_DEPTH 3 /** @@ -63,10 +65,8 @@ struct dp_stats_percpu { * @ifobj: Represents /sys/class/net/<devname>/brif. Protected by RTNL. * @n_flows: Number of flows currently in flow table. * @table: Current flow table. Protected by genl_lock and RCU. - * @ports: Map from port number to &struct vport. %OVSP_LOCAL port - * always exists, other ports may be %NULL. Protected by RTNL and RCU. - * @port_list: List of all ports in @ports in arbitrary order. RTNL required - * to iterate or modify. + * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by + * RTNL and RCU. * @stats_percpu: Per-CPU datapath statistics. * @net: Reference to net namespace. * @@ -82,8 +82,7 @@ struct datapath { struct flow_table __rcu *table; /* Switch ports. */ - struct vport __rcu *ports[DP_MAX_PORTS]; - struct list_head port_list; + struct hlist_head *ports; /* Stats. 
*/ struct dp_stats_percpu __percpu *stats_percpu; @@ -159,6 +158,26 @@ static inline void ovs_dp_set_net(struct datapath *dp, struct net *net) write_pnet(&dp->net, net); } +struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no); + +static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no) +{ + WARN_ON_ONCE(!rcu_read_lock_held()); + return ovs_lookup_vport(dp, port_no); +} + +static inline struct vport *ovs_vport_rtnl_rcu(const struct datapath *dp, int port_no) +{ + WARN_ON_ONCE(!rcu_read_lock_held() && !rtnl_is_locked()); + return ovs_lookup_vport(dp, port_no); +} + +static inline struct vport *ovs_vport_rtnl(const struct datapath *dp, int port_no) +{ + ASSERT_RTNL(); + return ovs_lookup_vport(dp, port_no); +} + extern struct notifier_block ovs_dp_device_notifier; extern struct genl_multicast_group ovs_dp_vport_multicast_group; extern int (*ovs_dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd); diff --git a/datapath/dp_sysfs_dp.c b/datapath/dp_sysfs_dp.c index 2582321c..ec673cec 100644 --- a/datapath/dp_sysfs_dp.c +++ b/datapath/dp_sysfs_dp.c @@ -362,7 +362,7 @@ static struct attribute_group bridge_group = { */ int ovs_dp_sysfs_add_dp(struct datapath *dp) { - struct vport *vport = rtnl_dereference(dp->ports[OVSP_LOCAL]); + struct vport *vport = ovs_vport_rtnl(dp, OVSP_LOCAL); struct kobject *kobj = vport->ops->get_kobj(vport); int err; @@ -398,7 +398,7 @@ int ovs_dp_sysfs_add_dp(struct datapath *dp) int ovs_dp_sysfs_del_dp(struct datapath *dp) { - struct vport *vport = rtnl_dereference(dp->ports[OVSP_LOCAL]); + struct vport *vport = ovs_vport_rtnl(dp, OVSP_LOCAL); struct kobject *kobj = vport->ops->get_kobj(vport); #ifdef CONFIG_NET_NS diff --git a/datapath/dp_sysfs_if.c b/datapath/dp_sysfs_if.c index f564e980..436ee490 100644 --- a/datapath/dp_sysfs_if.c +++ b/datapath/dp_sysfs_if.c @@ -209,7 +209,7 @@ struct sysfs_ops ovs_brport_sysfs_ops = { int ovs_dp_sysfs_add_if(struct vport *p) { struct datapath *dp = 
p->dp; - struct vport *local_port = rtnl_dereference(dp->ports[OVSP_LOCAL]); + struct vport *local_port = ovs_vport_rtnl(dp, OVSP_LOCAL); struct brport_attribute **a; int err; diff --git a/datapath/flow.c b/datapath/flow.c index 823c6b5b..fb4fc217 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -203,10 +203,7 @@ struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions) int actions_len = nla_len(actions); struct sw_flow_actions *sfa; - /* At least DP_MAX_PORTS actions are required to be able to flood a - * packet to every port. Factor of 2 allows for setting VLAN tags, - * etc. */ - if (actions_len > 2 * DP_MAX_PORTS * nla_total_size(4)) + if (actions_len > MAX_ACTIONS_BUFSIZE) return ERR_PTR(-EINVAL); sfa = kmalloc(sizeof(*sfa) + actions_len, GFP_KERNEL); @@ -1020,7 +1017,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, swkey->phy.in_port = in_port; attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); } else { - swkey->phy.in_port = USHRT_MAX; + swkey->phy.in_port = DP_MAX_PORTS; } if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID)) { @@ -1169,7 +1166,7 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id, const struct nlattr *nla; int rem; - *in_port = USHRT_MAX; + *in_port = DP_MAX_PORTS; *tun_id = 0; *priority = 0; @@ -1213,7 +1210,7 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) if (swkey->phy.tun_id != cpu_to_be64(0)) NLA_PUT_BE64(skb, OVS_KEY_ATTR_TUN_ID, swkey->phy.tun_id); - if (swkey->phy.in_port != USHRT_MAX) + if (swkey->phy.in_port != DP_MAX_PORTS) NLA_PUT_U32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port); nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); diff --git a/datapath/flow.h b/datapath/flow.h index 61310d06..5261fa84 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -44,7 +44,7 @@ struct sw_flow_key { struct { __be64 tun_id; /* Encapsulating tunnel ID. */ u32 priority; /* Packet QoS priority. 
*/ - u16 in_port; /* Input switch port (or USHRT_MAX). */ + u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ } phy; struct { u8 src[ETH_ALEN]; /* Ethernet source address. */ @@ -168,6 +168,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id, const struct nlattr *); +#define MAX_ACTIONS_BUFSIZE (16 * 1024) #define TBL_MIN_BUCKETS 1024 struct flow_table { diff --git a/datapath/vport.c b/datapath/vport.c index d81f6869..b75a866a 100644 --- a/datapath/vport.c +++ b/datapath/vport.c @@ -192,6 +192,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, vport->port_no = parms->port_no; vport->upcall_pid = parms->upcall_pid; vport->ops = ops; + INIT_HLIST_NODE(&vport->dp_hash_node); /* Initialize kobject for bridge. This will be added as * /sys/class/net/<devname>/brport later, if sysfs is enabled. */ diff --git a/datapath/vport.h b/datapath/vport.h index ee9715d7..2aafde01 100644 --- a/datapath/vport.h +++ b/datapath/vport.h @@ -84,6 +84,7 @@ struct vport_err_stats { * @upcall_pid: The Netlink port to use for packets received on this port that * miss the flow table. * @hash_node: Element in @dev_table hash table in vport.c. + * @dp_hash_node: Element in @datapath->ports hash table in datapath.c. * @ops: Class structure. * @percpu_stats: Points to per-CPU statistics used and maintained by vport * @stats_lock: Protects @err_stats and @offset_stats. @@ -101,6 +102,7 @@ struct vport { u32 upcall_pid; struct hlist_node hash_node; + struct hlist_node dp_hash_node; const struct vport_ops *ops; struct vport_percpu_stats __percpu *percpu_stats; |