diff options
-rw-r--r-- | datapath/Modules.mk | 1 | ||||
-rw-r--r-- | datapath/datapath.c | 100 | ||||
-rw-r--r-- | datapath/datapath.h | 65 | ||||
-rw-r--r-- | datapath/flow.c | 30 | ||||
-rw-r--r-- | datapath/flow.h | 9 | ||||
-rw-r--r-- | datapath/table.c | 392 | ||||
-rw-r--r-- | datapath/table.h | 44 |
7 files changed, 371 insertions, 270 deletions
diff --git a/datapath/Modules.mk b/datapath/Modules.mk index 1e8bc047..ba9e01c2 100644 --- a/datapath/Modules.mk +++ b/datapath/Modules.mk @@ -27,6 +27,7 @@ openvswitch_headers = \ datapath.h \ dp_sysfs.h \ flow.h \ + table.h \ vport.h \ vport-internal_dev.h \ vport-netdev.h diff --git a/datapath/datapath.c b/datapath/datapath.c index a7b20f5f..9b34fcc5 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -45,6 +45,7 @@ #include "datapath.h" #include "actions.h" #include "flow.h" +#include "table.h" #include "vport-internal_dev.h" #include "compat.h" @@ -240,7 +241,7 @@ static int create_dp(int dp_idx, const char __user *devnamep) /* Allocate table. */ err = -ENOMEM; - rcu_assign_pointer(dp->table, dp_table_create(DP_L1_SIZE)); + rcu_assign_pointer(dp->table, tbl_create(0)); if (!dp->table) goto err_free_dp; @@ -271,7 +272,7 @@ static int create_dp(int dp_idx, const char __user *devnamep) err_destroy_local_port: dp_detach_port(dp->ports[ODPP_LOCAL], 1); err_destroy_table: - dp_table_destroy(dp->table, 0); + tbl_destroy(dp->table, NULL); err_free_dp: kfree(dp); err_put_module: @@ -298,7 +299,7 @@ static void do_destroy_dp(struct datapath *dp) dp_detach_port(dp->ports[ODPP_LOCAL], 1); - dp_table_destroy(dp->table, 1); + tbl_destroy(dp->table, flow_free_tbl); for (i = 0; i < DP_N_QUEUES; i++) skb_queue_purge(&dp->queues[i]); @@ -511,7 +512,7 @@ void dp_process_received_packet(struct dp_port *p, struct sk_buff *skb) struct datapath *dp = p->dp; struct dp_stats_percpu *stats; struct odp_flow_key key; - struct sw_flow *flow; + struct tbl_node *flow_node; WARN_ON_ONCE(skb_shared(skb)); skb_warn_if_lro(skb); @@ -530,8 +531,9 @@ void dp_process_received_packet(struct dp_port *p, struct sk_buff *skb) } } - flow = dp_table_lookup(rcu_dereference(dp->table), &key); - if (flow) { + flow_node = tbl_lookup(rcu_dereference(dp->table), &key, flow_hash(&key), flow_cmp); + if (flow_node) { + struct sw_flow *flow = flow_cast(flow_node); struct sw_flow_actions *acts = rcu_dereference(flow->sf_acts); flow_used(flow, skb); execute_actions(dp, skb, &key, acts->actions, acts->n_actions, @@ -854,8 +856,18 @@ err: static int flush_flows(struct datapath *dp) { - dp->n_flows = 0; - return dp_table_flush(dp); + struct tbl *old_table = rcu_dereference(dp->table); + struct tbl *new_table; + + new_table = tbl_create(0); + if (!new_table) + return -ENOMEM; + + rcu_assign_pointer(dp->table, new_table); + + tbl_deferred_destroy(old_table, flow_free_tbl); + + return 0; } static int validate_actions(const struct sw_flow_actions *actions) @@ -952,11 +964,27 @@ static void clear_stats(struct sw_flow *flow) flow->byte_count = 0; } +static int expand_table(struct datapath *dp) +{ + struct tbl *old_table = rcu_dereference(dp->table); + struct tbl *new_table; + + new_table = tbl_expand(old_table); + if (IS_ERR(new_table)) + return PTR_ERR(new_table); + + rcu_assign_pointer(dp->table, new_table); + tbl_deferred_destroy(old_table, NULL); + + return 0; +} + static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp) { struct odp_flow_put uf; + struct tbl_node *flow_node; struct sw_flow *flow; - struct dp_table *table; + struct tbl *table; struct odp_flow_stats stats; int error; @@ -966,8 +994,8 @@ static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp) memset(uf.flow.key.reserved, 0, sizeof uf.flow.key.reserved); table = rcu_dereference(dp->table); - flow = dp_table_lookup(table, &uf.flow.key); - if (!flow) { + flow_node = tbl_lookup(table, &uf.flow.key, flow_hash(&uf.flow.key), flow_cmp); + if (!flow_node) { /* No such flow. */ struct sw_flow_actions *acts; @@ -976,12 +1004,8 @@ static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp) goto error; /* Expand table, if necessary, to make room. */ - if (dp->n_flows >= table->n_buckets) { - error = -ENOSPC; - if (table->n_buckets >= DP_MAX_BUCKETS) - goto error; - - error = dp_table_expand(dp); + if (tbl_count(table) >= tbl_n_buckets(table)) { + error = expand_table(dp); if (error) goto error; table = rcu_dereference(dp->table); @@ -1004,16 +1028,18 @@ static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp) rcu_assign_pointer(flow->sf_acts, acts); /* Put flow in bucket. */ - error = dp_table_insert(table, flow); + error = tbl_insert(table, &flow->tbl_node, flow_hash(&flow->key)); if (error) goto error_free_flow_acts; - dp->n_flows++; + memset(&stats, 0, sizeof(struct odp_flow_stats)); } else { /* We found a matching flow. */ struct sw_flow_actions *old_acts, *new_acts; unsigned long int flags; + flow = flow_cast(flow_node); + /* Bail out if we're not allowed to modify an existing flow. */ error = -EEXIST; if (!(uf.flags & ODPPF_MODIFY)) @@ -1100,8 +1126,9 @@ static int answer_query(struct sw_flow *flow, u32 query_flags, static int del_flow(struct datapath *dp, struct odp_flow __user *ufp) { - struct dp_table *table = rcu_dereference(dp->table); + struct tbl *table = rcu_dereference(dp->table); struct odp_flow uf; + struct tbl_node *flow_node; struct sw_flow *flow; int error; @@ -1110,13 +1137,12 @@ static int del_flow(struct datapath *dp, struct odp_flow __user *ufp) goto error; memset(uf.key.reserved, 0, sizeof uf.key.reserved); - flow = dp_table_lookup(table, &uf.key); + flow_node = tbl_lookup(table, &uf.key, flow_hash(&uf.key), flow_cmp); error = -ENOENT; - if (!flow) + if (!flow_node) goto error; - /* XXX redundant lookup */ - error = dp_table_delete(table, flow); + error = tbl_remove(table, flow_node); if (error) goto error; @@ -1124,7 +1150,8 @@ static int del_flow(struct datapath *dp, struct odp_flow __user *ufp) * be using this flow. We used to synchronize_rcu() to make sure that * we get completely accurate stats, but that blows our performance, * badly. */ - dp->n_flows--; + + flow = flow_cast(flow_node); error = answer_query(flow, 0, ufp); flow_deferred_free(flow); @@ -1134,23 +1161,23 @@ error: static int query_flows(struct datapath *dp, const struct odp_flowvec *flowvec) { - struct dp_table *table = rcu_dereference(dp->table); + struct tbl *table = rcu_dereference(dp->table); int i; for (i = 0; i < flowvec->n_flows; i++) { struct __user odp_flow *ufp = &flowvec->flows[i]; struct odp_flow uf; - struct sw_flow *flow; + struct tbl_node *flow_node; int error; if (__copy_from_user(&uf, ufp, sizeof uf)) return -EFAULT; memset(uf.key.reserved, 0, sizeof uf.key.reserved); - flow = dp_table_lookup(table, &uf.key); - if (!flow) + flow_node = tbl_lookup(table, &uf.key, flow_hash(&uf.key), flow_cmp); + if (!flow_node) error = __put_user(ENOENT, &ufp->stats.error); else - error = answer_query(flow, uf.flags, ufp); + error = answer_query(flow_cast(flow_node), uf.flags, ufp); if (error) return -EFAULT; } @@ -1163,8 +1190,9 @@ struct list_flows_cbdata { int listed_flows; }; -static int list_flow(struct sw_flow *flow, void *cbdata_) +static int list_flow(struct tbl_node *node, void *cbdata_) { + struct sw_flow *flow = flow_cast(node); struct list_flows_cbdata *cbdata = cbdata_; struct odp_flow __user *ufp = &cbdata->uflows[cbdata->listed_flows++]; int error; @@ -1191,8 +1219,7 @@ static int list_flows(struct datapath *dp, const struct odp_flowvec *flowvec) cbdata.uflows = flowvec->flows; cbdata.n_flows = flowvec->n_flows; cbdata.listed_flows = 0; - error = dp_table_foreach(rcu_dereference(dp->table), - list_flow, &cbdata); + error = tbl_foreach(rcu_dereference(dp->table), list_flow, &cbdata); return error ? error : cbdata.listed_flows; } @@ -1295,12 +1322,13 @@ error: static int get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp) { + struct tbl *table = rcu_dereference(dp->table); struct odp_stats stats; int i; - stats.n_flows = dp->n_flows; - stats.cur_capacity = rcu_dereference(dp->table)->n_buckets; - stats.max_capacity = DP_MAX_BUCKETS; + stats.n_flows = tbl_count(table); + stats.cur_capacity = tbl_n_buckets(table); + stats.max_capacity = TBL_MAX_BUCKETS; stats.n_ports = dp->n_ports; stats.max_ports = DP_MAX_PORTS; stats.max_groups = DP_MAX_GROUPS; diff --git a/datapath/datapath.h b/datapath/datapath.h index 553e19fd..991a7e81 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -32,56 +32,6 @@ struct dp_port; #define DP_MAX_PORTS 1024 #define DP_MAX_GROUPS 16 -#define DP_L2_BITS (PAGE_SHIFT - ilog2(sizeof(struct dp_bucket*))) -#define DP_L2_SIZE (1 << DP_L2_BITS) -#define DP_L2_SHIFT 0 - -#define DP_L1_BITS (PAGE_SHIFT - ilog2(sizeof(struct dp_bucket**))) -#define DP_L1_SIZE (1 << DP_L1_BITS) -#define DP_L1_SHIFT DP_L2_BITS - -/* For 4 kB pages, this is 1,048,576 on 32-bit or 262,144 on 64-bit. */ -#define DP_MAX_BUCKETS (DP_L1_SIZE * DP_L2_SIZE) - -/** - * struct dp_table - flow table - * @n_buckets: number of buckets (a power of 2 between %DP_L1_SIZE and - * %DP_MAX_BUCKETS) - * @buckets: pointer to @n_buckets/%DP_L1_SIZE pointers to %DP_L1_SIZE pointers - * to buckets - * @hash_seed: random number used for flow hashing, to make the hash - * distribution harder to predict - * @rcu: RCU callback structure - * - * The @buckets array is logically an array of pointers to buckets. It is - * broken into two levels to avoid the need to kmalloc() any object larger than - * a single page or to use vmalloc(). @buckets is always nonnull, as is each - * @buckets[i], but each @buckets[i][j] is nonnull only if the specified hash - * bucket is nonempty (for 0 <= i < @n_buckets/%DP_L1_SIZE, 0 <= j < - * %DP_L1_SIZE). - */ -struct dp_table { - unsigned int n_buckets; - struct dp_bucket ***buckets; - unsigned int hash_seed; - struct rcu_head rcu; -}; - -/** - * struct dp_bucket - single bucket within datapath flow table - * @rcu: RCU callback structure - * @n_flows: number of flows in @flows[] array - * @flows: array of @n_flows pointers to flows - * - * The expected number of flows per bucket is 1, but this allows for an - * arbitrary number of collisions. - */ -struct dp_bucket { - struct rcu_head rcu; - unsigned int n_flows; - struct sw_flow *flows[]; -}; - #define DP_N_QUEUES 3 #define DP_MAX_QUEUE_LEN 100 @@ -143,8 +93,7 @@ struct datapath { wait_queue_head_t waitqueue; /* Flow table. */ - unsigned int n_flows; - struct dp_table *table; + struct tbl *table; /* Port groups. */ struct dp_port_group *groups[DP_MAX_GROUPS]; @@ -208,18 +157,6 @@ struct ovs_skb_cb { extern struct notifier_block dp_device_notifier; extern int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd); -/* Flow table. */ -struct dp_table *dp_table_create(unsigned int n_buckets); -void dp_table_destroy(struct dp_table *, int free_flows); -struct sw_flow *dp_table_lookup(struct dp_table *, const struct odp_flow_key *); -int dp_table_insert(struct dp_table *, struct sw_flow *); -int dp_table_delete(struct dp_table *, struct sw_flow *); -int dp_table_expand(struct datapath *); -int dp_table_flush(struct datapath *); -int dp_table_foreach(struct dp_table *table, - int (*callback)(struct sw_flow *flow, void *aux), - void *aux); - void dp_process_received_packet(struct dp_port *, struct sk_buff *); int dp_detach_port(struct dp_port *, int may_delete); int dp_output_control(struct datapath *, struct sk_buff *, int, u32 arg); diff --git a/datapath/flow.c b/datapath/flow.c index 8228da2e..548c729a 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -14,6 +14,7 @@ #include <linux/if_vlan.h> #include <net/llc_pdu.h> #include <linux/kernel.h> +#include <linux/jhash.h> #include <linux/jiffies.h> #include <linux/llc.h> #include <linux/module.h> @@ -31,6 +32,7 @@ #include "compat.h" struct kmem_cache *flow_cache; +static unsigned int hash_seed; struct arp_eth_header { @@ -134,7 +136,7 @@ struct sw_flow_actions *flow_actions_alloc(size_t n_actions) /* Frees 'flow' immediately. */ -void flow_free(struct sw_flow *flow) +static void flow_free(struct sw_flow *flow) { if (unlikely(!flow)) return; @@ -142,6 +144,12 @@ void flow_free(struct sw_flow *flow) kmem_cache_free(flow_cache, flow); } +void flow_free_tbl(struct tbl_node *node) +{ + struct sw_flow *flow = flow_cast(node); + flow_free(flow); +} + /* RCU callback used by flow_deferred_free. */ static void rcu_free_flow_callback(struct rcu_head *rcu) { @@ -319,6 +327,24 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key) return retval; } +struct sw_flow *flow_cast(const struct tbl_node *node) +{ + return container_of(node, struct sw_flow, tbl_node); +} + +u32 flow_hash(const struct odp_flow_key *key) +{ + return jhash2((u32*)key, sizeof *key / sizeof(u32), hash_seed); +} + +int flow_cmp(const struct tbl_node *node, void *key2_) +{ + const struct odp_flow_key *key1 = &flow_cast(node)->key; + const struct odp_flow_key *key2 = key2_; + + return !memcmp(key1, key2, sizeof(struct odp_flow_key)); +} + /* Initializes the flow module. * Returns zero if successful or a negative error code. */ int flow_init(void) @@ -328,6 +354,8 @@ int flow_init(void) if (flow_cache == NULL) return -ENOMEM; + get_random_bytes(&hash_seed, sizeof hash_seed); + return 0; } diff --git a/datapath/flow.h b/datapath/flow.h index 44cc3a60..4a393cb9 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -16,6 +16,7 @@ #include <linux/gfp.h> #include "openvswitch/datapath-protocol.h" +#include "table.h" struct sk_buff; @@ -27,6 +28,8 @@ struct sw_flow_actions { struct sw_flow { struct rcu_head rcu; + struct tbl_node tbl_node; + struct odp_flow_key key; struct sw_flow_actions *sf_acts; @@ -43,12 +46,16 @@ struct sw_flow { extern struct kmem_cache *flow_cache; struct sw_flow_actions *flow_actions_alloc(size_t n_actions); -void flow_free(struct sw_flow *); void flow_deferred_free(struct sw_flow *); void flow_deferred_free_acts(struct sw_flow_actions *); int flow_extract(struct sk_buff *, u16 in_port, struct odp_flow_key *); void flow_used(struct sw_flow *, struct sk_buff *); +struct sw_flow *flow_cast(const struct tbl_node *); +u32 flow_hash(const struct odp_flow_key *key); +int flow_cmp(const struct tbl_node *, void *target); +void flow_free_tbl(struct tbl_node *); + int flow_init(void); void flow_exit(void); diff --git a/datapath/table.c b/datapath/table.c index 23ae8abe..e4561d69 100644 --- a/datapath/table.c +++ b/datapath/table.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009 Nicira Networks. + * Copyright (c) 2009, 2010 Nicira Networks. * Distributed under the terms of the GNU GPL version 2. * * Significant portions of this file may be copied from parts of the Linux @@ -8,44 +8,80 @@ #include "flow.h" #include "datapath.h" +#include "table.h" #include <linux/gfp.h> -#include <linux/jhash.h> -#include <linux/random.h> #include <linux/slab.h> -#include <linux/vmalloc.h> #include <linux/mm.h> -#include <linux/highmem.h> #include <asm/pgtable.h> -static inline int bucket_size(int n_flows) +/** + * struct tbl - hash table + * @n_buckets: number of buckets (a power of 2 between %TBL_L1_SIZE and + * %TBL_MAX_BUCKETS) + * @buckets: pointer to @n_buckets/%TBL_L1_SIZE pointers to %TBL_L1_SIZE pointers + * to buckets + * @rcu: RCU callback structure + * @obj_destructor: Called on each element when the table is destroyed. + * + * The @buckets array is logically an array of pointers to buckets. It is + * broken into two levels to avoid the need to kmalloc() any object larger than + * a single page or to use vmalloc(). @buckets is always nonnull, as is each + * @buckets[i], but each @buckets[i][j] is nonnull only if the specified hash + * bucket is nonempty (for 0 <= i < @n_buckets/%TBL_L1_SIZE, 0 <= j < + * %TBL_L1_SIZE). + */ +struct tbl { + struct rcu_head rcu; + unsigned int n_buckets; + struct tbl_bucket ***buckets; + unsigned int count; + void (*obj_destructor)(struct tbl_node *); +}; + +/** + * struct tbl_bucket - single bucket within a hash table + * @rcu: RCU callback structure + * @n_objs: number of objects in @objs[] array + * @objs: array of @n_objs pointers to table nodes contained inside objects + * + * The expected number of objects per bucket is 1, but this allows for an + * arbitrary number of collisions. + */ +struct tbl_bucket { + struct rcu_head rcu; + unsigned int n_objs; + struct tbl_node *objs[]; +}; + +static inline int bucket_size(int n_objs) { - return sizeof(struct dp_bucket) + sizeof(struct sw_flow*) * n_flows; + return sizeof(struct tbl_bucket) + sizeof(struct tbl_node *) * n_objs; } -static struct dp_bucket *dp_bucket_alloc(int n_flows) +static struct tbl_bucket *bucket_alloc(int n_objs) { - return kmalloc(bucket_size(n_flows), GFP_KERNEL); + return kmalloc(bucket_size(n_objs), GFP_KERNEL); } -static void free_buckets(struct dp_bucket ***l1, unsigned int n_buckets, - int free_flows) +static void free_buckets(struct tbl_bucket ***l1, unsigned int n_buckets, + void (*free_obj)(struct tbl_node *)) { unsigned int i; - for (i = 0; i < n_buckets >> DP_L1_BITS; i++) { - struct dp_bucket **l2 = l1[i]; + for (i = 0; i < n_buckets >> TBL_L1_BITS; i++) { + struct tbl_bucket **l2 = l1[i]; unsigned int j; - for (j = 0; j < DP_L1_SIZE; j++) { - struct dp_bucket *bucket = l2[j]; + for (j = 0; j < TBL_L1_SIZE; j++) { + struct tbl_bucket *bucket = l2[j]; if (!bucket) continue; - if (free_flows) { + if (free_obj) { unsigned int k; - for (k = 0; k < bucket->n_flows; k++) - flow_free(bucket->flows[k]); + for (k = 0; k < bucket->n_objs; k++) + free_obj(bucket->objs[k]); } kfree(bucket); } @@ -54,19 +90,19 @@ static void free_buckets(struct dp_bucket ***l1, unsigned int n_buckets, kfree(l1); } -static struct dp_bucket ***alloc_buckets(unsigned int n_buckets) +static struct tbl_bucket ***alloc_buckets(unsigned int n_buckets) { - struct dp_bucket ***l1; + struct tbl_bucket ***l1; unsigned int i; - l1 = kmalloc((n_buckets >> DP_L1_BITS) * sizeof(struct dp_bucket**), + l1 = kmalloc((n_buckets >> TBL_L1_BITS) * sizeof(struct tbl_bucket **), GFP_KERNEL); if (!l1) return NULL; - for (i = 0; i < n_buckets >> DP_L1_BITS; i++) { - l1[i] = (struct dp_bucket **)get_zeroed_page(GFP_KERNEL); + for (i = 0; i < n_buckets >> TBL_L1_BITS; i++) { + l1[i] = (struct tbl_bucket **)get_zeroed_page(GFP_KERNEL); if (!l1[i]) { - free_buckets(l1, i << DP_L1_BITS, 0); + free_buckets(l1, i << TBL_L1_BITS, 0); return NULL; } } @@ -74,16 +110,19 @@ static struct dp_bucket ***alloc_buckets(unsigned int n_buckets) } /** - * dp_table_create - create and return a new flow table + * tbl_create - create and return a new hash table * @n_buckets: number of buckets in the new table * - * Creates and returns a new flow table, or %NULL if memory cannot be - * allocated. @n_buckets must be a power of 2 in the range %DP_L1_SIZE to - * %DP_MAX_BUCKETS. + * Creates and returns a new hash table, or %NULL if memory cannot be + * allocated. @n_buckets must be a power of 2 in the range %TBL_L1_SIZE to + * %TBL_MAX_BUCKETS. */ -struct dp_table *dp_table_create(unsigned int n_buckets) +struct tbl *tbl_create(unsigned int n_buckets) { - struct dp_table *table; + struct tbl *table; + + if (!n_buckets) + n_buckets = TBL_L1_SIZE; table = kzalloc(sizeof *table, GFP_KERNEL); if (!table) @@ -93,7 +132,6 @@ struct dp_table *dp_table_create(unsigned int n_buckets) table->buckets = alloc_buckets(n_buckets); if (!table->buckets) goto err_free_table; - get_random_bytes(&table->hash_seed, sizeof table->hash_seed); return table; @@ -104,108 +142,126 @@ err: } /** - * dp_table_destroy - destroy flow table and optionally the flows it contains - * @table: table to destroy (must not be %NULL) - * @free_flows: whether to destroy the flows + * tbl_destroy - destroy hash table and optionally the objects it contains + * @table: table to destroy + * @destructor: function to be called on objects at destruction time * - * If @free_flows is zero, then the buckets in @table are destroyed but not the - * flows within those buckets. This behavior is useful when a table is being - * replaced by a larger or smaller one without destroying the flows. + * If a destructor is null, then the buckets in @table are destroyed + * but not the objects within those buckets. This behavior is useful when a + * table is being replaced by a larger or smaller one without destroying the + * objects. * - * If @free_flows is nonzero, then the flows in @table are destroyed as well as - * the buckets. + * If a destructor is not null, then it is called on the objects in @table + * before destroying the buckets. */ -void dp_table_destroy(struct dp_table *table, int free_flows) +void tbl_destroy(struct tbl *table, void (*destructor)(struct tbl_node *)) { - free_buckets(table->buckets, table->n_buckets, free_flows); + if (!table) + return; + + free_buckets(table->buckets, table->n_buckets, destructor); kfree(table); } -static struct dp_bucket **find_bucket(struct dp_table *table, u32 hash) +static void destroy_table_rcu(struct rcu_head *rcu) { - unsigned int l1 = (hash & (table->n_buckets - 1)) >> DP_L1_SHIFT; - unsigned int l2 = hash & ((1 << DP_L2_BITS) - 1); + struct tbl *table = container_of(rcu, struct tbl, rcu); + tbl_destroy(table, table->obj_destructor); +} + +/** + * tbl_deferred_destroy - destroy table after a RCU grace period + * @table: table to destroy + * @destructor: function to be called on objects at destruction time + * + * Calls tbl_destroy() on @table after an RCU grace period. If @destructor is + * not null it is called on every element before the table is destroyed. */ +void tbl_deferred_destroy(struct tbl *table, void (*destructor)(struct tbl_node *)) +{ + if (!table) + return; + + table->obj_destructor = destructor; + call_rcu(&table->rcu, destroy_table_rcu); +} + +static struct tbl_bucket **find_bucket(struct tbl *table, u32 hash) +{ + unsigned int l1 = (hash & (table->n_buckets - 1)) >> TBL_L1_SHIFT; + unsigned int l2 = hash & ((1 << TBL_L2_BITS) - 1); return &table->buckets[l1][l2]; } -static int search_bucket(const struct dp_bucket *bucket, const struct odp_flow_key *key) +static int search_bucket(const struct tbl_bucket *bucket, void *target, u32 hash, + int (*cmp)(const struct tbl_node *, void *)) { int i; - for (i = 0; i < bucket->n_flows; i++) { - struct sw_flow *flow = rcu_dereference(bucket->flows[i]); - if (!memcmp(&flow->key, key, sizeof(struct odp_flow_key))) + for (i = 0; i < bucket->n_objs; i++) { + struct tbl_node *obj = rcu_dereference(bucket->objs[i]); + if (obj->hash == hash && likely(cmp(obj, target))) return i; } return -1; } -static struct sw_flow *lookup_flow(struct dp_table *table, u32 hash, - const struct odp_flow_key *key) +/** + * tbl_lookup - searches hash table for a matching object + * @table: hash table to search + * @target: identifier for the object that is being searched for, will be + * provided as an argument to @cmp when making comparisions + * @hash: hash of @target + * @cmp: comparision function to match objects with the given hash, returns + * nonzero if the objects match, zero otherwise + * + * Searches @table for an object identified by @target. Returns the tbl_node + * contained in the object if successful, otherwise %NULL. + */ +struct tbl_node *tbl_lookup(struct tbl *table, void *target, u32 hash, + int (*cmp)(const struct tbl_node *, void *)) { - struct dp_bucket **bucketp = find_bucket(table, hash); - struct dp_bucket *bucket = rcu_dereference(*bucketp); + struct tbl_bucket **bucketp = find_bucket(table, hash); + struct tbl_bucket *bucket = rcu_dereference(*bucketp); int index; if (!bucket) return NULL; - index = search_bucket(bucket, key); + index = search_bucket(bucket, target, hash, cmp); if (index < 0) return NULL; - return bucket->flows[index]; -} - -static u32 flow_hash(const struct dp_table *table, - const struct odp_flow_key *key) -{ - return jhash2((u32*)key, sizeof *key / sizeof(u32), table->hash_seed); + return bucket->objs[index]; } /** - * dp_table_lookup - searches flow table for a matching flow - * @table: flow table to search - * @key: flow key for which to search - * - * Searches @table for a flow whose key is equal to @key. Returns the flow if - * successful, otherwise %NULL. - */ -struct sw_flow *dp_table_lookup(struct dp_table *table, - const struct odp_flow_key *key) -{ - return lookup_flow(table, flow_hash(table, key), key); -} - -/** - * dp_table_foreach - iterate through flow table + * tbl_foreach - iterate through hash table * @table: table to iterate - * @callback: function to call for each flow entry + * @callback: function to call for each entry * @aux: Extra data to pass to @callback * - * Iterates through all of the flows in @table in hash order, passing each of + * Iterates through all of the objects in @table in hash order, passing each of * them in turn to @callback. If @callback returns nonzero, this terminates - * the iteration and dp_table_foreach() returns the same value. Returns 0 if + * the iteration and tbl_foreach() returns the same value. Returns 0 if * @callback never returns nonzero. * * This function does not try to intelligently handle the case where @callback * adds or removes flows in @table. */ -int dp_table_foreach(struct dp_table *table, - int (*callback)(struct sw_flow *flow, void *aux), - void *aux) +int tbl_foreach(struct tbl *table, + int (*callback)(struct tbl_node *, void *aux), void *aux) { unsigned int i, j, k; - for (i = 0; i < table->n_buckets >> DP_L1_BITS; i++) { - struct dp_bucket **l2 = table->buckets[i]; - for (j = 0; j < DP_L1_SIZE; j++) { - struct dp_bucket *bucket = rcu_dereference(l2[j]); + for (i = 0; i < table->n_buckets >> TBL_L1_BITS; i++) { + struct tbl_bucket **l2 = table->buckets[i]; + for (j = 0; j < TBL_L1_SIZE; j++) { + struct tbl_bucket *bucket = rcu_dereference(l2[j]); if (!bucket) continue; - for (k = 0; k < bucket->n_flows; k++) { - int error = (*callback)(bucket->flows[k], aux); + for (k = 0; k < bucket->n_objs; k++) { + int error = (*callback)(bucket->objs[k], aux); if (error) return error; } @@ -214,154 +270,154 @@ int dp_table_foreach(struct dp_table *table, return 0; } -static int insert_flow(struct sw_flow *flow, void *new_table_) -{ - struct dp_table *new_table = new_table_; - return dp_table_insert(new_table, flow); -} - -static void dp_free_table_rcu(struct rcu_head *rcu) +static int insert_table_flow(struct tbl_node *node, void *new_table_) { - struct dp_table *table = container_of(rcu, struct dp_table, rcu); - dp_table_destroy(table, 0); + struct tbl *new_table = new_table_; + return tbl_insert(new_table, node, node->hash); } /** - * dp_table_expand - replace datapath's flow table by one with more buckets - * @dp: datapath to expand + * tbl_expand - create a hash table with more buckets + * @table: table to expand * - * Replaces @dp's flow table by one that has twice as many buckets. All of the - * flows in @dp's flow table are moved to the new flow table. Returns 0 if - * successful, otherwise a negative error. + * Creates a new table containing the same objects as @table but with twice + * as many buckets. Returns 0 if successful, otherwise a negative error. The + * caller should free @table upon success (probably using + * tbl_deferred_destroy()). */ -int dp_table_expand(struct datapath *dp) +struct tbl *tbl_expand(struct tbl *table) { - struct dp_table *old_table = rcu_dereference(dp->table); - struct dp_table *new_table; + int err; + int n_buckets = table->n_buckets * 2; + struct tbl *new_table; - new_table = dp_table_create(old_table->n_buckets * 2); + if (n_buckets >= TBL_MAX_BUCKETS) { + err = -ENOSPC; + goto error; + } + + err = -ENOMEM; + new_table = tbl_create(n_buckets); if (!new_table) goto error; - if (dp_table_foreach(old_table, insert_flow, new_table)) + if (tbl_foreach(table, insert_table_flow, new_table)) goto error_free_new_table; - rcu_assign_pointer(dp->table, new_table); - call_rcu(&old_table->rcu, dp_free_table_rcu); - return 0; + return new_table; error_free_new_table: - dp_table_destroy(new_table, 0); + tbl_destroy(new_table, NULL); error: - return -ENOMEM; -} - -static void dp_free_table_and_flows_rcu(struct rcu_head *rcu) -{ - struct dp_table *table = container_of(rcu, struct dp_table, rcu); - dp_table_destroy(table, 1); + return ERR_PTR(err); } /** - * dp_table_flush - clear datapath's flow table - * @dp: datapath to clear + * tbl_n_buckets - returns the number of buckets + * @table: table to examine * - * Replaces @dp's flow table by an empty flow table, destroying all the flows - * in the old table (after a suitable RCU grace period). + * Returns the number of buckets currently allocated in @table, useful when + * deciding whether to expand. */ -int dp_table_flush(struct datapath *dp) +int tbl_n_buckets(struct tbl *table) { - struct dp_table *old_table = rcu_dereference(dp->table); - struct dp_table *new_table = dp_table_create(DP_L1_SIZE); - if (!new_table) - return -ENOMEM; - rcu_assign_pointer(dp->table, new_table); - call_rcu(&old_table->rcu, dp_free_table_and_flows_rcu); - return 0; + return table->n_buckets; } -static void dp_free_bucket_rcu(struct rcu_head *rcu) +static void free_bucket_rcu(struct rcu_head *rcu) { - struct dp_bucket *bucket = container_of(rcu, struct dp_bucket, rcu); + struct tbl_bucket *bucket = container_of(rcu, struct tbl_bucket, rcu); kfree(bucket); } /** - * dp_table_insert - insert flow into table - * @table: table in which to insert flow - * @target: flow to insert + * tbl_insert - insert object into table + * @table: table in which to insert object + * @target: tbl_node contained in object to insert + * @hash: hash of object to insert * - * The caller must ensure that no flow with key identical to @target->key + * The caller must ensure that no object considered to be identical to @target * already exists in @table. Returns 0 or a negative error (currently just * -ENOMEM). - * - * The caller is responsible for updating &struct datapath's n_flows member. */ -int dp_table_insert(struct dp_table *table, struct sw_flow *target) +int tbl_insert(struct tbl *table, struct tbl_node *target, u32 hash) { - u32 hash = flow_hash(table, &target->key); - struct dp_bucket **oldp = find_bucket(table, hash); - struct dp_bucket *old = *rcu_dereference(oldp); - unsigned int n = old ? old->n_flows : 0; - struct dp_bucket *new = dp_bucket_alloc(n + 1); + struct tbl_bucket **oldp = find_bucket(table, hash); + struct tbl_bucket *old = *rcu_dereference(oldp); + unsigned int n = old ? old->n_objs : 0; + struct tbl_bucket *new = bucket_alloc(n + 1); if (!new) return -ENOMEM; - new->n_flows = n + 1; + target->hash = hash; + + new->n_objs = n + 1; if (old) - memcpy(new->flows, old->flows, n * sizeof(struct sw_flow*)); - new->flows[n] = target; + memcpy(new->objs, old->objs, n * sizeof(struct tbl_node *)); + new->objs[n] = target; rcu_assign_pointer(*oldp, new); if (old) - call_rcu(&old->rcu, dp_free_bucket_rcu); + call_rcu(&old->rcu, free_bucket_rcu); + + table->count++; return 0; } /** - * dp_table_delete - remove flow from table - * @table: table from which to remove flow - * @target: flow to remove + * tbl_delete - remove object from table + * @table: table from which to remove object + * @target: tbl_node inside of object to remove * * The caller must ensure that @target itself is in @table. (It is not - * good enough for @table to contain a different flow with a key equal to - * @target's key.) + * good enough for @table to contain a different object considered identical + * @target.) * * Returns 0 or a negative error (currently just -ENOMEM). Yes, it *is* - * possible for a flow deletion to fail due to lack of memory. - * - * The caller is responsible for updating &struct datapath's n_flows member. + * possible for object deletion to fail due to lack of memory. */ -int dp_table_delete(struct dp_table *table, struct sw_flow *target) +int tbl_remove(struct tbl *table, struct tbl_node *target) { - u32 hash = flow_hash(table, &target->key); - struct dp_bucket **oldp = find_bucket(table, hash); - struct dp_bucket *old = *rcu_dereference(oldp); - unsigned int n = old->n_flows; - struct dp_bucket *new; + struct tbl_bucket **oldp = find_bucket(table, target->hash); + struct tbl_bucket *old = *rcu_dereference(oldp); + unsigned int n = old->n_objs; + struct tbl_bucket *new; if (n > 1) { unsigned int i; - new = dp_bucket_alloc(n - 1); + new = bucket_alloc(n - 1); if (!new) return -ENOMEM; - new->n_flows = 0; + new->n_objs = 0; for (i = 0; i < n; i++) { - struct sw_flow *flow = old->flows[i]; - if (flow != target) - new->flows[new->n_flows++] = flow; + struct tbl_node *obj = old->objs[i]; + if (obj != target) + new->objs[new->n_objs++] = obj; } - WARN_ON_ONCE(new->n_flows != n - 1); + WARN_ON_ONCE(new->n_objs != n - 1); } else { new = NULL; } rcu_assign_pointer(*oldp, new); - call_rcu(&old->rcu, dp_free_bucket_rcu); + call_rcu(&old->rcu, free_bucket_rcu); + + table->count--; return 0; } + +/** + * tbl_count - retrieves the number of stored objects + * @table: table to count + * + * Returns the number of objects that have been inserted into the hash table. + */ +unsigned int tbl_count(struct tbl *table) +{ + return table->count; +} diff --git a/datapath/table.h b/datapath/table.h new file mode 100644 index 00000000..9ae1ee32 --- /dev/null +++ b/datapath/table.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2010 Nicira Networks. + * Distributed under the terms of the GNU GPL version 2. + * + * Significant portions of this file may be copied from parts of the Linux + * kernel, by Linus Torvalds and others. + */ + +#ifndef TABLE_H +#define TABLE_H 1 + +struct tbl; +struct tbl_bucket; + +struct tbl_node { + u32 hash; +}; + +#define TBL_L2_BITS (PAGE_SHIFT - ilog2(sizeof(struct tbl_bucket *))) +#define TBL_L2_SIZE (1 << TBL_L2_BITS) +#define TBL_L2_SHIFT 0 + +#define TBL_L1_BITS (PAGE_SHIFT - ilog2(sizeof(struct tbl_bucket **))) +#define TBL_L1_SIZE (1 << TBL_L1_BITS) +#define TBL_L1_SHIFT TBL_L2_BITS + +/* For 4 kB pages, this is 1,048,576 on 32-bit or 262,144 on 64-bit. */ +#define TBL_MAX_BUCKETS (TBL_L1_SIZE * TBL_L2_SIZE) + +struct tbl *tbl_create(unsigned int n_buckets); +void tbl_destroy(struct tbl *, void (*destructor)(struct tbl_node *)); +struct tbl_node *tbl_lookup(struct tbl *, void *target, u32 hash, + int (*cmp)(const struct tbl_node *, void *target)); +int tbl_insert(struct tbl *, struct tbl_node *, u32 hash); +int tbl_remove(struct tbl *, struct tbl_node *); +unsigned int tbl_count(struct tbl *); +int tbl_foreach(struct tbl *, + int (*callback)(struct tbl_node *, void *aux), void *aux); + +int tbl_n_buckets(struct tbl *); +struct tbl *tbl_expand(struct tbl *); +void tbl_deferred_destroy(struct tbl *, void (*destructor)(struct tbl_node *)); + +#endif /* table.h */ |