aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--datapath/Modules.mk1
-rw-r--r--datapath/datapath.c100
-rw-r--r--datapath/datapath.h65
-rw-r--r--datapath/flow.c30
-rw-r--r--datapath/flow.h9
-rw-r--r--datapath/table.c392
-rw-r--r--datapath/table.h44
7 files changed, 371 insertions, 270 deletions
diff --git a/datapath/Modules.mk b/datapath/Modules.mk
index 1e8bc047..ba9e01c2 100644
--- a/datapath/Modules.mk
+++ b/datapath/Modules.mk
@@ -27,6 +27,7 @@ openvswitch_headers = \
datapath.h \
dp_sysfs.h \
flow.h \
+ table.h \
vport.h \
vport-internal_dev.h \
vport-netdev.h
diff --git a/datapath/datapath.c b/datapath/datapath.c
index a7b20f5f..9b34fcc5 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -45,6 +45,7 @@
#include "datapath.h"
#include "actions.h"
#include "flow.h"
+#include "table.h"
#include "vport-internal_dev.h"
#include "compat.h"
@@ -240,7 +241,7 @@ static int create_dp(int dp_idx, const char __user *devnamep)
/* Allocate table. */
err = -ENOMEM;
- rcu_assign_pointer(dp->table, dp_table_create(DP_L1_SIZE));
+ rcu_assign_pointer(dp->table, tbl_create(0));
if (!dp->table)
goto err_free_dp;
@@ -271,7 +272,7 @@ static int create_dp(int dp_idx, const char __user *devnamep)
err_destroy_local_port:
dp_detach_port(dp->ports[ODPP_LOCAL], 1);
err_destroy_table:
- dp_table_destroy(dp->table, 0);
+ tbl_destroy(dp->table, NULL);
err_free_dp:
kfree(dp);
err_put_module:
@@ -298,7 +299,7 @@ static void do_destroy_dp(struct datapath *dp)
dp_detach_port(dp->ports[ODPP_LOCAL], 1);
- dp_table_destroy(dp->table, 1);
+ tbl_destroy(dp->table, flow_free_tbl);
for (i = 0; i < DP_N_QUEUES; i++)
skb_queue_purge(&dp->queues[i]);
@@ -511,7 +512,7 @@ void dp_process_received_packet(struct dp_port *p, struct sk_buff *skb)
struct datapath *dp = p->dp;
struct dp_stats_percpu *stats;
struct odp_flow_key key;
- struct sw_flow *flow;
+ struct tbl_node *flow_node;
WARN_ON_ONCE(skb_shared(skb));
skb_warn_if_lro(skb);
@@ -530,8 +531,9 @@ void dp_process_received_packet(struct dp_port *p, struct sk_buff *skb)
}
}
- flow = dp_table_lookup(rcu_dereference(dp->table), &key);
- if (flow) {
+ flow_node = tbl_lookup(rcu_dereference(dp->table), &key, flow_hash(&key), flow_cmp);
+ if (flow_node) {
+ struct sw_flow *flow = flow_cast(flow_node);
struct sw_flow_actions *acts = rcu_dereference(flow->sf_acts);
flow_used(flow, skb);
execute_actions(dp, skb, &key, acts->actions, acts->n_actions,
@@ -854,8 +856,18 @@ err:
static int flush_flows(struct datapath *dp)
{
- dp->n_flows = 0;
- return dp_table_flush(dp);
+ struct tbl *old_table = rcu_dereference(dp->table);
+ struct tbl *new_table;
+
+ new_table = tbl_create(0);
+ if (!new_table)
+ return -ENOMEM;
+
+ rcu_assign_pointer(dp->table, new_table);
+
+ tbl_deferred_destroy(old_table, flow_free_tbl);
+
+ return 0;
}
static int validate_actions(const struct sw_flow_actions *actions)
@@ -952,11 +964,27 @@ static void clear_stats(struct sw_flow *flow)
flow->byte_count = 0;
}
+static int expand_table(struct datapath *dp)
+{
+ struct tbl *old_table = rcu_dereference(dp->table);
+ struct tbl *new_table;
+
+ new_table = tbl_expand(old_table);
+ if (IS_ERR(new_table))
+ return PTR_ERR(new_table);
+
+ rcu_assign_pointer(dp->table, new_table);
+ tbl_deferred_destroy(old_table, NULL);
+
+ return 0;
+}
+
static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp)
{
struct odp_flow_put uf;
+ struct tbl_node *flow_node;
struct sw_flow *flow;
- struct dp_table *table;
+ struct tbl *table;
struct odp_flow_stats stats;
int error;
@@ -966,8 +994,8 @@ static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp)
memset(uf.flow.key.reserved, 0, sizeof uf.flow.key.reserved);
table = rcu_dereference(dp->table);
- flow = dp_table_lookup(table, &uf.flow.key);
- if (!flow) {
+ flow_node = tbl_lookup(table, &uf.flow.key, flow_hash(&uf.flow.key), flow_cmp);
+ if (!flow_node) {
/* No such flow. */
struct sw_flow_actions *acts;
@@ -976,12 +1004,8 @@ static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp)
goto error;
/* Expand table, if necessary, to make room. */
- if (dp->n_flows >= table->n_buckets) {
- error = -ENOSPC;
- if (table->n_buckets >= DP_MAX_BUCKETS)
- goto error;
-
- error = dp_table_expand(dp);
+ if (tbl_count(table) >= tbl_n_buckets(table)) {
+ error = expand_table(dp);
if (error)
goto error;
table = rcu_dereference(dp->table);
@@ -1004,16 +1028,18 @@ static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp)
rcu_assign_pointer(flow->sf_acts, acts);
/* Put flow in bucket. */
- error = dp_table_insert(table, flow);
+ error = tbl_insert(table, &flow->tbl_node, flow_hash(&flow->key));
if (error)
goto error_free_flow_acts;
- dp->n_flows++;
+
memset(&stats, 0, sizeof(struct odp_flow_stats));
} else {
/* We found a matching flow. */
struct sw_flow_actions *old_acts, *new_acts;
unsigned long int flags;
+ flow = flow_cast(flow_node);
+
/* Bail out if we're not allowed to modify an existing flow. */
error = -EEXIST;
if (!(uf.flags & ODPPF_MODIFY))
@@ -1100,8 +1126,9 @@ static int answer_query(struct sw_flow *flow, u32 query_flags,
static int del_flow(struct datapath *dp, struct odp_flow __user *ufp)
{
- struct dp_table *table = rcu_dereference(dp->table);
+ struct tbl *table = rcu_dereference(dp->table);
struct odp_flow uf;
+ struct tbl_node *flow_node;
struct sw_flow *flow;
int error;
@@ -1110,13 +1137,12 @@ static int del_flow(struct datapath *dp, struct odp_flow __user *ufp)
goto error;
memset(uf.key.reserved, 0, sizeof uf.key.reserved);
- flow = dp_table_lookup(table, &uf.key);
+ flow_node = tbl_lookup(table, &uf.key, flow_hash(&uf.key), flow_cmp);
error = -ENOENT;
- if (!flow)
+ if (!flow_node)
goto error;
- /* XXX redundant lookup */
- error = dp_table_delete(table, flow);
+ error = tbl_remove(table, flow_node);
if (error)
goto error;
@@ -1124,7 +1150,8 @@ static int del_flow(struct datapath *dp, struct odp_flow __user *ufp)
* be using this flow. We used to synchronize_rcu() to make sure that
* we get completely accurate stats, but that blows our performance,
* badly. */
- dp->n_flows--;
+
+ flow = flow_cast(flow_node);
error = answer_query(flow, 0, ufp);
flow_deferred_free(flow);
@@ -1134,23 +1161,23 @@ error:
static int query_flows(struct datapath *dp, const struct odp_flowvec *flowvec)
{
- struct dp_table *table = rcu_dereference(dp->table);
+ struct tbl *table = rcu_dereference(dp->table);
int i;
for (i = 0; i < flowvec->n_flows; i++) {
struct __user odp_flow *ufp = &flowvec->flows[i];
struct odp_flow uf;
- struct sw_flow *flow;
+ struct tbl_node *flow_node;
int error;
if (__copy_from_user(&uf, ufp, sizeof uf))
return -EFAULT;
memset(uf.key.reserved, 0, sizeof uf.key.reserved);
- flow = dp_table_lookup(table, &uf.key);
- if (!flow)
+ flow_node = tbl_lookup(table, &uf.key, flow_hash(&uf.key), flow_cmp);
+ if (!flow_node)
error = __put_user(ENOENT, &ufp->stats.error);
else
- error = answer_query(flow, uf.flags, ufp);
+ error = answer_query(flow_cast(flow_node), uf.flags, ufp);
if (error)
return -EFAULT;
}
@@ -1163,8 +1190,9 @@ struct list_flows_cbdata {
int listed_flows;
};
-static int list_flow(struct sw_flow *flow, void *cbdata_)
+static int list_flow(struct tbl_node *node, void *cbdata_)
{
+ struct sw_flow *flow = flow_cast(node);
struct list_flows_cbdata *cbdata = cbdata_;
struct odp_flow __user *ufp = &cbdata->uflows[cbdata->listed_flows++];
int error;
@@ -1191,8 +1219,7 @@ static int list_flows(struct datapath *dp, const struct odp_flowvec *flowvec)
cbdata.uflows = flowvec->flows;
cbdata.n_flows = flowvec->n_flows;
cbdata.listed_flows = 0;
- error = dp_table_foreach(rcu_dereference(dp->table),
- list_flow, &cbdata);
+ error = tbl_foreach(rcu_dereference(dp->table), list_flow, &cbdata);
return error ? error : cbdata.listed_flows;
}
@@ -1295,12 +1322,13 @@ error:
static int get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp)
{
+ struct tbl *table = rcu_dereference(dp->table);
struct odp_stats stats;
int i;
- stats.n_flows = dp->n_flows;
- stats.cur_capacity = rcu_dereference(dp->table)->n_buckets;
- stats.max_capacity = DP_MAX_BUCKETS;
+ stats.n_flows = tbl_count(table);
+ stats.cur_capacity = tbl_n_buckets(table);
+ stats.max_capacity = TBL_MAX_BUCKETS;
stats.n_ports = dp->n_ports;
stats.max_ports = DP_MAX_PORTS;
stats.max_groups = DP_MAX_GROUPS;
diff --git a/datapath/datapath.h b/datapath/datapath.h
index 553e19fd..991a7e81 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -32,56 +32,6 @@ struct dp_port;
#define DP_MAX_PORTS 1024
#define DP_MAX_GROUPS 16
-#define DP_L2_BITS (PAGE_SHIFT - ilog2(sizeof(struct dp_bucket*)))
-#define DP_L2_SIZE (1 << DP_L2_BITS)
-#define DP_L2_SHIFT 0
-
-#define DP_L1_BITS (PAGE_SHIFT - ilog2(sizeof(struct dp_bucket**)))
-#define DP_L1_SIZE (1 << DP_L1_BITS)
-#define DP_L1_SHIFT DP_L2_BITS
-
-/* For 4 kB pages, this is 1,048,576 on 32-bit or 262,144 on 64-bit. */
-#define DP_MAX_BUCKETS (DP_L1_SIZE * DP_L2_SIZE)
-
-/**
- * struct dp_table - flow table
- * @n_buckets: number of buckets (a power of 2 between %DP_L1_SIZE and
- * %DP_MAX_BUCKETS)
- * @buckets: pointer to @n_buckets/%DP_L1_SIZE pointers to %DP_L1_SIZE pointers
- * to buckets
- * @hash_seed: random number used for flow hashing, to make the hash
- * distribution harder to predict
- * @rcu: RCU callback structure
- *
- * The @buckets array is logically an array of pointers to buckets. It is
- * broken into two levels to avoid the need to kmalloc() any object larger than
- * a single page or to use vmalloc(). @buckets is always nonnull, as is each
- * @buckets[i], but each @buckets[i][j] is nonnull only if the specified hash
- * bucket is nonempty (for 0 <= i < @n_buckets/%DP_L1_SIZE, 0 <= j <
- * %DP_L1_SIZE).
- */
-struct dp_table {
- unsigned int n_buckets;
- struct dp_bucket ***buckets;
- unsigned int hash_seed;
- struct rcu_head rcu;
-};
-
-/**
- * struct dp_bucket - single bucket within datapath flow table
- * @rcu: RCU callback structure
- * @n_flows: number of flows in @flows[] array
- * @flows: array of @n_flows pointers to flows
- *
- * The expected number of flows per bucket is 1, but this allows for an
- * arbitrary number of collisions.
- */
-struct dp_bucket {
- struct rcu_head rcu;
- unsigned int n_flows;
- struct sw_flow *flows[];
-};
-
#define DP_N_QUEUES 3
#define DP_MAX_QUEUE_LEN 100
@@ -143,8 +93,7 @@ struct datapath {
wait_queue_head_t waitqueue;
/* Flow table. */
- unsigned int n_flows;
- struct dp_table *table;
+ struct tbl *table;
/* Port groups. */
struct dp_port_group *groups[DP_MAX_GROUPS];
@@ -208,18 +157,6 @@ struct ovs_skb_cb {
extern struct notifier_block dp_device_notifier;
extern int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
-/* Flow table. */
-struct dp_table *dp_table_create(unsigned int n_buckets);
-void dp_table_destroy(struct dp_table *, int free_flows);
-struct sw_flow *dp_table_lookup(struct dp_table *, const struct odp_flow_key *);
-int dp_table_insert(struct dp_table *, struct sw_flow *);
-int dp_table_delete(struct dp_table *, struct sw_flow *);
-int dp_table_expand(struct datapath *);
-int dp_table_flush(struct datapath *);
-int dp_table_foreach(struct dp_table *table,
- int (*callback)(struct sw_flow *flow, void *aux),
- void *aux);
-
void dp_process_received_packet(struct dp_port *, struct sk_buff *);
int dp_detach_port(struct dp_port *, int may_delete);
int dp_output_control(struct datapath *, struct sk_buff *, int, u32 arg);
diff --git a/datapath/flow.c b/datapath/flow.c
index 8228da2e..548c729a 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -14,6 +14,7 @@
#include <linux/if_vlan.h>
#include <net/llc_pdu.h>
#include <linux/kernel.h>
+#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/llc.h>
#include <linux/module.h>
@@ -31,6 +32,7 @@
#include "compat.h"
struct kmem_cache *flow_cache;
+static unsigned int hash_seed;
struct arp_eth_header
{
@@ -134,7 +136,7 @@ struct sw_flow_actions *flow_actions_alloc(size_t n_actions)
/* Frees 'flow' immediately. */
-void flow_free(struct sw_flow *flow)
+static void flow_free(struct sw_flow *flow)
{
if (unlikely(!flow))
return;
@@ -142,6 +144,12 @@ void flow_free(struct sw_flow *flow)
kmem_cache_free(flow_cache, flow);
}
+void flow_free_tbl(struct tbl_node *node)
+{
+ struct sw_flow *flow = flow_cast(node);
+ flow_free(flow);
+}
+
/* RCU callback used by flow_deferred_free. */
static void rcu_free_flow_callback(struct rcu_head *rcu)
{
@@ -319,6 +327,24 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key)
return retval;
}
+struct sw_flow *flow_cast(const struct tbl_node *node)
+{
+ return container_of(node, struct sw_flow, tbl_node);
+}
+
+u32 flow_hash(const struct odp_flow_key *key)
+{
+ return jhash2((u32*)key, sizeof *key / sizeof(u32), hash_seed);
+}
+
+int flow_cmp(const struct tbl_node *node, void *key2_)
+{
+ const struct odp_flow_key *key1 = &flow_cast(node)->key;
+ const struct odp_flow_key *key2 = key2_;
+
+ return !memcmp(key1, key2, sizeof(struct odp_flow_key));
+}
+
/* Initializes the flow module.
* Returns zero if successful or a negative error code. */
int flow_init(void)
@@ -328,6 +354,8 @@ int flow_init(void)
if (flow_cache == NULL)
return -ENOMEM;
+ get_random_bytes(&hash_seed, sizeof hash_seed);
+
return 0;
}
diff --git a/datapath/flow.h b/datapath/flow.h
index 44cc3a60..4a393cb9 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -16,6 +16,7 @@
#include <linux/gfp.h>
#include "openvswitch/datapath-protocol.h"
+#include "table.h"
struct sk_buff;
@@ -27,6 +28,8 @@ struct sw_flow_actions {
struct sw_flow {
struct rcu_head rcu;
+ struct tbl_node tbl_node;
+
struct odp_flow_key key;
struct sw_flow_actions *sf_acts;
@@ -43,12 +46,16 @@ struct sw_flow {
extern struct kmem_cache *flow_cache;
struct sw_flow_actions *flow_actions_alloc(size_t n_actions);
-void flow_free(struct sw_flow *);
void flow_deferred_free(struct sw_flow *);
void flow_deferred_free_acts(struct sw_flow_actions *);
int flow_extract(struct sk_buff *, u16 in_port, struct odp_flow_key *);
void flow_used(struct sw_flow *, struct sk_buff *);
+struct sw_flow *flow_cast(const struct tbl_node *);
+u32 flow_hash(const struct odp_flow_key *key);
+int flow_cmp(const struct tbl_node *, void *target);
+void flow_free_tbl(struct tbl_node *);
+
int flow_init(void);
void flow_exit(void);
diff --git a/datapath/table.c b/datapath/table.c
index 23ae8abe..e4561d69 100644
--- a/datapath/table.c
+++ b/datapath/table.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2009 Nicira Networks.
+ * Copyright (c) 2009, 2010 Nicira Networks.
* Distributed under the terms of the GNU GPL version 2.
*
* Significant portions of this file may be copied from parts of the Linux
@@ -8,44 +8,80 @@
#include "flow.h"
#include "datapath.h"
+#include "table.h"
#include <linux/gfp.h>
-#include <linux/jhash.h>
-#include <linux/random.h>
#include <linux/slab.h>
-#include <linux/vmalloc.h>
#include <linux/mm.h>
-#include <linux/highmem.h>
#include <asm/pgtable.h>
-static inline int bucket_size(int n_flows)
+/**
+ * struct tbl - hash table
+ * @n_buckets: number of buckets (a power of 2 between %TBL_L1_SIZE and
+ * %TBL_MAX_BUCKETS)
+ * @buckets: pointer to @n_buckets/%TBL_L1_SIZE pointers to %TBL_L1_SIZE pointers
+ * to buckets
+ * @rcu: RCU callback structure
+ * @obj_destructor: Called on each element when the table is destroyed.
+ *
+ * The @buckets array is logically an array of pointers to buckets. It is
+ * broken into two levels to avoid the need to kmalloc() any object larger than
+ * a single page or to use vmalloc(). @buckets is always nonnull, as is each
+ * @buckets[i], but each @buckets[i][j] is nonnull only if the specified hash
+ * bucket is nonempty (for 0 <= i < @n_buckets/%TBL_L1_SIZE, 0 <= j <
+ * %TBL_L1_SIZE).
+ */
+struct tbl {
+ struct rcu_head rcu;
+ unsigned int n_buckets;
+ struct tbl_bucket ***buckets;
+ unsigned int count;
+ void (*obj_destructor)(struct tbl_node *);
+};
+
+/**
+ * struct tbl_bucket - single bucket within a hash table
+ * @rcu: RCU callback structure
+ * @n_objs: number of objects in @objs[] array
+ * @objs: array of @n_objs pointers to table nodes contained inside objects
+ *
+ * The expected number of objects per bucket is 1, but this allows for an
+ * arbitrary number of collisions.
+ */
+struct tbl_bucket {
+ struct rcu_head rcu;
+ unsigned int n_objs;
+ struct tbl_node *objs[];
+};
+
+static inline int bucket_size(int n_objs)
{
- return sizeof(struct dp_bucket) + sizeof(struct sw_flow*) * n_flows;
+ return sizeof(struct tbl_bucket) + sizeof(struct tbl_node *) * n_objs;
}
-static struct dp_bucket *dp_bucket_alloc(int n_flows)
+static struct tbl_bucket *bucket_alloc(int n_objs)
{
- return kmalloc(bucket_size(n_flows), GFP_KERNEL);
+ return kmalloc(bucket_size(n_objs), GFP_KERNEL);
}
-static void free_buckets(struct dp_bucket ***l1, unsigned int n_buckets,
- int free_flows)
+static void free_buckets(struct tbl_bucket ***l1, unsigned int n_buckets,
+ void (*free_obj)(struct tbl_node *))
{
unsigned int i;
- for (i = 0; i < n_buckets >> DP_L1_BITS; i++) {
- struct dp_bucket **l2 = l1[i];
+ for (i = 0; i < n_buckets >> TBL_L1_BITS; i++) {
+ struct tbl_bucket **l2 = l1[i];
unsigned int j;
- for (j = 0; j < DP_L1_SIZE; j++) {
- struct dp_bucket *bucket = l2[j];
+ for (j = 0; j < TBL_L1_SIZE; j++) {
+ struct tbl_bucket *bucket = l2[j];
if (!bucket)
continue;
- if (free_flows) {
+ if (free_obj) {
unsigned int k;
- for (k = 0; k < bucket->n_flows; k++)
- flow_free(bucket->flows[k]);
+ for (k = 0; k < bucket->n_objs; k++)
+ free_obj(bucket->objs[k]);
}
kfree(bucket);
}
@@ -54,19 +90,19 @@ static void free_buckets(struct dp_bucket ***l1, unsigned int n_buckets,
kfree(l1);
}
-static struct dp_bucket ***alloc_buckets(unsigned int n_buckets)
+static struct tbl_bucket ***alloc_buckets(unsigned int n_buckets)
{
- struct dp_bucket ***l1;
+ struct tbl_bucket ***l1;
unsigned int i;
- l1 = kmalloc((n_buckets >> DP_L1_BITS) * sizeof(struct dp_bucket**),
+ l1 = kmalloc((n_buckets >> TBL_L1_BITS) * sizeof(struct tbl_bucket **),
GFP_KERNEL);
if (!l1)
return NULL;
- for (i = 0; i < n_buckets >> DP_L1_BITS; i++) {
- l1[i] = (struct dp_bucket **)get_zeroed_page(GFP_KERNEL);
+ for (i = 0; i < n_buckets >> TBL_L1_BITS; i++) {
+ l1[i] = (struct tbl_bucket **)get_zeroed_page(GFP_KERNEL);
if (!l1[i]) {
- free_buckets(l1, i << DP_L1_BITS, 0);
+ free_buckets(l1, i << TBL_L1_BITS, 0);
return NULL;
}
}
@@ -74,16 +110,19 @@ static struct dp_bucket ***alloc_buckets(unsigned int n_buckets)
}
/**
- * dp_table_create - create and return a new flow table
+ * tbl_create - create and return a new hash table
* @n_buckets: number of buckets in the new table
*
- * Creates and returns a new flow table, or %NULL if memory cannot be
- * allocated. @n_buckets must be a power of 2 in the range %DP_L1_SIZE to
- * %DP_MAX_BUCKETS.
+ * Creates and returns a new hash table, or %NULL if memory cannot be
+ * allocated. @n_buckets must be a power of 2 in the range %TBL_L1_SIZE to
+ * %TBL_MAX_BUCKETS.
*/
-struct dp_table *dp_table_create(unsigned int n_buckets)
+struct tbl *tbl_create(unsigned int n_buckets)
{
- struct dp_table *table;
+ struct tbl *table;
+
+ if (!n_buckets)
+ n_buckets = TBL_L1_SIZE;
table = kzalloc(sizeof *table, GFP_KERNEL);
if (!table)
@@ -93,7 +132,6 @@ struct dp_table *dp_table_create(unsigned int n_buckets)
table->buckets = alloc_buckets(n_buckets);
if (!table->buckets)
goto err_free_table;
- get_random_bytes(&table->hash_seed, sizeof table->hash_seed);
return table;
@@ -104,108 +142,126 @@ err:
}
/**
- * dp_table_destroy - destroy flow table and optionally the flows it contains
- * @table: table to destroy (must not be %NULL)
- * @free_flows: whether to destroy the flows
+ * tbl_destroy - destroy hash table and optionally the objects it contains
+ * @table: table to destroy
+ * @destructor: function to be called on objects at destruction time
*
- * If @free_flows is zero, then the buckets in @table are destroyed but not the
- * flows within those buckets. This behavior is useful when a table is being
- * replaced by a larger or smaller one without destroying the flows.
+ * If a destructor is null, then the buckets in @table are destroyed
+ * but not the objects within those buckets. This behavior is useful when a
+ * table is being replaced by a larger or smaller one without destroying the
+ * objects.
*
- * If @free_flows is nonzero, then the flows in @table are destroyed as well as
- * the buckets.
+ * If a destructor is not null, then it is called on the objects in @table
+ * before destroying the buckets.
*/
-void dp_table_destroy(struct dp_table *table, int free_flows)
+void tbl_destroy(struct tbl *table, void (*destructor)(struct tbl_node *))
{
- free_buckets(table->buckets, table->n_buckets, free_flows);
+ if (!table)
+ return;
+
+ free_buckets(table->buckets, table->n_buckets, destructor);
kfree(table);
}
-static struct dp_bucket **find_bucket(struct dp_table *table, u32 hash)
+static void destroy_table_rcu(struct rcu_head *rcu)
{
- unsigned int l1 = (hash & (table->n_buckets - 1)) >> DP_L1_SHIFT;
- unsigned int l2 = hash & ((1 << DP_L2_BITS) - 1);
+ struct tbl *table = container_of(rcu, struct tbl, rcu);
+ tbl_destroy(table, table->obj_destructor);
+}
+
+/**
+ * tbl_deferred_destroy - destroy table after a RCU grace period
+ * @table: table to destroy
+ * @destructor: function to be called on objects at destruction time
+ *
+ * Calls tbl_destroy() on @table after an RCU grace period. If @destructor is
+ * not null it is called on every element before the table is destroyed. */
+void tbl_deferred_destroy(struct tbl *table, void (*destructor)(struct tbl_node *))
+{
+ if (!table)
+ return;
+
+ table->obj_destructor = destructor;
+ call_rcu(&table->rcu, destroy_table_rcu);
+}
+
+static struct tbl_bucket **find_bucket(struct tbl *table, u32 hash)
+{
+ unsigned int l1 = (hash & (table->n_buckets - 1)) >> TBL_L1_SHIFT;
+ unsigned int l2 = hash & ((1 << TBL_L2_BITS) - 1);
return &table->buckets[l1][l2];
}
-static int search_bucket(const struct dp_bucket *bucket, const struct odp_flow_key *key)
+static int search_bucket(const struct tbl_bucket *bucket, void *target, u32 hash,
+ int (*cmp)(const struct tbl_node *, void *))
{
int i;
- for (i = 0; i < bucket->n_flows; i++) {
- struct sw_flow *flow = rcu_dereference(bucket->flows[i]);
- if (!memcmp(&flow->key, key, sizeof(struct odp_flow_key)))
+ for (i = 0; i < bucket->n_objs; i++) {
+ struct tbl_node *obj = rcu_dereference(bucket->objs[i]);
+ if (obj->hash == hash && likely(cmp(obj, target)))
return i;
}
return -1;
}
-static struct sw_flow *lookup_flow(struct dp_table *table, u32 hash,
- const struct odp_flow_key *key)
+/**
+ * tbl_lookup - searches hash table for a matching object
+ * @table: hash table to search
+ * @target: identifier for the object that is being searched for, will be
+ * provided as an argument to @cmp when making comparisions
+ * @hash: hash of @target
+ * @cmp: comparision function to match objects with the given hash, returns
+ * nonzero if the objects match, zero otherwise
+ *
+ * Searches @table for an object identified by @target. Returns the tbl_node
+ * contained in the object if successful, otherwise %NULL.
+ */
+struct tbl_node *tbl_lookup(struct tbl *table, void *target, u32 hash,
+ int (*cmp)(const struct tbl_node *, void *))
{
- struct dp_bucket **bucketp = find_bucket(table, hash);
- struct dp_bucket *bucket = rcu_dereference(*bucketp);
+ struct tbl_bucket **bucketp = find_bucket(table, hash);
+ struct tbl_bucket *bucket = rcu_dereference(*bucketp);
int index;
if (!bucket)
return NULL;
- index = search_bucket(bucket, key);
+ index = search_bucket(bucket, target, hash, cmp);
if (index < 0)
return NULL;
- return bucket->flows[index];
-}
-
-static u32 flow_hash(const struct dp_table *table,
- const struct odp_flow_key *key)
-{
- return jhash2((u32*)key, sizeof *key / sizeof(u32), table->hash_seed);
+ return bucket->objs[index];
}
/**
- * dp_table_lookup - searches flow table for a matching flow
- * @table: flow table to search
- * @key: flow key for which to search
- *
- * Searches @table for a flow whose key is equal to @key. Returns the flow if
- * successful, otherwise %NULL.
- */
-struct sw_flow *dp_table_lookup(struct dp_table *table,
- const struct odp_flow_key *key)
-{
- return lookup_flow(table, flow_hash(table, key), key);
-}
-
-/**
- * dp_table_foreach - iterate through flow table
+ * tbl_foreach - iterate through hash table
* @table: table to iterate
- * @callback: function to call for each flow entry
+ * @callback: function to call for each entry
* @aux: Extra data to pass to @callback
*
- * Iterates through all of the flows in @table in hash order, passing each of
+ * Iterates through all of the objects in @table in hash order, passing each of
* them in turn to @callback. If @callback returns nonzero, this terminates
- * the iteration and dp_table_foreach() returns the same value. Returns 0 if
+ * the iteration and tbl_foreach() returns the same value. Returns 0 if
* @callback never returns nonzero.
*
* This function does not try to intelligently handle the case where @callback
* adds or removes flows in @table.
*/
-int dp_table_foreach(struct dp_table *table,
- int (*callback)(struct sw_flow *flow, void *aux),
- void *aux)
+int tbl_foreach(struct tbl *table,
+ int (*callback)(struct tbl_node *, void *aux), void *aux)
{
unsigned int i, j, k;
- for (i = 0; i < table->n_buckets >> DP_L1_BITS; i++) {
- struct dp_bucket **l2 = table->buckets[i];
- for (j = 0; j < DP_L1_SIZE; j++) {
- struct dp_bucket *bucket = rcu_dereference(l2[j]);
+ for (i = 0; i < table->n_buckets >> TBL_L1_BITS; i++) {
+ struct tbl_bucket **l2 = table->buckets[i];
+ for (j = 0; j < TBL_L1_SIZE; j++) {
+ struct tbl_bucket *bucket = rcu_dereference(l2[j]);
if (!bucket)
continue;
- for (k = 0; k < bucket->n_flows; k++) {
- int error = (*callback)(bucket->flows[k], aux);
+ for (k = 0; k < bucket->n_objs; k++) {
+ int error = (*callback)(bucket->objs[k], aux);
if (error)
return error;
}
@@ -214,154 +270,154 @@ int dp_table_foreach(struct dp_table *table,
return 0;
}
-static int insert_flow(struct sw_flow *flow, void *new_table_)
-{
- struct dp_table *new_table = new_table_;
- return dp_table_insert(new_table, flow);
-}
-
-static void dp_free_table_rcu(struct rcu_head *rcu)
+static int insert_table_flow(struct tbl_node *node, void *new_table_)
{
- struct dp_table *table = container_of(rcu, struct dp_table, rcu);
- dp_table_destroy(table, 0);
+ struct tbl *new_table = new_table_;
+ return tbl_insert(new_table, node, node->hash);
}
/**
- * dp_table_expand - replace datapath's flow table by one with more buckets
- * @dp: datapath to expand
+ * tbl_expand - create a hash table with more buckets
+ * @table: table to expand
*
- * Replaces @dp's flow table by one that has twice as many buckets. All of the
- * flows in @dp's flow table are moved to the new flow table. Returns 0 if
- * successful, otherwise a negative error.
+ * Creates a new table containing the same objects as @table but with twice
+ * as many buckets. Returns 0 if successful, otherwise a negative error. The
+ * caller should free @table upon success (probably using
+ * tbl_deferred_destroy()).
*/
-int dp_table_expand(struct datapath *dp)
+struct tbl *tbl_expand(struct tbl *table)
{
- struct dp_table *old_table = rcu_dereference(dp->table);
- struct dp_table *new_table;
+ int err;
+ int n_buckets = table->n_buckets * 2;
+ struct tbl *new_table;
- new_table = dp_table_create(old_table->n_buckets * 2);
+ if (n_buckets >= TBL_MAX_BUCKETS) {
+ err = -ENOSPC;
+ goto error;
+ }
+
+ err = -ENOMEM;
+ new_table = tbl_create(n_buckets);
if (!new_table)
goto error;
- if (dp_table_foreach(old_table, insert_flow, new_table))
+ if (tbl_foreach(table, insert_table_flow, new_table))
goto error_free_new_table;
- rcu_assign_pointer(dp->table, new_table);
- call_rcu(&old_table->rcu, dp_free_table_rcu);
- return 0;
+ return new_table;
error_free_new_table:
- dp_table_destroy(new_table, 0);
+ tbl_destroy(new_table, NULL);
error:
- return -ENOMEM;
-}
-
-static void dp_free_table_and_flows_rcu(struct rcu_head *rcu)
-{
- struct dp_table *table = container_of(rcu, struct dp_table, rcu);
- dp_table_destroy(table, 1);
+ return ERR_PTR(err);
}
/**
- * dp_table_flush - clear datapath's flow table
- * @dp: datapath to clear
+ * tbl_n_buckets - returns the number of buckets
+ * @table: table to examine
*
- * Replaces @dp's flow table by an empty flow table, destroying all the flows
- * in the old table (after a suitable RCU grace period).
+ * Returns the number of buckets currently allocated in @table, useful when
+ * deciding whether to expand.
*/
-int dp_table_flush(struct datapath *dp)
+int tbl_n_buckets(struct tbl *table)
{
- struct dp_table *old_table = rcu_dereference(dp->table);
- struct dp_table *new_table = dp_table_create(DP_L1_SIZE);
- if (!new_table)
- return -ENOMEM;
- rcu_assign_pointer(dp->table, new_table);
- call_rcu(&old_table->rcu, dp_free_table_and_flows_rcu);
- return 0;
+ return table->n_buckets;
}
-static void dp_free_bucket_rcu(struct rcu_head *rcu)
+static void free_bucket_rcu(struct rcu_head *rcu)
{
- struct dp_bucket *bucket = container_of(rcu, struct dp_bucket, rcu);
+ struct tbl_bucket *bucket = container_of(rcu, struct tbl_bucket, rcu);
kfree(bucket);
}
/**
- * dp_table_insert - insert flow into table
- * @table: table in which to insert flow
- * @target: flow to insert
+ * tbl_insert - insert object into table
+ * @table: table in which to insert object
+ * @target: tbl_node contained in object to insert
+ * @hash: hash of object to insert
*
- * The caller must ensure that no flow with key identical to @target->key
+ * The caller must ensure that no object considered to be identical to @target
* already exists in @table. Returns 0 or a negative error (currently just
* -ENOMEM).
- *
- * The caller is responsible for updating &struct datapath's n_flows member.
*/
-int dp_table_insert(struct dp_table *table, struct sw_flow *target)
+int tbl_insert(struct tbl *table, struct tbl_node *target, u32 hash)
{
- u32 hash = flow_hash(table, &target->key);
- struct dp_bucket **oldp = find_bucket(table, hash);
- struct dp_bucket *old = *rcu_dereference(oldp);
- unsigned int n = old ? old->n_flows : 0;
- struct dp_bucket *new = dp_bucket_alloc(n + 1);
+ struct tbl_bucket **oldp = find_bucket(table, hash);
+ struct tbl_bucket *old = *rcu_dereference(oldp);
+ unsigned int n = old ? old->n_objs : 0;
+ struct tbl_bucket *new = bucket_alloc(n + 1);
if (!new)
return -ENOMEM;
- new->n_flows = n + 1;
+ target->hash = hash;
+
+ new->n_objs = n + 1;
if (old)
- memcpy(new->flows, old->flows, n * sizeof(struct sw_flow*));
- new->flows[n] = target;
+ memcpy(new->objs, old->objs, n * sizeof(struct tbl_node *));
+ new->objs[n] = target;
rcu_assign_pointer(*oldp, new);
if (old)
- call_rcu(&old->rcu, dp_free_bucket_rcu);
+ call_rcu(&old->rcu, free_bucket_rcu);
+
+ table->count++;
return 0;
}
/**
- * dp_table_delete - remove flow from table
- * @table: table from which to remove flow
- * @target: flow to remove
+ * tbl_delete - remove object from table
+ * @table: table from which to remove object
+ * @target: tbl_node inside of object to remove
*
* The caller must ensure that @target itself is in @table. (It is not
- * good enough for @table to contain a different flow with a key equal to
- * @target's key.)
+ * good enough for @table to contain a different object considered identical
+ * @target.)
*
* Returns 0 or a negative error (currently just -ENOMEM). Yes, it *is*
- * possible for a flow deletion to fail due to lack of memory.
- *
- * The caller is responsible for updating &struct datapath's n_flows member.
+ * possible for object deletion to fail due to lack of memory.
*/
-int dp_table_delete(struct dp_table *table, struct sw_flow *target)
+int tbl_remove(struct tbl *table, struct tbl_node *target)
{
- u32 hash = flow_hash(table, &target->key);
- struct dp_bucket **oldp = find_bucket(table, hash);
- struct dp_bucket *old = *rcu_dereference(oldp);
- unsigned int n = old->n_flows;
- struct dp_bucket *new;
+ struct tbl_bucket **oldp = find_bucket(table, target->hash);
+ struct tbl_bucket *old = *rcu_dereference(oldp);
+ unsigned int n = old->n_objs;
+ struct tbl_bucket *new;
if (n > 1) {
unsigned int i;
- new = dp_bucket_alloc(n - 1);
+ new = bucket_alloc(n - 1);
if (!new)
return -ENOMEM;
- new->n_flows = 0;
+ new->n_objs = 0;
for (i = 0; i < n; i++) {
- struct sw_flow *flow = old->flows[i];
- if (flow != target)
- new->flows[new->n_flows++] = flow;
+ struct tbl_node *obj = old->objs[i];
+ if (obj != target)
+ new->objs[new->n_objs++] = obj;
}
- WARN_ON_ONCE(new->n_flows != n - 1);
+ WARN_ON_ONCE(new->n_objs != n - 1);
} else {
new = NULL;
}
rcu_assign_pointer(*oldp, new);
- call_rcu(&old->rcu, dp_free_bucket_rcu);
+ call_rcu(&old->rcu, free_bucket_rcu);
+
+ table->count--;
return 0;
}
+
+/**
+ * tbl_count - retrieves the number of stored objects
+ * @table: table to count
+ *
+ * Returns the number of objects that have been inserted into the hash table.
+ */
+unsigned int tbl_count(struct tbl *table)
+{
+ return table->count;
+}
diff --git a/datapath/table.h b/datapath/table.h
new file mode 100644
index 00000000..9ae1ee32
--- /dev/null
+++ b/datapath/table.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2010 Nicira Networks.
+ * Distributed under the terms of the GNU GPL version 2.
+ *
+ * Significant portions of this file may be copied from parts of the Linux
+ * kernel, by Linus Torvalds and others.
+ */
+
+#ifndef TABLE_H
+#define TABLE_H 1
+
+struct tbl;
+struct tbl_bucket;
+
+struct tbl_node {
+ u32 hash;
+};
+
+#define TBL_L2_BITS (PAGE_SHIFT - ilog2(sizeof(struct tbl_bucket *)))
+#define TBL_L2_SIZE (1 << TBL_L2_BITS)
+#define TBL_L2_SHIFT 0
+
+#define TBL_L1_BITS (PAGE_SHIFT - ilog2(sizeof(struct tbl_bucket **)))
+#define TBL_L1_SIZE (1 << TBL_L1_BITS)
+#define TBL_L1_SHIFT TBL_L2_BITS
+
+/* For 4 kB pages, this is 1,048,576 on 32-bit or 262,144 on 64-bit. */
+#define TBL_MAX_BUCKETS (TBL_L1_SIZE * TBL_L2_SIZE)
+
+struct tbl *tbl_create(unsigned int n_buckets);
+void tbl_destroy(struct tbl *, void (*destructor)(struct tbl_node *));
+struct tbl_node *tbl_lookup(struct tbl *, void *target, u32 hash,
+ int (*cmp)(const struct tbl_node *, void *target));
+int tbl_insert(struct tbl *, struct tbl_node *, u32 hash);
+int tbl_remove(struct tbl *, struct tbl_node *);
+unsigned int tbl_count(struct tbl *);
+int tbl_foreach(struct tbl *,
+ int (*callback)(struct tbl_node *, void *aux), void *aux);
+
+int tbl_n_buckets(struct tbl *);
+struct tbl *tbl_expand(struct tbl *);
+void tbl_deferred_destroy(struct tbl *, void (*destructor)(struct tbl_node *));
+
+#endif /* table.h */