From 6fa58f7a1533b96d8c958581ed18e0e5a245157b Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 1 Sep 2009 10:31:32 -0700 Subject: datapath: Use hash table more tolerant of collisions for flow table. The hash table used until now in the kernel datapath for storing the flow table provides only two slots that a given flow can occupy. If both of those slots are already full, for a given flow, then that flow cannot be added at all and its packets must be handled entirely in userspace, taking a performance hit. The code does attempt to compensate for this by making the flow table rather large: 8 slots per flow actually in the flow table. In practice, this is usually good enough, but some of the tests that we have run show bad enough performance degradation or even timeouts of various kinds that we want to implement something better. This commit replaces the existing hash table by one with a completely different design in which buckets are flexibly sized and can accept any number of collisions. By use of suitable levels of indirection, this design is both simple and RCU-compatible. I did consider other schemes, but none of the ones that I came up with shared both of those two properties. This commit also adds kerneldoc comments for all of the flow table non-static functions and data structures. This has been lightly tested for correctness. It has not been tested for performance. Bug #1656. Bug #1851. --- datapath/datapath.h | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) (limited to 'datapath/datapath.h') diff --git a/datapath/datapath.h b/datapath/datapath.h index 62c79d43..1fe8faca 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -28,20 +28,54 @@ #define DP_MAX_PORTS 256 #define DP_MAX_GROUPS 16 -#define DP_L2_BITS (PAGE_SHIFT - ilog2(sizeof(struct sw_flow*))) +#define DP_L2_BITS (PAGE_SHIFT - ilog2(sizeof(struct dp_bucket*))) #define DP_L2_SIZE (1 << DP_L2_BITS) #define DP_L2_SHIFT 0 -#define DP_L1_BITS (PAGE_SHIFT - ilog2(sizeof(struct sw_flow**))) +#define DP_L1_BITS (PAGE_SHIFT - ilog2(sizeof(struct dp_bucket**))) #define DP_L1_SIZE (1 << DP_L1_BITS) #define DP_L1_SHIFT DP_L2_BITS +/* For 4 kB pages, this is 1,048,576 on 32-bit or 262,144 on 64-bit. */ #define DP_MAX_BUCKETS (DP_L1_SIZE * DP_L2_SIZE) +/** + * struct dp_table - flow table + * @n_buckets: number of buckets (a power of 2 between %DP_L1_SIZE and + * %DP_MAX_BUCKETS) + * @buckets: pointer to @n_buckets/%DP_L1_SIZE pointers to %DP_L1_SIZE pointers + * to buckets + * @hash_seed: random number used for flow hashing, to make the hash + * distribution harder to predict + * @rcu: RCU callback structure + * + * The @buckets array is logically an array of pointers to buckets. It is + * broken into two levels to avoid the need to kmalloc() any object larger than + * a single page or to use vmalloc(). @buckets is always nonnull, as is each + * @buckets[i], but each @buckets[i][j] is nonnull only if the specified hash + * bucket is nonempty (for 0 <= i < @n_buckets/%DP_L1_SIZE, 0 <= j < + * %DP_L1_SIZE). + */ struct dp_table { unsigned int n_buckets; - struct sw_flow ***flows[2]; + struct dp_bucket ***buckets; + unsigned int hash_seed; + struct rcu_head rcu; +}; + +/** + * struct dp_bucket - single bucket within datapath flow table + * @rcu: RCU callback structure + * @n_flows: number of flows in @flows[] array + * @flows: array of @n_flows pointers to flows + * + * The expected number of flows per bucket is 1, but this allows for an + * arbitrary number of collisions. + */ +struct dp_bucket { struct rcu_head rcu; + unsigned int n_flows; + struct sw_flow *flows[]; }; #define DP_N_QUEUES 2 @@ -104,7 +138,7 @@ extern int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd); struct dp_table *dp_table_create(unsigned int n_buckets); void dp_table_destroy(struct dp_table *, int free_flows); struct sw_flow *dp_table_lookup(struct dp_table *, const struct odp_flow_key *); -struct sw_flow **dp_table_lookup_for_insert(struct dp_table *, const struct odp_flow_key *); +int dp_table_insert(struct dp_table *, struct sw_flow *); int dp_table_delete(struct dp_table *, struct sw_flow *); int dp_table_expand(struct datapath *); int dp_table_flush(struct datapath *); -- cgit v1.2.3