diff options
author | Ben Pfaff <blp@nicira.com> | 2009-09-02 10:14:53 -0700 |
---|---|---|
committer | Ben Pfaff <blp@nicira.com> | 2009-09-02 10:14:53 -0700 |
commit | f1acd62b54376a425a975f9af501c4c8c5689b39 (patch) | |
tree | a579f967870b61d3a226acdc62cc079c8ce996d1 /datapath | |
parent | 2c7807ac4f578bfdd7a46b79028935c9aa34cde3 (diff) | |
parent | 0ad9b732910b8f1aa7fc47ea57ff79e7618a4e6d (diff) |
Merge citrix branch into master.
Diffstat (limited to 'datapath')
-rw-r--r-- | datapath/datapath.c | 35 | ||||
-rw-r--r-- | datapath/datapath.h | 42 | ||||
-rw-r--r-- | datapath/flow.c | 43 | ||||
-rw-r--r-- | datapath/table.c | 337 |
4 files changed, 326 insertions, 131 deletions
diff --git a/datapath/datapath.c b/datapath/datapath.c index d822b73c..6f96ee40 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -839,7 +839,7 @@ static void clear_stats(struct sw_flow *flow) static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp) { struct odp_flow_put uf; - struct sw_flow *flow, **bucket; + struct sw_flow *flow; struct dp_table *table; struct odp_flow_stats stats; int error; @@ -849,15 +849,10 @@ static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp) goto error; uf.flow.key.reserved = 0; -retry: table = rcu_dereference(dp->table); - bucket = dp_table_lookup_for_insert(table, &uf.flow.key); - if (!bucket) { - /* No such flow, and the slots where it could go are full. */ - error = uf.flags & ODPPF_CREATE ? -EFBIG : -ENOENT; - goto error; - } else if (!*bucket) { - /* No such flow, but we found an available slot for it. */ + flow = dp_table_lookup(table, &uf.flow.key); + if (!flow) { + /* No such flow. */ struct sw_flow_actions *acts; error = -ENOENT; @@ -865,14 +860,15 @@ retry: goto error; /* Expand table, if necessary, to make room. */ - if (dp->n_flows * 4 >= table->n_buckets && - table->n_buckets < DP_MAX_BUCKETS) { + if (dp->n_flows >= table->n_buckets) { + error = -ENOSPC; + if (table->n_buckets >= DP_MAX_BUCKETS) + goto error; + error = dp_table_expand(dp); if (error) goto error; - - /* The bucket's location has changed. Try again. */ - goto retry; + table = rcu_dereference(dp->table); } /* Allocate flow. */ @@ -892,12 +888,13 @@ retry: rcu_assign_pointer(flow->sf_acts, acts); /* Put flow in bucket. */ - rcu_assign_pointer(*bucket, flow); + error = dp_table_insert(table, flow); + if (error) + goto error_free_flow_acts; dp->n_flows++; memset(&stats, 0, sizeof(struct odp_flow_stats)); } else { /* We found a matching flow. */ - struct sw_flow *flow = *rcu_dereference(bucket); struct sw_flow_actions *old_acts, *new_acts; unsigned long int flags; @@ -935,6 +932,8 @@ retry: return -EFAULT; return 0; +error_free_flow_acts: + kfree(flow->sf_acts); error_free_flow: kmem_cache_free(flow_cache, flow); error: @@ -1167,8 +1166,8 @@ static int get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp) int i; stats.n_flows = dp->n_flows; - stats.cur_capacity = rcu_dereference(dp->table)->n_buckets * 2; - stats.max_capacity = DP_MAX_BUCKETS * 2; + stats.cur_capacity = rcu_dereference(dp->table)->n_buckets; + stats.max_capacity = DP_MAX_BUCKETS; stats.n_ports = dp->n_ports; stats.max_ports = DP_MAX_PORTS; stats.max_groups = DP_MAX_GROUPS; diff --git a/datapath/datapath.h b/datapath/datapath.h index b5200848..122706a8 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -29,20 +29,54 @@ #define DP_MAX_PORTS 256 #define DP_MAX_GROUPS 16 -#define DP_L2_BITS (PAGE_SHIFT - ilog2(sizeof(struct sw_flow*))) +#define DP_L2_BITS (PAGE_SHIFT - ilog2(sizeof(struct dp_bucket*))) #define DP_L2_SIZE (1 << DP_L2_BITS) #define DP_L2_SHIFT 0 -#define DP_L1_BITS (PAGE_SHIFT - ilog2(sizeof(struct sw_flow**))) +#define DP_L1_BITS (PAGE_SHIFT - ilog2(sizeof(struct dp_bucket**))) #define DP_L1_SIZE (1 << DP_L1_BITS) #define DP_L1_SHIFT DP_L2_BITS +/* For 4 kB pages, this is 1,048,576 on 32-bit or 262,144 on 64-bit. */ #define DP_MAX_BUCKETS (DP_L1_SIZE * DP_L2_SIZE) +/** + * struct dp_table - flow table + * @n_buckets: number of buckets (a power of 2 between %DP_L1_SIZE and + * %DP_MAX_BUCKETS) + * @buckets: pointer to @n_buckets/%DP_L1_SIZE pointers to %DP_L1_SIZE pointers + * to buckets + * @hash_seed: random number used for flow hashing, to make the hash + * distribution harder to predict + * @rcu: RCU callback structure + * + * The @buckets array is logically an array of pointers to buckets. It is + * broken into two levels to avoid the need to kmalloc() any object larger than + * a single page or to use vmalloc(). @buckets is always nonnull, as is each + * @buckets[i], but each @buckets[i][j] is nonnull only if the specified hash + * bucket is nonempty (for 0 <= i < @n_buckets/%DP_L1_SIZE, 0 <= j < + * %DP_L1_SIZE). + */ struct dp_table { unsigned int n_buckets; - struct sw_flow ***flows[2]; + struct dp_bucket ***buckets; + unsigned int hash_seed; + struct rcu_head rcu; +}; + +/** + * struct dp_bucket - single bucket within datapath flow table + * @rcu: RCU callback structure + * @n_flows: number of flows in @flows[] array + * @flows: array of @n_flows pointers to flows + * + * The expected number of flows per bucket is 1, but this allows for an + * arbitrary number of collisions. + */ +struct dp_bucket { struct rcu_head rcu; + unsigned int n_flows; + struct sw_flow *flows[]; }; #define DP_N_QUEUES 2 @@ -105,7 +139,7 @@ extern int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd); struct dp_table *dp_table_create(unsigned int n_buckets); void dp_table_destroy(struct dp_table *, int free_flows); struct sw_flow *dp_table_lookup(struct dp_table *, const struct odp_flow_key *); -struct sw_flow **dp_table_lookup_for_insert(struct dp_table *, const struct odp_flow_key *); +int dp_table_insert(struct dp_table *, struct sw_flow *); int dp_table_delete(struct dp_table *, struct sw_flow *); int dp_table_expand(struct datapath *); int dp_table_flush(struct datapath *); diff --git a/datapath/flow.c b/datapath/flow.c index 2ac79e70..ae60617d 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -18,6 +18,7 @@ #include <linux/module.h> #include <linux/in.h> #include <linux/rcupdate.h> +#include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/ip.h> #include <linux/tcp.h> @@ -29,6 +30,27 @@ struct kmem_cache *flow_cache; +struct arp_eth_header +{ + __be16 ar_hrd; /* format of hardware address */ + __be16 ar_pro; /* format of protocol address */ + unsigned char ar_hln; /* length of hardware address */ + unsigned char ar_pln; /* length of protocol address */ + __be16 ar_op; /* ARP opcode (command) */ + + /* Ethernet+IPv4 specific members. */ + unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */ + unsigned char ar_sip[4]; /* sender IP address */ + unsigned char ar_tha[ETH_ALEN]; /* target hardware address */ + unsigned char ar_tip[4]; /* target IP address */ +} __attribute__((packed)); + +static inline int arphdr_ok(struct sk_buff *skb) +{ + int nh_ofs = skb_network_offset(skb); + return pskb_may_pull(skb, nh_ofs + sizeof(struct arp_eth_header)); +} + static inline int iphdr_ok(struct sk_buff *skb) { int nh_ofs = skb_network_offset(skb); @@ -266,6 +288,27 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key) } else { retval = 1; } + } else if (key->dl_type == htons(ETH_P_ARP) && arphdr_ok(skb)) { + struct arp_eth_header *arp; + + arp = (struct arp_eth_header *)skb_network_header(skb); + + if (arp->ar_hrd == htons(1) + && arp->ar_pro == htons(ETH_P_IP) + && arp->ar_hln == ETH_ALEN + && arp->ar_pln == 4) { + + /* We only match on the lower 8 bits of the opcode. */ + if (ntohs(arp->ar_op) <= 0xff) { + key->nw_proto = ntohs(arp->ar_op); + } + + if (key->nw_proto == ARPOP_REQUEST + || key->nw_proto == ARPOP_REPLY) { + memcpy(&key->nw_src, arp->ar_sip, sizeof(key->nw_src)); + memcpy(&key->nw_dst, arp->ar_tip, sizeof(key->nw_dst)); + } + } } else { skb_reset_transport_header(skb); } diff --git a/datapath/table.c b/datapath/table.c index 11aeb888..23ae8abe 100644 --- a/datapath/table.c +++ b/datapath/table.c @@ -11,50 +11,76 @@ #include <linux/gfp.h> #include <linux/jhash.h> +#include <linux/random.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/highmem.h> #include <asm/pgtable.h> -static void free_table(struct sw_flow ***flows, unsigned int n_buckets, - int free_flows) +static inline int bucket_size(int n_flows) +{ + return sizeof(struct dp_bucket) + sizeof(struct sw_flow*) * n_flows; +} + +static struct dp_bucket *dp_bucket_alloc(int n_flows) +{ + return kmalloc(bucket_size(n_flows), GFP_KERNEL); +} + +static void free_buckets(struct dp_bucket ***l1, unsigned int n_buckets, + int free_flows) { unsigned int i; for (i = 0; i < n_buckets >> DP_L1_BITS; i++) { - struct sw_flow **l2 = flows[i]; - if (free_flows) { - unsigned int j; - for (j = 0; j < DP_L1_SIZE; j++) { - if (l2[j]) - flow_free(l2[j]); + struct dp_bucket **l2 = l1[i]; + unsigned int j; + + for (j = 0; j < DP_L1_SIZE; j++) { + struct dp_bucket *bucket = l2[j]; + if (!bucket) + continue; + + if (free_flows) { + unsigned int k; + for (k = 0; k < bucket->n_flows; k++) + flow_free(bucket->flows[k]); } + kfree(bucket); } free_page((unsigned long)l2); } - kfree(flows); + kfree(l1); } -static struct sw_flow ***alloc_table(unsigned int n_buckets) +static struct dp_bucket ***alloc_buckets(unsigned int n_buckets) { - struct sw_flow ***flows; + struct dp_bucket ***l1; unsigned int i; - flows = kmalloc((n_buckets >> DP_L1_BITS) * sizeof(struct sw_flow**), - GFP_KERNEL); - if (!flows) + l1 = kmalloc((n_buckets >> DP_L1_BITS) * sizeof(struct dp_bucket**), + GFP_KERNEL); + if (!l1) return NULL; for (i = 0; i < n_buckets >> DP_L1_BITS; i++) { - flows[i] = (struct sw_flow **)get_zeroed_page(GFP_KERNEL); - if (!flows[i]) { - free_table(flows, i << DP_L1_BITS, 0); + l1[i] = (struct dp_bucket **)get_zeroed_page(GFP_KERNEL); + if (!l1[i]) { + free_buckets(l1, i << DP_L1_BITS, 0); return NULL; } } - return flows; + return l1; } +/** + * dp_table_create - create and return a new flow table + * @n_buckets: number of buckets in the new table + * + * Creates and returns a new flow table, or %NULL if memory cannot be + * allocated. @n_buckets must be a power of 2 in the range %DP_L1_SIZE to + * %DP_MAX_BUCKETS. + */ struct dp_table *dp_table_create(unsigned int n_buckets) { struct dp_table *table; @@ -64,95 +90,124 @@ struct dp_table *dp_table_create(unsigned int n_buckets) goto err; table->n_buckets = n_buckets; - table->flows[0] = alloc_table(n_buckets); - if (!table[0].flows) - goto err_free_tables; - - table->flows[1] = alloc_table(n_buckets); - if (!table->flows[1]) - goto err_free_flows0; + table->buckets = alloc_buckets(n_buckets); + if (!table->buckets) + goto err_free_table; + get_random_bytes(&table->hash_seed, sizeof table->hash_seed); return table; -err_free_flows0: - free_table(table->flows[0], table->n_buckets, 0); -err_free_tables: +err_free_table: kfree(table); err: return NULL; } +/** + * dp_table_destroy - destroy flow table and optionally the flows it contains + * @table: table to destroy (must not be %NULL) + * @free_flows: whether to destroy the flows + * + * If @free_flows is zero, then the buckets in @table are destroyed but not the + * flows within those buckets. This behavior is useful when a table is being + * replaced by a larger or smaller one without destroying the flows. + * + * If @free_flows is nonzero, then the flows in @table are destroyed as well as + * the buckets. + */ void dp_table_destroy(struct dp_table *table, int free_flows) { - int i; - for (i = 0; i < 2; i++) - free_table(table->flows[i], table->n_buckets, free_flows); + free_buckets(table->buckets, table->n_buckets, free_flows); kfree(table); } -static struct sw_flow **find_bucket(struct dp_table *table, - struct sw_flow ***flows, u32 hash) +static struct dp_bucket **find_bucket(struct dp_table *table, u32 hash) { unsigned int l1 = (hash & (table->n_buckets - 1)) >> DP_L1_SHIFT; unsigned int l2 = hash & ((1 << DP_L2_BITS) - 1); - return &flows[l1][l2]; + return &table->buckets[l1][l2]; } -static struct sw_flow *lookup_table(struct dp_table *table, - struct sw_flow ***flows, u32 hash, - const struct odp_flow_key *key) +static int search_bucket(const struct dp_bucket *bucket, const struct odp_flow_key *key) { - struct sw_flow **bucket = find_bucket(table, flows, hash); - struct sw_flow *flow = rcu_dereference(*bucket); - if (flow && !memcmp(&flow->key, key, sizeof(struct odp_flow_key))) - return flow; - return NULL; -} + int i; -static u32 flow_hash0(const struct odp_flow_key *key) -{ - return jhash2((u32*)key, sizeof *key / sizeof(u32), 0xaaaaaaaa); + for (i = 0; i < bucket->n_flows; i++) { + struct sw_flow *flow = rcu_dereference(bucket->flows[i]); + if (!memcmp(&flow->key, key, sizeof(struct odp_flow_key))) + return i; + } + + return -1; } -static u32 flow_hash1(const struct odp_flow_key *key) +static struct sw_flow *lookup_flow(struct dp_table *table, u32 hash, + const struct odp_flow_key *key) { - return jhash2((u32*)key, sizeof *key / sizeof(u32), 0x55555555); + struct dp_bucket **bucketp = find_bucket(table, hash); + struct dp_bucket *bucket = rcu_dereference(*bucketp); + int index; + + if (!bucket) + return NULL; + + index = search_bucket(bucket, key); + if (index < 0) + return NULL; + + return bucket->flows[index]; } -static void find_buckets(struct dp_table *table, - const struct odp_flow_key *key, - struct sw_flow **buckets[2]) +static u32 flow_hash(const struct dp_table *table, + const struct odp_flow_key *key) { - buckets[0] = find_bucket(table, table->flows[0], flow_hash0(key)); - buckets[1] = find_bucket(table, table->flows[1], flow_hash1(key)); + return jhash2((u32*)key, sizeof *key / sizeof(u32), table->hash_seed); } +/** + * dp_table_lookup - searches flow table for a matching flow + * @table: flow table to search + * @key: flow key for which to search + * + * Searches @table for a flow whose key is equal to @key. Returns the flow if + * successful, otherwise %NULL. + */ struct sw_flow *dp_table_lookup(struct dp_table *table, const struct odp_flow_key *key) { - struct sw_flow *flow; - flow = lookup_table(table, table->flows[0], flow_hash0(key), key); - if (!flow) - flow = lookup_table(table, table->flows[1], - flow_hash1(key), key); - return flow; + return lookup_flow(table, flow_hash(table, key), key); } +/** + * dp_table_foreach - iterate through flow table + * @table: table to iterate + * @callback: function to call for each flow entry + * @aux: Extra data to pass to @callback + * + * Iterates through all of the flows in @table in hash order, passing each of + * them in turn to @callback. If @callback returns nonzero, this terminates + * the iteration and dp_table_foreach() returns the same value. Returns 0 if + * @callback never returns nonzero. + * + * This function does not try to intelligently handle the case where @callback + * adds or removes flows in @table. + */ int dp_table_foreach(struct dp_table *table, int (*callback)(struct sw_flow *flow, void *aux), void *aux) { unsigned int i, j, k; - for (i = 0; i < 2; i++) { - for (j = 0; j < table->n_buckets >> DP_L1_BITS; j++) { - struct sw_flow **l2 = table->flows[i][j]; - for (k = 0; k < DP_L1_SIZE; k++) { - struct sw_flow *flow = rcu_dereference(l2[k]); - if (flow) { - int error = callback(flow, aux); - if (error) - return error; - } + for (i = 0; i < table->n_buckets >> DP_L1_BITS; i++) { + struct dp_bucket **l2 = table->buckets[i]; + for (j = 0; j < DP_L1_SIZE; j++) { + struct dp_bucket *bucket = rcu_dereference(l2[j]); + if (!bucket) + continue; + + for (k = 0; k < bucket->n_flows; k++) { + int error = (*callback)(bucket->flows[k], aux); + if (error) + return error; } } } @@ -162,18 +217,7 @@ int dp_table_foreach(struct dp_table *table, static int insert_flow(struct sw_flow *flow, void *new_table_) { struct dp_table *new_table = new_table_; - struct sw_flow **buckets[2]; - int i; - - find_buckets(new_table, &flow->key, buckets); - for (i = 0; i < 2; i++) { - if (!*buckets[i]) { - rcu_assign_pointer(*buckets[i], flow); - return 0; - } - } - WARN_ON_ONCE(1); - return 0; + return dp_table_insert(new_table, flow); } static void dp_free_table_rcu(struct rcu_head *rcu) @@ -182,16 +226,34 @@ static void dp_free_table_rcu(struct rcu_head *rcu) dp_table_destroy(table, 0); } +/** + * dp_table_expand - replace datapath's flow table by one with more buckets + * @dp: datapath to expand + * + * Replaces @dp's flow table by one that has twice as many buckets. All of the + * flows in @dp's flow table are moved to the new flow table. Returns 0 if + * successful, otherwise a negative error. + */ int dp_table_expand(struct datapath *dp) { struct dp_table *old_table = rcu_dereference(dp->table); - struct dp_table *new_table = dp_table_create(old_table->n_buckets * 2); + struct dp_table *new_table; + + new_table = dp_table_create(old_table->n_buckets * 2); if (!new_table) - return -ENOMEM; - dp_table_foreach(old_table, insert_flow, new_table); + goto error; + + if (dp_table_foreach(old_table, insert_flow, new_table)) + goto error_free_new_table; + rcu_assign_pointer(dp->table, new_table); call_rcu(&old_table->rcu, dp_free_table_rcu); return 0; + +error_free_new_table: + dp_table_destroy(new_table, 0); +error: + return -ENOMEM; } static void dp_free_table_and_flows_rcu(struct rcu_head *rcu) @@ -200,6 +262,13 @@ static void dp_free_table_and_flows_rcu(struct rcu_head *rcu) dp_table_destroy(table, 1); } +/** + * dp_table_flush - clear datapath's flow table + * @dp: datapath to clear + * + * Replaces @dp's flow table by an empty flow table, destroying all the flows + * in the old table (after a suitable RCU grace period). + */ int dp_table_flush(struct datapath *dp) { struct dp_table *old_table = rcu_dereference(dp->table); @@ -211,38 +280,88 @@ int dp_table_flush(struct datapath *dp) return 0; } -struct sw_flow ** -dp_table_lookup_for_insert(struct dp_table *table, - const struct odp_flow_key *target) +static void dp_free_bucket_rcu(struct rcu_head *rcu) { - struct sw_flow **buckets[2]; - struct sw_flow **empty_bucket = NULL; - int i; + struct dp_bucket *bucket = container_of(rcu, struct dp_bucket, rcu); + kfree(bucket); +} - find_buckets(table, target, buckets); - for (i = 0; i < 2; i++) { - struct sw_flow *f = rcu_dereference(*buckets[i]); - if (f) { - if (!memcmp(&f->key, target, sizeof(struct odp_flow_key))) - return buckets[i]; - } else if (!empty_bucket) - empty_bucket = buckets[i]; - } - return empty_bucket; +/** + * dp_table_insert - insert flow into table + * @table: table in which to insert flow + * @target: flow to insert + * + * The caller must ensure that no flow with key identical to @target->key + * already exists in @table. Returns 0 or a negative error (currently just + * -ENOMEM). + * + * The caller is responsible for updating &struct datapath's n_flows member. + */ +int dp_table_insert(struct dp_table *table, struct sw_flow *target) +{ + u32 hash = flow_hash(table, &target->key); + struct dp_bucket **oldp = find_bucket(table, hash); + struct dp_bucket *old = *rcu_dereference(oldp); + unsigned int n = old ? old->n_flows : 0; + struct dp_bucket *new = dp_bucket_alloc(n + 1); + + if (!new) + return -ENOMEM; + + new->n_flows = n + 1; + if (old) + memcpy(new->flows, old->flows, n * sizeof(struct sw_flow*)); + new->flows[n] = target; + + rcu_assign_pointer(*oldp, new); + if (old) + call_rcu(&old->rcu, dp_free_bucket_rcu); + + return 0; } +/** + * dp_table_delete - remove flow from table + * @table: table from which to remove flow + * @target: flow to remove + * + * The caller must ensure that @target itself is in @table. (It is not + * good enough for @table to contain a different flow with a key equal to + * @target's key.) + * + * Returns 0 or a negative error (currently just -ENOMEM). Yes, it *is* + * possible for a flow deletion to fail due to lack of memory. + * + * The caller is responsible for updating &struct datapath's n_flows member. + */ int dp_table_delete(struct dp_table *table, struct sw_flow *target) { - struct sw_flow **buckets[2]; - int i; + u32 hash = flow_hash(table, &target->key); + struct dp_bucket **oldp = find_bucket(table, hash); + struct dp_bucket *old = *rcu_dereference(oldp); + unsigned int n = old->n_flows; + struct dp_bucket *new; + + if (n > 1) { + unsigned int i; - find_buckets(table, &target->key, buckets); - for (i = 0; i < 2; i++) { - struct sw_flow *flow = rcu_dereference(*buckets[i]); - if (flow == target) { - rcu_assign_pointer(*buckets[i], NULL); - return 0; + new = dp_bucket_alloc(n - 1); + if (!new) + return -ENOMEM; + + new->n_flows = 0; + for (i = 0; i < n; i++) { + struct sw_flow *flow = old->flows[i]; + if (flow != target) + new->flows[new->n_flows++] = flow; } + WARN_ON_ONCE(new->n_flows != n - 1); + } else { + new = NULL; } - return -ENOENT; + + rcu_assign_pointer(*oldp, new); + call_rcu(&old->rcu, dp_free_bucket_rcu); + + return 0; } |