diff options
-rw-r--r-- | datapath/datapath.c | 35 | ||||
-rw-r--r-- | datapath/datapath.h | 42 | ||||
-rw-r--r-- | datapath/flow.c | 43 | ||||
-rw-r--r-- | datapath/table.c | 337 | ||||
-rwxr-xr-x | debian/corekeeper.init | 3 | ||||
-rw-r--r-- | extras/ezio/ovs-switchui.c | 2 | ||||
-rw-r--r-- | include/openflow/openflow-mgmt.h | 3 | ||||
-rw-r--r-- | include/openvswitch/datapath-protocol.h | 3 | ||||
-rw-r--r-- | lib/flow.c | 57 | ||||
-rw-r--r-- | lib/flow.h | 1 | ||||
-rw-r--r-- | lib/netdev-linux.c | 122 | ||||
-rw-r--r-- | lib/netdev-provider.h | 17 | ||||
-rw-r--r-- | lib/netdev.c | 48 | ||||
-rw-r--r-- | lib/netdev.h | 5 | ||||
-rw-r--r-- | ofproto/in-band.c | 385 | ||||
-rw-r--r-- | ofproto/in-band.h | 9 | ||||
-rw-r--r-- | ofproto/ofproto.c | 29 | ||||
-rw-r--r-- | vswitchd/bridge.c | 15 | ||||
-rw-r--r-- | vswitchd/mgmt.c | 65 |
19 files changed, 908 insertions, 313 deletions
diff --git a/datapath/datapath.c b/datapath/datapath.c index d822b73c..6f96ee40 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -839,7 +839,7 @@ static void clear_stats(struct sw_flow *flow) static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp) { struct odp_flow_put uf; - struct sw_flow *flow, **bucket; + struct sw_flow *flow; struct dp_table *table; struct odp_flow_stats stats; int error; @@ -849,15 +849,10 @@ static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp) goto error; uf.flow.key.reserved = 0; -retry: table = rcu_dereference(dp->table); - bucket = dp_table_lookup_for_insert(table, &uf.flow.key); - if (!bucket) { - /* No such flow, and the slots where it could go are full. */ - error = uf.flags & ODPPF_CREATE ? -EFBIG : -ENOENT; - goto error; - } else if (!*bucket) { - /* No such flow, but we found an available slot for it. */ + flow = dp_table_lookup(table, &uf.flow.key); + if (!flow) { + /* No such flow. */ struct sw_flow_actions *acts; error = -ENOENT; @@ -865,14 +860,15 @@ retry: goto error; /* Expand table, if necessary, to make room. */ - if (dp->n_flows * 4 >= table->n_buckets && - table->n_buckets < DP_MAX_BUCKETS) { + if (dp->n_flows >= table->n_buckets) { + error = -ENOSPC; + if (table->n_buckets >= DP_MAX_BUCKETS) + goto error; + error = dp_table_expand(dp); if (error) goto error; - - /* The bucket's location has changed. Try again. */ - goto retry; + table = rcu_dereference(dp->table); } /* Allocate flow. */ @@ -892,12 +888,13 @@ retry: rcu_assign_pointer(flow->sf_acts, acts); /* Put flow in bucket. */ - rcu_assign_pointer(*bucket, flow); + error = dp_table_insert(table, flow); + if (error) + goto error_free_flow_acts; dp->n_flows++; memset(&stats, 0, sizeof(struct odp_flow_stats)); } else { /* We found a matching flow. 
*/ - struct sw_flow *flow = *rcu_dereference(bucket); struct sw_flow_actions *old_acts, *new_acts; unsigned long int flags; @@ -935,6 +932,8 @@ retry: return -EFAULT; return 0; +error_free_flow_acts: + kfree(flow->sf_acts); error_free_flow: kmem_cache_free(flow_cache, flow); error: @@ -1167,8 +1166,8 @@ static int get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp) int i; stats.n_flows = dp->n_flows; - stats.cur_capacity = rcu_dereference(dp->table)->n_buckets * 2; - stats.max_capacity = DP_MAX_BUCKETS * 2; + stats.cur_capacity = rcu_dereference(dp->table)->n_buckets; + stats.max_capacity = DP_MAX_BUCKETS; stats.n_ports = dp->n_ports; stats.max_ports = DP_MAX_PORTS; stats.max_groups = DP_MAX_GROUPS; diff --git a/datapath/datapath.h b/datapath/datapath.h index b5200848..122706a8 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -29,20 +29,54 @@ #define DP_MAX_PORTS 256 #define DP_MAX_GROUPS 16 -#define DP_L2_BITS (PAGE_SHIFT - ilog2(sizeof(struct sw_flow*))) +#define DP_L2_BITS (PAGE_SHIFT - ilog2(sizeof(struct dp_bucket*))) #define DP_L2_SIZE (1 << DP_L2_BITS) #define DP_L2_SHIFT 0 -#define DP_L1_BITS (PAGE_SHIFT - ilog2(sizeof(struct sw_flow**))) +#define DP_L1_BITS (PAGE_SHIFT - ilog2(sizeof(struct dp_bucket**))) #define DP_L1_SIZE (1 << DP_L1_BITS) #define DP_L1_SHIFT DP_L2_BITS +/* For 4 kB pages, this is 1,048,576 on 32-bit or 262,144 on 64-bit. */ #define DP_MAX_BUCKETS (DP_L1_SIZE * DP_L2_SIZE) +/** + * struct dp_table - flow table + * @n_buckets: number of buckets (a power of 2 between %DP_L1_SIZE and + * %DP_MAX_BUCKETS) + * @buckets: pointer to @n_buckets/%DP_L1_SIZE pointers to %DP_L1_SIZE pointers + * to buckets + * @hash_seed: random number used for flow hashing, to make the hash + * distribution harder to predict + * @rcu: RCU callback structure + * + * The @buckets array is logically an array of pointers to buckets. 
It is + * broken into two levels to avoid the need to kmalloc() any object larger than + * a single page or to use vmalloc(). @buckets is always nonnull, as is each + * @buckets[i], but each @buckets[i][j] is nonnull only if the specified hash + * bucket is nonempty (for 0 <= i < @n_buckets/%DP_L1_SIZE, 0 <= j < + * %DP_L1_SIZE). + */ struct dp_table { unsigned int n_buckets; - struct sw_flow ***flows[2]; + struct dp_bucket ***buckets; + unsigned int hash_seed; + struct rcu_head rcu; +}; + +/** + * struct dp_bucket - single bucket within datapath flow table + * @rcu: RCU callback structure + * @n_flows: number of flows in @flows[] array + * @flows: array of @n_flows pointers to flows + * + * The expected number of flows per bucket is 1, but this allows for an + * arbitrary number of collisions. + */ +struct dp_bucket { struct rcu_head rcu; + unsigned int n_flows; + struct sw_flow *flows[]; }; #define DP_N_QUEUES 2 @@ -105,7 +139,7 @@ extern int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd); struct dp_table *dp_table_create(unsigned int n_buckets); void dp_table_destroy(struct dp_table *, int free_flows); struct sw_flow *dp_table_lookup(struct dp_table *, const struct odp_flow_key *); -struct sw_flow **dp_table_lookup_for_insert(struct dp_table *, const struct odp_flow_key *); +int dp_table_insert(struct dp_table *, struct sw_flow *); int dp_table_delete(struct dp_table *, struct sw_flow *); int dp_table_expand(struct datapath *); int dp_table_flush(struct datapath *); diff --git a/datapath/flow.c b/datapath/flow.c index 2ac79e70..ae60617d 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -18,6 +18,7 @@ #include <linux/module.h> #include <linux/in.h> #include <linux/rcupdate.h> +#include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/ip.h> #include <linux/tcp.h> @@ -29,6 +30,27 @@ struct kmem_cache *flow_cache; +struct arp_eth_header +{ + __be16 ar_hrd; /* format of hardware address */ + __be16 ar_pro; /* format of protocol 
address */ + unsigned char ar_hln; /* length of hardware address */ + unsigned char ar_pln; /* length of protocol address */ + __be16 ar_op; /* ARP opcode (command) */ + + /* Ethernet+IPv4 specific members. */ + unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */ + unsigned char ar_sip[4]; /* sender IP address */ + unsigned char ar_tha[ETH_ALEN]; /* target hardware address */ + unsigned char ar_tip[4]; /* target IP address */ +} __attribute__((packed)); + +static inline int arphdr_ok(struct sk_buff *skb) +{ + int nh_ofs = skb_network_offset(skb); + return pskb_may_pull(skb, nh_ofs + sizeof(struct arp_eth_header)); +} + static inline int iphdr_ok(struct sk_buff *skb) { int nh_ofs = skb_network_offset(skb); @@ -266,6 +288,27 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key) } else { retval = 1; } + } else if (key->dl_type == htons(ETH_P_ARP) && arphdr_ok(skb)) { + struct arp_eth_header *arp; + + arp = (struct arp_eth_header *)skb_network_header(skb); + + if (arp->ar_hrd == htons(1) + && arp->ar_pro == htons(ETH_P_IP) + && arp->ar_hln == ETH_ALEN + && arp->ar_pln == 4) { + + /* We only match on the lower 8 bits of the opcode. 
*/ + if (ntohs(arp->ar_op) <= 0xff) { + key->nw_proto = ntohs(arp->ar_op); + } + + if (key->nw_proto == ARPOP_REQUEST + || key->nw_proto == ARPOP_REPLY) { + memcpy(&key->nw_src, arp->ar_sip, sizeof(key->nw_src)); + memcpy(&key->nw_dst, arp->ar_tip, sizeof(key->nw_dst)); + } + } } else { skb_reset_transport_header(skb); } diff --git a/datapath/table.c b/datapath/table.c index 11aeb888..23ae8abe 100644 --- a/datapath/table.c +++ b/datapath/table.c @@ -11,50 +11,76 @@ #include <linux/gfp.h> #include <linux/jhash.h> +#include <linux/random.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/highmem.h> #include <asm/pgtable.h> -static void free_table(struct sw_flow ***flows, unsigned int n_buckets, - int free_flows) +static inline int bucket_size(int n_flows) +{ + return sizeof(struct dp_bucket) + sizeof(struct sw_flow*) * n_flows; +} + +static struct dp_bucket *dp_bucket_alloc(int n_flows) +{ + return kmalloc(bucket_size(n_flows), GFP_KERNEL); +} + +static void free_buckets(struct dp_bucket ***l1, unsigned int n_buckets, + int free_flows) { unsigned int i; for (i = 0; i < n_buckets >> DP_L1_BITS; i++) { - struct sw_flow **l2 = flows[i]; - if (free_flows) { - unsigned int j; - for (j = 0; j < DP_L1_SIZE; j++) { - if (l2[j]) - flow_free(l2[j]); + struct dp_bucket **l2 = l1[i]; + unsigned int j; + + for (j = 0; j < DP_L1_SIZE; j++) { + struct dp_bucket *bucket = l2[j]; + if (!bucket) + continue; + + if (free_flows) { + unsigned int k; + for (k = 0; k < bucket->n_flows; k++) + flow_free(bucket->flows[k]); } + kfree(bucket); } free_page((unsigned long)l2); } - kfree(flows); + kfree(l1); } -static struct sw_flow ***alloc_table(unsigned int n_buckets) +static struct dp_bucket ***alloc_buckets(unsigned int n_buckets) { - struct sw_flow ***flows; + struct dp_bucket ***l1; unsigned int i; - flows = kmalloc((n_buckets >> DP_L1_BITS) * sizeof(struct sw_flow**), - GFP_KERNEL); - if (!flows) + l1 = kmalloc((n_buckets >> DP_L1_BITS) * 
sizeof(struct dp_bucket**), + GFP_KERNEL); + if (!l1) return NULL; for (i = 0; i < n_buckets >> DP_L1_BITS; i++) { - flows[i] = (struct sw_flow **)get_zeroed_page(GFP_KERNEL); - if (!flows[i]) { - free_table(flows, i << DP_L1_BITS, 0); + l1[i] = (struct dp_bucket **)get_zeroed_page(GFP_KERNEL); + if (!l1[i]) { + free_buckets(l1, i << DP_L1_BITS, 0); return NULL; } } - return flows; + return l1; } +/** + * dp_table_create - create and return a new flow table + * @n_buckets: number of buckets in the new table + * + * Creates and returns a new flow table, or %NULL if memory cannot be + * allocated. @n_buckets must be a power of 2 in the range %DP_L1_SIZE to + * %DP_MAX_BUCKETS. + */ struct dp_table *dp_table_create(unsigned int n_buckets) { struct dp_table *table; @@ -64,95 +90,124 @@ struct dp_table *dp_table_create(unsigned int n_buckets) goto err; table->n_buckets = n_buckets; - table->flows[0] = alloc_table(n_buckets); - if (!table[0].flows) - goto err_free_tables; - - table->flows[1] = alloc_table(n_buckets); - if (!table->flows[1]) - goto err_free_flows0; + table->buckets = alloc_buckets(n_buckets); + if (!table->buckets) + goto err_free_table; + get_random_bytes(&table->hash_seed, sizeof table->hash_seed); return table; -err_free_flows0: - free_table(table->flows[0], table->n_buckets, 0); -err_free_tables: +err_free_table: kfree(table); err: return NULL; } +/** + * dp_table_destroy - destroy flow table and optionally the flows it contains + * @table: table to destroy (must not be %NULL) + * @free_flows: whether to destroy the flows + * + * If @free_flows is zero, then the buckets in @table are destroyed but not the + * flows within those buckets. This behavior is useful when a table is being + * replaced by a larger or smaller one without destroying the flows. + * + * If @free_flows is nonzero, then the flows in @table are destroyed as well as + * the buckets. 
+ */ void dp_table_destroy(struct dp_table *table, int free_flows) { - int i; - for (i = 0; i < 2; i++) - free_table(table->flows[i], table->n_buckets, free_flows); + free_buckets(table->buckets, table->n_buckets, free_flows); kfree(table); } -static struct sw_flow **find_bucket(struct dp_table *table, - struct sw_flow ***flows, u32 hash) +static struct dp_bucket **find_bucket(struct dp_table *table, u32 hash) { unsigned int l1 = (hash & (table->n_buckets - 1)) >> DP_L1_SHIFT; unsigned int l2 = hash & ((1 << DP_L2_BITS) - 1); - return &flows[l1][l2]; + return &table->buckets[l1][l2]; } -static struct sw_flow *lookup_table(struct dp_table *table, - struct sw_flow ***flows, u32 hash, - const struct odp_flow_key *key) +static int search_bucket(const struct dp_bucket *bucket, const struct odp_flow_key *key) { - struct sw_flow **bucket = find_bucket(table, flows, hash); - struct sw_flow *flow = rcu_dereference(*bucket); - if (flow && !memcmp(&flow->key, key, sizeof(struct odp_flow_key))) - return flow; - return NULL; -} + int i; -static u32 flow_hash0(const struct odp_flow_key *key) -{ - return jhash2((u32*)key, sizeof *key / sizeof(u32), 0xaaaaaaaa); + for (i = 0; i < bucket->n_flows; i++) { + struct sw_flow *flow = rcu_dereference(bucket->flows[i]); + if (!memcmp(&flow->key, key, sizeof(struct odp_flow_key))) + return i; + } + + return -1; } -static u32 flow_hash1(const struct odp_flow_key *key) +static struct sw_flow *lookup_flow(struct dp_table *table, u32 hash, + const struct odp_flow_key *key) { - return jhash2((u32*)key, sizeof *key / sizeof(u32), 0x55555555); + struct dp_bucket **bucketp = find_bucket(table, hash); + struct dp_bucket *bucket = rcu_dereference(*bucketp); + int index; + + if (!bucket) + return NULL; + + index = search_bucket(bucket, key); + if (index < 0) + return NULL; + + return bucket->flows[index]; } -static void find_buckets(struct dp_table *table, - const struct odp_flow_key *key, - struct sw_flow **buckets[2]) +static u32 flow_hash(const 
struct dp_table *table, + const struct odp_flow_key *key) { - buckets[0] = find_bucket(table, table->flows[0], flow_hash0(key)); - buckets[1] = find_bucket(table, table->flows[1], flow_hash1(key)); + return jhash2((u32*)key, sizeof *key / sizeof(u32), table->hash_seed); } +/** + * dp_table_lookup - searches flow table for a matching flow + * @table: flow table to search + * @key: flow key for which to search + * + * Searches @table for a flow whose key is equal to @key. Returns the flow if + * successful, otherwise %NULL. + */ struct sw_flow *dp_table_lookup(struct dp_table *table, const struct odp_flow_key *key) { - struct sw_flow *flow; - flow = lookup_table(table, table->flows[0], flow_hash0(key), key); - if (!flow) - flow = lookup_table(table, table->flows[1], - flow_hash1(key), key); - return flow; + return lookup_flow(table, flow_hash(table, key), key); } +/** + * dp_table_foreach - iterate through flow table + * @table: table to iterate + * @callback: function to call for each flow entry + * @aux: Extra data to pass to @callback + * + * Iterates through all of the flows in @table in hash order, passing each of + * them in turn to @callback. If @callback returns nonzero, this terminates + * the iteration and dp_table_foreach() returns the same value. Returns 0 if + * @callback never returns nonzero. + * + * This function does not try to intelligently handle the case where @callback + * adds or removes flows in @table. 
+ */ int dp_table_foreach(struct dp_table *table, int (*callback)(struct sw_flow *flow, void *aux), void *aux) { unsigned int i, j, k; - for (i = 0; i < 2; i++) { - for (j = 0; j < table->n_buckets >> DP_L1_BITS; j++) { - struct sw_flow **l2 = table->flows[i][j]; - for (k = 0; k < DP_L1_SIZE; k++) { - struct sw_flow *flow = rcu_dereference(l2[k]); - if (flow) { - int error = callback(flow, aux); - if (error) - return error; - } + for (i = 0; i < table->n_buckets >> DP_L1_BITS; i++) { + struct dp_bucket **l2 = table->buckets[i]; + for (j = 0; j < DP_L1_SIZE; j++) { + struct dp_bucket *bucket = rcu_dereference(l2[j]); + if (!bucket) + continue; + + for (k = 0; k < bucket->n_flows; k++) { + int error = (*callback)(bucket->flows[k], aux); + if (error) + return error; } } } @@ -162,18 +217,7 @@ int dp_table_foreach(struct dp_table *table, static int insert_flow(struct sw_flow *flow, void *new_table_) { struct dp_table *new_table = new_table_; - struct sw_flow **buckets[2]; - int i; - - find_buckets(new_table, &flow->key, buckets); - for (i = 0; i < 2; i++) { - if (!*buckets[i]) { - rcu_assign_pointer(*buckets[i], flow); - return 0; - } - } - WARN_ON_ONCE(1); - return 0; + return dp_table_insert(new_table, flow); } static void dp_free_table_rcu(struct rcu_head *rcu) @@ -182,16 +226,34 @@ static void dp_free_table_rcu(struct rcu_head *rcu) dp_table_destroy(table, 0); } +/** + * dp_table_expand - replace datapath's flow table by one with more buckets + * @dp: datapath to expand + * + * Replaces @dp's flow table by one that has twice as many buckets. All of the + * flows in @dp's flow table are moved to the new flow table. Returns 0 if + * successful, otherwise a negative error. 
+ */ int dp_table_expand(struct datapath *dp) { struct dp_table *old_table = rcu_dereference(dp->table); - struct dp_table *new_table = dp_table_create(old_table->n_buckets * 2); + struct dp_table *new_table; + + new_table = dp_table_create(old_table->n_buckets * 2); if (!new_table) - return -ENOMEM; - dp_table_foreach(old_table, insert_flow, new_table); + goto error; + + if (dp_table_foreach(old_table, insert_flow, new_table)) + goto error_free_new_table; + rcu_assign_pointer(dp->table, new_table); call_rcu(&old_table->rcu, dp_free_table_rcu); return 0; + +error_free_new_table: + dp_table_destroy(new_table, 0); +error: + return -ENOMEM; } static void dp_free_table_and_flows_rcu(struct rcu_head *rcu) @@ -200,6 +262,13 @@ static void dp_free_table_and_flows_rcu(struct rcu_head *rcu) dp_table_destroy(table, 1); } +/** + * dp_table_flush - clear datapath's flow table + * @dp: datapath to clear + * + * Replaces @dp's flow table by an empty flow table, destroying all the flows + * in the old table (after a suitable RCU grace period). 
+ */ int dp_table_flush(struct datapath *dp) { struct dp_table *old_table = rcu_dereference(dp->table); @@ -211,38 +280,88 @@ int dp_table_flush(struct datapath *dp) return 0; } -struct sw_flow ** -dp_table_lookup_for_insert(struct dp_table *table, - const struct odp_flow_key *target) +static void dp_free_bucket_rcu(struct rcu_head *rcu) { - struct sw_flow **buckets[2]; - struct sw_flow **empty_bucket = NULL; - int i; + struct dp_bucket *bucket = container_of(rcu, struct dp_bucket, rcu); + kfree(bucket); +} - find_buckets(table, target, buckets); - for (i = 0; i < 2; i++) { - struct sw_flow *f = rcu_dereference(*buckets[i]); - if (f) { - if (!memcmp(&f->key, target, sizeof(struct odp_flow_key))) - return buckets[i]; - } else if (!empty_bucket) - empty_bucket = buckets[i]; - } - return empty_bucket; +/** + * dp_table_insert - insert flow into table + * @table: table in which to insert flow + * @target: flow to insert + * + * The caller must ensure that no flow with key identical to @target->key + * already exists in @table. Returns 0 or a negative error (currently just + * -ENOMEM). + * + * The caller is responsible for updating &struct datapath's n_flows member. + */ +int dp_table_insert(struct dp_table *table, struct sw_flow *target) +{ + u32 hash = flow_hash(table, &target->key); + struct dp_bucket **oldp = find_bucket(table, hash); + struct dp_bucket *old = *rcu_dereference(oldp); + unsigned int n = old ? old->n_flows : 0; + struct dp_bucket *new = dp_bucket_alloc(n + 1); + + if (!new) + return -ENOMEM; + + new->n_flows = n + 1; + if (old) + memcpy(new->flows, old->flows, n * sizeof(struct sw_flow*)); + new->flows[n] = target; + + rcu_assign_pointer(*oldp, new); + if (old) + call_rcu(&old->rcu, dp_free_bucket_rcu); + + return 0; } +/** + * dp_table_delete - remove flow from table + * @table: table from which to remove flow + * @target: flow to remove + * + * The caller must ensure that @target itself is in @table. 
(It is not + * good enough for @table to contain a different flow with a key equal to + * @target's key.) + * + * Returns 0 or a negative error (currently just -ENOMEM). Yes, it *is* + * possible for a flow deletion to fail due to lack of memory. + * + * The caller is responsible for updating &struct datapath's n_flows member. + */ int dp_table_delete(struct dp_table *table, struct sw_flow *target) { - struct sw_flow **buckets[2]; - int i; + u32 hash = flow_hash(table, &target->key); + struct dp_bucket **oldp = find_bucket(table, hash); + struct dp_bucket *old = *rcu_dereference(oldp); + unsigned int n = old->n_flows; + struct dp_bucket *new; + + if (n > 1) { + unsigned int i; - find_buckets(table, &target->key, buckets); - for (i = 0; i < 2; i++) { - struct sw_flow *flow = rcu_dereference(*buckets[i]); - if (flow == target) { - rcu_assign_pointer(*buckets[i], NULL); - return 0; + new = dp_bucket_alloc(n - 1); + if (!new) + return -ENOMEM; + + new->n_flows = 0; + for (i = 0; i < n; i++) { + struct sw_flow *flow = old->flows[i]; + if (flow != target) + new->flows[new->n_flows++] = flow; } + WARN_ON_ONCE(new->n_flows != n - 1); + } else { + new = NULL; } - return -ENOENT; + + rcu_assign_pointer(*oldp, new); + call_rcu(&old->rcu, dp_free_bucket_rcu); + + return 0; } diff --git a/debian/corekeeper.init b/debian/corekeeper.init index 27d62a12..d820b028 100755 --- a/debian/corekeeper.init +++ b/debian/corekeeper.init @@ -6,6 +6,7 @@ # adjust it to the program you want to run. # # Copyright (c) 2007 Javier Fernandez-Sanguino <jfs@debian.org> +# Copyright (c) 2009 Nicira Networks, Inc. # # This is free software; you may redistribute it and/or modify # it under the terms of the GNU General Public License as @@ -42,7 +43,7 @@ set -e case "$1" in start) log_daemon_msg "Initializing core dump location..." 
- if echo "/var/log/core/core.%e.%t" > /proc/sys/kernel/core_pattern + if echo "/var/log/core/core.%e.%t.%p" > /proc/sys/kernel/core_pattern then log_progress_msg "success" log_end_msg 0 diff --git a/extras/ezio/ovs-switchui.c b/extras/ezio/ovs-switchui.c index 721717ee..0f6640e0 100644 --- a/extras/ezio/ovs-switchui.c +++ b/extras/ezio/ovs-switchui.c @@ -2480,7 +2480,7 @@ choose_netdevs(struct svec *choices) retval = netdev_open(name, NETDEV_ETH_TYPE_NONE, &netdev); if (!retval) { - bool exclude = netdev_get_in4(netdev, NULL) == 0; + bool exclude = netdev_get_in4(netdev, NULL, NULL) == 0; netdev_close(netdev); if (exclude) { continue; diff --git a/include/openflow/openflow-mgmt.h b/include/openflow/openflow-mgmt.h index c3b62c91..04017d42 100644 --- a/include/openflow/openflow-mgmt.h +++ b/include/openflow/openflow-mgmt.h @@ -243,7 +243,8 @@ enum ofmp_extended_data_flags { /* Body of extended data message. May be sent by either the switch or the * controller to send messages that are greater than 65535 bytes in - * length. + * length. The OpenFlow transaction id (xid) must be the same for all + * the individual OpenFlow messages that make up an extended message. * * OFMPT_EXTENDED_DATA (switch <-> controller) */ struct ofmp_extended_data { diff --git a/include/openvswitch/datapath-protocol.h b/include/openvswitch/datapath-protocol.h index 868c8542..04423d94 100644 --- a/include/openvswitch/datapath-protocol.h +++ b/include/openvswitch/datapath-protocol.h @@ -160,7 +160,8 @@ struct odp_flow_key { __be16 tp_dst; /* TCP/UDP destination port. */ __u8 dl_src[ETH_ALEN]; /* Ethernet source address. */ __u8 dl_dst[ETH_ALEN]; /* Ethernet destination address. */ - __u8 nw_proto; /* IP protocol. */ + __u8 nw_proto; /* IP protocol or lower 8 bits of + ARP opcode. */ __u8 reserved; /* Pad to 64 bits. 
*/ }; @@ -31,6 +31,12 @@ #include "vlog.h" #define THIS_MODULE VLM_flow +static struct arp_eth_header * +pull_arp(struct ofpbuf *packet) +{ + return ofpbuf_try_pull(packet, ARP_ETH_HEADER_LEN); +} + static struct ip_header * pull_ip(struct ofpbuf *packet) { @@ -185,6 +191,23 @@ flow_extract(struct ofpbuf *packet, uint16_t in_port, flow_t *flow) retval = 1; } } + } else if (flow->dl_type == htons(ETH_TYPE_ARP)) { + const struct arp_eth_header *arp = pull_arp(&b); + if (arp && arp->ar_hrd == htons(1) + && arp->ar_pro == htons(ETH_TYPE_IP) + && arp->ar_hln == ETH_ADDR_LEN + && arp->ar_pln == 4) { + /* We only match on the lower 8 bits of the opcode. */ + if (ntohs(arp->ar_op) <= 0xff) { + flow->nw_proto = ntohs(arp->ar_op); + } + + if ((flow->nw_proto == ARP_OP_REQUEST) + || (flow->nw_proto == ARP_OP_REPLY)) { + flow->nw_src = arp->ar_spa; + flow->nw_dst = arp->ar_tpa; + } + } } } return retval; @@ -212,8 +235,12 @@ flow_extract_stats(const flow_t *flow, struct ofpbuf *packet, stats->n_packets = 1; } +/* The Open vSwitch datapath supports matching on ARP payloads, which + * OpenFlow does not. This function is identical to 'flow_to_match', + * but does not hide the datapath's ability to match on ARP. */ void -flow_to_match(const flow_t *flow, uint32_t wildcards, struct ofp_match *match) +flow_to_ovs_match(const flow_t *flow, uint32_t wildcards, + struct ofp_match *match) { match->wildcards = htonl(wildcards); match->in_port = htons(flow->in_port == ODPP_LOCAL ? OFPP_LOCAL @@ -230,6 +257,26 @@ flow_to_match(const flow_t *flow, uint32_t wildcards, struct ofp_match *match) match->pad = 0; } +/* Extract 'flow' with 'wildcards' into the OpenFlow match structure + * 'match'. */ +void +flow_to_match(const flow_t *flow, uint32_t wildcards, struct ofp_match *match) +{ + flow_to_ovs_match(flow, wildcards, match); + + /* The datapath supports matching on an ARP's opcode and IP addresses, + * but OpenFlow does not. 
We wildcard and zero out the appropriate + * fields so that OpenFlow is unaware of our trickery. */ + if (flow->dl_type == htons(ETH_TYPE_ARP)) { + wildcards |= (OFPFW_NW_PROTO | OFPFW_NW_SRC_ALL | OFPFW_NW_DST_ALL); + match->nw_src = 0; + match->nw_dst = 0; + match->nw_proto = 0; + } + match->wildcards = htonl(wildcards); +} + + void flow_from_match(flow_t *flow, uint32_t *wildcards, const struct ofp_match *match) @@ -237,6 +284,14 @@ flow_from_match(flow_t *flow, uint32_t *wildcards, if (wildcards) { *wildcards = ntohl(match->wildcards); } + /* The datapath supports matching on an ARP's opcode and IP addresses, + * but OpenFlow does not. In case the controller hasn't, we need to + * set the appropriate wildcard bits so that we're externally + * OpenFlow-compliant. */ + if (match->dl_type == htons(ETH_TYPE_ARP)) { + *wildcards |= (OFPFW_NW_PROTO | OFPFW_NW_SRC_ALL | OFPFW_NW_DST_ALL); + } + flow->nw_src = match->nw_src; flow->nw_dst = match->nw_dst; flow->in_port = (match->in_port == htons(OFPP_LOCAL) ? 
ODPP_LOCAL @@ -36,6 +36,7 @@ int flow_extract(struct ofpbuf *, uint16_t in_port, flow_t *); void flow_extract_stats(const flow_t *flow, struct ofpbuf *packet, struct odp_flow_stats *stats); void flow_to_match(const flow_t *, uint32_t wildcards, struct ofp_match *); +void flow_to_ovs_match(const flow_t *, uint32_t wildcards, struct ofp_match *); void flow_from_match(flow_t *, uint32_t *wildcards, const struct ofp_match *); char *flow_to_string(const flow_t *); void flow_format(struct ds *, const flow_t *); diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 3e340444..11d83e97 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -96,7 +96,7 @@ struct netdev_linux_cache { int ifindex; uint8_t etheraddr[ETH_ADDR_LEN]; - struct in_addr in4; + struct in_addr address, netmask; struct in6_addr in6; int mtu; int carrier; @@ -125,6 +125,8 @@ static int netdev_linux_do_ethtool(struct netdev *, struct ethtool_cmd *, int cmd, const char *cmd_name); static int netdev_linux_do_ioctl(const struct netdev *, struct ifreq *, int cmd, const char *cmd_name); +static int netdev_linux_get_ipv4(const struct netdev *, struct in_addr *, + int cmd, const char *cmd_name); static int get_flags(const struct netdev *, int *flagsp); static int set_flags(struct netdev *, int flags); static int do_get_ifindex(const char *netdev_name); @@ -935,49 +937,48 @@ netdev_linux_set_policing(struct netdev *netdev, return 0; } -/* If 'netdev' has an assigned IPv4 address, sets '*in4' to that address (if - * 'in4' is non-null) and returns true. Otherwise, returns false. 
*/ static int -netdev_linux_get_in4(const struct netdev *netdev_, struct in_addr *in4) +netdev_linux_get_in4(const struct netdev *netdev_, + struct in_addr *address, struct in_addr *netmask) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); if (!(netdev->cache->valid & VALID_IN4)) { - const struct sockaddr_in *sin; - struct ifreq ifr; int error; - ifr.ifr_addr.sa_family = AF_INET; - error = netdev_linux_do_ioctl(netdev_, &ifr, + error = netdev_linux_get_ipv4(netdev_, &netdev->cache->address, SIOCGIFADDR, "SIOCGIFADDR"); if (error) { return error; } - sin = (struct sockaddr_in *) &ifr.ifr_addr; - netdev->cache->in4 = sin->sin_addr; + error = netdev_linux_get_ipv4(netdev_, &netdev->cache->netmask, + SIOCGIFNETMASK, "SIOCGIFNETMASK"); + if (error) { + return error; + } + netdev->cache->valid |= VALID_IN4; } - *in4 = netdev->cache->in4; - return in4->s_addr == INADDR_ANY ? EADDRNOTAVAIL : 0; + *address = netdev->cache->address; + *netmask = netdev->cache->netmask; + return address->s_addr == INADDR_ANY ? EADDRNOTAVAIL : 0; } -/* Assigns 'addr' as 'netdev''s IPv4 address and 'mask' as its netmask. If - * 'addr' is INADDR_ANY, 'netdev''s IPv4 address is cleared. Returns a - * positive errno value. 
*/ static int -netdev_linux_set_in4(struct netdev *netdev_, struct in_addr addr, - struct in_addr mask) +netdev_linux_set_in4(struct netdev *netdev_, struct in_addr address, + struct in_addr netmask) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); int error; - error = do_set_addr(netdev_, SIOCSIFADDR, "SIOCSIFADDR", addr); + error = do_set_addr(netdev_, SIOCSIFADDR, "SIOCSIFADDR", address); if (!error) { netdev->cache->valid |= VALID_IN4; - netdev->cache->in4 = addr; - if (addr.s_addr != INADDR_ANY) { + netdev->cache->address = address; + netdev->cache->netmask = netmask; + if (address.s_addr != INADDR_ANY) { error = do_set_addr(netdev_, SIOCSIFNETMASK, - "SIOCSIFNETMASK", mask); + "SIOCSIFNETMASK", netmask); } } return error; @@ -1076,6 +1077,67 @@ netdev_linux_add_router(struct netdev *netdev UNUSED, struct in_addr router) return error; } +static int +netdev_linux_get_next_hop(const struct in_addr *host, struct in_addr *next_hop, + char **netdev_name) +{ + static const char fn[] = "/proc/net/route"; + FILE *stream; + char line[256]; + int ln; + + *netdev_name = NULL; + stream = fopen(fn, "r"); + if (stream == NULL) { + VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, strerror(errno)); + return errno; + } + + ln = 0; + while (fgets(line, sizeof line, stream)) { + if (++ln >= 2) { + char iface[17]; + uint32_t dest, gateway, mask; + int refcnt, metric, mtu; + unsigned int flags, use, window, irtt; + + if (sscanf(line, + "%16s %"SCNx32" %"SCNx32" %04X %d %u %d %"SCNx32 + " %d %u %u\n", + iface, &dest, &gateway, &flags, &refcnt, + &use, &metric, &mask, &mtu, &window, &irtt) != 11) { + + VLOG_WARN_RL(&rl, "%s: could not parse line %d: %s", + fn, ln, line); + continue; + } + if (!(flags & RTF_UP)) { + /* Skip routes that aren't up. */ + continue; + } + + /* The output of 'dest', 'mask', and 'gateway' were given in + * network byte order, so we don't need any endian + * conversions here. 
*/ + if ((dest & mask) == (host->s_addr & mask)) { + if (!gateway) { + /* The host is directly reachable. */ + next_hop->s_addr = 0; + } else { + /* To reach the host, we must go through a gateway. */ + next_hop->s_addr = gateway; + } + *netdev_name = xstrdup(iface); + fclose(stream); + return 0; + } + } + } + + fclose(stream); + return ENXIO; +} + /* Looks up the ARP table entry for 'ip' on 'netdev'. If one exists and can be * successfully retrieved, it stores the corresponding MAC address in 'mac' and * returns 0. Otherwise, it returns a positive errno value; in particular, @@ -1269,6 +1331,7 @@ const struct netdev_class netdev_linux_class = { netdev_linux_set_in4, netdev_linux_get_in6, netdev_linux_add_router, + netdev_linux_get_next_hop, netdev_linux_arp_lookup, netdev_linux_update_flags, @@ -1312,6 +1375,7 @@ const struct netdev_class netdev_tap_class = { netdev_linux_set_in4, netdev_linux_get_in6, netdev_linux_add_router, + netdev_linux_get_next_hop, netdev_linux_arp_lookup, netdev_linux_update_flags, @@ -1591,3 +1655,19 @@ netdev_linux_do_ioctl(const struct netdev *netdev, struct ifreq *ifr, } return 0; } + +static int +netdev_linux_get_ipv4(const struct netdev *netdev, struct in_addr *ip, + int cmd, const char *cmd_name) +{ + struct ifreq ifr; + int error; + + ifr.ifr_addr.sa_family = AF_INET; + error = netdev_linux_do_ioctl(netdev, &ifr, cmd, cmd_name); + if (!error) { + const struct sockaddr_in *sin = (struct sockaddr_in *) &ifr.ifr_addr; + *ip = sin->sin_addr; + } + return error; +} diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index 9c880b1c..a573e246 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -208,7 +208,8 @@ struct netdev_class { int (*set_policing)(struct netdev *netdev, unsigned int kbits_rate, unsigned int kbits_burst); - /* If 'netdev' has an assigned IPv4 address, sets '*in4' to that address. 
+ /* If 'netdev' has an assigned IPv4 address, sets '*address' to that + * address and '*netmask' to the associated netmask. * * The following error values have well-defined meanings: * @@ -218,7 +219,8 @@ struct netdev_class { * * This function may be set to null if it would always return EOPNOTSUPP * anyhow. */ - int (*get_in4)(const struct netdev *netdev, struct in_addr *in4); + int (*get_in4)(const struct netdev *netdev, struct in_addr *address, + struct in_addr *netmask); /* Assigns 'addr' as 'netdev''s IPv4 address and 'mask' as its netmask. If * 'addr' is INADDR_ANY, 'netdev''s IPv4 address is cleared. @@ -246,6 +248,17 @@ struct netdev_class { * anyhow. */ int (*add_router)(struct netdev *netdev, struct in_addr router); + /* Looks up the next hop for 'host'. If successful, stores the next hop + * gateway's address (0 if 'host' is on a directly connected network) in + * '*next_hop' and a copy of the name of the device to reach 'host' in + * '*netdev_name', and returns 0. The caller is responsible for freeing + * '*netdev_name' (by calling free()). + * + * This function may be set to null if it would always return EOPNOTSUPP + * anyhow. */ + int (*get_next_hop)(const struct in_addr *host, struct in_addr *next_hop, + char **netdev_name); + /* Looks up the ARP table entry for 'ip' on 'netdev' and stores the * corresponding MAC address in 'mac'. A return value of ENXIO, in * particular, indicates that there is no ARP table entry for 'ip' on diff --git a/lib/netdev.c b/lib/netdev.c index dcb63fa0..38610e11 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -391,9 +391,9 @@ netdev_set_advertisements(struct netdev *netdev, uint32_t advertise) : EOPNOTSUPP); } -/* If 'netdev' has an assigned IPv4 address, sets '*in4' to that address and - * returns 0. Otherwise, returns a positive errno value and sets '*in4' to 0 - * (INADDR_ANY). +/* If 'netdev' has an assigned IPv4 address, sets '*address' to that address + * and '*netmask' to its netmask and returns 0.
Otherwise, returns a positive + * errno value and sets '*address' and '*netmask' to 0 (INADDR_ANY). * * The following error values have well-defined meanings: * @@ -401,18 +401,24 @@ netdev_set_advertisements(struct netdev *netdev, uint32_t advertise) * * - EOPNOTSUPP: No IPv4 network stack attached to 'netdev'. * - * 'in4' may be null, in which case the address itself is not reported. */ + * 'address' or 'netmask' or both may be null, in which case the address or netmask + * is not reported. */ int -netdev_get_in4(const struct netdev *netdev, struct in_addr *in4) +netdev_get_in4(const struct netdev *netdev, + struct in_addr *address_, struct in_addr *netmask_) { - struct in_addr dummy; + struct in_addr address; + struct in_addr netmask; int error; error = (netdev->class->get_in4 - ? netdev->class->get_in4(netdev, in4 ? in4 : &dummy) + ? netdev->class->get_in4(netdev, &address, &netmask) : EOPNOTSUPP); - if (error && in4) { - in4->s_addr = 0; + if (address_) { + address_->s_addr = error ? 0 : address.s_addr; + } + if (netmask_) { + netmask_->s_addr = error ? 0 : netmask.s_addr; } return error; } @@ -439,6 +445,28 @@ netdev_add_router(struct netdev *netdev, struct in_addr router) : EOPNOTSUPP); } +/* Looks up the next hop for 'host' for the TCP/IP stack that corresponds to + * 'netdev'. If a route cannot be determined, sets '*next_hop' to 0, + * '*netdev_name' to null, and returns a positive errno value. Otherwise, if a + * next hop is found, stores the next hop gateway's address (0 if 'host' is on + * a directly connected network) in '*next_hop' and a copy of the name of the + * device to reach 'host' in '*netdev_name', and returns 0. The caller is + * responsible for freeing '*netdev_name' (by calling free()). */ +int +netdev_get_next_hop(const struct netdev *netdev, + const struct in_addr *host, struct in_addr *next_hop, + char **netdev_name) +{ + int error = (netdev->class->get_next_hop + ?
netdev->class->get_next_hop(host, next_hop, netdev_name) + : EOPNOTSUPP); + if (error) { + next_hop->s_addr = 0; + *netdev_name = NULL; + } + return error; +} + /* If 'netdev' has an assigned IPv6 address, sets '*in6' to that address and * returns 0. Otherwise, returns a positive errno value and sets '*in6' to * all-zero-bits (in6addr_any). @@ -633,7 +661,7 @@ netdev_find_dev_by_in4(const struct in_addr *in4) struct in_addr dev_in4; if (!netdev_open(name, NETDEV_ETH_TYPE_NONE, &netdev) - && !netdev_get_in4(netdev, &dev_in4) + && !netdev_get_in4(netdev, &dev_in4, NULL) && dev_in4.s_addr == in4->s_addr) { goto exit; } diff --git a/lib/netdev.h b/lib/netdev.h index b66d7bc0..4a29cf37 100644 --- a/lib/netdev.h +++ b/lib/netdev.h @@ -107,10 +107,13 @@ int netdev_get_features(struct netdev *, uint32_t *supported, uint32_t *peer); int netdev_set_advertisements(struct netdev *, uint32_t advertise); -int netdev_get_in4(const struct netdev *, struct in_addr *); +int netdev_get_in4(const struct netdev *, struct in_addr *address, + struct in_addr *netmask); int netdev_set_in4(struct netdev *, struct in_addr addr, struct in_addr mask); int netdev_get_in6(const struct netdev *, struct in6_addr *); int netdev_add_router(struct netdev *, struct in_addr router); +int netdev_get_next_hop(const struct netdev *, const struct in_addr *host, + struct in_addr *next_hop, char **); int netdev_arp_lookup(const struct netdev *, uint32_t ip, uint8_t mac[6]); int netdev_get_flags(const struct netdev *, enum netdev_flags *); diff --git a/ofproto/in-band.c b/ofproto/in-band.c index a08af079..18415f48 100644 --- a/ofproto/in-band.c +++ b/ofproto/in-band.c @@ -22,6 +22,8 @@ #include <net/if.h> #include <string.h> #include <stdlib.h> +#include "dhcp.h" +#include "dpif.h" #include "flow.h" #include "mac-learning.h" #include "netdev.h" @@ -30,6 +32,7 @@ #include "ofproto.h" #include "ofpbuf.h" #include "openflow/openflow.h" +#include "openvswitch/datapath-protocol.h" #include "packets.h" #include 
"poll-loop.h" #include "rconn.h" @@ -43,14 +46,15 @@ #define IB_BASE_PRIORITY 18181800 enum { - IBR_FROM_LOCAL_PORT, /* Sent by the local port. */ - IBR_OFP_TO_LOCAL, /* Sent to secure channel on local port. */ - IBR_ARP_FROM_LOCAL, /* ARP from the local port. */ - IBR_ARP_FROM_CTL, /* ARP from the controller. */ - IBR_TO_CTL_OFP_SRC, /* To controller, OpenFlow source port. */ - IBR_TO_CTL_OFP_DST, /* To controller, OpenFlow dest port. */ - IBR_FROM_CTL_OFP_SRC, /* From controller, OpenFlow source port. */ - IBR_FROM_CTL_OFP_DST, /* From controller, OpenFlow dest port. */ + IBR_FROM_LOCAL_DHCP, /* From local port, DHCP. */ + IBR_TO_LOCAL_ARP, /* To local port, ARP. */ + IBR_FROM_LOCAL_ARP, /* From local port, ARP. */ + IBR_TO_REMOTE_ARP, /* To remote MAC, ARP. */ + IBR_FROM_REMOTE_ARP, /* From remote MAC, ARP. */ + IBR_TO_CTL_ARP, /* To controller IP, ARP. */ + IBR_FROM_CTL_ARP, /* From controller IP, ARP. */ + IBR_TO_CTL_OFP, /* To controller, OpenFlow port. */ + IBR_FROM_CTL_OFP, /* From controller, OpenFlow port. */ #if OFP_TCP_PORT != OFP_SSL_PORT #error Need to support separate TCP and SSL flows. #endif @@ -69,17 +73,17 @@ struct in_band { struct rconn *controller; struct status_category *ss_cat; - /* Keeping track of controller's MAC address. */ - uint32_t ip; /* Current IP, 0 if unknown. */ - uint32_t last_ip; /* Last known IP, 0 if never known. */ - uint8_t mac[ETH_ADDR_LEN]; /* Current MAC, 0 if unknown. */ - uint8_t last_mac[ETH_ADDR_LEN]; /* Last known MAC, 0 if never known */ - struct netdev *netdev; - time_t next_refresh; /* Next time to refresh MAC address. */ + /* Keep track of local port's information. */ + uint8_t local_mac[ETH_ADDR_LEN]; /* Current MAC. */ + struct netdev *local_netdev; /* Local port's network device. */ + time_t next_local_refresh; - /* Keeping track of the local port's MAC address. */ - uint8_t local_mac[ETH_ADDR_LEN]; /* Current MAC. */ - time_t next_local_refresh; /* Next time to refresh MAC address. 
*/ + /* Keep track of controller and next hop's information. */ + uint32_t controller_ip; /* Controller IP, 0 if unknown. */ + uint8_t remote_mac[ETH_ADDR_LEN]; /* Remote MAC. */ + struct netdev *remote_netdev; + uint8_t last_remote_mac[ETH_ADDR_LEN]; /* Previous remote MAC. */ + time_t next_remote_refresh; /* Rules that we set up. */ struct ib_rule rules[N_IB_RULES]; @@ -88,58 +92,64 @@ struct in_band { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 60); static const uint8_t * -get_controller_mac(struct in_band *ib) +get_remote_mac(struct in_band *ib) { + int retval; + bool have_mac; + struct in_addr c_in4; /* Controller's IP address. */ + struct in_addr r_in4; /* Next hop IP address. */ + char *next_hop_dev; time_t now = time_now(); - uint32_t controller_ip; - controller_ip = rconn_get_remote_ip(ib->controller); - if (controller_ip != ib->ip || now >= ib->next_refresh) { - bool have_mac; - - ib->ip = controller_ip; - - /* Look up MAC address. */ - memset(ib->mac, 0, sizeof ib->mac); - if (ib->ip) { - struct in_addr local_in4 = { rconn_get_local_ip(ib->controller) }; - struct in_addr in4; - int retval; - - /* Refresh device with IP address 'in4'. */ - if (!ib->netdev - || netdev_get_in4(ib->netdev, &in4) - || in4.s_addr != local_in4.s_addr) - { - netdev_close(ib->netdev); - ib->netdev = netdev_find_dev_by_in4(&local_in4); - } + if (now >= ib->next_remote_refresh) { + /* Find the next-hop IP address. 
*/ + c_in4.s_addr = ib->controller_ip; + memset(ib->remote_mac, 0, sizeof ib->remote_mac); + retval = netdev_get_next_hop(ib->local_netdev, + &c_in4, &r_in4, &next_hop_dev); + if (retval) { + VLOG_WARN("cannot find route for controller ("IP_FMT"): %s", + IP_ARGS(&ib->controller_ip), strerror(retval)); + ib->next_remote_refresh = now + 1; + return NULL; + } + if (!r_in4.s_addr) { + r_in4.s_addr = c_in4.s_addr; + } - if (ib->netdev) { - retval = netdev_arp_lookup(ib->netdev, ib->ip, ib->mac); - if (retval) { - VLOG_DBG_RL(&rl, "cannot look up controller MAC address " - "("IP_FMT"): %s", - IP_ARGS(&ib->ip), strerror(retval)); - } - } else { - VLOG_DBG_RL(&rl, "cannot find device with IP address "IP_FMT, - IP_ARGS(&local_in4.s_addr)); + /* Get the next-hop IP and network device. */ + if (!ib->remote_netdev + || strcmp(netdev_get_name(ib->remote_netdev), next_hop_dev)) + { + netdev_close(ib->remote_netdev); + retval = netdev_open(next_hop_dev, NETDEV_ETH_TYPE_NONE, + &ib->remote_netdev); + if (retval) { + VLOG_WARN_RL(&rl, "cannot open netdev %s (next hop " + "to controller "IP_FMT"): %s", + next_hop_dev, IP_ARGS(&ib->controller_ip), + strerror(retval)); + ib->next_remote_refresh = now + 1; + return NULL; } } - have_mac = !eth_addr_is_zero(ib->mac); - /* Log changes in IP, MAC addresses. */ - if (ib->ip && ib->ip != ib->last_ip) { - VLOG_DBG("controller IP address changed from "IP_FMT - " to "IP_FMT, IP_ARGS(&ib->last_ip), IP_ARGS(&ib->ip)); - ib->last_ip = ib->ip; + /* Look up the MAC address of the next-hop IP address. 
*/ + retval = netdev_arp_lookup(ib->remote_netdev, r_in4.s_addr, + ib->remote_mac); + if (retval) { + VLOG_DBG_RL(&rl, "cannot look up remote MAC address ("IP_FMT"): %s", + IP_ARGS(&r_in4.s_addr), strerror(retval)); } - if (have_mac && memcmp(ib->last_mac, ib->mac, ETH_ADDR_LEN)) { - VLOG_DBG("controller MAC address changed from "ETH_ADDR_FMT" to " + have_mac = !eth_addr_is_zero(ib->remote_mac); + free(next_hop_dev); + if (have_mac + && !eth_addr_equals(ib->last_remote_mac, ib->remote_mac)) { + VLOG_DBG("remote MAC address changed from "ETH_ADDR_FMT" to " ETH_ADDR_FMT, - ETH_ADDR_ARGS(ib->last_mac), ETH_ADDR_ARGS(ib->mac)); - memcpy(ib->last_mac, ib->mac, ETH_ADDR_LEN); + ETH_ADDR_ARGS(ib->last_remote_mac), + ETH_ADDR_ARGS(ib->remote_mac)); + memcpy(ib->last_remote_mac, ib->remote_mac, ETH_ADDR_LEN); } /* Schedule next refresh. @@ -147,9 +157,11 @@ get_controller_mac(struct in_band *ib) * If we have an IP address but not a MAC address, then refresh * quickly, since we probably will get a MAC address soon (via ARP). * Otherwise, we can afford to wait a little while. */ - ib->next_refresh = now + (!ib->ip || have_mac ? 10 : 1); + ib->next_remote_refresh + = now + (!ib->controller_ip || have_mac ? 10 : 1); } - return !eth_addr_is_zero(ib->mac) ? ib->mac : NULL; + + return !eth_addr_is_zero(ib->remote_mac) ? 
ib->remote_mac : NULL; } static const uint8_t * @@ -158,7 +170,7 @@ get_local_mac(struct in_band *ib) time_t now = time_now(); if (now >= ib->next_local_refresh) { uint8_t ea[ETH_ADDR_LEN]; - if (ib->netdev && !netdev_get_etheraddr(ib->netdev, ea)) { + if (ib->local_netdev && netdev_get_etheraddr(ib->local_netdev, ea)) { memcpy(ib->local_mac, ea, ETH_ADDR_LEN); } ib->next_local_refresh = now + 1; @@ -170,19 +182,15 @@ static void in_band_status_cb(struct status_reply *sr, void *in_band_) { struct in_band *in_band = in_band_; - const uint8_t *local_mac; - const uint8_t *controller_mac; - local_mac = get_local_mac(in_band); - if (local_mac) { + if (!eth_addr_is_zero(in_band->local_mac)) { status_reply_put(sr, "local-mac="ETH_ADDR_FMT, - ETH_ADDR_ARGS(local_mac)); + ETH_ADDR_ARGS(in_band->local_mac)); } - controller_mac = get_controller_mac(in_band); - if (controller_mac) { - status_reply_put(sr, "controller-mac="ETH_ADDR_FMT, - ETH_ADDR_ARGS(controller_mac)); + if (!eth_addr_is_zero(in_band->remote_mac)) { + status_reply_put(sr, "remote-mac="ETH_ADDR_FMT, + ETH_ADDR_ARGS(in_band->remote_mac)); } } @@ -224,54 +232,174 @@ setup_flow(struct in_band *in_band, int rule_idx, const flow_t *flow, } } +/* Returns true if 'packet' should be sent to the local port regardless + * of the flow table. */ +bool +in_band_msg_in_hook(struct in_band *in_band, const flow_t *flow, + const struct ofpbuf *packet) +{ + if (!in_band) { + return false; + } + + /* Regardless of how the flow table is configured, we want to be + * able to see replies to our DHCP requests. 
*/ + if (flow->dl_type == htons(ETH_TYPE_IP) + && flow->nw_proto == IP_TYPE_UDP + && flow->tp_src == htons(DHCP_SERVER_PORT) + && flow->tp_dst == htons(DHCP_CLIENT_PORT) + && packet->l7) { + struct dhcp_header *dhcp; + const uint8_t *local_mac; + + dhcp = ofpbuf_at(packet, (char *)packet->l7 - (char *)packet->data, + sizeof *dhcp); + if (!dhcp) { + return false; + } + + local_mac = get_local_mac(in_band); + if (eth_addr_equals(dhcp->chaddr, local_mac)) { + return true; + } + } + + return false; +} + +/* Returns true if the rule that would match 'flow' with 'actions' is + * allowed to be set up in the datapath. */ +bool +in_band_rule_check(struct in_band *in_band, const flow_t *flow, + const struct odp_actions *actions) +{ + if (!in_band) { + return true; + } + + /* Don't allow flows that would prevent DHCP replies from being seen + * by the local port. */ + if (flow->dl_type == htons(ETH_TYPE_IP) + && flow->nw_proto == IP_TYPE_UDP + && flow->tp_src == htons(DHCP_SERVER_PORT) + && flow->tp_dst == htons(DHCP_CLIENT_PORT)) { + int i; + + for (i=0; i<actions->n_actions; i++) { + if (actions->actions[i].output.type == ODPAT_OUTPUT + && actions->actions[i].output.port == ODPP_LOCAL) { + return true; + } + } + return false; + } + + return true; +} + void in_band_run(struct in_band *in_band) { - const uint8_t *controller_mac; + time_t now = time_now(); + uint32_t controller_ip; + const uint8_t *remote_mac; const uint8_t *local_mac; flow_t flow; - if (time_now() < MIN(in_band->next_refresh, in_band->next_local_refresh)) { + if (now < in_band->next_remote_refresh + && now < in_band->next_local_refresh) { return; } - controller_mac = get_controller_mac(in_band); - local_mac = get_local_mac(in_band); - /* Switch traffic sent by the local port. 
*/ - memset(&flow, 0, sizeof flow); - flow.in_port = ODPP_LOCAL; - setup_flow(in_band, IBR_FROM_LOCAL_PORT, &flow, OFPFW_IN_PORT, - OFPP_NORMAL); + controller_ip = rconn_get_remote_ip(in_band->controller); + if (in_band->controller_ip && controller_ip != in_band->controller_ip) { + VLOG_DBG("controller IP address changed from "IP_FMT" to "IP_FMT, + IP_ARGS(&in_band->controller_ip), + IP_ARGS(&controller_ip)); + } + in_band->controller_ip = controller_ip; + + remote_mac = get_remote_mac(in_band); + local_mac = get_local_mac(in_band); if (local_mac) { - /* Deliver traffic sent to the connection's interface. */ + /* Allow DHCP requests to be sent from the local port. */ + memset(&flow, 0, sizeof flow); + flow.in_port = ODPP_LOCAL; + flow.dl_type = htons(ETH_TYPE_IP); + memcpy(flow.dl_src, local_mac, ETH_ADDR_LEN); + flow.nw_proto = IP_TYPE_UDP; + flow.tp_src = htons(DHCP_CLIENT_PORT); + flow.tp_dst = htons(DHCP_SERVER_PORT); + setup_flow(in_band, IBR_FROM_LOCAL_DHCP, &flow, + (OFPFW_IN_PORT | OFPFW_DL_TYPE | OFPFW_DL_SRC + | OFPFW_NW_PROTO | OFPFW_TP_SRC | OFPFW_TP_DST), + OFPP_NORMAL); + + /* Allow the connection's interface to receive directed ARP traffic. */ memset(&flow, 0, sizeof flow); + flow.dl_type = htons(ETH_TYPE_ARP); memcpy(flow.dl_dst, local_mac, ETH_ADDR_LEN); - setup_flow(in_band, IBR_OFP_TO_LOCAL, &flow, OFPFW_DL_DST, - OFPP_NORMAL); + flow.nw_proto = ARP_OP_REPLY; + setup_flow(in_band, IBR_TO_LOCAL_ARP, &flow, + (OFPFW_DL_TYPE | OFPFW_DL_DST | OFPFW_NW_PROTO), + OFPP_NORMAL); /* Allow the connection's interface to be the source of ARP traffic. 
*/ memset(&flow, 0, sizeof flow); flow.dl_type = htons(ETH_TYPE_ARP); memcpy(flow.dl_src, local_mac, ETH_ADDR_LEN); - setup_flow(in_band, IBR_ARP_FROM_LOCAL, &flow, - OFPFW_DL_TYPE | OFPFW_DL_SRC, OFPP_NORMAL); + flow.nw_proto = ARP_OP_REQUEST; + setup_flow(in_band, IBR_FROM_LOCAL_ARP, &flow, + (OFPFW_DL_TYPE | OFPFW_DL_SRC | OFPFW_NW_PROTO), + OFPP_NORMAL); + } else { + drop_flow(in_band, IBR_TO_LOCAL_ARP); + drop_flow(in_band, IBR_FROM_LOCAL_ARP); + } + + if (remote_mac) { + /* Allow ARP replies to the remote side's MAC. */ + memset(&flow, 0, sizeof flow); + flow.dl_type = htons(ETH_TYPE_ARP); + memcpy(flow.dl_dst, remote_mac, ETH_ADDR_LEN); + flow.nw_proto = ARP_OP_REPLY; + setup_flow(in_band, IBR_TO_REMOTE_ARP, &flow, + (OFPFW_DL_TYPE | OFPFW_DL_DST | OFPFW_NW_PROTO), + OFPP_NORMAL); + + /* Allow ARP requests from the remote side's MAC. */ + memset(&flow, 0, sizeof flow); + flow.dl_type = htons(ETH_TYPE_ARP); + memcpy(flow.dl_src, remote_mac, ETH_ADDR_LEN); + flow.nw_proto = ARP_OP_REQUEST; + setup_flow(in_band, IBR_FROM_REMOTE_ARP, &flow, + (OFPFW_DL_TYPE | OFPFW_DL_SRC | OFPFW_NW_PROTO), + OFPP_NORMAL); } else { - drop_flow(in_band, IBR_OFP_TO_LOCAL); - drop_flow(in_band, IBR_ARP_FROM_LOCAL); + drop_flow(in_band, IBR_TO_REMOTE_ARP); + drop_flow(in_band, IBR_FROM_REMOTE_ARP); } - if (controller_mac) { - /* Switch ARP requests sent by the controller. (OFPP_NORMAL will "do - * the right thing" regarding VLANs here.) */ + if (controller_ip) { + /* Allow ARP replies to the controller's IP. 
*/ memset(&flow, 0, sizeof flow); flow.dl_type = htons(ETH_TYPE_ARP); - memcpy(flow.dl_dst, eth_addr_broadcast, ETH_ADDR_LEN); - memcpy(flow.dl_src, controller_mac, ETH_ADDR_LEN); - setup_flow(in_band, IBR_ARP_FROM_CTL, &flow, - OFPFW_DL_TYPE | OFPFW_DL_DST | OFPFW_DL_SRC, + flow.nw_proto = ARP_OP_REPLY; + flow.nw_dst = controller_ip; + setup_flow(in_band, IBR_TO_CTL_ARP, &flow, + (OFPFW_DL_TYPE | OFPFW_NW_PROTO | OFPFW_NW_DST_MASK), OFPP_NORMAL); + /* Allow ARP requests from the controller's IP. */ + memset(&flow, 0, sizeof flow); + flow.dl_type = htons(ETH_TYPE_ARP); + flow.nw_proto = ARP_OP_REQUEST; + flow.nw_src = controller_ip; + setup_flow(in_band, IBR_FROM_CTL_ARP, &flow, + (OFPFW_DL_TYPE | OFPFW_NW_PROTO | OFPFW_NW_SRC_MASK), + OFPP_NORMAL); + /* OpenFlow traffic to or from the controller. * * (A given field's value is completely ignored if it is wildcarded, @@ -279,29 +407,22 @@ in_band_run(struct in_band *in_band) * case here.) */ memset(&flow, 0, sizeof flow); flow.dl_type = htons(ETH_TYPE_IP); - memcpy(flow.dl_src, controller_mac, ETH_ADDR_LEN); - memcpy(flow.dl_dst, controller_mac, ETH_ADDR_LEN); flow.nw_proto = IP_TYPE_TCP; + flow.nw_src = controller_ip; + flow.nw_dst = controller_ip; flow.tp_src = htons(OFP_TCP_PORT); flow.tp_dst = htons(OFP_TCP_PORT); - setup_flow(in_band, IBR_TO_CTL_OFP_SRC, &flow, - (OFPFW_DL_TYPE | OFPFW_DL_DST | OFPFW_NW_PROTO - | OFPFW_TP_SRC), OFPP_NORMAL); - setup_flow(in_band, IBR_TO_CTL_OFP_DST, &flow, - (OFPFW_DL_TYPE | OFPFW_DL_DST | OFPFW_NW_PROTO + setup_flow(in_band, IBR_TO_CTL_OFP, &flow, + (OFPFW_DL_TYPE | OFPFW_NW_PROTO | OFPFW_NW_DST_MASK | OFPFW_TP_DST), OFPP_NORMAL); - setup_flow(in_band, IBR_FROM_CTL_OFP_SRC, &flow, - (OFPFW_DL_TYPE | OFPFW_DL_SRC | OFPFW_NW_PROTO + setup_flow(in_band, IBR_FROM_CTL_OFP, &flow, + (OFPFW_DL_TYPE | OFPFW_NW_PROTO | OFPFW_NW_SRC_MASK | OFPFW_TP_SRC), OFPP_NORMAL); - setup_flow(in_band, IBR_FROM_CTL_OFP_DST, &flow, - (OFPFW_DL_TYPE | OFPFW_DL_SRC | OFPFW_NW_PROTO - | OFPFW_TP_DST), 
OFPP_NORMAL); } else { - drop_flow(in_band, IBR_ARP_FROM_CTL); - drop_flow(in_band, IBR_TO_CTL_OFP_DST); - drop_flow(in_band, IBR_TO_CTL_OFP_SRC); - drop_flow(in_band, IBR_FROM_CTL_OFP_DST); - drop_flow(in_band, IBR_FROM_CTL_OFP_SRC); + drop_flow(in_band, IBR_TO_CTL_ARP); + drop_flow(in_band, IBR_FROM_CTL_ARP); + drop_flow(in_band, IBR_TO_CTL_OFP); + drop_flow(in_band, IBR_FROM_CTL_OFP); } } @@ -309,7 +430,8 @@ void in_band_wait(struct in_band *in_band) { time_t now = time_now(); - time_t wakeup = MIN(in_band->next_refresh, in_band->next_local_refresh); + time_t wakeup + = MIN(in_band->next_remote_refresh, in_band->next_local_refresh); if (wakeup > now) { poll_timer_wait((wakeup - now) * 1000); } else { @@ -327,22 +449,44 @@ in_band_flushed(struct in_band *in_band) } } -void -in_band_create(struct ofproto *ofproto, struct switch_status *ss, - struct rconn *controller, struct in_band **in_bandp) +int +in_band_create(struct ofproto *ofproto, struct dpif *dpif, + struct switch_status *ss, struct rconn *controller, + struct in_band **in_bandp) { struct in_band *in_band; + char local_name[IF_NAMESIZE]; + struct netdev *local_netdev; + int error; + + error = dpif_port_get_name(dpif, ODPP_LOCAL, + local_name, sizeof local_name); + if (error) { + VLOG_ERR("failed to initialize in-band control: cannot get name " + "of datapath local port (%s)", strerror(error)); + return error; + } + + error = netdev_open(local_name, NETDEV_ETH_TYPE_NONE, &local_netdev); + if (error) { + VLOG_ERR("failed to initialize in-band control: cannot open " + "datapath local port %s (%s)", local_name, strerror(error)); + return error; + } in_band = xcalloc(1, sizeof *in_band); in_band->ofproto = ofproto; in_band->controller = controller; in_band->ss_cat = switch_status_register(ss, "in-band", in_band_status_cb, in_band); - in_band->next_refresh = TIME_MIN; + in_band->local_netdev = local_netdev; in_band->next_local_refresh = TIME_MIN; - in_band->netdev = NULL; + in_band->remote_netdev = NULL; + 
in_band->next_remote_refresh = TIME_MIN; *in_bandp = in_band; + + return 0; } void @@ -350,7 +494,8 @@ in_band_destroy(struct in_band *in_band) { if (in_band) { switch_status_unregister(in_band->ss_cat); - netdev_close(in_band->netdev); + netdev_close(in_band->local_netdev); + netdev_close(in_band->remote_netdev); /* We don't own the rconn. */ } } diff --git a/ofproto/in-band.h b/ofproto/in-band.h index 624bee9e..ddbc5e56 100644 --- a/ofproto/in-band.h +++ b/ofproto/in-band.h @@ -21,15 +21,20 @@ struct dpif; struct in_band; +struct odp_actions; struct ofproto; struct rconn; struct settings; struct switch_status; -void in_band_create(struct ofproto *, struct switch_status *, - struct rconn *controller, struct in_band **); +int in_band_create(struct ofproto *, struct dpif *, struct switch_status *, + struct rconn *controller, struct in_band **); void in_band_destroy(struct in_band *); void in_band_run(struct in_band *); +bool in_band_msg_in_hook(struct in_band *, const flow_t *, + const struct ofpbuf *packet); +bool in_band_rule_check(struct in_band *, const flow_t *, + const struct odp_actions *); void in_band_wait(struct in_band *); void in_band_flushed(struct in_band *); diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index dbaa75bc..7650068e 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -424,9 +424,8 @@ ofproto_set_in_band(struct ofproto *p, bool in_band) { if (in_band != (p->in_band != NULL)) { if (in_band) { - in_band_create(p, p->switch_status, p->controller->rconn, - &p->in_band); - return 0; + return in_band_create(p, p->dpif, p->switch_status, + p->controller->rconn, &p->in_band); } else { ofproto_set_discovery(p, false, NULL, true); in_band_destroy(p->in_band); @@ -1700,7 +1699,7 @@ rule_post_uninstall(struct ofproto *ofproto, struct rule *rule) struct rule *super = rule->super; rule_account(ofproto, rule, 0); - if (ofproto->netflow) { + if (ofproto->netflow && rule->byte_count) { struct ofexpired expired; expired.flow = rule->cr.flow; 
expired.packet_count = rule->packet_count; @@ -2127,6 +2126,13 @@ xlate_actions(const union ofp_action *in, size_t n_in, ctx.tags = tags ? tags : &no_tags; ctx.may_setup_flow = true; do_xlate_actions(in, n_in, &ctx); + + /* Check with in-band control to see if we're allowed to setup this + * flow. */ + if (!in_band_rule_check(ofproto->in_band, flow, out)) { + ctx.may_setup_flow = false; + } + if (may_setup_flow) { *may_setup_flow = ctx.may_setup_flow; } @@ -2516,11 +2522,11 @@ flow_stats_ds_cb(struct cls_rule *rule_, void *cbdata_) } query_stats(cbdata->ofproto, rule, &packet_count, &byte_count); - flow_to_match(&rule->cr.flow, rule->cr.wc.wildcards, &match); + flow_to_ovs_match(&rule->cr.flow, rule->cr.wc.wildcards, &match); ds_put_format(results, "duration=%llds, ", (time_msec() - rule->created) / 1000); - ds_put_format(results, "priority=%u", rule->cr.priority); + ds_put_format(results, "priority=%u, ", rule->cr.priority); ds_put_format(results, "n_packets=%"PRIu64", ", packet_count); ds_put_format(results, "n_bytes=%"PRIu64", ", byte_count); ofp_print_match(results, &match, true); @@ -3028,6 +3034,17 @@ handle_odp_msg(struct ofproto *p, struct ofpbuf *packet) payload.size = msg->length - sizeof *msg; flow_extract(&payload, msg->port, &flow); + /* Check with in-band control to see if this packet should be sent + * to the local port regardless of the flow table. */ + if (in_band_msg_in_hook(p->in_band, &flow, &payload)) { + union odp_action action; + + memset(&action, 0, sizeof(action)); + action.output.type = ODPAT_OUTPUT; + action.output.port = ODPP_LOCAL; + dpif_execute(p->dpif, flow.in_port, &action, 1, &payload); + } + rule = lookup_valid_rule(p, &flow); if (!rule) { /* Don't send a packet-in if OFPPC_NO_PACKET_IN asserted. 
*/ diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 869d7172..7081512e 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -1774,12 +1774,14 @@ compose_dsts(const struct bridge *br, const flow_t *flow, uint16_t vlan, for (i = 0; i < br->n_ports; i++) { struct port *port = br->ports[i]; if (port_includes_vlan(port, m->out_vlan) - && set_dst(dst, flow, in_port, port, tags) - && !dst_is_duplicate(dsts, dst - dsts, dst)) + && set_dst(dst, flow, in_port, port, tags)) { if (port->vlan < 0) { dst->vlan = m->out_vlan; } + if (dst_is_duplicate(dsts, dst - dsts, dst)) { + continue; + } if (dst->dp_ifidx == flow->in_port && dst->vlan == vlan) { /* Don't send out input port on same VLAN. */ @@ -3369,6 +3371,7 @@ mirror_reconfigure_one(struct mirror *m) int *vlans; size_t i; bool mirror_all_ports; + bool any_ports_specified; /* Get output port. */ out_port_name = cfg_get_key(0, "mirror.%s.%s.output.port", @@ -3407,11 +3410,18 @@ mirror_reconfigure_one(struct mirror *m) cfg_get_all_keys(&src_ports, "%s.select.src-port", pfx); cfg_get_all_keys(&dst_ports, "%s.select.dst-port", pfx); cfg_get_all_keys(&ports, "%s.select.port", pfx); + any_ports_specified = src_ports.n || dst_ports.n || ports.n; svec_append(&src_ports, &ports); svec_append(&dst_ports, &ports); svec_destroy(&ports); prune_ports(m, &src_ports); prune_ports(m, &dst_ports); + if (any_ports_specified && !src_ports.n && !dst_ports.n) { + VLOG_ERR("%s: none of the specified ports exist; " + "disabling port mirror %s", pfx, pfx); + mirror_destroy(m); + goto exit; + } /* Get all the vlans, and drop duplicate and invalid vlans. */ svec_init(&vlan_strings); @@ -3463,6 +3473,7 @@ mirror_reconfigure_one(struct mirror *m) } /* Clean up. 
*/ +exit: svec_destroy(&src_ports); svec_destroy(&dst_ports); free(pfx); diff --git a/vswitchd/mgmt.c b/vswitchd/mgmt.c index e6e7d4ef..d15b4ba4 100644 --- a/vswitchd/mgmt.c +++ b/vswitchd/mgmt.c @@ -54,6 +54,7 @@ static struct rconn *mgmt_rconn; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 60); static struct svec capabilities; static struct ofpbuf ext_data_buffer; +static uint32_t ext_data_xid = UINT32_MAX; uint64_t mgmt_id; @@ -222,6 +223,10 @@ mgmt_reconfigure(void) if (retval == EAFNOSUPPORT) { VLOG_ERR("no support for %s vconn", controller_name); } + + /* Reset the extended message buffer when we create a new + * management connection. */ + ofpbuf_clear(&ext_data_buffer); } static void * @@ -261,12 +266,18 @@ send_openflow_buffer(struct ofpbuf *buffer) return EINVAL; } + /* Make sure there's room to transmit the data. We don't want to + * fail part way through a send. */ + if (rconn_packet_counter_read(txqlen) >= TXQ_LIMIT) { + return EAGAIN; + } + /* OpenFlow messages use a 16-bit length field, so messages over 64K * must be broken into multiple pieces. */ if (buffer->size <= 65535) { update_openflow_length(buffer); - retval = rconn_send_with_limit(mgmt_rconn, buffer, txqlen, TXQ_LIMIT); + retval = rconn_send(mgmt_rconn, buffer, txqlen); if (retval) { VLOG_WARN_RL(&rl, "send to %s failed: %s", rconn_get_name(mgmt_rconn), strerror(retval)); @@ -292,12 +303,10 @@ send_openflow_buffer(struct ofpbuf *buffer) &new_buffer); oed->type = header->type; - if (remain > 65535) { + if (remain > new_len) { oed->flags |= OFMPEDF_MORE_DATA; } - printf("xxx SENDING LEN: %d\n", new_len); - /* Copy the entire original message, including the OpenFlow * header, since management protocol structure definitions * include these headers. 
@@ -305,8 +314,7 @@ send_openflow_buffer(struct ofpbuf *buffer) ofpbuf_put(new_buffer, ptr, new_len); update_openflow_length(new_buffer); - retval = rconn_send_with_limit(mgmt_rconn, new_buffer, txqlen, - TXQ_LIMIT); + retval = rconn_send(mgmt_rconn, new_buffer, txqlen); if (retval) { VLOG_WARN_RL(&rl, "send to %s failed: %s", rconn_get_name(mgmt_rconn), strerror(retval)); @@ -670,23 +678,48 @@ static int recv_ofmp_extended_data(uint32_t xid, const struct ofmp_header *ofmph, size_t len) { - size_t data_len; + int data_len; struct ofmp_extended_data *ofmped; - uint8_t *ptr; - data_len = len - sizeof(*ofmped); - if (data_len <= sizeof(*ofmped)) { + if (len <= sizeof(*ofmped)) { /* xxx Send error. */ return -EINVAL; } + ext_data_xid = xid; ofmped = (struct ofmp_extended_data *)ofmph; - ptr = ofpbuf_put(&ext_data_buffer, ofmped->data, data_len); + data_len = len - sizeof(*ofmped); + ofpbuf_put(&ext_data_buffer, ofmped->data, data_len); + + if (!(ofmped->flags & OFMPEDF_MORE_DATA)) { + struct ofmp_header *new_oh; + int error; + + /* An embedded message must be greater than the size of an + * OpenFlow message. */ + new_oh = ofpbuf_at(&ext_data_buffer, 0, 65536); + if (!new_oh) { + VLOG_WARN_RL(&rl, "received short embedded message: %d\n", + ext_data_buffer.size); + return -EINVAL; + } + + /* Make sure that this is a management message and that there's + * not an embedded extended data message. 
*/ + if ((new_oh->header.vendor != htonl(NX_VENDOR_ID)) + || (new_oh->header.subtype != htonl(NXT_MGMT)) + || (new_oh->type == htonl(OFMPT_EXTENDED_DATA))) { + VLOG_WARN_RL(&rl, "received bad embedded message\n"); + return -EINVAL; + } + new_oh->header.header.xid = ext_data_xid; + new_oh->header.header.length = 0; - if (!ofmped->flags & OFMPEDF_MORE_DATA) { - recv_ofmp(xid, ext_data_buffer.data, ext_data_buffer.size); + error = recv_ofmp(xid, ext_data_buffer.data, ext_data_buffer.size); ofpbuf_clear(&ext_data_buffer); + + return error; } return 0; @@ -707,6 +740,12 @@ int recv_ofmp(uint32_t xid, struct ofmp_header *ofmph, size_t len) len = ntohs(ofmph->header.header.length); } + /* Reset the extended data buffer if this isn't a continuation of an + * existing extended data message. */ + if (ext_data_xid != xid) { + ofpbuf_clear(&ext_data_buffer); + } + /* xxx Should sanity-check for min/max length */ switch (ntohs(ofmph->type)) { |