about summary refs log tree commit diff
diff options
context:
space:
mode:
author Ben Pfaff <blp@nicira.com> 2009-09-02 10:14:53 -0700
committer Ben Pfaff <blp@nicira.com> 2009-09-02 10:14:53 -0700
commit f1acd62b54376a425a975f9af501c4c8c5689b39 (patch)
tree a579f967870b61d3a226acdc62cc079c8ce996d1
parent 2c7807ac4f578bfdd7a46b79028935c9aa34cde3 (diff)
parent 0ad9b732910b8f1aa7fc47ea57ff79e7618a4e6d (diff)
Merge citrix branch into master.
-rw-r--r-- datapath/datapath.c 35
-rw-r--r-- datapath/datapath.h 42
-rw-r--r-- datapath/flow.c 43
-rw-r--r-- datapath/table.c 337
-rwxr-xr-x debian/corekeeper.init 3
-rw-r--r-- extras/ezio/ovs-switchui.c 2
-rw-r--r-- include/openflow/openflow-mgmt.h 3
-rw-r--r-- include/openvswitch/datapath-protocol.h 3
-rw-r--r-- lib/flow.c 57
-rw-r--r-- lib/flow.h 1
-rw-r--r-- lib/netdev-linux.c 122
-rw-r--r-- lib/netdev-provider.h 17
-rw-r--r-- lib/netdev.c 48
-rw-r--r-- lib/netdev.h 5
-rw-r--r-- ofproto/in-band.c 385
-rw-r--r-- ofproto/in-band.h 9
-rw-r--r-- ofproto/ofproto.c 29
-rw-r--r-- vswitchd/bridge.c 15
-rw-r--r-- vswitchd/mgmt.c 65
19 files changed, 908 insertions, 313 deletions
diff --git a/datapath/datapath.c b/datapath/datapath.c
index d822b73c..6f96ee40 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -839,7 +839,7 @@ static void clear_stats(struct sw_flow *flow)
static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp)
{
struct odp_flow_put uf;
- struct sw_flow *flow, **bucket;
+ struct sw_flow *flow;
struct dp_table *table;
struct odp_flow_stats stats;
int error;
@@ -849,15 +849,10 @@ static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp)
goto error;
uf.flow.key.reserved = 0;
-retry:
table = rcu_dereference(dp->table);
- bucket = dp_table_lookup_for_insert(table, &uf.flow.key);
- if (!bucket) {
- /* No such flow, and the slots where it could go are full. */
- error = uf.flags & ODPPF_CREATE ? -EFBIG : -ENOENT;
- goto error;
- } else if (!*bucket) {
- /* No such flow, but we found an available slot for it. */
+ flow = dp_table_lookup(table, &uf.flow.key);
+ if (!flow) {
+ /* No such flow. */
struct sw_flow_actions *acts;
error = -ENOENT;
@@ -865,14 +860,15 @@ retry:
goto error;
/* Expand table, if necessary, to make room. */
- if (dp->n_flows * 4 >= table->n_buckets &&
- table->n_buckets < DP_MAX_BUCKETS) {
+ if (dp->n_flows >= table->n_buckets) {
+ error = -ENOSPC;
+ if (table->n_buckets >= DP_MAX_BUCKETS)
+ goto error;
+
error = dp_table_expand(dp);
if (error)
goto error;
-
- /* The bucket's location has changed. Try again. */
- goto retry;
+ table = rcu_dereference(dp->table);
}
/* Allocate flow. */
@@ -892,12 +888,13 @@ retry:
rcu_assign_pointer(flow->sf_acts, acts);
/* Put flow in bucket. */
- rcu_assign_pointer(*bucket, flow);
+ error = dp_table_insert(table, flow);
+ if (error)
+ goto error_free_flow_acts;
dp->n_flows++;
memset(&stats, 0, sizeof(struct odp_flow_stats));
} else {
/* We found a matching flow. */
- struct sw_flow *flow = *rcu_dereference(bucket);
struct sw_flow_actions *old_acts, *new_acts;
unsigned long int flags;
@@ -935,6 +932,8 @@ retry:
return -EFAULT;
return 0;
+error_free_flow_acts:
+ kfree(flow->sf_acts);
error_free_flow:
kmem_cache_free(flow_cache, flow);
error:
@@ -1167,8 +1166,8 @@ static int get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp)
int i;
stats.n_flows = dp->n_flows;
- stats.cur_capacity = rcu_dereference(dp->table)->n_buckets * 2;
- stats.max_capacity = DP_MAX_BUCKETS * 2;
+ stats.cur_capacity = rcu_dereference(dp->table)->n_buckets;
+ stats.max_capacity = DP_MAX_BUCKETS;
stats.n_ports = dp->n_ports;
stats.max_ports = DP_MAX_PORTS;
stats.max_groups = DP_MAX_GROUPS;
diff --git a/datapath/datapath.h b/datapath/datapath.h
index b5200848..122706a8 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -29,20 +29,54 @@
#define DP_MAX_PORTS 256
#define DP_MAX_GROUPS 16
-#define DP_L2_BITS (PAGE_SHIFT - ilog2(sizeof(struct sw_flow*)))
+#define DP_L2_BITS (PAGE_SHIFT - ilog2(sizeof(struct dp_bucket*)))
#define DP_L2_SIZE (1 << DP_L2_BITS)
#define DP_L2_SHIFT 0
-#define DP_L1_BITS (PAGE_SHIFT - ilog2(sizeof(struct sw_flow**)))
+#define DP_L1_BITS (PAGE_SHIFT - ilog2(sizeof(struct dp_bucket**)))
#define DP_L1_SIZE (1 << DP_L1_BITS)
#define DP_L1_SHIFT DP_L2_BITS
+/* For 4 kB pages, this is 1,048,576 on 32-bit or 262,144 on 64-bit. */
#define DP_MAX_BUCKETS (DP_L1_SIZE * DP_L2_SIZE)
+/**
+ * struct dp_table - flow table
+ * @n_buckets: number of buckets (a power of 2 between %DP_L1_SIZE and
+ * %DP_MAX_BUCKETS)
+ * @buckets: pointer to @n_buckets/%DP_L1_SIZE pointers to %DP_L1_SIZE pointers
+ * to buckets
+ * @hash_seed: random number used for flow hashing, to make the hash
+ * distribution harder to predict
+ * @rcu: RCU callback structure
+ *
+ * The @buckets array is logically an array of pointers to buckets. It is
+ * broken into two levels to avoid the need to kmalloc() any object larger than
+ * a single page or to use vmalloc(). @buckets is always nonnull, as is each
+ * @buckets[i], but each @buckets[i][j] is nonnull only if the specified hash
+ * bucket is nonempty (for 0 <= i < @n_buckets/%DP_L1_SIZE, 0 <= j <
+ * %DP_L1_SIZE).
+ */
struct dp_table {
unsigned int n_buckets;
- struct sw_flow ***flows[2];
+ struct dp_bucket ***buckets;
+ unsigned int hash_seed;
+ struct rcu_head rcu;
+};
+
+/**
+ * struct dp_bucket - single bucket within datapath flow table
+ * @rcu: RCU callback structure
+ * @n_flows: number of flows in @flows[] array
+ * @flows: array of @n_flows pointers to flows
+ *
+ * The expected number of flows per bucket is 1, but this allows for an
+ * arbitrary number of collisions.
+ */
+struct dp_bucket {
struct rcu_head rcu;
+ unsigned int n_flows;
+ struct sw_flow *flows[];
};
#define DP_N_QUEUES 2
@@ -105,7 +139,7 @@ extern int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
struct dp_table *dp_table_create(unsigned int n_buckets);
void dp_table_destroy(struct dp_table *, int free_flows);
struct sw_flow *dp_table_lookup(struct dp_table *, const struct odp_flow_key *);
-struct sw_flow **dp_table_lookup_for_insert(struct dp_table *, const struct odp_flow_key *);
+int dp_table_insert(struct dp_table *, struct sw_flow *);
int dp_table_delete(struct dp_table *, struct sw_flow *);
int dp_table_expand(struct datapath *);
int dp_table_flush(struct datapath *);
diff --git a/datapath/flow.c b/datapath/flow.c
index 2ac79e70..ae60617d 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -18,6 +18,7 @@
#include <linux/module.h>
#include <linux/in.h>
#include <linux/rcupdate.h>
+#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/tcp.h>
@@ -29,6 +30,27 @@
struct kmem_cache *flow_cache;
+struct arp_eth_header
+{
+ __be16 ar_hrd; /* format of hardware address */
+ __be16 ar_pro; /* format of protocol address */
+ unsigned char ar_hln; /* length of hardware address */
+ unsigned char ar_pln; /* length of protocol address */
+ __be16 ar_op; /* ARP opcode (command) */
+
+ /* Ethernet+IPv4 specific members. */
+ unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */
+ unsigned char ar_sip[4]; /* sender IP address */
+ unsigned char ar_tha[ETH_ALEN]; /* target hardware address */
+ unsigned char ar_tip[4]; /* target IP address */
+} __attribute__((packed));
+
+static inline int arphdr_ok(struct sk_buff *skb)
+{
+ int nh_ofs = skb_network_offset(skb);
+ return pskb_may_pull(skb, nh_ofs + sizeof(struct arp_eth_header));
+}
+
static inline int iphdr_ok(struct sk_buff *skb)
{
int nh_ofs = skb_network_offset(skb);
@@ -266,6 +288,27 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key)
} else {
retval = 1;
}
+ } else if (key->dl_type == htons(ETH_P_ARP) && arphdr_ok(skb)) {
+ struct arp_eth_header *arp;
+
+ arp = (struct arp_eth_header *)skb_network_header(skb);
+
+ if (arp->ar_hrd == htons(1)
+ && arp->ar_pro == htons(ETH_P_IP)
+ && arp->ar_hln == ETH_ALEN
+ && arp->ar_pln == 4) {
+
+ /* We only match on the lower 8 bits of the opcode. */
+ if (ntohs(arp->ar_op) <= 0xff) {
+ key->nw_proto = ntohs(arp->ar_op);
+ }
+
+ if (key->nw_proto == ARPOP_REQUEST
+ || key->nw_proto == ARPOP_REPLY) {
+ memcpy(&key->nw_src, arp->ar_sip, sizeof(key->nw_src));
+ memcpy(&key->nw_dst, arp->ar_tip, sizeof(key->nw_dst));
+ }
+ }
} else {
skb_reset_transport_header(skb);
}
diff --git a/datapath/table.c b/datapath/table.c
index 11aeb888..23ae8abe 100644
--- a/datapath/table.c
+++ b/datapath/table.c
@@ -11,50 +11,76 @@
#include <linux/gfp.h>
#include <linux/jhash.h>
+#include <linux/random.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <asm/pgtable.h>
-static void free_table(struct sw_flow ***flows, unsigned int n_buckets,
- int free_flows)
+static inline int bucket_size(int n_flows)
+{
+ return sizeof(struct dp_bucket) + sizeof(struct sw_flow*) * n_flows;
+}
+
+static struct dp_bucket *dp_bucket_alloc(int n_flows)
+{
+ return kmalloc(bucket_size(n_flows), GFP_KERNEL);
+}
+
+static void free_buckets(struct dp_bucket ***l1, unsigned int n_buckets,
+ int free_flows)
{
unsigned int i;
for (i = 0; i < n_buckets >> DP_L1_BITS; i++) {
- struct sw_flow **l2 = flows[i];
- if (free_flows) {
- unsigned int j;
- for (j = 0; j < DP_L1_SIZE; j++) {
- if (l2[j])
- flow_free(l2[j]);
+ struct dp_bucket **l2 = l1[i];
+ unsigned int j;
+
+ for (j = 0; j < DP_L1_SIZE; j++) {
+ struct dp_bucket *bucket = l2[j];
+ if (!bucket)
+ continue;
+
+ if (free_flows) {
+ unsigned int k;
+ for (k = 0; k < bucket->n_flows; k++)
+ flow_free(bucket->flows[k]);
}
+ kfree(bucket);
}
free_page((unsigned long)l2);
}
- kfree(flows);
+ kfree(l1);
}
-static struct sw_flow ***alloc_table(unsigned int n_buckets)
+static struct dp_bucket ***alloc_buckets(unsigned int n_buckets)
{
- struct sw_flow ***flows;
+ struct dp_bucket ***l1;
unsigned int i;
- flows = kmalloc((n_buckets >> DP_L1_BITS) * sizeof(struct sw_flow**),
- GFP_KERNEL);
- if (!flows)
+ l1 = kmalloc((n_buckets >> DP_L1_BITS) * sizeof(struct dp_bucket**),
+ GFP_KERNEL);
+ if (!l1)
return NULL;
for (i = 0; i < n_buckets >> DP_L1_BITS; i++) {
- flows[i] = (struct sw_flow **)get_zeroed_page(GFP_KERNEL);
- if (!flows[i]) {
- free_table(flows, i << DP_L1_BITS, 0);
+ l1[i] = (struct dp_bucket **)get_zeroed_page(GFP_KERNEL);
+ if (!l1[i]) {
+ free_buckets(l1, i << DP_L1_BITS, 0);
return NULL;
}
}
- return flows;
+ return l1;
}
+/**
+ * dp_table_create - create and return a new flow table
+ * @n_buckets: number of buckets in the new table
+ *
+ * Creates and returns a new flow table, or %NULL if memory cannot be
+ * allocated. @n_buckets must be a power of 2 in the range %DP_L1_SIZE to
+ * %DP_MAX_BUCKETS.
+ */
struct dp_table *dp_table_create(unsigned int n_buckets)
{
struct dp_table *table;
@@ -64,95 +90,124 @@ struct dp_table *dp_table_create(unsigned int n_buckets)
goto err;
table->n_buckets = n_buckets;
- table->flows[0] = alloc_table(n_buckets);
- if (!table[0].flows)
- goto err_free_tables;
-
- table->flows[1] = alloc_table(n_buckets);
- if (!table->flows[1])
- goto err_free_flows0;
+ table->buckets = alloc_buckets(n_buckets);
+ if (!table->buckets)
+ goto err_free_table;
+ get_random_bytes(&table->hash_seed, sizeof table->hash_seed);
return table;
-err_free_flows0:
- free_table(table->flows[0], table->n_buckets, 0);
-err_free_tables:
+err_free_table:
kfree(table);
err:
return NULL;
}
+/**
+ * dp_table_destroy - destroy flow table and optionally the flows it contains
+ * @table: table to destroy (must not be %NULL)
+ * @free_flows: whether to destroy the flows
+ *
+ * If @free_flows is zero, then the buckets in @table are destroyed but not the
+ * flows within those buckets. This behavior is useful when a table is being
+ * replaced by a larger or smaller one without destroying the flows.
+ *
+ * If @free_flows is nonzero, then the flows in @table are destroyed as well as
+ * the buckets.
+ */
void dp_table_destroy(struct dp_table *table, int free_flows)
{
- int i;
- for (i = 0; i < 2; i++)
- free_table(table->flows[i], table->n_buckets, free_flows);
+ free_buckets(table->buckets, table->n_buckets, free_flows);
kfree(table);
}
-static struct sw_flow **find_bucket(struct dp_table *table,
- struct sw_flow ***flows, u32 hash)
+static struct dp_bucket **find_bucket(struct dp_table *table, u32 hash)
{
unsigned int l1 = (hash & (table->n_buckets - 1)) >> DP_L1_SHIFT;
unsigned int l2 = hash & ((1 << DP_L2_BITS) - 1);
- return &flows[l1][l2];
+ return &table->buckets[l1][l2];
}
-static struct sw_flow *lookup_table(struct dp_table *table,
- struct sw_flow ***flows, u32 hash,
- const struct odp_flow_key *key)
+static int search_bucket(const struct dp_bucket *bucket, const struct odp_flow_key *key)
{
- struct sw_flow **bucket = find_bucket(table, flows, hash);
- struct sw_flow *flow = rcu_dereference(*bucket);
- if (flow && !memcmp(&flow->key, key, sizeof(struct odp_flow_key)))
- return flow;
- return NULL;
-}
+ int i;
-static u32 flow_hash0(const struct odp_flow_key *key)
-{
- return jhash2((u32*)key, sizeof *key / sizeof(u32), 0xaaaaaaaa);
+ for (i = 0; i < bucket->n_flows; i++) {
+ struct sw_flow *flow = rcu_dereference(bucket->flows[i]);
+ if (!memcmp(&flow->key, key, sizeof(struct odp_flow_key)))
+ return i;
+ }
+
+ return -1;
}
-static u32 flow_hash1(const struct odp_flow_key *key)
+static struct sw_flow *lookup_flow(struct dp_table *table, u32 hash,
+ const struct odp_flow_key *key)
{
- return jhash2((u32*)key, sizeof *key / sizeof(u32), 0x55555555);
+ struct dp_bucket **bucketp = find_bucket(table, hash);
+ struct dp_bucket *bucket = rcu_dereference(*bucketp);
+ int index;
+
+ if (!bucket)
+ return NULL;
+
+ index = search_bucket(bucket, key);
+ if (index < 0)
+ return NULL;
+
+ return bucket->flows[index];
}
-static void find_buckets(struct dp_table *table,
- const struct odp_flow_key *key,
- struct sw_flow **buckets[2])
+static u32 flow_hash(const struct dp_table *table,
+ const struct odp_flow_key *key)
{
- buckets[0] = find_bucket(table, table->flows[0], flow_hash0(key));
- buckets[1] = find_bucket(table, table->flows[1], flow_hash1(key));
+ return jhash2((u32*)key, sizeof *key / sizeof(u32), table->hash_seed);
}
+/**
+ * dp_table_lookup - searches flow table for a matching flow
+ * @table: flow table to search
+ * @key: flow key for which to search
+ *
+ * Searches @table for a flow whose key is equal to @key. Returns the flow if
+ * successful, otherwise %NULL.
+ */
struct sw_flow *dp_table_lookup(struct dp_table *table,
const struct odp_flow_key *key)
{
- struct sw_flow *flow;
- flow = lookup_table(table, table->flows[0], flow_hash0(key), key);
- if (!flow)
- flow = lookup_table(table, table->flows[1],
- flow_hash1(key), key);
- return flow;
+ return lookup_flow(table, flow_hash(table, key), key);
}
+/**
+ * dp_table_foreach - iterate through flow table
+ * @table: table to iterate
+ * @callback: function to call for each flow entry
+ * @aux: Extra data to pass to @callback
+ *
+ * Iterates through all of the flows in @table in hash order, passing each of
+ * them in turn to @callback. If @callback returns nonzero, this terminates
+ * the iteration and dp_table_foreach() returns the same value. Returns 0 if
+ * @callback never returns nonzero.
+ *
+ * This function does not try to intelligently handle the case where @callback
+ * adds or removes flows in @table.
+ */
int dp_table_foreach(struct dp_table *table,
int (*callback)(struct sw_flow *flow, void *aux),
void *aux)
{
unsigned int i, j, k;
- for (i = 0; i < 2; i++) {
- for (j = 0; j < table->n_buckets >> DP_L1_BITS; j++) {
- struct sw_flow **l2 = table->flows[i][j];
- for (k = 0; k < DP_L1_SIZE; k++) {
- struct sw_flow *flow = rcu_dereference(l2[k]);
- if (flow) {
- int error = callback(flow, aux);
- if (error)
- return error;
- }
+ for (i = 0; i < table->n_buckets >> DP_L1_BITS; i++) {
+ struct dp_bucket **l2 = table->buckets[i];
+ for (j = 0; j < DP_L1_SIZE; j++) {
+ struct dp_bucket *bucket = rcu_dereference(l2[j]);
+ if (!bucket)
+ continue;
+
+ for (k = 0; k < bucket->n_flows; k++) {
+ int error = (*callback)(bucket->flows[k], aux);
+ if (error)
+ return error;
}
}
}
@@ -162,18 +217,7 @@ int dp_table_foreach(struct dp_table *table,
static int insert_flow(struct sw_flow *flow, void *new_table_)
{
struct dp_table *new_table = new_table_;
- struct sw_flow **buckets[2];
- int i;
-
- find_buckets(new_table, &flow->key, buckets);
- for (i = 0; i < 2; i++) {
- if (!*buckets[i]) {
- rcu_assign_pointer(*buckets[i], flow);
- return 0;
- }
- }
- WARN_ON_ONCE(1);
- return 0;
+ return dp_table_insert(new_table, flow);
}
static void dp_free_table_rcu(struct rcu_head *rcu)
@@ -182,16 +226,34 @@ static void dp_free_table_rcu(struct rcu_head *rcu)
dp_table_destroy(table, 0);
}
+/**
+ * dp_table_expand - replace datapath's flow table by one with more buckets
+ * @dp: datapath to expand
+ *
+ * Replaces @dp's flow table by one that has twice as many buckets. All of the
+ * flows in @dp's flow table are moved to the new flow table. Returns 0 if
+ * successful, otherwise a negative error.
+ */
int dp_table_expand(struct datapath *dp)
{
struct dp_table *old_table = rcu_dereference(dp->table);
- struct dp_table *new_table = dp_table_create(old_table->n_buckets * 2);
+ struct dp_table *new_table;
+
+ new_table = dp_table_create(old_table->n_buckets * 2);
if (!new_table)
- return -ENOMEM;
- dp_table_foreach(old_table, insert_flow, new_table);
+ goto error;
+
+ if (dp_table_foreach(old_table, insert_flow, new_table))
+ goto error_free_new_table;
+
rcu_assign_pointer(dp->table, new_table);
call_rcu(&old_table->rcu, dp_free_table_rcu);
return 0;
+
+error_free_new_table:
+ dp_table_destroy(new_table, 0);
+error:
+ return -ENOMEM;
}
static void dp_free_table_and_flows_rcu(struct rcu_head *rcu)
@@ -200,6 +262,13 @@ static void dp_free_table_and_flows_rcu(struct rcu_head *rcu)
dp_table_destroy(table, 1);
}
+/**
+ * dp_table_flush - clear datapath's flow table
+ * @dp: datapath to clear
+ *
+ * Replaces @dp's flow table by an empty flow table, destroying all the flows
+ * in the old table (after a suitable RCU grace period).
+ */
int dp_table_flush(struct datapath *dp)
{
struct dp_table *old_table = rcu_dereference(dp->table);
@@ -211,38 +280,88 @@ int dp_table_flush(struct datapath *dp)
return 0;
}
-struct sw_flow **
-dp_table_lookup_for_insert(struct dp_table *table,
- const struct odp_flow_key *target)
+static void dp_free_bucket_rcu(struct rcu_head *rcu)
{
- struct sw_flow **buckets[2];
- struct sw_flow **empty_bucket = NULL;
- int i;
+ struct dp_bucket *bucket = container_of(rcu, struct dp_bucket, rcu);
+ kfree(bucket);
+}
- find_buckets(table, target, buckets);
- for (i = 0; i < 2; i++) {
- struct sw_flow *f = rcu_dereference(*buckets[i]);
- if (f) {
- if (!memcmp(&f->key, target, sizeof(struct odp_flow_key)))
- return buckets[i];
- } else if (!empty_bucket)
- empty_bucket = buckets[i];
- }
- return empty_bucket;
+/**
+ * dp_table_insert - insert flow into table
+ * @table: table in which to insert flow
+ * @target: flow to insert
+ *
+ * The caller must ensure that no flow with key identical to @target->key
+ * already exists in @table. Returns 0 or a negative error (currently just
+ * -ENOMEM).
+ *
+ * The caller is responsible for updating &struct datapath's n_flows member.
+ */
+int dp_table_insert(struct dp_table *table, struct sw_flow *target)
+{
+ u32 hash = flow_hash(table, &target->key);
+ struct dp_bucket **oldp = find_bucket(table, hash);
+ struct dp_bucket *old = *rcu_dereference(oldp);
+ unsigned int n = old ? old->n_flows : 0;
+ struct dp_bucket *new = dp_bucket_alloc(n + 1);
+
+ if (!new)
+ return -ENOMEM;
+
+ new->n_flows = n + 1;
+ if (old)
+ memcpy(new->flows, old->flows, n * sizeof(struct sw_flow*));
+ new->flows[n] = target;
+
+ rcu_assign_pointer(*oldp, new);
+ if (old)
+ call_rcu(&old->rcu, dp_free_bucket_rcu);
+
+ return 0;
}
+/**
+ * dp_table_delete - remove flow from table
+ * @table: table from which to remove flow
+ * @target: flow to remove
+ *
+ * The caller must ensure that @target itself is in @table. (It is not
+ * good enough for @table to contain a different flow with a key equal to
+ * @target's key.)
+ *
+ * Returns 0 or a negative error (currently just -ENOMEM). Yes, it *is*
+ * possible for a flow deletion to fail due to lack of memory.
+ *
+ * The caller is responsible for updating &struct datapath's n_flows member.
+ */
int dp_table_delete(struct dp_table *table, struct sw_flow *target)
{
- struct sw_flow **buckets[2];
- int i;
+ u32 hash = flow_hash(table, &target->key);
+ struct dp_bucket **oldp = find_bucket(table, hash);
+ struct dp_bucket *old = *rcu_dereference(oldp);
+ unsigned int n = old->n_flows;
+ struct dp_bucket *new;
+
+ if (n > 1) {
+ unsigned int i;
- find_buckets(table, &target->key, buckets);
- for (i = 0; i < 2; i++) {
- struct sw_flow *flow = rcu_dereference(*buckets[i]);
- if (flow == target) {
- rcu_assign_pointer(*buckets[i], NULL);
- return 0;
+ new = dp_bucket_alloc(n - 1);
+ if (!new)
+ return -ENOMEM;
+
+ new->n_flows = 0;
+ for (i = 0; i < n; i++) {
+ struct sw_flow *flow = old->flows[i];
+ if (flow != target)
+ new->flows[new->n_flows++] = flow;
}
+ WARN_ON_ONCE(new->n_flows != n - 1);
+ } else {
+ new = NULL;
}
- return -ENOENT;
+
+ rcu_assign_pointer(*oldp, new);
+ call_rcu(&old->rcu, dp_free_bucket_rcu);
+
+ return 0;
}
diff --git a/debian/corekeeper.init b/debian/corekeeper.init
index 27d62a12..d820b028 100755
--- a/debian/corekeeper.init
+++ b/debian/corekeeper.init
@@ -6,6 +6,7 @@
# adjust it to the program you want to run.
#
# Copyright (c) 2007 Javier Fernandez-Sanguino <jfs@debian.org>
+# Copyright (c) 2009 Nicira Networks, Inc.
#
# This is free software; you may redistribute it and/or modify
# it under the terms of the GNU General Public License as
@@ -42,7 +43,7 @@ set -e
case "$1" in
start)
log_daemon_msg "Initializing core dump location..."
- if echo "/var/log/core/core.%e.%t" > /proc/sys/kernel/core_pattern
+ if echo "/var/log/core/core.%e.%t.%p" > /proc/sys/kernel/core_pattern
then
log_progress_msg "success"
log_end_msg 0
diff --git a/extras/ezio/ovs-switchui.c b/extras/ezio/ovs-switchui.c
index 721717ee..0f6640e0 100644
--- a/extras/ezio/ovs-switchui.c
+++ b/extras/ezio/ovs-switchui.c
@@ -2480,7 +2480,7 @@ choose_netdevs(struct svec *choices)
retval = netdev_open(name, NETDEV_ETH_TYPE_NONE, &netdev);
if (!retval) {
- bool exclude = netdev_get_in4(netdev, NULL) == 0;
+ bool exclude = netdev_get_in4(netdev, NULL, NULL) == 0;
netdev_close(netdev);
if (exclude) {
continue;
diff --git a/include/openflow/openflow-mgmt.h b/include/openflow/openflow-mgmt.h
index c3b62c91..04017d42 100644
--- a/include/openflow/openflow-mgmt.h
+++ b/include/openflow/openflow-mgmt.h
@@ -243,7 +243,8 @@ enum ofmp_extended_data_flags {
/* Body of extended data message. May be sent by either the switch or the
* controller to send messages that are greater than 65535 bytes in
- * length.
+ * length. The OpenFlow transaction id (xid) must be the same for all
+ * the individual OpenFlow messages that make up an extended message.
*
* OFMPT_EXTENDED_DATA (switch <-> controller) */
struct ofmp_extended_data {
diff --git a/include/openvswitch/datapath-protocol.h b/include/openvswitch/datapath-protocol.h
index 868c8542..04423d94 100644
--- a/include/openvswitch/datapath-protocol.h
+++ b/include/openvswitch/datapath-protocol.h
@@ -160,7 +160,8 @@ struct odp_flow_key {
__be16 tp_dst; /* TCP/UDP destination port. */
__u8 dl_src[ETH_ALEN]; /* Ethernet source address. */
__u8 dl_dst[ETH_ALEN]; /* Ethernet destination address. */
- __u8 nw_proto; /* IP protocol. */
+ __u8 nw_proto; /* IP protocol or lower 8 bits of
+ ARP opcode. */
__u8 reserved; /* Pad to 64 bits. */
};
diff --git a/lib/flow.c b/lib/flow.c
index 1801d4de..c1f6240f 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -31,6 +31,12 @@
#include "vlog.h"
#define THIS_MODULE VLM_flow
+static struct arp_eth_header *
+pull_arp(struct ofpbuf *packet)
+{
+ return ofpbuf_try_pull(packet, ARP_ETH_HEADER_LEN);
+}
+
static struct ip_header *
pull_ip(struct ofpbuf *packet)
{
@@ -185,6 +191,23 @@ flow_extract(struct ofpbuf *packet, uint16_t in_port, flow_t *flow)
retval = 1;
}
}
+ } else if (flow->dl_type == htons(ETH_TYPE_ARP)) {
+ const struct arp_eth_header *arp = pull_arp(&b);
+ if (arp && arp->ar_hrd == htons(1)
+ && arp->ar_pro == htons(ETH_TYPE_IP)
+ && arp->ar_hln == ETH_ADDR_LEN
+ && arp->ar_pln == 4) {
+ /* We only match on the lower 8 bits of the opcode. */
+ if (ntohs(arp->ar_op) <= 0xff) {
+ flow->nw_proto = ntohs(arp->ar_op);
+ }
+
+ if ((flow->nw_proto == ARP_OP_REQUEST)
+ || (flow->nw_proto == ARP_OP_REPLY)) {
+ flow->nw_src = arp->ar_spa;
+ flow->nw_dst = arp->ar_tpa;
+ }
+ }
}
}
return retval;
@@ -212,8 +235,12 @@ flow_extract_stats(const flow_t *flow, struct ofpbuf *packet,
stats->n_packets = 1;
}
+/* The Open vSwitch datapath supports matching on ARP payloads, which
+ * OpenFlow does not. This function is identical to 'flow_to_match',
+ * but does not hide the datapath's ability to match on ARP. */
void
-flow_to_match(const flow_t *flow, uint32_t wildcards, struct ofp_match *match)
+flow_to_ovs_match(const flow_t *flow, uint32_t wildcards,
+ struct ofp_match *match)
{
match->wildcards = htonl(wildcards);
match->in_port = htons(flow->in_port == ODPP_LOCAL ? OFPP_LOCAL
@@ -230,6 +257,26 @@ flow_to_match(const flow_t *flow, uint32_t wildcards, struct ofp_match *match)
match->pad = 0;
}
+/* Extract 'flow' with 'wildcards' into the OpenFlow match structure
+ * 'match'. */
+void
+flow_to_match(const flow_t *flow, uint32_t wildcards, struct ofp_match *match)
+{
+ flow_to_ovs_match(flow, wildcards, match);
+
+ /* The datapath supports matching on an ARP's opcode and IP addresses,
+ * but OpenFlow does not. We wildcard and zero out the appropriate
+ * fields so that OpenFlow is unaware of our trickery. */
+ if (flow->dl_type == htons(ETH_TYPE_ARP)) {
+ wildcards |= (OFPFW_NW_PROTO | OFPFW_NW_SRC_ALL | OFPFW_NW_DST_ALL);
+ match->nw_src = 0;
+ match->nw_dst = 0;
+ match->nw_proto = 0;
+ }
+ match->wildcards = htonl(wildcards);
+}
+
+
void
flow_from_match(flow_t *flow, uint32_t *wildcards,
const struct ofp_match *match)
@@ -237,6 +284,14 @@ flow_from_match(flow_t *flow, uint32_t *wildcards,
if (wildcards) {
*wildcards = ntohl(match->wildcards);
}
+ /* The datapath supports matching on an ARP's opcode and IP addresses,
+ * but OpenFlow does not. In case the controller hasn't done so, we
+ * need to set the appropriate wildcard bits so that we're externally
+ * OpenFlow-compliant. */
+ if (match->dl_type == htons(ETH_TYPE_ARP)) {
+ *wildcards |= (OFPFW_NW_PROTO | OFPFW_NW_SRC_ALL | OFPFW_NW_DST_ALL);
+ }
+
flow->nw_src = match->nw_src;
flow->nw_dst = match->nw_dst;
flow->in_port = (match->in_port == htons(OFPP_LOCAL) ? ODPP_LOCAL
diff --git a/lib/flow.h b/lib/flow.h
index 35415057..cb201099 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -36,6 +36,7 @@ int flow_extract(struct ofpbuf *, uint16_t in_port, flow_t *);
void flow_extract_stats(const flow_t *flow, struct ofpbuf *packet,
struct odp_flow_stats *stats);
void flow_to_match(const flow_t *, uint32_t wildcards, struct ofp_match *);
+void flow_to_ovs_match(const flow_t *, uint32_t wildcards, struct ofp_match *);
void flow_from_match(flow_t *, uint32_t *wildcards, const struct ofp_match *);
char *flow_to_string(const flow_t *);
void flow_format(struct ds *, const flow_t *);
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 3e340444..11d83e97 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -96,7 +96,7 @@ struct netdev_linux_cache {
int ifindex;
uint8_t etheraddr[ETH_ADDR_LEN];
- struct in_addr in4;
+ struct in_addr address, netmask;
struct in6_addr in6;
int mtu;
int carrier;
@@ -125,6 +125,8 @@ static int netdev_linux_do_ethtool(struct netdev *, struct ethtool_cmd *,
int cmd, const char *cmd_name);
static int netdev_linux_do_ioctl(const struct netdev *, struct ifreq *,
int cmd, const char *cmd_name);
+static int netdev_linux_get_ipv4(const struct netdev *, struct in_addr *,
+ int cmd, const char *cmd_name);
static int get_flags(const struct netdev *, int *flagsp);
static int set_flags(struct netdev *, int flags);
static int do_get_ifindex(const char *netdev_name);
@@ -935,49 +937,48 @@ netdev_linux_set_policing(struct netdev *netdev,
return 0;
}
-/* If 'netdev' has an assigned IPv4 address, sets '*in4' to that address (if
- * 'in4' is non-null) and returns true. Otherwise, returns false. */
static int
-netdev_linux_get_in4(const struct netdev *netdev_, struct in_addr *in4)
+netdev_linux_get_in4(const struct netdev *netdev_,
+ struct in_addr *address, struct in_addr *netmask)
{
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
if (!(netdev->cache->valid & VALID_IN4)) {
- const struct sockaddr_in *sin;
- struct ifreq ifr;
int error;
- ifr.ifr_addr.sa_family = AF_INET;
- error = netdev_linux_do_ioctl(netdev_, &ifr,
+ error = netdev_linux_get_ipv4(netdev_, &netdev->cache->address,
SIOCGIFADDR, "SIOCGIFADDR");
if (error) {
return error;
}
- sin = (struct sockaddr_in *) &ifr.ifr_addr;
- netdev->cache->in4 = sin->sin_addr;
+ error = netdev_linux_get_ipv4(netdev_, &netdev->cache->netmask,
+ SIOCGIFNETMASK, "SIOCGIFNETMASK");
+ if (error) {
+ return error;
+ }
+
netdev->cache->valid |= VALID_IN4;
}
- *in4 = netdev->cache->in4;
- return in4->s_addr == INADDR_ANY ? EADDRNOTAVAIL : 0;
+ *address = netdev->cache->address;
+ *netmask = netdev->cache->netmask;
+ return address->s_addr == INADDR_ANY ? EADDRNOTAVAIL : 0;
}
-/* Assigns 'addr' as 'netdev''s IPv4 address and 'mask' as its netmask. If
- * 'addr' is INADDR_ANY, 'netdev''s IPv4 address is cleared. Returns a
- * positive errno value. */
static int
-netdev_linux_set_in4(struct netdev *netdev_, struct in_addr addr,
- struct in_addr mask)
+netdev_linux_set_in4(struct netdev *netdev_, struct in_addr address,
+ struct in_addr netmask)
{
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
int error;
- error = do_set_addr(netdev_, SIOCSIFADDR, "SIOCSIFADDR", addr);
+ error = do_set_addr(netdev_, SIOCSIFADDR, "SIOCSIFADDR", address);
if (!error) {
netdev->cache->valid |= VALID_IN4;
- netdev->cache->in4 = addr;
- if (addr.s_addr != INADDR_ANY) {
+ netdev->cache->address = address;
+ netdev->cache->netmask = netmask;
+ if (address.s_addr != INADDR_ANY) {
error = do_set_addr(netdev_, SIOCSIFNETMASK,
- "SIOCSIFNETMASK", mask);
+ "SIOCSIFNETMASK", netmask);
}
}
return error;
@@ -1076,6 +1077,67 @@ netdev_linux_add_router(struct netdev *netdev UNUSED, struct in_addr router)
return error;
}
+static int
+netdev_linux_get_next_hop(const struct in_addr *host, struct in_addr *next_hop,
+ char **netdev_name)
+{
+ static const char fn[] = "/proc/net/route";
+ FILE *stream;
+ char line[256];
+ int ln;
+
+ *netdev_name = NULL;
+ stream = fopen(fn, "r");
+ if (stream == NULL) {
+ VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, strerror(errno));
+ return errno;
+ }
+
+ ln = 0;
+ while (fgets(line, sizeof line, stream)) {
+ if (++ln >= 2) {
+ char iface[17];
+ uint32_t dest, gateway, mask;
+ int refcnt, metric, mtu;
+ unsigned int flags, use, window, irtt;
+
+ if (sscanf(line,
+ "%16s %"SCNx32" %"SCNx32" %04X %d %u %d %"SCNx32
+ " %d %u %u\n",
+ iface, &dest, &gateway, &flags, &refcnt,
+ &use, &metric, &mask, &mtu, &window, &irtt) != 11) {
+
+ VLOG_WARN_RL(&rl, "%s: could not parse line %d: %s",
+ fn, ln, line);
+ continue;
+ }
+ if (!(flags & RTF_UP)) {
+ /* Skip routes that aren't up. */
+ continue;
+ }
+
+ /* The values of 'dest', 'mask', and 'gateway' are given in
+ * network byte order, so we don't need any endian
+ * conversions here. */
+ if ((dest & mask) == (host->s_addr & mask)) {
+ if (!gateway) {
+ /* The host is directly reachable. */
+ next_hop->s_addr = 0;
+ } else {
+ /* To reach the host, we must go through a gateway. */
+ next_hop->s_addr = gateway;
+ }
+ *netdev_name = xstrdup(iface);
+ fclose(stream);
+ return 0;
+ }
+ }
+ }
+
+ fclose(stream);
+ return ENXIO;
+}
+
/* Looks up the ARP table entry for 'ip' on 'netdev'. If one exists and can be
* successfully retrieved, it stores the corresponding MAC address in 'mac' and
* returns 0. Otherwise, it returns a positive errno value; in particular,
@@ -1269,6 +1331,7 @@ const struct netdev_class netdev_linux_class = {
netdev_linux_set_in4,
netdev_linux_get_in6,
netdev_linux_add_router,
+ netdev_linux_get_next_hop,
netdev_linux_arp_lookup,
netdev_linux_update_flags,
@@ -1312,6 +1375,7 @@ const struct netdev_class netdev_tap_class = {
netdev_linux_set_in4,
netdev_linux_get_in6,
netdev_linux_add_router,
+ netdev_linux_get_next_hop,
netdev_linux_arp_lookup,
netdev_linux_update_flags,
@@ -1591,3 +1655,19 @@ netdev_linux_do_ioctl(const struct netdev *netdev, struct ifreq *ifr,
}
return 0;
}
+
+static int
+netdev_linux_get_ipv4(const struct netdev *netdev, struct in_addr *ip,
+ int cmd, const char *cmd_name)
+{
+ struct ifreq ifr;
+ int error;
+
+ ifr.ifr_addr.sa_family = AF_INET;
+ error = netdev_linux_do_ioctl(netdev, &ifr, cmd, cmd_name);
+ if (!error) {
+ const struct sockaddr_in *sin = (struct sockaddr_in *) &ifr.ifr_addr;
+ *ip = sin->sin_addr;
+ }
+ return error;
+}
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index 9c880b1c..a573e246 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -208,7 +208,8 @@ struct netdev_class {
int (*set_policing)(struct netdev *netdev, unsigned int kbits_rate,
unsigned int kbits_burst);
- /* If 'netdev' has an assigned IPv4 address, sets '*in4' to that address.
+ /* If 'netdev' has an assigned IPv4 address, sets '*address' to that
+ * address and '*netmask' to the associated netmask.
*
* The following error values have well-defined meanings:
*
@@ -218,7 +219,8 @@ struct netdev_class {
*
* This function may be set to null if it would always return EOPNOTSUPP
* anyhow. */
- int (*get_in4)(const struct netdev *netdev, struct in_addr *in4);
+ int (*get_in4)(const struct netdev *netdev, struct in_addr *address,
+ struct in_addr *netmask);
/* Assigns 'addr' as 'netdev''s IPv4 address and 'mask' as its netmask. If
* 'addr' is INADDR_ANY, 'netdev''s IPv4 address is cleared.
@@ -246,6 +248,17 @@ struct netdev_class {
* anyhow. */
int (*add_router)(struct netdev *netdev, struct in_addr router);
+ /* Looks up the next hop for 'host'. If successful, stores the next hop
+ * gateway's address (0 if 'host' is on a directly connected network) in
+ * '*next_hop' and a copy of the name of the device to reach 'host' in
+ * '*netdev_name', and returns 0. The caller is responsible for freeing
+ * '*netdev_name' (by calling free()).
+ *
+ * This function may be set to null if it would always return EOPNOTSUPP
+ * anyhow. */
+ int (*get_next_hop)(const struct in_addr *host, struct in_addr *next_hop,
+ char **netdev_name);
+
/* Looks up the ARP table entry for 'ip' on 'netdev' and stores the
* corresponding MAC address in 'mac'. A return value of ENXIO, in
* particular, indicates that there is no ARP table entry for 'ip' on
diff --git a/lib/netdev.c b/lib/netdev.c
index dcb63fa0..38610e11 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -391,9 +391,9 @@ netdev_set_advertisements(struct netdev *netdev, uint32_t advertise)
: EOPNOTSUPP);
}
-/* If 'netdev' has an assigned IPv4 address, sets '*in4' to that address and
- * returns 0. Otherwise, returns a positive errno value and sets '*in4' to 0
- * (INADDR_ANY).
+/* If 'netdev' has an assigned IPv4 address, sets '*address' to that address
+ * and '*netmask' to its netmask and returns 0. Otherwise, returns a positive
+ * errno value and sets '*address' and '*netmask' to 0 (INADDR_ANY).
*
* The following error values have well-defined meanings:
*
@@ -401,18 +401,24 @@ netdev_set_advertisements(struct netdev *netdev, uint32_t advertise)
*
* - EOPNOTSUPP: No IPv4 network stack attached to 'netdev'.
*
- * 'in4' may be null, in which case the address itself is not reported. */
+ * 'address' or 'netmask' or both may be null, in which case the address or netmask
+ * is not reported. */
int
-netdev_get_in4(const struct netdev *netdev, struct in_addr *in4)
+netdev_get_in4(const struct netdev *netdev,
+ struct in_addr *address_, struct in_addr *netmask_)
{
- struct in_addr dummy;
+ struct in_addr address;
+ struct in_addr netmask;
int error;
error = (netdev->class->get_in4
- ? netdev->class->get_in4(netdev, in4 ? in4 : &dummy)
+ ? netdev->class->get_in4(netdev, &address, &netmask)
: EOPNOTSUPP);
- if (error && in4) {
- in4->s_addr = 0;
+ if (address_) {
+ address_->s_addr = error ? 0 : address.s_addr;
+ }
+ if (netmask_) {
+ netmask_->s_addr = error ? 0 : netmask.s_addr;
}
return error;
}
@@ -439,6 +445,28 @@ netdev_add_router(struct netdev *netdev, struct in_addr router)
: EOPNOTSUPP);
}
+/* Looks up the next hop for 'host' for the TCP/IP stack that corresponds to
+ * 'netdev'. If a route cannot be determined, sets '*next_hop' to 0,
+ * '*netdev_name' to null, and returns a positive errno value. Otherwise, if a
+ * next hop is found, stores the next hop gateway's address (0 if 'host' is on
+ * a directly connected network) in '*next_hop' and a copy of the name of the
+ * device to reach 'host' in '*netdev_name', and returns 0. The caller is
+ * responsible for freeing '*netdev_name' (by calling free()). */
+int
+netdev_get_next_hop(const struct netdev *netdev,
+ const struct in_addr *host, struct in_addr *next_hop,
+ char **netdev_name)
+{
+ int error = (netdev->class->get_next_hop
+ ? netdev->class->get_next_hop(host, next_hop, netdev_name)
+ : EOPNOTSUPP);
+ if (error) {
+ next_hop->s_addr = 0;
+ *netdev_name = NULL;
+ }
+ return error;
+}
+
/* If 'netdev' has an assigned IPv6 address, sets '*in6' to that address and
* returns 0. Otherwise, returns a positive errno value and sets '*in6' to
* all-zero-bits (in6addr_any).
@@ -633,7 +661,7 @@ netdev_find_dev_by_in4(const struct in_addr *in4)
struct in_addr dev_in4;
if (!netdev_open(name, NETDEV_ETH_TYPE_NONE, &netdev)
- && !netdev_get_in4(netdev, &dev_in4)
+ && !netdev_get_in4(netdev, &dev_in4, NULL)
&& dev_in4.s_addr == in4->s_addr) {
goto exit;
}
diff --git a/lib/netdev.h b/lib/netdev.h
index b66d7bc0..4a29cf37 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -107,10 +107,13 @@ int netdev_get_features(struct netdev *,
uint32_t *supported, uint32_t *peer);
int netdev_set_advertisements(struct netdev *, uint32_t advertise);
-int netdev_get_in4(const struct netdev *, struct in_addr *);
+int netdev_get_in4(const struct netdev *, struct in_addr *address,
+ struct in_addr *netmask);
int netdev_set_in4(struct netdev *, struct in_addr addr, struct in_addr mask);
int netdev_get_in6(const struct netdev *, struct in6_addr *);
int netdev_add_router(struct netdev *, struct in_addr router);
+int netdev_get_next_hop(const struct netdev *, const struct in_addr *host,
+ struct in_addr *next_hop, char **);
int netdev_arp_lookup(const struct netdev *, uint32_t ip, uint8_t mac[6]);
int netdev_get_flags(const struct netdev *, enum netdev_flags *);
diff --git a/ofproto/in-band.c b/ofproto/in-band.c
index a08af079..18415f48 100644
--- a/ofproto/in-band.c
+++ b/ofproto/in-band.c
@@ -22,6 +22,8 @@
#include <net/if.h>
#include <string.h>
#include <stdlib.h>
+#include "dhcp.h"
+#include "dpif.h"
#include "flow.h"
#include "mac-learning.h"
#include "netdev.h"
@@ -30,6 +32,7 @@
#include "ofproto.h"
#include "ofpbuf.h"
#include "openflow/openflow.h"
+#include "openvswitch/datapath-protocol.h"
#include "packets.h"
#include "poll-loop.h"
#include "rconn.h"
@@ -43,14 +46,15 @@
#define IB_BASE_PRIORITY 18181800
enum {
- IBR_FROM_LOCAL_PORT, /* Sent by the local port. */
- IBR_OFP_TO_LOCAL, /* Sent to secure channel on local port. */
- IBR_ARP_FROM_LOCAL, /* ARP from the local port. */
- IBR_ARP_FROM_CTL, /* ARP from the controller. */
- IBR_TO_CTL_OFP_SRC, /* To controller, OpenFlow source port. */
- IBR_TO_CTL_OFP_DST, /* To controller, OpenFlow dest port. */
- IBR_FROM_CTL_OFP_SRC, /* From controller, OpenFlow source port. */
- IBR_FROM_CTL_OFP_DST, /* From controller, OpenFlow dest port. */
+ IBR_FROM_LOCAL_DHCP, /* From local port, DHCP. */
+ IBR_TO_LOCAL_ARP, /* To local port, ARP. */
+ IBR_FROM_LOCAL_ARP, /* From local port, ARP. */
+ IBR_TO_REMOTE_ARP, /* To remote MAC, ARP. */
+ IBR_FROM_REMOTE_ARP, /* From remote MAC, ARP. */
+ IBR_TO_CTL_ARP, /* To controller IP, ARP. */
+ IBR_FROM_CTL_ARP, /* From controller IP, ARP. */
+ IBR_TO_CTL_OFP, /* To controller, OpenFlow port. */
+ IBR_FROM_CTL_OFP, /* From controller, OpenFlow port. */
#if OFP_TCP_PORT != OFP_SSL_PORT
#error Need to support separate TCP and SSL flows.
#endif
@@ -69,17 +73,17 @@ struct in_band {
struct rconn *controller;
struct status_category *ss_cat;
- /* Keeping track of controller's MAC address. */
- uint32_t ip; /* Current IP, 0 if unknown. */
- uint32_t last_ip; /* Last known IP, 0 if never known. */
- uint8_t mac[ETH_ADDR_LEN]; /* Current MAC, 0 if unknown. */
- uint8_t last_mac[ETH_ADDR_LEN]; /* Last known MAC, 0 if never known */
- struct netdev *netdev;
- time_t next_refresh; /* Next time to refresh MAC address. */
+ /* Keep track of local port's information. */
+ uint8_t local_mac[ETH_ADDR_LEN]; /* Current MAC. */
+ struct netdev *local_netdev; /* Local port's network device. */
+ time_t next_local_refresh;
- /* Keeping track of the local port's MAC address. */
- uint8_t local_mac[ETH_ADDR_LEN]; /* Current MAC. */
- time_t next_local_refresh; /* Next time to refresh MAC address. */
+ /* Keep track of controller and next hop's information. */
+ uint32_t controller_ip; /* Controller IP, 0 if unknown. */
+ uint8_t remote_mac[ETH_ADDR_LEN]; /* Remote MAC. */
+ struct netdev *remote_netdev;
+ uint8_t last_remote_mac[ETH_ADDR_LEN]; /* Previous remote MAC. */
+ time_t next_remote_refresh;
/* Rules that we set up. */
struct ib_rule rules[N_IB_RULES];
@@ -88,58 +92,64 @@ struct in_band {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 60);
static const uint8_t *
-get_controller_mac(struct in_band *ib)
+get_remote_mac(struct in_band *ib)
{
+ int retval;
+ bool have_mac;
+ struct in_addr c_in4; /* Controller's IP address. */
+ struct in_addr r_in4; /* Next hop IP address. */
+ char *next_hop_dev;
time_t now = time_now();
- uint32_t controller_ip;
- controller_ip = rconn_get_remote_ip(ib->controller);
- if (controller_ip != ib->ip || now >= ib->next_refresh) {
- bool have_mac;
-
- ib->ip = controller_ip;
-
- /* Look up MAC address. */
- memset(ib->mac, 0, sizeof ib->mac);
- if (ib->ip) {
- struct in_addr local_in4 = { rconn_get_local_ip(ib->controller) };
- struct in_addr in4;
- int retval;
-
- /* Refresh device with IP address 'in4'. */
- if (!ib->netdev
- || netdev_get_in4(ib->netdev, &in4)
- || in4.s_addr != local_in4.s_addr)
- {
- netdev_close(ib->netdev);
- ib->netdev = netdev_find_dev_by_in4(&local_in4);
- }
+ if (now >= ib->next_remote_refresh) {
+ /* Find the next-hop IP address. */
+ c_in4.s_addr = ib->controller_ip;
+ memset(ib->remote_mac, 0, sizeof ib->remote_mac);
+ retval = netdev_get_next_hop(ib->local_netdev,
+ &c_in4, &r_in4, &next_hop_dev);
+ if (retval) {
+ VLOG_WARN("cannot find route for controller ("IP_FMT"): %s",
+ IP_ARGS(&ib->controller_ip), strerror(retval));
+ ib->next_remote_refresh = now + 1;
+ return NULL;
+ }
+ if (!r_in4.s_addr) {
+ r_in4.s_addr = c_in4.s_addr;
+ }
- if (ib->netdev) {
- retval = netdev_arp_lookup(ib->netdev, ib->ip, ib->mac);
- if (retval) {
- VLOG_DBG_RL(&rl, "cannot look up controller MAC address "
- "("IP_FMT"): %s",
- IP_ARGS(&ib->ip), strerror(retval));
- }
- } else {
- VLOG_DBG_RL(&rl, "cannot find device with IP address "IP_FMT,
- IP_ARGS(&local_in4.s_addr));
+ /* Open the next hop's network device, unless it is already open. */
+ if (!ib->remote_netdev
+ || strcmp(netdev_get_name(ib->remote_netdev), next_hop_dev))
+ {
+ netdev_close(ib->remote_netdev);
+ retval = netdev_open(next_hop_dev, NETDEV_ETH_TYPE_NONE,
+ &ib->remote_netdev);
+ if (retval) {
+ VLOG_WARN_RL(&rl, "cannot open netdev %s (next hop "
+ "to controller "IP_FMT"): %s",
+ next_hop_dev, IP_ARGS(&ib->controller_ip),
+ strerror(retval));
+ ib->next_remote_refresh = now + 1;
+ return NULL;
}
}
- have_mac = !eth_addr_is_zero(ib->mac);
- /* Log changes in IP, MAC addresses. */
- if (ib->ip && ib->ip != ib->last_ip) {
- VLOG_DBG("controller IP address changed from "IP_FMT
- " to "IP_FMT, IP_ARGS(&ib->last_ip), IP_ARGS(&ib->ip));
- ib->last_ip = ib->ip;
+ /* Look up the MAC address of the next-hop IP address. */
+ retval = netdev_arp_lookup(ib->remote_netdev, r_in4.s_addr,
+ ib->remote_mac);
+ if (retval) {
+ VLOG_DBG_RL(&rl, "cannot look up remote MAC address ("IP_FMT"): %s",
+ IP_ARGS(&r_in4.s_addr), strerror(retval));
}
- if (have_mac && memcmp(ib->last_mac, ib->mac, ETH_ADDR_LEN)) {
- VLOG_DBG("controller MAC address changed from "ETH_ADDR_FMT" to "
+ have_mac = !eth_addr_is_zero(ib->remote_mac);
+ free(next_hop_dev);
+ if (have_mac
+ && !eth_addr_equals(ib->last_remote_mac, ib->remote_mac)) {
+ VLOG_DBG("remote MAC address changed from "ETH_ADDR_FMT" to "
ETH_ADDR_FMT,
- ETH_ADDR_ARGS(ib->last_mac), ETH_ADDR_ARGS(ib->mac));
- memcpy(ib->last_mac, ib->mac, ETH_ADDR_LEN);
+ ETH_ADDR_ARGS(ib->last_remote_mac),
+ ETH_ADDR_ARGS(ib->remote_mac));
+ memcpy(ib->last_remote_mac, ib->remote_mac, ETH_ADDR_LEN);
}
/* Schedule next refresh.
@@ -147,9 +157,11 @@ get_controller_mac(struct in_band *ib)
* If we have an IP address but not a MAC address, then refresh
* quickly, since we probably will get a MAC address soon (via ARP).
* Otherwise, we can afford to wait a little while. */
- ib->next_refresh = now + (!ib->ip || have_mac ? 10 : 1);
+ ib->next_remote_refresh
+ = now + (!ib->controller_ip || have_mac ? 10 : 1);
}
- return !eth_addr_is_zero(ib->mac) ? ib->mac : NULL;
+
+ return !eth_addr_is_zero(ib->remote_mac) ? ib->remote_mac : NULL;
}
static const uint8_t *
@@ -158,7 +170,7 @@ get_local_mac(struct in_band *ib)
time_t now = time_now();
if (now >= ib->next_local_refresh) {
uint8_t ea[ETH_ADDR_LEN];
- if (ib->netdev && !netdev_get_etheraddr(ib->netdev, ea)) {
+ if (ib->local_netdev && netdev_get_etheraddr(ib->local_netdev, ea)) {
memcpy(ib->local_mac, ea, ETH_ADDR_LEN);
}
ib->next_local_refresh = now + 1;
@@ -170,19 +182,15 @@ static void
in_band_status_cb(struct status_reply *sr, void *in_band_)
{
struct in_band *in_band = in_band_;
- const uint8_t *local_mac;
- const uint8_t *controller_mac;
- local_mac = get_local_mac(in_band);
- if (local_mac) {
+ if (!eth_addr_is_zero(in_band->local_mac)) {
status_reply_put(sr, "local-mac="ETH_ADDR_FMT,
- ETH_ADDR_ARGS(local_mac));
+ ETH_ADDR_ARGS(in_band->local_mac));
}
- controller_mac = get_controller_mac(in_band);
- if (controller_mac) {
- status_reply_put(sr, "controller-mac="ETH_ADDR_FMT,
- ETH_ADDR_ARGS(controller_mac));
+ if (!eth_addr_is_zero(in_band->remote_mac)) {
+ status_reply_put(sr, "remote-mac="ETH_ADDR_FMT,
+ ETH_ADDR_ARGS(in_band->remote_mac));
}
}
@@ -224,54 +232,174 @@ setup_flow(struct in_band *in_band, int rule_idx, const flow_t *flow,
}
}
+/* Returns true if 'packet' should be sent to the local port regardless
+ * of the flow table. */
+bool
+in_band_msg_in_hook(struct in_band *in_band, const flow_t *flow,
+ const struct ofpbuf *packet)
+{
+ if (!in_band) {
+ return false;
+ }
+
+ /* Regardless of how the flow table is configured, we want to be
+ * able to see replies to our DHCP requests. */
+ if (flow->dl_type == htons(ETH_TYPE_IP)
+ && flow->nw_proto == IP_TYPE_UDP
+ && flow->tp_src == htons(DHCP_SERVER_PORT)
+ && flow->tp_dst == htons(DHCP_CLIENT_PORT)
+ && packet->l7) {
+ struct dhcp_header *dhcp;
+ const uint8_t *local_mac;
+
+ dhcp = ofpbuf_at(packet, (char *)packet->l7 - (char *)packet->data,
+ sizeof *dhcp);
+ if (!dhcp) {
+ return false;
+ }
+
+ local_mac = get_local_mac(in_band);
+ if (eth_addr_equals(dhcp->chaddr, local_mac)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* Returns true if the rule that would match 'flow' with 'actions' is
+ * allowed to be set up in the datapath. */
+bool
+in_band_rule_check(struct in_band *in_band, const flow_t *flow,
+ const struct odp_actions *actions)
+{
+ if (!in_band) {
+ return true;
+ }
+
+ /* Don't allow flows that would prevent DHCP replies from being seen
+ * by the local port. */
+ if (flow->dl_type == htons(ETH_TYPE_IP)
+ && flow->nw_proto == IP_TYPE_UDP
+ && flow->tp_src == htons(DHCP_SERVER_PORT)
+ && flow->tp_dst == htons(DHCP_CLIENT_PORT)) {
+ int i;
+
+ for (i=0; i<actions->n_actions; i++) {
+ if (actions->actions[i].output.type == ODPAT_OUTPUT
+ && actions->actions[i].output.port == ODPP_LOCAL) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ return true;
+}
+
void
in_band_run(struct in_band *in_band)
{
- const uint8_t *controller_mac;
+ time_t now = time_now();
+ uint32_t controller_ip;
+ const uint8_t *remote_mac;
const uint8_t *local_mac;
flow_t flow;
- if (time_now() < MIN(in_band->next_refresh, in_band->next_local_refresh)) {
+ if (now < in_band->next_remote_refresh
+ && now < in_band->next_local_refresh) {
return;
}
- controller_mac = get_controller_mac(in_band);
- local_mac = get_local_mac(in_band);
- /* Switch traffic sent by the local port. */
- memset(&flow, 0, sizeof flow);
- flow.in_port = ODPP_LOCAL;
- setup_flow(in_band, IBR_FROM_LOCAL_PORT, &flow, OFPFW_IN_PORT,
- OFPP_NORMAL);
+ controller_ip = rconn_get_remote_ip(in_band->controller);
+ if (in_band->controller_ip && controller_ip != in_band->controller_ip) {
+ VLOG_DBG("controller IP address changed from "IP_FMT" to "IP_FMT,
+ IP_ARGS(&in_band->controller_ip),
+ IP_ARGS(&controller_ip));
+ }
+ in_band->controller_ip = controller_ip;
+
+ remote_mac = get_remote_mac(in_band);
+ local_mac = get_local_mac(in_band);
if (local_mac) {
- /* Deliver traffic sent to the connection's interface. */
+ /* Allow DHCP requests to be sent from the local port. */
+ memset(&flow, 0, sizeof flow);
+ flow.in_port = ODPP_LOCAL;
+ flow.dl_type = htons(ETH_TYPE_IP);
+ memcpy(flow.dl_src, local_mac, ETH_ADDR_LEN);
+ flow.nw_proto = IP_TYPE_UDP;
+ flow.tp_src = htons(DHCP_CLIENT_PORT);
+ flow.tp_dst = htons(DHCP_SERVER_PORT);
+ setup_flow(in_band, IBR_FROM_LOCAL_DHCP, &flow,
+ (OFPFW_IN_PORT | OFPFW_DL_TYPE | OFPFW_DL_SRC
+ | OFPFW_NW_PROTO | OFPFW_TP_SRC | OFPFW_TP_DST),
+ OFPP_NORMAL);
+
+ /* Allow the connection's interface to receive directed ARP traffic. */
memset(&flow, 0, sizeof flow);
+ flow.dl_type = htons(ETH_TYPE_ARP);
memcpy(flow.dl_dst, local_mac, ETH_ADDR_LEN);
- setup_flow(in_band, IBR_OFP_TO_LOCAL, &flow, OFPFW_DL_DST,
- OFPP_NORMAL);
+ flow.nw_proto = ARP_OP_REPLY;
+ setup_flow(in_band, IBR_TO_LOCAL_ARP, &flow,
+ (OFPFW_DL_TYPE | OFPFW_DL_DST | OFPFW_NW_PROTO),
+ OFPP_NORMAL);
/* Allow the connection's interface to be the source of ARP traffic. */
memset(&flow, 0, sizeof flow);
flow.dl_type = htons(ETH_TYPE_ARP);
memcpy(flow.dl_src, local_mac, ETH_ADDR_LEN);
- setup_flow(in_band, IBR_ARP_FROM_LOCAL, &flow,
- OFPFW_DL_TYPE | OFPFW_DL_SRC, OFPP_NORMAL);
+ flow.nw_proto = ARP_OP_REQUEST;
+ setup_flow(in_band, IBR_FROM_LOCAL_ARP, &flow,
+ (OFPFW_DL_TYPE | OFPFW_DL_SRC | OFPFW_NW_PROTO),
+ OFPP_NORMAL);
+ } else {
+ drop_flow(in_band, IBR_TO_LOCAL_ARP);
+ drop_flow(in_band, IBR_FROM_LOCAL_ARP);
+ }
+
+ if (remote_mac) {
+ /* Allow ARP replies to the remote side's MAC. */
+ memset(&flow, 0, sizeof flow);
+ flow.dl_type = htons(ETH_TYPE_ARP);
+ memcpy(flow.dl_dst, remote_mac, ETH_ADDR_LEN);
+ flow.nw_proto = ARP_OP_REPLY;
+ setup_flow(in_band, IBR_TO_REMOTE_ARP, &flow,
+ (OFPFW_DL_TYPE | OFPFW_DL_DST | OFPFW_NW_PROTO),
+ OFPP_NORMAL);
+
+ /* Allow ARP requests from the remote side's MAC. */
+ memset(&flow, 0, sizeof flow);
+ flow.dl_type = htons(ETH_TYPE_ARP);
+ memcpy(flow.dl_src, remote_mac, ETH_ADDR_LEN);
+ flow.nw_proto = ARP_OP_REQUEST;
+ setup_flow(in_band, IBR_FROM_REMOTE_ARP, &flow,
+ (OFPFW_DL_TYPE | OFPFW_DL_SRC | OFPFW_NW_PROTO),
+ OFPP_NORMAL);
} else {
- drop_flow(in_band, IBR_OFP_TO_LOCAL);
- drop_flow(in_band, IBR_ARP_FROM_LOCAL);
+ drop_flow(in_band, IBR_TO_REMOTE_ARP);
+ drop_flow(in_band, IBR_FROM_REMOTE_ARP);
}
- if (controller_mac) {
- /* Switch ARP requests sent by the controller. (OFPP_NORMAL will "do
- * the right thing" regarding VLANs here.) */
+ if (controller_ip) {
+ /* Allow ARP replies to the controller's IP. */
memset(&flow, 0, sizeof flow);
flow.dl_type = htons(ETH_TYPE_ARP);
- memcpy(flow.dl_dst, eth_addr_broadcast, ETH_ADDR_LEN);
- memcpy(flow.dl_src, controller_mac, ETH_ADDR_LEN);
- setup_flow(in_band, IBR_ARP_FROM_CTL, &flow,
- OFPFW_DL_TYPE | OFPFW_DL_DST | OFPFW_DL_SRC,
+ flow.nw_proto = ARP_OP_REPLY;
+ flow.nw_dst = controller_ip;
+ setup_flow(in_band, IBR_TO_CTL_ARP, &flow,
+ (OFPFW_DL_TYPE | OFPFW_NW_PROTO | OFPFW_NW_DST_MASK),
OFPP_NORMAL);
+ /* Allow ARP requests from the controller's IP. */
+ memset(&flow, 0, sizeof flow);
+ flow.dl_type = htons(ETH_TYPE_ARP);
+ flow.nw_proto = ARP_OP_REQUEST;
+ flow.nw_src = controller_ip;
+ setup_flow(in_band, IBR_FROM_CTL_ARP, &flow,
+ (OFPFW_DL_TYPE | OFPFW_NW_PROTO | OFPFW_NW_SRC_MASK),
+ OFPP_NORMAL);
+
/* OpenFlow traffic to or from the controller.
*
* (A given field's value is completely ignored if it is wildcarded,
@@ -279,29 +407,22 @@ in_band_run(struct in_band *in_band)
* case here.) */
memset(&flow, 0, sizeof flow);
flow.dl_type = htons(ETH_TYPE_IP);
- memcpy(flow.dl_src, controller_mac, ETH_ADDR_LEN);
- memcpy(flow.dl_dst, controller_mac, ETH_ADDR_LEN);
flow.nw_proto = IP_TYPE_TCP;
+ flow.nw_src = controller_ip;
+ flow.nw_dst = controller_ip;
flow.tp_src = htons(OFP_TCP_PORT);
flow.tp_dst = htons(OFP_TCP_PORT);
- setup_flow(in_band, IBR_TO_CTL_OFP_SRC, &flow,
- (OFPFW_DL_TYPE | OFPFW_DL_DST | OFPFW_NW_PROTO
- | OFPFW_TP_SRC), OFPP_NORMAL);
- setup_flow(in_band, IBR_TO_CTL_OFP_DST, &flow,
- (OFPFW_DL_TYPE | OFPFW_DL_DST | OFPFW_NW_PROTO
+ setup_flow(in_band, IBR_TO_CTL_OFP, &flow,
+ (OFPFW_DL_TYPE | OFPFW_NW_PROTO | OFPFW_NW_DST_MASK
| OFPFW_TP_DST), OFPP_NORMAL);
- setup_flow(in_band, IBR_FROM_CTL_OFP_SRC, &flow,
- (OFPFW_DL_TYPE | OFPFW_DL_SRC | OFPFW_NW_PROTO
+ setup_flow(in_band, IBR_FROM_CTL_OFP, &flow,
+ (OFPFW_DL_TYPE | OFPFW_NW_PROTO | OFPFW_NW_SRC_MASK
| OFPFW_TP_SRC), OFPP_NORMAL);
- setup_flow(in_band, IBR_FROM_CTL_OFP_DST, &flow,
- (OFPFW_DL_TYPE | OFPFW_DL_SRC | OFPFW_NW_PROTO
- | OFPFW_TP_DST), OFPP_NORMAL);
} else {
- drop_flow(in_band, IBR_ARP_FROM_CTL);
- drop_flow(in_band, IBR_TO_CTL_OFP_DST);
- drop_flow(in_band, IBR_TO_CTL_OFP_SRC);
- drop_flow(in_band, IBR_FROM_CTL_OFP_DST);
- drop_flow(in_band, IBR_FROM_CTL_OFP_SRC);
+ drop_flow(in_band, IBR_TO_CTL_ARP);
+ drop_flow(in_band, IBR_FROM_CTL_ARP);
+ drop_flow(in_band, IBR_TO_CTL_OFP);
+ drop_flow(in_band, IBR_FROM_CTL_OFP);
}
}
@@ -309,7 +430,8 @@ void
in_band_wait(struct in_band *in_band)
{
time_t now = time_now();
- time_t wakeup = MIN(in_band->next_refresh, in_band->next_local_refresh);
+ time_t wakeup
+ = MIN(in_band->next_remote_refresh, in_band->next_local_refresh);
if (wakeup > now) {
poll_timer_wait((wakeup - now) * 1000);
} else {
@@ -327,22 +449,44 @@ in_band_flushed(struct in_band *in_band)
}
}
-void
-in_band_create(struct ofproto *ofproto, struct switch_status *ss,
- struct rconn *controller, struct in_band **in_bandp)
+int
+in_band_create(struct ofproto *ofproto, struct dpif *dpif,
+ struct switch_status *ss, struct rconn *controller,
+ struct in_band **in_bandp)
{
struct in_band *in_band;
+ char local_name[IF_NAMESIZE];
+ struct netdev *local_netdev;
+ int error;
+
+ error = dpif_port_get_name(dpif, ODPP_LOCAL,
+ local_name, sizeof local_name);
+ if (error) {
+ VLOG_ERR("failed to initialize in-band control: cannot get name "
+ "of datapath local port (%s)", strerror(error));
+ return error;
+ }
+
+ error = netdev_open(local_name, NETDEV_ETH_TYPE_NONE, &local_netdev);
+ if (error) {
+ VLOG_ERR("failed to initialize in-band control: cannot open "
+ "datapath local port %s (%s)", local_name, strerror(error));
+ return error;
+ }
in_band = xcalloc(1, sizeof *in_band);
in_band->ofproto = ofproto;
in_band->controller = controller;
in_band->ss_cat = switch_status_register(ss, "in-band",
in_band_status_cb, in_band);
- in_band->next_refresh = TIME_MIN;
+ in_band->local_netdev = local_netdev;
in_band->next_local_refresh = TIME_MIN;
- in_band->netdev = NULL;
+ in_band->remote_netdev = NULL;
+ in_band->next_remote_refresh = TIME_MIN;
*in_bandp = in_band;
+
+ return 0;
}
void
@@ -350,7 +494,8 @@ in_band_destroy(struct in_band *in_band)
{
if (in_band) {
switch_status_unregister(in_band->ss_cat);
- netdev_close(in_band->netdev);
+ netdev_close(in_band->local_netdev);
+ netdev_close(in_band->remote_netdev);
/* We don't own the rconn. */
}
}
diff --git a/ofproto/in-band.h b/ofproto/in-band.h
index 624bee9e..ddbc5e56 100644
--- a/ofproto/in-band.h
+++ b/ofproto/in-band.h
@@ -21,15 +21,20 @@
struct dpif;
struct in_band;
+struct odp_actions;
struct ofproto;
struct rconn;
struct settings;
struct switch_status;
-void in_band_create(struct ofproto *, struct switch_status *,
- struct rconn *controller, struct in_band **);
+int in_band_create(struct ofproto *, struct dpif *, struct switch_status *,
+ struct rconn *controller, struct in_band **);
void in_band_destroy(struct in_band *);
void in_band_run(struct in_band *);
+bool in_band_msg_in_hook(struct in_band *, const flow_t *,
+ const struct ofpbuf *packet);
+bool in_band_rule_check(struct in_band *, const flow_t *,
+ const struct odp_actions *);
void in_band_wait(struct in_band *);
void in_band_flushed(struct in_band *);
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index dbaa75bc..7650068e 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -424,9 +424,8 @@ ofproto_set_in_band(struct ofproto *p, bool in_band)
{
if (in_band != (p->in_band != NULL)) {
if (in_band) {
- in_band_create(p, p->switch_status, p->controller->rconn,
- &p->in_band);
- return 0;
+ return in_band_create(p, p->dpif, p->switch_status,
+ p->controller->rconn, &p->in_band);
} else {
ofproto_set_discovery(p, false, NULL, true);
in_band_destroy(p->in_band);
@@ -1700,7 +1699,7 @@ rule_post_uninstall(struct ofproto *ofproto, struct rule *rule)
struct rule *super = rule->super;
rule_account(ofproto, rule, 0);
- if (ofproto->netflow) {
+ if (ofproto->netflow && rule->byte_count) {
struct ofexpired expired;
expired.flow = rule->cr.flow;
expired.packet_count = rule->packet_count;
@@ -2127,6 +2126,13 @@ xlate_actions(const union ofp_action *in, size_t n_in,
ctx.tags = tags ? tags : &no_tags;
ctx.may_setup_flow = true;
do_xlate_actions(in, n_in, &ctx);
+
+ /* Check with in-band control to see if we're allowed to set up this
+ * flow. */
+ if (!in_band_rule_check(ofproto->in_band, flow, out)) {
+ ctx.may_setup_flow = false;
+ }
+
if (may_setup_flow) {
*may_setup_flow = ctx.may_setup_flow;
}
@@ -2516,11 +2522,11 @@ flow_stats_ds_cb(struct cls_rule *rule_, void *cbdata_)
}
query_stats(cbdata->ofproto, rule, &packet_count, &byte_count);
- flow_to_match(&rule->cr.flow, rule->cr.wc.wildcards, &match);
+ flow_to_ovs_match(&rule->cr.flow, rule->cr.wc.wildcards, &match);
ds_put_format(results, "duration=%llds, ",
(time_msec() - rule->created) / 1000);
- ds_put_format(results, "priority=%u", rule->cr.priority);
+ ds_put_format(results, "priority=%u, ", rule->cr.priority);
ds_put_format(results, "n_packets=%"PRIu64", ", packet_count);
ds_put_format(results, "n_bytes=%"PRIu64", ", byte_count);
ofp_print_match(results, &match, true);
@@ -3028,6 +3034,17 @@ handle_odp_msg(struct ofproto *p, struct ofpbuf *packet)
payload.size = msg->length - sizeof *msg;
flow_extract(&payload, msg->port, &flow);
+ /* Check with in-band control to see if this packet should be sent
+ * to the local port regardless of the flow table. */
+ if (in_band_msg_in_hook(p->in_band, &flow, &payload)) {
+ union odp_action action;
+
+ memset(&action, 0, sizeof(action));
+ action.output.type = ODPAT_OUTPUT;
+ action.output.port = ODPP_LOCAL;
+ dpif_execute(p->dpif, flow.in_port, &action, 1, &payload);
+ }
+
rule = lookup_valid_rule(p, &flow);
if (!rule) {
/* Don't send a packet-in if OFPPC_NO_PACKET_IN asserted. */
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 869d7172..7081512e 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -1774,12 +1774,14 @@ compose_dsts(const struct bridge *br, const flow_t *flow, uint16_t vlan,
for (i = 0; i < br->n_ports; i++) {
struct port *port = br->ports[i];
if (port_includes_vlan(port, m->out_vlan)
- && set_dst(dst, flow, in_port, port, tags)
- && !dst_is_duplicate(dsts, dst - dsts, dst))
+ && set_dst(dst, flow, in_port, port, tags))
{
if (port->vlan < 0) {
dst->vlan = m->out_vlan;
}
+ if (dst_is_duplicate(dsts, dst - dsts, dst)) {
+ continue;
+ }
if (dst->dp_ifidx == flow->in_port
&& dst->vlan == vlan) {
/* Don't send out input port on same VLAN. */
@@ -3369,6 +3371,7 @@ mirror_reconfigure_one(struct mirror *m)
int *vlans;
size_t i;
bool mirror_all_ports;
+ bool any_ports_specified;
/* Get output port. */
out_port_name = cfg_get_key(0, "mirror.%s.%s.output.port",
@@ -3407,11 +3410,18 @@ mirror_reconfigure_one(struct mirror *m)
cfg_get_all_keys(&src_ports, "%s.select.src-port", pfx);
cfg_get_all_keys(&dst_ports, "%s.select.dst-port", pfx);
cfg_get_all_keys(&ports, "%s.select.port", pfx);
+ any_ports_specified = src_ports.n || dst_ports.n || ports.n;
svec_append(&src_ports, &ports);
svec_append(&dst_ports, &ports);
svec_destroy(&ports);
prune_ports(m, &src_ports);
prune_ports(m, &dst_ports);
+ if (any_ports_specified && !src_ports.n && !dst_ports.n) {
+ VLOG_ERR("%s: none of the specified ports exist; "
+ "disabling port mirror %s", pfx, pfx);
+ mirror_destroy(m);
+ goto exit;
+ }
/* Get all the vlans, and drop duplicate and invalid vlans. */
svec_init(&vlan_strings);
@@ -3463,6 +3473,7 @@ mirror_reconfigure_one(struct mirror *m)
}
/* Clean up. */
+exit:
svec_destroy(&src_ports);
svec_destroy(&dst_ports);
free(pfx);
diff --git a/vswitchd/mgmt.c b/vswitchd/mgmt.c
index e6e7d4ef..d15b4ba4 100644
--- a/vswitchd/mgmt.c
+++ b/vswitchd/mgmt.c
@@ -54,6 +54,7 @@ static struct rconn *mgmt_rconn;
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 60);
static struct svec capabilities;
static struct ofpbuf ext_data_buffer;
+static uint32_t ext_data_xid = UINT32_MAX;
uint64_t mgmt_id;
@@ -222,6 +223,10 @@ mgmt_reconfigure(void)
if (retval == EAFNOSUPPORT) {
VLOG_ERR("no support for %s vconn", controller_name);
}
+
+ /* Reset the extended message buffer when we create a new
+ * management connection. */
+ ofpbuf_clear(&ext_data_buffer);
}
static void *
@@ -261,12 +266,18 @@ send_openflow_buffer(struct ofpbuf *buffer)
return EINVAL;
}
+ /* Make sure there's room to transmit the data. We don't want to
+ * fail part way through a send. */
+ if (rconn_packet_counter_read(txqlen) >= TXQ_LIMIT) {
+ return EAGAIN;
+ }
+
/* OpenFlow messages use a 16-bit length field, so messages over 64K
* must be broken into multiple pieces.
*/
if (buffer->size <= 65535) {
update_openflow_length(buffer);
- retval = rconn_send_with_limit(mgmt_rconn, buffer, txqlen, TXQ_LIMIT);
+ retval = rconn_send(mgmt_rconn, buffer, txqlen);
if (retval) {
VLOG_WARN_RL(&rl, "send to %s failed: %s",
rconn_get_name(mgmt_rconn), strerror(retval));
@@ -292,12 +303,10 @@ send_openflow_buffer(struct ofpbuf *buffer)
&new_buffer);
oed->type = header->type;
- if (remain > 65535) {
+ if (remain > new_len) {
oed->flags |= OFMPEDF_MORE_DATA;
}
- printf("xxx SENDING LEN: %d\n", new_len);
-
/* Copy the entire original message, including the OpenFlow
* header, since management protocol structure definitions
* include these headers.
@@ -305,8 +314,7 @@ send_openflow_buffer(struct ofpbuf *buffer)
ofpbuf_put(new_buffer, ptr, new_len);
update_openflow_length(new_buffer);
- retval = rconn_send_with_limit(mgmt_rconn, new_buffer, txqlen,
- TXQ_LIMIT);
+ retval = rconn_send(mgmt_rconn, new_buffer, txqlen);
if (retval) {
VLOG_WARN_RL(&rl, "send to %s failed: %s",
rconn_get_name(mgmt_rconn), strerror(retval));
@@ -670,23 +678,48 @@ static int
recv_ofmp_extended_data(uint32_t xid, const struct ofmp_header *ofmph,
size_t len)
{
- size_t data_len;
+ int data_len;
struct ofmp_extended_data *ofmped;
- uint8_t *ptr;
- data_len = len - sizeof(*ofmped);
- if (data_len <= sizeof(*ofmped)) {
+ if (len <= sizeof(*ofmped)) {
/* xxx Send error. */
return -EINVAL;
}
+ ext_data_xid = xid;
ofmped = (struct ofmp_extended_data *)ofmph;
- ptr = ofpbuf_put(&ext_data_buffer, ofmped->data, data_len);
+ data_len = len - sizeof(*ofmped);
+ ofpbuf_put(&ext_data_buffer, ofmped->data, data_len);
+
+ if (!(ofmped->flags & OFMPEDF_MORE_DATA)) {
+ struct ofmp_header *new_oh;
+ int error;
+
+ /* An embedded message must be larger than the maximum size of an
+ * ordinary OpenFlow message (65535 bytes). */
+ new_oh = ofpbuf_at(&ext_data_buffer, 0, 65536);
+ if (!new_oh) {
+ VLOG_WARN_RL(&rl, "received short embedded message: %d\n",
+ ext_data_buffer.size);
+ return -EINVAL;
+ }
+
+ /* Make sure that this is a management message and that there's
+ * not an embedded extended data message. */
+ if ((new_oh->header.vendor != htonl(NX_VENDOR_ID))
+ || (new_oh->header.subtype != htonl(NXT_MGMT))
+ || (new_oh->type == htonl(OFMPT_EXTENDED_DATA))) {
+ VLOG_WARN_RL(&rl, "received bad embedded message\n");
+ return -EINVAL;
+ }
+ new_oh->header.header.xid = ext_data_xid;
+ new_oh->header.header.length = 0;
- if (!ofmped->flags & OFMPEDF_MORE_DATA) {
- recv_ofmp(xid, ext_data_buffer.data, ext_data_buffer.size);
+ error = recv_ofmp(xid, ext_data_buffer.data, ext_data_buffer.size);
ofpbuf_clear(&ext_data_buffer);
+
+ return error;
}
return 0;
@@ -707,6 +740,12 @@ int recv_ofmp(uint32_t xid, struct ofmp_header *ofmph, size_t len)
len = ntohs(ofmph->header.header.length);
}
+ /* Reset the extended data buffer if this isn't a continuation of an
+ * existing extended data message. */
+ if (ext_data_xid != xid) {
+ ofpbuf_clear(&ext_data_buffer);
+ }
+
/* xxx Should sanity-check for min/max length */
switch (ntohs(ofmph->type))
{