aboutsummaryrefslogtreecommitdiff
path: root/datapath
diff options
context:
space:
mode:
Diffstat (limited to 'datapath')
-rw-r--r--datapath/.gitignore7
-rw-r--r--datapath/Makefile.am12
-rw-r--r--datapath/Modules.mk32
-rw-r--r--datapath/actions.c421
-rw-r--r--datapath/actions.h18
-rw-r--r--datapath/brc_procfs.c185
-rw-r--r--datapath/brc_procfs.h11
-rw-r--r--datapath/brc_sysfs.h25
-rw-r--r--datapath/brc_sysfs_dp.c532
-rw-r--r--datapath/brc_sysfs_if.c334
-rw-r--r--datapath/brcompat.c519
-rw-r--r--datapath/compat.h17
-rw-r--r--datapath/datapath.c1611
-rw-r--r--datapath/datapath.h139
-rw-r--r--datapath/dp_dev.c210
-rw-r--r--datapath/dp_dev.h27
-rw-r--r--datapath/dp_notify.c29
-rw-r--r--datapath/flow.c301
-rw-r--r--datapath/flow.h49
-rw-r--r--datapath/linux-2.6/.gitignore20
-rw-r--r--datapath/linux-2.6/Kbuild.in34
-rw-r--r--datapath/linux-2.6/Makefile.in9
-rw-r--r--datapath/linux-2.6/Makefile.main.in82
-rw-r--r--datapath/linux-2.6/Modules.mk50
-rw-r--r--datapath/linux-2.6/compat-2.6/compat26.h37
-rw-r--r--datapath/linux-2.6/compat-2.6/genetlink-brcompat.c20
-rw-r--r--datapath/linux-2.6/compat-2.6/genetlink-openvswitch.c22
-rw-r--r--datapath/linux-2.6/compat-2.6/include/asm-generic/bug.h19
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/cpumask.h11
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/dmi.h114
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/err.h21
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/icmp.h13
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/if_arp.h15
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/ip.h18
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/ipv6.h13
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/jiffies.h26
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/kernel.h9
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/lockdep.h450
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/log2.h17
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/mutex.h59
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/netdevice.h35
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/netfilter_bridge.h24
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/netfilter_ipv4.h19
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/netlink.h24
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/percpu.h10
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/random.h17
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/rculist.h12
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/rtnetlink.h29
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/skbuff.h170
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/tcp.h18
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/timer.h96
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/types.h14
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/udp.h13
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/workqueue.h42
-rw-r--r--datapath/linux-2.6/compat-2.6/include/net/checksum.h16
-rw-r--r--datapath/linux-2.6/compat-2.6/include/net/genetlink.h123
-rw-r--r--datapath/linux-2.6/compat-2.6/include/net/netlink.h22
-rw-r--r--datapath/linux-2.6/compat-2.6/random32.c144
-rw-r--r--datapath/linux-2.6/compat-2.6/veth.c537
-rw-r--r--datapath/linux-2.6/config/config-linux-2.6.23-rc9-kvm1408
-rw-r--r--datapath/table.c240
61 files changed, 8551 insertions, 0 deletions
diff --git a/datapath/.gitignore b/datapath/.gitignore
new file mode 100644
index 00000000..5a59a0d3
--- /dev/null
+++ b/datapath/.gitignore
@@ -0,0 +1,7 @@
+/Makefile
+/Makefile.in
+*.cmd
+*.ko
+*.mod.c
+Module.symvers
+
diff --git a/datapath/Makefile.am b/datapath/Makefile.am
new file mode 100644
index 00000000..71e2dc48
--- /dev/null
+++ b/datapath/Makefile.am
@@ -0,0 +1,12 @@
+SUBDIRS =
+if L26_ENABLED
+SUBDIRS += linux-2.6
+endif
+
+EXTRA_DIST = $(dist_headers) $(dist_sources)
+
+# Suppress warnings about GNU extensions in Modules.mk files.
+AUTOMAKE_OPTIONS = -Wno-portability
+
+include Modules.mk
+include linux-2.6/Modules.mk
diff --git a/datapath/Modules.mk b/datapath/Modules.mk
new file mode 100644
index 00000000..1b5de4ab
--- /dev/null
+++ b/datapath/Modules.mk
@@ -0,0 +1,32 @@
+# Some modules should be built and distributed, e.g. openvswitch.
+#
+# Some modules should be distributed but not built, e.g. we do not build
+# veth if the kernel in question already has it.
+#
+# Some modules should be built but not distributed, e.g. third-party
+# hwtable modules.
+both_modules = openvswitch
+build_modules = $(both_modules) # Modules to build
+dist_modules = $(both_modules) # Modules to distribute
+
+openvswitch_sources = \
+ actions.c \
+ datapath.c \
+ dp_dev.c \
+ dp_notify.c \
+ flow.c \
+ table.c
+
+openvswitch_headers = \
+ actions.h \
+ compat.h \
+ datapath.h \
+ dp_dev.h \
+ flow.h
+
+dist_sources = $(foreach module,$(dist_modules),$($(module)_sources))
+dist_headers = $(foreach module,$(dist_modules),$($(module)_headers))
+build_sources = $(foreach module,$(build_modules),$($(module)_sources))
+build_headers = $(foreach module,$(build_modules),$($(module)_headers))
+build_links = $(notdir $(build_sources))
+build_objects = $(notdir $(patsubst %.c,%.o,$(build_sources)))
diff --git a/datapath/actions.c b/datapath/actions.c
new file mode 100644
index 00000000..30b840cb
--- /dev/null
+++ b/datapath/actions.c
@@ -0,0 +1,421 @@
+/*
+ * Distributed under the terms of the GNU GPL version 2.
+ * Copyright (c) 2007, 2008, 2009 Nicira Networks.
+ */
+
+/* Functions for executing flow actions. */
+
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/in6.h>
+#include <linux/if_vlan.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include "datapath.h"
+#include "dp_dev.h"
+#include "actions.h"
+#include "openvswitch/datapath-protocol.h"
+
+struct sk_buff *
+make_writable(struct sk_buff *skb, gfp_t gfp)
+{
+ if (skb_shared(skb) || skb_cloned(skb)) {
+ struct sk_buff *nskb = skb_copy(skb, gfp);
+ if (nskb) {
+ kfree_skb(skb);
+ return nskb;
+ }
+ } else {
+ unsigned int hdr_len = (skb_transport_offset(skb)
+ + sizeof(struct tcphdr));
+ if (pskb_may_pull(skb, min(hdr_len, skb->len)))
+ return skb;
+ }
+ kfree_skb(skb);
+ return NULL;
+}
+
+
+static struct sk_buff *
+vlan_pull_tag(struct sk_buff *skb)
+{
+ struct vlan_ethhdr *vh = vlan_eth_hdr(skb);
+ struct ethhdr *eh;
+
+
+ /* Verify we were given a vlan packet */
+ if (vh->h_vlan_proto != htons(ETH_P_8021Q))
+ return skb;
+
+ memmove(skb->data + VLAN_HLEN, skb->data, 2 * VLAN_ETH_ALEN);
+
+ eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN);
+
+ skb->protocol = eh->h_proto;
+ skb->mac_header += VLAN_HLEN;
+
+ return skb;
+}
+
+
/* Implements ODPAT_SET_VLAN_VID / ODPAT_SET_VLAN_PCP: sets the 802.1Q VLAN
 * ID or priority bits of 'skb', inserting a VLAN header if the packet does
 * not already carry one.  Updates 'key' to match for the VID case.
 *
 * Consumes 'skb'.  Returns the (possibly re-allocated or segmented) packet
 * on success, or an ERR_PTR() on failure, in which case all buffers have
 * already been freed.  'a' points at the current action within a list of
 * 'n_actions' remaining actions; the extra context is needed for the GSO
 * path below, which must run the rest of the action list on each segment. */
static struct sk_buff *
modify_vlan_tci(struct datapath *dp, struct sk_buff *skb,
		struct odp_flow_key *key, const union odp_action *a,
		int n_actions, gfp_t gfp)
{
	u16 tci, mask;

	if (a->type == ODPAT_SET_VLAN_VID) {
		tci = ntohs(a->vlan_vid.vlan_vid);
		mask = VLAN_VID_MASK;
		key->dl_vlan = htons(tci & mask);
	} else {
		/* PCP occupies the top 3 bits of the TCI field. */
		tci = a->vlan_pcp.vlan_pcp << 13;
		mask = VLAN_PCP_MASK;
	}

	skb = make_writable(skb, gfp);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	if (skb->protocol == htons(ETH_P_8021Q)) {
		/* Modify vlan id, but maintain other TCI values */
		struct vlan_ethhdr *vh = vlan_eth_hdr(skb);
		vh->h_vlan_TCI = htons((ntohs(vh->h_vlan_TCI) & ~mask) | tci);
	} else {
		/* Add vlan header */

		/* Set up checksumming pointers for checksum-deferred packets
		 * on Xen.  Otherwise, dev_queue_xmit() will try to do this
		 * when we send the packet out on the wire, and it will fail at
		 * that point because skb_checksum_setup() will not look inside
		 * an 802.1Q header. */
		skb_checksum_setup(skb);

		/* GSO is not implemented for packets with an 802.1Q header, so
		 * we have to do segmentation before we add that header.
		 *
		 * GSO does work with hardware-accelerated VLAN tagging, but we
		 * can't use hardware-accelerated VLAN tagging since it
		 * requires the device to have a VLAN group configured (with
		 * e.g. vconfig(8)) and we don't do that.
		 *
		 * Having to do this here may be a performance loss, since we
		 * can't take advantage of TSO hardware support, although it
		 * does not make a measurable network performance difference
		 * for 1G Ethernet.  Fixing that would require patching the
		 * kernel (either to add GSO support to the VLAN protocol or to
		 * support hardware-accelerated VLAN tagging without VLAN
		 * groups configured). */
		if (skb_is_gso(skb)) {
			struct sk_buff *segs;

			segs = skb_gso_segment(skb, 0);
			kfree_skb(skb);
			if (unlikely(IS_ERR(segs)))
				return ERR_CAST(segs);

			/* Tag every segment except the last and run it through
			 * the remaining actions here; the last segment falls
			 * out of the loop and continues through the normal
			 * (non-GSO) tagging path below, so the caller keeps
			 * executing the rest of the action list on it. */
			do {
				struct sk_buff *nskb = segs->next;
				int err;

				segs->next = NULL;

				segs = __vlan_put_tag(segs, tci);
				err = -ENOMEM;
				if (segs) {
					/* Copy 'key' so the recursive call's
					 * modifications don't leak back. */
					struct odp_flow_key segkey = *key;
					err = execute_actions(dp, segs,
							      &segkey, a + 1,
							      n_actions - 1,
							      gfp);
				}

				if (unlikely(err)) {
					/* Free the remaining, unprocessed
					 * segments before bailing out. */
					while ((segs = nskb)) {
						nskb = segs->next;
						segs->next = NULL;
						kfree_skb(segs);
					}
					return ERR_PTR(err);
				}

				segs = nskb;
			} while (segs->next);

			skb = segs;
		}

		/* The hardware-accelerated version of vlan_put_tag() works
		 * only for a device that has a VLAN group configured (with
		 * e.g. vconfig(8)), so call the software-only version
		 * __vlan_put_tag() directly instead. */
		skb = __vlan_put_tag(skb, tci);
		if (!skb)
			return ERR_PTR(-ENOMEM);
	}

	return skb;
}
+
+static struct sk_buff *strip_vlan(struct sk_buff *skb,
+ struct odp_flow_key *key, gfp_t gfp)
+{
+ skb = make_writable(skb, gfp);
+ if (skb) {
+ vlan_pull_tag(skb);
+ key->dl_vlan = htons(ODP_VLAN_NONE);
+ }
+ return skb;
+}
+
+static struct sk_buff *set_dl_addr(struct sk_buff *skb,
+ const struct odp_action_dl_addr *a,
+ gfp_t gfp)
+{
+ skb = make_writable(skb, gfp);
+ if (skb) {
+ struct ethhdr *eh = eth_hdr(skb);
+ memcpy(a->type == ODPAT_SET_DL_SRC ? eh->h_source : eh->h_dest,
+ a->dl_addr, ETH_ALEN);
+ }
+ return skb;
+}
+
/* Updates 'sum', which is a field in 'skb''s data, given that a 4-byte field
 * covered by the sum has been changed from 'from' to 'to'.  If set,
 * 'pseudohdr' indicates that the field is in the TCP or UDP pseudo-header.
 * Based on nf_proto_csum_replace4. */
static void update_csum(__sum16 *sum, struct sk_buff *skb,
			__be32 from, __be32 to, int pseudohdr)
{
	/* In one's-complement arithmetic, summing in ~from and to is
	 * equivalent to subtracting 'from' and adding 'to'. */
	__be32 diff[] = { ~from, to };
	if (skb->ip_summed != CHECKSUM_PARTIAL) {
		/* Checksum is fully computed in the packet: patch the stored
		 * field incrementally. */
		*sum = csum_fold(csum_partial((char *)diff, sizeof(diff),
				~csum_unfold(*sum)));
		/* With CHECKSUM_COMPLETE, skb->csum holds the raw sum over
		 * the packet; a pseudo-header change must be folded in there
		 * too so later verification still matches. */
		if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
			skb->csum = ~csum_partial((char *)diff, sizeof(diff),
						~skb->csum);
	} else if (pseudohdr)
		/* Hardware will finish the checksum later; only adjust the
		 * unfolded pseudo-header seed stored in the packet. */
		*sum = ~csum_fold(csum_partial((char *)diff, sizeof(diff),
				csum_unfold(*sum)));
}
+
+static struct sk_buff *set_nw_addr(struct sk_buff *skb,
+ struct odp_flow_key *key,
+ const struct odp_action_nw_addr *a,
+ gfp_t gfp)
+{
+ if (key->dl_type != htons(ETH_P_IP))
+ return skb;
+
+ skb = make_writable(skb, gfp);
+ if (skb) {
+ struct iphdr *nh = ip_hdr(skb);
+ u32 *f = a->type == ODPAT_SET_NW_SRC ? &nh->saddr : &nh->daddr;
+ u32 old = *f;
+ u32 new = a->nw_addr;
+
+ if (key->nw_proto == IPPROTO_TCP) {
+ struct tcphdr *th = tcp_hdr(skb);
+ update_csum(&th->check, skb, old, new, 1);
+ } else if (key->nw_proto == IPPROTO_UDP) {
+ struct udphdr *th = udp_hdr(skb);
+ update_csum(&th->check, skb, old, new, 1);
+ }
+ update_csum(&nh->check, skb, old, new, 0);
+ *f = new;
+ }
+ return skb;
+}
+
+static struct sk_buff *
+set_tp_port(struct sk_buff *skb, struct odp_flow_key *key,
+ const struct odp_action_tp_port *a,
+ gfp_t gfp)
+{
+ int check_ofs;
+
+ if (key->dl_type != htons(ETH_P_IP))
+ return skb;
+
+ if (key->nw_proto == IPPROTO_TCP)
+ check_ofs = offsetof(struct tcphdr, check);
+ else if (key->nw_proto == IPPROTO_UDP)
+ check_ofs = offsetof(struct udphdr, check);
+ else
+ return skb;
+
+ skb = make_writable(skb, gfp);
+ if (skb) {
+ struct udphdr *th = udp_hdr(skb);
+ u16 *f = a->type == ODPAT_SET_TP_SRC ? &th->source : &th->dest;
+ u16 old = *f;
+ u16 new = a->tp_port;
+ update_csum((u16*)((u8*)skb->data + check_ofs),
+ skb, old, new, 1);
+ *f = new;
+ }
+ return skb;
+}
+
+static inline unsigned packet_length(const struct sk_buff *skb)
+{
+ unsigned length = skb->len - ETH_HLEN;
+ if (skb->protocol == htons(ETH_P_8021Q))
+ length -= VLAN_HLEN;
+ return length;
+}
+
+int dp_xmit_skb(struct sk_buff *skb)
+{
+ struct datapath *dp = skb->dev->br_port->dp;
+ int len = skb->len;
+
+ if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) {
+ printk(KERN_WARNING "%s: dropped over-mtu packet: %d > %d\n",
+ dp_name(dp), packet_length(skb), skb->dev->mtu);
+ kfree_skb(skb);
+ return -E2BIG;
+ }
+
+ dev_queue_xmit(skb);
+
+ return len;
+}
+
+static void
+do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
+{
+ struct net_bridge_port *p;
+ struct net_device *dev;
+
+ if (!skb)
+ goto error;
+
+ p = dp->ports[out_port];
+ if (!p)
+ goto error;
+
+ dev = skb->dev = p->dev;
+ if (is_dp_dev(dev))
+ dp_dev_recv(dev, skb);
+ else
+ dp_xmit_skb(skb);
+ return;
+
+error:
+ kfree_skb(skb);
+}
+
+/* Never consumes 'skb'. Returns a port that 'skb' should be sent to, -1 if
+ * none. */
+static int output_group(struct datapath *dp, __u16 group,
+ struct sk_buff *skb, gfp_t gfp)
+{
+ struct dp_port_group *g = rcu_dereference(dp->groups[group]);
+ int prev_port = -1;
+ int i;
+
+ if (!g)
+ return -1;
+ for (i = 0; i < g->n_ports; i++) {
+ struct net_bridge_port *p = dp->ports[g->ports[i]];
+ if (!p || skb->dev == p->dev)
+ continue;
+ if (prev_port != -1) {
+ struct sk_buff *clone = skb_clone(skb, gfp);
+ if (!clone)
+ return -1;
+ do_output(dp, clone, prev_port);
+ }
+ prev_port = p->port_no;
+ }
+ return prev_port;
+}
+
+static int
+output_control(struct datapath *dp, struct sk_buff *skb, u32 arg, gfp_t gfp)
+{
+ skb = skb_clone(skb, gfp);
+ if (!skb)
+ return -ENOMEM;
+ return dp_output_control(dp, skb, _ODPL_ACTION_NR, arg);
+}
+
/* Execute a list of actions against 'skb'.
 *
 * Always consumes 'skb': it is handed to do_output(), freed on error, or
 * freed at the end if no output action claimed it.  'key' is updated in
 * place by modifying actions (e.g. VLAN changes) so later actions see the
 * new values.  Returns 0 on success or a negative errno value. */
int execute_actions(struct datapath *dp, struct sk_buff *skb,
		    struct odp_flow_key *key,
		    const union odp_action *a, int n_actions,
		    gfp_t gfp)
{
	/* Every output action needs a separate clone of 'skb', but the common
	 * case is just a single output action, so that doing a clone and
	 * then freeing the original skbuff is wasteful.  So the following code
	 * is slightly obscure just to avoid that. */
	int prev_port = -1;
	int err = 0;
	for (; n_actions > 0; a++, n_actions--) {
		WARN_ON_ONCE(skb_shared(skb));
		/* An output deferred by the previous iteration is flushed
		 * with a clone, keeping the original for the rest of the
		 * action list. */
		if (prev_port != -1) {
			do_output(dp, skb_clone(skb, gfp), prev_port);
			prev_port = -1;
		}

		switch (a->type) {
		case ODPAT_OUTPUT:
			/* Defer the output; see the comment above. */
			prev_port = a->output.port;
			break;

		case ODPAT_OUTPUT_GROUP:
			/* output_group() clones and sends to all group
			 * members but one, which it defers to us. */
			prev_port = output_group(dp, a->output_group.group,
						 skb, gfp);
			break;

		case ODPAT_CONTROLLER:
			err = output_control(dp, skb, a->controller.arg, gfp);
			if (err) {
				kfree_skb(skb);
				return err;
			}
			break;

		case ODPAT_SET_VLAN_VID:
		case ODPAT_SET_VLAN_PCP:
			/* On failure returns an ERR_PTR() with all buffers
			 * already freed (it may also have run the remaining
			 * actions itself on GSO segments). */
			skb = modify_vlan_tci(dp, skb, key, a, n_actions, gfp);
			if (IS_ERR(skb))
				return PTR_ERR(skb);
			break;

		case ODPAT_STRIP_VLAN:
			skb = strip_vlan(skb, key, gfp);
			break;

		case ODPAT_SET_DL_SRC:
		case ODPAT_SET_DL_DST:
			skb = set_dl_addr(skb, &a->dl_addr, gfp);
			break;

		case ODPAT_SET_NW_SRC:
		case ODPAT_SET_NW_DST:
			skb = set_nw_addr(skb, key, &a->nw_addr, gfp);
			break;

		case ODPAT_SET_TP_SRC:
		case ODPAT_SET_TP_DST:
			skb = set_tp_port(skb, key, &a->tp_port, gfp);
			break;
		}
		/* The set_*/strip_* helpers return NULL when they could not
		 * obtain a writable copy; the packet is already freed then. */
		if (!skb)
			return -ENOMEM;
	}
	/* Send the last deferred output using the original skb (no clone
	 * needed), or free it if nothing consumed it. */
	if (prev_port != -1)
		do_output(dp, skb, prev_port);
	else
		kfree_skb(skb);
	return err;
}
diff --git a/datapath/actions.h b/datapath/actions.h
new file mode 100644
index 00000000..410e3ba7
--- /dev/null
+++ b/datapath/actions.h
@@ -0,0 +1,18 @@
+#ifndef ACTIONS_H
+#define ACTIONS_H 1
+
+#include <linux/gfp.h>
+
+struct datapath;
+struct sk_buff;
+struct odp_flow_key;
+union odp_action;
+
+struct sk_buff *make_writable(struct sk_buff *, gfp_t gfp);
+int dp_xmit_skb(struct sk_buff *);
+int execute_actions(struct datapath *dp, struct sk_buff *skb,
+ struct odp_flow_key *key,
+ const union odp_action *, int n_actions,
+ gfp_t gfp);
+
+#endif /* actions.h */
diff --git a/datapath/brc_procfs.c b/datapath/brc_procfs.c
new file mode 100644
index 00000000..733e9a94
--- /dev/null
+++ b/datapath/brc_procfs.c
@@ -0,0 +1,185 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <net/genetlink.h>
+#include "openvswitch/brcompat-netlink.h"
+
+/* This code implements a Generic Netlink command BRC_GENL_C_SET_PROC that can
+ * be used to add, modify, and delete arbitrary files in selected
+ * subdirectories of /proc. It's a horrible kluge prompted by the need to
+ * simulate certain /proc/net/vlan and /proc/net/bonding files for software
+ * that wants to read them, and with any luck it will go away eventually.
+ *
+ * The implementation is a kluge too. In particular, we want to release the
+ * strings copied into the 'data' members of proc_dir_entry when the
+ * proc_dir_entry structures are freed, but there doesn't appear to be a way to
+ * hook that, so instead we have to rely on being the only entity modifying the
+ * directories in question.
+ */
+
+static int brc_seq_show(struct seq_file *seq, void *unused)
+{
+ seq_puts(seq, seq->private);
+ return 0;
+}
+
+static int brc_seq_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, brc_seq_show, PDE(inode)->data);
+}
+
/* File operations for the simulated proc files: reads are served via
 * seq_file from the string stored in the entry's 'data' member. */
static struct file_operations brc_fops = {
	.owner = THIS_MODULE,
	.open    = brc_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};
+
+static struct proc_dir_entry *proc_vlan_dir;
+static struct proc_dir_entry *proc_bonding_dir;
+
+struct proc_dir_entry *brc_lookup_entry(struct proc_dir_entry *de, const char *name)
+{
+ int namelen = strlen(name);
+ for (de = de->subdir; de; de = de->next) {
+ if (de->namelen != namelen)
+ continue;
+ if (!memcmp(name, de->name, de->namelen))
+ return de;
+ }
+ return NULL;
+}
+
+static struct proc_dir_entry *brc_open_dir(const char *dir_name,
+ struct proc_dir_entry *parent,
+ struct proc_dir_entry **dirp)
+{
+ if (!*dirp) {
+ struct proc_dir_entry *dir;
+ if (brc_lookup_entry(parent, dir_name)) {
+ printk(KERN_WARNING "%s proc directory exists, can't "
+ "simulate--probably its real module is "
+ "loaded\n", dir_name);
+ return NULL;
+ }
+ dir = *dirp = proc_mkdir(dir_name, parent);
+ }
+ return *dirp;
+}
+
+/* Maximum length of the BRC_GENL_A_PROC_DIR and BRC_GENL_A_PROC_NAME strings.
+ * If we could depend on supporting NLA_NUL_STRING and the .len member in
+ * Generic Netlink policy, then we could just put this in brc_genl_policy (and
+ * simplify brc_genl_set_proc() below too), but upstream 2.6.18 does not have
+ * either. */
+#define BRC_NAME_LEN_MAX 32
+
/* Generic Netlink handler for BRC_GENL_C_SET_PROC.
 *
 * Adds, replaces, or deletes a simulated file in /proc/net/vlan or
 * /proc/net/bonding:
 *   - With BRC_GENL_A_PROC_DATA present, the named entry is created (or its
 *     contents replaced) with a kmalloc'd copy of that string.
 *   - Without BRC_GENL_A_PROC_DATA, the named entry is removed.
 *
 * Returns 0 on success or a negative errno value. */
int brc_genl_set_proc(struct sk_buff *skb, struct genl_info *info)
{
	struct proc_dir_entry *dir, *entry;
	const char *dir_name, *name;
	char *data;

	/* Directory and file name are mandatory and must be NUL-terminated;
	 * data is optional but must be NUL-terminated when present. */
	if (!info->attrs[BRC_GENL_A_PROC_DIR] ||
	    VERIFY_NUL_STRING(info->attrs[BRC_GENL_A_PROC_DIR]) ||
	    !info->attrs[BRC_GENL_A_PROC_NAME] ||
	    VERIFY_NUL_STRING(info->attrs[BRC_GENL_A_PROC_NAME]) ||
	    (info->attrs[BRC_GENL_A_PROC_DATA] &&
	     VERIFY_NUL_STRING(info->attrs[BRC_GENL_A_PROC_DATA])))
		return -EINVAL;

	dir_name = nla_data(info->attrs[BRC_GENL_A_PROC_DIR]);
	name = nla_data(info->attrs[BRC_GENL_A_PROC_NAME]);
	/* The name cap keeps kill_proc_dir()'s on-stack buffer safe. */
	if (strlen(dir_name) > BRC_NAME_LEN_MAX ||
	    strlen(name) > BRC_NAME_LEN_MAX)
		return -EINVAL;

	/* Only these two directories may be simulated. */
	if (!strcmp(dir_name, "net/vlan"))
		dir = brc_open_dir("vlan", proc_net, &proc_vlan_dir);
	else if (!strcmp(dir_name, "net/bonding"))
		dir = brc_open_dir("bonding", proc_net, &proc_bonding_dir);
	else
		return -EINVAL;
	if (!dir) {
		/* Probably failed because the module that really implements
		 * the function in question is loaded and already owns the
		 * directory in question. */
		return -EBUSY;
	}

	entry = brc_lookup_entry(dir, name);
	if (!info->attrs[BRC_GENL_A_PROC_DATA]) {
		/* Deletion request. */
		if (!entry)
			return -ENOENT;

		/* Grab the contents string before removing the entry; nothing
		 * frees it automatically (see comment at top of file). */
		data = entry->data;
		remove_proc_entry(name, dir);
		if (brc_lookup_entry(dir, name))
			return -EBUSY; /* Shouldn't happen */

		kfree(data);
	} else {
		data = kstrdup(nla_data(info->attrs[BRC_GENL_A_PROC_DATA]),
			       GFP_KERNEL);
		if (!data)
			return -ENOMEM;

		if (entry) {
			/* Existing entry: swap in the new contents string and
			 * free the old one. */
			char *old_data = entry->data;
			entry->data = data;
			kfree(old_data);
			return 0;
		}

		entry = create_proc_entry(name, S_IFREG|S_IRUSR|S_IWUSR, dir);
		if (!entry) {
			kfree(data);
			return -ENOBUFS;
		}
		entry->proc_fops = &brc_fops;
		entry->data = data;
	}
	return 0;
}
+
/* Removes every simulated file in 'dir', freeing the contents string stored
 * in each entry's 'data' member, then removes 'dir' itself (named
 * 'dir_name') from 'parent'.  A no-op when 'dir' is NULL, i.e. the
 * directory was never created. */
static void kill_proc_dir(const char *dir_name,
			  struct proc_dir_entry *parent,
			  struct proc_dir_entry *dir)
{
	if (!dir)
		return;
	for (;;) {
		struct proc_dir_entry *e;
		char *data;
		char name[BRC_NAME_LEN_MAX + 1];

		/* Each iteration removes the current head of the subdir
		 * list, so this loop drains the directory. */
		e = dir->subdir;
		if (!e)
			break;

		if (e->namelen >= sizeof name) {
			/* Can't happen: we prevent adding names this long by
			 * limiting the BRC_GENL_A_PROC_NAME string to
			 * BRC_NAME_LEN_MAX bytes. */
			WARN_ON(1);
			break;
		}
		/* Copy the name out first: remove_proc_entry() frees 'e', so
		 * we cannot pass e->name to it directly. */
		strcpy(name, e->name);

		/* Detach and free the contents string before the entry goes
		 * away; nothing else owns it. */
		data = e->data;
		e->data = NULL;
		kfree(data);

		remove_proc_entry(name, dir);
	}
	remove_proc_entry(dir_name, parent);
}
+
/* Module-unload cleanup: tears down the simulated /proc/net/vlan and
 * /proc/net/bonding directories and frees the contents of every file in
 * them. */
void brc_procfs_exit(void)
{
	kill_proc_dir("vlan", proc_net, proc_vlan_dir);
	kill_proc_dir("bonding", proc_net, proc_bonding_dir);
}
diff --git a/datapath/brc_procfs.h b/datapath/brc_procfs.h
new file mode 100644
index 00000000..93e21cfb
--- /dev/null
+++ b/datapath/brc_procfs.h
@@ -0,0 +1,11 @@
+#ifndef BRC_PROCFS_H
+#define BRC_PROCFS_H 1
+
+struct sk_buff;
+struct genl_info;
+
+void brc_procfs_exit(void);
+int brc_genl_set_proc(struct sk_buff *skb, struct genl_info *info);
+
+#endif /* brc_procfs.h */
+
diff --git a/datapath/brc_sysfs.h b/datapath/brc_sysfs.h
new file mode 100644
index 00000000..0c72fb22
--- /dev/null
+++ b/datapath/brc_sysfs.h
@@ -0,0 +1,25 @@
+#ifndef BRC_SYSFS_H
+#define BRC_SYSFS_H 1
+
+struct datapath;
+struct net_bridge_port;
+
+/* brc_sysfs_dp.c */
+int brc_sysfs_add_dp(struct datapath *dp);
+int brc_sysfs_del_dp(struct datapath *dp);
+
+/* brc_sysfs_if.c */
+int brc_sysfs_add_if(struct net_bridge_port *p);
+int brc_sysfs_del_if(struct net_bridge_port *p);
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE == KERNEL_VERSION(2,6,18)
+#define SUPPORT_SYSFS 1
+#else
+/* We only support sysfs on Linux 2.6.18 because that's the only place we
+ * really need it (on Xen, for brcompat) and it's a big pain to try to support
+ * multiple versions. */
+#endif
+
+#endif /* brc_sysfs.h */
+
diff --git a/datapath/brc_sysfs_dp.c b/datapath/brc_sysfs_dp.c
new file mode 100644
index 00000000..fc02f279
--- /dev/null
+++ b/datapath/brc_sysfs_dp.c
@@ -0,0 +1,532 @@
+#include <linux/version.h>
+
+/*
+ * Sysfs attributes of bridge for Open vSwitch
+ *
+ * This has been shamelessly copied from the kernel sources.
+ */
+
+#include <linux/capability.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/if_bridge.h>
+#include <linux/rtnetlink.h>
+#include <linux/spinlock.h>
+#include <linux/times.h>
+#include <linux/version.h>
+
+#include "brc_sysfs.h"
+#include "datapath.h"
+#include "dp_dev.h"
+
+#ifdef SUPPORT_SYSFS
+#define to_dev(obj) container_of(obj, struct device, kobj)
+
+/* Hack to attempt to build on more platforms. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21)
+#define to_kobj(d) &(d)->class_dev.kobj
+#define BRC_DEVICE_ATTR CLASS_DEVICE_ATTR
+#else
+#define to_kobj(d) &(d)->dev.kobj
+#define BRC_DEVICE_ATTR DEVICE_ATTR
+#endif
+
/*
 * Common code for storing bridge parameters.
 *
 * Parses 'buf' as an unsigned long; a full implementation would apply the
 * value via 'set' under the bridge lock (see the disabled code below).  In
 * this stub every parameter has a fixed value of 0, so a write of 0 is
 * silently accepted and any other value merely logs a warning.  Returns
 * 'len' on success or a negative errno value.
 */
static ssize_t store_bridge_parm(struct class_device *d,
				 const char *buf, size_t len,
				 void (*set)(struct datapath *, unsigned long))
{
	struct datapath *dp = dp_dev_get_dp(to_net_dev(d));
	char *endp;
	unsigned long val;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	val = simple_strtoul(buf, &endp, 0);
	if (endp == buf)
		return -EINVAL;

#if 0
	spin_lock_bh(&br->lock);
	(*set)(br, val);
	spin_unlock_bh(&br->lock);
#else
	/* xxx We use a default value of 0 for all fields.  If the caller is
	 * xxx attempting to set the value to our default, just silently
	 * xxx ignore the request.
	 *
	 * NOTE(review): 'set' is deliberately unused in this path; it is only
	 * referenced by the disabled real implementation above. */
	if (val != 0) {
		printk("%s: xxx writing dp parms not supported yet!\n",
		       dp_name(dp));
	}
#endif
	return len;
}
+
+
+static ssize_t show_forward_delay(struct class_device *d,
+ char *buf)
+{
+#if 0
+ struct datapath *dp = dp_dev_get_dp(to_net_dev(d));
+ return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay));
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+
+static void set_forward_delay(struct datapath *dp, unsigned long val)
+{
+#if 0
+ unsigned long delay = clock_t_to_jiffies(val);
+ br->forward_delay = delay;
+ if (br_is_root_bridge(br))
+ br->bridge_forward_delay = delay;
+#else
+ printk("%s: xxx attempt to set_forward_delay()\n", dp_name(dp));
+#endif
+}
+
+static ssize_t store_forward_delay(struct class_device *d,
+ const char *buf, size_t len)
+{
+ return store_bridge_parm(d, buf, len, set_forward_delay);
+}
+static BRC_DEVICE_ATTR(forward_delay, S_IRUGO | S_IWUSR,
+ show_forward_delay, store_forward_delay);
+
+static ssize_t show_hello_time(struct class_device *d, char *buf)
+{
+#if 0
+ return sprintf(buf, "%lu\n",
+ jiffies_to_clock_t(to_bridge(d)->hello_time));
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+
+static void set_hello_time(struct datapath *dp, unsigned long val)
+{
+#if 0
+ unsigned long t = clock_t_to_jiffies(val);
+ br->hello_time = t;
+ if (br_is_root_bridge(br))
+ br->bridge_hello_time = t;
+#else
+ printk("%s: xxx attempt to set_hello_time()\n", dp_name(dp));
+#endif
+}
+
+static ssize_t store_hello_time(struct class_device *d,
+ const char *buf,
+ size_t len)
+{
+ return store_bridge_parm(d, buf, len, set_hello_time);
+}
+static BRC_DEVICE_ATTR(hello_time, S_IRUGO | S_IWUSR, show_hello_time,
+ store_hello_time);
+
+static ssize_t show_max_age(struct class_device *d,
+ char *buf)
+{
+#if 0
+ return sprintf(buf, "%lu\n",
+ jiffies_to_clock_t(to_bridge(d)->max_age));
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+
+static void set_max_age(struct datapath *dp, unsigned long val)
+{
+#if 0
+ unsigned long t = clock_t_to_jiffies(val);
+ br->max_age = t;
+ if (br_is_root_bridge(br))
+ br->bridge_max_age = t;
+#else
+ printk("%s: xxx attempt to set_max_age()\n", dp_name(dp));
+#endif
+}
+
+static ssize_t store_max_age(struct class_device *d,
+ const char *buf, size_t len)
+{
+ return store_bridge_parm(d, buf, len, set_max_age);
+}
+static BRC_DEVICE_ATTR(max_age, S_IRUGO | S_IWUSR, show_max_age, store_max_age);
+
+static ssize_t show_ageing_time(struct class_device *d,
+ char *buf)
+{
+#if 0
+ struct datapath *dp = dp_dev_get_dp(to_net_dev(d));
+ return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->ageing_time));
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+
+static void set_ageing_time(struct datapath *dp, unsigned long val)
+{
+#if 0
+ br->ageing_time = clock_t_to_jiffies(val);
+#else
+ printk("%s: xxx attempt to set_ageing_time()\n", dp_name(dp));
+#endif
+}
+
+static ssize_t store_ageing_time(struct class_device *d,
+ const char *buf, size_t len)
+{
+ return store_bridge_parm(d, buf, len, set_ageing_time);
+}
+static BRC_DEVICE_ATTR(ageing_time, S_IRUGO | S_IWUSR, show_ageing_time,
+ store_ageing_time);
+
+static ssize_t show_stp_state(struct class_device *d,
+ char *buf)
+{
+#if 0
+ struct datapath *dp = dp_dev_get_dp(to_net_dev(d));
+ return sprintf(buf, "%d\n", br->stp_enabled);
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+
+
+static ssize_t store_stp_state(struct class_device *d,
+ const char *buf,
+ size_t len)
+{
+ struct datapath *dp = dp_dev_get_dp(to_net_dev(d));
+#if 0
+ char *endp;
+ unsigned long val;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ val = simple_strtoul(buf, &endp, 0);
+ if (endp == buf)
+ return -EINVAL;
+
+ rtnl_lock();
+ br_stp_set_enabled(br, val);
+ rtnl_unlock();
+#else
+ printk("%s: xxx attempt to set_stp_state()\n", dp_name(dp));
+#endif
+
+ return len;
+}
+static BRC_DEVICE_ATTR(stp_state, S_IRUGO | S_IWUSR, show_stp_state,
+ store_stp_state);
+
+static ssize_t show_priority(struct class_device *d,
+ char *buf)
+{
+#if 0
+ struct datapath *dp = dp_dev_get_dp(to_net_dev(d));
+ return sprintf(buf, "%d\n",
+ (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]);
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+
+static void set_priority(struct datapath *dp, unsigned long val)
+{
+#if 0
+ br_stp_set_bridge_priority(br, (u16) val);
+#else
+ printk("%s: xxx attempt to set_priority()\n", dp_name(dp));
+#endif
+}
+
+static ssize_t store_priority(struct class_device *d,
+ const char *buf, size_t len)
+{
+ return store_bridge_parm(d, buf, len, set_priority);
+}
+static BRC_DEVICE_ATTR(priority, S_IRUGO | S_IWUSR, show_priority, store_priority);
+
+static ssize_t show_root_id(struct class_device *d,
+ char *buf)
+{
+#if 0
+ return br_show_bridge_id(buf, &to_bridge(d)->designated_root);
+#else
+ return sprintf(buf, "0000.010203040506\n");
+#endif
+}
+static BRC_DEVICE_ATTR(root_id, S_IRUGO, show_root_id, NULL);
+
+static ssize_t show_bridge_id(struct class_device *d,
+ char *buf)
+{
+ struct datapath *dp = dp_dev_get_dp(to_net_dev(d));
+ const unsigned char *addr = dp->ports[ODPP_LOCAL]->dev->dev_addr;
+
+ /* xxx Do we need a lock of some sort? */
+ return sprintf(buf, "%.2x%.2x.%.2x%.2x%.2x%.2x%.2x%.2x\n",
+ 0, 0, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+}
+static BRC_DEVICE_ATTR(bridge_id, S_IRUGO, show_bridge_id, NULL);
+
+static ssize_t show_root_port(struct class_device *d,
+ char *buf)
+{
+#if 0
+ return sprintf(buf, "%d\n", to_bridge(d)->root_port);
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+static BRC_DEVICE_ATTR(root_port, S_IRUGO, show_root_port, NULL);
+
+static ssize_t show_root_path_cost(struct class_device *d,
+ char *buf)
+{
+#if 0
+ return sprintf(buf, "%d\n", to_bridge(d)->root_path_cost);
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+static BRC_DEVICE_ATTR(root_path_cost, S_IRUGO, show_root_path_cost, NULL);
+
+/* The STP-related "show" attributes below are stubs: Open vSwitch does not
+ * implement STP in the kernel, so the original Linux bridge code is compiled
+ * out with "#if 0" and a fixed placeholder value is reported instead, purely
+ * so that bridge-compat tools reading sysfs do not break. */
+static ssize_t show_topology_change(struct class_device *d,
+ char *buf)
+{
+#if 0
+ return sprintf(buf, "%d\n", to_bridge(d)->topology_change);
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+static BRC_DEVICE_ATTR(topology_change, S_IRUGO, show_topology_change, NULL);
+
+static ssize_t show_topology_change_detected(struct class_device *d,
+ char *buf)
+{
+#if 0
+ /* NOTE(review): this disabled code declares "dp" but reads "br", which is
+  * not declared here -- it would not compile if re-enabled. */
+ struct datapath *dp = dp_dev_get_dp(to_net_dev(d));
+ return sprintf(buf, "%d\n", br->topology_change_detected);
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+static BRC_DEVICE_ATTR(topology_change_detected, S_IRUGO,
+ show_topology_change_detected, NULL);
+
+/* NOTE(review): each disabled timer stub below also declares "dp" but reads
+ * "br"; the dead code is internally inconsistent. */
+static ssize_t show_hello_timer(struct class_device *d,
+ char *buf)
+{
+#if 0
+ struct datapath *dp = dp_dev_get_dp(to_net_dev(d));
+ return sprintf(buf, "%ld\n", br_timer_value(&br->hello_timer));
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+static BRC_DEVICE_ATTR(hello_timer, S_IRUGO, show_hello_timer, NULL);
+
+static ssize_t show_tcn_timer(struct class_device *d,
+ char *buf)
+{
+#if 0
+ struct datapath *dp = dp_dev_get_dp(to_net_dev(d));
+ return sprintf(buf, "%ld\n", br_timer_value(&br->tcn_timer));
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+static BRC_DEVICE_ATTR(tcn_timer, S_IRUGO, show_tcn_timer, NULL);
+
+static ssize_t show_topology_change_timer(struct class_device *d,
+ char *buf)
+{
+#if 0
+ struct datapath *dp = dp_dev_get_dp(to_net_dev(d));
+ return sprintf(buf, "%ld\n", br_timer_value(&br->topology_change_timer));
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+static BRC_DEVICE_ATTR(topology_change_timer, S_IRUGO, show_topology_change_timer,
+ NULL);
+
+static ssize_t show_gc_timer(struct class_device *d,
+ char *buf)
+{
+#if 0
+ struct datapath *dp = dp_dev_get_dp(to_net_dev(d));
+ return sprintf(buf, "%ld\n", br_timer_value(&br->gc_timer));
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+static BRC_DEVICE_ATTR(gc_timer, S_IRUGO, show_gc_timer, NULL);
+
+/* Reports a hard-coded placeholder MAC; the real group address is not
+ * tracked by the datapath. */
+static ssize_t show_group_addr(struct class_device *d,
+ char *buf)
+{
+#if 0
+ struct datapath *dp = dp_dev_get_dp(to_net_dev(d));
+ return sprintf(buf, "%x:%x:%x:%x:%x:%x\n",
+ br->group_addr[0], br->group_addr[1],
+ br->group_addr[2], br->group_addr[3],
+ br->group_addr[4], br->group_addr[5]);
+#else
+ return sprintf(buf, "00:01:02:03:04:05\n");
+#endif
+}
+
+/* Stub "store" handler for the bridge group address.  Storing is not
+ * supported; the attempt is logged and, as in the original code, "len" is
+ * returned so callers see apparent success rather than an error. */
+static ssize_t store_group_addr(struct class_device *d,
+ const char *buf, size_t len)
+{
+ struct datapath *dp = dp_dev_get_dp(to_net_dev(d));
+#if 0
+ unsigned new_addr[6];
+ int i;
+
+ if (!capable(CAP_NET_ADMIN))
+  return -EPERM;
+
+ if (sscanf(buf, "%x:%x:%x:%x:%x:%x",
+     &new_addr[0], &new_addr[1], &new_addr[2],
+     &new_addr[3], &new_addr[4], &new_addr[5]) != 6)
+  return -EINVAL;
+
+ /* Must be 01:80:c2:00:00:0X */
+ for (i = 0; i < 5; i++)
+  if (new_addr[i] != br_group_address[i])
+   return -EINVAL;
+
+ if (new_addr[5] & ~0xf)
+  return -EINVAL;
+
+ if (new_addr[5] == 1 /* 802.3x Pause address */
+     || new_addr[5] == 2 /* 802.3ad Slow protocols */
+     || new_addr[5] == 3) /* 802.1X PAE address */
+  return -EINVAL;
+
+ spin_lock_bh(&br->lock);
+ for (i = 0; i < 6; i++)
+  br->group_addr[i] = new_addr[i];
+ spin_unlock_bh(&br->lock);
+#else
+ /* Use an explicit log level; an unprefixed printk() falls back to the
+  * kernel's default loglevel. */
+ printk(KERN_WARNING "%s: xxx attempt to store_group_addr()\n", dp_name(dp));
+#endif
+ return len;
+}
+
+static BRC_DEVICE_ATTR(group_addr, S_IRUGO | S_IWUSR,
+ show_group_addr, store_group_addr);
+
+/* All bridge-level sysfs attributes, exposed as a single attribute group
+ * (named SYSFS_BRIDGE_ATTR, i.e. the "bridge" subdirectory) under the
+ * datapath's local network device. */
+static struct attribute *bridge_attrs[] = {
+ &class_device_attr_forward_delay.attr,
+ &class_device_attr_hello_time.attr,
+ &class_device_attr_max_age.attr,
+ &class_device_attr_ageing_time.attr,
+ &class_device_attr_stp_state.attr,
+ &class_device_attr_priority.attr,
+ &class_device_attr_bridge_id.attr,
+ &class_device_attr_root_id.attr,
+ &class_device_attr_root_path_cost.attr,
+ &class_device_attr_root_port.attr,
+ &class_device_attr_topology_change.attr,
+ &class_device_attr_topology_change_detected.attr,
+ &class_device_attr_hello_timer.attr,
+ &class_device_attr_tcn_timer.attr,
+ &class_device_attr_topology_change_timer.attr,
+ &class_device_attr_gc_timer.attr,
+ &class_device_attr_group_addr.attr,
+ NULL
+};
+
+static struct attribute_group bridge_group = {
+ .name = SYSFS_BRIDGE_ATTR,
+ .attrs = bridge_attrs,
+};
+
+/*
+ * Add entries in sysfs onto the existing network class device
+ * for the bridge.
+ * Adds a attribute group "bridge" containing tuning parameters.
+ * Sub directory to hold links to interfaces.
+ *
+ * Note: the ifobj exists only to be a subdirectory
+ * to hold links. The ifobj exists in the same data structure
+ * as its parent the bridge so reference counting works.
+ */
+int brc_sysfs_add_dp(struct datapath *dp)
+{
+ struct kobject *kobj = to_kobj(dp->ports[ODPP_LOCAL]->dev);
+ int err;
+
+ /* Create the "bridge" attribute group under the local port's device. */
+ err = sysfs_create_group(kobj, &bridge_group);
+ if (err) {
+  pr_info("%s: can't create group %s/%s\n",
+   __func__, dp_name(dp), bridge_group.name);
+  goto out1;
+ }
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
+ kobject_set_name(&dp->ifobj, SYSFS_BRIDGE_PORT_SUBDIR);
+ dp->ifobj.ktype = NULL;
+ dp->ifobj.kset = NULL;
+ dp->ifobj.parent = kobj;
+
+ err = kobject_register(&dp->ifobj);
+ if (err) {
+  pr_info("%s: can't add kobject (directory) %s/%s\n",
+   __FUNCTION__, dp_name(dp), dp->ifobj.name);
+  goto out2;
+ }
+#else
+ /* Fixed: this read "br->ifobj", but there is no "br" in this function;
+  * brc_sysfs_del_dp() releases dp->ifobj, so it must be stored here. */
+ dp->ifobj = kobject_create_and_add(SYSFS_BRIDGE_PORT_SUBDIR, kobj);
+ if (!dp->ifobj) {
+  /* kobject_create_and_add() signals failure only by returning NULL,
+   * so set an error code explicitly; "err" would otherwise still be 0
+   * from the successful sysfs_create_group() above and the caller
+   * would see success. */
+  err = -ENOMEM;
+  pr_info("%s: can't add kobject (directory) %s/%s\n",
+   __func__, dp_name(dp), SYSFS_BRIDGE_PORT_SUBDIR);
+  goto out2;
+ }
+#endif
+ return 0;
+
+ out2:
+ sysfs_remove_group(kobj, &bridge_group);
+ out1:
+ return err;
+}
+
+/* Tears down the sysfs entries created by brc_sysfs_add_dp(): releases the
+ * ports subdirectory kobject and removes the "bridge" attribute group. */
+int brc_sysfs_del_dp(struct datapath *dp)
+{
+ struct kobject *kobj = to_kobj(dp->ports[ODPP_LOCAL]->dev);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
+ kobject_unregister(&dp->ifobj);
+#else
+ kobject_put(dp->ifobj);
+#endif
+ sysfs_remove_group(kobj, &bridge_group);
+
+ return 0;
+}
+#else /* !SUPPORT_SYSFS */
+/* Without sysfs support the add/del hooks are no-ops, except that the
+ * port-deletion stub must still drop the device reference and free the
+ * port, which brc_sysfs_del_if's kobject release would otherwise do. */
+int brc_sysfs_add_dp(struct datapath *dp) { return 0; }
+int brc_sysfs_del_dp(struct datapath *dp) { return 0; }
+int brc_sysfs_add_if(struct net_bridge_port *p) { return 0; }
+int brc_sysfs_del_if(struct net_bridge_port *p)
+{
+ dev_put(p->dev);
+ kfree(p);
+ return 0;
+}
+#endif /* !SUPPORT_SYSFS */
diff --git a/datapath/brc_sysfs_if.c b/datapath/brc_sysfs_if.c
new file mode 100644
index 00000000..20bb109b
--- /dev/null
+++ b/datapath/brc_sysfs_if.c
@@ -0,0 +1,334 @@
+/*
+ * Sysfs attributes of bridge ports for Open vSwitch
+ *
+ * This has been shamelessly copied from the kernel sources.
+ */
+
+#include <linux/capability.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/if_bridge.h>
+#include <linux/rtnetlink.h>
+#include <linux/spinlock.h>
+#include "brc_sysfs.h"
+#include "datapath.h"
+
+#ifdef SUPPORT_SYSFS
+
+/* A sysfs attribute of a bridge port: "show" formats the value into buf,
+ * "store" applies a value already parsed as an unsigned long. */
+struct brport_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct net_bridge_port *, char *);
+ ssize_t (*store)(struct net_bridge_port *, unsigned long);
+};
+
+/* Defines a brport_attribute named brport_attr_<_name>. */
+#define BRPORT_ATTR(_name,_mode,_show,_store) \
+struct brport_attribute brport_attr_##_name = { \
+ .attr = {.name = __stringify(_name), \
+   .mode = _mode, \
+   .owner = THIS_MODULE, }, \
+ .show = _show, \
+ .store = _store, \
+};
+
+/* Per-port attribute handlers.  As with the bridge-level attributes, the
+ * STP-related ones are stubs: the original bridge code is disabled with
+ * "#if 0" and a fixed placeholder is reported; stores are accepted but
+ * ignored.  Only port_no reports real information. */
+static ssize_t show_path_cost(struct net_bridge_port *p, char *buf)
+{
+#if 0
+ return sprintf(buf, "%d\n", p->path_cost);
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+static ssize_t store_path_cost(struct net_bridge_port *p, unsigned long v)
+{
+#if 0
+ br_stp_set_path_cost(p, v);
+#endif
+ return 0;
+}
+static BRPORT_ATTR(path_cost, S_IRUGO | S_IWUSR,
+   show_path_cost, store_path_cost);
+
+static ssize_t show_priority(struct net_bridge_port *p, char *buf)
+{
+#if 0
+ return sprintf(buf, "%d\n", p->priority);
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+static ssize_t store_priority(struct net_bridge_port *p, unsigned long v)
+{
+#if 0
+ if (v >= (1<<(16-BR_PORT_BITS)))
+  return -ERANGE;
+ br_stp_set_port_priority(p, v);
+#endif
+ return 0;
+}
+static BRPORT_ATTR(priority, S_IRUGO | S_IWUSR,
+   show_priority, store_priority);
+
+static ssize_t show_designated_root(struct net_bridge_port *p, char *buf)
+{
+#if 0
+ return br_show_bridge_id(buf, &p->designated_root);
+#else
+ return sprintf(buf, "0000.010203040506\n");
+#endif
+}
+static BRPORT_ATTR(designated_root, S_IRUGO, show_designated_root, NULL);
+
+static ssize_t show_designated_bridge(struct net_bridge_port *p, char *buf)
+{
+#if 0
+ return br_show_bridge_id(buf, &p->designated_bridge);
+#else
+ return sprintf(buf, "0000.060504030201\n");
+#endif
+}
+static BRPORT_ATTR(designated_bridge, S_IRUGO, show_designated_bridge, NULL);
+
+static ssize_t show_designated_port(struct net_bridge_port *p, char *buf)
+{
+#if 0
+ return sprintf(buf, "%d\n", p->designated_port);
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+static BRPORT_ATTR(designated_port, S_IRUGO, show_designated_port, NULL);
+
+static ssize_t show_designated_cost(struct net_bridge_port *p, char *buf)
+{
+#if 0
+ return sprintf(buf, "%d\n", p->designated_cost);
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+static BRPORT_ATTR(designated_cost, S_IRUGO, show_designated_cost, NULL);
+
+static ssize_t show_port_id(struct net_bridge_port *p, char *buf)
+{
+#if 0
+ return sprintf(buf, "0x%x\n", p->port_id);
+#else
+ return sprintf(buf, "0x%x\n", 0);
+#endif
+}
+static BRPORT_ATTR(port_id, S_IRUGO, show_port_id, NULL);
+
+/* The only live attribute: the datapath port number of this port. */
+static ssize_t show_port_no(struct net_bridge_port *p, char *buf)
+{
+ return sprintf(buf, "0x%x\n", p->port_no);
+}
+
+static BRPORT_ATTR(port_no, S_IRUGO, show_port_no, NULL);
+
+static ssize_t show_change_ack(struct net_bridge_port *p, char *buf)
+{
+#if 0
+ return sprintf(buf, "%d\n", p->topology_change_ack);
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+static BRPORT_ATTR(change_ack, S_IRUGO, show_change_ack, NULL);
+
+static ssize_t show_config_pending(struct net_bridge_port *p, char *buf)
+{
+#if 0
+ return sprintf(buf, "%d\n", p->config_pending);
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+static BRPORT_ATTR(config_pending, S_IRUGO, show_config_pending, NULL);
+
+static ssize_t show_port_state(struct net_bridge_port *p, char *buf)
+{
+#if 0
+ return sprintf(buf, "%d\n", p->state);
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+static BRPORT_ATTR(state, S_IRUGO, show_port_state, NULL);
+
+static ssize_t show_message_age_timer(struct net_bridge_port *p,
+         char *buf)
+{
+#if 0
+ return sprintf(buf, "%ld\n", br_timer_value(&p->message_age_timer));
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+static BRPORT_ATTR(message_age_timer, S_IRUGO, show_message_age_timer, NULL);
+
+static ssize_t show_forward_delay_timer(struct net_bridge_port *p,
+         char *buf)
+{
+#if 0
+ return sprintf(buf, "%ld\n", br_timer_value(&p->forward_delay_timer));
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+static BRPORT_ATTR(forward_delay_timer, S_IRUGO, show_forward_delay_timer, NULL);
+
+static ssize_t show_hold_timer(struct net_bridge_port *p,
+         char *buf)
+{
+#if 0
+ return sprintf(buf, "%ld\n", br_timer_value(&p->hold_timer));
+#else
+ return sprintf(buf, "%d\n", 0);
+#endif
+}
+static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL);
+
+/* All per-port attributes, installed one file at a time by
+ * brc_sysfs_add_if(). */
+static struct brport_attribute *brport_attrs[] = {
+ &brport_attr_path_cost,
+ &brport_attr_priority,
+ &brport_attr_port_id,
+ &brport_attr_port_no,
+ &brport_attr_designated_root,
+ &brport_attr_designated_bridge,
+ &brport_attr_designated_port,
+ &brport_attr_designated_cost,
+ &brport_attr_state,
+ &brport_attr_change_ack,
+ &brport_attr_config_pending,
+ &brport_attr_message_age_timer,
+ &brport_attr_forward_delay_timer,
+ &brport_attr_hold_timer,
+ NULL
+};
+
+#define to_brport_attr(_at) container_of(_at, struct brport_attribute, attr)
+#define to_brport(obj) container_of(obj, struct net_bridge_port, kobj)
+
+/* sysfs "show" dispatcher: recovers the port and the attribute from the
+ * embedded kobject/attribute and forwards to the attribute's handler. */
+static ssize_t brport_show(struct kobject * kobj,
+      struct attribute * attr, char * buf)
+{
+ struct brport_attribute * brport_attr = to_brport_attr(attr);
+ struct net_bridge_port * p = to_brport(kobj);
+
+ return brport_attr->show(p, buf);
+}
+
+/* sysfs "store" dispatcher for port attributes.  Writing is not yet
+ * supported: after the capability check the attempt is logged and -EINVAL
+ * is returned.  The disabled code shows the intended parse/dispatch. */
+static ssize_t brport_store(struct kobject * kobj,
+       struct attribute * attr,
+       const char * buf, size_t count)
+{
+ struct net_bridge_port * p = to_brport(kobj);
+#if 0
+ struct brport_attribute * brport_attr = to_brport_attr(attr);
+ char *endp;
+ unsigned long val;
+#endif
+ ssize_t ret = -EINVAL;
+
+ if (!capable(CAP_NET_ADMIN))
+  return -EPERM;
+
+#if 0
+ val = simple_strtoul(buf, &endp, 0);
+ if (endp != buf) {
+  rtnl_lock();
+  if (p->dev && p->br && brport_attr->store) {
+   spin_lock_bh(&p->br->lock);
+   ret = brport_attr->store(p, val);
+   spin_unlock_bh(&p->br->lock);
+   if (ret == 0)
+    ret = count;
+  }
+  rtnl_unlock();
+ }
+#else
+ /* Use an explicit log level; an unprefixed printk() falls back to the
+  * kernel's default loglevel. */
+ printk(KERN_WARNING "%s: xxx writing port parms not supported yet!\n",
+        dp_name(p->dp));
+#endif
+ return ret;
+}
+
+/* show/store operations shared by every port attribute file. */
+struct sysfs_ops brport_sysfs_ops = {
+ .show = brport_show,
+ .store = brport_store,
+};
+
+/* kobject release: frees the net_bridge_port once its last reference is
+ * dropped (see kobject_put() in brc_sysfs_del_if()). */
+static void release_nbp(struct kobject *kobj)
+{
+ struct net_bridge_port *p
+  = container_of(kobj, struct net_bridge_port, kobj);
+ kfree(p);
+}
+
+struct kobj_type brport_ktype = {
+ .sysfs_ops = &brport_sysfs_ops,
+ .release = release_nbp
+};
+
+/*
+ * Add sysfs entries to ethernet device added to a bridge.
+ * Creates a brport subdirectory with bridge attributes.
+ * Puts symlink in bridge's brport subdirectory
+ *
+ * Sequence: register the port's "brport" kobject under its own net device,
+ * link it back to the bridge device, create one file per attribute, then
+ * link the port into the bridge's ports subdirectory and announce it.
+ * On any failure the kobject is deleted and its reference dropped, which
+ * ends up freeing the port via release_nbp().
+ */
+int brc_sysfs_add_if(struct net_bridge_port *p)
+{
+ struct datapath *dp = p->dp;
+ struct brport_attribute **a;
+ int err;
+
+ kobject_init(&p->kobj);
+ kobject_set_name(&p->kobj, SYSFS_BRIDGE_PORT_ATTR);
+ p->kobj.ktype = &brport_ktype;
+ p->kobj.kset = NULL;
+ p->kobj.parent = &(p->dev->class_dev.kobj);
+
+ err = kobject_add(&p->kobj);
+ if (err)
+  goto err_put;
+
+ /* "bridge" symlink from the port back to the bridge device. */
+ err = sysfs_create_link(&p->kobj,
+    &dp->ports[ODPP_LOCAL]->dev->class_dev.kobj,
+    SYSFS_BRIDGE_PORT_LINK);
+ if (err)
+  goto err_del;
+
+ for (a = brport_attrs; *a; ++a) {
+  err = sysfs_create_file(&p->kobj, &((*a)->attr));
+  if (err)
+   goto err_del;
+ }
+
+ /* Symlink named after the interface in the bridge's ports directory. */
+ err = sysfs_create_link(&dp->ifobj, &p->kobj, p->dev->name);
+ if (err)
+  goto err_del;
+
+ kobject_uevent(&p->kobj, KOBJ_ADD);
+
+ return err;
+
+err_del:
+ kobject_del(&p->kobj);
+err_put:
+ kobject_put(&p->kobj);
+ return err;
+}
+
+/* Removes the port's sysfs entries, drops the device reference taken when
+ * the port was attached, and releases the kobject (freeing the port via
+ * release_nbp() when the last reference goes away).
+ *
+ * NOTE(review): the symlink created in the bridge's ifobj directory by
+ * brc_sysfs_add_if() does not appear to be removed here (no
+ * sysfs_remove_link()) -- verify whether kobject_del() covers it. */
+int brc_sysfs_del_if(struct net_bridge_port *p)
+{
+ struct net_device *dev = p->dev;
+
+ kobject_uevent(&p->kobj, KOBJ_REMOVE);
+ kobject_del(&p->kobj);
+
+ dev_put(dev);
+
+ kobject_put(&p->kobj);
+
+ return 0;
+}
+#endif /* SUPPORT_SYSFS */
diff --git a/datapath/brcompat.c b/datapath/brcompat.c
new file mode 100644
index 00000000..2e437ccd
--- /dev/null
+++ b/datapath/brcompat.c
@@ -0,0 +1,519 @@
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/etherdevice.h>
+#include <linux/if_bridge.h>
+#include <linux/rculist.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <net/genetlink.h>
+
+#include "compat.h"
+#include "openvswitch/brcompat-netlink.h"
+#include "brc_procfs.h"
+#include "brc_sysfs.h"
+#include "datapath.h"
+#include "dp_dev.h"
+
+/* Forward declarations: defined later in this file. */
+static struct genl_family brc_genl_family;
+static struct genl_multicast_group brc_mc_group;
+
+/* Time to wait for ovs-vswitchd to respond to a datapath action, in
+ * jiffies. */
+#define BRC_TIMEOUT (HZ * 5)
+
+/* Mutex to serialize ovs-brcompatd callbacks. (Some callbacks naturally hold
+ * br_ioctl_mutex, others hold rtnl_lock, but we can't take the former
+ * ourselves and we don't want to hold the latter over a potentially long
+ * period of time.) */
+static DEFINE_MUTEX(brc_serial);
+
+/* Userspace communication. */
+static DEFINE_SPINLOCK(brc_lock); /* Ensure atomic access to these vars. */
+static DECLARE_COMPLETION(brc_done); /* Userspace signaled operation done? */
+static int brc_err; /* Error code from userspace. */
+static u32 brc_seq; /* Sequence number for current op. */
+
+static int brc_send_command(const char *bridge, const char *port, int op);
+
+/* Fills "indices" with the ifindex of each existing datapath's local port,
+ * up to "num" entries, and returns how many were stored. */
+static int
+get_dp_ifindices(int *indices, int num)
+{
+ int dp_idx;
+ int count = 0;
+
+ rcu_read_lock();
+ for (dp_idx = 0; dp_idx < ODP_MAX; dp_idx++) {
+  struct datapath *dp;
+
+  if (count >= num)
+   break;
+  dp = get_dp(dp_idx);
+  if (dp)
+   indices[count++] = dp->ports[ODPP_LOCAL]->dev->ifindex;
+ }
+ rcu_read_unlock();
+
+ return count;
+}
+
+/* Records each port's ifindex at index port_no in "ifindices"; ports whose
+ * number is out of range for the array are skipped. */
+static void
+get_port_ifindices(struct datapath *dp, int *ifindices, int num)
+{
+ struct net_bridge_port *port;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu (port, &dp->port_list, node) {
+  if (port->port_no >= num)
+   continue;
+  ifindices[port->port_no] = port->dev->ifindex;
+ }
+ rcu_read_unlock();
+}
+
+/* Copies the bridge name from userspace (forcing NUL termination) and asks
+ * ovs-vswitchd, via generic netlink, to create or destroy the datapath. */
+static int brc_add_del_bridge(char __user *uname, int add)
+{
+ char name[IFNAMSIZ];
+
+ if (copy_from_user(name, uname, IFNAMSIZ))
+  return -EFAULT;
+
+ name[IFNAMSIZ - 1] = 0;
+ return brc_send_command(name, NULL,
+    add ? BRC_GENL_C_DP_ADD : BRC_GENL_C_DP_DEL);
+}
+
+/* BRCTL_GET_BRIDGES: copies up to "n" bridge ifindexes to userspace and
+ * returns how many were copied, or a negative errno. */
+static int brc_get_bridges(int __user *uindices, int n)
+{
+ int *indices;
+ int ret;
+
+ /* "n" originates from an unsigned long ioctl argument truncated to int;
+  * reject negative counts before using it to size a kernel buffer. */
+ if (n < 0)
+  return -EINVAL;
+ if (n >= 2048)
+  return -ENOMEM;
+
+ indices = kcalloc(n, sizeof(int), GFP_KERNEL);
+ if (indices == NULL)
+  return -ENOMEM;
+
+ n = get_dp_ifindices(indices, n);
+
+ ret = copy_to_user(uindices, indices, n * sizeof(int)) ? -EFAULT : n;
+
+ kfree(indices);
+ return ret;
+}
+
+/* Legacy deviceless bridge ioctl's. Called with br_ioctl_mutex.
+ * The SIOC[GS]IFBR interface passes an array of three longs:
+ * { command, argument pointer, count }. */
+static int
+old_deviceless(void __user *uarg)
+{
+ unsigned long args[3];
+
+ if (copy_from_user(args, uarg, sizeof(args)))
+  return -EFAULT;
+
+ switch (args[0]) {
+ case BRCTL_GET_BRIDGES:
+  return brc_get_bridges((int __user *)args[1], args[2]);
+
+ case BRCTL_ADD_BRIDGE:
+  return brc_add_del_bridge((void __user *)args[1], 1);
+ case BRCTL_DEL_BRIDGE:
+  return brc_add_del_bridge((void __user *)args[1], 0);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+/* Called with the br_ioctl_mutex.  Entry point installed via brioctl_set();
+ * the signature grew a "struct net *" parameter in 2.6.24, hence the
+ * version-dependent prototype. */
+static int
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+brc_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg)
+#else
+brc_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
+#endif
+{
+ switch (cmd) {
+ case SIOCGIFBR:
+ case SIOCSIFBR:
+  return old_deviceless(uarg);
+
+ case SIOCBRADDBR:
+  return brc_add_del_bridge(uarg, 1);
+ case SIOCBRDELBR:
+  return brc_add_del_bridge(uarg, 0);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+/* Asks ovs-vswitchd to add or remove the device with "port_ifindex" as a
+ * port of bridge "dev".  Called under rtnl_lock, which must be dropped
+ * around the (sleeping) netlink round-trip. */
+static int
+brc_add_del_port(struct net_device *dev, int port_ifindex, int add)
+{
+ struct net_device *port;
+ char dev_name[IFNAMSIZ], port_name[IFNAMSIZ];
+ int err;
+
+ port = __dev_get_by_index(&init_net, port_ifindex);
+ if (!port)
+  return -EINVAL;
+
+ /* Save name of dev and port because there's a race between the
+  * rtnl_unlock() and the brc_send_command(). */
+ strcpy(dev_name, dev->name);
+ strcpy(port_name, port->name);
+
+ rtnl_unlock();
+ err = brc_send_command(dev_name, port_name,
+          add ? BRC_GENL_C_PORT_ADD : BRC_GENL_C_PORT_DEL);
+ rtnl_lock();
+
+ return err;
+}
+
+/* BRCTL_GET_BRIDGE_INFO: reports a minimal __bridge_info to userspace.
+ * Only the bridge ID (MAC with a zero priority prefix) is filled in;
+ * everything else, including stp_enabled, stays zero. */
+static int
+brc_get_bridge_info(struct net_device *dev, struct __bridge_info __user *ub)
+{
+ struct __bridge_info info;
+ u64 bridge_id = 0;
+ int octet;
+
+ memset(&info, 0, sizeof(struct __bridge_info));
+
+ for (octet = 0; octet < ETH_ALEN; octet++)
+  bridge_id = (bridge_id << 8) | dev->dev_addr[octet];
+ info.bridge_id = cpu_to_be64(bridge_id);
+ info.stp_enabled = 0;
+
+ if (copy_to_user(ub, &info, sizeof(struct __bridge_info)))
+  return -EFAULT;
+
+ return 0;
+}
+
+/* BRCTL_GET_PORT_LIST: copies up to "num" port ifindexes (indexed by port
+ * number) to userspace and returns the count copied, or a negative errno.
+ * A count of 0 means "use the historical default of 256". */
+static int
+brc_get_port_list(struct net_device *dev, int __user *uindices, int num)
+{
+ struct dp_dev *dp_dev = netdev_priv(dev);
+ struct datapath *dp = dp_dev->dp;
+ int *indices;
+
+ if (num < 0)
+  return -EINVAL;
+ if (num == 0)
+  num = 256;
+ if (num > DP_MAX_PORTS)
+  num = DP_MAX_PORTS;
+
+ indices = kcalloc(num, sizeof(int), GFP_KERNEL);
+ if (indices == NULL)
+  return -ENOMEM;
+
+ get_port_ifindices(dp, indices, num);
+ if (copy_to_user(uindices, indices, num * sizeof(int)))
+  num = -EFAULT;
+ kfree(indices);
+ return num;
+}
+
+/* Legacy ioctl's through SIOCDEVPRIVATE. Called with rtnl_lock.
+ * ifr_data points to an array of four longs: { command, arg1, arg2, arg3 }. */
+static int
+old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+ unsigned long args[4];
+
+ if (copy_from_user(args, rq->ifr_data, sizeof(args)))
+  return -EFAULT;
+
+ switch (args[0]) {
+ case BRCTL_ADD_IF:
+  return brc_add_del_port(dev, args[1], 1);
+ case BRCTL_DEL_IF:
+  return brc_add_del_port(dev, args[1], 0);
+
+ case BRCTL_GET_BRIDGE_INFO:
+  return brc_get_bridge_info(dev, (struct __bridge_info __user *)args[1]);
+
+ case BRCTL_GET_PORT_LIST:
+  return brc_get_port_list(dev, (int __user *)args[1], args[2]);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+/* Called with the rtnl_lock.  Installed as dp_ioctl_hook; dispatches the
+ * per-device bridge-compat ioctls. */
+static int
+brc_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+ switch (cmd) {
+ case SIOCDEVPRIVATE:
+  return old_dev_ioctl(dev, rq, cmd);
+
+ case SIOCBRADDIF:
+  return brc_add_del_port(dev, rq->ifr_ifindex, 1);
+ case SIOCBRDELIF:
+  return brc_add_del_port(dev, rq->ifr_ifindex, 0);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+
+/* Generic netlink family used to talk to ovs-brcompatd; the kernel picks
+ * the family id (GENL_ID_GENERATE). */
+static struct genl_family brc_genl_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = 0,
+ .name = BRC_GENL_FAMILY_NAME,
+ .version = 1,
+ .maxattr = BRC_GENL_A_MAX,
+};
+
+/* BRC_GENL_C_QUERY_MC handler: replies to userspace with the id of the
+ * multicast group it must listen on for datapath commands. */
+static int brc_genl_query(struct sk_buff *skb, struct genl_info *info)
+{
+ int err = -EINVAL;
+ struct sk_buff *ans_skb;
+ void *data;
+
+ ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!ans_skb)
+  return -ENOMEM;
+
+ data = genlmsg_put_reply(ans_skb, info, &brc_genl_family,
+     0, BRC_GENL_C_QUERY_MC);
+ if (data == NULL) {
+  err = -ENOMEM;
+  goto err;
+ }
+ /* The NLA_PUT_* macros jump to nla_put_failure on error. */
+ NLA_PUT_U32(ans_skb, BRC_GENL_A_MC_GROUP, brc_mc_group.id);
+
+ genlmsg_end(ans_skb, data);
+ return genlmsg_reply(ans_skb, info); /* Consumes ans_skb. */
+
+err:
+nla_put_failure:
+ kfree_skb(ans_skb);
+ return err;
+}
+
+static struct genl_ops brc_genl_ops_query_dp = {
+ .cmd = BRC_GENL_C_QUERY_MC,
+ .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .policy = NULL,
+ .doit = brc_genl_query,
+ .dumpit = NULL
+};
+
+/* Attribute policy: what each attribute may contain. */
+static struct nla_policy brc_genl_policy[BRC_GENL_A_MAX + 1] = {
+ [BRC_GENL_A_ERR_CODE] = { .type = NLA_U32 },
+ [BRC_GENL_A_PROC_DIR] = { .type = NLA_NUL_STRING },
+ [BRC_GENL_A_PROC_NAME] = { .type = NLA_NUL_STRING },
+ [BRC_GENL_A_PROC_DATA] = { .type = NLA_NUL_STRING },
+};
+
+/* BRC_GENL_C_DP_RESULT handler: userspace reporting the outcome of a
+ * command sent by brc_send_command().  The reply's sequence number must
+ * match the outstanding request, otherwise it is a stale reply from an
+ * earlier, timed-out command and is dropped. */
+static int
+brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info)
+{
+ unsigned long int flags;
+ int err;
+
+ if (!info->attrs[BRC_GENL_A_ERR_CODE])
+  return -EINVAL;
+
+ spin_lock_irqsave(&brc_lock, flags);
+ if (brc_seq == info->snd_seq) {
+  brc_err = nla_get_u32(info->attrs[BRC_GENL_A_ERR_CODE]);
+  complete(&brc_done);
+  err = 0;
+ } else {
+  err = -ESTALE;
+ }
+ spin_unlock_irqrestore(&brc_lock, flags);
+
+ return err;
+}
+
+static struct genl_ops brc_genl_ops_dp_result = {
+ .cmd = BRC_GENL_C_DP_RESULT,
+ .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .policy = brc_genl_policy,
+ .doit = brc_genl_dp_result,
+ .dumpit = NULL
+};
+
+/* brc_genl_set_proc is not defined in this file; presumably declared by
+ * brc_procfs.h -- TODO confirm. */
+static struct genl_ops brc_genl_ops_set_proc = {
+ .cmd = BRC_GENL_C_SET_PROC,
+ .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .policy = brc_genl_policy,
+ .doit = brc_genl_set_proc,
+ .dumpit = NULL
+};
+
+/* Sends command "op" for "bridge" (and optionally "port") to ovs-brcompatd
+ * over the brcompat multicast group and waits up to BRC_TIMEOUT for the
+ * matching BRC_GENL_C_DP_RESULT reply.  Returns 0 or a negative errno.
+ * Serialized by brc_serial; may sleep. */
+static int brc_send_command(const char *bridge, const char *port, int op)
+{
+ unsigned long int flags;
+ struct sk_buff *skb;
+ void *data;
+ int error;
+
+ mutex_lock(&brc_serial);
+
+ /* Increment sequence number first, so that we ignore any replies
+  * to stale requests. */
+ spin_lock_irqsave(&brc_lock, flags);
+ brc_seq++;
+ INIT_COMPLETION(brc_done);
+ spin_unlock_irqrestore(&brc_lock, flags);
+
+ /* Compose message. */
+ skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ error = -ENOMEM;
+ if (skb == NULL)
+  goto exit_unlock;
+ data = genlmsg_put(skb, 0, brc_seq, &brc_genl_family, 0, op);
+ if (data == NULL) {
+  /* Fixed: genlmsg_put() can fail and was previously used
+   * unchecked.  "error" is still -ENOMEM here; the shared
+   * failure path frees the skb. */
+  goto nla_put_failure;
+ }
+
+ NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge);
+ if (port)
+  NLA_PUT_STRING(skb, BRC_GENL_A_PORT_NAME, port);
+
+ genlmsg_end(skb, data);
+
+ /* Send message.  genlmsg_multicast() consumes the skb either way. */
+ error = genlmsg_multicast(skb, 0, brc_mc_group.id, GFP_KERNEL);
+ if (error < 0)
+  goto exit_unlock;
+
+ /* Wait for reply. */
+ error = -ETIMEDOUT;
+ if (!wait_for_completion_timeout(&brc_done, BRC_TIMEOUT))
+  goto exit_unlock;
+
+ /* Userspace reports a positive errno; convert to kernel convention. */
+ error = -brc_err;
+ goto exit_unlock;
+
+nla_put_failure:
+ kfree_skb(skb);
+exit_unlock:
+ mutex_unlock(&brc_serial);
+ return error;
+}
+
+/* Datapath-creation hook: pins this module for the datapath's lifetime and
+ * exposes the bridge sysfs tree for it. */
+int brc_add_dp(struct datapath *dp)
+{
+ if (!try_module_get(THIS_MODULE))
+  return -ENODEV;
+#ifdef SUPPORT_SYSFS
+ brc_sysfs_add_dp(dp);
+#endif
+
+ return 0;
+}
+
+/* Datapath-deletion hook: tears down sysfs and drops the module reference
+ * taken in brc_add_dp(). */
+int brc_del_dp(struct datapath *dp)
+{
+#ifdef SUPPORT_SYSFS
+ brc_sysfs_del_dp(dp);
+#endif
+ module_put(THIS_MODULE);
+
+ return 0;
+}
+
+/* Module init: refuses to load if any datapath already exists, installs the
+ * bridge ioctl and datapath/interface hooks, then registers the generic
+ * netlink family, its operations and the "brcompat" multicast group. */
+static int
+__init brc_init(void)
+{
+ int i;
+ int err;
+
+ /* Fixed: use an explicit log level rather than the kernel's default. */
+ printk(KERN_INFO "Open vSwitch Bridge Compatibility, built "__DATE__" "__TIME__"\n");
+
+ /* This module takes over datapath management, so it cannot load while
+  * datapaths already exist. */
+ rcu_read_lock();
+ for (i=0; i<ODP_MAX; i++) {
+  if (get_dp(i)) {
+   rcu_read_unlock();
+   printk(KERN_EMERG "brcompat: no datapaths may exist!\n");
+   return -EEXIST;
+  }
+ }
+ rcu_read_unlock();
+
+ /* Set the bridge ioctl handler */
+ brioctl_set(brc_ioctl_deviceless_stub);
+
+ /* Set the openvswitch_mod device ioctl handler */
+ dp_ioctl_hook = brc_dev_ioctl;
+
+ /* Register hooks for datapath adds and deletes */
+ dp_add_dp_hook = brc_add_dp;
+ dp_del_dp_hook = brc_del_dp;
+
+ /* Register hooks for interface adds and deletes */
+#ifdef SUPPORT_SYSFS
+ dp_add_if_hook = brc_sysfs_add_if;
+ dp_del_if_hook = brc_sysfs_del_if;
+#endif
+
+ /* Randomize the initial sequence number. This is not a security
+  * feature; it only helps avoid crossed wires between userspace and
+  * the kernel when the module is unloaded and reloaded. */
+ brc_seq = net_random();
+
+ /* Register generic netlink family to communicate changes to
+  * userspace. */
+ err = genl_register_family(&brc_genl_family);
+ if (err)
+  goto error;
+
+ err = genl_register_ops(&brc_genl_family, &brc_genl_ops_query_dp);
+ if (err != 0)
+  goto err_unregister;
+
+ err = genl_register_ops(&brc_genl_family, &brc_genl_ops_dp_result);
+ if (err != 0)
+  goto err_unregister;
+
+ err = genl_register_ops(&brc_genl_family, &brc_genl_ops_set_proc);
+ if (err != 0)
+  goto err_unregister;
+
+ strcpy(brc_mc_group.name, "brcompat");
+ err = genl_register_mc_group(&brc_genl_family, &brc_mc_group);
+ if (err < 0)
+  goto err_unregister;
+
+ return 0;
+
+err_unregister:
+ genl_unregister_family(&brc_genl_family);
+error:
+ /* Fixed: message lacked a log level and a trailing newline, which
+  * merges it with the next printk. */
+ printk(KERN_EMERG "brcompat: failed to install!\n");
+ return err;
+}
+
+/* Module exit: detaches every hook installed by brc_init() before tearing
+ * down the netlink family and the brcompat procfs entries. */
+static void
+brc_cleanup(void)
+{
+ /* Unregister hooks for datapath adds and deletes */
+ dp_add_dp_hook = NULL;
+ dp_del_dp_hook = NULL;
+
+ /* Unregister hooks for interface adds and deletes */
+ dp_add_if_hook = NULL;
+ dp_del_if_hook = NULL;
+
+ /* Unregister ioctl hooks */
+ dp_ioctl_hook = NULL;
+ brioctl_set(NULL);
+
+ genl_unregister_family(&brc_genl_family);
+ brc_procfs_exit();
+}
+
+module_init(brc_init);
+module_exit(brc_cleanup);
+
+MODULE_DESCRIPTION("Open vSwitch bridge compatibility");
+MODULE_AUTHOR("Nicira Networks");
+MODULE_LICENSE("GPL");
diff --git a/datapath/compat.h b/datapath/compat.h
new file mode 100644
index 00000000..12100ae3
--- /dev/null
+++ b/datapath/compat.h
@@ -0,0 +1,17 @@
+#ifndef COMPAT_H
+#define COMPAT_H 1
+
+#include <linux/version.h>
+
+/* Selects the backward-compatibility shims for the running kernel's major
+ * series (2.6 vs. 2.4). */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+
+#include "compat26.h"
+
+#else
+
+#include "compat24.h"
+
+#endif
+
+
+#endif /* compat.h */
diff --git a/datapath/datapath.c b/datapath/datapath.c
new file mode 100644
index 00000000..015edc4b
--- /dev/null
+++ b/datapath/datapath.c
@@ -0,0 +1,1611 @@
+/*
+ * Distributed under the terms of the GNU GPL version 2.
+ * Copyright (c) 2007, 2008, 2009 Nicira Networks.
+ */
+
+/* Functions for managing the dp interface/device. */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/if_arp.h>
+#include <linux/if_bridge.h>
+#include <linux/if_vlan.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/delay.h>
+#include <linux/time.h>
+#include <linux/etherdevice.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/llc.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/version.h>
+#include <linux/ethtool.h>
+#include <linux/random.h>
+#include <linux/wait.h>
+#include <asm/system.h>
+#include <asm/div64.h>
+#include <asm/bug.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/inetdevice.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/workqueue.h>
+#include <linux/dmi.h>
+#include <net/llc.h>
+
+#include "openvswitch/datapath-protocol.h"
+#include "datapath.h"
+#include "actions.h"
+#include "dp_dev.h"
+#include "flow.h"
+
+#include "compat.h"
+
+
/* Hooks that the brcompat module installs to observe datapath and port
 * lifecycle events and to take over bridge ioctls.  Any of them may be
 * NULL; callers check before invoking. */
int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
EXPORT_SYMBOL(dp_ioctl_hook);

int (*dp_add_dp_hook)(struct datapath *dp);
EXPORT_SYMBOL(dp_add_dp_hook);

int (*dp_del_dp_hook)(struct datapath *dp);
EXPORT_SYMBOL(dp_del_dp_hook);

int (*dp_add_if_hook)(struct net_bridge_port *p);
EXPORT_SYMBOL(dp_add_if_hook);

int (*dp_del_if_hook)(struct net_bridge_port *p);
EXPORT_SYMBOL(dp_del_if_hook);

/* Datapaths.  Protected on the read side by rcu_read_lock, on the write side
 * by dp_mutex.  dp_mutex is almost completely redundant with genl_mutex
 * maintained by the Generic Netlink code, but the timeout path needs mutual
 * exclusion too.
 *
 * dp_mutex nests inside the RTNL lock: if you need both you must take the RTNL
 * lock first.
 *
 * It is safe to access the datapath and net_bridge_port structures with just
 * dp_mutex.
 */
static struct datapath *dps[ODP_MAX];
static DEFINE_MUTEX(dp_mutex);

/* Number of milliseconds between runs of the maintenance thread. */
#define MAINT_SLEEP_MSECS 1000

/* Forward declaration; defined below with the port-attach logic. */
static int new_nbp(struct datapath *, struct net_device *, int port_no);
+
+/* Must be called with rcu_read_lock or dp_mutex. */
+struct datapath *get_dp(int dp_idx)
+{
+ if (dp_idx < 0 || dp_idx >= ODP_MAX)
+ return NULL;
+ return rcu_dereference(dps[dp_idx]);
+}
+EXPORT_SYMBOL_GPL(get_dp);
+
+struct datapath *get_dp_locked(int dp_idx)
+{
+ struct datapath *dp;
+
+ mutex_lock(&dp_mutex);
+ dp = get_dp(dp_idx);
+ if (dp)
+ mutex_lock(&dp->mutex);
+ mutex_unlock(&dp_mutex);
+ return dp;
+}
+
/* Upper bound on the size of the rtnetlink skb needed for one RTM_NEWLINK
 * or RTM_DELLINK port notification built by dp_fill_ifinfo(). */
static inline size_t br_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
	       + nla_total_size(4) /* IFLA_MASTER */
	       + nla_total_size(4) /* IFLA_MTU */
	       + nla_total_size(4) /* IFLA_LINK */
	       + nla_total_size(1); /* IFLA_OPERSTATE */
}
+
/* Fills 'skb' with an rtnetlink message of type 'event' (RTM_NEWLINK or
 * RTM_DELLINK) describing bridge port 'port'.  Returns the message length
 * on success or -EMSGSIZE if 'skb' lacks room, which would indicate a bug
 * in br_nlmsg_size(). */
static int dp_fill_ifinfo(struct sk_buff *skb,
			  const struct net_bridge_port *port,
			  int event, unsigned int flags)
{
	const struct datapath *dp = port->dp;
	const struct net_device *dev = port->dev;
	struct ifinfomsg *hdr;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);
	hdr->ifi_family = AF_BRIDGE;
	hdr->__ifi_pad = 0;
	hdr->ifi_type = dev->type;
	hdr->ifi_index = dev->ifindex;
	hdr->ifi_flags = dev_get_flags(dev);
	hdr->ifi_change = 0;

	NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
	/* The datapath's local port plays the role of the bridge device. */
	NLA_PUT_U32(skb, IFLA_MASTER, dp->ports[ODPP_LOCAL]->dev->ifindex);
	NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
#ifdef IFLA_OPERSTATE
	NLA_PUT_U8(skb, IFLA_OPERSTATE,
		   netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
#endif

	if (dev->addr_len)
		NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);

	if (dev->ifindex != dev->iflink)
		NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);

	return nlmsg_end(skb, nlh);

nla_put_failure:
	/* One of the NLA_PUT_* macros above jumped here on overflow. */
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
+
/* Broadcasts an rtnetlink notification of type 'event' (RTM_NEWLINK or
 * RTM_DELLINK) about bridge port 'port' to the RTNLGRP_LINK group, so that
 * userspace link monitors see datapath port changes. */
static void dp_ifinfo_notify(int event, struct net_bridge_port *port)
{
	struct net *net = dev_net(port->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(br_nlmsg_size(), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = dp_fill_ifinfo(skb, port, event, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in br_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_LINK, err);
}
+
+static int create_dp(int dp_idx, const char __user *devnamep)
+{
+ struct net_device *dp_dev;
+ char devname[IFNAMSIZ];
+ struct datapath *dp;
+ int err;
+ int i;
+
+ if (devnamep) {
+ err = -EFAULT;
+ if (strncpy_from_user(devname, devnamep, IFNAMSIZ - 1) < 0)
+ goto err;
+ devname[IFNAMSIZ - 1] = '\0';
+ } else {
+ snprintf(devname, sizeof devname, "of%d", dp_idx);
+ }
+
+ rtnl_lock();
+ mutex_lock(&dp_mutex);
+ err = -ENODEV;
+ if (!try_module_get(THIS_MODULE))
+ goto err_unlock;
+
+ /* Exit early if a datapath with that number already exists.
+ * (We don't use -EEXIST because that's ambiguous with 'devname'
+ * conflicting with an existing network device name.) */
+ err = -EBUSY;
+ if (get_dp(dp_idx))
+ goto err_put_module;
+
+ err = -ENOMEM;
+ dp = kzalloc(sizeof *dp, GFP_KERNEL);
+ if (dp == NULL)
+ goto err_put_module;
+
+ mutex_init(&dp->mutex);
+ dp->dp_idx = dp_idx;
+ for (i = 0; i < DP_N_QUEUES; i++)
+ skb_queue_head_init(&dp->queues[i]);
+ init_waitqueue_head(&dp->waitqueue);
+
+ /* Setup our datapath device */
+ dp_dev = dp_dev_create(dp, devname, ODPP_LOCAL);
+ err = PTR_ERR(dp_dev);
+ if (IS_ERR(dp_dev))
+ goto err_free_dp;
+
+ err = -ENOMEM;
+ rcu_assign_pointer(dp->table, dp_table_create(DP_L1_SIZE));
+ if (!dp->table)
+ goto err_destroy_dp_dev;
+ INIT_LIST_HEAD(&dp->port_list);
+
+ err = new_nbp(dp, dp_dev, ODPP_LOCAL);
+ if (err)
+ goto err_destroy_table;
+
+ dp->drop_frags = 0;
+ dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
+ if (!dp->stats_percpu)
+ goto err_destroy_local_port;
+
+ rcu_assign_pointer(dps[dp_idx], dp);
+ mutex_unlock(&dp_mutex);
+ rtnl_unlock();
+
+ if (dp_add_dp_hook)
+ dp_add_dp_hook(dp);
+
+ return 0;
+
+err_destroy_local_port:
+ dp_del_port(dp->ports[ODPP_LOCAL], NULL);
+err_destroy_table:
+ dp_table_destroy(dp->table, 0);
+err_destroy_dp_dev:
+ dp_dev_destroy(dp_dev);
+err_free_dp:
+ kfree(dp);
+err_put_module:
+ module_put(THIS_MODULE);
+err_unlock:
+ mutex_unlock(&dp_mutex);
+ rtnl_unlock();
+err:
+ return err;
+}
+
/* Tears down 'dp': detaches all of its ports, unpublishes it from dps[],
 * and frees everything it owns.  Internal (dp_dev) net devices are queued
 * on 'dp_devs' for the caller to free_netdev() after dropping its locks.
 * Called with the RTNL lock and dp_mutex held. */
static void do_destroy_dp(struct datapath *dp, struct list_head *dp_devs)
{
	struct net_bridge_port *p, *n;
	int i;

	if (dp_del_dp_hook)
		dp_del_dp_hook(dp);

	/* Drop references to DP. */
	list_for_each_entry_safe (p, n, &dp->port_list, node)
		dp_del_port(p, dp_devs);

	rcu_assign_pointer(dps[dp->dp_idx], NULL);
	synchronize_rcu();

	/* Wait until no longer in use, then destroy it. */
	/* NOTE(review): this is the second synchronize_rcu() in a row (the
	 * first follows the dps[] unpublish above) — confirm whether both
	 * grace periods are really required. */
	synchronize_rcu();
	dp_table_destroy(dp->table, 1);
	for (i = 0; i < DP_N_QUEUES; i++)
		skb_queue_purge(&dp->queues[i]);
	for (i = 0; i < DP_MAX_GROUPS; i++)
		kfree(dp->groups[i]);
	free_percpu(dp->stats_percpu);
	kfree(dp);
	module_put(THIS_MODULE);
}
+
+static int destroy_dp(int dp_idx)
+{
+ struct dp_dev *dp_dev, *next;
+ struct datapath *dp;
+ LIST_HEAD(dp_devs);
+ int err;
+
+ rtnl_lock();
+ mutex_lock(&dp_mutex);
+ dp = get_dp(dp_idx);
+ err = -ENODEV;
+ if (!dp)
+ goto err_unlock;
+
+ do_destroy_dp(dp, &dp_devs);
+ err = 0;
+
+err_unlock:
+ mutex_unlock(&dp_mutex);
+ rtnl_unlock();
+ list_for_each_entry_safe (dp_dev, next, &dp_devs, list)
+ free_netdev(dp_dev->dev);
+ return err;
+}
+
/* Attaches net device 'dev' to 'dp' as port number 'port_no' by allocating
 * and publishing a new net_bridge_port.  Returns 0 on success, -EBUSY if
 * 'dev' is already attached to a bridge, or -ENOMEM.
 *
 * Called with RTNL lock and dp_mutex. */
static int new_nbp(struct datapath *dp, struct net_device *dev, int port_no)
{
	struct net_bridge_port *p;

	if (dev->br_port != NULL)
		return -EBUSY;

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return -ENOMEM;

	/* Promiscuous mode lets us receive every frame on the device. */
	dev_set_promiscuity(dev, 1);
	dev_hold(dev);
	p->port_no = port_no;
	p->dp = dp;
	p->dev = dev;
	if (!is_dp_dev(dev))
		rcu_assign_pointer(dev->br_port, p);
	else {
		/* It would make sense to assign dev->br_port here too, but
		 * that causes packets received on internal ports to get caught
		 * in dp_frame_hook().  In turn dp_frame_hook() can reject them
		 * back to network stack, but that's a waste of time. */
	}
	/* Publish the port only after it is fully initialized. */
	rcu_assign_pointer(dp->ports[port_no], p);
	list_add_rcu(&p->node, &dp->port_list);
	dp->n_ports++;

	/* Tell interested userspace about the new link. */
	dp_ifinfo_notify(RTM_NEWLINK, p);

	return 0;
}
+
/* Implements the ODP_PORT_ADD ioctl for datapath 'dp_idx': attaches the
 * device named in '*portp' (or, for ODP_PORT_INTERNAL, a freshly created
 * internal device) as the requested port number.  Returns 0 on success or
 * a negative errno value (-EFAULT, -EINVAL, -ENODEV, -EEXIST, ...). */
static int add_port(int dp_idx, struct odp_port __user *portp)
{
	struct net_device *dev;
	struct datapath *dp;
	struct odp_port port;
	int port_no;
	int err;

	err = -EFAULT;
	if (copy_from_user(&port, portp, sizeof port))
		goto out;
	port.devname[IFNAMSIZ - 1] = '\0';
	port_no = port.port;

	err = -EINVAL;
	if (port_no < 0 || port_no >= DP_MAX_PORTS)
		goto out;

	rtnl_lock();
	dp = get_dp_locked(dp_idx);
	err = -ENODEV;
	if (!dp)
		goto out_unlock_rtnl;

	err = -EEXIST;
	if (dp->ports[port_no])
		goto out_unlock_dp;

	if (!(port.flags & ODP_PORT_INTERNAL)) {
		err = -ENODEV;
		dev = dev_get_by_name(&init_net, port.devname);
		if (!dev)
			goto out_unlock_dp;

		/* Refuse devices that cannot sensibly be bridged. */
		err = -EINVAL;
		if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER ||
		    is_dp_dev(dev))
			goto out_put;
	} else {
		dev = dp_dev_create(dp, port.devname, port_no);
		err = PTR_ERR(dev);
		if (IS_ERR(dev))
			goto out_unlock_dp;
		/* Match the reference that dev_get_by_name() takes in the
		 * other branch, so out_put can treat both cases alike. */
		dev_hold(dev);
	}

	err = new_nbp(dp, dev, port_no);
	if (err)
		goto out_put;

	if (dp_add_if_hook)
		dp_add_if_hook(dp->ports[port_no]);

out_put:
	dev_put(dev);
out_unlock_dp:
	mutex_unlock(&dp->mutex);
out_unlock_rtnl:
	rtnl_unlock();
out:
	return err;
}
+
/* Detaches port 'p' from its datapath and frees it.  For internal (dp_dev)
 * ports the underlying net device is destroyed and, when 'dp_devs' is
 * nonnull, queued there so the caller can free_netdev() it after dropping
 * its locks.  Always returns 0.  Caller must hold the RTNL lock. */
int dp_del_port(struct net_bridge_port *p, struct list_head *dp_devs)
{
	ASSERT_RTNL();

#ifdef SUPPORT_SYSFS
	if (p->port_no != ODPP_LOCAL && dp_del_if_hook)
		sysfs_remove_link(&p->dp->ifobj, p->dev->name);
#endif
	dp_ifinfo_notify(RTM_DELLINK, p);

	p->dp->n_ports--;

	if (is_dp_dev(p->dev)) {
		/* Make sure that no packets arrive from now on, since
		 * dp_dev_xmit() will try to find itself through
		 * p->dp->ports[], and we're about to set that to null. */
		netif_tx_disable(p->dev);
	}

	/* First drop references to device. */
	dev_set_promiscuity(p->dev, -1);
	list_del_rcu(&p->node);
	rcu_assign_pointer(p->dp->ports[p->port_no], NULL);
	rcu_assign_pointer(p->dev->br_port, NULL);

	/* Then wait until no one is still using it, and destroy it. */
	synchronize_rcu();

	if (is_dp_dev(p->dev)) {
		dp_dev_destroy(p->dev);
		if (dp_devs) {
			struct dp_dev *dp_dev = dp_dev_priv(p->dev);
			list_add(&dp_dev->list, dp_devs);
		}
	}
	if (p->port_no != ODPP_LOCAL && dp_del_if_hook) {
		/* NOTE(review): the hook appears to take over releasing the
		 * device reference and freeing 'p' — confirm in brcompat. */
		dp_del_if_hook(p);
	} else {
		dev_put(p->dev);
		kfree(p);
	}

	return 0;
}
+
/* Implements the ODP_PORT_DEL ioctl for datapath 'dp_idx': detaches port
 * 'port_no', which must not be the local port.  Returns 0 on success or a
 * negative errno value (-EINVAL, -ENODEV, -ENOENT). */
static int del_port(int dp_idx, int port_no)
{
	struct dp_dev *dp_dev, *next;
	struct net_bridge_port *p;
	struct datapath *dp;
	LIST_HEAD(dp_devs);
	int err;

	err = -EINVAL;
	if (port_no < 0 || port_no >= DP_MAX_PORTS || port_no == ODPP_LOCAL)
		goto out;

	rtnl_lock();
	dp = get_dp_locked(dp_idx);
	err = -ENODEV;
	if (!dp)
		goto out_unlock_rtnl;

	p = dp->ports[port_no];
	err = -ENOENT;
	if (!p)
		goto out_unlock_dp;

	err = dp_del_port(p, &dp_devs);

out_unlock_dp:
	mutex_unlock(&dp->mutex);
out_unlock_rtnl:
	rtnl_unlock();
out:
	/* Free any detached internal devices now that the locks are gone. */
	list_for_each_entry_safe (dp_dev, next, &dp_devs, list)
		free_netdev(dp_dev->dev);
	return err;
}
+
+/* Must be called with rcu_read_lock. */
+static void
+do_port_input(struct net_bridge_port *p, struct sk_buff *skb)
+{
+ /* Make our own copy of the packet. Otherwise we will mangle the
+ * packet for anyone who came before us (e.g. tcpdump via AF_PACKET).
+ * (No one comes after us, since we tell handle_bridge() that we took
+ * the packet.) */
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ /* Push the Ethernet header back on. */
+ skb_push(skb, ETH_HLEN);
+ skb_reset_mac_header(skb);
+ dp_process_received_packet(skb, p);
+}
+
/* Forwards 'skb', received on port 'p', through datapath 'p->dp': on a
 * flow-table hit the flow's actions are executed; on a miss the packet is
 * queued to userspace.  Takes ownership of 'skb'.
 *
 * Must be called with rcu_read_lock and with bottom-halves disabled. */
void dp_process_received_packet(struct sk_buff *skb, struct net_bridge_port *p)
{
	struct datapath *dp = p->dp;
	struct dp_stats_percpu *stats;
	struct odp_flow_key key;
	struct sw_flow *flow;

	WARN_ON_ONCE(skb_shared(skb));
	WARN_ON_ONCE(skb->destructor);

	/* BHs are off so we don't have to use get_cpu()/put_cpu() here. */
	stats = percpu_ptr(dp->stats_percpu, smp_processor_id());

	/* A nonzero flow_extract() return appears to flag an IP fragment
	 * (cf. drop_frags/n_frags) — confirm against flow.c. */
	if (flow_extract(skb, p ? p->port_no : ODPP_NONE, &key)) {
		if (dp->drop_frags) {
			kfree_skb(skb);
			stats->n_frags++;
			return;
		}
	}

	flow = dp_table_lookup(rcu_dereference(dp->table), &key);
	if (flow) {
		struct sw_flow_actions *acts = rcu_dereference(flow->sf_acts);
		flow_used(flow, skb);
		execute_actions(dp, skb, &key, acts->actions, acts->n_actions,
				GFP_ATOMIC);
		stats->n_hit++;
	} else {
		stats->n_missed++;
		dp_output_control(dp, skb, _ODPL_MISS_NR, 0);
	}
}
+
/*
 * Used as br_handle_frame_hook.  (Cannot run bridge at the same time, even on
 * different set of devices!)
 *
 * The hook's signature changed in Linux 2.6.22, hence the two variants.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
/* Called with rcu_read_lock and bottom-halves disabled. */
static struct sk_buff *dp_frame_hook(struct net_bridge_port *p,
				     struct sk_buff *skb)
{
	do_port_input(p, skb);
	/* Returning NULL tells the caller that we consumed the packet. */
	return NULL;
}
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
/* Called with rcu_read_lock and bottom-halves disabled. */
static int dp_frame_hook(struct net_bridge_port *p, struct sk_buff **pskb)
{
	do_port_input(p, *pskb);
	/* Returning nonzero tells the caller that we consumed the packet. */
	return 1;
}
#else
/* Pre-2.6.0 kernels have no supported bridge hook signature. */
#error
#endif
+
#ifdef CONFIG_XEN
/* This code is copied verbatim from net/dev/core.c in Xen's
 * linux-2.6.18-92.1.10.el5.xs5.0.0.394.644.  We can't call those functions
 * directly because they aren't exported.  Keep it byte-for-byte in sync
 * with the Xen original. */

/* Ensures the linear data area of 'skb' extends at least up to 'ptr'.
 * Returns nonzero on success, 0 on failure. */
static int skb_pull_up_to(struct sk_buff *skb, void *ptr)
{
	if (ptr < (void *)skb->tail)
		return 1;
	if (__pskb_pull_tail(skb,
			     ptr - (void *)skb->data - skb_headlen(skb))) {
		return 1;
	} else {
		return 0;
	}
}

/* Fills in checksum bookkeeping for a packet whose checksum was left blank
 * by a Xen guest (proto_csum_blank).  Only IPv4 TCP/UDP is handled.
 * Returns 0 on success or -EPROTO for packets we cannot handle. */
int skb_checksum_setup(struct sk_buff *skb)
{
	if (skb->proto_csum_blank) {
		if (skb->protocol != htons(ETH_P_IP))
			goto out;
		if (!skb_pull_up_to(skb, skb->nh.iph + 1))
			goto out;
		skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
		switch (skb->nh.iph->protocol) {
		case IPPROTO_TCP:
			skb->csum = offsetof(struct tcphdr, check);
			break;
		case IPPROTO_UDP:
			skb->csum = offsetof(struct udphdr, check);
			break;
		default:
			if (net_ratelimit())
				printk(KERN_ERR "Attempting to checksum a non-"
				       "TCP/UDP packet, dropping a protocol"
				       " %d packet", skb->nh.iph->protocol);
			goto out;
		}
		if (!skb_pull_up_to(skb, skb->h.raw + skb->csum + 2))
			goto out;
		skb->ip_summed = CHECKSUM_HW;
		skb->proto_csum_blank = 0;
	}
	return 0;
out:
	return -EPROTO;
}
#endif
+
/* Queues 'skb' on the queue of 'dp' numbered 'queue_no' (_ODPL_MISS_NR or
 * _ODPL_ACTION_NR) for delivery to userspace, tagging each packet with
 * 'arg'.  Checksums are completed and GSO packets are segmented first,
 * since userspace sees raw packet contents.  Takes ownership of 'skb'.
 * Returns 0 on success or a negative errno value (-ENOBUFS when the queue
 * is full); every failure bumps the per-CPU lost-packet counter. */
int
dp_output_control(struct datapath *dp, struct sk_buff *skb, int queue_no,
		  u32 arg)
{
	struct dp_stats_percpu *stats;
	struct sk_buff_head *queue;
	int port_no;
	int err;

	WARN_ON_ONCE(skb_shared(skb));
	BUG_ON(queue_no != _ODPL_MISS_NR && queue_no != _ODPL_ACTION_NR);

	queue = &dp->queues[queue_no];
	err = -ENOBUFS;
	if (skb_queue_len(queue) >= DP_MAX_QUEUE_LEN)
		goto err_kfree_skb;

	/* If a checksum-deferred packet is forwarded to the controller,
	 * correct the pointers and checksum.  This happens on a regular basis
	 * only on Xen (the CHECKSUM_HW case), on which VMs can pass up packets
	 * that do not have their checksum computed.  We also implement it for
	 * the non-Xen case, but it is difficult to trigger or test this case
	 * there, hence the WARN_ON_ONCE().
	 */
	err = skb_checksum_setup(skb);
	if (err)
		goto err_kfree_skb;
#ifndef CHECKSUM_HW
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		WARN_ON_ONCE(1);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
		/* Until 2.6.22, the start of the transport header was also the
		 * start of data to be checksummed.  Linux 2.6.22 introduced
		 * the csum_start field for this purpose, but we should point
		 * the transport header to it anyway for backward
		 * compatibility, as dev_queue_xmit() does even in 2.6.28. */
		skb_set_transport_header(skb, skb->csum_start -
					      skb_headroom(skb));
#endif
		err = skb_checksum_help(skb);
		if (err)
			goto err_kfree_skb;
	}
#else
	if (skb->ip_summed == CHECKSUM_HW) {
		err = skb_checksum_help(skb, 0);
		if (err)
			goto err_kfree_skb;
	}
#endif

	/* Break apart GSO packets into their component pieces.  Otherwise
	 * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
	if (skb_is_gso(skb)) {
		struct sk_buff *nskb = skb_gso_segment(skb, 0);
		if (nskb) {
			kfree_skb(skb);
			skb = nskb;
			if (unlikely(IS_ERR(skb))) {
				err = PTR_ERR(skb);
				goto err;
			}
		} else {
			/* XXX This case might not be possible.  It's hard to
			 * tell from the skb_gso_segment() code and comment. */
		}
	}

	/* Figure out port number. */
	port_no = ODPP_LOCAL;
	if (skb->dev) {
		if (skb->dev->br_port)
			port_no = skb->dev->br_port->port_no;
		else if (is_dp_dev(skb->dev))
			port_no = dp_dev_priv(skb->dev)->port_no;
	}

	/* Append each packet to queue.  There will be only one packet unless
	 * we broke up a GSO packet above. */
	do {
		struct odp_msg *header;
		struct sk_buff *nskb = skb->next;
		skb->next = NULL;

		/* Make room for the odp_msg header in front of the data. */
		err = skb_cow(skb, sizeof *header);
		if (err) {
			/* Free the remaining segments before bailing out;
			 * err_kfree_skb frees the last one. */
			while (nskb) {
				kfree_skb(skb);
				skb = nskb;
				nskb = skb->next;
			}
			goto err_kfree_skb;
		}

		header = (struct odp_msg*)__skb_push(skb, sizeof *header);
		header->type = queue_no;
		header->length = skb->len;
		header->port = port_no;
		header->reserved = 0;
		header->arg = arg;
		skb_queue_tail(queue, skb);

		skb = nskb;
	} while (skb);

	/* Let any reader blocked in poll/read know there is data. */
	wake_up_interruptible(&dp->waitqueue);
	return 0;

err_kfree_skb:
	kfree_skb(skb);
err:
	stats = percpu_ptr(dp->stats_percpu, get_cpu());
	stats->n_lost++;
	put_cpu();

	return err;
}
+
/* Implements the ODP_FLOW_FLUSH ioctl: deletes every flow in the flow
 * table of 'dp' and resets the flow count. */
static int flush_flows(struct datapath *dp)
{
	dp->n_flows = 0;
	return dp_table_flush(dp);
}
+
+static int validate_actions(const struct sw_flow_actions *actions)
+{
+ unsigned int i;
+
+ for (i = 0; i < actions->n_actions; i++) {
+ const union odp_action *a = &actions->actions[i];
+ switch (a->type) {
+ case ODPAT_OUTPUT:
+ if (a->output.port >= DP_MAX_PORTS)
+ return -EINVAL;
+ break;
+
+ case ODPAT_OUTPUT_GROUP:
+ if (a->output_group.group >= DP_MAX_GROUPS)
+ return -EINVAL;
+ break;
+
+ case ODPAT_SET_VLAN_VID:
+ if (a->vlan_vid.vlan_vid & htons(~VLAN_VID_MASK))
+ return -EINVAL;
+ break;
+
+ case ODPAT_SET_VLAN_PCP:
+ if (a->vlan_pcp.vlan_pcp & ~VLAN_PCP_MASK)
+ return -EINVAL;
+ break;
+
+ default:
+ if (a->type >= ODPAT_N_ACTIONS)
+ return -EOPNOTSUPP;
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static struct sw_flow_actions *get_actions(const struct odp_flow *flow)
+{
+ struct sw_flow_actions *actions;
+ int error;
+
+ actions = flow_actions_alloc(flow->n_actions);
+ error = PTR_ERR(actions);
+ if (IS_ERR(actions))
+ goto error;
+
+ error = -EFAULT;
+ if (copy_from_user(actions->actions, flow->actions,
+ flow->n_actions * sizeof(union odp_action)))
+ goto error_free_actions;
+ error = validate_actions(actions);
+ if (error)
+ goto error_free_actions;
+
+ return actions;
+
+error_free_actions:
+ kfree(actions);
+error:
+ return ERR_PTR(error);
+}
+
+static void get_stats(struct sw_flow *flow, struct odp_flow_stats *stats)
+{
+ if (flow->used.tv_sec) {
+ stats->used_sec = flow->used.tv_sec;
+ stats->used_nsec = flow->used.tv_nsec;
+ } else {
+ stats->used_sec = 0;
+ stats->used_nsec = 0;
+ }
+ stats->n_packets = flow->packet_count;
+ stats->n_bytes = flow->byte_count;
+ stats->ip_tos = flow->ip_tos;
+ stats->tcp_flags = flow->tcp_flags;
+}
+
+static void clear_stats(struct sw_flow *flow)
+{
+ flow->used.tv_sec = flow->used.tv_nsec = 0;
+ flow->tcp_flags = 0;
+ flow->ip_tos = 0;
+ flow->packet_count = 0;
+ flow->byte_count = 0;
+}
+
/* Implements the ODP_FLOW_PUT ioctl: creates or modifies the flow described
 * by the userspace odp_flow_put at 'ufp', honoring the ODPPF_CREATE,
 * ODPPF_MODIFY and ODPPF_ZERO_STATS flags, and copies the flow's previous
 * statistics back to userspace.  Returns 0 on success or a negative errno
 * value (-ENOENT, -EEXIST, -EXFULL, -EFAULT, -ENOMEM). */
static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp)
{
	struct odp_flow_put uf;
	struct sw_flow *flow, **bucket;
	struct dp_table *table;
	struct odp_flow_stats stats;
	int error;

	error = -EFAULT;
	if (copy_from_user(&uf, ufp, sizeof(struct odp_flow_put)))
		goto error;
	uf.flow.key.reserved = 0;

retry:
	table = rcu_dereference(dp->table);
	bucket = dp_table_lookup_for_insert(table, &uf.flow.key);
	if (!bucket) {
		/* No such flow, and the slots where it could go are full. */
		error = uf.flags & ODPPF_CREATE ? -EXFULL : -ENOENT;
		goto error;
	} else if (!*bucket) {
		/* No such flow, but we found an available slot for it. */
		struct sw_flow_actions *acts;

		error = -ENOENT;
		if (!(uf.flags & ODPPF_CREATE))
			goto error;

		/* Expand table, if necessary, to make room. */
		if (dp->n_flows * 4 >= table->n_buckets &&
		    table->n_buckets < DP_MAX_BUCKETS) {
			error = dp_table_expand(dp);
			if (error)
				goto error;

			/* The bucket's location has changed.  Try again. */
			goto retry;
		}

		/* Allocate flow. */
		error = -ENOMEM;
		flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
		if (flow == NULL)
			goto error;
		flow->key = uf.flow.key;
		spin_lock_init(&flow->lock);
		clear_stats(flow);

		/* Obtain actions. */
		acts = get_actions(&uf.flow);
		error = PTR_ERR(acts);
		if (IS_ERR(acts))
			goto error_free_flow;
		rcu_assign_pointer(flow->sf_acts, acts);

		/* Put flow in bucket. */
		rcu_assign_pointer(*bucket, flow);
		dp->n_flows++;
		memset(&stats, 0, sizeof(struct odp_flow_stats));
	} else {
		/* We found a matching flow. */
		struct sw_flow *flow = *rcu_dereference(bucket);
		struct sw_flow_actions *old_acts, *new_acts;
		unsigned long int flags;

		/* Bail out if we're not allowed to modify an existing flow. */
		error = -EEXIST;
		if (!(uf.flags & ODPPF_MODIFY))
			goto error;

		/* Swap actions.  Only replace (and RCU-defer freeing of) the
		 * old actions when the new ones actually differ. */
		new_acts = get_actions(&uf.flow);
		error = PTR_ERR(new_acts);
		if (IS_ERR(new_acts))
			goto error;
		old_acts = rcu_dereference(flow->sf_acts);
		if (old_acts->n_actions != new_acts->n_actions ||
		    memcmp(old_acts->actions, new_acts->actions,
			   sizeof(union odp_action) * old_acts->n_actions)) {
			rcu_assign_pointer(flow->sf_acts, new_acts);
			flow_deferred_free_acts(old_acts);
		} else {
			kfree(new_acts);
		}

		/* Fetch stats, then clear them if necessary. */
		spin_lock_irqsave(&flow->lock, flags);
		get_stats(flow, &stats);
		if (uf.flags & ODPPF_ZERO_STATS)
			clear_stats(flow);
		spin_unlock_irqrestore(&flow->lock, flags);
	}

	/* Copy stats to userspace.
	 * NOTE(review): __copy_to_user() skips access_ok(); this relies on
	 * the earlier copy_from_user() having covered the same region —
	 * confirm that write access is implied. */
	if (__copy_to_user(&ufp->flow.stats, &stats,
			   sizeof(struct odp_flow_stats)))
		return -EFAULT;
	return 0;

error_free_flow:
	kmem_cache_free(flow_cache, flow);
error:
	return error;
}
+
+static int put_actions(const struct sw_flow *flow, struct odp_flow __user *ufp)
+{
+ union odp_action __user *actions;
+ struct sw_flow_actions *sf_acts;
+ u32 n_actions;
+
+ if (__get_user(actions, &ufp->actions) ||
+ __get_user(n_actions, &ufp->n_actions))
+ return -EFAULT;
+
+ if (!n_actions)
+ return 0;
+ if (ufp->n_actions > INT_MAX / sizeof(union odp_action))
+ return -EINVAL;
+
+ sf_acts = rcu_dereference(flow->sf_acts);
+ if (__put_user(sf_acts->n_actions, &ufp->n_actions) ||
+ (actions && copy_to_user(actions, sf_acts->actions,
+ sizeof(union odp_action) *
+ min(sf_acts->n_actions, n_actions))))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int answer_query(struct sw_flow *flow, struct odp_flow __user *ufp)
+{
+ struct odp_flow_stats stats;
+ unsigned long int flags;
+
+ spin_lock_irqsave(&flow->lock, flags);
+ get_stats(flow, &stats);
+ spin_unlock_irqrestore(&flow->lock, flags);
+
+ if (__copy_to_user(&ufp->stats, &stats, sizeof(struct odp_flow_stats)))
+ return -EFAULT;
+ return put_actions(flow, ufp);
+}
+
/* Implements ODP_FLOW_DEL and ODP_FLOW_GET ('cmd'): looks up the flow whose
 * key is given by the userspace odp_flow at 'ufp' and copies its stats and
 * actions back; for ODP_FLOW_DEL the flow is also removed from the table.
 * Returns 0 on success or a negative errno value (-ENOENT, -EFAULT). */
static int del_or_query_flow(struct datapath *dp,
			     struct odp_flow __user *ufp,
			     unsigned int cmd)
{
	struct dp_table *table = rcu_dereference(dp->table);
	struct odp_flow uf;
	struct sw_flow *flow;
	int error;

	error = -EFAULT;
	if (copy_from_user(&uf, ufp, sizeof uf))
		goto error;
	uf.key.reserved = 0;

	flow = dp_table_lookup(table, &uf.key);
	error = -ENOENT;
	if (!flow)
		goto error;

	if (cmd == ODP_FLOW_DEL) {
		/* XXX redundant lookup */
		error = dp_table_delete(table, flow);
		if (error)
			goto error;

		/* XXX These statistics might lose a few packets, since other
		 * CPUs can be using this flow.  We used to synchronize_rcu()
		 * to make sure that we get completely accurate stats, but that
		 * blows our performance, badly. */
		dp->n_flows--;
		error = answer_query(flow, ufp);
		/* Defer freeing until the current RCU grace period ends. */
		flow_deferred_free(flow);
	} else {
		error = answer_query(flow, ufp);
	}

error:
	return error;
}
+
+static int query_multiple_flows(struct datapath *dp,
+ const struct odp_flowvec *flowvec)
+{
+ struct dp_table *table = rcu_dereference(dp->table);
+ int i;
+ for (i = 0; i < flowvec->n_flows; i++) {
+ struct __user odp_flow *ufp = &flowvec->flows[i];
+ struct odp_flow uf;
+ struct sw_flow *flow;
+ int error;
+
+ if (__copy_from_user(&uf, ufp, sizeof uf))
+ return -EFAULT;
+ uf.key.reserved = 0;
+
+ flow = dp_table_lookup(table, &uf.key);
+ if (!flow)
+ error = __clear_user(&ufp->stats, sizeof ufp->stats);
+ else
+ error = answer_query(flow, ufp);
+ if (error)
+ return -EFAULT;
+ }
+ return flowvec->n_flows;
+}
+
/* State threaded through list_flow() by list_flows(). */
struct list_flows_cbdata {
	struct odp_flow __user *uflows;	/* Userspace output array. */
	int n_flows;			/* Capacity of 'uflows'. */
	int listed_flows;		/* Entries written so far. */
};

/* dp_table_foreach() callback: copies 'flow' into the next slot of the
 * userspace array carried in 'cbdata_'.  Returns 0 to continue iterating,
 * a negative errno value on failure, or the (positive) number of listed
 * flows once the array is full — any nonzero value stops the walk. */
static int list_flow(struct sw_flow *flow, void *cbdata_)
{
	struct list_flows_cbdata *cbdata = cbdata_;
	struct odp_flow __user *ufp = &cbdata->uflows[cbdata->listed_flows++];
	int error;

	if (__copy_to_user(&ufp->key, &flow->key, sizeof flow->key))
		return -EFAULT;
	error = answer_query(flow, ufp);
	if (error)
		return error;

	if (cbdata->listed_flows >= cbdata->n_flows)
		return cbdata->listed_flows;
	return 0;
}
+
+static int list_flows(struct datapath *dp, const struct odp_flowvec *flowvec)
+{
+ struct list_flows_cbdata cbdata;
+ int error;
+
+ if (!flowvec->n_flows)
+ return 0;
+
+ cbdata.uflows = flowvec->flows;
+ cbdata.n_flows = flowvec->n_flows;
+ cbdata.listed_flows = 0;
+ error = dp_table_foreach(rcu_dereference(dp->table),
+ list_flow, &cbdata);
+ return error ? error : cbdata.listed_flows;
+}
+
/* Common wrapper for ioctls that operate on an odp_flowvec: copies in the
 * vector header, verifies that the whole flow array is writable, and then
 * invokes 'function'.  A nonnegative return from 'function' is the number
 * of flows processed; it is written back to userspace unless the entire
 * vector was consumed.  Returns 0 or a negative errno value. */
static int do_flowvec_ioctl(struct datapath *dp, unsigned long argp,
			    int (*function)(struct datapath *,
					    const struct odp_flowvec *))
{
	struct odp_flowvec __user *uflowvec;
	struct odp_flowvec flowvec;
	int retval;

	uflowvec = (struct odp_flowvec __user *)argp;
	if (!access_ok(VERIFY_WRITE, uflowvec, sizeof *uflowvec) ||
	    copy_from_user(&flowvec, uflowvec, sizeof flowvec))
		return -EFAULT;

	/* Reject counts that would overflow the size computation below. */
	if (flowvec.n_flows > INT_MAX / sizeof(struct odp_flow))
		return -EINVAL;

	/* Validate the array once here so callees can use the cheaper
	 * __copy_to_user()/__copy_from_user() variants. */
	if (!access_ok(VERIFY_WRITE, flowvec.flows,
		       flowvec.n_flows * sizeof(struct odp_flow)))
		return -EFAULT;

	retval = function(dp, &flowvec);
	return (retval < 0 ? retval
		: retval == flowvec.n_flows ? 0
		: __put_user(retval, &uflowvec->n_flows));
}
+
+static int do_execute(struct datapath *dp, const struct odp_execute *executep)
+{
+ struct odp_execute execute;
+ struct odp_flow_key key;
+ struct sk_buff *skb;
+ struct sw_flow_actions *actions;
+ int err;
+
+ err = -EFAULT;
+ if (copy_from_user(&execute, executep, sizeof execute))
+ goto error;
+
+ err = -EINVAL;
+ if (execute.length < ETH_HLEN || execute.length > 65535)
+ goto error;
+
+ err = -ENOMEM;
+ actions = flow_actions_alloc(execute.n_actions);
+ if (!actions)
+ goto error;
+
+ err = -EFAULT;
+ if (copy_from_user(actions->actions, execute.actions,
+ execute.n_actions * sizeof *execute.actions))
+ goto error_free_actions;
+
+ err = validate_actions(actions);
+ if (err)
+ goto error_free_actions;
+
+ err = -ENOMEM;
+ skb = alloc_skb(execute.length, GFP_KERNEL);
+ if (!skb)
+ goto error_free_actions;
+ if (execute.in_port < DP_MAX_PORTS) {
+ struct net_bridge_port *p = dp->ports[execute.in_port];
+ if (p)
+ skb->dev = p->dev;
+ }
+
+ err = -EFAULT;
+ if (copy_from_user(skb_put(skb, execute.length), execute.data,
+ execute.length))
+ goto error_free_skb;
+
+ flow_extract(skb, execute.in_port, &key);
+ err = execute_actions(dp, skb, &key, actions->actions,
+ actions->n_actions, GFP_KERNEL);
+ kfree(actions);
+ return err;
+
+error_free_skb:
+ kfree_skb(skb);
+error_free_actions:
+ kfree(actions);
+error:
+ return err;
+}
+
+static int
+get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp)
+{
+ struct odp_stats stats;
+ int i;
+
+ stats.n_flows = dp->n_flows;
+ stats.cur_capacity = rcu_dereference(dp->table)->n_buckets * 2;
+ stats.max_capacity = DP_MAX_BUCKETS * 2;
+ stats.n_ports = dp->n_ports;
+ stats.max_ports = DP_MAX_PORTS;
+ stats.max_groups = DP_MAX_GROUPS;
+ stats.n_frags = stats.n_hit = stats.n_missed = stats.n_lost = 0;
+ for_each_possible_cpu(i) {
+ const struct dp_stats_percpu *s;
+ s = percpu_ptr(dp->stats_percpu, i);
+ stats.n_frags += s->n_frags;
+ stats.n_hit += s->n_hit;
+ stats.n_missed += s->n_missed;
+ stats.n_lost += s->n_lost;
+ }
+ stats.max_miss_queue = DP_MAX_QUEUE_LEN;
+ stats.max_action_queue = DP_MAX_QUEUE_LEN;
+ return copy_to_user(statsp, &stats, sizeof stats) ? -EFAULT : 0;
+}
+
+static int
+put_port(const struct net_bridge_port *p, struct odp_port __user *uop)
+{
+ struct odp_port op;
+ memset(&op, 0, sizeof op);
+ strncpy(op.devname, p->dev->name, sizeof op.devname);
+ op.port = p->port_no;
+ op.flags = is_dp_dev(p->dev) ? ODP_PORT_INTERNAL : 0;
+ return copy_to_user(uop, &op, sizeof op) ? -EFAULT : 0;
+}
+
/* Implements the ODP_PORT_QUERY ioctl: fills in '*uport' with a description
 * of the port of 'dp' identified either by name (if 'devname' is nonempty)
 * or by port number.  Returns 0 on success or a negative errno value
 * (-EFAULT, -ENODEV, -ENOENT, -EINVAL). */
static int
query_port(struct datapath *dp, struct odp_port __user *uport)
{
	struct odp_port port;

	if (copy_from_user(&port, uport, sizeof port))
		return -EFAULT;
	if (port.devname[0]) {
		struct net_bridge_port *p;
		struct net_device *dev;
		int err;

		port.devname[IFNAMSIZ - 1] = '\0';

		dev = dev_get_by_name(&init_net, port.devname);
		if (!dev)
			return -ENODEV;

		p = dev->br_port;
		if (!p && is_dp_dev(dev)) {
			/* Internal devices do not set dev->br_port (see
			 * new_nbp()), so find the port through the dp_dev
			 * private data instead. */
			struct dp_dev *dp_dev = dp_dev_priv(dev);
			if (dp_dev->dp == dp)
				p = dp->ports[dp_dev->port_no];
		}
		err = p && p->dp == dp ? put_port(p, uport) : -ENOENT;
		dev_put(dev);

		return err;
	} else {
		if (port.port >= DP_MAX_PORTS)
			return -EINVAL;
		if (!dp->ports[port.port])
			return -ENOENT;
		return put_port(dp->ports[port.port], uport);
	}
}
+
+static int
+list_ports(struct datapath *dp, struct odp_portvec __user *pvp)
+{
+ struct odp_portvec pv;
+ struct net_bridge_port *p;
+ int idx;
+
+ if (copy_from_user(&pv, pvp, sizeof pv))
+ return -EFAULT;
+
+ idx = 0;
+ if (pv.n_ports) {
+ list_for_each_entry_rcu (p, &dp->port_list, node) {
+ if (put_port(p, &pv.ports[idx]))
+ return -EFAULT;
+ if (idx++ >= pv.n_ports)
+ break;
+ }
+ }
+ return put_user(idx, &pvp->n_ports);
+}
+
+/* RCU callback for freeing a dp_port_group */
+static void free_port_group(struct rcu_head *rcu)
+{
+ struct dp_port_group *g = container_of(rcu, struct dp_port_group, rcu);
+ kfree(g);
+}
+
/* Implements the ODP_PORT_GROUP_SET ioctl: replaces port group 'pg.group'
 * of 'dp' with the list of port numbers supplied from userspace.  The old
 * group, if any, is freed via RCU once readers are done with it.
 * Returns 0 on success or a negative errno value (-EFAULT, -EINVAL,
 * -ENOMEM). */
static int
set_port_group(struct datapath *dp, const struct odp_port_group __user *upg)
{
	struct odp_port_group pg;
	struct dp_port_group *new_group, *old_group;
	int error;

	error = -EFAULT;
	if (copy_from_user(&pg, upg, sizeof pg))
		goto error;

	error = -EINVAL;
	if (pg.n_ports > DP_MAX_PORTS || pg.group >= DP_MAX_GROUPS)
		goto error;

	/* The port numbers are stored in a flexible-style trailing array. */
	error = -ENOMEM;
	new_group = kmalloc(sizeof *new_group + sizeof(u16) * pg.n_ports,
			    GFP_KERNEL);
	if (!new_group)
		goto error;

	new_group->n_ports = pg.n_ports;
	error = -EFAULT;
	if (copy_from_user(new_group->ports, pg.ports,
			   sizeof(u16) * pg.n_ports))
		goto error_free;

	/* Publish the new group, then defer freeing the old one until the
	 * current RCU grace period ends. */
	old_group = rcu_dereference(dp->groups[pg.group]);
	rcu_assign_pointer(dp->groups[pg.group], new_group);
	if (old_group)
		call_rcu(&old_group->rcu, free_port_group);
	return 0;

error_free:
	kfree(new_group);
error:
	return error;
}
+
/* ODP_PORT_GROUP_GET handler: copies out at most 'pg.n_ports' members of
 * port group 'pg.group' and stores the group's true size in upg->n_ports,
 * so the caller can detect truncation and retry with a larger buffer.
 * Called with dp->mutex held, which is what makes the plain (non-RCU)
 * read of dp->groups[] safe against concurrent set_port_group() calls. */
static int
get_port_group(struct datapath *dp, struct odp_port_group *upg)
{
	struct odp_port_group pg;
	struct dp_port_group *g;
	u16 n_copy;	/* Always fits: bounded by DP_MAX_PORTS (256). */

	if (copy_from_user(&pg, upg, sizeof pg))
		return -EFAULT;

	if (pg.group >= DP_MAX_GROUPS)
		return -EINVAL;

	g = dp->groups[pg.group];
	n_copy = g ? min_t(int, g->n_ports, pg.n_ports) : 0;
	if (n_copy && copy_to_user(pg.ports, g->ports, n_copy * sizeof(u16)))
		return -EFAULT;

	/* A nonexistent group reads back as an empty one. */
	if (put_user(g ? g->n_ports : 0, &upg->n_ports))
		return -EFAULT;

	return 0;
}
+
/* Main ioctl entry point for the datapath control device.  The target
 * datapath is selected by the character device's minor number.  A few
 * commands (create/destroy datapath, add/delete port) have their own
 * locking requirements and are dispatched before dp->mutex is taken;
 * every other command runs with dp->mutex held and released at the end.
 * Returns 0 or a negative errno. */
static long openvswitch_ioctl(struct file *f, unsigned int cmd,
			   unsigned long argp)
{
	int dp_idx = iminor(f->f_dentry->d_inode);
	struct datapath *dp;
	int drop_frags, listeners, port_no;
	int err;

	/* Handle commands with special locking requirements up front. */
	switch (cmd) {
	case ODP_DP_CREATE:
		return create_dp(dp_idx, (char __user *)argp);

	case ODP_DP_DESTROY:
		return destroy_dp(dp_idx);

	case ODP_PORT_ADD:
		return add_port(dp_idx, (struct odp_port __user *)argp);

	case ODP_PORT_DEL:
		err = get_user(port_no, (int __user *)argp);
		if (err)
			break;
		return del_port(dp_idx, port_no);
	}

	/* Everything below requires an existing datapath, locked. */
	dp = get_dp_locked(dp_idx);
	if (!dp)
		return -ENODEV;

	switch (cmd) {
	case ODP_DP_STATS:
		err = get_dp_stats(dp, (struct odp_stats __user *)argp);
		break;

	case ODP_GET_DROP_FRAGS:
		err = put_user(dp->drop_frags, (int __user *)argp);
		break;

	case ODP_SET_DROP_FRAGS:
		err = get_user(drop_frags, (int __user *)argp);
		if (err)
			break;
		err = -EINVAL;
		if (drop_frags != 0 && drop_frags != 1)
			break;
		dp->drop_frags = drop_frags;
		err = 0;
		break;

	/* The per-open listen mask is stashed directly in the file's
	 * private_data pointer (see also openvswitch_read()). */
	case ODP_GET_LISTEN_MASK:
		err = put_user((int)f->private_data, (int __user *)argp);
		break;

	case ODP_SET_LISTEN_MASK:
		err = get_user(listeners, (int __user *)argp);
		if (err)
			break;
		err = -EINVAL;
		if (listeners & ~ODPL_ALL)
			break;
		err = 0;
		f->private_data = (void*)listeners;
		break;

	case ODP_PORT_QUERY:
		err = query_port(dp, (struct odp_port __user *)argp);
		break;

	case ODP_PORT_LIST:
		err = list_ports(dp, (struct odp_portvec __user *)argp);
		break;

	case ODP_PORT_GROUP_SET:
		err = set_port_group(dp, (struct odp_port_group __user *)argp);
		break;

	case ODP_PORT_GROUP_GET:
		err = get_port_group(dp, (struct odp_port_group __user *)argp);
		break;

	case ODP_FLOW_FLUSH:
		err = flush_flows(dp);
		break;

	case ODP_FLOW_PUT:
		err = put_flow(dp, (struct odp_flow_put __user *)argp);
		break;

	case ODP_FLOW_DEL:
	case ODP_FLOW_GET:
		err = del_or_query_flow(dp, (struct odp_flow __user *)argp,
					cmd);
		break;

	case ODP_FLOW_GET_MULTIPLE:
		err = do_flowvec_ioctl(dp, argp, query_multiple_flows);
		break;

	case ODP_FLOW_LIST:
		err = do_flowvec_ioctl(dp, argp, list_flows);
		break;

	case ODP_EXECUTE:
		err = do_execute(dp, (struct odp_execute __user *)argp);
		break;

	default:
		err = -ENOIOCTLCMD;
		break;
	}
	mutex_unlock(&dp->mutex);
	return err;
}
+
+static int dp_has_packet_of_interest(struct datapath *dp, int listeners)
+{
+ int i;
+ for (i = 0; i < DP_N_QUEUES; i++) {
+ if (listeners & (1 << i) && !skb_queue_empty(&dp->queues[i]))
+ return 1;
+ }
+ return 0;
+}
+
+ssize_t openvswitch_read(struct file *f, char __user *buf, size_t nbytes,
+ loff_t *ppos)
+{
+ int listeners = (int) f->private_data;
+ int dp_idx = iminor(f->f_dentry->d_inode);
+ struct datapath *dp = get_dp(dp_idx);
+ struct sk_buff *skb;
+ struct iovec __user iov;
+ size_t copy_bytes;
+ int retval;
+
+ if (!dp)
+ return -ENODEV;
+
+ if (nbytes == 0 || !listeners)
+ return 0;
+
+ for (;;) {
+ int i;
+
+ for (i = 0; i < DP_N_QUEUES; i++) {
+ if (listeners & (1 << i)) {
+ skb = skb_dequeue(&dp->queues[i]);
+ if (skb)
+ goto success;
+ }
+ }
+
+ if (f->f_flags & O_NONBLOCK) {
+ retval = -EAGAIN;
+ goto error;
+ }
+
+ wait_event_interruptible(dp->waitqueue,
+ dp_has_packet_of_interest(dp,
+ listeners));
+
+ if (signal_pending(current)) {
+ retval = -ERESTARTSYS;
+ goto error;
+ }
+ }
+success:
+ copy_bytes = min(skb->len, nbytes);
+ iov.iov_base = buf;
+ iov.iov_len = copy_bytes;
+ retval = skb_copy_datagram_iovec(skb, 0, &iov, iov.iov_len);
+ if (!retval)
+ retval = copy_bytes;
+ kfree_skb(skb);
+
+error:
+ return retval;
+}
+
+static unsigned int openvswitch_poll(struct file *file, poll_table *wait)
+{
+ int dp_idx = iminor(file->f_dentry->d_inode);
+ struct datapath *dp = get_dp(dp_idx);
+ unsigned int mask;
+
+ if (dp) {
+ mask = 0;
+ poll_wait(file, &dp->waitqueue, wait);
+ if (dp_has_packet_of_interest(dp, (int)file->private_data))
+ mask |= POLLIN | POLLRDNORM;
+ } else {
+ mask = POLLIN | POLLRDNORM | POLLHUP;
+ }
+ return mask;
+}
+
/* File operations for the datapath control character device registered in
 * dp_init().
 * NOTE(review): neither static nor const — confirm whether another
 * translation unit references this symbol; if not, it could be both. */
struct file_operations openvswitch_fops = {
	/* XXX .aio_read = openvswitch_aio_read, */
	.read  = openvswitch_read,
	.poll  = openvswitch_poll,
	.unlocked_ioctl = openvswitch_ioctl,
	/* XXX .fasync	 = openvswitch_fasync, */
};
+
+static int major;
+static struct llc_sap *dp_stp_sap;
+
/* LLC receive handler for STP (802.1D) frames.
 *
 * We don't process STP at all; the LLC sap is held purely for mutual
 * exclusion with the bridge module (see dp_init()), so every frame that
 * arrives here is simply discarded. */
static int dp_stp_rcv(struct sk_buff *skb, struct net_device *dev,
		      struct packet_type *pt, struct net_device *orig_dev)
{
	kfree_skb(skb);
	return 0;
}
+
+static int __init dp_init(void)
+{
+ int err;
+
+ printk("Open vSwitch %s, built "__DATE__" "__TIME__"\n", VERSION BUILDNR);
+
+ /* Register to receive STP packets because the bridge module also
+ * attempts to do so. Since there can only be a single listener for a
+ * given protocol, this provides mutual exclusion against the bridge
+ * module, preventing both of them from being loaded at the same
+ * time. */
+ dp_stp_sap = llc_sap_open(LLC_SAP_BSPAN, dp_stp_rcv);
+ if (!dp_stp_sap) {
+ printk(KERN_ERR "openvswitch: can't register sap for STP (probably the bridge module is loaded)\n");
+ return -EADDRINUSE;
+ }
+
+ err = flow_init();
+ if (err)
+ goto error;
+
+ err = register_netdevice_notifier(&dp_device_notifier);
+ if (err)
+ goto error_flow_exit;
+
+ major = register_chrdev(0, "openvswitch", &openvswitch_fops);
+ if (err < 0)
+ goto error_unreg_notifier;
+
+ /* Hook into callback used by the bridge to intercept packets.
+ * Parasites we are. */
+ br_handle_frame_hook = dp_frame_hook;
+
+ return 0;
+
+error_unreg_notifier:
+ unregister_netdevice_notifier(&dp_device_notifier);
+error_flow_exit:
+ flow_exit();
+error:
+ return err;
+}
+
/* Module unload: tears down in roughly the reverse order of dp_init().
 * The initial rcu_barrier() waits for all pending RCU callbacks (deferred
 * flow and port-group frees) to run before the resources they depend on
 * go away. */
static void dp_cleanup(void)
{
	rcu_barrier();
	unregister_chrdev(major, "openvswitch");
	unregister_netdevice_notifier(&dp_device_notifier);
	flow_exit();
	br_handle_frame_hook = NULL;
	llc_sap_put(dp_stp_sap);
}
+
+module_init(dp_init);
+module_exit(dp_cleanup);
+
+MODULE_DESCRIPTION("Open vSwitch switching datapath");
+MODULE_LICENSE("GPL");
diff --git a/datapath/datapath.h b/datapath/datapath.h
new file mode 100644
index 00000000..102b27f3
--- /dev/null
+++ b/datapath/datapath.h
@@ -0,0 +1,139 @@
+/* Interface exported by openvswitch_mod. */
+
+#ifndef DATAPATH_H
+#define DATAPATH_H 1
+
+#include <asm/page.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/netlink.h>
+#include <linux/netdevice.h>
+#include <linux/workqueue.h>
+#include <linux/skbuff.h>
+#include "flow.h"
+#include "brc_sysfs.h"
+
+struct sk_buff;
+
+/* Mask for the priority bits in a vlan header. If we ever merge upstream
+ * then this should go into include/linux/if_vlan.h. */
+#define VLAN_PCP_MASK 0xe000
+
+#define DP_MAX_PORTS 256
+#define DP_MAX_GROUPS 16
+
+#define DP_L2_BITS (PAGE_SHIFT - ilog2(sizeof(struct sw_flow*)))
+#define DP_L2_SIZE (1 << DP_L2_BITS)
+#define DP_L2_SHIFT 0
+
+#define DP_L1_BITS (PAGE_SHIFT - ilog2(sizeof(struct sw_flow**)))
+#define DP_L1_SIZE (1 << DP_L1_BITS)
+#define DP_L1_SHIFT DP_L2_BITS
+
+#define DP_MAX_BUCKETS (DP_L1_SIZE * DP_L2_SIZE)
+
+struct dp_table {
+ unsigned int n_buckets;
+ struct sw_flow ***flows[2];
+ struct rcu_head rcu;
+};
+
+#define DP_N_QUEUES 2
+#define DP_MAX_QUEUE_LEN 100
+
+struct dp_stats_percpu {
+ u64 n_frags;
+ u64 n_hit;
+ u64 n_missed;
+ u64 n_lost;
+};
+
+struct dp_port_group {
+ struct rcu_head rcu;
+ int n_ports;
+ u16 ports[];
+};
+
/* One software datapath (switch instance), identified by its minor
 * number 'dp_idx'. */
struct datapath {
	struct mutex mutex;	/* Serializes ioctl operations on this dp. */
	int dp_idx;		/* Datapath number (device minor). */

#ifdef SUPPORT_SYSFS
	struct kobject ifobj;	/* Sysfs kobject (bridge compatibility). */
#endif

	int drop_frags;		/* Nonzero to drop IP fragments (0 or 1). */

	/* Queued data. */
	struct sk_buff_head queues[DP_N_QUEUES]; /* Packets for userspace. */
	wait_queue_head_t waitqueue;	/* Readers waiting on 'queues'. */

	/* Flow table. */
	unsigned int n_flows;	/* Number of flows currently in the table. */
	struct dp_table *table;	/* Two-level flow table; RCU-protected. */

	/* Port groups. */
	struct dp_port_group *groups[DP_MAX_GROUPS]; /* RCU-protected. */

	/* Switch ports. */
	unsigned int n_ports;	/* Number of non-null entries in 'ports'. */
	struct net_bridge_port *ports[DP_MAX_PORTS]; /* By port number. */
	struct list_head port_list; /* All ports, including local_port. */

	/* Stats. */
	struct dp_stats_percpu *stats_percpu; /* Per-CPU hit/miss counters. */
};
+
/* One switch port attached to a datapath. */
struct net_bridge_port {
	u16 port_no;		/* Index into datapath's 'ports' array. */
	struct datapath	*dp;	/* Datapath this port belongs to. */
	struct net_device *dev;	/* Underlying network device. */
#ifdef SUPPORT_SYSFS
	struct kobject kobj;	/* Sysfs kobject (bridge compatibility). */
#endif
	struct list_head node; /* Element in datapath.port_list. */
};
+
+extern struct notifier_block dp_device_notifier;
+extern int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
+extern int (*dp_add_dp_hook)(struct datapath *dp);
+extern int (*dp_del_dp_hook)(struct datapath *dp);
+extern int (*dp_add_if_hook)(struct net_bridge_port *p);
+extern int (*dp_del_if_hook)(struct net_bridge_port *p);
+
+/* Flow table. */
+struct dp_table *dp_table_create(unsigned int n_buckets);
+void dp_table_destroy(struct dp_table *, int free_flows);
+struct sw_flow *dp_table_lookup(struct dp_table *, const struct odp_flow_key *);
+struct sw_flow **dp_table_lookup_for_insert(struct dp_table *, const struct odp_flow_key *);
+int dp_table_delete(struct dp_table *, struct sw_flow *);
+int dp_table_expand(struct datapath *);
+int dp_table_flush(struct datapath *);
+int dp_table_foreach(struct dp_table *table,
+ int (*callback)(struct sw_flow *flow, void *aux),
+ void *aux);
+
+void dp_process_received_packet(struct sk_buff *, struct net_bridge_port *);
+int dp_del_port(struct net_bridge_port *, struct list_head *);
+int dp_output_port(struct datapath *, struct sk_buff *, int out_port,
+ int ignore_no_fwd);
+int dp_output_control(struct datapath *, struct sk_buff *, int, u32 arg);
+void dp_set_origin(struct datapath *, u16, struct sk_buff *);
+
+struct datapath *get_dp(int dp_idx);
+
/* Returns the name of the datapath's local port's network device, which
 * doubles as the name of the datapath itself. */
static inline const char *dp_name(const struct datapath *dp)
{
	return dp->ports[ODPP_LOCAL]->dev->name;
}
+
+#ifdef CONFIG_XEN
+int skb_checksum_setup(struct sk_buff *skb);
+#else
+static inline int skb_checksum_setup(struct sk_buff *skb)
+{
+ return 0;
+}
+#endif
+
+#endif /* datapath.h */
diff --git a/datapath/dp_dev.c b/datapath/dp_dev.c
new file mode 100644
index 00000000..8a749dbc
--- /dev/null
+++ b/datapath/dp_dev.c
@@ -0,0 +1,210 @@
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
+#include <linux/workqueue.h>
+
+#include "datapath.h"
+#include "dp_dev.h"
+
/* Returns the datapath that datapath device 'netdev' belongs to.
 * Exported so that the brcompat module can resolve a device to its dp. */
struct datapath *dp_dev_get_dp(struct net_device *netdev)
{
	return dp_dev_priv(netdev)->dp;
}
EXPORT_SYMBOL(dp_dev_get_dp);
+
+static struct net_device_stats *dp_dev_get_stats(struct net_device *netdev)
+{
+ struct dp_dev *dp_dev = dp_dev_priv(netdev);
+ return &dp_dev->stats;
+}
+
/* Delivers 'skb' up the local network stack as if it had been received on
 * datapath device 'netdev'.  Returns the number of bytes delivered.
 *
 * 'len' must be captured before eth_type_trans(), which pulls the
 * Ethernet header off and would otherwise shrink the reported count. */
int dp_dev_recv(struct net_device *netdev, struct sk_buff *skb)
{
	struct dp_dev *dp_dev = dp_dev_priv(netdev);
	int len;
	len = skb->len;
	skb->pkt_type = PACKET_HOST;
	skb->protocol = eth_type_trans(skb, netdev);
	/* netif_rx() is the interrupt-context entry point; netif_rx_ni()
	 * must be used from process context. */
	if (in_interrupt())
		netif_rx(skb);
	else
		netif_rx_ni(skb);
	netdev->last_rx = jiffies;
	dp_dev->stats.rx_packets++;
	dp_dev->stats.rx_bytes += len;
	return len;
}
+
+static int dp_dev_mac_addr(struct net_device *dev, void *p)
+{
+ struct sockaddr *addr = p;
+
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EADDRNOTAVAIL;
+ memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+ return 0;
+}
+
/* net_device hard_start_xmit callback: queues 'skb' for re-injection into
 * the datapath from process context (see dp_dev_do_xmit()).  Always
 * consumes 'skb' and returns 0. */
static int dp_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	struct dp_dev *dp_dev = dp_dev_priv(netdev);

	/* By orphaning 'skb' we will screw up socket accounting slightly, but
	 * the effect is limited to the device queue length. If we don't
	 * do this, then the sk_buff will be destructed eventually, but it is
	 * harder to predict when. */
	skb_orphan(skb);

	/* We are going to modify 'skb', by sticking it on &dp_dev->xmit_queue,
	 * so we need to have our own clone. (At any rate, fwd_port_input()
	 * will need its own clone, so there's no benefit to queuing any other
	 * way.) */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (!skb)
		return 0;

	dp_dev->stats.tx_packets++;
	dp_dev->stats.tx_bytes += skb->len;

	if (skb_queue_len(&dp_dev->xmit_queue) >= netdev->tx_queue_len) {
		/* Queue overflow. Stop transmitter. */
		netif_stop_queue(netdev);

		/* We won't see all dropped packets individually, so overrun
		 * error is appropriate. */
		dp_dev->stats.tx_fifo_errors++;
		/* NOTE(review): the packet is still queued below even after
		 * overflow is detected — confirm this overshoot of
		 * tx_queue_len by one is intentional. */
	}
	skb_queue_tail(&dp_dev->xmit_queue, skb);
	netdev->trans_start = jiffies;

	/* Actual datapath processing happens in the workqueue so that it
	 * runs in process context. */
	schedule_work(&dp_dev->xmit_work);

	return 0;
}
+
/* Workqueue handler: drains the per-device xmit queue, feeding each packet
 * into the datapath as if received on this port, then restarts the
 * transmit queue that dp_dev_xmit() may have stopped on overflow. */
static void dp_dev_do_xmit(struct work_struct *work)
{
	struct dp_dev *dp_dev = container_of(work, struct dp_dev, xmit_work);
	struct datapath *dp = dp_dev->dp;
	struct sk_buff *skb;

	while ((skb = skb_dequeue(&dp_dev->xmit_queue)) != NULL) {
		skb_reset_mac_header(skb);
		/* dp_process_received_packet() runs under RCU-BH read-side
		 * protection, matching the receive path. */
		rcu_read_lock_bh();
		dp_process_received_packet(skb, dp->ports[dp_dev->port_no]);
		rcu_read_unlock_bh();
	}
	netif_wake_queue(dp_dev->dev);
}
+
/* net_device open callback: nothing to set up beyond enabling transmit.
 * (is_dp_dev() identifies datapath devices by this function's address.) */
static int dp_dev_open(struct net_device *netdev)
{
	netif_start_queue(netdev);
	return 0;
}
+
/* net_device stop callback: just disables the transmit queue. */
static int dp_dev_stop(struct net_device *netdev)
{
	netif_stop_queue(netdev);
	return 0;
}
+
+static void dp_getinfo(struct net_device *netdev, struct ethtool_drvinfo *info)
+{
+ struct dp_dev *dp_dev = dp_dev_priv(netdev);
+ strcpy(info->driver, "openvswitch");
+ sprintf(info->bus_info, "%d", dp_dev->dp->dp_idx);
+}
+
+static struct ethtool_ops dp_ethtool_ops = {
+ .get_drvinfo = dp_getinfo,
+ .get_link = ethtool_op_get_link,
+ .get_sg = ethtool_op_get_sg,
+ .get_tx_csum = ethtool_op_get_tx_csum,
+ .get_tso = ethtool_op_get_tso,
+};
+
/* alloc_netdev() setup callback: initializes 'netdev' as an Ethernet-style
 * datapath device with our net_device operations and a random MAC address
 * under the Nicira OUI. */
static void
do_setup(struct net_device *netdev)
{
	ether_setup(netdev);

	/* Wire up the net_device operations defined in this file. */
	netdev->do_ioctl = dp_ioctl_hook;
	netdev->get_stats = dp_dev_get_stats;
	netdev->hard_start_xmit = dp_dev_xmit;
	netdev->open = dp_dev_open;
	SET_ETHTOOL_OPS(netdev, &dp_ethtool_ops);
	netdev->stop = dp_dev_stop;
	netdev->tx_queue_len = 100;	/* Soft cap; see dp_dev_xmit(). */
	netdev->set_mac_address = dp_dev_mac_addr;

	netdev->flags = IFF_BROADCAST | IFF_MULTICAST;

	random_ether_addr(netdev->dev_addr);

	/* Set the OUI to the Nicira one. */
	netdev->dev_addr[0] = 0x00;
	netdev->dev_addr[1] = 0x23;
	netdev->dev_addr[2] = 0x20;

	/* Set the top bits to indicate random Nicira address. */
	netdev->dev_addr[3] |= 0xc0;
}
+
/* Create a datapath device associated with 'dp'.  If 'dp_name' is null,
 * the device name will be of the form 'of<dp_idx>'.  Returns the new device
 * or an error code wrapped with ERR_PTR().
 *
 * Called with RTNL lock and dp_mutex. */
struct net_device *dp_dev_create(struct datapath *dp, const char *dp_name, int port_no)
{
	struct dp_dev *dp_dev;
	struct net_device *netdev;
	char dev_name[IFNAMSIZ];
	int err;

	if (dp_name) {
		/* The strlen() check guarantees strncpy() below leaves
		 * dev_name NUL-terminated. */
		if (strlen(dp_name) >= IFNAMSIZ)
			return ERR_PTR(-EINVAL);
		strncpy(dev_name, dp_name, sizeof(dev_name));
	} else
		snprintf(dev_name, sizeof dev_name, "of%d", dp->dp_idx);

	netdev = alloc_netdev(sizeof(struct dp_dev), dev_name, do_setup);
	if (!netdev)
		return ERR_PTR(-ENOMEM);

	err = register_netdevice(netdev);
	if (err) {
		free_netdev(netdev);
		return ERR_PTR(err);
	}

	/* Fill in the private state only after registration succeeds. */
	dp_dev = dp_dev_priv(netdev);
	dp_dev->dp = dp;
	dp_dev->port_no = port_no;
	dp_dev->dev = netdev;
	skb_queue_head_init(&dp_dev->xmit_queue);
	INIT_WORK(&dp_dev->xmit_work, dp_dev_do_xmit);
	return netdev;
}
+
/* Destroys datapath device 'netdev', dropping any packets still waiting on
 * its transmit queue.  Called with RTNL lock and dp_mutex.
 * NOTE(review): a pending xmit_work item is not cancelled here — verify it
 * cannot run (and touch the device) after unregister_netdevice(). */
void dp_dev_destroy(struct net_device *netdev)
{
	struct dp_dev *dp_dev = dp_dev_priv(netdev);

	netif_tx_disable(netdev);
	synchronize_net();
	skb_queue_purge(&dp_dev->xmit_queue);
	unregister_netdevice(netdev);
}
+
/* Returns nonzero if 'netdev' is one of our datapath devices, recognized
 * by its open callback pointing at dp_dev_open().  Exported for brcompat. */
int is_dp_dev(struct net_device *netdev)
{
	return netdev->open == dp_dev_open;
}
EXPORT_SYMBOL(is_dp_dev);
diff --git a/datapath/dp_dev.h b/datapath/dp_dev.h
new file mode 100644
index 00000000..84874390
--- /dev/null
+++ b/datapath/dp_dev.h
@@ -0,0 +1,27 @@
+#ifndef DP_DEV_H
+#define DP_DEV_H 1
+
+struct dp_dev {
+ struct datapath *dp;
+ int port_no;
+
+ struct net_device *dev;
+ struct net_device_stats stats;
+ struct sk_buff_head xmit_queue;
+ struct work_struct xmit_work;
+
+ struct list_head list;
+};
+
+static inline struct dp_dev *dp_dev_priv(struct net_device *netdev)
+{
+ return netdev_priv(netdev);
+}
+
+struct net_device *dp_dev_create(struct datapath *, const char *, int port_no);
+void dp_dev_destroy(struct net_device *);
+int dp_dev_recv(struct net_device *, struct sk_buff *);
+int is_dp_dev(struct net_device *);
+struct datapath *dp_dev_get_dp(struct net_device *);
+
+#endif /* dp_dev.h */
diff --git a/datapath/dp_notify.c b/datapath/dp_notify.c
new file mode 100644
index 00000000..56d5c3c9
--- /dev/null
+++ b/datapath/dp_notify.c
@@ -0,0 +1,29 @@
+/*
+ * Distributed under the terms of the GNU GPL version 2.
+ * Copyright (c) 2007, 2008, 2009 Nicira Networks.
+ */
+
+/* Handle changes to managed devices */
+
+#include <linux/netdevice.h>
+
+#include "datapath.h"
+
+
+static int dp_device_event(struct notifier_block *unused, unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = ptr;
+ struct net_bridge_port *p = dev->br_port;
+ if (event == NETDEV_UNREGISTER && p) {
+ struct datapath *dp = p->dp;
+ mutex_lock(&dp->mutex);
+ dp_del_port(p, NULL);
+ mutex_unlock(&dp->mutex);
+ }
+ return NOTIFY_DONE;
+}
+
+struct notifier_block dp_device_notifier = {
+ .notifier_call = dp_device_event
+};
diff --git a/datapath/flow.c b/datapath/flow.c
new file mode 100644
index 00000000..b24c242c
--- /dev/null
+++ b/datapath/flow.c
@@ -0,0 +1,301 @@
+/*
+ * Distributed under the terms of the GNU GPL version 2.
+ * Copyright (c) 2007, 2008, 2009 Nicira Networks.
+ */
+
+#include "flow.h"
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <net/llc_pdu.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/llc.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/rcupdate.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <net/ip.h>
+
+#include "compat.h"
+
+struct kmem_cache *flow_cache;
+
+static inline int iphdr_ok(struct sk_buff *skb)
+{
+ int nh_ofs = skb_network_offset(skb);
+ if (skb->len >= nh_ofs + sizeof(struct iphdr)) {
+ int ip_len = ip_hdrlen(skb);
+ return (ip_len >= sizeof(struct iphdr)
+ && pskb_may_pull(skb, nh_ofs + ip_len));
+ }
+ return 0;
+}
+
+static inline int tcphdr_ok(struct sk_buff *skb)
+{
+ int th_ofs = skb_transport_offset(skb);
+ if (pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr))) {
+ int tcp_len = tcp_hdrlen(skb);
+ return (tcp_len >= sizeof(struct tcphdr)
+ && skb->len >= th_ofs + tcp_len);
+ }
+ return 0;
+}
+
+static inline int udphdr_ok(struct sk_buff *skb)
+{
+ int th_ofs = skb_transport_offset(skb);
+ return pskb_may_pull(skb, th_ofs + sizeof(struct udphdr));
+}
+
+static inline int icmphdr_ok(struct sk_buff *skb)
+{
+ int th_ofs = skb_transport_offset(skb);
+ return pskb_may_pull(skb, th_ofs + sizeof(struct icmphdr));
+}
+
+#define TCP_FLAGS_OFFSET 13
+#define TCP_FLAG_MASK 0x3f
+
/* Returns skb's transport header cast to our private TCP header layout
 * (struct ovs_tcphdr).  Presumably callers validate the header first with
 * tcphdr_ok() — confirm at each call site. */
static inline struct ovs_tcphdr *ovs_tcp_hdr(const struct sk_buff *skb)
{
	return (struct ovs_tcphdr *)skb_transport_header(skb);
}
+
/* Updates 'flow' accounting to reflect having processed 'skb': last-used
 * timestamp, packet/byte counters, the observed IP TOS, and the union of
 * TCP flag bits seen.  The counters are protected by flow->lock with IRQs
 * disabled.
 * NOTE(review): ip_tos is written outside flow->lock; it is a single u8 so
 * the write cannot tear, but confirm a racy overwrite is acceptable. */
void flow_used(struct sw_flow *flow, struct sk_buff *skb)
{
	unsigned long flags;
	u8 tcp_flags = 0;

	if (flow->key.dl_type == htons(ETH_P_IP) && iphdr_ok(skb)) {
		struct iphdr *nh = ip_hdr(skb);
		flow->ip_tos = nh->tos;
		if (flow->key.nw_proto == IPPROTO_TCP && tcphdr_ok(skb)) {
			u8 *tcp = (u8 *)tcp_hdr(skb);
			/* Byte 13 of the TCP header holds the flag bits. */
			tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK;
		}
	}

	spin_lock_irqsave(&flow->lock, flags);
	getnstimeofday(&flow->used);
	flow->packet_count++;
	flow->byte_count += skb->len;
	flow->tcp_flags |= tcp_flags;
	spin_unlock_irqrestore(&flow->lock, flags);
}
+
+struct sw_flow_actions *flow_actions_alloc(size_t n_actions)
+{
+ struct sw_flow_actions *sfa;
+
+ if (n_actions > (PAGE_SIZE - sizeof *sfa) / sizeof(union odp_action))
+ return ERR_PTR(-EINVAL);
+
+ sfa = kmalloc(sizeof *sfa + n_actions * sizeof(union odp_action),
+ GFP_KERNEL);
+ if (!sfa)
+ return ERR_PTR(-ENOMEM);
+
+ sfa->n_actions = n_actions;
+ return sfa;
+}
+
+
+/* Frees 'flow' immediately. */
+void flow_free(struct sw_flow *flow)
+{
+ if (unlikely(!flow))
+ return;
+ kfree(flow->sf_acts);
+ kmem_cache_free(flow_cache, flow);
+}
+
+/* RCU callback used by flow_deferred_free. */
+static void rcu_free_flow_callback(struct rcu_head *rcu)
+{
+ struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
+ flow_free(flow);
+}
+
/* Schedules 'flow' to be freed after the next RCU grace period (via
 * rcu_free_flow_callback(), which releases both the flow and its actions).
 * The caller must hold rcu_read_lock for this to be sensible. */
void flow_deferred_free(struct sw_flow *flow)
{
	call_rcu(&flow->rcu, rcu_free_flow_callback);
}
+
+/* RCU callback used by flow_deferred_free_acts. */
+static void rcu_free_acts_callback(struct rcu_head *rcu)
+{
+ struct sw_flow_actions *sf_acts = container_of(rcu,
+ struct sw_flow_actions, rcu);
+ kfree(sf_acts);
+}
+
/* Schedules 'sf_acts' to be freed after the next RCU grace period (via
 * rcu_free_acts_callback()).  Used when a flow's action list is replaced.
 * The caller must hold rcu_read_lock for this to be sensible. */
void flow_deferred_free_acts(struct sw_flow_actions *sf_acts)
{
	call_rcu(&sf_acts->rcu, rcu_free_acts_callback);
}
+
+#define SNAP_OUI_LEN 3
+
/* Ethernet header immediately followed by an 802.2 SNAP header; used by
 * flow_extract() / is_snap() to recognize SNAP-encapsulated frames. */
struct eth_snap_hdr
{
	struct ethhdr eth;
	u8  dsap;  /* Always 0xAA */
	u8  ssap;  /* Always 0xAA */
	u8  ctrl;
	u8  oui[SNAP_OUI_LEN];
	u16 ethertype;	/* Network byte order. */
} __attribute__ ((packed));
+
+static int is_snap(const struct eth_snap_hdr *esh)
+{
+ return (esh->dsap == LLC_SAP_SNAP
+ && esh->ssap == LLC_SAP_SNAP
+ && !memcmp(esh->oui, "\0\0\0", 3));
+}
+
/* Parses the Ethernet frame in 'skb', which was received on 'in_port',
 * and initializes 'key' to match.  Returns 1 if 'skb' contains an IP
 * fragment, 0 otherwise.
 *
 * Side effects: sets skb's mac, network, and (for non-fragmented IP)
 * transport header offsets.  A frame too short for an Ethernet header
 * yields the zeroed key (with dl_vlan = ODP_VLAN_NONE) and returns 0. */
int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key)
{
	struct ethhdr *eth;
	struct eth_snap_hdr *esh;
	int retval = 0;
	int nh_ofs;

	memset(key, 0, sizeof *key);
	key->dl_vlan = htons(ODP_VLAN_NONE);
	key->in_port = in_port;

	if (skb->len < sizeof *eth)
		return 0;
	/* Pull the first 64 bytes (or the whole packet, if shorter) into
	 * the linear area so the header fields below can be read directly. */
	if (!pskb_may_pull(skb, skb->len >= 64 ? 64 : skb->len)) {
		return 0;
	}

	skb_reset_mac_header(skb);
	eth = eth_hdr(skb);
	esh = (struct eth_snap_hdr *) eth;
	nh_ofs = sizeof *eth;
	/* Distinguish Ethernet II framing (type >= cutoff) from 802.2 SNAP
	 * and raw 802.2 LLC framing. */
	if (likely(ntohs(eth->h_proto) >= ODP_DL_TYPE_ETH2_CUTOFF))
		key->dl_type = eth->h_proto;
	else if (skb->len >= sizeof *esh && is_snap(esh)) {
		key->dl_type = esh->ethertype;
		nh_ofs = sizeof *esh;
	} else {
		key->dl_type = htons(ODP_DL_TYPE_NOT_ETH_TYPE);
		if (skb->len >= nh_ofs + sizeof(struct llc_pdu_un)) {
			nh_ofs += sizeof(struct llc_pdu_un);
		}
	}

	/* Check for a VLAN tag */
	if (key->dl_type == htons(ETH_P_8021Q) &&
	    skb->len >= nh_ofs + sizeof(struct vlan_hdr)) {
		struct vlan_hdr *vh = (struct vlan_hdr*)(skb->data + nh_ofs);
		key->dl_type = vh->h_vlan_encapsulated_proto;
		key->dl_vlan = vh->h_vlan_TCI & htons(VLAN_VID_MASK);
		nh_ofs += sizeof(struct vlan_hdr);
	}
	memcpy(key->dl_src, eth->h_source, ETH_ALEN);
	memcpy(key->dl_dst, eth->h_dest, ETH_ALEN);
	skb_set_network_header(skb, nh_ofs);

	/* Network layer. */
	if (key->dl_type == htons(ETH_P_IP) && iphdr_ok(skb)) {
		struct iphdr *nh = ip_hdr(skb);
		int th_ofs = nh_ofs + nh->ihl * 4;
		key->nw_src = nh->saddr;
		key->nw_dst = nh->daddr;
		key->nw_proto = nh->protocol;
		skb_set_transport_header(skb, th_ofs);

		/* Transport layer: only parsed for unfragmented packets
		 * (first fragments included here would have MF set). */
		if (!(nh->frag_off & htons(IP_MF | IP_OFFSET))) {
			if (key->nw_proto == IPPROTO_TCP) {
				if (tcphdr_ok(skb)) {
					struct tcphdr *tcp = tcp_hdr(skb);
					key->tp_src = tcp->source;
					key->tp_dst = tcp->dest;
				} else {
					/* Avoid tricking other code into
					 * thinking that this packet has an L4
					 * header. */
					key->nw_proto = 0;
				}
			} else if (key->nw_proto == IPPROTO_UDP) {
				if (udphdr_ok(skb)) {
					struct udphdr *udp = udp_hdr(skb);
					key->tp_src = udp->source;
					key->tp_dst = udp->dest;
				} else {
					/* Avoid tricking other code into
					 * thinking that this packet has an L4
					 * header. */
					key->nw_proto = 0;
				}
			} else if (key->nw_proto == IPPROTO_ICMP) {
				if (icmphdr_ok(skb)) {
					struct icmphdr *icmp = icmp_hdr(skb);
					/* The ICMP type and code fields use the 16-bit
					 * transport port fields, so we need to store them
					 * in 16-bit network byte order. */
					key->tp_src = htons(icmp->type);
					key->tp_dst = htons(icmp->code);
				} else {
					/* Avoid tricking other code into
					 * thinking that this packet has an L4
					 * header. */
					key->nw_proto = 0;
				}
			}
		} else {
			retval = 1;
		}
	} else {
		skb_reset_transport_header(skb);
	}
	return retval;
}
+
+/* Initializes the flow module.
+ * Returns zero if successful or a negative error code. */
+int flow_init(void)
+{
+ flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
+ 0, NULL);
+ if (flow_cache == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
/* Uninitializes the flow module, destroying the sw_flow slab cache.  All
 * flows must already have been freed (see rcu_barrier() in dp_cleanup()). */
void flow_exit(void)
{
	kmem_cache_destroy(flow_cache);
}
+
/* Logs 'key' to the kernel log, for debugging.  Note that nw_src and
 * nw_dst are printed as raw hex in network byte order, not dotted quad. */
void print_flow(const struct odp_flow_key *key)
{
#define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x"
#define MAC_ARG(x) ((u8*)(x))[0],((u8*)(x))[1],((u8*)(x))[2],((u8*)(x))[3],((u8*)(x))[4],((u8*)(x))[5]
	printk("port%04x:vlan%d mac"MAC_FMT"->"MAC_FMT" "
	       "type%04x proto%d ip%x->%x port%d->%d\n",
	       key->in_port, ntohs(key->dl_vlan),
	       MAC_ARG(key->dl_src), MAC_ARG(key->dl_dst),
	       ntohs(key->dl_type), key->nw_proto,
	       key->nw_src, key->nw_dst,
	       ntohs(key->tp_src), ntohs(key->tp_dst));
}
diff --git a/datapath/flow.h b/datapath/flow.h
new file mode 100644
index 00000000..55efede1
--- /dev/null
+++ b/datapath/flow.h
@@ -0,0 +1,49 @@
+#ifndef FLOW_H
+#define FLOW_H 1
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/gfp.h>
+
+#include "openvswitch/datapath-protocol.h"
+
+struct sk_buff;
+
/* A flow's action list; freed via RCU when replaced (see
 * flow_deferred_free_acts()). */
struct sw_flow_actions {
	struct rcu_head rcu;
	unsigned int n_actions;		/* Number of elements in actions[]. */
	union odp_action actions[];	/* Flexible array member. */
};
+
+struct sw_flow {
+ struct rcu_head rcu;
+ struct odp_flow_key key;
+ struct sw_flow_actions *sf_acts;
+
+ struct timespec used; /* Last used time. */
+
+ u8 ip_tos; /* IP TOS value. */
+
+ spinlock_t lock; /* Lock for values below. */
+ u64 packet_count; /* Number of packets matched. */
+ u64 byte_count; /* Number of bytes matched. */
+ u8 tcp_flags; /* Union of seen TCP flags. */
+};
+
+extern struct kmem_cache *flow_cache;
+
+struct sw_flow_actions *flow_actions_alloc(size_t n_actions);
+void flow_free(struct sw_flow *);
+void flow_deferred_free(struct sw_flow *);
+void flow_deferred_free_acts(struct sw_flow_actions *);
+int flow_extract(struct sk_buff *, u16 in_port, struct odp_flow_key *);
+void flow_used(struct sw_flow *, struct sk_buff *);
+
+void print_flow(const struct odp_flow_key *);
+
+int flow_init(void);
+void flow_exit(void);
+
+#endif /* flow.h */
diff --git a/datapath/linux-2.6/.gitignore b/datapath/linux-2.6/.gitignore
new file mode 100644
index 00000000..af5821a2
--- /dev/null
+++ b/datapath/linux-2.6/.gitignore
@@ -0,0 +1,20 @@
+/Kbuild
+/Makefile
+/Makefile.main
+/actions.c
+/brcompat.c
+/brc_sysfs_dp.c
+/brc_sysfs_if.c
+/datapath.c
+/dp_dev.c
+/dp_notify.c
+/flow.c
+/genetlink-brcompat.c
+/genetlink-openvswitch.c
+/kcompat.h
+/linux-2.6
+/modules.order
+/random32.c
+/table.c
+/tmp
+/veth.c
diff --git a/datapath/linux-2.6/Kbuild.in b/datapath/linux-2.6/Kbuild.in
new file mode 100644
index 00000000..f08eb9c5
--- /dev/null
+++ b/datapath/linux-2.6/Kbuild.in
@@ -0,0 +1,34 @@
+# -*- makefile -*-
+export builddir = @abs_builddir@
+export srcdir = @abs_srcdir@
+export top_srcdir = @abs_top_srcdir@
+export VERSION = @VERSION@
+export BUILDNR = @BUILDNR@
+
+include $(srcdir)/../Modules.mk
+include $(srcdir)/Modules.mk
+
+EXTRA_CFLAGS := -DVERSION=\"$(VERSION)\"
+EXTRA_CFLAGS += -I$(srcdir)/..
+EXTRA_CFLAGS += -I$(builddir)/..
+EXTRA_CFLAGS += -I$(top_srcdir)/include
+ifeq '$(BUILDNR)' '0'
+EXTRA_CFLAGS += -DBUILDNR=\"\"
+else
+EXTRA_CFLAGS += -DBUILDNR=\"+build$(BUILDNR)\"
+endif
+EXTRA_CFLAGS += -g
+EXTRA_CFLAGS += -include $(builddir)/kcompat.h
+
+# These include directories have to go before -I$(KSRC)/include.
+# NOSTDINC_FLAGS just happens to be a variable that goes in the
+# right place, even though it's conceptually incorrect.
+NOSTDINC_FLAGS += -I$(srcdir)/compat-2.6 -I$(srcdir)/compat-2.6/include
+
+obj-m := $(patsubst %,%_mod.o,$(build_modules))
+
+define module_template
+$(1)_mod-y = $$(notdir $$(patsubst %.c,%.o,$($(1)_sources)))
+endef
+
+$(foreach module,$(build_modules),$(eval $(call module_template,$(module))))
diff --git a/datapath/linux-2.6/Makefile.in b/datapath/linux-2.6/Makefile.in
new file mode 100644
index 00000000..efc1663e
--- /dev/null
+++ b/datapath/linux-2.6/Makefile.in
@@ -0,0 +1,9 @@
+ifeq ($(KERNELRELEASE),)
+# We're being called directly by running make in this directory.
+include Makefile.main
+else
+# We're being included by the Linux kernel build system
+include Kbuild
+endif
+
+
diff --git a/datapath/linux-2.6/Makefile.main.in b/datapath/linux-2.6/Makefile.main.in
new file mode 100644
index 00000000..0005ec4f
--- /dev/null
+++ b/datapath/linux-2.6/Makefile.main.in
@@ -0,0 +1,82 @@
+# -*- makefile -*-
+export builddir = @abs_builddir@
+export srcdir = @abs_srcdir@
+export top_srcdir = @abs_top_srcdir@
+export KSRC = @KSRC26@
+export VERSION = @VERSION@
+export BUILD_VETH = @BUILD_VETH@
+
+include $(srcdir)/../Modules.mk
+include $(srcdir)/Modules.mk
+
+default: $(build_links)
+
+$(foreach s,$(sort $(foreach m,$(build_modules),$($(m)_sources))), \
+ $(eval $(notdir $(s)): ; ln -s $(srcdir)/../$(s) $@))
+
+distclean: clean
+ rm -f kcompat.h
+distdir: clean
+install:
+all: default
+check: all
+clean:
+ rm -f *.o *.ko *_mod.* Module.symvers *.cmd kcompat.h.new
+ for d in $(build_links); do if test -h $$d; then rm $$d; fi; done
+
+ifneq ($(KSRC),)
+
+ifeq (/lib/modules/$(shell uname -r)/source, $(KSRC))
+ KOBJ := /lib/modules/$(shell uname -r)/build
+else
+ KOBJ := $(KSRC)
+endif
+
+ifneq ($(shell grep -c 'PATCHLEVEL = 6' $(KSRC)/Makefile),1)
+ $(error Linux kernel source in $(KSRC) not 2.6)
+endif
+
+VERSION_FILE := $(KOBJ)/include/linux/version.h
+ifeq (,$(wildcard $(VERSION_FILE)))
+ $(error Linux kernel source not configured - missing version.h)
+endif
+
+CONFIG_FILE := $(KSRC)/include/linux/autoconf.h
+ifeq (,$(wildcard $(CONFIG_FILE)))
+ $(error Linux kernel source not configured - missing autoconf.h)
+endif
+
+default:
+ $(MAKE) -C $(KSRC) M=$(builddir) modules
+endif
+
+# Much of the kernel build system in this file is derived from Intel's
+# e1000 distribution, with the following license:
+
+################################################################################
+#
+# Intel PRO/1000 Linux driver
+# Copyright(c) 1999 - 2007, 2009 Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# The full GNU General Public License is included in this distribution in
+# the file called "COPYING".
+#
+# Contact Information:
+# Linux NICS <linux.nics@intel.com>
+# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+#
+################################################################################
diff --git a/datapath/linux-2.6/Modules.mk b/datapath/linux-2.6/Modules.mk
new file mode 100644
index 00000000..bbc4c72f
--- /dev/null
+++ b/datapath/linux-2.6/Modules.mk
@@ -0,0 +1,50 @@
+openvswitch_sources += \
+ linux-2.6/compat-2.6/genetlink-openvswitch.c \
+ linux-2.6/compat-2.6/random32.c
+openvswitch_headers += \
+ linux-2.6/compat-2.6/compat26.h \
+ linux-2.6/compat-2.6/include/asm-generic/bug.h \
+ linux-2.6/compat-2.6/include/linux/dmi.h \
+ linux-2.6/compat-2.6/include/linux/err.h \
+ linux-2.6/compat-2.6/include/linux/icmp.h \
+ linux-2.6/compat-2.6/include/linux/if_arp.h \
+ linux-2.6/compat-2.6/include/linux/ip.h \
+ linux-2.6/compat-2.6/include/linux/ipv6.h \
+ linux-2.6/compat-2.6/include/linux/jiffies.h \
+ linux-2.6/compat-2.6/include/linux/kernel.h \
+ linux-2.6/compat-2.6/include/linux/log2.h \
+ linux-2.6/compat-2.6/include/linux/lockdep.h \
+ linux-2.6/compat-2.6/include/linux/mutex.h \
+ linux-2.6/compat-2.6/include/linux/netdevice.h \
+ linux-2.6/compat-2.6/include/linux/netfilter_bridge.h \
+ linux-2.6/compat-2.6/include/linux/netfilter_ipv4.h \
+ linux-2.6/compat-2.6/include/linux/netlink.h \
+ linux-2.6/compat-2.6/include/linux/percpu.h \
+ linux-2.6/compat-2.6/include/linux/random.h \
+ linux-2.6/compat-2.6/include/linux/rculist.h \
+ linux-2.6/compat-2.6/include/linux/rtnetlink.h \
+ linux-2.6/compat-2.6/include/linux/skbuff.h \
+ linux-2.6/compat-2.6/include/linux/tcp.h \
+ linux-2.6/compat-2.6/include/linux/timer.h \
+ linux-2.6/compat-2.6/include/linux/types.h \
+ linux-2.6/compat-2.6/include/linux/udp.h \
+ linux-2.6/compat-2.6/include/linux/workqueue.h \
+ linux-2.6/compat-2.6/include/net/checksum.h \
+ linux-2.6/compat-2.6/include/net/genetlink.h \
+ linux-2.6/compat-2.6/include/net/netlink.h
+
+both_modules += brcompat
+brcompat_sources = \
+ linux-2.6/compat-2.6/genetlink-brcompat.c \
+ brcompat.c \
+ brc_procfs.c \
+ brc_sysfs_dp.c \
+ brc_sysfs_if.c
+brcompat_headers = \
+ brc_procfs.h \
+ brc_sysfs.h
+
+dist_modules += veth
+build_modules += $(if $(BUILD_VETH),veth)
+veth_sources = linux-2.6/compat-2.6/veth.c
+veth_headers =
diff --git a/datapath/linux-2.6/compat-2.6/compat26.h b/datapath/linux-2.6/compat-2.6/compat26.h
new file mode 100644
index 00000000..61448d63
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/compat26.h
@@ -0,0 +1,37 @@
+#ifndef __COMPAT26_H
+#define __COMPAT26_H 1
+
+#include <linux/version.h>
+
+#if defined(CONFIG_PREEMPT) && LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,21)
+#error "CONFIG_PREEMPT is broken with 2.6.x before 2.6.21--see commit 4498121ca3, \"[NET]: Handle disabled preemption in gfp_any()\""
+#endif
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+/*----------------------------------------------------------------------------
+ * In 2.6.24, a namespace argument became required for dev_get_by_name. */
+
+#define dev_get_by_name(net, name) \
+ dev_get_by_name((name))
+
+#define dev_get_by_index(net, ifindex) \
+ dev_get_by_index((ifindex))
+
+#define __dev_get_by_name(net, name) \
+ __dev_get_by_name((name))
+
+#define __dev_get_by_index(net, ifindex) \
+ __dev_get_by_index((ifindex))
+
+#endif /* linux kernel <= 2.6.23 */
+
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,22)
+/*----------------------------------------------------------------------------
+ * In 2.6.23, the last argument was dropped from kmem_cache_create. */
+#define kmem_cache_create(n, s, a, f, c) \
+ kmem_cache_create((n), (s), (a), (f), (c), NULL)
+
+#endif /* linux kernel <= 2.6.22 */
+
+#endif /* compat26.h */
diff --git a/datapath/linux-2.6/compat-2.6/genetlink-brcompat.c b/datapath/linux-2.6/compat-2.6/genetlink-brcompat.c
new file mode 100644
index 00000000..c43b3ce4
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/genetlink-brcompat.c
@@ -0,0 +1,20 @@
+#include "net/genetlink.h"
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
+
+/* We fix grp->id to 32 so that it doesn't collide with any of the multicast
+ * groups selected by openvswitch_mod, which uses groups 16 through 31.
+ * Collision isn't fatal--multicast listeners should check that the family is
+ * the one that they want and discard others--but it wastes time and memory to
+ * receive unwanted messages. */
+int genl_register_mc_group(struct genl_family *family,
+ struct genl_multicast_group *grp)
+{
+ grp->id = 32;
+ grp->family = family;
+
+ return 0;
+}
+
+#endif /* kernel < 2.6.23 */
diff --git a/datapath/linux-2.6/compat-2.6/genetlink-openvswitch.c b/datapath/linux-2.6/compat-2.6/genetlink-openvswitch.c
new file mode 100644
index 00000000..9e09215f
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/genetlink-openvswitch.c
@@ -0,0 +1,22 @@
+#include "net/genetlink.h"
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
+
+/* We use multicast groups 16 through 31 to avoid colliding with the multicast
+ * group selected by brcompat_mod, which uses group 32. Collision isn't
+ * fatal--multicast listeners should check that the family is the one that they
+ * want and discard others--but it wastes time and memory to receive unwanted
+ * messages. */
+int genl_register_mc_group(struct genl_family *family,
+ struct genl_multicast_group *grp)
+{
+ /* This code is called single-threaded. */
+ static unsigned int next_id = 0;
+ grp->id = next_id++ % 16 + 16;
+ grp->family = family;
+
+ return 0;
+}
+
+#endif /* kernel < 2.6.23 */
diff --git a/datapath/linux-2.6/compat-2.6/include/asm-generic/bug.h b/datapath/linux-2.6/compat-2.6/include/asm-generic/bug.h
new file mode 100644
index 00000000..1d9b3140
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/asm-generic/bug.h
@@ -0,0 +1,19 @@
+#ifndef __ASM_GENERIC_BUG_WRAPPER_H
+#define __ASM_GENERIC_BUG_WRAPPER_H
+
+#include_next <asm-generic/bug.h>
+
+#ifndef WARN_ON_ONCE
+#define WARN_ON_ONCE(condition) ({ \
+ static int __warned; \
+ int __ret_warn_once = !!(condition); \
+ \
+ if (unlikely(__ret_warn_once) && !__warned) { \
+ WARN_ON(1); \
+ __warned = 1; \
+ } \
+ unlikely(__ret_warn_once); \
+})
+#endif
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/cpumask.h b/datapath/linux-2.6/compat-2.6/include/linux/cpumask.h
new file mode 100644
index 00000000..48c73aa8
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/cpumask.h
@@ -0,0 +1,11 @@
+#ifndef __LINUX_CPUMASK_WRAPPER_H
+#define __LINUX_CPUMASK_WRAPPER_H
+
+#include_next <linux/cpumask.h>
+
+/* for_each_cpu was renamed for_each_possible_cpu in 2.6.18. */
+#ifndef for_each_possible_cpu
+#define for_each_possible_cpu for_each_cpu
+#endif
+
+#endif /* linux/cpumask.h wrapper */
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/dmi.h b/datapath/linux-2.6/compat-2.6/include/linux/dmi.h
new file mode 100644
index 00000000..52916fec
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/dmi.h
@@ -0,0 +1,114 @@
+#ifndef __LINUX_DMI_WRAPPER_H
+#define __LINUX_DMI_WRAPPER_H 1
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
+
+#include_next <linux/dmi.h>
+
+#else /* linux version < 2.6.23 */
+
+#ifndef __DMI_H__
+#define __DMI_H__
+
+#include <linux/list.h>
+
+enum dmi_field {
+ DMI_NONE,
+ DMI_BIOS_VENDOR,
+ DMI_BIOS_VERSION,
+ DMI_BIOS_DATE,
+ DMI_SYS_VENDOR,
+ DMI_PRODUCT_NAME,
+ DMI_PRODUCT_VERSION,
+ DMI_PRODUCT_SERIAL,
+ DMI_PRODUCT_UUID,
+ DMI_BOARD_VENDOR,
+ DMI_BOARD_NAME,
+ DMI_BOARD_VERSION,
+ DMI_BOARD_SERIAL,
+ DMI_BOARD_ASSET_TAG,
+ DMI_CHASSIS_VENDOR,
+ DMI_CHASSIS_TYPE,
+ DMI_CHASSIS_VERSION,
+ DMI_CHASSIS_SERIAL,
+ DMI_CHASSIS_ASSET_TAG,
+ DMI_STRING_MAX,
+};
+
+enum dmi_device_type {
+ DMI_DEV_TYPE_ANY = 0,
+ DMI_DEV_TYPE_OTHER,
+ DMI_DEV_TYPE_UNKNOWN,
+ DMI_DEV_TYPE_VIDEO,
+ DMI_DEV_TYPE_SCSI,
+ DMI_DEV_TYPE_ETHERNET,
+ DMI_DEV_TYPE_TOKENRING,
+ DMI_DEV_TYPE_SOUND,
+ DMI_DEV_TYPE_IPMI = -1,
+ DMI_DEV_TYPE_OEM_STRING = -2
+};
+
+struct dmi_header {
+ u8 type;
+ u8 length;
+ u16 handle;
+};
+
+/*
+ * DMI callbacks for problem boards
+ */
+struct dmi_strmatch {
+ u8 slot;
+ char *substr;
+};
+
+struct dmi_system_id {
+ int (*callback)(struct dmi_system_id *);
+ const char *ident;
+ struct dmi_strmatch matches[4];
+ void *driver_data;
+};
+
+#define DMI_MATCH(a, b) { a, b }
+
+struct dmi_device {
+ struct list_head list;
+ int type;
+ const char *name;
+ void *device_data; /* Type specific data */
+};
+
+/* No CONFIG_DMI before 2.6.16 */
+#if defined(CONFIG_DMI) || defined(CONFIG_X86_32)
+
+extern int dmi_check_system(struct dmi_system_id *list);
+extern char * dmi_get_system_info(int field);
+extern struct dmi_device * dmi_find_device(int type, const char *name,
+ struct dmi_device *from);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+extern void dmi_scan_machine(void);
+#endif
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,17)
+extern int dmi_get_year(int field);
+#endif
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19)
+extern int dmi_name_in_vendors(char *str);
+#endif
+
+#else
+
+static inline int dmi_check_system(struct dmi_system_id *list) { return 0; }
+static inline char * dmi_get_system_info(int field) { return NULL; }
+static inline struct dmi_device * dmi_find_device(int type, const char *name,
+ struct dmi_device *from) { return NULL; }
+static inline int dmi_get_year(int year) { return 0; }
+static inline int dmi_name_in_vendors(char *s) { return 0; }
+
+#endif
+
+#endif /* __DMI_H__ */
+
+#endif /* linux kernel < 2.6.23 */
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/err.h b/datapath/linux-2.6/compat-2.6/include/linux/err.h
new file mode 100644
index 00000000..50faf2a1
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/err.h
@@ -0,0 +1,21 @@
+#ifndef __LINUX_ERR_WRAPPER_H
+#define __LINUX_ERR_WRAPPER_H 1
+
+#include_next <linux/err.h>
+
+#ifndef HAVE_ERR_CAST
+/**
+ * ERR_CAST - Explicitly cast an error-valued pointer to another pointer type
+ * @ptr: The pointer to cast.
+ *
+ * Explicitly cast an error-valued pointer to another pointer type in such a
+ * way as to make it clear that's what's going on.
+ */
+static inline void *ERR_CAST(const void *ptr)
+{
+ /* cast away the const */
+ return (void *) ptr;
+}
+#endif /* HAVE_ERR_CAST */
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/icmp.h b/datapath/linux-2.6/compat-2.6/include/linux/icmp.h
new file mode 100644
index 00000000..89b354e4
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/icmp.h
@@ -0,0 +1,13 @@
+#ifndef __LINUX_ICMP_WRAPPER_H
+#define __LINUX_ICMP_WRAPPER_H 1
+
+#include_next <linux/icmp.h>
+
+#ifndef HAVE_SKBUFF_HEADER_HELPERS
+static inline struct icmphdr *icmp_hdr(const struct sk_buff *skb)
+{
+ return (struct icmphdr *)skb_transport_header(skb);
+}
+#endif
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/if_arp.h b/datapath/linux-2.6/compat-2.6/include/linux/if_arp.h
new file mode 100644
index 00000000..e48d6ba0
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/if_arp.h
@@ -0,0 +1,15 @@
+#ifndef __LINUX_IF_ARP_WRAPPER_H
+#define __LINUX_IF_ARP_WRAPPER_H 1
+
+#include_next <linux/if_arp.h>
+
+#ifndef HAVE_SKBUFF_HEADER_HELPERS
+#include <linux/skbuff.h>
+
+static inline struct arphdr *arp_hdr(const struct sk_buff *skb)
+{
+ return (struct arphdr *)skb_network_header(skb);
+}
+#endif /* !HAVE_SKBUFF_HEADER_HELPERS */
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/ip.h b/datapath/linux-2.6/compat-2.6/include/linux/ip.h
new file mode 100644
index 00000000..36765396
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/ip.h
@@ -0,0 +1,18 @@
+#ifndef __LINUX_IP_WRAPPER_H
+#define __LINUX_IP_WRAPPER_H 1
+
+#include_next <linux/ip.h>
+
+#ifndef HAVE_SKBUFF_HEADER_HELPERS
+static inline struct iphdr *ip_hdr(const struct sk_buff *skb)
+{
+ return (struct iphdr *)skb_network_header(skb);
+}
+
+static inline unsigned int ip_hdrlen(const struct sk_buff *skb)
+{
+ return ip_hdr(skb)->ihl * 4;
+}
+#endif /* !HAVE_SKBUFF_HEADER_HELPERS */
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/ipv6.h b/datapath/linux-2.6/compat-2.6/include/linux/ipv6.h
new file mode 100644
index 00000000..25a5431a
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/ipv6.h
@@ -0,0 +1,13 @@
+#ifndef __LINUX_IPV6_WRAPPER_H
+#define __LINUX_IPV6_WRAPPER_H 1
+
+#include_next <linux/ipv6.h>
+
+#ifndef HAVE_SKBUFF_HEADER_HELPERS
+static inline struct ipv6hdr *ipv6_hdr(const struct sk_buff *skb)
+{
+ return (struct ipv6hdr *)skb_network_header(skb);
+}
+#endif
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/jiffies.h b/datapath/linux-2.6/compat-2.6/include/linux/jiffies.h
new file mode 100644
index 00000000..3286e634
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/jiffies.h
@@ -0,0 +1,26 @@
+#ifndef __LINUX_JIFFIES_WRAPPER_H
+#define __LINUX_JIFFIES_WRAPPER_H 1
+
+#include_next <linux/jiffies.h>
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
+
+/* Like the time_after() family, but implemented with platform
+ * independent 64bit types. These must be used when utilizing jiffies_64
+ * (i.e. the return value of get_jiffies_64()). */
+#define time_after64(a,b) \
+ (typecheck(__u64, a) && \
+ typecheck(__u64, b) && \
+ ((__s64)(b) - (__s64)(a) < 0))
+#define time_before64(a,b) time_after64(b,a)
+
+#define time_after_eq64(a,b) \
+ (typecheck(__u64, a) && \
+ typecheck(__u64, b) && \
+ ((__s64)(a) - (__s64)(b) >= 0))
+#define time_before_eq64(a,b) time_after_eq64(b,a)
+
+#endif /* linux kernel < 2.6.19 */
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/kernel.h b/datapath/linux-2.6/compat-2.6/include/linux/kernel.h
new file mode 100644
index 00000000..9459155d
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/kernel.h
@@ -0,0 +1,9 @@
+#ifndef __KERNEL_H_WRAPPER
+#define __KERNEL_H_WRAPPER 1
+
+#include_next <linux/kernel.h>
+#ifndef HAVE_LOG2_H
+#include <linux/log2.h>
+#endif
+
+#endif /* linux/kernel.h */
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/lockdep.h b/datapath/linux-2.6/compat-2.6/include/linux/lockdep.h
new file mode 100644
index 00000000..1c839423
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/lockdep.h
@@ -0,0 +1,450 @@
+/*
+ * Runtime locking correctness validator
+ *
+ * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * see Documentation/lockdep-design.txt for more details.
+ */
+#ifndef __LINUX_LOCKDEP_WRAPPER_H
+#define __LINUX_LOCKDEP_WRAPPER_H
+
+#include_next <linux/lockdep.h>
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18)
+
+struct task_struct;
+struct lockdep_map;
+
+#ifdef CONFIG_LOCKDEP
+
+#include <linux/linkage.h>
+#include <linux/list.h>
+#include <linux/debug_locks.h>
+#include <linux/stacktrace.h>
+
+/*
+ * Lock-class usage-state bits:
+ */
+enum lock_usage_bit
+{
+ LOCK_USED = 0,
+ LOCK_USED_IN_HARDIRQ,
+ LOCK_USED_IN_SOFTIRQ,
+ LOCK_ENABLED_SOFTIRQS,
+ LOCK_ENABLED_HARDIRQS,
+ LOCK_USED_IN_HARDIRQ_READ,
+ LOCK_USED_IN_SOFTIRQ_READ,
+ LOCK_ENABLED_SOFTIRQS_READ,
+ LOCK_ENABLED_HARDIRQS_READ,
+ LOCK_USAGE_STATES
+};
+
+/*
+ * Usage-state bitmasks:
+ */
+#define LOCKF_USED (1 << LOCK_USED)
+#define LOCKF_USED_IN_HARDIRQ (1 << LOCK_USED_IN_HARDIRQ)
+#define LOCKF_USED_IN_SOFTIRQ (1 << LOCK_USED_IN_SOFTIRQ)
+#define LOCKF_ENABLED_HARDIRQS (1 << LOCK_ENABLED_HARDIRQS)
+#define LOCKF_ENABLED_SOFTIRQS (1 << LOCK_ENABLED_SOFTIRQS)
+
+#define LOCKF_ENABLED_IRQS (LOCKF_ENABLED_HARDIRQS | LOCKF_ENABLED_SOFTIRQS)
+#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ)
+
+#define LOCKF_USED_IN_HARDIRQ_READ (1 << LOCK_USED_IN_HARDIRQ_READ)
+#define LOCKF_USED_IN_SOFTIRQ_READ (1 << LOCK_USED_IN_SOFTIRQ_READ)
+#define LOCKF_ENABLED_HARDIRQS_READ (1 << LOCK_ENABLED_HARDIRQS_READ)
+#define LOCKF_ENABLED_SOFTIRQS_READ (1 << LOCK_ENABLED_SOFTIRQS_READ)
+
+#define LOCKF_ENABLED_IRQS_READ \
+ (LOCKF_ENABLED_HARDIRQS_READ | LOCKF_ENABLED_SOFTIRQS_READ)
+#define LOCKF_USED_IN_IRQ_READ \
+ (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
+
+#define MAX_LOCKDEP_SUBCLASSES 8UL
+
+/*
+ * Lock-classes are keyed via unique addresses, by embedding the
+ * lockclass-key into the kernel (or module) .data section. (For
+ * static locks we use the lock address itself as the key.)
+ */
+struct lockdep_subclass_key {
+ char __one_byte;
+} __attribute__ ((__packed__));
+
+struct lock_class_key {
+ struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
+};
+
+/*
+ * The lock-class itself:
+ */
+struct lock_class {
+ /*
+ * class-hash:
+ */
+ struct list_head hash_entry;
+
+ /*
+ * global list of all lock-classes:
+ */
+ struct list_head lock_entry;
+
+ struct lockdep_subclass_key *key;
+ unsigned int subclass;
+
+ /*
+ * IRQ/softirq usage tracking bits:
+ */
+ unsigned long usage_mask;
+ struct stack_trace usage_traces[LOCK_USAGE_STATES];
+
+ /*
+ * These fields represent a directed graph of lock dependencies,
+ * to every node we attach a list of "forward" and a list of
+ * "backward" graph nodes.
+ */
+ struct list_head locks_after, locks_before;
+
+ /*
+ * Generation counter, when doing certain classes of graph walking,
+ * to ensure that we check one node only once:
+ */
+ unsigned int version;
+
+ /*
+ * Statistics counter:
+ */
+ unsigned long ops;
+
+ const char *name;
+ int name_version;
+
+#ifdef CONFIG_LOCK_STAT
+ unsigned long contention_point[4];
+#endif
+};
+
+#ifdef CONFIG_LOCK_STAT
+struct lock_time {
+ s64 min;
+ s64 max;
+ s64 total;
+ unsigned long nr;
+};
+
+enum bounce_type {
+ bounce_acquired_write,
+ bounce_acquired_read,
+ bounce_contended_write,
+ bounce_contended_read,
+ nr_bounce_types,
+
+ bounce_acquired = bounce_acquired_write,
+ bounce_contended = bounce_contended_write,
+};
+
+struct lock_class_stats {
+ unsigned long contention_point[4];
+ struct lock_time read_waittime;
+ struct lock_time write_waittime;
+ struct lock_time read_holdtime;
+ struct lock_time write_holdtime;
+ unsigned long bounces[nr_bounce_types];
+};
+
+struct lock_class_stats lock_stats(struct lock_class *class);
+void clear_lock_stats(struct lock_class *class);
+#endif
+
+/*
+ * Map the lock object (the lock instance) to the lock-class object.
+ * This is embedded into specific lock instances:
+ */
+struct lockdep_map {
+ struct lock_class_key *key;
+ struct lock_class *class_cache;
+ const char *name;
+#ifdef CONFIG_LOCK_STAT
+ int cpu;
+#endif
+};
+
+/*
+ * Every lock has a list of other locks that were taken after it.
+ * We only grow the list, never remove from it:
+ */
+struct lock_list {
+ struct list_head entry;
+ struct lock_class *class;
+ struct stack_trace trace;
+ int distance;
+};
+
+/*
+ * We record lock dependency chains, so that we can cache them:
+ */
+struct lock_chain {
+ struct list_head entry;
+ u64 chain_key;
+};
+
+struct held_lock {
+ /*
+ * One-way hash of the dependency chain up to this point. We
+ * hash the hashes step by step as the dependency chain grows.
+ *
+ * We use it for dependency-caching and we skip detection
+ * passes and dependency-updates if there is a cache-hit, so
+ * it is absolutely critical for 100% coverage of the validator
+ * to have a unique key value for every unique dependency path
+ * that can occur in the system, to make a unique hash value
+ * as likely as possible - hence the 64-bit width.
+ *
+ * The task struct holds the current hash value (initialized
+ * with zero), here we store the previous hash value:
+ */
+ u64 prev_chain_key;
+ struct lock_class *class;
+ unsigned long acquire_ip;
+ struct lockdep_map *instance;
+
+#ifdef CONFIG_LOCK_STAT
+ u64 waittime_stamp;
+ u64 holdtime_stamp;
+#endif
+ /*
+ * The lock-stack is unified in that the lock chains of interrupt
+ * contexts nest ontop of process context chains, but we 'separate'
+ * the hashes by starting with 0 if we cross into an interrupt
+ * context, and we also keep do not add cross-context lock
+ * dependencies - the lock usage graph walking covers that area
+ * anyway, and we'd just unnecessarily increase the number of
+ * dependencies otherwise. [Note: hardirq and softirq contexts
+ * are separated from each other too.]
+ *
+ * The following field is used to detect when we cross into an
+ * interrupt context:
+ */
+ int irq_context;
+ int trylock;
+ int read;
+ int check;
+ int hardirqs_off;
+};
+
+/*
+ * Initialization, self-test and debugging-output methods:
+ */
+extern void lockdep_init(void);
+extern void lockdep_info(void);
+extern void lockdep_reset(void);
+extern void lockdep_reset_lock(struct lockdep_map *lock);
+extern void lockdep_free_key_range(void *start, unsigned long size);
+
+extern void lockdep_off(void);
+extern void lockdep_on(void);
+
+/*
+ * These methods are used by specific locking variants (spinlocks,
+ * rwlocks, mutexes and rwsems) to pass init/acquire/release events
+ * to lockdep:
+ */
+
+extern void lockdep_init_map(struct lockdep_map *lock, const char *name,
+ struct lock_class_key *key, int subclass);
+
+/*
+ * Reinitialize a lock key - for cases where there is special locking or
+ * special initialization of locks so that the validator gets the scope
+ * of dependencies wrong: they are either too broad (they need a class-split)
+ * or they are too narrow (they suffer from a false class-split):
+ */
+#define lockdep_set_class(lock, key) \
+ lockdep_init_map(&(lock)->dep_map, #key, key, 0)
+#define lockdep_set_class_and_name(lock, key, name) \
+ lockdep_init_map(&(lock)->dep_map, name, key, 0)
+#define lockdep_set_class_and_subclass(lock, key, sub) \
+ lockdep_init_map(&(lock)->dep_map, #key, key, sub)
+#define lockdep_set_subclass(lock, sub) \
+ lockdep_init_map(&(lock)->dep_map, #lock, \
+ (lock)->dep_map.key, sub)
+
+/*
+ * Acquire a lock.
+ *
+ * Values for "read":
+ *
+ * 0: exclusive (write) acquire
+ * 1: read-acquire (no recursion allowed)
+ * 2: read-acquire with same-instance recursion allowed
+ *
+ * Values for check:
+ *
+ * 0: disabled
+ * 1: simple checks (freeing, held-at-exit-time, etc.)
+ * 2: full validation
+ */
+extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
+ int trylock, int read, int check, unsigned long ip);
+
+extern void lock_release(struct lockdep_map *lock, int nested,
+ unsigned long ip);
+
+# define INIT_LOCKDEP .lockdep_recursion = 0,
+
+#define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0)
+
+#else /* !LOCKDEP */
+
+static inline void lockdep_off(void)
+{
+}
+
+static inline void lockdep_on(void)
+{
+}
+
+# define lock_acquire(l, s, t, r, c, i) do { } while (0)
+# define lock_release(l, n, i) do { } while (0)
+# define lockdep_init() do { } while (0)
+# define lockdep_info() do { } while (0)
+# define lockdep_init_map(lock, name, key, sub) do { (void)(key); } while (0)
+# define lockdep_set_class(lock, key) do { (void)(key); } while (0)
+# define lockdep_set_class_and_name(lock, key, name) \
+ do { (void)(key); } while (0)
+#define lockdep_set_class_and_subclass(lock, key, sub) \
+ do { (void)(key); } while (0)
+#define lockdep_set_subclass(lock, sub) do { } while (0)
+
+# define INIT_LOCKDEP
+# define lockdep_reset() do { debug_locks = 1; } while (0)
+# define lockdep_free_key_range(start, size) do { } while (0)
+/*
+ * The class key takes no space if lockdep is disabled:
+ */
+struct lock_class_key { };
+
+#define lockdep_depth(tsk) (0)
+
+#endif /* !LOCKDEP */
+
+#ifdef CONFIG_LOCK_STAT
+
+extern void lock_contended(struct lockdep_map *lock, unsigned long ip);
+extern void lock_acquired(struct lockdep_map *lock);
+
+#define LOCK_CONTENDED(_lock, try, lock) \
+do { \
+ if (!try(_lock)) { \
+ lock_contended(&(_lock)->dep_map, _RET_IP_); \
+ lock(_lock); \
+ } \
+ lock_acquired(&(_lock)->dep_map); \
+} while (0)
+
+#else /* CONFIG_LOCK_STAT */
+
+#define lock_contended(lockdep_map, ip) do {} while (0)
+#define lock_acquired(lockdep_map) do {} while (0)
+
+#define LOCK_CONTENDED(_lock, try, lock) \
+ lock(_lock)
+
+#endif /* CONFIG_LOCK_STAT */
+
+#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_GENERIC_HARDIRQS)
+extern void early_init_irq_lock_class(void);
+#else
+static inline void early_init_irq_lock_class(void)
+{
+}
+#endif
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+extern void early_boot_irqs_off(void);
+extern void early_boot_irqs_on(void);
+extern void print_irqtrace_events(struct task_struct *curr);
+#else
+static inline void early_boot_irqs_off(void)
+{
+}
+static inline void early_boot_irqs_on(void)
+{
+}
+static inline void print_irqtrace_events(struct task_struct *curr)
+{
+}
+#endif
+
+/*
+ * For trivial one-depth nesting of a lock-class, the following
+ * global define can be used. (Subsystems with multiple levels
+ * of nesting should define their own lock-nesting subclasses.)
+ */
+#define SINGLE_DEPTH_NESTING 1
+
+/*
+ * Map the dependency ops to NOP or to real lockdep ops, depending
+ * on the per lock-class debug mode:
+ */
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# ifdef CONFIG_PROVE_LOCKING
+# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i)
+# else
+# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i)
+# endif
+# define spin_release(l, n, i) lock_release(l, n, i)
+#else
+# define spin_acquire(l, s, t, i) do { } while (0)
+# define spin_release(l, n, i) do { } while (0)
+#endif
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# ifdef CONFIG_PROVE_LOCKING
+# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i)
+# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 2, i)
+# else
+# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i)
+# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 1, i)
+# endif
+# define rwlock_release(l, n, i) lock_release(l, n, i)
+#else
+# define rwlock_acquire(l, s, t, i) do { } while (0)
+# define rwlock_acquire_read(l, s, t, i) do { } while (0)
+# define rwlock_release(l, n, i) do { } while (0)
+#endif
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# ifdef CONFIG_PROVE_LOCKING
+# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i)
+# else
+# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i)
+# endif
+# define mutex_release(l, n, i) lock_release(l, n, i)
+#else
+# define mutex_acquire(l, s, t, i) do { } while (0)
+# define mutex_release(l, n, i) do { } while (0)
+#endif
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# ifdef CONFIG_PROVE_LOCKING
+# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i)
+# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 2, i)
+# else
+# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i)
+# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 1, i)
+# endif
+# define rwsem_release(l, n, i) lock_release(l, n, i)
+#else
+# define rwsem_acquire(l, s, t, i) do { } while (0)
+# define rwsem_acquire_read(l, s, t, i) do { } while (0)
+# define rwsem_release(l, n, i) do { } while (0)
+#endif
+
+#endif /* linux kernel < 2.6.18 */
+
+#endif /* __LINUX_LOCKDEP_WRAPPER_H */
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/log2.h b/datapath/linux-2.6/compat-2.6/include/linux/log2.h
new file mode 100644
index 00000000..69abae5e
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/log2.h
@@ -0,0 +1,17 @@
+#ifndef __LINUX_LOG2_WRAPPER
+#define __LINUX_LOG2_WRAPPER
+
+#ifdef HAVE_LOG2_H
+#include_next <linux/log2.h>
+#else
+/* This is very stripped down because log2.h has far too many dependencies. */
+
+extern __attribute__((const, noreturn))
+int ____ilog2_NaN(void);
+
+#define ilog2(n) ((n) == 4 ? 2 : \
+ (n) == 8 ? 3 : \
+ ____ilog2_NaN())
+#endif
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/mutex.h b/datapath/linux-2.6/compat-2.6/include/linux/mutex.h
new file mode 100644
index 00000000..93dfa3b2
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/mutex.h
@@ -0,0 +1,59 @@
+#ifndef __LINUX_MUTEX_WRAPPER_H
+#define __LINUX_MUTEX_WRAPPER_H
+
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
+
+#include <asm/semaphore.h>
+
+struct mutex {
+ struct semaphore sema;
+};
+
+#define mutex_init(mutex) init_MUTEX(&(mutex)->sema)
+#define mutex_destroy(mutex) do { } while (0)
+
+#define __MUTEX_INITIALIZER(name) \
+ __SEMAPHORE_INITIALIZER(name,1)
+
+#define DEFINE_MUTEX(mutexname) \
+ struct mutex mutexname = { __MUTEX_INITIALIZER(mutexname.sema) }
+
+/*
+ * See kernel/mutex.c for detailed documentation of these APIs.
+ * Also see Documentation/mutex-design.txt.
+ */
+static inline void mutex_lock(struct mutex *lock)
+{
+ down(&lock->sema);
+}
+
+static inline int mutex_lock_interruptible(struct mutex *lock)
+{
+ return down_interruptible(&lock->sema);
+}
+
+#define mutex_lock_nested(lock, subclass) mutex_lock(lock)
+#define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
+
+/*
+ * NOTE: mutex_trylock() follows the spin_trylock() convention,
+ * not the down_trylock() convention!
+ */
+static inline int mutex_trylock(struct mutex *lock)
+{
+ return !down_trylock(&lock->sema);
+}
+
+static inline void mutex_unlock(struct mutex *lock)
+{
+ up(&lock->sema);
+}
+#else
+
+#include_next <linux/mutex.h>
+
+#endif /* linux version < 2.6.16 */
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/netdevice.h b/datapath/linux-2.6/compat-2.6/include/linux/netdevice.h
new file mode 100644
index 00000000..32e1735d
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/netdevice.h
@@ -0,0 +1,35 @@
+#ifndef __LINUX_NETDEVICE_WRAPPER_H
+#define __LINUX_NETDEVICE_WRAPPER_H 1
+
+#include_next <linux/netdevice.h>
+
+struct net;
+
+#ifndef to_net_dev
+#define to_net_dev(class) container_of(class, struct net_device, class_dev)
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
+static inline
+struct net *dev_net(const struct net_device *dev)
+{
+ return NULL;
+}
+#endif /* linux kernel < 2.6.26 */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
+#define proc_net init_net.proc_net
+#endif
+
+#ifndef for_each_netdev
+/* Linux before 2.6.22 didn't have for_each_netdev at all. */
+#define for_each_netdev(net, d) for (d = dev_base; d; d = d->next)
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
+/* Linux 2.6.24 added a network namespace pointer to the macro. */
+#undef for_each_netdev
+#define for_each_netdev(net,d) list_for_each_entry(d, &dev_base_head, dev_list)
+#endif
+
+
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/netfilter_bridge.h b/datapath/linux-2.6/compat-2.6/include/linux/netfilter_bridge.h
new file mode 100644
index 00000000..1c8183c8
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/netfilter_bridge.h
@@ -0,0 +1,24 @@
+#ifndef __LINUX_NETFILTER_BRIDGE_WRAPPER_H
+#define __LINUX_NETFILTER_BRIDGE_WRAPPER_H
+
+#include_next <linux/netfilter_bridge.h>
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
+
+#include <linux/if_vlan.h>
+#include <linux/if_pppox.h>
+
+static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
+{
+ switch (skb->protocol) {
+ case __constant_htons(ETH_P_8021Q):
+ return VLAN_HLEN;
+ default:
+ return 0;
+ }
+}
+
+#endif /* linux version < 2.6.22 */
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/netfilter_ipv4.h b/datapath/linux-2.6/compat-2.6/include/linux/netfilter_ipv4.h
new file mode 100644
index 00000000..ed8a5d94
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/netfilter_ipv4.h
@@ -0,0 +1,19 @@
+#ifndef __LINUX_NETFILTER_IPV4_WRAPPER_H
+#define __LINUX_NETFILTER_IPV4_WRAPPER_H 1
+
+#include_next <linux/netfilter_ipv4.h>
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
+
+#ifdef __KERNEL__
+
+#define NF_INET_PRE_ROUTING NF_IP_PRE_ROUTING
+#define NF_INET_POST_ROUTING NF_IP_POST_ROUTING
+#define NF_INET_FORWARD NF_IP_FORWARD
+
+#endif /* __KERNEL__ */
+
+#endif /* linux kernel < 2.6.25 */
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/netlink.h b/datapath/linux-2.6/compat-2.6/include/linux/netlink.h
new file mode 100644
index 00000000..c5f83bd0
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/netlink.h
@@ -0,0 +1,24 @@
+#ifndef __LINUX_NETLINK_WRAPPER_H
+#define __LINUX_NETLINK_WRAPPER_H 1
+
+#include <linux/skbuff.h>
+#include_next <linux/netlink.h>
+#include <net/netlink.h>
+
+#include <linux/version.h>
+
+#ifndef NLMSG_DEFAULT_SIZE
+#define NLMSG_DEFAULT_SIZE (NLMSG_GOODSIZE - NLMSG_HDRLEN)
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
+#define nlmsg_new(s, f) nlmsg_new_proper((s), (f))
+static inline struct sk_buff *nlmsg_new_proper(int size, gfp_t flags)
+{
+ return alloc_skb(size, flags);
+}
+
+#endif /* linux kernel < 2.6.19 */
+
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/percpu.h b/datapath/linux-2.6/compat-2.6/include/linux/percpu.h
new file mode 100644
index 00000000..0f68bb25
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/percpu.h
@@ -0,0 +1,10 @@
+#ifndef __LINUX_PERCPU_H_WRAPPER
+#define __LINUX_PERCPU_H_WRAPPER 1
+
+#include_next <linux/percpu.h>
+
+#ifndef percpu_ptr
+#define percpu_ptr per_cpu_ptr
+#endif
+
+#endif /* linux/percpu.h wrapper */
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/random.h b/datapath/linux-2.6/compat-2.6/include/linux/random.h
new file mode 100644
index 00000000..4e4932c9
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/random.h
@@ -0,0 +1,17 @@
+#ifndef __LINUX_RANDOM_WRAPPER_H
+#define __LINUX_RANDOM_WRAPPER_H 1
+
+#include_next <linux/random.h>
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
+
+#ifdef __KERNEL__
+u32 random32(void);
+void srandom32(u32 seed);
+#endif /* __KERNEL__ */
+
+#endif /* linux kernel < 2.6.19 */
+
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/rculist.h b/datapath/linux-2.6/compat-2.6/include/linux/rculist.h
new file mode 100644
index 00000000..4164c0e9
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/rculist.h
@@ -0,0 +1,12 @@
+#ifndef __LINUX_RCULIST_WRAPPER_H
+#define __LINUX_RCULIST_WRAPPER_H
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
+#include_next <linux/rculist.h>
+#else
+/* Prior to 2.6.26, the contents of rculist.h were part of list.h. */
+#include <linux/list.h>
+#endif
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/rtnetlink.h b/datapath/linux-2.6/compat-2.6/include/linux/rtnetlink.h
new file mode 100644
index 00000000..8bc51560
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/rtnetlink.h
@@ -0,0 +1,29 @@
+#ifndef __RTNETLINK_WRAPPER_H
+#define __RTNETLINK_WRAPPER_H 1
+
+#include_next <linux/rtnetlink.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
+static inline int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid,
+ u32 group, struct nlmsghdr *nlh, gfp_t flags)
+{
+ BUG_ON(nlh); /* not implemented */
+ if (group) {
+ /* errors reported via destination sk->sk_err */
+ nlmsg_multicast(rtnl, skb, 0, group);
+ }
+ return 0;
+}
+
+static inline void rtnl_set_sk_err(struct net *net, u32 group, int error)
+{
+ netlink_set_err(rtnl, 0, group, error);
+}
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
+#define rtnl_notify(skb, net, pid, group, nlh, flags) \
+ ((void) (net), rtnl_notify(skb, pid, group, nlh, flags))
+#define rtnl_set_sk_err(net, group, error) \
+ ((void) (net), rtnl_set_sk_err(group, error))
+#endif /* linux kernel < 2.6.25 */
+
+#endif /* linux/rtnetlink.h wrapper */
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/skbuff.h b/datapath/linux-2.6/compat-2.6/include/linux/skbuff.h
new file mode 100644
index 00000000..666ef850
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/skbuff.h
@@ -0,0 +1,170 @@
+#ifndef __LINUX_SKBUFF_WRAPPER_H
+#define __LINUX_SKBUFF_WRAPPER_H 1
+
+#include_next <linux/skbuff.h>
+
+#include <linux/version.h>
+
+#ifndef HAVE_SKB_COPY_FROM_LINEAR_DATA_OFFSET
+static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb,
+ const int offset, void *to,
+ const unsigned int len)
+{
+ memcpy(to, skb->data + offset, len);
+}
+
+static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb,
+ const int offset,
+ const void *from,
+ const unsigned int len)
+{
+ memcpy(skb->data + offset, from, len);
+}
+
+#endif /* !HAVE_SKB_COPY_FROM_LINEAR_DATA_OFFSET */
+
+/*
+ * The networking layer reserves some headroom in skb data (via
+ * dev_alloc_skb). This is used to avoid having to reallocate skb data when
+ * the header has to grow. In the default case, if the header has to grow
+ * 16 bytes or less we avoid the reallocation.
+ *
+ * Unfortunately this headroom changes the DMA alignment of the resulting
+ * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive
+ * on some architectures. An architecture can override this value,
+ * perhaps setting it to a cacheline in size (since that will maintain
+ * cacheline alignment of the DMA). It must be a power of 2.
+ *
+ * Various parts of the networking layer expect at least 16 bytes of
+ * headroom, you should not reduce this.
+ */
+#ifndef NET_SKB_PAD
+#define NET_SKB_PAD 16
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
+static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom,
+ int cloned)
+{
+ int delta = 0;
+
+ if (headroom < NET_SKB_PAD)
+ headroom = NET_SKB_PAD;
+ if (headroom > skb_headroom(skb))
+ delta = headroom - skb_headroom(skb);
+
+ if (delta || cloned)
+ return pskb_expand_head(skb, ALIGN(delta, NET_SKB_PAD), 0,
+ GFP_ATOMIC);
+ return 0;
+}
+
+static inline int skb_cow_head(struct sk_buff *skb, unsigned int headroom)
+{
+ return __skb_cow(skb, headroom, skb_header_cloned(skb));
+}
+#endif /* linux < 2.6.23 */
+
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
+/* Emulate Linux 2.6.17 and later behavior, in which kfree_skb silently ignores
+ * null pointer arguments. */
+#define kfree_skb(skb) kfree_skb_maybe_null(skb)
+static inline void kfree_skb_maybe_null(struct sk_buff *skb)
+{
+ if (likely(skb != NULL))
+ (kfree_skb)(skb);
+}
+#endif
+
+
+#ifndef CHECKSUM_PARTIAL
+/* Note that CHECKSUM_PARTIAL is not implemented, but this allows us to at
+ * least test against it: see update_csum() in forward.c. */
+#define CHECKSUM_PARTIAL 3
+#endif
+#ifndef CHECKSUM_COMPLETE
+#define CHECKSUM_COMPLETE CHECKSUM_HW
+#endif
+
+#ifdef HAVE_MAC_RAW
+#define mac_header mac.raw
+#define network_header nh.raw
+#endif
+
+#ifndef HAVE_SKBUFF_HEADER_HELPERS
+static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
+{
+ return skb->h.raw;
+}
+
+static inline void skb_reset_transport_header(struct sk_buff *skb)
+{
+ skb->h.raw = skb->data;
+}
+
+static inline void skb_set_transport_header(struct sk_buff *skb,
+ const int offset)
+{
+ skb->h.raw = skb->data + offset;
+}
+
+static inline unsigned char *skb_network_header(const struct sk_buff *skb)
+{
+ return skb->nh.raw;
+}
+
+static inline void skb_set_network_header(struct sk_buff *skb, const int offset)
+{
+ skb->nh.raw = skb->data + offset;
+}
+
+static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
+{
+ return skb->mac.raw;
+}
+
+static inline void skb_reset_mac_header(struct sk_buff *skb)
+{
+ skb->mac_header = skb->data;
+}
+
+static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
+{
+ skb->mac.raw = skb->data + offset;
+}
+
+static inline int skb_transport_offset(const struct sk_buff *skb)
+{
+ return skb_transport_header(skb) - skb->data;
+}
+
+static inline int skb_network_offset(const struct sk_buff *skb)
+{
+ return skb_network_header(skb) - skb->data;
+}
+
+static inline void skb_copy_to_linear_data(struct sk_buff *skb,
+ const void *from,
+ const unsigned int len)
+{
+ memcpy(skb->data, from, len);
+}
+#endif /* !HAVE_SKBUFF_HEADER_HELPERS */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18)
+#warning "TSO/UFO not supported on kernels earlier than 2.6.18"
+
+static inline int skb_is_gso(const struct sk_buff *skb)
+{
+ return 0;
+}
+
+static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb,
+ int features)
+{
+ return NULL;
+}
+#endif /* before 2.6.18 */
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/tcp.h b/datapath/linux-2.6/compat-2.6/include/linux/tcp.h
new file mode 100644
index 00000000..6fad1933
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/tcp.h
@@ -0,0 +1,18 @@
+#ifndef __LINUX_TCP_WRAPPER_H
+#define __LINUX_TCP_WRAPPER_H 1
+
+#include_next <linux/tcp.h>
+
+#ifndef HAVE_SKBUFF_HEADER_HELPERS
+static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb)
+{
+ return (struct tcphdr *)skb_transport_header(skb);
+}
+
+static inline unsigned int tcp_hdrlen(const struct sk_buff *skb)
+{
+ return tcp_hdr(skb)->doff * 4;
+}
+#endif /* !HAVE_SKBUFF_HEADER_HELPERS */
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/timer.h b/datapath/linux-2.6/compat-2.6/include/linux/timer.h
new file mode 100644
index 00000000..6c3a9b0f
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/timer.h
@@ -0,0 +1,96 @@
+#ifndef __LINUX_TIMER_WRAPPER_H
+#define __LINUX_TIMER_WRAPPER_H 1
+
+#include_next <linux/timer.h>
+
+#include <linux/version.h>
+
+#ifndef RHEL_RELEASE_VERSION
+#define RHEL_RELEASE_VERSION(X,Y) ( 0 )
+#endif
+#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)) && \
+ (!defined(RHEL_RELEASE_CODE) || \
+ (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,1))))
+
+extern unsigned long volatile jiffies;
+
+/**
+ * __round_jiffies - function to round jiffies to a full second
+ * @j: the time in (absolute) jiffies that should be rounded
+ * @cpu: the processor number on which the timeout will happen
+ *
+ * __round_jiffies() rounds an absolute time in the future (in jiffies)
+ * up or down to (approximately) full seconds. This is useful for timers
+ * for which the exact time they fire does not matter too much, as long as
+ * they fire approximately every X seconds.
+ *
+ * By rounding these timers to whole seconds, all such timers will fire
+ * at the same time, rather than at various times spread out. The goal
+ * of this is to have the CPU wake up less, which saves power.
+ *
+ * The exact rounding is skewed for each processor to avoid all
+ * processors firing at the exact same time, which could lead
+ * to lock contention or spurious cache line bouncing.
+ *
+ * The return value is the rounded version of the @j parameter.
+ */
+static inline unsigned long __round_jiffies(unsigned long j, int cpu)
+{
+ int rem;
+ unsigned long original = j;
+
+ /*
+ * We don't want all cpus firing their timers at once hitting the
+ * same lock or cachelines, so we skew each extra cpu with an extra
+ * 3 jiffies. This 3 jiffies came originally from the mm/ code which
+ * already did this.
+ * The skew is done by adding 3*cpunr, then round, then subtract this
+ * extra offset again.
+ */
+ j += cpu * 3;
+
+ rem = j % HZ;
+
+ /*
+ * If the target jiffie is just after a whole second (which can happen
+ * due to delays of the timer irq, long irq off times etc etc) then
+ * we should round down to the whole second, not up. Use 1/4th second
+ * as cutoff for this rounding as an extreme upper bound for this.
+ */
+ if (rem < HZ/4) /* round down */
+ j = j - rem;
+ else /* round up */
+ j = j - rem + HZ;
+
+ /* now that we have rounded, subtract the extra skew again */
+ j -= cpu * 3;
+
+ if (j <= jiffies) /* rounding ate our timeout entirely; */
+ return original;
+ return j;
+}
+
+
+/**
+ * round_jiffies - function to round jiffies to a full second
+ * @j: the time in (absolute) jiffies that should be rounded
+ *
+ * round_jiffies() rounds an absolute time in the future (in jiffies)
+ * up or down to (approximately) full seconds. This is useful for timers
+ * for which the exact time they fire does not matter too much, as long as
+ * they fire approximately every X seconds.
+ *
+ * By rounding these timers to whole seconds, all such timers will fire
+ * at the same time, rather than at various times spread out. The goal
+ * of this is to have the CPU wake up less, which saves power.
+ *
+ * The return value is the rounded version of the @j parameter.
+ */
+static inline unsigned long round_jiffies(unsigned long j)
+{
+ return __round_jiffies(j, 0); // FIXME
+}
+
+#endif /* linux kernel < 2.6.20 */
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/types.h b/datapath/linux-2.6/compat-2.6/include/linux/types.h
new file mode 100644
index 00000000..c1f375eb
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/types.h
@@ -0,0 +1,14 @@
+#ifndef __LINUX_TYPES_WRAPPER_H
+#define __LINUX_TYPES_WRAPPER_H 1
+
+#include_next <linux/types.h>
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
+
+typedef __u16 __bitwise __sum16;
+typedef __u32 __bitwise __wsum;
+
+#endif /* linux kernel < 2.6.20 */
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/udp.h b/datapath/linux-2.6/compat-2.6/include/linux/udp.h
new file mode 100644
index 00000000..6fe4721b
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/udp.h
@@ -0,0 +1,13 @@
+#ifndef __LINUX_UDP_WRAPPER_H
+#define __LINUX_UDP_WRAPPER_H 1
+
+#include_next <linux/udp.h>
+
+#ifndef HAVE_SKBUFF_HEADER_HELPERS
+static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
+{
+ return (struct udphdr *)skb_transport_header(skb);
+}
+#endif /* HAVE_SKBUFF_HEADER_HELPERS */
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/workqueue.h b/datapath/linux-2.6/compat-2.6/include/linux/workqueue.h
new file mode 100644
index 00000000..1ac3b6ec
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/workqueue.h
@@ -0,0 +1,42 @@
+#ifndef __LINUX_WORKQUEUE_WRAPPER_H
+#define __LINUX_WORKQUEUE_WRAPPER_H 1
+
+#include_next <linux/workqueue.h>
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
+
+#ifdef __KERNEL__
+/*
+ * initialize a work-struct's func and data pointers:
+ */
+#undef PREPARE_WORK
+#define PREPARE_WORK(_work, _func) \
+ do { \
+ (_work)->func = (void(*)(void*)) _func; \
+ (_work)->data = _work; \
+ } while (0)
+
+/*
+ * initialize all of a work-struct:
+ */
+#undef INIT_WORK
+#define INIT_WORK(_work, _func) \
+ do { \
+ INIT_LIST_HEAD(&(_work)->entry); \
+ (_work)->pending = 0; \
+ PREPARE_WORK((_work), (_func)); \
+ init_timer(&(_work)->timer); \
+ } while (0)
+
+#endif /* __KERNEL__ */
+
+#endif /* linux kernel < 2.6.20 */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
+/* There is no equivalent to cancel_work_sync() so just flush all
+ * pending work. */
+#define cancel_work_sync(_work) flush_scheduled_work()
+#endif
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/net/checksum.h b/datapath/linux-2.6/compat-2.6/include/net/checksum.h
new file mode 100644
index 00000000..c64c6bd0
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/net/checksum.h
@@ -0,0 +1,16 @@
+#ifndef __NET_CHECKSUM_WRAPPER_H
+#define __NET_CHECKSUM_WRAPPER_H 1
+
+#include_next <net/checksum.h>
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
+
+static inline __wsum csum_unfold(__sum16 n)
+{
+ return (__force __wsum)n;
+}
+
+#endif /* linux kernel < 2.6.20 */
+
+#endif /* checksum.h */
diff --git a/datapath/linux-2.6/compat-2.6/include/net/genetlink.h b/datapath/linux-2.6/compat-2.6/include/net/genetlink.h
new file mode 100644
index 00000000..57a47316
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/net/genetlink.h
@@ -0,0 +1,123 @@
+#ifndef __NET_GENERIC_NETLINK_WRAPPER_H
+#define __NET_GENERIC_NETLINK_WRAPPER_H 1
+
+
+#include <linux/netlink.h>
+#include_next <net/genetlink.h>
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
+
+#include <linux/genetlink.h>
+
+/*----------------------------------------------------------------------------
+ * In 2.6.23, registering of multicast groups was added. Our compatibility
+ * layer just supports registering a single group, since that's all we
+ * need.
+ */
+
+/**
+ * struct genl_multicast_group - generic netlink multicast group
+ * @name: name of the multicast group, names are per-family
+ * @id: multicast group ID, assigned by the core, to use with
+ * genlmsg_multicast().
+ * @list: list entry for linking
+ * @family: pointer to family, need not be set before registering
+ */
+struct genl_multicast_group
+{
+ struct genl_family *family; /* private */
+ struct list_head list; /* private */
+ char name[GENL_NAMSIZ];
+ u32 id;
+};
+
+int genl_register_mc_group(struct genl_family *family,
+ struct genl_multicast_group *grp);
+#endif /* linux kernel < 2.6.23 */
+
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
+/**
+ * genlmsg_msg_size - length of genetlink message not including padding
+ * @payload: length of message payload
+ */
+static inline int genlmsg_msg_size(int payload)
+{
+ return GENL_HDRLEN + payload;
+}
+
+/**
+ * genlmsg_total_size - length of genetlink message including padding
+ * @payload: length of message payload
+ */
+static inline int genlmsg_total_size(int payload)
+{
+ return NLMSG_ALIGN(genlmsg_msg_size(payload));
+}
+
+#define genlmsg_multicast(s, p, g, f) \
+ genlmsg_multicast_flags((s), (p), (g), (f))
+
+static inline int genlmsg_multicast_flags(struct sk_buff *skb, u32 pid,
+ unsigned int group, gfp_t flags)
+{
+ int err;
+
+ NETLINK_CB(skb).dst_group = group;
+
+ err = netlink_broadcast(genl_sock, skb, pid, group, flags);
+ if (err > 0)
+ err = 0;
+
+ return err;
+}
+#endif /* linux kernel < 2.6.19 */
+
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
+
+#define genlmsg_put(skb, p, seq, fam, flg, c) \
+ genlmsg_put((skb), (p), (seq), (fam)->id, (fam)->hdrsize, \
+ (flg), (c), (fam)->version)
+
+/**
+ * genlmsg_put_reply - Add generic netlink header to a reply message
+ * @skb: socket buffer holding the message
+ * @info: receiver info
+ * @family: generic netlink family
+ * @flags: netlink message flags
+ * @cmd: generic netlink command
+ *
+ * Returns pointer to user specific header
+ */
+static inline void *genlmsg_put_reply(struct sk_buff *skb,
+ struct genl_info *info, struct genl_family *family,
+ int flags, u8 cmd)
+{
+ return genlmsg_put(skb, info->snd_pid, info->snd_seq, family,
+ flags, cmd);
+}
+
+/**
+ * genlmsg_reply - reply to a request
+ * @skb: netlink message to be sent back
+ * @info: receiver information
+ */
+static inline int genlmsg_reply(struct sk_buff *skb, struct genl_info *info)
+{
+ return genlmsg_unicast(skb, info->snd_pid);
+}
+
+/**
+ * genlmsg_new - Allocate a new generic netlink message
+ * @payload: size of the message payload
+ * @flags: the type of memory to allocate.
+ */
+static inline struct sk_buff *genlmsg_new(size_t payload, gfp_t flags)
+{
+ return nlmsg_new(genlmsg_total_size(payload), flags);
+}
+#endif /* linux kernel < 2.6.20 */
+
+#endif /* genetlink.h */
diff --git a/datapath/linux-2.6/compat-2.6/include/net/netlink.h b/datapath/linux-2.6/compat-2.6/include/net/netlink.h
new file mode 100644
index 00000000..e0d594d7
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/net/netlink.h
@@ -0,0 +1,22 @@
+#ifndef __NET_NETLINK_WRAPPER_H
+#define __NET_NETLINK_WRAPPER_H 1
+
+#include_next <net/netlink.h>
+
+#ifndef HAVE_NLA_NUL_STRING
+#define NLA_NUL_STRING NLA_STRING
+
+static inline int VERIFY_NUL_STRING(struct nlattr *attr)
+{
+ return (!attr || (nla_len(attr)
+ && memchr(nla_data(attr), '\0', nla_len(attr)))
+ ? 0 : EINVAL);
+}
+#else
+static inline int VERIFY_NUL_STRING(struct nlattr *attr)
+{
+ return 0;
+}
+#endif /* !HAVE_NLA_NUL_STRING */
+
+#endif /* net/netlink.h */
diff --git a/datapath/linux-2.6/compat-2.6/random32.c b/datapath/linux-2.6/compat-2.6/random32.c
new file mode 100644
index 00000000..b0dd2a32
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/random32.c
@@ -0,0 +1,144 @@
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
+
+/*
+ This is a maximally equidistributed combined Tausworthe generator
+ based on code from GNU Scientific Library 1.5 (30 Jun 2004)
+
+ x_n = (s1_n ^ s2_n ^ s3_n)
+
+ s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19))
+ s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25))
+ s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11))
+
+ The period of this generator is about 2^88.
+
+ From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe
+ Generators", Mathematics of Computation, 65, 213 (1996), 203--213.
+
+ This is available on the net from L'Ecuyer's home page,
+
+ http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps
+ ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps
+
+ There is an erratum in the paper "Tables of Maximally
+ Equidistributed Combined LFSR Generators", Mathematics of
+ Computation, 68, 225 (1999), 261--269:
+ http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps
+
+ ... the k_j most significant bits of z_j must be non-
+ zero, for each j. (Note: this restriction also applies to the
+ computer code given in [4], but was mistakenly not mentioned in
+ that paper.)
+
+ This affects the seeding procedure by imposing the requirement
+ s1 > 1, s2 > 7, s3 > 15.
+
+*/
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/jiffies.h>
+#include <linux/random.h>
+#include <linux/smp.h>
+
+#include "compat26.h"
+
+struct rnd_state {
+ u32 s1, s2, s3;
+};
+
+static struct rnd_state net_rand_state[NR_CPUS];
+
+static u32 __random32(struct rnd_state *state)
+{
+#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
+
+ state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12);
+ state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4);
+ state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17);
+
+ return (state->s1 ^ state->s2 ^ state->s3);
+}
+
+static void __set_random32(struct rnd_state *state, unsigned long s)
+{
+ if (s == 0)
+ s = 1; /* default seed is 1 */
+
+#define LCG(n) (69069 * n)
+ state->s1 = LCG(s);
+ state->s2 = LCG(state->s1);
+ state->s3 = LCG(state->s2);
+
+ /* "warm it up" */
+ __random32(state);
+ __random32(state);
+ __random32(state);
+ __random32(state);
+ __random32(state);
+ __random32(state);
+}
+
+/**
+ * random32 - pseudo random number generator
+ *
+ * A 32 bit pseudo-random number is generated using a fast
+ * algorithm suitable for simulation. This algorithm is NOT
+ * considered safe for cryptographic use.
+ */
+u32 random32(void)
+{
+ return __random32(&net_rand_state[smp_processor_id()]);
+}
+
+/**
+ * srandom32 - add entropy to pseudo random number generator
+ * @seed: seed value
+ *
+ * Add some additional seeding to the random32() pool.
+ * Note: this pool is per cpu so it only affects current CPU.
+ */
+void srandom32(u32 entropy)
+{
+ struct rnd_state *state = &net_rand_state[smp_processor_id()];
+ __set_random32(state, state->s1 ^ entropy);
+}
+
+static int __init random32_reseed(void);
+
+/*
+ * Generate some initially weak seeding values to allow
+ * to start the random32() engine.
+ */
+int __init random32_init(void)
+{
+ int i;
+
+ for (i = 0; i < NR_CPUS; i++) {
+ struct rnd_state *state = &net_rand_state[i];
+ __set_random32(state, i + jiffies);
+ }
+ random32_reseed();
+ return 0;
+}
+
+/*
+ * Generate better values after random number generator
+ * is fully initialized.
+ */
+static int __init random32_reseed(void)
+{
+ int i;
+ unsigned long seed;
+
+ for (i = 0; i < NR_CPUS; i++) {
+ struct rnd_state *state = &net_rand_state[i];
+
+ get_random_bytes(&seed, sizeof(seed));
+ __set_random32(state, seed);
+ }
+ return 0;
+}
+
+#endif /* kernel < 2.6.19 */
diff --git a/datapath/linux-2.6/compat-2.6/veth.c b/datapath/linux-2.6/compat-2.6/veth.c
new file mode 100644
index 00000000..3cda3365
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/veth.c
@@ -0,0 +1,537 @@
+/* veth driver port to Linux 2.6.18 */
+
+/*
+ * drivers/net/veth.c
+ *
+ * Copyright (C) 2007, 2009 OpenVZ http://openvz.org, SWsoft Inc
+ *
+ * Author: Pavel Emelianov <xemul@openvz.org>
+ * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com>
+ *
+ */
+
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#include <linux/etherdevice.h>
+
+#include <net/dst.h>
+#include <net/xfrm.h>
+
+#define DRV_NAME "veth"
+#define DRV_VERSION "1.0"
+
+struct veth_net_stats {
+ unsigned long rx_packets;
+ unsigned long tx_packets;
+ unsigned long rx_bytes;
+ unsigned long tx_bytes;
+ unsigned long tx_dropped;
+};
+
+struct veth_priv {
+ struct net_device *peer;
+ struct net_device *dev;
+ struct list_head list;
+ struct veth_net_stats *stats;
+ unsigned ip_summed;
+ struct net_device_stats dev_stats;
+};
+
+static LIST_HEAD(veth_list);
+
+/*
+ * ethtool interface
+ */
+
+static struct {
+ const char string[ETH_GSTRING_LEN];
+} ethtool_stats_keys[] = {
+ { "peer_ifindex" },
+};
+
+static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+ cmd->supported = 0;
+ cmd->advertising = 0;
+ cmd->speed = SPEED_10000;
+ cmd->duplex = DUPLEX_FULL;
+ cmd->port = PORT_TP;
+ cmd->phy_address = 0;
+ cmd->transceiver = XCVR_INTERNAL;
+ cmd->autoneg = AUTONEG_DISABLE;
+ cmd->maxtxpkt = 0;
+ cmd->maxrxpkt = 0;
+ return 0;
+}
+
+static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
+{
+ strcpy(info->driver, DRV_NAME);
+ strcpy(info->version, DRV_VERSION);
+ strcpy(info->fw_version, "N/A");
+}
+
+static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
+{
+ switch(stringset) {
+ case ETH_SS_STATS:
+ memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
+ break;
+ }
+}
+
+static void veth_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct veth_priv *priv;
+
+ priv = netdev_priv(dev);
+ data[0] = priv->peer->ifindex;
+}
+
+static u32 veth_get_rx_csum(struct net_device *dev)
+{
+ struct veth_priv *priv;
+
+ priv = netdev_priv(dev);
+ return priv->ip_summed == CHECKSUM_UNNECESSARY;
+}
+
+static int veth_set_rx_csum(struct net_device *dev, u32 data)
+{
+ struct veth_priv *priv;
+
+ priv = netdev_priv(dev);
+ priv->ip_summed = data ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
+ return 0;
+}
+
+static u32 veth_get_tx_csum(struct net_device *dev)
+{
+ return (dev->features & NETIF_F_NO_CSUM) != 0;
+}
+
+static int veth_set_tx_csum(struct net_device *dev, u32 data)
+{
+ if (data)
+ dev->features |= NETIF_F_NO_CSUM;
+ else
+ dev->features &= ~NETIF_F_NO_CSUM;
+ return 0;
+}
+
+static struct ethtool_ops veth_ethtool_ops = {
+ .get_settings = veth_get_settings,
+ .get_drvinfo = veth_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_rx_csum = veth_get_rx_csum,
+ .set_rx_csum = veth_set_rx_csum,
+ .get_tx_csum = veth_get_tx_csum,
+ .set_tx_csum = veth_set_tx_csum,
+ .get_sg = ethtool_op_get_sg,
+ .set_sg = ethtool_op_set_sg,
+ .get_strings = veth_get_strings,
+ .get_ethtool_stats = veth_get_ethtool_stats,
+};
+
+/*
+ * xmit
+ */
+
+static int veth_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct net_device *rcv = NULL;
+ struct veth_priv *priv, *rcv_priv;
+ struct veth_net_stats *stats;
+ int length, cpu;
+
+ skb_orphan(skb);
+
+ priv = netdev_priv(dev);
+ rcv = priv->peer;
+ rcv_priv = netdev_priv(rcv);
+
+ cpu = smp_processor_id();
+ stats = per_cpu_ptr(priv->stats, cpu);
+
+ if (!(rcv->flags & IFF_UP))
+ goto outf;
+
+ skb->dev = rcv;
+ skb->pkt_type = PACKET_HOST;
+ skb->protocol = eth_type_trans(skb, rcv);
+ if (dev->features & NETIF_F_NO_CSUM)
+ skb->ip_summed = rcv_priv->ip_summed;
+
+ dst_release(skb->dst);
+ skb->dst = NULL;
+ secpath_reset(skb);
+ nf_reset(skb);
+
+ length = skb->len;
+
+ stats->tx_bytes += length;
+ stats->tx_packets++;
+
+ stats = per_cpu_ptr(rcv_priv->stats, cpu);
+ stats->rx_bytes += length;
+ stats->rx_packets++;
+
+ netif_rx(skb);
+ return 0;
+
+outf:
+ kfree_skb(skb);
+ stats->tx_dropped++;
+ return 0;
+}
+
+/*
+ * general routines
+ */
+
+static struct net_device_stats *veth_get_stats(struct net_device *dev)
+{
+ struct veth_priv *priv;
+ struct net_device_stats *dev_stats;
+ int cpu;
+ struct veth_net_stats *stats;
+
+ priv = netdev_priv(dev);
+ dev_stats = &priv->dev_stats;
+
+ dev_stats->rx_packets = 0;
+ dev_stats->tx_packets = 0;
+ dev_stats->rx_bytes = 0;
+ dev_stats->tx_bytes = 0;
+ dev_stats->tx_dropped = 0;
+
+ for_each_online_cpu(cpu) {
+ stats = per_cpu_ptr(priv->stats, cpu);
+
+ dev_stats->rx_packets += stats->rx_packets;
+ dev_stats->tx_packets += stats->tx_packets;
+ dev_stats->rx_bytes += stats->rx_bytes;
+ dev_stats->tx_bytes += stats->tx_bytes;
+ dev_stats->tx_dropped += stats->tx_dropped;
+ }
+
+ return dev_stats;
+}
+
+static int veth_open(struct net_device *dev)
+{
+ struct veth_priv *priv;
+
+ priv = netdev_priv(dev);
+ if (priv->peer == NULL)
+ return -ENOTCONN;
+
+ if (priv->peer->flags & IFF_UP) {
+ netif_carrier_on(dev);
+ netif_carrier_on(priv->peer);
+ }
+ return 0;
+}
+
+static int veth_dev_init(struct net_device *dev)
+{
+ struct veth_net_stats *stats;
+ struct veth_priv *priv;
+
+ stats = alloc_percpu(struct veth_net_stats);
+ if (stats == NULL)
+ return -ENOMEM;
+
+ priv = netdev_priv(dev);
+ priv->stats = stats;
+ return 0;
+}
+
+static void veth_dev_free(struct net_device *dev)
+{
+ struct veth_priv *priv;
+
+ priv = netdev_priv(dev);
+ free_percpu(priv->stats);
+ free_netdev(dev);
+}
+
+static void veth_setup(struct net_device *dev)
+{
+ ether_setup(dev);
+
+ dev->hard_start_xmit = veth_xmit;
+ dev->get_stats = veth_get_stats;
+ dev->open = veth_open;
+ dev->ethtool_ops = &veth_ethtool_ops;
+ dev->features |= NETIF_F_LLTX;
+ dev->init = veth_dev_init;
+ dev->destructor = veth_dev_free;
+}
+
+static void veth_change_state(struct net_device *dev)
+{
+ struct net_device *peer;
+ struct veth_priv *priv;
+
+ priv = netdev_priv(dev);
+ peer = priv->peer;
+
+ if (netif_carrier_ok(peer)) {
+ if (!netif_carrier_ok(dev))
+ netif_carrier_on(dev);
+ } else {
+ if (netif_carrier_ok(dev))
+ netif_carrier_off(dev);
+ }
+}
+
+static int veth_device_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = ptr;
+
+ if (dev->open != veth_open)
+ goto out;
+
+ switch (event) {
+ case NETDEV_CHANGE:
+ veth_change_state(dev);
+ break;
+ }
+out:
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block veth_notifier_block __read_mostly = {
+ .notifier_call = veth_device_event,
+};
+
+/*
+ * netlink interface
+ */
+
+static int veth_newlink(const char *devname, const char *peername)
+{
+ int err;
+ const char *names[2];
+ struct net_device *devs[2];
+ int i;
+
+ names[0] = devname;
+ names[1] = peername;
+ devs[0] = devs[1] = NULL;
+
+ for (i = 0; i < 2; i++) {
+ struct net_device *dev;
+
+ err = -ENOMEM;
+ devs[i] = alloc_netdev(sizeof(struct veth_priv),
+ names[i], veth_setup);
+ if (!devs[i]) {
+ goto err;
+ }
+
+ dev = devs[i];
+
+ if (strchr(dev->name, '%')) {
+ err = dev_alloc_name(dev, dev->name);
+ if (err < 0)
+ goto err;
+ }
+ random_ether_addr(dev->dev_addr);
+
+ err = register_netdevice(dev);
+ if (err < 0)
+ goto err;
+
+ netif_carrier_off(dev);
+ }
+
+ /*
+ * tie the devices together
+ */
+
+ for (i = 0; i < 2; i++) {
+ struct veth_priv *priv = netdev_priv(devs[i]);
+ priv->dev = devs[i];
+ priv->peer = devs[!i];
+ if (!i)
+ list_add(&priv->list, &veth_list);
+ else
+ INIT_LIST_HEAD(&priv->list);
+ }
+ return 0;
+
+err:
+ for (i = 0; i < 2; i++) {
+ if (devs[i]) {
+ if (devs[i]->reg_state != NETREG_UNINITIALIZED)
+ unregister_netdevice(devs[i]);
+ else
+ free_netdev(devs[i]);
+ }
+ }
+ return err;
+}
+
+static void veth_dellink(struct net_device *dev)
+{
+ struct veth_priv *priv;
+ struct net_device *peer;
+
+ priv = netdev_priv(dev);
+ peer = priv->peer;
+
+ if (!list_empty(&priv->list))
+ list_del(&priv->list);
+
+ priv = netdev_priv(peer);
+ if (!list_empty(&priv->list))
+ list_del(&priv->list);
+
+ unregister_netdevice(dev);
+ unregister_netdevice(peer);
+}
+
+/*
+ * sysfs
+ */
+
+/*
+ * "show" function for the veth_pairs attribute.
+ * The class parameter is ignored.
+ */
+static ssize_t veth_show_veth_pairs(struct class *cls, char *buffer)
+{
+ int res = 0;
+ struct veth_priv *priv;
+
+ list_for_each_entry(priv, &veth_list, list) {
+ if (res > (PAGE_SIZE - (IFNAMSIZ * 2 + 1))) {
+ /* not enough space for another interface name */
+ if ((PAGE_SIZE - res) > 10)
+ res = PAGE_SIZE - 10;
+ res += sprintf(buffer + res, "++more++");
+ break;
+ }
+ res += sprintf(buffer + res, "%s,%s ",
+ priv->dev->name, priv->peer->name);
+ }
+ res += sprintf(buffer + res, "\n");
+ res++;
+ return res;
+}
+
+/*
+ * "store" function for the veth_pairs attribute. This is what
+ * creates and deletes veth pairs.
+ *
+ * The class parameter is ignored.
+ *
+ */
+static ssize_t veth_store_veth_pairs(struct class *cls, const char *buffer,
+ size_t count)
+{
+ int c = *buffer++;
+ int retval;
+ printk("1\n");
+ if (c == '+') {
+ char devname[IFNAMSIZ + 1] = "";
+ char peername[IFNAMSIZ + 1] = "";
+ char *comma = strchr(buffer, ',');
+ printk("2\n");
+ if (!comma)
+ goto err_no_cmd;
+ strncat(devname, buffer,
+ min_t(int, sizeof devname, comma - buffer));
+ strncat(peername, comma + 1,
+ min_t(int, sizeof peername, strcspn(comma + 1, "\n")));
+ printk("3 '%s' '%s'\n", devname, peername);
+ if (!dev_valid_name(devname) || !dev_valid_name(peername))
+ goto err_no_cmd;
+ printk("4\n");
+ rtnl_lock();
+ retval = veth_newlink(devname, peername);
+ rtnl_unlock();
+ return retval ? retval : count;
+ } else if (c == '-') {
+ struct net_device *dev;
+
+ rtnl_lock();
+ dev = dev_get_by_name(buffer);
+ if (!dev)
+ retval = -ENODEV;
+ else if (dev->init != veth_dev_init)
+ retval = -EINVAL;
+ else {
+ veth_dellink(dev);
+ retval = count;
+ }
+ rtnl_unlock();
+
+ return retval;
+ }
+
+err_no_cmd:
+ printk(KERN_ERR DRV_NAME ": no command found in veth_pairs. Use +ifname,peername or -ifname.\n");
+ return -EPERM;
+}
+
+/* class attribute for veth_pairs file. This ends up in /sys/class/net */
+static CLASS_ATTR(veth_pairs, S_IWUSR | S_IRUGO,
+ veth_show_veth_pairs, veth_store_veth_pairs);
+
+static struct class *netdev_class;
+
+/*
+ * Initialize sysfs. This sets up the veth_pairs file in
+ * /sys/class/net.
+ */
+int veth_create_sysfs(void)
+{
+ struct net_device *dev = dev_get_by_name("lo");
+ if (!dev)
+ return -ESRCH;
+ netdev_class = dev->class_dev.class;
+ if (!netdev_class)
+ return -ENODEV;
+
+ return class_create_file(netdev_class, &class_attr_veth_pairs);
+}
+
+/*
+ * Remove /sys/class/net/veth_pairs.
+ */
+void veth_destroy_sysfs(void)
+{
+ class_remove_file(netdev_class, &class_attr_veth_pairs);
+}
+
+
+
+/*
+ * init/fini
+ */
+
+static __init int veth_init(void)
+{
+ int retval = veth_create_sysfs();
+ if (retval)
+ return retval;
+ register_netdevice_notifier(&veth_notifier_block);
+ return 0;
+}
+
+static __exit void veth_exit(void)
+{
+ unregister_netdevice_notifier(&veth_notifier_block);
+}
+
+module_init(veth_init);
+module_exit(veth_exit);
+
+MODULE_DESCRIPTION("Virtual Ethernet Tunnel");
+MODULE_LICENSE("GPL v2");
diff --git a/datapath/linux-2.6/config/config-linux-2.6.23-rc9-kvm b/datapath/linux-2.6/config/config-linux-2.6.23-rc9-kvm
new file mode 100644
index 00000000..f287cf72
--- /dev/null
+++ b/datapath/linux-2.6/config/config-linux-2.6.23-rc9-kvm
@@ -0,0 +1,1408 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.23-rc9
+# Fri Oct 19 15:08:37 2007
+#
+CONFIG_X86_32=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CMOS_UPDATE=y
+CONFIG_CLOCKSOURCE_WATCHDOG=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_SEMAPHORE_SLEEPERS=y
+CONFIG_X86=y
+CONFIG_MMU=y
+CONFIG_ZONE_DMA=y
+CONFIG_QUICKLIST=y
+CONFIG_GENERIC_ISA_DMA=y
+CONFIG_GENERIC_IOMAP=y
+CONFIG_GENERIC_BUG=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_ARCH_MAY_HAVE_PC_FDC=y
+CONFIG_DMI=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_BSD_PROCESS_ACCT=y
+# CONFIG_BSD_PROCESS_ACCT_V3 is not set
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+# CONFIG_USER_NS is not set
+# CONFIG_AUDIT is not set
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_CPUSETS is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_RELAY=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL=y
+# CONFIG_EMBEDDED is not set
+CONFIG_UID16=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_KALLSYMS_EXTRA_PASS=y
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_ANON_INODES=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+CONFIG_RT_MUTEXES=y
+# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_KMOD=y
+CONFIG_STOP_MACHINE=y
+CONFIG_BLOCK=y
+CONFIG_LBD=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_LSF=y
+# CONFIG_BLK_DEV_BSG is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+
+#
+# Processor type and features
+#
+# CONFIG_TICK_ONESHOT is not set
+# CONFIG_NO_HZ is not set
+# CONFIG_HIGH_RES_TIMERS is not set
+CONFIG_SMP=y
+CONFIG_X86_PC=y
+# CONFIG_X86_ELAN is not set
+# CONFIG_X86_VOYAGER is not set
+# CONFIG_X86_NUMAQ is not set
+# CONFIG_X86_SUMMIT is not set
+# CONFIG_X86_BIGSMP is not set
+# CONFIG_X86_VISWS is not set
+# CONFIG_X86_GENERICARCH is not set
+# CONFIG_X86_ES7000 is not set
+# CONFIG_PARAVIRT is not set
+# CONFIG_M386 is not set
+CONFIG_M486=y
+# CONFIG_M586 is not set
+# CONFIG_M586TSC is not set
+# CONFIG_M586MMX is not set
+# CONFIG_M686 is not set
+# CONFIG_MPENTIUMII is not set
+# CONFIG_MPENTIUMIII is not set
+# CONFIG_MPENTIUMM is not set
+# CONFIG_MCORE2 is not set
+# CONFIG_MPENTIUM4 is not set
+# CONFIG_MK6 is not set
+# CONFIG_MK7 is not set
+# CONFIG_MK8 is not set
+# CONFIG_MCRUSOE is not set
+# CONFIG_MEFFICEON is not set
+# CONFIG_MWINCHIPC6 is not set
+# CONFIG_MWINCHIP2 is not set
+# CONFIG_MWINCHIP3D is not set
+# CONFIG_MGEODEGX1 is not set
+# CONFIG_MGEODE_LX is not set
+# CONFIG_MCYRIXIII is not set
+# CONFIG_MVIAC3_2 is not set
+# CONFIG_MVIAC7 is not set
+CONFIG_X86_GENERIC=y
+CONFIG_X86_CMPXCHG=y
+CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_X86_XADD=y
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_X86_PPRO_FENCE=y
+CONFIG_X86_F00F_BUG=y
+CONFIG_X86_WP_WORKS_OK=y
+CONFIG_X86_INVLPG=y
+CONFIG_X86_BSWAP=y
+CONFIG_X86_POPAD_OK=y
+CONFIG_X86_ALIGNMENT_16=y
+CONFIG_X86_INTEL_USERCOPY=y
+CONFIG_X86_MINIMUM_CPU_FAMILY=4
+# CONFIG_HPET_TIMER is not set
+CONFIG_NR_CPUS=8
+# CONFIG_SCHED_SMT is not set
+CONFIG_SCHED_MC=y
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+CONFIG_PREEMPT_BKL=y
+CONFIG_X86_LOCAL_APIC=y
+CONFIG_X86_IO_APIC=y
+# CONFIG_X86_MCE is not set
+CONFIG_VM86=y
+# CONFIG_TOSHIBA is not set
+# CONFIG_I8K is not set
+# CONFIG_X86_REBOOTFIXUPS is not set
+# CONFIG_MICROCODE is not set
+# CONFIG_X86_MSR is not set
+# CONFIG_X86_CPUID is not set
+
+#
+# Firmware Drivers
+#
+# CONFIG_EDD is not set
+# CONFIG_DELL_RBU is not set
+# CONFIG_DCDBAS is not set
+CONFIG_DMIID=y
+# CONFIG_NOHIGHMEM is not set
+CONFIG_HIGHMEM4G=y
+# CONFIG_HIGHMEM64G is not set
+CONFIG_PAGE_OFFSET=0xC0000000
+CONFIG_HIGHMEM=y
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_ENABLE=y
+CONFIG_ARCH_SELECT_MEMORY_MODEL=y
+CONFIG_ARCH_POPULATES_NODE_MAP=y
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_SPARSEMEM_STATIC=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_RESOURCES_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=1
+CONFIG_BOUNCE=y
+CONFIG_NR_QUICK=1
+CONFIG_VIRT_TO_BUS=y
+# CONFIG_HIGHPTE is not set
+# CONFIG_MATH_EMULATION is not set
+# CONFIG_MTRR is not set
+CONFIG_IRQBALANCE=y
+CONFIG_SECCOMP=y
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_300 is not set
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+# CONFIG_KEXEC is not set
+# CONFIG_CRASH_DUMP is not set
+CONFIG_PHYSICAL_START=0x100000
+# CONFIG_RELOCATABLE is not set
+CONFIG_PHYSICAL_ALIGN=0x100000
+CONFIG_HOTPLUG_CPU=y
+CONFIG_COMPAT_VDSO=y
+CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
+
+#
+# Power management options (ACPI, APM)
+#
+CONFIG_PM=y
+# CONFIG_PM_LEGACY is not set
+# CONFIG_PM_DEBUG is not set
+CONFIG_PM_SLEEP_SMP=y
+CONFIG_PM_SLEEP=y
+CONFIG_SUSPEND_SMP_POSSIBLE=y
+CONFIG_SUSPEND=y
+CONFIG_HIBERNATION_SMP_POSSIBLE=y
+# CONFIG_HIBERNATION is not set
+# CONFIG_ACPI is not set
+CONFIG_APM=y
+# CONFIG_APM_IGNORE_USER_SUSPEND is not set
+# CONFIG_APM_DO_ENABLE is not set
+# CONFIG_APM_CPU_IDLE is not set
+# CONFIG_APM_DISPLAY_BLANK is not set
+# CONFIG_APM_ALLOW_INTS is not set
+# CONFIG_APM_REAL_MODE_POWER_OFF is not set
+
+#
+# CPU Frequency scaling
+#
+# CONFIG_CPU_FREQ is not set
+
+#
+# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
+#
+CONFIG_PCI=y
+# CONFIG_PCI_GOBIOS is not set
+# CONFIG_PCI_GOMMCONFIG is not set
+# CONFIG_PCI_GODIRECT is not set
+CONFIG_PCI_GOANY=y
+CONFIG_PCI_BIOS=y
+CONFIG_PCI_DIRECT=y
+# CONFIG_PCIEPORTBUS is not set
+CONFIG_ARCH_SUPPORTS_MSI=y
+# CONFIG_PCI_MSI is not set
+# CONFIG_PCI_DEBUG is not set
+CONFIG_HT_IRQ=y
+CONFIG_ISA_DMA_API=y
+CONFIG_ISA=y
+# CONFIG_EISA is not set
+# CONFIG_MCA is not set
+# CONFIG_SCx200 is not set
+
+#
+# PCCARD (PCMCIA/CardBus) support
+#
+# CONFIG_PCCARD is not set
+# CONFIG_HOTPLUG_PCI is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_AOUT is not set
+CONFIG_BINFMT_MISC=m
+
+#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+CONFIG_UNIX=y
+CONFIG_XFRM=y
+CONFIG_XFRM_USER=m
+CONFIG_XFRM_SUB_POLICY=y
+CONFIG_XFRM_MIGRATE=y
+CONFIG_NET_KEY=m
+CONFIG_NET_KEY_MIGRATE=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_ASK_IP_FIB_HASH=y
+# CONFIG_IP_FIB_TRIE is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+# CONFIG_IP_ROUTE_VERBOSE is not set
+# CONFIG_IP_PNP is not set
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+# CONFIG_ARPD is not set
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_TUNNEL=m
+CONFIG_INET_TUNNEL=m
+CONFIG_INET_XFRM_MODE_TRANSPORT=m
+CONFIG_INET_XFRM_MODE_TUNNEL=m
+CONFIG_INET_XFRM_MODE_BEET=m
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_BIC=m
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_TCP_CONG_WESTWOOD=m
+CONFIG_TCP_CONG_HTCP=m
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_VEGAS=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+# CONFIG_DEFAULT_BIC is not set
+CONFIG_DEFAULT_CUBIC=y
+# CONFIG_DEFAULT_HTCP is not set
+# CONFIG_DEFAULT_VEGAS is not set
+# CONFIG_DEFAULT_WESTWOOD is not set
+# CONFIG_DEFAULT_RENO is not set
+CONFIG_DEFAULT_TCP_CONG="cubic"
+CONFIG_TCP_MD5SIG=y
+# CONFIG_IP_VS is not set
+CONFIG_IPV6=m
+CONFIG_IPV6_PRIVACY=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+# CONFIG_IPV6_OPTIMISTIC_DAD is not set
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+# CONFIG_IPV6_MIP6 is not set
+CONFIG_INET6_XFRM_TUNNEL=m
+CONFIG_INET6_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_TUNNEL=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_NETWORK_SECMARK=y
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+CONFIG_BRIDGE_NETFILTER=y
+
+#
+# Core Netfilter Configuration
+#
+CONFIG_NETFILTER_NETLINK=m
+CONFIG_NETFILTER_NETLINK_QUEUE=m
+CONFIG_NETFILTER_NETLINK_LOG=m
+CONFIG_NF_CONNTRACK_ENABLED=m
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CT_ACCT=y
+CONFIG_NF_CONNTRACK_MARK=y
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_GRE=m
+CONFIG_NF_CT_PROTO_SCTP=m
+# CONFIG_NF_CT_PROTO_UDPLITE is not set
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NETFILTER_XTABLES=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+# CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set
+# CONFIG_NETFILTER_XT_TARGET_DSCP is not set
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+# CONFIG_NETFILTER_XT_TARGET_NOTRACK is not set
+# CONFIG_NETFILTER_XT_TARGET_TRACE is not set
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+# CONFIG_NETFILTER_XT_MATCH_CONNLIMIT is not set
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+# CONFIG_NETFILTER_XT_MATCH_PHYSDEV is not set
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_SCTP=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+# CONFIG_NETFILTER_XT_MATCH_U32 is not set
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+
+#
+# IP: Netfilter Configuration
+#
+CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_NF_CONNTRACK_PROC_COMPAT=y
+# CONFIG_IP_NF_QUEUE is not set
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_IPRANGE=m
+CONFIG_IP_NF_MATCH_TOS=m
+CONFIG_IP_NF_MATCH_RECENT=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_MATCH_OWNER=m
+CONFIG_IP_NF_MATCH_ADDRTYPE=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_LOG=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_NF_NAT=m
+CONFIG_NF_NAT_NEEDED=y
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+CONFIG_IP_NF_TARGET_NETMAP=m
+CONFIG_IP_NF_TARGET_SAME=m
+CONFIG_NF_NAT_SNMP_BASIC=m
+CONFIG_NF_NAT_PROTO_GRE=m
+CONFIG_NF_NAT_FTP=m
+CONFIG_NF_NAT_IRC=m
+CONFIG_NF_NAT_TFTP=m
+CONFIG_NF_NAT_AMANDA=m
+CONFIG_NF_NAT_PPTP=m
+CONFIG_NF_NAT_H323=m
+CONFIG_NF_NAT_SIP=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_TOS=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+
+#
+# IPv6: Netfilter Configuration (EXPERIMENTAL)
+#
+CONFIG_NF_CONNTRACK_IPV6=m
+# CONFIG_IP6_NF_QUEUE is not set
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_OWNER=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_LOG=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_RAW=m
+
+#
+# DECnet: Netfilter Configuration
+#
+# CONFIG_DECNET_NF_GRABULATOR is not set
+
+#
+# Bridge: Netfilter Configuration
+#
+# CONFIG_BRIDGE_NF_EBTABLES is not set
+CONFIG_IP_DCCP=m
+CONFIG_INET_DCCP_DIAG=m
+CONFIG_IP_DCCP_ACKVEC=y
+
+#
+# DCCP CCIDs Configuration (EXPERIMENTAL)
+#
+CONFIG_IP_DCCP_CCID2=m
+# CONFIG_IP_DCCP_CCID2_DEBUG is not set
+CONFIG_IP_DCCP_CCID3=m
+CONFIG_IP_DCCP_TFRC_LIB=m
+# CONFIG_IP_DCCP_CCID3_DEBUG is not set
+CONFIG_IP_DCCP_CCID3_RTO=100
+
+#
+# DCCP Kernel Hacking
+#
+# CONFIG_IP_DCCP_DEBUG is not set
+CONFIG_IP_SCTP=m
+# CONFIG_SCTP_DBG_MSG is not set
+# CONFIG_SCTP_DBG_OBJCNT is not set
+# CONFIG_SCTP_HMAC_NONE is not set
+# CONFIG_SCTP_HMAC_SHA1 is not set
+CONFIG_SCTP_HMAC_MD5=y
+CONFIG_TIPC=m
+CONFIG_TIPC_ADVANCED=y
+CONFIG_TIPC_ZONES=3
+CONFIG_TIPC_CLUSTERS=1
+CONFIG_TIPC_NODES=255
+CONFIG_TIPC_SLAVE_NODES=0
+CONFIG_TIPC_PORTS=8191
+CONFIG_TIPC_LOG=0
+# CONFIG_TIPC_DEBUG is not set
+CONFIG_ATM=m
+CONFIG_ATM_CLIP=m
+# CONFIG_ATM_CLIP_NO_ICMP is not set
+CONFIG_ATM_LANE=m
+# CONFIG_ATM_MPOA is not set
+CONFIG_ATM_BR2684=m
+CONFIG_ATM_BR2684_IPFILTER=y
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_DECNET=m
+# CONFIG_DECNET_ROUTER is not set
+CONFIG_LLC=m
+CONFIG_LLC2=m
+CONFIG_IPX=m
+CONFIG_IPX_INTERN=y
+CONFIG_ATALK=m
+CONFIG_DEV_APPLETALK=m
+# CONFIG_LTPC is not set
+# CONFIG_COPS is not set
+CONFIG_IPDDP=m
+CONFIG_IPDDP_ENCAP=y
+CONFIG_IPDDP_DECAP=y
+CONFIG_X25=m
+CONFIG_LAPB=m
+CONFIG_ECONET=m
+CONFIG_ECONET_AUNUDP=y
+CONFIG_ECONET_NATIVE=y
+CONFIG_WAN_ROUTER=m
+
+#
+# QoS and/or fair queueing
+#
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_FIFO=y
+
+#
+# Queueing/Scheduling
+#
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_ATM=m
+CONFIG_NET_SCH_PRIO=m
+# CONFIG_NET_SCH_RR is not set
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_INGRESS=m
+
+#
+# Classification
+#
+CONFIG_NET_CLS=y
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_ROUTE=y
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_STACK=32
+CONFIG_NET_EMATCH_CMP=m
+CONFIG_NET_EMATCH_NBYTE=m
+CONFIG_NET_EMATCH_U32=m
+CONFIG_NET_EMATCH_META=m
+CONFIG_NET_EMATCH_TEXT=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+# CONFIG_NET_CLS_POLICE is not set
+CONFIG_NET_CLS_IND=y
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+CONFIG_AF_RXRPC=m
+# CONFIG_AF_RXRPC_DEBUG is not set
+CONFIG_RXKAD=m
+CONFIG_FIB_RULES=y
+
+#
+# Wireless
+#
+# CONFIG_CFG80211 is not set
+# CONFIG_WIRELESS_EXT is not set
+# CONFIG_MAC80211 is not set
+# CONFIG_IEEE80211 is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_DEBUG_DEVRES is not set
+# CONFIG_SYS_HYPERVISOR is not set
+CONFIG_CONNECTOR=m
+# CONFIG_MTD is not set
+CONFIG_PARPORT=m
+CONFIG_PARPORT_PC=m
+# CONFIG_PARPORT_SERIAL is not set
+# CONFIG_PARPORT_PC_FIFO is not set
+# CONFIG_PARPORT_PC_SUPERIO is not set
+# CONFIG_PARPORT_GSC is not set
+# CONFIG_PARPORT_AX88796 is not set
+# CONFIG_PARPORT_1284 is not set
+# CONFIG_PNP is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_FD is not set
+# CONFIG_BLK_DEV_XD is not set
+# CONFIG_PARIDE is not set
+# CONFIG_BLK_CPQ_DA is not set
+# CONFIG_BLK_CPQ_CISS_DA is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_UMEM is not set
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+# CONFIG_BLK_DEV_SX8 is not set
+CONFIG_BLK_DEV_RAM=m
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=4096
+CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+CONFIG_MISC_DEVICES=y
+# CONFIG_IBM_ASM is not set
+# CONFIG_PHANTOM is not set
+# CONFIG_EEPROM_93CX6 is not set
+# CONFIG_SGI_IOC4 is not set
+# CONFIG_TIFM_CORE is not set
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDE=y
+
+#
+# Please see Documentation/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_IDE_SATA is not set
+# CONFIG_BLK_DEV_HD_IDE is not set
+CONFIG_BLK_DEV_IDEDISK=y
+# CONFIG_IDEDISK_MULTI_MODE is not set
+CONFIG_BLK_DEV_IDECD=y
+# CONFIG_BLK_DEV_IDETAPE is not set
+# CONFIG_BLK_DEV_IDEFLOPPY is not set
+# CONFIG_IDE_TASK_IOCTL is not set
+CONFIG_IDE_PROC_FS=y
+
+#
+# IDE chipset support/bugfixes
+#
+CONFIG_IDE_GENERIC=y
+# CONFIG_BLK_DEV_CMD640 is not set
+CONFIG_BLK_DEV_IDEPCI=y
+# CONFIG_IDEPCI_SHARE_IRQ is not set
+CONFIG_IDEPCI_PCIBUS_ORDER=y
+# CONFIG_BLK_DEV_OFFBOARD is not set
+# CONFIG_BLK_DEV_GENERIC is not set
+# CONFIG_BLK_DEV_OPTI621 is not set
+# CONFIG_BLK_DEV_RZ1000 is not set
+# CONFIG_BLK_DEV_IDEDMA_PCI is not set
+# CONFIG_IDE_ARM is not set
+# CONFIG_IDE_CHIPSETS is not set
+# CONFIG_BLK_DEV_IDEDMA is not set
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+
+#
+# Fusion MPT device support
+#
+# CONFIG_FUSION is not set
+
+#
+# IEEE 1394 (FireWire) support
+#
+# CONFIG_FIREWIRE is not set
+# CONFIG_IEEE1394 is not set
+# CONFIG_I2O is not set
+# CONFIG_MACINTOSH_DRIVERS is not set
+CONFIG_NETDEVICES=y
+# CONFIG_NETDEVICES_MULTIQUEUE is not set
+# CONFIG_IFB is not set
+CONFIG_DUMMY=m
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+CONFIG_TUN=m
+# CONFIG_ARCNET is not set
+# CONFIG_PHYLIB is not set
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+# CONFIG_HAPPYMEAL is not set
+# CONFIG_SUNGEM is not set
+# CONFIG_CASSINI is not set
+# CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_LANCE is not set
+# CONFIG_NET_VENDOR_SMC is not set
+# CONFIG_NET_VENDOR_RACAL is not set
+# CONFIG_NET_TULIP is not set
+# CONFIG_AT1700 is not set
+# CONFIG_DEPCA is not set
+# CONFIG_HP100 is not set
+# CONFIG_NET_ISA is not set
+CONFIG_NET_PCI=y
+CONFIG_PCNET32=y
+# CONFIG_PCNET32_NAPI is not set
+# CONFIG_AMD8111_ETH is not set
+# CONFIG_ADAPTEC_STARFIRE is not set
+# CONFIG_AC3200 is not set
+# CONFIG_APRICOT is not set
+# CONFIG_B44 is not set
+# CONFIG_FORCEDETH is not set
+# CONFIG_CS89x0 is not set
+# CONFIG_DGRS is not set
+# CONFIG_EEPRO100 is not set
+# CONFIG_E100 is not set
+# CONFIG_FEALNX is not set
+# CONFIG_NATSEMI is not set
+CONFIG_NE2K_PCI=y
+CONFIG_8139CP=y
+# CONFIG_8139TOO is not set
+# CONFIG_SIS900 is not set
+# CONFIG_EPIC100 is not set
+# CONFIG_SUNDANCE is not set
+# CONFIG_TLAN is not set
+# CONFIG_VIA_RHINE is not set
+# CONFIG_SC92031 is not set
+# CONFIG_NET_POCKET is not set
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
+# CONFIG_TR is not set
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
+# CONFIG_WAN is not set
+CONFIG_ATM_DRIVERS=y
+# CONFIG_ATM_DUMMY is not set
+# CONFIG_ATM_TCP is not set
+# CONFIG_ATM_LANAI is not set
+# CONFIG_ATM_ENI is not set
+# CONFIG_ATM_FIRESTREAM is not set
+# CONFIG_ATM_ZATM is not set
+# CONFIG_ATM_NICSTAR is not set
+# CONFIG_ATM_IDT77252 is not set
+# CONFIG_ATM_AMBASSADOR is not set
+# CONFIG_ATM_HORIZON is not set
+# CONFIG_ATM_IA is not set
+# CONFIG_ATM_FORE200E_MAYBE is not set
+# CONFIG_ATM_HE is not set
+# CONFIG_FDDI is not set
+CONFIG_HIPPI=y
+# CONFIG_ROADRUNNER is not set
+# CONFIG_PLIP is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_SHAPER is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_ISDN is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_TSDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+CONFIG_KEYBOARD_ATKBD=y
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
+# CONFIG_KEYBOARD_STOWAWAY is not set
+CONFIG_INPUT_MOUSE=y
+CONFIG_MOUSE_PS2=y
+CONFIG_MOUSE_PS2_ALPS=y
+CONFIG_MOUSE_PS2_LOGIPS2PP=y
+CONFIG_MOUSE_PS2_SYNAPTICS=y
+CONFIG_MOUSE_PS2_LIFEBOOK=y
+CONFIG_MOUSE_PS2_TRACKPOINT=y
+# CONFIG_MOUSE_PS2_TOUCHKIT is not set
+# CONFIG_MOUSE_SERIAL is not set
+# CONFIG_MOUSE_APPLETOUCH is not set
+# CONFIG_MOUSE_INPORT is not set
+# CONFIG_MOUSE_LOGIBM is not set
+# CONFIG_MOUSE_PC110PAD is not set
+# CONFIG_MOUSE_VSXXXAA is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+CONFIG_SERIO=y
+CONFIG_SERIO_I8042=y
+CONFIG_SERIO_SERPORT=y
+# CONFIG_SERIO_CT82C710 is not set
+# CONFIG_SERIO_PARKBD is not set
+# CONFIG_SERIO_PCIPS2 is not set
+CONFIG_SERIO_LIBPS2=y
+# CONFIG_SERIO_RAW is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+# CONFIG_VT_HW_CONSOLE_BINDING is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_FIX_EARLYCON_MEM=y
+CONFIG_SERIAL_8250_PCI=y
+CONFIG_SERIAL_8250_NR_UARTS=4
+CONFIG_SERIAL_8250_RUNTIME_UARTS=4
+# CONFIG_SERIAL_8250_EXTENDED is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_PRINTER is not set
+# CONFIG_PPDEV is not set
+# CONFIG_TIPAR is not set
+# CONFIG_IPMI_HANDLER is not set
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+
+#
+# Watchdog Device Drivers
+#
+CONFIG_SOFT_WATCHDOG=y
+# CONFIG_ACQUIRE_WDT is not set
+# CONFIG_ADVANTECH_WDT is not set
+# CONFIG_ALIM1535_WDT is not set
+# CONFIG_ALIM7101_WDT is not set
+# CONFIG_SC520_WDT is not set
+# CONFIG_EUROTECH_WDT is not set
+# CONFIG_IB700_WDT is not set
+# CONFIG_IBMASR is not set
+# CONFIG_WAFER_WDT is not set
+# CONFIG_I6300ESB_WDT is not set
+# CONFIG_ITCO_WDT is not set
+# CONFIG_SC1200_WDT is not set
+# CONFIG_PC87413_WDT is not set
+# CONFIG_60XX_WDT is not set
+# CONFIG_SBC8360_WDT is not set
+# CONFIG_CPU5_WDT is not set
+# CONFIG_SMSC37B787_WDT is not set
+# CONFIG_W83627HF_WDT is not set
+# CONFIG_W83697HF_WDT is not set
+# CONFIG_W83877F_WDT is not set
+# CONFIG_W83977F_WDT is not set
+# CONFIG_MACHZ_WDT is not set
+# CONFIG_SBC_EPX_C3_WATCHDOG is not set
+
+#
+# ISA-based Watchdog Cards
+#
+# CONFIG_PCWATCHDOG is not set
+# CONFIG_MIXCOMWD is not set
+# CONFIG_WDT is not set
+
+#
+# PCI-based Watchdog Cards
+#
+# CONFIG_PCIPCWATCHDOG is not set
+# CONFIG_WDTPCI is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_NVRAM is not set
+# CONFIG_RTC is not set
+# CONFIG_GEN_RTC is not set
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+# CONFIG_SONYPI is not set
+# CONFIG_AGP is not set
+# CONFIG_DRM is not set
+# CONFIG_MWAVE is not set
+# CONFIG_PC8736x_GPIO is not set
+# CONFIG_NSC_GPIO is not set
+# CONFIG_CS5535_GPIO is not set
+CONFIG_RAW_DRIVER=m
+CONFIG_MAX_RAW_DEVS=256
+# CONFIG_HANGCHECK_TIMER is not set
+# CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
+CONFIG_DEVPORT=y
+# CONFIG_I2C is not set
+
+#
+# SPI support
+#
+# CONFIG_SPI is not set
+# CONFIG_SPI_MASTER is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+CONFIG_HWMON=y
+# CONFIG_HWMON_VID is not set
+# CONFIG_SENSORS_ABITUGURU is not set
+# CONFIG_SENSORS_ABITUGURU3 is not set
+# CONFIG_SENSORS_K8TEMP is not set
+# CONFIG_SENSORS_F71805F is not set
+# CONFIG_SENSORS_CORETEMP is not set
+# CONFIG_SENSORS_IT87 is not set
+# CONFIG_SENSORS_PC87360 is not set
+# CONFIG_SENSORS_PC87427 is not set
+# CONFIG_SENSORS_SIS5595 is not set
+# CONFIG_SENSORS_SMSC47M1 is not set
+# CONFIG_SENSORS_SMSC47B397 is not set
+# CONFIG_SENSORS_VIA686A is not set
+# CONFIG_SENSORS_VT1211 is not set
+# CONFIG_SENSORS_VT8231 is not set
+# CONFIG_SENSORS_W83627HF is not set
+# CONFIG_SENSORS_W83627EHF is not set
+# CONFIG_SENSORS_HDAPS is not set
+# CONFIG_SENSORS_APPLESMC is not set
+# CONFIG_HWMON_DEBUG_CHIP is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_SM501 is not set
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_DVB_CORE is not set
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+# CONFIG_VGASTATE is not set
+CONFIG_VIDEO_OUTPUT_CONTROL=m
+# CONFIG_FB is not set
+
+#
+# Console display driver support
+#
+CONFIG_VGA_CONSOLE=y
+# CONFIG_VGACON_SOFT_SCROLLBACK is not set
+# CONFIG_VIDEO_SELECT is not set
+# CONFIG_MDA_CONSOLE is not set
+CONFIG_DUMMY_CONSOLE=y
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+CONFIG_HID_SUPPORT=y
+# CONFIG_HID is not set
+CONFIG_USB_SUPPORT=y
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
+CONFIG_USB_ARCH_HAS_EHCI=y
+# CONFIG_USB is not set
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+# CONFIG_MMC is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_INFINIBAND is not set
+# CONFIG_EDAC is not set
+# CONFIG_RTC_CLASS is not set
+
+#
+# DMA Engine support
+#
+# CONFIG_DMA_ENGINE is not set
+
+#
+# DMA Clients
+#
+
+#
+# DMA Devices
+#
+# CONFIG_AUXDISPLAY is not set
+CONFIG_VIRTUALIZATION=y
+# CONFIG_KVM is not set
+
+#
+# Userspace I/O
+#
+# CONFIG_UIO is not set
+
+#
+# File systems
+#
+# CONFIG_EXT2_FS is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+# CONFIG_EXT4DEV_FS is not set
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+CONFIG_FS_POSIX_ACL=y
+# CONFIG_XFS_FS is not set
+# CONFIG_GFS2_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_MINIX_FS is not set
+CONFIG_ROMFS_FS=m
+CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
+# CONFIG_QUOTA is not set
+CONFIG_DNOTIFY=y
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+CONFIG_GENERIC_ACL=y
+
+#
+# CD-ROM/DVD Filesystems
+#
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=y
+CONFIG_UDF_NLS=y
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+CONFIG_RAMFS=y
+CONFIG_CONFIGFS_FS=m
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_ECRYPT_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+CONFIG_CRAMFS=m
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFS_DIRECTIO is not set
+# CONFIG_NFSD is not set
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+# CONFIG_SUNRPC_BIND34 is not set
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_UTF8=m
+
+#
+# Distributed Lock Manager
+#
+# CONFIG_DLM is not set
+CONFIG_INSTRUMENTATION=y
+# CONFIG_PROFILING is not set
+# CONFIG_KPROBES is not set
+
+#
+# Kernel hacking
+#
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+# CONFIG_PRINTK_TIME is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_DEBUG_FS=y
+# CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DETECT_SOFTLOCKUP=y
+CONFIG_SCHED_DEBUG=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_TIMER_STATS is not set
+CONFIG_DEBUG_SLAB=y
+CONFIG_DEBUG_SLAB_LEAK=y
+CONFIG_DEBUG_RT_MUTEXES=y
+CONFIG_DEBUG_PI_LIST=y
+# CONFIG_RT_MUTEX_TESTER is not set
+CONFIG_DEBUG_SPINLOCK=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_LOCKDEP=y
+CONFIG_LOCK_STAT=y
+# CONFIG_DEBUG_LOCKDEP is not set
+CONFIG_TRACE_IRQFLAGS=y
+CONFIG_DEBUG_SPINLOCK_SLEEP=y
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+CONFIG_STACKTRACE=y
+CONFIG_DEBUG_KOBJECT=y
+CONFIG_DEBUG_HIGHMEM=y
+CONFIG_DEBUG_BUGVERBOSE=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_LIST=y
+CONFIG_FRAME_POINTER=y
+CONFIG_FORCED_INLINING=y
+CONFIG_RCU_TORTURE_TEST=m
+# CONFIG_FAULT_INJECTION is not set
+CONFIG_EARLY_PRINTK=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+# CONFIG_DEBUG_STACK_USAGE is not set
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DEBUG_RODATA=y
+CONFIG_4KSTACKS=y
+CONFIG_X86_FIND_SMP_CONFIG=y
+CONFIG_X86_MPPARSE=y
+CONFIG_DOUBLEFAULT=y
+
+#
+# Security options
+#
+CONFIG_KEYS=y
+# CONFIG_KEYS_DEBUG_PROC_KEYS is not set
+# CONFIG_SECURITY is not set
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_BLKCIPHER=m
+CONFIG_CRYPTO_HASH=m
+CONFIG_CRYPTO_MANAGER=m
+CONFIG_CRYPTO_HMAC=m
+# CONFIG_CRYPTO_XCBC is not set
+CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_SHA1=m
+CONFIG_CRYPTO_SHA256=m
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_WP512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+CONFIG_CRYPTO_GF128MUL=m
+# CONFIG_CRYPTO_ECB is not set
+CONFIG_CRYPTO_CBC=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_LRW=m
+# CONFIG_CRYPTO_CRYPTD is not set
+CONFIG_CRYPTO_DES=m
+CONFIG_CRYPTO_FCRYPT=m
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+# CONFIG_CRYPTO_TWOFISH_586 is not set
+# CONFIG_CRYPTO_SERPENT is not set
+CONFIG_CRYPTO_AES=m
+# CONFIG_CRYPTO_AES_586 is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+CONFIG_CRYPTO_TEA=m
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+CONFIG_CRYPTO_DEFLATE=m
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_TEST is not set
+CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_DEV_PADLOCK is not set
+# CONFIG_CRYPTO_DEV_GEODE is not set
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+CONFIG_CRC_CCITT=m
+CONFIG_CRC16=m
+CONFIG_CRC_ITU_T=m
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+CONFIG_LIBCRC32C=m
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=m
+CONFIG_TEXTSEARCH=y
+CONFIG_TEXTSEARCH_KMP=m
+CONFIG_TEXTSEARCH_BM=m
+CONFIG_TEXTSEARCH_FSM=m
+CONFIG_PLIST=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_GENERIC_PENDING_IRQ=y
+CONFIG_X86_SMP=y
+CONFIG_X86_HT=y
+CONFIG_X86_BIOS_REBOOT=y
+CONFIG_X86_TRAMPOLINE=y
+CONFIG_KTIME_SCALAR=y
diff --git a/datapath/table.c b/datapath/table.c
new file mode 100644
index 00000000..c0885b70
--- /dev/null
+++ b/datapath/table.c
@@ -0,0 +1,240 @@
+#include "flow.h"
+#include "datapath.h"
+
+#include <linux/gfp.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <asm/pgtable.h>
+
+/* Frees the two-level bucket array 'flows', which spans 'n_buckets'
+ * buckets.  If 'free_flows' is nonzero, any sw_flows still installed in
+ * the buckets are freed as well; otherwise the flows are left alone
+ * (e.g. because they were migrated into a replacement table). */
+static void free_table(struct sw_flow ***flows, unsigned int n_buckets,
+		       int free_flows)
+{
+	unsigned int i;
+
+	/* One L2 page of bucket slots per DP_L1_SIZE buckets. */
+	for (i = 0; i < n_buckets >> DP_L1_BITS; i++) {
+		struct sw_flow **l2 = flows[i];
+		if (free_flows) {
+			unsigned int j;
+			for (j = 0; j < DP_L1_SIZE; j++) {
+				if (l2[j])
+					flow_free(l2[j]);
+			}
+		}
+		free_page((unsigned long)l2);
+	}
+	kfree(flows);
+}
+
+/* Allocates a two-level bucket array for 'n_buckets' buckets: an L1 array
+ * of pointers, each to one zeroed page of bucket slots covering DP_L1_SIZE
+ * buckets.  Returns the L1 array, or NULL on memory exhaustion (any pages
+ * already obtained are released before returning). */
+static struct sw_flow ***alloc_table(unsigned int n_buckets)
+{
+	struct sw_flow ***flows;
+	unsigned int i;
+
+	flows = kmalloc((n_buckets >> DP_L1_BITS) * sizeof(struct sw_flow**),
+			GFP_KERNEL);
+	if (!flows)
+		return NULL;
+	for (i = 0; i < n_buckets >> DP_L1_BITS; i++) {
+		flows[i] = (struct sw_flow **)get_zeroed_page(GFP_KERNEL);
+		if (!flows[i]) {
+			/* Only i pages were allocated; free them without
+			 * touching the (nonexistent) flows. */
+			free_table(flows, i << DP_L1_BITS, 0);
+			return NULL;
+		}
+	}
+	return flows;
+}
+
+/* Allocates and returns a new, empty two-bank flow table with 'n_buckets'
+ * buckets per bank, or NULL on memory exhaustion.  'n_buckets' must be a
+ * power of two, since lookups mask hashes with n_buckets - 1 (callers pass
+ * DP_L1_SIZE or a doubling of an existing table's size). */
+struct dp_table *dp_table_create(unsigned int n_buckets)
+{
+	struct dp_table *table;
+
+	table = kzalloc(sizeof *table, GFP_KERNEL);
+	if (!table)
+		goto err;
+
+	table->n_buckets = n_buckets;
+	table->flows[0] = alloc_table(n_buckets);
+	/* Bug fix: this previously tested '!table[0].flows', which decays the
+	 * 'flows' array member to a never-NULL pointer, so an allocation
+	 * failure of bank 0 went undetected and led to a NULL dereference. */
+	if (!table->flows[0])
+		goto err_free_table;
+
+	table->flows[1] = alloc_table(n_buckets);
+	if (!table->flows[1])
+		goto err_free_flows0;
+
+	return table;
+
+err_free_flows0:
+	free_table(table->flows[0], table->n_buckets, 0);
+err_free_table:
+	kfree(table);
+err:
+	return NULL;
+}
+
+/* Tears down 'table': releases both hash banks, then the table header.
+ * When 'free_flows' is nonzero, the flows held in the buckets are freed
+ * too; otherwise they are assumed to be owned elsewhere. */
+void dp_table_destroy(struct dp_table *table, int free_flows)
+{
+	unsigned int bank;
+
+	for (bank = 0; bank < 2; bank++)
+		free_table(table->flows[bank], table->n_buckets, free_flows);
+	kfree(table);
+}
+
+/* Returns a pointer to the bucket slot for 'hash' within the two-level
+ * bucket array 'flows' belonging to 'table'.  The hash is first reduced
+ * modulo n_buckets (which must be a power of two for the mask to work);
+ * the upper portion selects the L1 page, the low bits the slot within it.
+ * NOTE(review): the L2 index is masked with DP_L2_BITS here, while the L2
+ * pages are sized and iterated with DP_L1_SIZE elsewhere in this file --
+ * confirm in datapath.h that these describe the same page size. */
+static struct sw_flow **find_bucket(struct dp_table *table,
+				    struct sw_flow ***flows, u32 hash)
+{
+	unsigned int l1 = (hash & (table->n_buckets - 1)) >> DP_L1_SHIFT;
+	unsigned int l2 = hash & ((1 << DP_L2_BITS) - 1);
+	return &flows[l1][l2];
+}
+
+/* Searches one hash bank ('flows') of 'table' for a flow whose key equals
+ * 'key', using the precomputed 'hash'.  Each bucket holds at most one
+ * flow, so this is a single probe plus a key comparison.  Returns the
+ * matching flow or NULL. */
+static struct sw_flow *lookup_table(struct dp_table *table,
+				    struct sw_flow ***flows, u32 hash,
+				    const struct odp_flow_key *key)
+{
+	struct sw_flow **slot = find_bucket(table, flows, hash);
+	struct sw_flow *candidate = rcu_dereference(*slot);
+
+	if (!candidate)
+		return NULL;
+	if (memcmp(&candidate->key, key, sizeof(struct odp_flow_key)))
+		return NULL;
+	return candidate;
+}
+
+/* Hash for bank 0: jhash2 over the raw key words, seed 0xaaaaaaaa. */
+static u32 flow_hash0(const struct odp_flow_key *key)
+{
+	const u32 *words = (const u32 *)key;
+
+	return jhash2(words, sizeof *key / sizeof(u32), 0xaaaaaaaa);
+}
+
+/* Hash for bank 1: same as flow_hash0 but with seed 0x55555555, so the
+ * two banks place a given key in independent buckets. */
+static u32 flow_hash1(const struct odp_flow_key *key)
+{
+	const u32 *words = (const u32 *)key;
+
+	return jhash2(words, sizeof *key / sizeof(u32), 0x55555555);
+}
+
+/* Fills 'buckets' with the two candidate bucket slots for 'key' in
+ * 'table', one per hash bank (each bank uses a different jhash seed). */
+static void find_buckets(struct dp_table *table,
+			 const struct odp_flow_key *key,
+			 struct sw_flow **buckets[2])
+{
+	buckets[0] = find_bucket(table, table->flows[0], flow_hash0(key));
+	buckets[1] = find_bucket(table, table->flows[1], flow_hash1(key));
+}
+
+/* Looks up 'key' in 'table': probes bank 0 first and falls back to bank 1
+ * on a miss.  Returns the matching flow or NULL. */
+struct sw_flow *dp_table_lookup(struct dp_table *table,
+				const struct odp_flow_key *key)
+{
+	struct sw_flow *hit;
+
+	hit = lookup_table(table, table->flows[0], flow_hash0(key), key);
+	if (hit)
+		return hit;
+	return lookup_table(table, table->flows[1], flow_hash1(key), key);
+}
+
+/* Invokes 'callback(flow, aux)' for every flow installed in 'table',
+ * walking both hash banks.  Stops early and returns the callback's
+ * nonzero return value if any call fails; returns 0 after visiting every
+ * flow.  Flows are read with rcu_dereference(), so the caller presumably
+ * holds rcu_read_lock() or otherwise excludes concurrent table
+ * replacement -- confirm at call sites. */
+int dp_table_foreach(struct dp_table *table,
+		     int (*callback)(struct sw_flow *flow, void *aux),
+		     void *aux)
+{
+	unsigned int i, j, k;
+	for (i = 0; i < 2; i++) {
+		for (j = 0; j < table->n_buckets >> DP_L1_BITS; j++) {
+			struct sw_flow **l2 = table->flows[i][j];
+			for (k = 0; k < DP_L1_SIZE; k++) {
+				struct sw_flow *flow = rcu_dereference(l2[k]);
+				if (flow) {
+					int error = callback(flow, aux);
+					if (error)
+						return error;
+				}
+			}
+		}
+	}
+	return 0;
+}
+
+/* dp_table_foreach() callback used by dp_table_expand(): rehashes 'flow'
+ * into 'new_table_' (really a struct dp_table *), taking the first empty
+ * of its two candidate buckets.  Always returns 0 so the iteration
+ * continues; if both candidate buckets are already occupied it warns once
+ * and the flow is silently dropped from the new table. */
+static int insert_flow(struct sw_flow *flow, void *new_table_)
+{
+	struct dp_table *new_table = new_table_;
+	struct sw_flow **buckets[2];
+	int i;
+
+	find_buckets(new_table, &flow->key, buckets);
+	for (i = 0; i < 2; i++) {
+		if (!*buckets[i]) {
+			rcu_assign_pointer(*buckets[i], flow);
+			return 0;
+		}
+	}
+	WARN_ON_ONCE(1);
+	return 0;
+}
+
+/* RCU callback: frees a retired table's bucket arrays only; the flows it
+ * referenced live on (they were moved into the replacement table). */
+static void dp_free_table_rcu(struct rcu_head *rcu)
+{
+	struct dp_table *retired = container_of(rcu, struct dp_table, rcu);
+
+	dp_table_destroy(retired, 0);
+}
+
+/* Replaces dp->table with a table twice as large, rehashing every
+ * existing flow into it, then publishes the new table and frees the old
+ * one (buckets only, not the flows) after an RCU grace period.  Returns 0
+ * on success or -ENOMEM if the larger table cannot be allocated, in which
+ * case the old table is left untouched. */
+int dp_table_expand(struct datapath *dp)
+{
+	struct dp_table *old_table = rcu_dereference(dp->table);
+	struct dp_table *new_table = dp_table_create(old_table->n_buckets * 2);
+	if (!new_table)
+		return -ENOMEM;
+	/* Populate before publishing so readers never see a partial table. */
+	dp_table_foreach(old_table, insert_flow, new_table);
+	rcu_assign_pointer(dp->table, new_table);
+	call_rcu(&old_table->rcu, dp_free_table_rcu);
+	return 0;
+}
+
+/* RCU callback: frees a retired table together with every flow it still
+ * holds (used when the whole table is being flushed). */
+static void dp_free_table_and_flows_rcu(struct rcu_head *rcu)
+{
+	struct dp_table *retired = container_of(rcu, struct dp_table, rcu);
+
+	dp_table_destroy(retired, 1);
+}
+
+/* Replaces dp->table with a fresh, minimum-size empty table.  The old
+ * table and every flow it contains are freed after an RCU grace period.
+ * Returns 0 on success or -ENOMEM, leaving the old table intact on
+ * failure. */
+int dp_table_flush(struct datapath *dp)
+{
+	struct dp_table *old_table = rcu_dereference(dp->table);
+	struct dp_table *new_table = dp_table_create(DP_L1_SIZE);
+	if (!new_table)
+		return -ENOMEM;
+	rcu_assign_pointer(dp->table, new_table);
+	call_rcu(&old_table->rcu, dp_free_table_and_flows_rcu);
+	return 0;
+}
+
+/* Looks up 'target' with insertion in mind.  Returns the bucket slot that
+ * already holds a flow with an identical key (so the caller can replace
+ * it), otherwise the first empty candidate slot, otherwise NULL when both
+ * candidate buckets are occupied by other flows -- presumably the caller
+ * then expands the table and retries; confirm at call sites. */
+struct sw_flow **
+dp_table_lookup_for_insert(struct dp_table *table,
+			   const struct odp_flow_key *target)
+{
+	struct sw_flow **buckets[2];
+	struct sw_flow **empty_bucket = NULL;
+	int i;
+
+	find_buckets(table, target, buckets);
+	for (i = 0; i < 2; i++) {
+		struct sw_flow *f = rcu_dereference(*buckets[i]);
+		if (f) {
+			if (!memcmp(&f->key, target, sizeof(struct odp_flow_key)))
+				return buckets[i];
+		} else if (!empty_bucket)
+			empty_bucket = buckets[i];
+	}
+	return empty_bucket;
+}
+
+/* Removes 'target' from 'table' by clearing the bucket that points to it
+ * (pointer identity, not key comparison).  Returns 0 on success or
+ * -ENOENT if 'target' is not installed.  Does not free the flow; the
+ * caller keeps ownership and presumably defers freeing past an RCU grace
+ * period -- confirm at call sites. */
+int dp_table_delete(struct dp_table *table, struct sw_flow *target)
+{
+	struct sw_flow **buckets[2];
+	int i;
+
+	find_buckets(table, &target->key, buckets);
+	for (i = 0; i < 2; i++) {
+		struct sw_flow *flow = rcu_dereference(*buckets[i]);
+		if (flow == target) {
+			rcu_assign_pointer(*buckets[i], NULL);
+			return 0;
+		}
+	}
+	return -ENOENT;
+}