diff options
Diffstat (limited to 'datapath')
61 files changed, 8551 insertions, 0 deletions
diff --git a/datapath/.gitignore b/datapath/.gitignore new file mode 100644 index 00000000..5a59a0d3 --- /dev/null +++ b/datapath/.gitignore @@ -0,0 +1,7 @@ +/Makefile +/Makefile.in +*.cmd +*.ko +*.mod.c +Module.symvers + diff --git a/datapath/Makefile.am b/datapath/Makefile.am new file mode 100644 index 00000000..71e2dc48 --- /dev/null +++ b/datapath/Makefile.am @@ -0,0 +1,12 @@ +SUBDIRS = +if L26_ENABLED +SUBDIRS += linux-2.6 +endif + +EXTRA_DIST = $(dist_headers) $(dist_sources) + +# Suppress warnings about GNU extensions in Modules.mk files. +AUTOMAKE_OPTIONS = -Wno-portability + +include Modules.mk +include linux-2.6/Modules.mk diff --git a/datapath/Modules.mk b/datapath/Modules.mk new file mode 100644 index 00000000..1b5de4ab --- /dev/null +++ b/datapath/Modules.mk @@ -0,0 +1,32 @@ +# Some modules should be built and distributed, e.g. openvswitch. +# +# Some modules should be distributed but not built, e.g. we do not build +# veth if the kernel in question already has it. +# +# Some modules should be built but not distributed, e.g. third-party +# hwtable modules. 
+both_modules = openvswitch +build_modules = $(both_modules) # Modules to build +dist_modules = $(both_modules) # Modules to distribute + +openvswitch_sources = \ + actions.c \ + datapath.c \ + dp_dev.c \ + dp_notify.c \ + flow.c \ + table.c + +openvswitch_headers = \ + actions.h \ + compat.h \ + datapath.h \ + dp_dev.h \ + flow.h + +dist_sources = $(foreach module,$(dist_modules),$($(module)_sources)) +dist_headers = $(foreach module,$(dist_modules),$($(module)_headers)) +build_sources = $(foreach module,$(build_modules),$($(module)_sources)) +build_headers = $(foreach module,$(build_modules),$($(module)_headers)) +build_links = $(notdir $(build_sources)) +build_objects = $(notdir $(patsubst %.c,%.o,$(build_sources))) diff --git a/datapath/actions.c b/datapath/actions.c new file mode 100644 index 00000000..30b840cb --- /dev/null +++ b/datapath/actions.c @@ -0,0 +1,421 @@ +/* + * Distributed under the terms of the GNU GPL version 2. + * Copyright (c) 2007, 2008, 2009 Nicira Networks. + */ + +/* Functions for executing flow actions. 
*/ + +#include <linux/skbuff.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/in6.h> +#include <linux/if_vlan.h> +#include <net/ip.h> +#include <net/checksum.h> +#include "datapath.h" +#include "dp_dev.h" +#include "actions.h" +#include "openvswitch/datapath-protocol.h" + +struct sk_buff * +make_writable(struct sk_buff *skb, gfp_t gfp) +{ + if (skb_shared(skb) || skb_cloned(skb)) { + struct sk_buff *nskb = skb_copy(skb, gfp); + if (nskb) { + kfree_skb(skb); + return nskb; + } + } else { + unsigned int hdr_len = (skb_transport_offset(skb) + + sizeof(struct tcphdr)); + if (pskb_may_pull(skb, min(hdr_len, skb->len))) + return skb; + } + kfree_skb(skb); + return NULL; +} + + +static struct sk_buff * +vlan_pull_tag(struct sk_buff *skb) +{ + struct vlan_ethhdr *vh = vlan_eth_hdr(skb); + struct ethhdr *eh; + + + /* Verify we were given a vlan packet */ + if (vh->h_vlan_proto != htons(ETH_P_8021Q)) + return skb; + + memmove(skb->data + VLAN_HLEN, skb->data, 2 * VLAN_ETH_ALEN); + + eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN); + + skb->protocol = eh->h_proto; + skb->mac_header += VLAN_HLEN; + + return skb; +} + + +static struct sk_buff * +modify_vlan_tci(struct datapath *dp, struct sk_buff *skb, + struct odp_flow_key *key, const union odp_action *a, + int n_actions, gfp_t gfp) +{ + u16 tci, mask; + + if (a->type == ODPAT_SET_VLAN_VID) { + tci = ntohs(a->vlan_vid.vlan_vid); + mask = VLAN_VID_MASK; + key->dl_vlan = htons(tci & mask); + } else { + tci = a->vlan_pcp.vlan_pcp << 13; + mask = VLAN_PCP_MASK; + } + + skb = make_writable(skb, gfp); + if (!skb) + return ERR_PTR(-ENOMEM); + + if (skb->protocol == htons(ETH_P_8021Q)) { + /* Modify vlan id, but maintain other TCI values */ + struct vlan_ethhdr *vh = vlan_eth_hdr(skb); + vh->h_vlan_TCI = htons((ntohs(vh->h_vlan_TCI) & ~mask) | tci); + } else { + /* Add vlan header */ + + /* Set up checksumming pointers for checksum-deferred packets + * on Xen. 
Otherwise, dev_queue_xmit() will try to do this
+		 * when we send the packet out on the wire, and it will fail at
+		 * that point because skb_checksum_setup() will not look inside
+		 * an 802.1Q header. */
+		skb_checksum_setup(skb);
+
+		/* GSO is not implemented for packets with an 802.1Q header, so
+		 * we have to do segmentation before we add that header.
+		 *
+		 * GSO does work with hardware-accelerated VLAN tagging, but we
+		 * can't use hardware-accelerated VLAN tagging since it
+		 * requires the device to have a VLAN group configured (with
+		 * e.g. vconfig(8)) and we don't do that.
+		 *
+		 * Having to do this here may be a performance loss, since we
+		 * can't take advantage of TSO hardware support, although it
+		 * does not make a measurable network performance difference
+		 * for 1G Ethernet. Fixing that would require patching the
+		 * kernel (either to add GSO support to the VLAN protocol or to
+		 * support hardware-accelerated VLAN tagging without VLAN
+		 * groups configured). */
+		if (skb_is_gso(skb)) {
+			struct sk_buff *segs;
+
+			segs = skb_gso_segment(skb, 0);
+			kfree_skb(skb);
+			if (unlikely(IS_ERR(segs)))
+				return ERR_CAST(segs);
+
+			/* Tag every segment except the last and run the
+			 * remaining actions on it.  The last segment is left
+			 * in 'skb' so that it is tagged after the loop and
+			 * returned to the caller.
+			 *
+			 * 'segs->next' is tested before each pass, including
+			 * the first, so that a segment list with only one
+			 * entry does not leave 'segs' NULL and then
+			 * dereference it. */
+			while (segs->next) {
+				struct sk_buff *nskb = segs->next;
+				int err;
+
+				segs->next = NULL;
+
+				segs = __vlan_put_tag(segs, tci);
+				err = -ENOMEM;
+				if (segs) {
+					/* Each segment gets its own copy of
+					 * the key, so the actions run on one
+					 * segment cannot change the key used
+					 * for the next. */
+					struct odp_flow_key segkey = *key;
+					err = execute_actions(dp, segs,
+							      &segkey, a + 1,
+							      n_actions - 1,
+							      gfp);
+				}
+
+				if (unlikely(err)) {
+					/* Free the segments not yet
+					 * processed. */
+					while ((segs = nskb)) {
+						nskb = segs->next;
+						segs->next = NULL;
+						kfree_skb(segs);
+					}
+					return ERR_PTR(err);
+				}
+
+				segs = nskb;
+			}
+
+			skb = segs;
+		}
+
+		/* The hardware-accelerated version of vlan_put_tag() works
+		 * only for a device that has a VLAN group configured (with
+		 * e.g. vconfig(8)), so call the software-only version
+		 * __vlan_put_tag() directly instead.
+ */ + skb = __vlan_put_tag(skb, tci); + if (!skb) + return ERR_PTR(-ENOMEM); + } + + return skb; +} + +static struct sk_buff *strip_vlan(struct sk_buff *skb, + struct odp_flow_key *key, gfp_t gfp) +{ + skb = make_writable(skb, gfp); + if (skb) { + vlan_pull_tag(skb); + key->dl_vlan = htons(ODP_VLAN_NONE); + } + return skb; +} + +static struct sk_buff *set_dl_addr(struct sk_buff *skb, + const struct odp_action_dl_addr *a, + gfp_t gfp) +{ + skb = make_writable(skb, gfp); + if (skb) { + struct ethhdr *eh = eth_hdr(skb); + memcpy(a->type == ODPAT_SET_DL_SRC ? eh->h_source : eh->h_dest, + a->dl_addr, ETH_ALEN); + } + return skb; +} + +/* Updates 'sum', which is a field in 'skb''s data, given that a 4-byte field + * covered by the sum has been changed from 'from' to 'to'. If set, + * 'pseudohdr' indicates that the field is in the TCP or UDP pseudo-header. + * Based on nf_proto_csum_replace4. */ +static void update_csum(__sum16 *sum, struct sk_buff *skb, + __be32 from, __be32 to, int pseudohdr) +{ + __be32 diff[] = { ~from, to }; + if (skb->ip_summed != CHECKSUM_PARTIAL) { + *sum = csum_fold(csum_partial((char *)diff, sizeof(diff), + ~csum_unfold(*sum))); + if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) + skb->csum = ~csum_partial((char *)diff, sizeof(diff), + ~skb->csum); + } else if (pseudohdr) + *sum = ~csum_fold(csum_partial((char *)diff, sizeof(diff), + csum_unfold(*sum))); +} + +static struct sk_buff *set_nw_addr(struct sk_buff *skb, + struct odp_flow_key *key, + const struct odp_action_nw_addr *a, + gfp_t gfp) +{ + if (key->dl_type != htons(ETH_P_IP)) + return skb; + + skb = make_writable(skb, gfp); + if (skb) { + struct iphdr *nh = ip_hdr(skb); + u32 *f = a->type == ODPAT_SET_NW_SRC ? 
&nh->saddr : &nh->daddr;
+		u32 old = *f;
+		u32 new = a->nw_addr;
+
+		if (key->nw_proto == IPPROTO_TCP) {
+			struct tcphdr *th = tcp_hdr(skb);
+			update_csum(&th->check, skb, old, new, 1);
+		} else if (key->nw_proto == IPPROTO_UDP) {
+			struct udphdr *th = udp_hdr(skb);
+			update_csum(&th->check, skb, old, new, 1);
+		}
+		update_csum(&nh->check, skb, old, new, 0);
+		*f = new;
+	}
+	return skb;
+}
+
+/* Rewrites the TCP or UDP source or destination port of 'skb' (selected by
+ * 'a->type') to 'a->tp_port' and adjusts the transport checksum to match.
+ * A packet that 'key' does not describe as IPv4 carrying TCP or UDP is
+ * returned untouched.
+ *
+ * Returns the skb to keep using, which may differ from the one passed in,
+ * or NULL if make_writable() failed (it has freed the packet by then). */
+static struct sk_buff *
+set_tp_port(struct sk_buff *skb, struct odp_flow_key *key,
+	    const struct odp_action_tp_port *a,
+	    gfp_t gfp)
+{
+	int check_ofs;
+
+	if (key->dl_type != htons(ETH_P_IP))
+		return skb;
+
+	/* Offset of the checksum field within the transport header. */
+	if (key->nw_proto == IPPROTO_TCP)
+		check_ofs = offsetof(struct tcphdr, check);
+	else if (key->nw_proto == IPPROTO_UDP)
+		check_ofs = offsetof(struct udphdr, check);
+	else
+		return skb;
+
+	skb = make_writable(skb, gfp);
+	if (skb) {
+		/* The struct udphdr view is used for TCP as well; only its
+		 * 'source' and 'dest' members are touched through it. */
+		struct udphdr *th = udp_hdr(skb);
+		u16 *f = a->type == ODPAT_SET_TP_SRC ? &th->source : &th->dest;
+		u16 old = *f;
+		u16 new = a->tp_port;
+		/* 'check_ofs' is relative to the transport header, so it must
+		 * be added to 'th' and not to skb->data, which need not point
+		 * at the transport header. */
+		update_csum((u16*)((u8*)th + check_ofs),
+			    skb, old, new, 1);
+		*f = new;
+	}
+	return skb;
+}
+
+static inline unsigned packet_length(const struct sk_buff *skb)
+{
+	unsigned length = skb->len - ETH_HLEN;
+	if (skb->protocol == htons(ETH_P_8021Q))
+		length -= VLAN_HLEN;
+	return length;
+}
+
+int dp_xmit_skb(struct sk_buff *skb)
+{
+	struct datapath *dp = skb->dev->br_port->dp;
+	int len = skb->len;
+
+	if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) {
+		printk(KERN_WARNING "%s: dropped over-mtu packet: %d > %d\n",
+		       dp_name(dp), packet_length(skb), skb->dev->mtu);
+		kfree_skb(skb);
+		return -E2BIG;
+	}
+
+	dev_queue_xmit(skb);
+
+	return len;
+}
+
+static void
+do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
+{
+	struct net_bridge_port *p;
+	struct net_device *dev;
+
+	if (!skb)
+		goto error;
+
+	p = dp->ports[out_port];
+	if (!p)
+		goto error;
+
+	dev = skb->dev = p->dev;
+	if (is_dp_dev(dev))
+		dp_dev_recv(dev, skb);
+	else
+		dp_xmit_skb(skb);
+	return;
+ +error: + kfree_skb(skb); +} + +/* Never consumes 'skb'. Returns a port that 'skb' should be sent to, -1 if + * none. */ +static int output_group(struct datapath *dp, __u16 group, + struct sk_buff *skb, gfp_t gfp) +{ + struct dp_port_group *g = rcu_dereference(dp->groups[group]); + int prev_port = -1; + int i; + + if (!g) + return -1; + for (i = 0; i < g->n_ports; i++) { + struct net_bridge_port *p = dp->ports[g->ports[i]]; + if (!p || skb->dev == p->dev) + continue; + if (prev_port != -1) { + struct sk_buff *clone = skb_clone(skb, gfp); + if (!clone) + return -1; + do_output(dp, clone, prev_port); + } + prev_port = p->port_no; + } + return prev_port; +} + +static int +output_control(struct datapath *dp, struct sk_buff *skb, u32 arg, gfp_t gfp) +{ + skb = skb_clone(skb, gfp); + if (!skb) + return -ENOMEM; + return dp_output_control(dp, skb, _ODPL_ACTION_NR, arg); +} + +/* Execute a list of actions against 'skb'. */ +int execute_actions(struct datapath *dp, struct sk_buff *skb, + struct odp_flow_key *key, + const union odp_action *a, int n_actions, + gfp_t gfp) +{ + /* Every output action needs a separate clone of 'skb', but the common + * case is just a single output action, so that doing a clone and + * then freeing the original skbuff is wasteful. So the following code + * is slightly obscure just to avoid that. 
*/ + int prev_port = -1; + int err = 0; + for (; n_actions > 0; a++, n_actions--) { + WARN_ON_ONCE(skb_shared(skb)); + if (prev_port != -1) { + do_output(dp, skb_clone(skb, gfp), prev_port); + prev_port = -1; + } + + switch (a->type) { + case ODPAT_OUTPUT: + prev_port = a->output.port; + break; + + case ODPAT_OUTPUT_GROUP: + prev_port = output_group(dp, a->output_group.group, + skb, gfp); + break; + + case ODPAT_CONTROLLER: + err = output_control(dp, skb, a->controller.arg, gfp); + if (err) { + kfree_skb(skb); + return err; + } + break; + + case ODPAT_SET_VLAN_VID: + case ODPAT_SET_VLAN_PCP: + skb = modify_vlan_tci(dp, skb, key, a, n_actions, gfp); + if (IS_ERR(skb)) + return PTR_ERR(skb); + break; + + case ODPAT_STRIP_VLAN: + skb = strip_vlan(skb, key, gfp); + break; + + case ODPAT_SET_DL_SRC: + case ODPAT_SET_DL_DST: + skb = set_dl_addr(skb, &a->dl_addr, gfp); + break; + + case ODPAT_SET_NW_SRC: + case ODPAT_SET_NW_DST: + skb = set_nw_addr(skb, key, &a->nw_addr, gfp); + break; + + case ODPAT_SET_TP_SRC: + case ODPAT_SET_TP_DST: + skb = set_tp_port(skb, key, &a->tp_port, gfp); + break; + } + if (!skb) + return -ENOMEM; + } + if (prev_port != -1) + do_output(dp, skb, prev_port); + else + kfree_skb(skb); + return err; +} diff --git a/datapath/actions.h b/datapath/actions.h new file mode 100644 index 00000000..410e3ba7 --- /dev/null +++ b/datapath/actions.h @@ -0,0 +1,18 @@ +#ifndef ACTIONS_H +#define ACTIONS_H 1 + +#include <linux/gfp.h> + +struct datapath; +struct sk_buff; +struct odp_flow_key; +union odp_action; + +struct sk_buff *make_writable(struct sk_buff *, gfp_t gfp); +int dp_xmit_skb(struct sk_buff *); +int execute_actions(struct datapath *dp, struct sk_buff *skb, + struct odp_flow_key *key, + const union odp_action *, int n_actions, + gfp_t gfp); + +#endif /* actions.h */ diff --git a/datapath/brc_procfs.c b/datapath/brc_procfs.c new file mode 100644 index 00000000..733e9a94 --- /dev/null +++ b/datapath/brc_procfs.c @@ -0,0 +1,185 @@ +#include 
<linux/kernel.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <net/genetlink.h> +#include "openvswitch/brcompat-netlink.h" + +/* This code implements a Generic Netlink command BRC_GENL_C_SET_PROC that can + * be used to add, modify, and delete arbitrary files in selected + * subdirectories of /proc. It's a horrible kluge prompted by the need to + * simulate certain /proc/net/vlan and /proc/net/bonding files for software + * that wants to read them, and with any luck it will go away eventually. + * + * The implementation is a kluge too. In particular, we want to release the + * strings copied into the 'data' members of proc_dir_entry when the + * proc_dir_entry structures are freed, but there doesn't appear to be a way to + * hook that, so instead we have to rely on being the only entity modifying the + * directories in question. + */ + +static int brc_seq_show(struct seq_file *seq, void *unused) +{ + seq_puts(seq, seq->private); + return 0; +} + +static int brc_seq_open(struct inode *inode, struct file *file) +{ + return single_open(file, brc_seq_show, PDE(inode)->data); +} + +static struct file_operations brc_fops = { + .owner = THIS_MODULE, + .open = brc_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct proc_dir_entry *proc_vlan_dir; +static struct proc_dir_entry *proc_bonding_dir; + +struct proc_dir_entry *brc_lookup_entry(struct proc_dir_entry *de, const char *name) +{ + int namelen = strlen(name); + for (de = de->subdir; de; de = de->next) { + if (de->namelen != namelen) + continue; + if (!memcmp(name, de->name, de->namelen)) + return de; + } + return NULL; +} + +static struct proc_dir_entry *brc_open_dir(const char *dir_name, + struct proc_dir_entry *parent, + struct proc_dir_entry **dirp) +{ + if (!*dirp) { + struct proc_dir_entry *dir; + if (brc_lookup_entry(parent, dir_name)) { + printk(KERN_WARNING "%s proc directory exists, 
can't " + "simulate--probably its real module is " + "loaded\n", dir_name); + return NULL; + } + dir = *dirp = proc_mkdir(dir_name, parent); + } + return *dirp; +} + +/* Maximum length of the BRC_GENL_A_PROC_DIR and BRC_GENL_A_PROC_NAME strings. + * If we could depend on supporting NLA_NUL_STRING and the .len member in + * Generic Netlink policy, then we could just put this in brc_genl_policy (and + * simplify brc_genl_set_proc() below too), but upstream 2.6.18 does not have + * either. */ +#define BRC_NAME_LEN_MAX 32 + +int brc_genl_set_proc(struct sk_buff *skb, struct genl_info *info) +{ + struct proc_dir_entry *dir, *entry; + const char *dir_name, *name; + char *data; + + if (!info->attrs[BRC_GENL_A_PROC_DIR] || + VERIFY_NUL_STRING(info->attrs[BRC_GENL_A_PROC_DIR]) || + !info->attrs[BRC_GENL_A_PROC_NAME] || + VERIFY_NUL_STRING(info->attrs[BRC_GENL_A_PROC_NAME]) || + (info->attrs[BRC_GENL_A_PROC_DATA] && + VERIFY_NUL_STRING(info->attrs[BRC_GENL_A_PROC_DATA]))) + return -EINVAL; + + dir_name = nla_data(info->attrs[BRC_GENL_A_PROC_DIR]); + name = nla_data(info->attrs[BRC_GENL_A_PROC_NAME]); + if (strlen(dir_name) > BRC_NAME_LEN_MAX || + strlen(name) > BRC_NAME_LEN_MAX) + return -EINVAL; + + if (!strcmp(dir_name, "net/vlan")) + dir = brc_open_dir("vlan", proc_net, &proc_vlan_dir); + else if (!strcmp(dir_name, "net/bonding")) + dir = brc_open_dir("bonding", proc_net, &proc_bonding_dir); + else + return -EINVAL; + if (!dir) { + /* Probably failed because the module that really implements + * the function in question is loaded and already owns the + * directory in question.*/ + return -EBUSY; + } + + entry = brc_lookup_entry(dir, name); + if (!info->attrs[BRC_GENL_A_PROC_DATA]) { + if (!entry) + return -ENOENT; + + data = entry->data; + remove_proc_entry(name, dir); + if (brc_lookup_entry(dir, name)) + return -EBUSY; /* Shouldn't happen */ + + kfree(data); + } else { + data = kstrdup(nla_data(info->attrs[BRC_GENL_A_PROC_DATA]), + GFP_KERNEL); + if (!data) + return 
-ENOMEM; + + if (entry) { + char *old_data = entry->data; + entry->data = data; + kfree(old_data); + return 0; + } + + entry = create_proc_entry(name, S_IFREG|S_IRUSR|S_IWUSR, dir); + if (!entry) { + kfree(data); + return -ENOBUFS; + } + entry->proc_fops = &brc_fops; + entry->data = data; + } + return 0; +} + +static void kill_proc_dir(const char *dir_name, + struct proc_dir_entry *parent, + struct proc_dir_entry *dir) +{ + if (!dir) + return; + for (;;) { + struct proc_dir_entry *e; + char *data; + char name[BRC_NAME_LEN_MAX + 1]; + + e = dir->subdir; + if (!e) + break; + + if (e->namelen >= sizeof name) { + /* Can't happen: we prevent adding names this long by + * limiting the BRC_GENL_A_PROC_NAME string to + * BRC_NAME_LEN_MAX bytes. */ + WARN_ON(1); + break; + } + strcpy(name, e->name); + + data = e->data; + e->data = NULL; + kfree(data); + + remove_proc_entry(name, dir); + } + remove_proc_entry(dir_name, parent); +} + +void brc_procfs_exit(void) +{ + kill_proc_dir("vlan", proc_net, proc_vlan_dir); + kill_proc_dir("bonding", proc_net, proc_bonding_dir); +} diff --git a/datapath/brc_procfs.h b/datapath/brc_procfs.h new file mode 100644 index 00000000..93e21cfb --- /dev/null +++ b/datapath/brc_procfs.h @@ -0,0 +1,11 @@ +#ifndef BRC_PROCFS_H +#define BRC_PROCFS_H 1 + +struct sk_buff; +struct genl_info; + +void brc_procfs_exit(void); +int brc_genl_set_proc(struct sk_buff *skb, struct genl_info *info); + +#endif /* brc_procfs.h */ + diff --git a/datapath/brc_sysfs.h b/datapath/brc_sysfs.h new file mode 100644 index 00000000..0c72fb22 --- /dev/null +++ b/datapath/brc_sysfs.h @@ -0,0 +1,25 @@ +#ifndef BRC_SYSFS_H +#define BRC_SYSFS_H 1 + +struct datapath; +struct net_bridge_port; + +/* brc_sysfs_dp.c */ +int brc_sysfs_add_dp(struct datapath *dp); +int brc_sysfs_del_dp(struct datapath *dp); + +/* brc_sysfs_if.c */ +int brc_sysfs_add_if(struct net_bridge_port *p); +int brc_sysfs_del_if(struct net_bridge_port *p); + +#include <linux/version.h> +#if LINUX_VERSION_CODE == 
KERNEL_VERSION(2,6,18) +#define SUPPORT_SYSFS 1 +#else +/* We only support sysfs on Linux 2.6.18 because that's the only place we + * really need it (on Xen, for brcompat) and it's a big pain to try to support + * multiple versions. */ +#endif + +#endif /* brc_sysfs.h */ + diff --git a/datapath/brc_sysfs_dp.c b/datapath/brc_sysfs_dp.c new file mode 100644 index 00000000..fc02f279 --- /dev/null +++ b/datapath/brc_sysfs_dp.c @@ -0,0 +1,532 @@ +#include <linux/version.h> + +/* + * Sysfs attributes of bridge for Open vSwitch + * + * This has been shamelessly copied from the kernel sources. + */ + +#include <linux/capability.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/if_bridge.h> +#include <linux/rtnetlink.h> +#include <linux/spinlock.h> +#include <linux/times.h> +#include <linux/version.h> + +#include "brc_sysfs.h" +#include "datapath.h" +#include "dp_dev.h" + +#ifdef SUPPORT_SYSFS +#define to_dev(obj) container_of(obj, struct device, kobj) + +/* Hack to attempt to build on more platforms. */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) +#define to_kobj(d) &(d)->class_dev.kobj +#define BRC_DEVICE_ATTR CLASS_DEVICE_ATTR +#else +#define to_kobj(d) &(d)->dev.kobj +#define BRC_DEVICE_ATTR DEVICE_ATTR +#endif + +/* + * Common code for storing bridge parameters. + */ +static ssize_t store_bridge_parm(struct class_device *d, + const char *buf, size_t len, + void (*set)(struct datapath *, unsigned long)) +{ + struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + char *endp; + unsigned long val; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; + +#if 0 + spin_lock_bh(&br->lock); + (*set)(br, val); + spin_unlock_bh(&br->lock); +#else + /* xxx We use a default value of 0 for all fields. If the caller is + * xxx attempting to set the value to our default, just silently + * xxx ignore the request. 
+ */ + if (val != 0) { + printk("%s: xxx writing dp parms not supported yet!\n", + dp_name(dp)); + } +#endif + return len; +} + + +static ssize_t show_forward_delay(struct class_device *d, + char *buf) +{ +#if 0 + struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay)); +#else + return sprintf(buf, "%d\n", 0); +#endif +} + +static void set_forward_delay(struct datapath *dp, unsigned long val) +{ +#if 0 + unsigned long delay = clock_t_to_jiffies(val); + br->forward_delay = delay; + if (br_is_root_bridge(br)) + br->bridge_forward_delay = delay; +#else + printk("%s: xxx attempt to set_forward_delay()\n", dp_name(dp)); +#endif +} + +static ssize_t store_forward_delay(struct class_device *d, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_forward_delay); +} +static BRC_DEVICE_ATTR(forward_delay, S_IRUGO | S_IWUSR, + show_forward_delay, store_forward_delay); + +static ssize_t show_hello_time(struct class_device *d, char *buf) +{ +#if 0 + return sprintf(buf, "%lu\n", + jiffies_to_clock_t(to_bridge(d)->hello_time)); +#else + return sprintf(buf, "%d\n", 0); +#endif +} + +static void set_hello_time(struct datapath *dp, unsigned long val) +{ +#if 0 + unsigned long t = clock_t_to_jiffies(val); + br->hello_time = t; + if (br_is_root_bridge(br)) + br->bridge_hello_time = t; +#else + printk("%s: xxx attempt to set_hello_time()\n", dp_name(dp)); +#endif +} + +static ssize_t store_hello_time(struct class_device *d, + const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_hello_time); +} +static BRC_DEVICE_ATTR(hello_time, S_IRUGO | S_IWUSR, show_hello_time, + store_hello_time); + +static ssize_t show_max_age(struct class_device *d, + char *buf) +{ +#if 0 + return sprintf(buf, "%lu\n", + jiffies_to_clock_t(to_bridge(d)->max_age)); +#else + return sprintf(buf, "%d\n", 0); +#endif +} + +static void set_max_age(struct datapath *dp, unsigned long val) +{ +#if 0 + 
unsigned long t = clock_t_to_jiffies(val); + br->max_age = t; + if (br_is_root_bridge(br)) + br->bridge_max_age = t; +#else + printk("%s: xxx attempt to set_max_age()\n", dp_name(dp)); +#endif +} + +static ssize_t store_max_age(struct class_device *d, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_max_age); +} +static BRC_DEVICE_ATTR(max_age, S_IRUGO | S_IWUSR, show_max_age, store_max_age); + +static ssize_t show_ageing_time(struct class_device *d, + char *buf) +{ +#if 0 + struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->ageing_time)); +#else + return sprintf(buf, "%d\n", 0); +#endif +} + +static void set_ageing_time(struct datapath *dp, unsigned long val) +{ +#if 0 + br->ageing_time = clock_t_to_jiffies(val); +#else + printk("%s: xxx attempt to set_ageing_time()\n", dp_name(dp)); +#endif +} + +static ssize_t store_ageing_time(struct class_device *d, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_ageing_time); +} +static BRC_DEVICE_ATTR(ageing_time, S_IRUGO | S_IWUSR, show_ageing_time, + store_ageing_time); + +static ssize_t show_stp_state(struct class_device *d, + char *buf) +{ +#if 0 + struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + return sprintf(buf, "%d\n", br->stp_enabled); +#else + return sprintf(buf, "%d\n", 0); +#endif +} + + +static ssize_t store_stp_state(struct class_device *d, + const char *buf, + size_t len) +{ + struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); +#if 0 + char *endp; + unsigned long val; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; + + rtnl_lock(); + br_stp_set_enabled(br, val); + rtnl_unlock(); +#else + printk("%s: xxx attempt to set_stp_state()\n", dp_name(dp)); +#endif + + return len; +} +static BRC_DEVICE_ATTR(stp_state, S_IRUGO | S_IWUSR, show_stp_state, + store_stp_state); + +static ssize_t show_priority(struct class_device 
*d, + char *buf) +{ +#if 0 + struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + return sprintf(buf, "%d\n", + (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]); +#else + return sprintf(buf, "%d\n", 0); +#endif +} + +static void set_priority(struct datapath *dp, unsigned long val) +{ +#if 0 + br_stp_set_bridge_priority(br, (u16) val); +#else + printk("%s: xxx attempt to set_priority()\n", dp_name(dp)); +#endif +} + +static ssize_t store_priority(struct class_device *d, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_priority); +} +static BRC_DEVICE_ATTR(priority, S_IRUGO | S_IWUSR, show_priority, store_priority); + +static ssize_t show_root_id(struct class_device *d, + char *buf) +{ +#if 0 + return br_show_bridge_id(buf, &to_bridge(d)->designated_root); +#else + return sprintf(buf, "0000.010203040506\n"); +#endif +} +static BRC_DEVICE_ATTR(root_id, S_IRUGO, show_root_id, NULL); + +static ssize_t show_bridge_id(struct class_device *d, + char *buf) +{ + struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + const unsigned char *addr = dp->ports[ODPP_LOCAL]->dev->dev_addr; + + /* xxx Do we need a lock of some sort? 
*/ + return sprintf(buf, "%.2x%.2x.%.2x%.2x%.2x%.2x%.2x%.2x\n", + 0, 0, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); +} +static BRC_DEVICE_ATTR(bridge_id, S_IRUGO, show_bridge_id, NULL); + +static ssize_t show_root_port(struct class_device *d, + char *buf) +{ +#if 0 + return sprintf(buf, "%d\n", to_bridge(d)->root_port); +#else + return sprintf(buf, "%d\n", 0); +#endif +} +static BRC_DEVICE_ATTR(root_port, S_IRUGO, show_root_port, NULL); + +static ssize_t show_root_path_cost(struct class_device *d, + char *buf) +{ +#if 0 + return sprintf(buf, "%d\n", to_bridge(d)->root_path_cost); +#else + return sprintf(buf, "%d\n", 0); +#endif +} +static BRC_DEVICE_ATTR(root_path_cost, S_IRUGO, show_root_path_cost, NULL); + +static ssize_t show_topology_change(struct class_device *d, + char *buf) +{ +#if 0 + return sprintf(buf, "%d\n", to_bridge(d)->topology_change); +#else + return sprintf(buf, "%d\n", 0); +#endif +} +static BRC_DEVICE_ATTR(topology_change, S_IRUGO, show_topology_change, NULL); + +static ssize_t show_topology_change_detected(struct class_device *d, + char *buf) +{ +#if 0 + struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + return sprintf(buf, "%d\n", br->topology_change_detected); +#else + return sprintf(buf, "%d\n", 0); +#endif +} +static BRC_DEVICE_ATTR(topology_change_detected, S_IRUGO, + show_topology_change_detected, NULL); + +static ssize_t show_hello_timer(struct class_device *d, + char *buf) +{ +#if 0 + struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + return sprintf(buf, "%ld\n", br_timer_value(&br->hello_timer)); +#else + return sprintf(buf, "%d\n", 0); +#endif +} +static BRC_DEVICE_ATTR(hello_timer, S_IRUGO, show_hello_timer, NULL); + +static ssize_t show_tcn_timer(struct class_device *d, + char *buf) +{ +#if 0 + struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + return sprintf(buf, "%ld\n", br_timer_value(&br->tcn_timer)); +#else + return sprintf(buf, "%d\n", 0); +#endif +} +static BRC_DEVICE_ATTR(tcn_timer, S_IRUGO, 
show_tcn_timer, NULL); + +static ssize_t show_topology_change_timer(struct class_device *d, + char *buf) +{ +#if 0 + struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + return sprintf(buf, "%ld\n", br_timer_value(&br->topology_change_timer)); +#else + return sprintf(buf, "%d\n", 0); +#endif +} +static BRC_DEVICE_ATTR(topology_change_timer, S_IRUGO, show_topology_change_timer, + NULL); + +static ssize_t show_gc_timer(struct class_device *d, + char *buf) +{ +#if 0 + struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + return sprintf(buf, "%ld\n", br_timer_value(&br->gc_timer)); +#else + return sprintf(buf, "%d\n", 0); +#endif +} +static BRC_DEVICE_ATTR(gc_timer, S_IRUGO, show_gc_timer, NULL); + +static ssize_t show_group_addr(struct class_device *d, + char *buf) +{ +#if 0 + struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + return sprintf(buf, "%x:%x:%x:%x:%x:%x\n", + br->group_addr[0], br->group_addr[1], + br->group_addr[2], br->group_addr[3], + br->group_addr[4], br->group_addr[5]); +#else + return sprintf(buf, "00:01:02:03:04:05\n"); +#endif +} + +static ssize_t store_group_addr(struct class_device *d, + const char *buf, size_t len) +{ + struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); +#if 0 + unsigned new_addr[6]; + int i; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (sscanf(buf, "%x:%x:%x:%x:%x:%x", + &new_addr[0], &new_addr[1], &new_addr[2], + &new_addr[3], &new_addr[4], &new_addr[5]) != 6) + return -EINVAL; + + /* Must be 01:80:c2:00:00:0X */ + for (i = 0; i < 5; i++) + if (new_addr[i] != br_group_address[i]) + return -EINVAL; + + if (new_addr[5] & ~0xf) + return -EINVAL; + + if (new_addr[5] == 1 /* 802.3x Pause address */ + || new_addr[5] == 2 /* 802.3ad Slow protocols */ + || new_addr[5] == 3) /* 802.1X PAE address */ + return -EINVAL; + + spin_lock_bh(&br->lock); + for (i = 0; i < 6; i++) + br->group_addr[i] = new_addr[i]; + spin_unlock_bh(&br->lock); +#else + printk("%s: xxx attempt to store_group_addr()\n", dp_name(dp)); +#endif + 
return len; +} + +static BRC_DEVICE_ATTR(group_addr, S_IRUGO | S_IWUSR, + show_group_addr, store_group_addr); + +static struct attribute *bridge_attrs[] = { + &class_device_attr_forward_delay.attr, + &class_device_attr_hello_time.attr, + &class_device_attr_max_age.attr, + &class_device_attr_ageing_time.attr, + &class_device_attr_stp_state.attr, + &class_device_attr_priority.attr, + &class_device_attr_bridge_id.attr, + &class_device_attr_root_id.attr, + &class_device_attr_root_path_cost.attr, + &class_device_attr_root_port.attr, + &class_device_attr_topology_change.attr, + &class_device_attr_topology_change_detected.attr, + &class_device_attr_hello_timer.attr, + &class_device_attr_tcn_timer.attr, + &class_device_attr_topology_change_timer.attr, + &class_device_attr_gc_timer.attr, + &class_device_attr_group_addr.attr, + NULL +}; + +static struct attribute_group bridge_group = { + .name = SYSFS_BRIDGE_ATTR, + .attrs = bridge_attrs, +}; + +/* + * Add entries in sysfs onto the existing network class device + * for the bridge. + * Adds a attribute group "bridge" containing tuning parameters. + * Sub directory to hold links to interfaces. + * + * Note: the ifobj exists only to be a subdirectory + * to hold links. The ifobj exists in the same data structure + * as its parent the bridge so reference counting works. 
+ */
+int brc_sysfs_add_dp(struct datapath *dp)
+{
+	struct kobject *kobj = to_kobj(dp->ports[ODPP_LOCAL]->dev);
+	int err;
+
+	err = sysfs_create_group(kobj, &bridge_group);
+	if (err) {
+		pr_info("%s: can't create group %s/%s\n",
+			__func__, dp_name(dp), bridge_group.name);
+		goto out1;
+	}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
+	/* Here 'ifobj' is a kobject embedded in the datapath. */
+	kobject_set_name(&dp->ifobj, SYSFS_BRIDGE_PORT_SUBDIR);
+	dp->ifobj.ktype = NULL;
+	dp->ifobj.kset = NULL;
+	dp->ifobj.parent = kobj;
+
+	err = kobject_register(&dp->ifobj);
+	if (err) {
+		pr_info("%s: can't add kobject (directory) %s/%s\n",
+			__FUNCTION__, dp_name(dp), dp->ifobj.name);
+		goto out2;
+	}
+#else
+	/* Here 'ifobj' is a pointer, matching the kobject_put(dp->ifobj)
+	 * in brc_sysfs_del_dp(). */
+	dp->ifobj = kobject_create_and_add(SYSFS_BRIDGE_PORT_SUBDIR, kobj);
+	if (!dp->ifobj) {
+		pr_info("%s: can't add kobject (directory) %s/%s\n",
+			__func__, dp_name(dp), SYSFS_BRIDGE_PORT_SUBDIR);
+		/* 'err' is still 0 from sysfs_create_group(); set it so the
+		 * failure is not reported as success. */
+		err = -ENOMEM;
+		goto out2;
+	}
+#endif
+	return 0;
+
+ out2:
+	sysfs_remove_group(kobj, &bridge_group);
+ out1:
+	return err;
+}
+
+int brc_sysfs_del_dp(struct datapath *dp)
+{
+	struct kobject *kobj = to_kobj(dp->ports[ODPP_LOCAL]->dev);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
+	kobject_unregister(&dp->ifobj);
+#else
+	kobject_put(dp->ifobj);
+#endif
+	sysfs_remove_group(kobj, &bridge_group);
+
+	return 0;
+}
+#else /* !SUPPORT_SYSFS */
+int brc_sysfs_add_dp(struct datapath *dp) { return 0; }
+int brc_sysfs_del_dp(struct datapath *dp) { return 0; }
+int brc_sysfs_add_if(struct net_bridge_port *p) { return 0; }
+int brc_sysfs_del_if(struct net_bridge_port *p)
+{
+	dev_put(p->dev);
+	kfree(p);
+	return 0;
+}
+#endif /* !SUPPORT_SYSFS */
diff --git a/datapath/brc_sysfs_if.c b/datapath/brc_sysfs_if.c
new file mode 100644
index 00000000..20bb109b
--- /dev/null
+++ b/datapath/brc_sysfs_if.c
@@ -0,0 +1,334 @@
+/*
+ * Sysfs attributes of bridge ports for Open vSwitch
+ *
+ * This has been shamelessly copied from the kernel sources.
+ */ + +#include <linux/capability.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/if_bridge.h> +#include <linux/rtnetlink.h> +#include <linux/spinlock.h> +#include "brc_sysfs.h" +#include "datapath.h" + +#ifdef SUPPORT_SYSFS + +struct brport_attribute { + struct attribute attr; + ssize_t (*show)(struct net_bridge_port *, char *); + ssize_t (*store)(struct net_bridge_port *, unsigned long); +}; + +#define BRPORT_ATTR(_name,_mode,_show,_store) \ +struct brport_attribute brport_attr_##_name = { \ + .attr = {.name = __stringify(_name), \ + .mode = _mode, \ + .owner = THIS_MODULE, }, \ + .show = _show, \ + .store = _store, \ +}; + +static ssize_t show_path_cost(struct net_bridge_port *p, char *buf) +{ +#if 0 + return sprintf(buf, "%d\n", p->path_cost); +#else + return sprintf(buf, "%d\n", 0); +#endif +} +static ssize_t store_path_cost(struct net_bridge_port *p, unsigned long v) +{ +#if 0 + br_stp_set_path_cost(p, v); +#endif + return 0; +} +static BRPORT_ATTR(path_cost, S_IRUGO | S_IWUSR, + show_path_cost, store_path_cost); + +static ssize_t show_priority(struct net_bridge_port *p, char *buf) +{ +#if 0 + return sprintf(buf, "%d\n", p->priority); +#else + return sprintf(buf, "%d\n", 0); +#endif +} +static ssize_t store_priority(struct net_bridge_port *p, unsigned long v) +{ +#if 0 + if (v >= (1<<(16-BR_PORT_BITS))) + return -ERANGE; + br_stp_set_port_priority(p, v); +#endif + return 0; +} +static BRPORT_ATTR(priority, S_IRUGO | S_IWUSR, + show_priority, store_priority); + +static ssize_t show_designated_root(struct net_bridge_port *p, char *buf) +{ +#if 0 + return br_show_bridge_id(buf, &p->designated_root); +#else + return sprintf(buf, "0000.010203040506\n"); +#endif +} +static BRPORT_ATTR(designated_root, S_IRUGO, show_designated_root, NULL); + +static ssize_t show_designated_bridge(struct net_bridge_port *p, char *buf) +{ +#if 0 + return br_show_bridge_id(buf, &p->designated_bridge); +#else + return sprintf(buf, "0000.060504030201\n"); 
+#endif +} +static BRPORT_ATTR(designated_bridge, S_IRUGO, show_designated_bridge, NULL); + +static ssize_t show_designated_port(struct net_bridge_port *p, char *buf) +{ +#if 0 + return sprintf(buf, "%d\n", p->designated_port); +#else + return sprintf(buf, "%d\n", 0); +#endif +} +static BRPORT_ATTR(designated_port, S_IRUGO, show_designated_port, NULL); + +static ssize_t show_designated_cost(struct net_bridge_port *p, char *buf) +{ +#if 0 + return sprintf(buf, "%d\n", p->designated_cost); +#else + return sprintf(buf, "%d\n", 0); +#endif +} +static BRPORT_ATTR(designated_cost, S_IRUGO, show_designated_cost, NULL); + +static ssize_t show_port_id(struct net_bridge_port *p, char *buf) +{ +#if 0 + return sprintf(buf, "0x%x\n", p->port_id); +#else + return sprintf(buf, "0x%x\n", 0); +#endif +} +static BRPORT_ATTR(port_id, S_IRUGO, show_port_id, NULL); + +static ssize_t show_port_no(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "0x%x\n", p->port_no); +} + +static BRPORT_ATTR(port_no, S_IRUGO, show_port_no, NULL); + +static ssize_t show_change_ack(struct net_bridge_port *p, char *buf) +{ +#if 0 + return sprintf(buf, "%d\n", p->topology_change_ack); +#else + return sprintf(buf, "%d\n", 0); +#endif +} +static BRPORT_ATTR(change_ack, S_IRUGO, show_change_ack, NULL); + +static ssize_t show_config_pending(struct net_bridge_port *p, char *buf) +{ +#if 0 + return sprintf(buf, "%d\n", p->config_pending); +#else + return sprintf(buf, "%d\n", 0); +#endif +} +static BRPORT_ATTR(config_pending, S_IRUGO, show_config_pending, NULL); + +static ssize_t show_port_state(struct net_bridge_port *p, char *buf) +{ +#if 0 + return sprintf(buf, "%d\n", p->state); +#else + return sprintf(buf, "%d\n", 0); +#endif +} +static BRPORT_ATTR(state, S_IRUGO, show_port_state, NULL); + +static ssize_t show_message_age_timer(struct net_bridge_port *p, + char *buf) +{ +#if 0 + return sprintf(buf, "%ld\n", br_timer_value(&p->message_age_timer)); +#else + return sprintf(buf, "%d\n", 0); +#endif 
+} +static BRPORT_ATTR(message_age_timer, S_IRUGO, show_message_age_timer, NULL); + +static ssize_t show_forward_delay_timer(struct net_bridge_port *p, + char *buf) +{ +#if 0 + return sprintf(buf, "%ld\n", br_timer_value(&p->forward_delay_timer)); +#else + return sprintf(buf, "%d\n", 0); +#endif +} +static BRPORT_ATTR(forward_delay_timer, S_IRUGO, show_forward_delay_timer, NULL); + +static ssize_t show_hold_timer(struct net_bridge_port *p, + char *buf) +{ +#if 0 + return sprintf(buf, "%ld\n", br_timer_value(&p->hold_timer)); +#else + return sprintf(buf, "%d\n", 0); +#endif +} +static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL); + +static struct brport_attribute *brport_attrs[] = { + &brport_attr_path_cost, + &brport_attr_priority, + &brport_attr_port_id, + &brport_attr_port_no, + &brport_attr_designated_root, + &brport_attr_designated_bridge, + &brport_attr_designated_port, + &brport_attr_designated_cost, + &brport_attr_state, + &brport_attr_change_ack, + &brport_attr_config_pending, + &brport_attr_message_age_timer, + &brport_attr_forward_delay_timer, + &brport_attr_hold_timer, + NULL +}; + +#define to_brport_attr(_at) container_of(_at, struct brport_attribute, attr) +#define to_brport(obj) container_of(obj, struct net_bridge_port, kobj) + +static ssize_t brport_show(struct kobject * kobj, + struct attribute * attr, char * buf) +{ + struct brport_attribute * brport_attr = to_brport_attr(attr); + struct net_bridge_port * p = to_brport(kobj); + + return brport_attr->show(p, buf); +} + +static ssize_t brport_store(struct kobject * kobj, + struct attribute * attr, + const char * buf, size_t count) +{ + struct net_bridge_port * p = to_brport(kobj); +#if 0 + struct brport_attribute * brport_attr = to_brport_attr(attr); + char *endp; + unsigned long val; +#endif + ssize_t ret = -EINVAL; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + +#if 0 + val = simple_strtoul(buf, &endp, 0); + if (endp != buf) { + rtnl_lock(); + if (p->dev && p->br && 
brport_attr->store) { + spin_lock_bh(&p->br->lock); + ret = brport_attr->store(p, val); + spin_unlock_bh(&p->br->lock); + if (ret == 0) + ret = count; + } + rtnl_unlock(); + } +#else + printk("%s: xxx writing port parms not supported yet!\n", + dp_name(p->dp)); +#endif + return ret; +} + +struct sysfs_ops brport_sysfs_ops = { + .show = brport_show, + .store = brport_store, +}; + +static void release_nbp(struct kobject *kobj) +{ + struct net_bridge_port *p + = container_of(kobj, struct net_bridge_port, kobj); + kfree(p); +} + +struct kobj_type brport_ktype = { + .sysfs_ops = &brport_sysfs_ops, + .release = release_nbp +}; + +/* + * Add sysfs entries to ethernet device added to a bridge. + * Creates a brport subdirectory with bridge attributes. + * Puts symlink in bridge's brport subdirectory + */ +int brc_sysfs_add_if(struct net_bridge_port *p) +{ + struct datapath *dp = p->dp; + struct brport_attribute **a; + int err; + + kobject_init(&p->kobj); + kobject_set_name(&p->kobj, SYSFS_BRIDGE_PORT_ATTR); + p->kobj.ktype = &brport_ktype; + p->kobj.kset = NULL; + p->kobj.parent = &(p->dev->class_dev.kobj); + + err = kobject_add(&p->kobj); + if (err) + goto err_put; + + err = sysfs_create_link(&p->kobj, + &dp->ports[ODPP_LOCAL]->dev->class_dev.kobj, + SYSFS_BRIDGE_PORT_LINK); + if (err) + goto err_del; + + for (a = brport_attrs; *a; ++a) { + err = sysfs_create_file(&p->kobj, &((*a)->attr)); + if (err) + goto err_del; + } + + err = sysfs_create_link(&dp->ifobj, &p->kobj, p->dev->name); + if (err) + goto err_del; + + kobject_uevent(&p->kobj, KOBJ_ADD); + + return err; + +err_del: + kobject_del(&p->kobj); +err_put: + kobject_put(&p->kobj); + return err; +} + +int brc_sysfs_del_if(struct net_bridge_port *p) +{ + struct net_device *dev = p->dev; + + kobject_uevent(&p->kobj, KOBJ_REMOVE); + kobject_del(&p->kobj); + + dev_put(dev); + + kobject_put(&p->kobj); + + return 0; +} +#endif /* SUPPORT_SYSFS */ diff --git a/datapath/brcompat.c b/datapath/brcompat.c new file mode 100644 
index 00000000..2e437ccd --- /dev/null +++ b/datapath/brcompat.c @@ -0,0 +1,519 @@ +#include <linux/kernel.h> +#include <asm/uaccess.h> +#include <linux/completion.h> +#include <linux/delay.h> +#include <linux/etherdevice.h> +#include <linux/if_bridge.h> +#include <linux/rculist.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> +#include <net/genetlink.h> + +#include "compat.h" +#include "openvswitch/brcompat-netlink.h" +#include "brc_procfs.h" +#include "brc_sysfs.h" +#include "datapath.h" +#include "dp_dev.h" + +static struct genl_family brc_genl_family; +static struct genl_multicast_group brc_mc_group; + +/* Time to wait for ovs-vswitchd to respond to a datapath action, in + * jiffies. */ +#define BRC_TIMEOUT (HZ * 5) + +/* Mutex to serialize ovs-brcompatd callbacks. (Some callbacks naturally hold + * br_ioctl_mutex, others hold rtnl_lock, but we can't take the former + * ourselves and we don't want to hold the latter over a potentially long + * period of time.) */ +static DEFINE_MUTEX(brc_serial); + +/* Userspace communication. */ +static DEFINE_SPINLOCK(brc_lock); /* Ensure atomic access to these vars. */ +static DECLARE_COMPLETION(brc_done); /* Userspace signaled operation done? */ +static int brc_err; /* Error code from userspace. */ +static u32 brc_seq; /* Sequence number for current op. 
*/ + +static int brc_send_command(const char *bridge, const char *port, int op); + +static int +get_dp_ifindices(int *indices, int num) +{ + int i, index = 0; + + rcu_read_lock(); + for (i=0; i < ODP_MAX && index < num; i++) { + struct datapath *dp = get_dp(i); + if (!dp) + continue; + indices[index++] = dp->ports[ODPP_LOCAL]->dev->ifindex; + } + rcu_read_unlock(); + + return index; +} + +static void +get_port_ifindices(struct datapath *dp, int *ifindices, int num) +{ + struct net_bridge_port *p; + + rcu_read_lock(); + list_for_each_entry_rcu (p, &dp->port_list, node) { + if (p->port_no < num) + ifindices[p->port_no] = p->dev->ifindex; + } + rcu_read_unlock(); +} + +static int brc_add_del_bridge(char __user *uname, int add) +{ + char name[IFNAMSIZ]; + + if (copy_from_user(name, uname, IFNAMSIZ)) + return -EFAULT; + + name[IFNAMSIZ - 1] = 0; + return brc_send_command(name, NULL, + add ? BRC_GENL_C_DP_ADD : BRC_GENL_C_DP_DEL); +} + +static int brc_get_bridges(int __user *uindices, int n) +{ + int *indices; + int ret; + + if (n >= 2048) + return -ENOMEM; + + indices = kcalloc(n, sizeof(int), GFP_KERNEL); + if (indices == NULL) + return -ENOMEM; + + n = get_dp_ifindices(indices, n); + + ret = copy_to_user(uindices, indices, n * sizeof(int)) ? -EFAULT : n; + + kfree(indices); + return ret; +} + +/* Legacy deviceless bridge ioctl's. Called with br_ioctl_mutex. */ +static int +old_deviceless(void __user *uarg) +{ + unsigned long args[3]; + + if (copy_from_user(args, uarg, sizeof(args))) + return -EFAULT; + + switch (args[0]) { + case BRCTL_GET_BRIDGES: + return brc_get_bridges((int __user *)args[1], args[2]); + + case BRCTL_ADD_BRIDGE: + return brc_add_del_bridge((void __user *)args[1], 1); + case BRCTL_DEL_BRIDGE: + return brc_add_del_bridge((void __user *)args[1], 0); + } + + return -EOPNOTSUPP; +} + +/* Called with the br_ioctl_mutex. 
*/ +static int +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) +brc_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg) +#else +brc_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg) +#endif +{ + switch (cmd) { + case SIOCGIFBR: + case SIOCSIFBR: + return old_deviceless(uarg); + + case SIOCBRADDBR: + return brc_add_del_bridge(uarg, 1); + case SIOCBRDELBR: + return brc_add_del_bridge(uarg, 0); + } + + return -EOPNOTSUPP; +} + +static int +brc_add_del_port(struct net_device *dev, int port_ifindex, int add) +{ + struct net_device *port; + char dev_name[IFNAMSIZ], port_name[IFNAMSIZ]; + int err; + + port = __dev_get_by_index(&init_net, port_ifindex); + if (!port) + return -EINVAL; + + /* Save name of dev and port because there's a race between the + * rtnl_unlock() and the brc_send_command(). */ + strcpy(dev_name, dev->name); + strcpy(port_name, port->name); + + rtnl_unlock(); + err = brc_send_command(dev_name, port_name, + add ? BRC_GENL_C_PORT_ADD : BRC_GENL_C_PORT_DEL); + rtnl_lock(); + + return err; +} + +static int +brc_get_bridge_info(struct net_device *dev, struct __bridge_info __user *ub) +{ + struct __bridge_info b; + u64 id = 0; + int i; + + memset(&b, 0, sizeof(struct __bridge_info)); + + for (i=0; i<ETH_ALEN; i++) + id |= (u64)dev->dev_addr[i] << (8*(ETH_ALEN-1 - i)); + b.bridge_id = cpu_to_be64(id); + b.stp_enabled = 0; + + if (copy_to_user(ub, &b, sizeof(struct __bridge_info))) + return -EFAULT; + + return 0; +} + +static int +brc_get_port_list(struct net_device *dev, int __user *uindices, int num) +{ + struct dp_dev *dp_dev = netdev_priv(dev); + struct datapath *dp = dp_dev->dp; + int *indices; + + if (num < 0) + return -EINVAL; + if (num == 0) + num = 256; + if (num > DP_MAX_PORTS) + num = DP_MAX_PORTS; + + indices = kcalloc(num, sizeof(int), GFP_KERNEL); + if (indices == NULL) + return -ENOMEM; + + get_port_ifindices(dp, indices, num); + if (copy_to_user(uindices, indices, num * sizeof(int))) + num = -EFAULT; + 
kfree(indices); + return num; +} + +/* Legacy ioctls through SIOCDEVPRIVATE: the request code is args[0] and its operands follow in args[1..]. Returns -EOPNOTSUPP for unknown requests. Called with rtnl_lock. */ +static int +old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + unsigned long args[4]; + + if (copy_from_user(args, rq->ifr_data, sizeof(args))) + return -EFAULT; + + switch (args[0]) { + case BRCTL_ADD_IF: + return brc_add_del_port(dev, args[1], 1); + case BRCTL_DEL_IF: + return brc_add_del_port(dev, args[1], 0); + + case BRCTL_GET_BRIDGE_INFO: + return brc_get_bridge_info(dev, (struct __bridge_info __user *)args[1]); + + case BRCTL_GET_PORT_LIST: + return brc_get_port_list(dev, (int __user *)args[1], args[2]); + } + + return -EOPNOTSUPP; +} + +/* Device ioctl entry point: dispatches SIOCDEVPRIVATE to old_dev_ioctl() and handles SIOCBRADDIF/SIOCBRDELIF directly. Called with the rtnl_lock. */ +static int +brc_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + int err; + + switch (cmd) { + case SIOCDEVPRIVATE: + err = old_dev_ioctl(dev, rq, cmd); + break; + + case SIOCBRADDIF: + return brc_add_del_port(dev, rq->ifr_ifindex, 1); + case SIOCBRDELIF: + return brc_add_del_port(dev, rq->ifr_ifindex, 0); + + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + + +static struct genl_family brc_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = BRC_GENL_FAMILY_NAME, + .version = 1, + .maxattr = BRC_GENL_A_MAX, +}; + +/* Handler for BRC_GENL_C_QUERY_MC: replies with the id of brc_mc_group in a BRC_GENL_A_MC_GROUP attribute. The reply skb is freed here on any failure before genlmsg_reply(). */ static int brc_genl_query(struct sk_buff *skb, struct genl_info *info) +{ + int err = -EINVAL; + struct sk_buff *ans_skb; + void *data; + + ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!ans_skb) + return -ENOMEM; + + data = genlmsg_put_reply(ans_skb, info, &brc_genl_family, + 0, BRC_GENL_C_QUERY_MC); + if (data == NULL) { + err = -ENOMEM; + goto err; + } + NLA_PUT_U32(ans_skb, BRC_GENL_A_MC_GROUP, brc_mc_group.id); + + genlmsg_end(ans_skb, data); + return genlmsg_reply(ans_skb, info); + +err: +nla_put_failure: + kfree_skb(ans_skb); + return err; +} + +static struct genl_ops brc_genl_ops_query_dp = { + .cmd = BRC_GENL_C_QUERY_MC, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege.
*/ + .policy = NULL, + .doit = brc_genl_query, + .dumpit = NULL +}; + +/* Attribute policy: what each attribute may contain. */ +static struct nla_policy brc_genl_policy[BRC_GENL_A_MAX + 1] = { + [BRC_GENL_A_ERR_CODE] = { .type = NLA_U32 }, + [BRC_GENL_A_PROC_DIR] = { .type = NLA_NUL_STRING }, + [BRC_GENL_A_PROC_NAME] = { .type = NLA_NUL_STRING }, + [BRC_GENL_A_PROC_DATA] = { .type = NLA_NUL_STRING }, +}; + +/* Handler for BRC_GENL_C_DP_RESULT. If the message's sequence number matches brc_seq, stores the BRC_GENL_A_ERR_CODE value in brc_err and completes brc_done; otherwise returns -ESTALE. Returns -EINVAL when the attribute is missing. */ static int +brc_genl_dp_result(struct sk_buff *skb, struct genl_info *info) +{ + unsigned long int flags; + int err; + + if (!info->attrs[BRC_GENL_A_ERR_CODE]) + return -EINVAL; + + spin_lock_irqsave(&brc_lock, flags); + if (brc_seq == info->snd_seq) { + brc_err = nla_get_u32(info->attrs[BRC_GENL_A_ERR_CODE]); + complete(&brc_done); + err = 0; + } else { + err = -ESTALE; + } + spin_unlock_irqrestore(&brc_lock, flags); + + return err; +} + +static struct genl_ops brc_genl_ops_dp_result = { + .cmd = BRC_GENL_C_DP_RESULT, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .policy = brc_genl_policy, + .doit = brc_genl_dp_result, + .dumpit = NULL +}; + +static struct genl_ops brc_genl_ops_set_proc = { + .cmd = BRC_GENL_C_SET_PROC, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .policy = brc_genl_policy, + .doit = brc_genl_set_proc, + .dumpit = NULL +}; + +/* Multicasts command 'op' carrying the name 'bridge' (and 'port', if nonnull) to brc_mc_group, then waits up to BRC_TIMEOUT for brc_done to be completed. Returns the negated error code stored in brc_err, -ETIMEDOUT on timeout, or another negative errno. Calls are serialized by brc_serial. */ static int brc_send_command(const char *bridge, const char *port, int op) +{ + unsigned long int flags; + struct sk_buff *skb; + void *data; + int error; + + mutex_lock(&brc_serial); + + /* Increment sequence number first, so that we ignore any replies + * to stale requests. */ + spin_lock_irqsave(&brc_lock, flags); + brc_seq++; + INIT_COMPLETION(brc_done); + spin_unlock_irqrestore(&brc_lock, flags); + + /* Compose message.
*/ + skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + error = -ENOMEM; + if (skb == NULL) + goto exit_unlock; + data = genlmsg_put(skb, 0, brc_seq, &brc_genl_family, 0, op); + + NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge); + if (port) + NLA_PUT_STRING(skb, BRC_GENL_A_PORT_NAME, port); + + genlmsg_end(skb, data); + + /* Send message. */ + error = genlmsg_multicast(skb, 0, brc_mc_group.id, GFP_KERNEL); + if (error < 0) + goto exit_unlock; + + /* Wait for reply. */ + error = -ETIMEDOUT; + if (!wait_for_completion_timeout(&brc_done, BRC_TIMEOUT)) + goto exit_unlock; + + error = -brc_err; + goto exit_unlock; + +nla_put_failure: + kfree_skb(skb); +exit_unlock: + mutex_unlock(&brc_serial); + return error; +} + +int brc_add_dp(struct datapath *dp) +{ + if (!try_module_get(THIS_MODULE)) + return -ENODEV; +#ifdef SUPPORT_SYSFS + brc_sysfs_add_dp(dp); +#endif + + return 0; +} + +int brc_del_dp(struct datapath *dp) +{ +#ifdef SUPPORT_SYSFS + brc_sysfs_del_dp(dp); +#endif + module_put(THIS_MODULE); + + return 0; +} + +static int +__init brc_init(void) +{ + int i; + int err; + + printk("Open vSwitch Bridge Compatibility, built "__DATE__" "__TIME__"\n"); + + rcu_read_lock(); + for (i=0; i<ODP_MAX; i++) { + if (get_dp(i)) { + rcu_read_unlock(); + printk(KERN_EMERG "brcompat: no datapaths may exist!\n"); + return -EEXIST; + } + } + rcu_read_unlock(); + + /* Set the bridge ioctl handler */ + brioctl_set(brc_ioctl_deviceless_stub); + + /* Set the openvswitch_mod device ioctl handler */ + dp_ioctl_hook = brc_dev_ioctl; + + /* Register hooks for datapath adds and deletes */ + dp_add_dp_hook = brc_add_dp; + dp_del_dp_hook = brc_del_dp; + + /* Register hooks for interface adds and deletes */ +#ifdef SUPPORT_SYSFS + dp_add_if_hook = brc_sysfs_add_if; + dp_del_if_hook = brc_sysfs_del_if; +#endif + + /* Randomize the initial sequence number. 
This is not a security + * feature; it only helps avoid crossed wires between userspace and + * the kernel when the module is unloaded and reloaded. */ + brc_seq = net_random(); + + /* Register generic netlink family to communicate changes to + * userspace. */ + err = genl_register_family(&brc_genl_family); + if (err) + goto error; + + err = genl_register_ops(&brc_genl_family, &brc_genl_ops_query_dp); + if (err != 0) + goto err_unregister; + + err = genl_register_ops(&brc_genl_family, &brc_genl_ops_dp_result); + if (err != 0) + goto err_unregister; + + err = genl_register_ops(&brc_genl_family, &brc_genl_ops_set_proc); + if (err != 0) + goto err_unregister; + + strcpy(brc_mc_group.name, "brcompat"); + err = genl_register_mc_group(&brc_genl_family, &brc_mc_group); + if (err < 0) + goto err_unregister; + + return 0; + +err_unregister: + genl_unregister_family(&brc_genl_family); +error: + printk(KERN_EMERG "brcompat: failed to install!"); + return err; +} + +static void +brc_cleanup(void) +{ + /* Unregister hooks for datapath adds and deletes */ + dp_add_dp_hook = NULL; + dp_del_dp_hook = NULL; + + /* Unregister hooks for interface adds and deletes */ + dp_add_if_hook = NULL; + dp_del_if_hook = NULL; + + /* Unregister ioctl hooks */ + dp_ioctl_hook = NULL; + brioctl_set(NULL); + + genl_unregister_family(&brc_genl_family); + brc_procfs_exit(); +} + +module_init(brc_init); +module_exit(brc_cleanup); + +MODULE_DESCRIPTION("Open vSwitch bridge compatibility"); +MODULE_AUTHOR("Nicira Networks"); +MODULE_LICENSE("GPL"); diff --git a/datapath/compat.h b/datapath/compat.h new file mode 100644 index 00000000..12100ae3 --- /dev/null +++ b/datapath/compat.h @@ -0,0 +1,17 @@ +#ifndef COMPAT_H +#define COMPAT_H 1 + +#include <linux/version.h> + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) + +#include "compat26.h" + +#else + +#include "compat24.h" + +#endif + + +#endif /* compat.h */ diff --git a/datapath/datapath.c b/datapath/datapath.c new file mode 100644 index 
00000000..015edc4b --- /dev/null +++ b/datapath/datapath.c @@ -0,0 +1,1611 @@ +/* + * Distributed under the terms of the GNU GPL version 2. + * Copyright (c) 2007, 2008, 2009 Nicira Networks. + */ + +/* Functions for managing the dp interface/device. */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/if_arp.h> +#include <linux/if_bridge.h> +#include <linux/if_vlan.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/delay.h> +#include <linux/time.h> +#include <linux/etherdevice.h> +#include <linux/kernel.h> +#include <linux/kthread.h> +#include <linux/llc.h> +#include <linux/mutex.h> +#include <linux/percpu.h> +#include <linux/rcupdate.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/version.h> +#include <linux/ethtool.h> +#include <linux/random.h> +#include <linux/wait.h> +#include <asm/system.h> +#include <asm/div64.h> +#include <asm/bug.h> +#include <linux/netfilter_bridge.h> +#include <linux/netfilter_ipv4.h> +#include <linux/inetdevice.h> +#include <linux/list.h> +#include <linux/rculist.h> +#include <linux/workqueue.h> +#include <linux/dmi.h> +#include <net/llc.h> + +#include "openvswitch/datapath-protocol.h" +#include "datapath.h" +#include "actions.h" +#include "dp_dev.h" +#include "flow.h" + +#include "compat.h" + + +int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd); +EXPORT_SYMBOL(dp_ioctl_hook); + +int (*dp_add_dp_hook)(struct datapath *dp); +EXPORT_SYMBOL(dp_add_dp_hook); + +int (*dp_del_dp_hook)(struct datapath *dp); +EXPORT_SYMBOL(dp_del_dp_hook); + +int (*dp_add_if_hook)(struct net_bridge_port *p); +EXPORT_SYMBOL(dp_add_if_hook); + +int (*dp_del_if_hook)(struct net_bridge_port *p); +EXPORT_SYMBOL(dp_del_if_hook); + +/* Datapaths. Protected on the read side by rcu_read_lock, on the write side + * by dp_mutex. 
dp_mutex is almost completely redundant with genl_mutex + * maintained by the Generic Netlink code, but the timeout path needs mutual + * exclusion too. + * + * dp_mutex nests inside the RTNL lock: if you need both you must take the RTNL + * lock first. + * + * It is safe to access the datapath and net_bridge_port structures with just + * dp_mutex. + */ +static struct datapath *dps[ODP_MAX]; +static DEFINE_MUTEX(dp_mutex); + +/* Number of milliseconds between runs of the maintenance thread. */ +#define MAINT_SLEEP_MSECS 1000 + +static int new_nbp(struct datapath *, struct net_device *, int port_no); + +/* Must be called with rcu_read_lock or dp_mutex. */ +struct datapath *get_dp(int dp_idx) +{ + if (dp_idx < 0 || dp_idx >= ODP_MAX) + return NULL; + return rcu_dereference(dps[dp_idx]); +} +EXPORT_SYMBOL_GPL(get_dp); + +struct datapath *get_dp_locked(int dp_idx) +{ + struct datapath *dp; + + mutex_lock(&dp_mutex); + dp = get_dp(dp_idx); + if (dp) + mutex_lock(&dp->mutex); + mutex_unlock(&dp_mutex); + return dp; +} + +static inline size_t br_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + + nla_total_size(4) /* IFLA_MASTER */ + + nla_total_size(4) /* IFLA_MTU */ + + nla_total_size(4) /* IFLA_LINK */ + + nla_total_size(1); /* IFLA_OPERSTATE */ +} + +static int dp_fill_ifinfo(struct sk_buff *skb, + const struct net_bridge_port *port, + int event, unsigned int flags) +{ + const struct datapath *dp = port->dp; + const struct net_device *dev = port->dev; + struct ifinfomsg *hdr; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags); + if (nlh == NULL) + return -EMSGSIZE; + + hdr = nlmsg_data(nlh); + hdr->ifi_family = AF_BRIDGE; + hdr->__ifi_pad = 0; + hdr->ifi_type = dev->type; + hdr->ifi_index = dev->ifindex; + hdr->ifi_flags = dev_get_flags(dev); + hdr->ifi_change = 0; + + NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); + 
NLA_PUT_U32(skb, IFLA_MASTER, dp->ports[ODPP_LOCAL]->dev->ifindex); + NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); +#ifdef IFLA_OPERSTATE + NLA_PUT_U8(skb, IFLA_OPERSTATE, + netif_running(dev) ? dev->operstate : IF_OPER_DOWN); +#endif + + if (dev->addr_len) + NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); + + if (dev->ifindex != dev->iflink) + NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static void dp_ifinfo_notify(int event, struct net_bridge_port *port) +{ + struct net *net = dev_net(port->dev); + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(br_nlmsg_size(), GFP_KERNEL); + if (skb == NULL) + goto errout; + + err = dp_fill_ifinfo(skb, port, event, 0); + if (err < 0) { + /* -EMSGSIZE implies BUG in br_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); +errout: + if (err < 0) + rtnl_set_sk_err(net, RTNLGRP_LINK, err); +} + +static int create_dp(int dp_idx, const char __user *devnamep) +{ + struct net_device *dp_dev; + char devname[IFNAMSIZ]; + struct datapath *dp; + int err; + int i; + + if (devnamep) { + err = -EFAULT; + if (strncpy_from_user(devname, devnamep, IFNAMSIZ - 1) < 0) + goto err; + devname[IFNAMSIZ - 1] = '\0'; + } else { + snprintf(devname, sizeof devname, "of%d", dp_idx); + } + + rtnl_lock(); + mutex_lock(&dp_mutex); + err = -ENODEV; + if (!try_module_get(THIS_MODULE)) + goto err_unlock; + + /* Exit early if a datapath with that number already exists. + * (We don't use -EEXIST because that's ambiguous with 'devname' + * conflicting with an existing network device name.) 
*/ + err = -EBUSY; + if (get_dp(dp_idx)) + goto err_put_module; + + err = -ENOMEM; + dp = kzalloc(sizeof *dp, GFP_KERNEL); + if (dp == NULL) + goto err_put_module; + + mutex_init(&dp->mutex); + dp->dp_idx = dp_idx; + for (i = 0; i < DP_N_QUEUES; i++) + skb_queue_head_init(&dp->queues[i]); + init_waitqueue_head(&dp->waitqueue); + + /* Setup our datapath device */ + dp_dev = dp_dev_create(dp, devname, ODPP_LOCAL); + err = PTR_ERR(dp_dev); + if (IS_ERR(dp_dev)) + goto err_free_dp; + + err = -ENOMEM; + rcu_assign_pointer(dp->table, dp_table_create(DP_L1_SIZE)); + if (!dp->table) + goto err_destroy_dp_dev; + INIT_LIST_HEAD(&dp->port_list); + + err = new_nbp(dp, dp_dev, ODPP_LOCAL); + if (err) + goto err_destroy_table; + + dp->drop_frags = 0; + /* 'err' is 0 after the successful new_nbp(); set it again so an allocation failure is not reported to the caller as success. */ + err = -ENOMEM; + dp->stats_percpu = alloc_percpu(struct dp_stats_percpu); + if (!dp->stats_percpu) + goto err_destroy_local_port; + + /* Publish the datapath only once it is fully set up. */ + rcu_assign_pointer(dps[dp_idx], dp); + mutex_unlock(&dp_mutex); + rtnl_unlock(); + + if (dp_add_dp_hook) + dp_add_dp_hook(dp); + + return 0; + +/* NOTE(review): dp_del_port() itself calls dp_dev_destroy() for datapath devices, and this path then falls through to dp_dev_destroy(dp_dev) below -- confirm that the second call is safe. */ +err_destroy_local_port: + dp_del_port(dp->ports[ODPP_LOCAL], NULL); +err_destroy_table: + dp_table_destroy(dp->table, 0); +err_destroy_dp_dev: + dp_dev_destroy(dp_dev); +err_free_dp: + kfree(dp); +err_put_module: + module_put(THIS_MODULE); +err_unlock: + mutex_unlock(&dp_mutex); + rtnl_unlock(); +err: + return err; +} + +static void do_destroy_dp(struct datapath *dp, struct list_head *dp_devs) +{ + struct net_bridge_port *p, *n; + int i; + + if (dp_del_dp_hook) + dp_del_dp_hook(dp); + + /* Drop references to DP. */ + list_for_each_entry_safe (p, n, &dp->port_list, node) + dp_del_port(p, dp_devs); + + rcu_assign_pointer(dps[dp->dp_idx], NULL); + synchronize_rcu(); + + /* Wait until no longer in use, then destroy it. 
*/ + synchronize_rcu(); + dp_table_destroy(dp->table, 1); + for (i = 0; i < DP_N_QUEUES; i++) + skb_queue_purge(&dp->queues[i]); + for (i = 0; i < DP_MAX_GROUPS; i++) + kfree(dp->groups[i]); + free_percpu(dp->stats_percpu); + kfree(dp); + module_put(THIS_MODULE); +} + +static int destroy_dp(int dp_idx) +{ + struct dp_dev *dp_dev, *next; + struct datapath *dp; + LIST_HEAD(dp_devs); + int err; + + rtnl_lock(); + mutex_lock(&dp_mutex); + dp = get_dp(dp_idx); + err = -ENODEV; + if (!dp) + goto err_unlock; + + do_destroy_dp(dp, &dp_devs); + err = 0; + +err_unlock: + mutex_unlock(&dp_mutex); + rtnl_unlock(); + list_for_each_entry_safe (dp_dev, next, &dp_devs, list) + free_netdev(dp_dev->dev); + return err; +} + +/* Called with RTNL lock and dp_mutex. */ +static int new_nbp(struct datapath *dp, struct net_device *dev, int port_no) +{ + struct net_bridge_port *p; + + if (dev->br_port != NULL) + return -EBUSY; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + + dev_set_promiscuity(dev, 1); + dev_hold(dev); + p->port_no = port_no; + p->dp = dp; + p->dev = dev; + if (!is_dp_dev(dev)) + rcu_assign_pointer(dev->br_port, p); + else { + /* It would make sense to assign dev->br_port here too, but + * that causes packets received on internal ports to get caught + * in dp_frame_hook(). In turn dp_frame_hook() can reject them + * back to network stack, but that's a waste of time. 
*/ + } + rcu_assign_pointer(dp->ports[port_no], p); + list_add_rcu(&p->node, &dp->port_list); + dp->n_ports++; + + dp_ifinfo_notify(RTM_NEWLINK, p); + + return 0; +} + +static int add_port(int dp_idx, struct odp_port __user *portp) +{ + struct net_device *dev; + struct datapath *dp; + struct odp_port port; + int port_no; + int err; + + err = -EFAULT; + if (copy_from_user(&port, portp, sizeof port)) + goto out; + port.devname[IFNAMSIZ - 1] = '\0'; + port_no = port.port; + + err = -EINVAL; + if (port_no < 0 || port_no >= DP_MAX_PORTS) + goto out; + + rtnl_lock(); + dp = get_dp_locked(dp_idx); + err = -ENODEV; + if (!dp) + goto out_unlock_rtnl; + + err = -EEXIST; + if (dp->ports[port_no]) + goto out_unlock_dp; + + if (!(port.flags & ODP_PORT_INTERNAL)) { + err = -ENODEV; + dev = dev_get_by_name(&init_net, port.devname); + if (!dev) + goto out_unlock_dp; + + err = -EINVAL; + if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER || + is_dp_dev(dev)) + goto out_put; + } else { + dev = dp_dev_create(dp, port.devname, port_no); + err = PTR_ERR(dev); + if (IS_ERR(dev)) + goto out_unlock_dp; + dev_hold(dev); + } + + err = new_nbp(dp, dev, port_no); + if (err) + goto out_put; + + if (dp_add_if_hook) + dp_add_if_hook(dp->ports[port_no]); + +out_put: + dev_put(dev); +out_unlock_dp: + mutex_unlock(&dp->mutex); +out_unlock_rtnl: + rtnl_unlock(); +out: + return err; +} + +int dp_del_port(struct net_bridge_port *p, struct list_head *dp_devs) +{ + ASSERT_RTNL(); + +#ifdef SUPPORT_SYSFS + if (p->port_no != ODPP_LOCAL && dp_del_if_hook) + sysfs_remove_link(&p->dp->ifobj, p->dev->name); +#endif + dp_ifinfo_notify(RTM_DELLINK, p); + + p->dp->n_ports--; + + if (is_dp_dev(p->dev)) { + /* Make sure that no packets arrive from now on, since + * dp_dev_xmit() will try to find itself through + * p->dp->ports[], and we're about to set that to null. */ + netif_tx_disable(p->dev); + } + + /* First drop references to device. 
*/ + dev_set_promiscuity(p->dev, -1); + list_del_rcu(&p->node); + rcu_assign_pointer(p->dp->ports[p->port_no], NULL); + rcu_assign_pointer(p->dev->br_port, NULL); + + /* Then wait until no one is still using it, and destroy it. */ + synchronize_rcu(); + + if (is_dp_dev(p->dev)) { + dp_dev_destroy(p->dev); + if (dp_devs) { + struct dp_dev *dp_dev = dp_dev_priv(p->dev); + list_add(&dp_dev->list, dp_devs); + } + } + if (p->port_no != ODPP_LOCAL && dp_del_if_hook) { + dp_del_if_hook(p); + } else { + dev_put(p->dev); + kfree(p); + } + + return 0; +} + +static int del_port(int dp_idx, int port_no) +{ + struct dp_dev *dp_dev, *next; + struct net_bridge_port *p; + struct datapath *dp; + LIST_HEAD(dp_devs); + int err; + + err = -EINVAL; + if (port_no < 0 || port_no >= DP_MAX_PORTS || port_no == ODPP_LOCAL) + goto out; + + rtnl_lock(); + dp = get_dp_locked(dp_idx); + err = -ENODEV; + if (!dp) + goto out_unlock_rtnl; + + p = dp->ports[port_no]; + err = -ENOENT; + if (!p) + goto out_unlock_dp; + + err = dp_del_port(p, &dp_devs); + +out_unlock_dp: + mutex_unlock(&dp->mutex); +out_unlock_rtnl: + rtnl_unlock(); +out: + list_for_each_entry_safe (dp_dev, next, &dp_devs, list) + free_netdev(dp_dev->dev); + return err; +} + +/* Must be called with rcu_read_lock. */ +static void +do_port_input(struct net_bridge_port *p, struct sk_buff *skb) +{ + /* Make our own copy of the packet. Otherwise we will mangle the + * packet for anyone who came before us (e.g. tcpdump via AF_PACKET). + * (No one comes after us, since we tell handle_bridge() that we took + * the packet.) */ + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return; + + /* Push the Ethernet header back on. */ + skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + dp_process_received_packet(skb, p); +} + +/* Must be called with rcu_read_lock and with bottom-halves disabled. 
*/
+void dp_process_received_packet(struct sk_buff *skb, struct net_bridge_port *p)
+{
+	struct datapath *dp = p->dp;
+	struct dp_stats_percpu *stats;
+	struct odp_flow_key key;
+	struct sw_flow *flow;
+
+	WARN_ON_ONCE(skb_shared(skb));
+	WARN_ON_ONCE(skb->destructor);
+
+	/* BHs are off so we don't have to use get_cpu()/put_cpu() here. */
+	stats = percpu_ptr(dp->stats_percpu, smp_processor_id());
+
+	/* NOTE(review): 'p' was already dereferenced for p->dp above, so the
+	 * null test below cannot usefully select ODPP_NONE. */
+	if (flow_extract(skb, p ? p->port_no : ODPP_NONE, &key)) {
+		/* A nonzero result is counted in n_frags, so it presumably
+		 * marks an IP fragment -- confirm in flow_extract(). */
+		if (dp->drop_frags) {
+			kfree_skb(skb);
+			stats->n_frags++;
+			return;
+		}
+	}
+
+	flow = dp_table_lookup(rcu_dereference(dp->table), &key);
+	if (flow) {
+		/* Hit: update the flow's usage and run its actions. */
+		struct sw_flow_actions *acts = rcu_dereference(flow->sf_acts);
+		flow_used(flow, skb);
+		execute_actions(dp, skb, &key, acts->actions, acts->n_actions,
+				GFP_ATOMIC);
+		stats->n_hit++;
+	} else {
+		/* Miss: queue the packet on the miss queue. */
+		stats->n_missed++;
+		dp_output_control(dp, skb, _ODPL_MISS_NR, 0);
+	}
+}
+
+/*
+ * Used as br_handle_frame_hook. (Cannot run bridge at the same time, even on
+ * different set of devices!)
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
+/* Called with rcu_read_lock and bottom-halves disabled. */
+static struct sk_buff *dp_frame_hook(struct net_bridge_port *p,
+					 struct sk_buff *skb)
+{
+	do_port_input(p, skb);
+	/* The packet is always taken, so nothing is handed back. */
+	return NULL;
+}
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+/* Called with rcu_read_lock and bottom-halves disabled. */
+static int dp_frame_hook(struct net_bridge_port *p, struct sk_buff **pskb)
+{
+	do_port_input(p, *pskb);
+	return 1;
+}
+#else
+#error
+#endif
+
+#ifdef CONFIG_XEN
+/* This code is copied verbatim from net/dev/core.c in Xen's
+ * linux-2.6.18-92.1.10.el5.xs5.0.0.394.644. We can't call those functions
+ * directly because they aren't exported.
*/
+/* Returns 1 if 'ptr' is already below skb->tail or if __pskb_pull_tail()
+ * manages to pull the data up to 'ptr' into the linear area, otherwise 0. */
+static int skb_pull_up_to(struct sk_buff *skb, void *ptr)
+{
+	if (ptr < (void *)skb->tail)
+		return 1;
+	if (__pskb_pull_tail(skb,
+			     ptr - (void *)skb->data - skb_headlen(skb))) {
+		return 1;
+	} else {
+		return 0;
+	}
+}
+
+/* If 'skb' has proto_csum_blank set, locates its TCP or UDP checksum field
+ * and marks the packet CHECKSUM_HW.  Returns 0 on success (or when nothing
+ * needs doing) and -EPROTO for anything other than IPv4 TCP/UDP or when the
+ * headers cannot be pulled up. */
+int skb_checksum_setup(struct sk_buff *skb)
+{
+	if (skb->proto_csum_blank) {
+		if (skb->protocol != htons(ETH_P_IP))
+			goto out;
+		if (!skb_pull_up_to(skb, skb->nh.iph + 1))
+			goto out;
+		/* ihl counts 32-bit words. */
+		skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
+		switch (skb->nh.iph->protocol) {
+		case IPPROTO_TCP:
+			skb->csum = offsetof(struct tcphdr, check);
+			break;
+		case IPPROTO_UDP:
+			skb->csum = offsetof(struct udphdr, check);
+			break;
+		default:
+			if (net_ratelimit())
+				printk(KERN_ERR "Attempting to checksum a non-"
+				       "TCP/UDP packet, dropping a protocol"
+				       " %d packet", skb->nh.iph->protocol);
+			goto out;
+		}
+		/* Make sure the 2-byte checksum field itself is reachable. */
+		if (!skb_pull_up_to(skb, skb->h.raw + skb->csum + 2))
+			goto out;
+		skb->ip_summed = CHECKSUM_HW;
+		skb->proto_csum_blank = 0;
+	}
+	return 0;
+out:
+	return -EPROTO;
+}
+#endif
+
+/* Queues 'skb', prefixed with a struct odp_msg header, on queue 'queue_no'
+ * (which must be _ODPL_MISS_NR or _ODPL_ACTION_NR) and wakes up readers.
+ * 'skb' is consumed whether or not the call succeeds.  Returns 0 or a
+ * negative errno; failures are counted in the per-CPU n_lost statistic. */
+int
+dp_output_control(struct datapath *dp, struct sk_buff *skb, int queue_no,
+		  u32 arg)
+{
+	struct dp_stats_percpu *stats;
+	struct sk_buff_head *queue;
+	int port_no;
+	int err;
+
+	WARN_ON_ONCE(skb_shared(skb));
+	BUG_ON(queue_no != _ODPL_MISS_NR && queue_no != _ODPL_ACTION_NR);
+
+	queue = &dp->queues[queue_no];
+	err = -ENOBUFS;
+	if (skb_queue_len(queue) >= DP_MAX_QUEUE_LEN)
+		goto err_kfree_skb;
+
+	/* If a checksum-deferred packet is forwarded to the controller,
+	 * correct the pointers and checksum. This happens on a regular basis
+	 * only on Xen (the CHECKSUM_HW case), on which VMs can pass up packets
+	 * that do not have their checksum computed. We also implement it for
+	 * the non-Xen case, but it is difficult to trigger or test this case
+	 * there, hence the WARN_ON_ONCE().
+	 */
+	err = skb_checksum_setup(skb);
+	if (err)
+		goto err_kfree_skb;
+	/* CHECKSUM_HW being a macro selects the older two-argument
+	 * skb_checksum_help() in the #else branch below. */
+#ifndef CHECKSUM_HW
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		WARN_ON_ONCE(1);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
+		/* Until 2.6.22, the start of the transport header was also the
+		 * start of data to be checksummed. Linux 2.6.22 introduced
+		 * the csum_start field for this purpose, but we should point
+		 * the transport header to it anyway for backward
+		 * compatibility, as dev_queue_xmit() does even in 2.6.28. */
+		skb_set_transport_header(skb, skb->csum_start -
+					 skb_headroom(skb));
+#endif
+		err = skb_checksum_help(skb);
+		if (err)
+			goto err_kfree_skb;
+	}
+#else
+	if (skb->ip_summed == CHECKSUM_HW) {
+		err = skb_checksum_help(skb, 0);
+		if (err)
+			goto err_kfree_skb;
+	}
+#endif
+
+	/* Break apart GSO packets into their component pieces. Otherwise
+	 * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
+	if (skb_is_gso(skb)) {
+		struct sk_buff *nskb = skb_gso_segment(skb, 0);
+		if (nskb) {
+			kfree_skb(skb);
+			skb = nskb;
+			if (unlikely(IS_ERR(skb))) {
+				/* 'skb' is an error pointer here and the
+				 * original was freed above, so jump past the
+				 * kfree_skb() in the error path. */
+				err = PTR_ERR(skb);
+				goto err;
+			}
+		} else {
+			/* XXX This case might not be possible. It's hard to
+			 * tell from the skb_gso_segment() code and comment. */
+		}
+	}
+
+	/* Figure out port number. */
+	port_no = ODPP_LOCAL;
+	if (skb->dev) {
+		if (skb->dev->br_port)
+			port_no = skb->dev->br_port->port_no;
+		else if (is_dp_dev(skb->dev))
+			port_no = dp_dev_priv(skb->dev)->port_no;
+	}
+
+	/* Append each packet to queue. There will be only one packet unless
+	 * we broke up a GSO packet above.
*/
+	do {
+		struct odp_msg *header;
+		struct sk_buff *nskb = skb->next;
+		skb->next = NULL;
+
+		err = skb_cow(skb, sizeof *header);
+		if (err) {
+			/* Free every segment but the last here; the last one
+			 * is freed at err_kfree_skb. */
+			while (nskb) {
+				kfree_skb(skb);
+				skb = nskb;
+				nskb = skb->next;
+			}
+			goto err_kfree_skb;
+		}
+
+		header = (struct odp_msg*)__skb_push(skb, sizeof *header);
+		header->type = queue_no;
+		/* skb->len already includes the header just pushed. */
+		header->length = skb->len;
+		header->port = port_no;
+		header->reserved = 0;
+		header->arg = arg;
+		skb_queue_tail(queue, skb);
+
+		skb = nskb;
+	} while (skb);
+
+	wake_up_interruptible(&dp->waitqueue);
+	return 0;
+
+err_kfree_skb:
+	kfree_skb(skb);
+err:
+	stats = percpu_ptr(dp->stats_percpu, get_cpu());
+	stats->n_lost++;
+	put_cpu();
+
+	return err;
+}
+
+/* Resets the flow count of 'dp' and flushes its flow table.  Returns the
+ * result of dp_table_flush(). */
+static int flush_flows(struct datapath *dp)
+{
+	dp->n_flows = 0;
+	return dp_table_flush(dp);
+}
+
+/* Checks the arguments of each action in 'actions'.  Returns 0 if all are
+ * acceptable, -EINVAL for an out-of-range port, group or VLAN value, and
+ * -EOPNOTSUPP for an action type of ODPAT_N_ACTIONS or above. */
+static int validate_actions(const struct sw_flow_actions *actions)
+{
+	unsigned int i;
+
+	for (i = 0; i < actions->n_actions; i++) {
+		const union odp_action *a = &actions->actions[i];
+		switch (a->type) {
+		case ODPAT_OUTPUT:
+			if (a->output.port >= DP_MAX_PORTS)
+				return -EINVAL;
+			break;
+
+		case ODPAT_OUTPUT_GROUP:
+			if (a->output_group.group >= DP_MAX_GROUPS)
+				return -EINVAL;
+			break;
+
+		case ODPAT_SET_VLAN_VID:
+			if (a->vlan_vid.vlan_vid & htons(~VLAN_VID_MASK))
+				return -EINVAL;
+			break;
+
+		case ODPAT_SET_VLAN_PCP:
+			if (a->vlan_pcp.vlan_pcp & ~VLAN_PCP_MASK)
+				return -EINVAL;
+			break;
+
+		default:
+			/* Known action types without arguments to check. */
+			if (a->type >= ODPAT_N_ACTIONS)
+				return -EOPNOTSUPP;
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/* Allocates a sw_flow_actions, fills it from the userspace action array named
+ * by 'flow' and validates it.  Returns the new actions, which the caller
+ * must eventually free, or an ERR_PTR() on failure. */
+static struct sw_flow_actions *get_actions(const struct odp_flow *flow)
+{
+	struct sw_flow_actions *actions;
+	int error;
+
+	actions = flow_actions_alloc(flow->n_actions);
+	error = PTR_ERR(actions);
+	if (IS_ERR(actions))
+		goto error;
+
+	error = -EFAULT;
+	if (copy_from_user(actions->actions, flow->actions,
+			   flow->n_actions * sizeof(union odp_action)))
+		goto error_free_actions;
+	error = validate_actions(actions);
+	if (error)
+		goto error_free_actions;
+
+	return actions;
+
+error_free_actions:
+	kfree(actions);
+error:
+	return ERR_PTR(error);
+}
+
+/* Copies the counters of 'flow' into 'stats'.  Callers in this file take
+ * flow->lock around the call. */
+static void get_stats(struct sw_flow *flow, struct odp_flow_stats *stats)
+{
+	if (flow->used.tv_sec) {
+		stats->used_sec = flow->used.tv_sec;
+		stats->used_nsec = flow->used.tv_nsec;
+	} else {
+		/* Report a zero timestamp when tv_sec is zero, which is the
+		 * state clear_stats() leaves behind. */
+		stats->used_sec = 0;
+		stats->used_nsec = 0;
+	}
+	stats->n_packets = flow->packet_count;
+	stats->n_bytes = flow->byte_count;
+	stats->ip_tos = flow->ip_tos;
+	stats->tcp_flags = flow->tcp_flags;
+}
+
+/* Zeroes the usage timestamp and all counters of 'flow'. */
+static void clear_stats(struct sw_flow *flow)
+{
+	flow->used.tv_sec = flow->used.tv_nsec = 0;
+	flow->tcp_flags = 0;
+	flow->ip_tos = 0;
+	flow->packet_count = 0;
+	flow->byte_count = 0;
+}
+
+/* Creates or modifies the flow described by the odp_flow_put request at
+ * 'ufp', as permitted by ODPPF_CREATE and ODPPF_MODIFY in its flags, then
+ * copies the flow's statistics (all zero for a new flow) back to userspace.
+ * Returns 0 or a negative errno. */
+static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp)
+{
+	struct odp_flow_put uf;
+	struct sw_flow *flow, **bucket;
+	struct dp_table *table;
+	struct odp_flow_stats stats;
+	int error;
+
+	error = -EFAULT;
+	if (copy_from_user(&uf, ufp, sizeof(struct odp_flow_put)))
+		goto error;
+	uf.flow.key.reserved = 0;
+
+retry:
+	table = rcu_dereference(dp->table);
+	bucket = dp_table_lookup_for_insert(table, &uf.flow.key);
+	if (!bucket) {
+		/* No such flow, and the slots where it could go are full. */
+		error = uf.flags & ODPPF_CREATE ? -EXFULL : -ENOENT;
+		goto error;
+	} else if (!*bucket) {
+		/* No such flow, but we found an available slot for it. */
+		struct sw_flow_actions *acts;
+
+		error = -ENOENT;
+		if (!(uf.flags & ODPPF_CREATE))
+			goto error;
+
+		/* Expand table, if necessary, to make room. */
+		if (dp->n_flows * 4 >= table->n_buckets &&
+		    table->n_buckets < DP_MAX_BUCKETS) {
+			error = dp_table_expand(dp);
+			if (error)
+				goto error;
+
+			/* The bucket's location has changed. Try again. */
+			goto retry;
+		}
+
+		/* Allocate flow. */
+		error = -ENOMEM;
+		flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
+		if (flow == NULL)
+			goto error;
+		flow->key = uf.flow.key;
+		spin_lock_init(&flow->lock);
+		clear_stats(flow);
+
+		/* Obtain actions.
*/
+		acts = get_actions(&uf.flow);
+		error = PTR_ERR(acts);
+		if (IS_ERR(acts))
+			goto error_free_flow;
+		rcu_assign_pointer(flow->sf_acts, acts);
+
+		/* Put flow in bucket. */
+		rcu_assign_pointer(*bucket, flow);
+		dp->n_flows++;
+		/* A brand-new flow reports all-zero statistics. */
+		memset(&stats, 0, sizeof(struct odp_flow_stats));
+	} else {
+		/* We found a matching flow. */
+		struct sw_flow *flow = *rcu_dereference(bucket);
+		struct sw_flow_actions *old_acts, *new_acts;
+		unsigned long int flags;
+
+		/* Bail out if we're not allowed to modify an existing flow. */
+		error = -EEXIST;
+		if (!(uf.flags & ODPPF_MODIFY))
+			goto error;
+
+		/* Swap actions. */
+		new_acts = get_actions(&uf.flow);
+		error = PTR_ERR(new_acts);
+		if (IS_ERR(new_acts))
+			goto error;
+		old_acts = rcu_dereference(flow->sf_acts);
+		if (old_acts->n_actions != new_acts->n_actions ||
+		    memcmp(old_acts->actions, new_acts->actions,
+			   sizeof(union odp_action) * old_acts->n_actions)) {
+			/* Actions changed: publish the new set and free the
+			 * old one through the deferred-free helper. */
+			rcu_assign_pointer(flow->sf_acts, new_acts);
+			flow_deferred_free_acts(old_acts);
+		} else {
+			/* Identical actions: keep the old set. */
+			kfree(new_acts);
+		}
+
+		/* Fetch stats, then clear them if necessary. */
+		spin_lock_irqsave(&flow->lock, flags);
+		get_stats(flow, &stats);
+		if (uf.flags & ODPPF_ZERO_STATS)
+			clear_stats(flow);
+		spin_unlock_irqrestore(&flow->lock, flags);
+	}
+
+	/* Copy stats to userspace.
*/ + if (__copy_to_user(&ufp->flow.stats, &stats, + sizeof(struct odp_flow_stats))) + return -EFAULT; + return 0; + +error_free_flow: + kmem_cache_free(flow_cache, flow); +error: + return error; +} + +static int put_actions(const struct sw_flow *flow, struct odp_flow __user *ufp) +{ + union odp_action __user *actions; + struct sw_flow_actions *sf_acts; + u32 n_actions; + + if (__get_user(actions, &ufp->actions) || + __get_user(n_actions, &ufp->n_actions)) + return -EFAULT; + + if (!n_actions) + return 0; + if (ufp->n_actions > INT_MAX / sizeof(union odp_action)) + return -EINVAL; + + sf_acts = rcu_dereference(flow->sf_acts); + if (__put_user(sf_acts->n_actions, &ufp->n_actions) || + (actions && copy_to_user(actions, sf_acts->actions, + sizeof(union odp_action) * + min(sf_acts->n_actions, n_actions)))) + return -EFAULT; + + return 0; +} + +static int answer_query(struct sw_flow *flow, struct odp_flow __user *ufp) +{ + struct odp_flow_stats stats; + unsigned long int flags; + + spin_lock_irqsave(&flow->lock, flags); + get_stats(flow, &stats); + spin_unlock_irqrestore(&flow->lock, flags); + + if (__copy_to_user(&ufp->stats, &stats, sizeof(struct odp_flow_stats))) + return -EFAULT; + return put_actions(flow, ufp); +} + +static int del_or_query_flow(struct datapath *dp, + struct odp_flow __user *ufp, + unsigned int cmd) +{ + struct dp_table *table = rcu_dereference(dp->table); + struct odp_flow uf; + struct sw_flow *flow; + int error; + + error = -EFAULT; + if (copy_from_user(&uf, ufp, sizeof uf)) + goto error; + uf.key.reserved = 0; + + flow = dp_table_lookup(table, &uf.key); + error = -ENOENT; + if (!flow) + goto error; + + if (cmd == ODP_FLOW_DEL) { + /* XXX redundant lookup */ + error = dp_table_delete(table, flow); + if (error) + goto error; + + /* XXX These statistics might lose a few packets, since other + * CPUs can be using this flow. We used to synchronize_rcu() + * to make sure that we get completely accurate stats, but that + * blows our performance, badly. 
*/
+		dp->n_flows--;
+		error = answer_query(flow, ufp);
+		flow_deferred_free(flow);
+	} else {
+		error = answer_query(flow, ufp);
+	}
+
+error:
+	return error;
+}
+
+/* For each of the flowvec->n_flows userspace odp_flow entries, looks up the
+ * entry's key and writes back the flow's statistics and actions.  An entry
+ * whose key matches no flow gets its statistics zeroed instead.  Returns
+ * flowvec->n_flows on success or -EFAULT. */
+static int query_multiple_flows(struct datapath *dp,
+				const struct odp_flowvec *flowvec)
+{
+	struct dp_table *table = rcu_dereference(dp->table);
+	int i;
+	for (i = 0; i < flowvec->n_flows; i++) {
+		struct __user odp_flow *ufp = &flowvec->flows[i];
+		struct odp_flow uf;
+		struct sw_flow *flow;
+		int error;
+
+		if (__copy_from_user(&uf, ufp, sizeof uf))
+			return -EFAULT;
+		uf.key.reserved = 0;
+
+		flow = dp_table_lookup(table, &uf.key);
+		if (!flow)
+			error = __clear_user(&ufp->stats, sizeof ufp->stats);
+		else
+			error = answer_query(flow, ufp);
+		/* Any failure, whatever its code, is reported as -EFAULT. */
+		if (error)
+			return -EFAULT;
+	}
+	return flowvec->n_flows;
+}
+
+/* State shared between list_flows() and its list_flow() callback. */
+struct list_flows_cbdata {
+	struct odp_flow __user *uflows;	/* Userspace output array. */
+	int n_flows;			/* Capacity of 'uflows'. */
+	int listed_flows;		/* Entries written so far. */
+};
+
+/* dp_table_foreach() callback: writes the key, statistics and actions of
+ * 'flow' into the next free userspace slot.  Returns a negative errno on
+ * failure, the number of entries written once the array is full (presumably
+ * a nonzero return stops the iteration -- confirm in dp_table_foreach()),
+ * and 0 otherwise. */
+static int list_flow(struct sw_flow *flow, void *cbdata_)
+{
+	struct list_flows_cbdata *cbdata = cbdata_;
+	struct odp_flow __user *ufp = &cbdata->uflows[cbdata->listed_flows++];
+	int error;
+
+	if (__copy_to_user(&ufp->key, &flow->key, sizeof flow->key))
+		return -EFAULT;
+	error = answer_query(flow, ufp);
+	if (error)
+		return error;
+
+	if (cbdata->listed_flows >= cbdata->n_flows)
+		return cbdata->listed_flows;
+	return 0;
+}
+
+/* Writes up to flowvec->n_flows flows from the table of 'dp' into the
+ * userspace array flowvec->flows.  Returns the number of flows written or a
+ * negative errno. */
+static int list_flows(struct datapath *dp, const struct odp_flowvec *flowvec)
+{
+	struct list_flows_cbdata cbdata;
+	int error;
+
+	if (!flowvec->n_flows)
+		return 0;
+
+	cbdata.uflows = flowvec->flows;
+	cbdata.n_flows = flowvec->n_flows;
+	cbdata.listed_flows = 0;
+	error = dp_table_foreach(rcu_dereference(dp->table),
+				 list_flow, &cbdata);
+	return error ?
error : cbdata.listed_flows; +} + +static int do_flowvec_ioctl(struct datapath *dp, unsigned long argp, + int (*function)(struct datapath *, + const struct odp_flowvec *)) +{ + struct odp_flowvec __user *uflowvec; + struct odp_flowvec flowvec; + int retval; + + uflowvec = (struct odp_flowvec __user *)argp; + if (!access_ok(VERIFY_WRITE, uflowvec, sizeof *uflowvec) || + copy_from_user(&flowvec, uflowvec, sizeof flowvec)) + return -EFAULT; + + if (flowvec.n_flows > INT_MAX / sizeof(struct odp_flow)) + return -EINVAL; + + if (!access_ok(VERIFY_WRITE, flowvec.flows, + flowvec.n_flows * sizeof(struct odp_flow))) + return -EFAULT; + + retval = function(dp, &flowvec); + return (retval < 0 ? retval + : retval == flowvec.n_flows ? 0 + : __put_user(retval, &uflowvec->n_flows)); +} + +static int do_execute(struct datapath *dp, const struct odp_execute *executep) +{ + struct odp_execute execute; + struct odp_flow_key key; + struct sk_buff *skb; + struct sw_flow_actions *actions; + int err; + + err = -EFAULT; + if (copy_from_user(&execute, executep, sizeof execute)) + goto error; + + err = -EINVAL; + if (execute.length < ETH_HLEN || execute.length > 65535) + goto error; + + err = -ENOMEM; + actions = flow_actions_alloc(execute.n_actions); + if (!actions) + goto error; + + err = -EFAULT; + if (copy_from_user(actions->actions, execute.actions, + execute.n_actions * sizeof *execute.actions)) + goto error_free_actions; + + err = validate_actions(actions); + if (err) + goto error_free_actions; + + err = -ENOMEM; + skb = alloc_skb(execute.length, GFP_KERNEL); + if (!skb) + goto error_free_actions; + if (execute.in_port < DP_MAX_PORTS) { + struct net_bridge_port *p = dp->ports[execute.in_port]; + if (p) + skb->dev = p->dev; + } + + err = -EFAULT; + if (copy_from_user(skb_put(skb, execute.length), execute.data, + execute.length)) + goto error_free_skb; + + flow_extract(skb, execute.in_port, &key); + err = execute_actions(dp, skb, &key, actions->actions, + actions->n_actions, 
GFP_KERNEL); + kfree(actions); + return err; + +error_free_skb: + kfree_skb(skb); +error_free_actions: + kfree(actions); +error: + return err; +} + +static int +get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp) +{ + struct odp_stats stats; + int i; + + stats.n_flows = dp->n_flows; + stats.cur_capacity = rcu_dereference(dp->table)->n_buckets * 2; + stats.max_capacity = DP_MAX_BUCKETS * 2; + stats.n_ports = dp->n_ports; + stats.max_ports = DP_MAX_PORTS; + stats.max_groups = DP_MAX_GROUPS; + stats.n_frags = stats.n_hit = stats.n_missed = stats.n_lost = 0; + for_each_possible_cpu(i) { + const struct dp_stats_percpu *s; + s = percpu_ptr(dp->stats_percpu, i); + stats.n_frags += s->n_frags; + stats.n_hit += s->n_hit; + stats.n_missed += s->n_missed; + stats.n_lost += s->n_lost; + } + stats.max_miss_queue = DP_MAX_QUEUE_LEN; + stats.max_action_queue = DP_MAX_QUEUE_LEN; + return copy_to_user(statsp, &stats, sizeof stats) ? -EFAULT : 0; +} + +static int +put_port(const struct net_bridge_port *p, struct odp_port __user *uop) +{ + struct odp_port op; + memset(&op, 0, sizeof op); + strncpy(op.devname, p->dev->name, sizeof op.devname); + op.port = p->port_no; + op.flags = is_dp_dev(p->dev) ? ODP_PORT_INTERNAL : 0; + return copy_to_user(uop, &op, sizeof op) ? -EFAULT : 0; +} + +static int +query_port(struct datapath *dp, struct odp_port __user *uport) +{ + struct odp_port port; + + if (copy_from_user(&port, uport, sizeof port)) + return -EFAULT; + if (port.devname[0]) { + struct net_bridge_port *p; + struct net_device *dev; + int err; + + port.devname[IFNAMSIZ - 1] = '\0'; + + dev = dev_get_by_name(&init_net, port.devname); + if (!dev) + return -ENODEV; + + p = dev->br_port; + if (!p && is_dp_dev(dev)) { + struct dp_dev *dp_dev = dp_dev_priv(dev); + if (dp_dev->dp == dp) + p = dp->ports[dp_dev->port_no]; + } + err = p && p->dp == dp ? 
put_port(p, uport) : -ENOENT; + dev_put(dev); + + return err; + } else { + if (port.port >= DP_MAX_PORTS) + return -EINVAL; + if (!dp->ports[port.port]) + return -ENOENT; + return put_port(dp->ports[port.port], uport); + } +} + +static int +list_ports(struct datapath *dp, struct odp_portvec __user *pvp) +{ + struct odp_portvec pv; + struct net_bridge_port *p; + int idx; + + if (copy_from_user(&pv, pvp, sizeof pv)) + return -EFAULT; + + idx = 0; + if (pv.n_ports) { + list_for_each_entry_rcu (p, &dp->port_list, node) { + if (put_port(p, &pv.ports[idx])) + return -EFAULT; + if (idx++ >= pv.n_ports) + break; + } + } + return put_user(idx, &pvp->n_ports); +} + +/* RCU callback for freeing a dp_port_group */ +static void free_port_group(struct rcu_head *rcu) +{ + struct dp_port_group *g = container_of(rcu, struct dp_port_group, rcu); + kfree(g); +} + +static int +set_port_group(struct datapath *dp, const struct odp_port_group __user *upg) +{ + struct odp_port_group pg; + struct dp_port_group *new_group, *old_group; + int error; + + error = -EFAULT; + if (copy_from_user(&pg, upg, sizeof pg)) + goto error; + + error = -EINVAL; + if (pg.n_ports > DP_MAX_PORTS || pg.group >= DP_MAX_GROUPS) + goto error; + + error = -ENOMEM; + new_group = kmalloc(sizeof *new_group + sizeof(u16) * pg.n_ports, + GFP_KERNEL); + if (!new_group) + goto error; + + new_group->n_ports = pg.n_ports; + error = -EFAULT; + if (copy_from_user(new_group->ports, pg.ports, + sizeof(u16) * pg.n_ports)) + goto error_free; + + old_group = rcu_dereference(dp->groups[pg.group]); + rcu_assign_pointer(dp->groups[pg.group], new_group); + if (old_group) + call_rcu(&old_group->rcu, free_port_group); + return 0; + +error_free: + kfree(new_group); +error: + return error; +} + +static int +get_port_group(struct datapath *dp, struct odp_port_group *upg) +{ + struct odp_port_group pg; + struct dp_port_group *g; + u16 n_copy; + + if (copy_from_user(&pg, upg, sizeof pg)) + return -EFAULT; + + if (pg.group >= DP_MAX_GROUPS) + 
return -EINVAL;
+
+	g = dp->groups[pg.group];
+	/* Compare as unsigned: a huge user-supplied n_ports must not turn
+	 * negative and be picked as the minimum, which would then wrap when
+	 * stored in the u16 and over-read g->ports. */
+	n_copy = g ? min_t(unsigned int, g->n_ports, pg.n_ports) : 0;
+	if (n_copy && copy_to_user(pg.ports, g->ports, n_copy * sizeof(u16)))
+		return -EFAULT;
+
+	if (put_user(g ? g->n_ports : 0, &upg->n_ports))
+		return -EFAULT;
+
+	return 0;
+}
+
+/* ioctl entry point for the datapath character devices.  The minor number of
+ * the device file selects the datapath.
+ *
+ * Datapath create/destroy and port add/delete do their own locking and are
+ * dispatched first.  Every other command runs with the mutex of the datapath
+ * returned by get_dp_locked() held, which is released before returning.
+ *
+ * Returns the command's result, -ENODEV if the datapath does not exist, or
+ * -ENOIOCTLCMD for an unknown command. */
+static long openvswitch_ioctl(struct file *f, unsigned int cmd,
+			      unsigned long argp)
+{
+	int dp_idx = iminor(f->f_dentry->d_inode);
+	struct datapath *dp;
+	int drop_frags, listeners, port_no;
+	int err;
+
+	/* Handle commands with special locking requirements up front. */
+	switch (cmd) {
+	case ODP_DP_CREATE:
+		return create_dp(dp_idx, (char __user *)argp);
+
+	case ODP_DP_DESTROY:
+		return destroy_dp(dp_idx);
+
+	case ODP_PORT_ADD:
+		return add_port(dp_idx, (struct odp_port __user *)argp);
+
+	case ODP_PORT_DEL:
+		err = get_user(port_no, (int __user *)argp);
+		/* Return the fault directly.  Breaking out of this switch
+		 * would fall into the generic path below and replace the
+		 * error with -ENODEV or -ENOIOCTLCMD. */
+		if (err)
+			return err;
+		return del_port(dp_idx, port_no);
+	}
+
+	dp = get_dp_locked(dp_idx);
+	if (!dp)
+		return -ENODEV;
+
+	switch (cmd) {
+	case ODP_DP_STATS:
+		err = get_dp_stats(dp, (struct odp_stats __user *)argp);
+		break;
+
+	case ODP_GET_DROP_FRAGS:
+		err = put_user(dp->drop_frags, (int __user *)argp);
+		break;
+
+	case ODP_SET_DROP_FRAGS:
+		err = get_user(drop_frags, (int __user *)argp);
+		if (err)
+			break;
+		err = -EINVAL;
+		if (drop_frags != 0 && drop_frags != 1)
+			break;
+		dp->drop_frags = drop_frags;
+		err = 0;
+		break;
+
+	case ODP_GET_LISTEN_MASK:
+		/* The per-open listen mask lives in f->private_data. */
+		err = put_user((int)f->private_data, (int __user *)argp);
+		break;
+
+	case ODP_SET_LISTEN_MASK:
+		err = get_user(listeners, (int __user *)argp);
+		if (err)
+			break;
+		err = -EINVAL;
+		if (listeners & ~ODPL_ALL)
+			break;
+		err = 0;
+		f->private_data = (void*)listeners;
+		break;
+
+	case ODP_PORT_QUERY:
+		err = query_port(dp, (struct odp_port __user *)argp);
+		break;
+
+	case ODP_PORT_LIST:
+		err = list_ports(dp, (struct odp_portvec __user *)argp);
+		break;
+
+	case ODP_PORT_GROUP_SET:
+		err = set_port_group(dp, (struct odp_port_group __user *)argp);
+		break;
+
+	case ODP_PORT_GROUP_GET:
+		err = get_port_group(dp, (struct odp_port_group __user *)argp);
+		break;
+
+	case ODP_FLOW_FLUSH:
+		err = flush_flows(dp);
+		break;
+
+	case ODP_FLOW_PUT:
+		err = put_flow(dp, (struct odp_flow_put __user *)argp);
+		break;
+
+	case ODP_FLOW_DEL:
+	case ODP_FLOW_GET:
+		err = del_or_query_flow(dp, (struct odp_flow __user *)argp,
+					cmd);
+		break;
+
+	case ODP_FLOW_GET_MULTIPLE:
+		err = do_flowvec_ioctl(dp, argp, query_multiple_flows);
+		break;
+
+	case ODP_FLOW_LIST:
+		err = do_flowvec_ioctl(dp, argp, list_flows);
+		break;
+
+	case ODP_EXECUTE:
+		err = do_execute(dp, (struct odp_execute __user *)argp);
+		break;
+
+	default:
+		err = -ENOIOCTLCMD;
+		break;
+	}
+	mutex_unlock(&dp->mutex);
+	return err;
+}
+
+/* Returns 1 if any queue of 'dp' selected by the bit mask 'listeners' is
+ * nonempty, otherwise 0. */
+static int dp_has_packet_of_interest(struct datapath *dp, int listeners)
+{
+	int i;
+	for (i = 0; i < DP_N_QUEUES; i++) {
+		if (listeners & (1 << i) && !skb_queue_empty(&dp->queues[i]))
+			return 1;
+	}
+	return 0;
+}
+
+/* read() entry point: dequeues one message from the first nonempty queue
+ * selected by the file's listen mask and copies at most 'nbytes' of it to
+ * 'buf'.  The rest of a longer message is discarded.  Blocks unless the file
+ * is O_NONBLOCK.
+ *
+ * Returns the number of bytes copied, 0 if 'nbytes' is zero or nothing is
+ * being listened for, or a negative errno (-ENODEV, -EAGAIN, -ERESTARTSYS,
+ * or a copy failure). */
+ssize_t openvswitch_read(struct file *f, char __user *buf, size_t nbytes,
+			 loff_t *ppos)
+{
+	int listeners = (int) f->private_data;
+	int dp_idx = iminor(f->f_dentry->d_inode);
+	struct datapath *dp = get_dp(dp_idx);
+	struct sk_buff *skb;
+	struct iovec __user iov;
+	size_t copy_bytes;
+	int retval;
+
+	if (!dp)
+		return -ENODEV;
+
+	if (nbytes == 0 || !listeners)
+		return 0;
+
+	for (;;) {
+		int i;
+
+		for (i = 0; i < DP_N_QUEUES; i++) {
+			if (listeners & (1 << i)) {
+				skb = skb_dequeue(&dp->queues[i]);
+				if (skb)
+					goto success;
+			}
+		}
+
+		if (f->f_flags & O_NONBLOCK) {
+			retval = -EAGAIN;
+			goto error;
+		}
+
+		wait_event_interruptible(dp->waitqueue,
+					 dp_has_packet_of_interest(dp,
+								   listeners));
+
+		if (signal_pending(current)) {
+			retval = -ERESTARTSYS;
+			goto error;
+		}
+	}
+success:
+	/* skb->len and nbytes have different types, so use min_t() rather
+	 * than the type-checking min(). */
+	copy_bytes = min_t(size_t, skb->len, nbytes);
+	iov.iov_base = buf;
+	iov.iov_len = copy_bytes;
+	retval = skb_copy_datagram_iovec(skb, 0, &iov, iov.iov_len);
+	if (!retval)
+		retval = copy_bytes;
+	kfree_skb(skb);
+
+error:
+	return retval;
+}
+
+static unsigned int openvswitch_poll(struct file *file, poll_table *wait) +{ + int dp_idx = iminor(file->f_dentry->d_inode); + struct datapath *dp = get_dp(dp_idx); + unsigned int mask; + + if (dp) { + mask = 0; + poll_wait(file, &dp->waitqueue, wait); + if (dp_has_packet_of_interest(dp, (int)file->private_data)) + mask |= POLLIN | POLLRDNORM; + } else { + mask = POLLIN | POLLRDNORM | POLLHUP; + } + return mask; +} + +struct file_operations openvswitch_fops = { + /* XXX .aio_read = openvswitch_aio_read, */ + .read = openvswitch_read, + .poll = openvswitch_poll, + .unlocked_ioctl = openvswitch_ioctl, + /* XXX .fasync = openvswitch_fasync, */ +}; + +static int major; +static struct llc_sap *dp_stp_sap; + +static int dp_stp_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + /* We don't really care about STP packets, we just listen for them for + * mutual exclusion with the bridge module, so this just discards + * them. */ + kfree_skb(skb); + return 0; +} + +static int __init dp_init(void) +{ + int err; + + printk("Open vSwitch %s, built "__DATE__" "__TIME__"\n", VERSION BUILDNR); + + /* Register to receive STP packets because the bridge module also + * attempts to do so. Since there can only be a single listener for a + * given protocol, this provides mutual exclusion against the bridge + * module, preventing both of them from being loaded at the same + * time. */ + dp_stp_sap = llc_sap_open(LLC_SAP_BSPAN, dp_stp_rcv); + if (!dp_stp_sap) { + printk(KERN_ERR "openvswitch: can't register sap for STP (probably the bridge module is loaded)\n"); + return -EADDRINUSE; + } + + err = flow_init(); + if (err) + goto error; + + err = register_netdevice_notifier(&dp_device_notifier); + if (err) + goto error_flow_exit; + + major = register_chrdev(0, "openvswitch", &openvswitch_fops); + if (err < 0) + goto error_unreg_notifier; + + /* Hook into callback used by the bridge to intercept packets. + * Parasites we are. 
*/ + br_handle_frame_hook = dp_frame_hook; + + return 0; + +error_unreg_notifier: + unregister_netdevice_notifier(&dp_device_notifier); +error_flow_exit: + flow_exit(); +error: + return err; +} + +static void dp_cleanup(void) +{ + rcu_barrier(); + unregister_chrdev(major, "openvswitch"); + unregister_netdevice_notifier(&dp_device_notifier); + flow_exit(); + br_handle_frame_hook = NULL; + llc_sap_put(dp_stp_sap); +} + +module_init(dp_init); +module_exit(dp_cleanup); + +MODULE_DESCRIPTION("Open vSwitch switching datapath"); +MODULE_LICENSE("GPL"); diff --git a/datapath/datapath.h b/datapath/datapath.h new file mode 100644 index 00000000..102b27f3 --- /dev/null +++ b/datapath/datapath.h @@ -0,0 +1,139 @@ +/* Interface exported by openvswitch_mod. */ + +#ifndef DATAPATH_H +#define DATAPATH_H 1 + +#include <asm/page.h> +#include <linux/kernel.h> +#include <linux/mutex.h> +#include <linux/netlink.h> +#include <linux/netdevice.h> +#include <linux/workqueue.h> +#include <linux/skbuff.h> +#include "flow.h" +#include "brc_sysfs.h" + +struct sk_buff; + +/* Mask for the priority bits in a vlan header. If we ever merge upstream + * then this should go into include/linux/if_vlan.h. 
*/
+#define VLAN_PCP_MASK 0xe000
+
+/* Sizes of the datapath.ports[] and datapath.groups[] arrays. */
+#define DP_MAX_PORTS 256
+#define DP_MAX_GROUPS 16
+
+/* Number of index bits at each of two levels, sized so that an array of
+ * 1 << bits pointers occupies one page. */
+#define DP_L2_BITS (PAGE_SHIFT - ilog2(sizeof(struct sw_flow*)))
+#define DP_L2_SIZE (1 << DP_L2_BITS)
+#define DP_L2_SHIFT 0
+
+#define DP_L1_BITS (PAGE_SHIFT - ilog2(sizeof(struct sw_flow**)))
+#define DP_L1_SIZE (1 << DP_L1_BITS)
+#define DP_L1_SHIFT DP_L2_BITS
+
+#define DP_MAX_BUCKETS (DP_L1_SIZE * DP_L2_SIZE)
+
+struct dp_table {
+	unsigned int n_buckets;
+	struct sw_flow ***flows[2];
+	struct rcu_head rcu;
+};
+
+/* Number of userspace message queues per datapath, and the length at which
+ * dp_output_control() refuses to queue more on one of them. */
+#define DP_N_QUEUES 2
+#define DP_MAX_QUEUE_LEN 100
+
+/* Per-CPU packet counters, summed by get_dp_stats(). */
+struct dp_stats_percpu {
+	u64 n_frags;	/* Dropped because dp->drop_frags was set. */
+	u64 n_hit;	/* Matched a flow in the flow table. */
+	u64 n_missed;	/* Matched no flow. */
+	u64 n_lost;	/* Could not be queued by dp_output_control(). */
+};
+
+struct dp_port_group {
+	struct rcu_head rcu;
+	int n_ports;
+	u16 ports[];
+};
+
+struct datapath {
+	struct mutex mutex;
+	int dp_idx;
+
+#ifdef SUPPORT_SYSFS
+	struct kobject ifobj;
+#endif
+
+	int drop_frags;
+
+	/* Queued data. */
+	struct sk_buff_head queues[DP_N_QUEUES];
+	wait_queue_head_t waitqueue;
+
+	/* Flow table. */
+	unsigned int n_flows;
+	struct dp_table *table;
+
+	/* Port groups. */
+	struct dp_port_group *groups[DP_MAX_GROUPS];
+
+	/* Switch ports. */
+	unsigned int n_ports;
+	struct net_bridge_port *ports[DP_MAX_PORTS];
+	struct list_head port_list; /* All ports, including local_port. */
+
+	/* Stats. */
+	struct dp_stats_percpu *stats_percpu;
+};
+
+struct net_bridge_port {
+	u16 port_no;
+	struct datapath *dp;
+	struct net_device *dev;
+#ifdef SUPPORT_SYSFS
+	struct kobject kobj;
+#endif
+	struct list_head node; /* Element in datapath.port_list. */
+};
+
+extern struct notifier_block dp_device_notifier;
+extern int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
+extern int (*dp_add_dp_hook)(struct datapath *dp);
+extern int (*dp_del_dp_hook)(struct datapath *dp);
+extern int (*dp_add_if_hook)(struct net_bridge_port *p);
+extern int (*dp_del_if_hook)(struct net_bridge_port *p);
+
+/* Flow table.
*/
+struct dp_table *dp_table_create(unsigned int n_buckets);
+void dp_table_destroy(struct dp_table *, int free_flows);
+struct sw_flow *dp_table_lookup(struct dp_table *, const struct odp_flow_key *);
+struct sw_flow **dp_table_lookup_for_insert(struct dp_table *, const struct odp_flow_key *);
+int dp_table_delete(struct dp_table *, struct sw_flow *);
+int dp_table_expand(struct datapath *);
+int dp_table_flush(struct datapath *);
+int dp_table_foreach(struct dp_table *table,
+		     int (*callback)(struct sw_flow *flow, void *aux),
+		     void *aux);
+
+/* Packet processing and port management. */
+void dp_process_received_packet(struct sk_buff *, struct net_bridge_port *);
+int dp_del_port(struct net_bridge_port *, struct list_head *);
+int dp_output_port(struct datapath *, struct sk_buff *, int out_port,
+		   int ignore_no_fwd);
+int dp_output_control(struct datapath *, struct sk_buff *, int, u32 arg);
+void dp_set_origin(struct datapath *, u16, struct sk_buff *);
+
+struct datapath *get_dp(int dp_idx);
+
+/* Returns the name of the network device of the local port (ODPP_LOCAL) of
+ * 'dp'.  That port must exist. */
+static inline const char *dp_name(const struct datapath *dp)
+{
+	return dp->ports[ODPP_LOCAL]->dev->name;
+}
+
+/* Only Xen builds carry the real skb_checksum_setup(); elsewhere it is a
+ * stub that reports success. */
+#ifdef CONFIG_XEN
+int skb_checksum_setup(struct sk_buff *skb);
+#else
+static inline int skb_checksum_setup(struct sk_buff *skb)
+{
+	return 0;
+}
+#endif
+
+#endif /* datapath.h */
diff --git a/datapath/dp_dev.c b/datapath/dp_dev.c
new file mode 100644
index 00000000..8a749dbc
--- /dev/null
+++ b/datapath/dp_dev.c
@@ -0,0 +1,210 @@
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
+#include <linux/workqueue.h>
+
+#include "datapath.h"
+#include "dp_dev.h"
+
+/* Returns the datapath that internal device 'netdev' belongs to. */
+struct datapath *dp_dev_get_dp(struct net_device *netdev)
+{
+	return dp_dev_priv(netdev)->dp;
+}
+EXPORT_SYMBOL(dp_dev_get_dp);
+
+/* get_stats hook: returns the counters kept in the device's private data. */
+static struct net_device_stats *dp_dev_get_stats(struct net_device *netdev)
+{
+	struct dp_dev *dp_dev = dp_dev_priv(netdev);
+	return &dp_dev->stats;
+}
+
+/* Hands 'skb' to the network stack as a packet received on 'netdev' and
+ * updates the device's receive counters.  Returns the length 'skb' had on
+ * entry. */
+int dp_dev_recv(struct net_device *netdev,
struct sk_buff *skb) +{ + struct dp_dev *dp_dev = dp_dev_priv(netdev); + int len; + len = skb->len; + skb->pkt_type = PACKET_HOST; + skb->protocol = eth_type_trans(skb, netdev); + if (in_interrupt()) + netif_rx(skb); + else + netif_rx_ni(skb); + netdev->last_rx = jiffies; + dp_dev->stats.rx_packets++; + dp_dev->stats.rx_bytes += len; + return len; +} + +static int dp_dev_mac_addr(struct net_device *dev, void *p) +{ + struct sockaddr *addr = p; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + return 0; +} + +static int dp_dev_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct dp_dev *dp_dev = dp_dev_priv(netdev); + + /* By orphaning 'skb' we will screw up socket accounting slightly, but + * the effect is limited to the device queue length. If we don't + * do this, then the sk_buff will be destructed eventually, but it is + * harder to predict when. */ + skb_orphan(skb); + + /* We are going to modify 'skb', by sticking it on &dp_dev->xmit_queue, + * so we need to have our own clone. (At any rate, fwd_port_input() + * will need its own clone, so there's no benefit to queuing any other + * way.) */ + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return 0; + + dp_dev->stats.tx_packets++; + dp_dev->stats.tx_bytes += skb->len; + + if (skb_queue_len(&dp_dev->xmit_queue) >= netdev->tx_queue_len) { + /* Queue overflow. Stop transmitter. */ + netif_stop_queue(netdev); + + /* We won't see all dropped packets individually, so overrun + * error is appropriate. 
*/ + dp_dev->stats.tx_fifo_errors++; + } + skb_queue_tail(&dp_dev->xmit_queue, skb); + netdev->trans_start = jiffies; + + schedule_work(&dp_dev->xmit_work); + + return 0; +} + +static void dp_dev_do_xmit(struct work_struct *work) +{ + struct dp_dev *dp_dev = container_of(work, struct dp_dev, xmit_work); + struct datapath *dp = dp_dev->dp; + struct sk_buff *skb; + + while ((skb = skb_dequeue(&dp_dev->xmit_queue)) != NULL) { + skb_reset_mac_header(skb); + rcu_read_lock_bh(); + dp_process_received_packet(skb, dp->ports[dp_dev->port_no]); + rcu_read_unlock_bh(); + } + netif_wake_queue(dp_dev->dev); +} + +static int dp_dev_open(struct net_device *netdev) +{ + netif_start_queue(netdev); + return 0; +} + +static int dp_dev_stop(struct net_device *netdev) +{ + netif_stop_queue(netdev); + return 0; +} + +static void dp_getinfo(struct net_device *netdev, struct ethtool_drvinfo *info) +{ + struct dp_dev *dp_dev = dp_dev_priv(netdev); + strcpy(info->driver, "openvswitch"); + sprintf(info->bus_info, "%d", dp_dev->dp->dp_idx); +} + +static struct ethtool_ops dp_ethtool_ops = { + .get_drvinfo = dp_getinfo, + .get_link = ethtool_op_get_link, + .get_sg = ethtool_op_get_sg, + .get_tx_csum = ethtool_op_get_tx_csum, + .get_tso = ethtool_op_get_tso, +}; + +static void +do_setup(struct net_device *netdev) +{ + ether_setup(netdev); + + netdev->do_ioctl = dp_ioctl_hook; + netdev->get_stats = dp_dev_get_stats; + netdev->hard_start_xmit = dp_dev_xmit; + netdev->open = dp_dev_open; + SET_ETHTOOL_OPS(netdev, &dp_ethtool_ops); + netdev->stop = dp_dev_stop; + netdev->tx_queue_len = 100; + netdev->set_mac_address = dp_dev_mac_addr; + + netdev->flags = IFF_BROADCAST | IFF_MULTICAST; + + random_ether_addr(netdev->dev_addr); + + /* Set the OUI to the Nicira one. */ + netdev->dev_addr[0] = 0x00; + netdev->dev_addr[1] = 0x23; + netdev->dev_addr[2] = 0x20; + + /* Set the top bits to indicate random Nicira address. 
*/ + netdev->dev_addr[3] |= 0xc0; +} + +/* Create a datapath device associated with 'dp'. If 'dp_name' is null, + * the device name will be of the form 'of<dp_idx>'. + * + * Returns the new device on success. On failure returns an ERR_PTR()-encoded + * negative errno: -EINVAL if 'dp_name' is IFNAMSIZ or more characters long, + * -ENOMEM if alloc_netdev() fails, or the error from register_netdevice(). + * + * Called with RTNL lock and dp_mutex. */ +struct net_device *dp_dev_create(struct datapath *dp, const char *dp_name, int port_no) +{ + struct dp_dev *dp_dev; + struct net_device *netdev; + char dev_name[IFNAMSIZ]; + int err; + + if (dp_name) { + if (strlen(dp_name) >= IFNAMSIZ) + return ERR_PTR(-EINVAL); + strncpy(dev_name, dp_name, sizeof(dev_name)); + } else + snprintf(dev_name, sizeof dev_name, "of%d", dp->dp_idx); + + netdev = alloc_netdev(sizeof(struct dp_dev), dev_name, do_setup); + if (!netdev) + return ERR_PTR(-ENOMEM); + + err = register_netdevice(netdev); + if (err) { + free_netdev(netdev); + return ERR_PTR(err); + } + + dp_dev = dp_dev_priv(netdev); + dp_dev->dp = dp; + dp_dev->port_no = port_no; + dp_dev->dev = netdev; + skb_queue_head_init(&dp_dev->xmit_queue); + INIT_WORK(&dp_dev->xmit_work, dp_dev_do_xmit); + return netdev; +} + +/* Called with RTNL lock and dp_mutex. */ +void dp_dev_destroy(struct net_device *netdev) +{ + struct dp_dev *dp_dev = dp_dev_priv(netdev); + + netif_tx_disable(netdev); + synchronize_net(); + skb_queue_purge(&dp_dev->xmit_queue); + unregister_netdevice(netdev); +} + +int is_dp_dev(struct net_device *netdev) +{ + return netdev->open == dp_dev_open; +} +EXPORT_SYMBOL(is_dp_dev); diff --git a/datapath/dp_dev.h b/datapath/dp_dev.h new file mode 100644 index 00000000..84874390 --- /dev/null +++ b/datapath/dp_dev.h @@ -0,0 +1,27 @@ +#ifndef DP_DEV_H +#define DP_DEV_H 1 + +struct dp_dev { + struct datapath *dp; + int port_no; + + struct net_device *dev; + struct net_device_stats stats; + struct sk_buff_head xmit_queue; + struct work_struct xmit_work; + + struct list_head list; +}; + +static inline struct dp_dev *dp_dev_priv(struct net_device *netdev) +{ + return netdev_priv(netdev); +} + +struct net_device
*dp_dev_create(struct datapath *, const char *, int port_no); +void dp_dev_destroy(struct net_device *); +int dp_dev_recv(struct net_device *, struct sk_buff *); +int is_dp_dev(struct net_device *); +struct datapath *dp_dev_get_dp(struct net_device *); + +#endif /* dp_dev.h */ diff --git a/datapath/dp_notify.c b/datapath/dp_notify.c new file mode 100644 index 00000000..56d5c3c9 --- /dev/null +++ b/datapath/dp_notify.c @@ -0,0 +1,29 @@ +/* + * Distributed under the terms of the GNU GPL version 2. + * Copyright (c) 2007, 2008, 2009 Nicira Networks. + */ + +/* Handle changes to managed devices */ + +#include <linux/netdevice.h> + +#include "datapath.h" + + +static int dp_device_event(struct notifier_block *unused, unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + struct net_bridge_port *p = dev->br_port; + if (event == NETDEV_UNREGISTER && p) { + struct datapath *dp = p->dp; + mutex_lock(&dp->mutex); + dp_del_port(p, NULL); + mutex_unlock(&dp->mutex); + } + return NOTIFY_DONE; +} + +struct notifier_block dp_device_notifier = { + .notifier_call = dp_device_event +}; diff --git a/datapath/flow.c b/datapath/flow.c new file mode 100644 index 00000000..b24c242c --- /dev/null +++ b/datapath/flow.c @@ -0,0 +1,301 @@ +/* + * Distributed under the terms of the GNU GPL version 2. + * Copyright (c) 2007, 2008, 2009 Nicira Networks. 
+ */ + +#include "flow.h" +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <net/llc_pdu.h> +#include <linux/kernel.h> +#include <linux/jiffies.h> +#include <linux/llc.h> +#include <linux/module.h> +#include <linux/in.h> +#include <linux/rcupdate.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/icmp.h> +#include <net/ip.h> + +#include "compat.h" + +struct kmem_cache *flow_cache; + +static inline int iphdr_ok(struct sk_buff *skb) +{ + int nh_ofs = skb_network_offset(skb); + if (skb->len >= nh_ofs + sizeof(struct iphdr)) { + int ip_len = ip_hdrlen(skb); + return (ip_len >= sizeof(struct iphdr) + && pskb_may_pull(skb, nh_ofs + ip_len)); + } + return 0; +} + +static inline int tcphdr_ok(struct sk_buff *skb) +{ + int th_ofs = skb_transport_offset(skb); + if (pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr))) { + int tcp_len = tcp_hdrlen(skb); + return (tcp_len >= sizeof(struct tcphdr) + && skb->len >= th_ofs + tcp_len); + } + return 0; +} + +static inline int udphdr_ok(struct sk_buff *skb) +{ + int th_ofs = skb_transport_offset(skb); + return pskb_may_pull(skb, th_ofs + sizeof(struct udphdr)); +} + +static inline int icmphdr_ok(struct sk_buff *skb) +{ + int th_ofs = skb_transport_offset(skb); + return pskb_may_pull(skb, th_ofs + sizeof(struct icmphdr)); +} + +#define TCP_FLAGS_OFFSET 13 +#define TCP_FLAG_MASK 0x3f + +static inline struct ovs_tcphdr *ovs_tcp_hdr(const struct sk_buff *skb) +{ + return (struct ovs_tcphdr *)skb_transport_header(skb); +} + +void flow_used(struct sw_flow *flow, struct sk_buff *skb) +{ + unsigned long flags; + u8 tcp_flags = 0; + + if (flow->key.dl_type == htons(ETH_P_IP) && iphdr_ok(skb)) { + struct iphdr *nh = ip_hdr(skb); + flow->ip_tos = nh->tos; + if (flow->key.nw_proto == IPPROTO_TCP && tcphdr_ok(skb)) { + u8 *tcp = (u8 *)tcp_hdr(skb); + tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & 
TCP_FLAG_MASK; + } + } + + spin_lock_irqsave(&flow->lock, flags); + getnstimeofday(&flow->used); + flow->packet_count++; + flow->byte_count += skb->len; + flow->tcp_flags |= tcp_flags; + spin_unlock_irqrestore(&flow->lock, flags); +} + +struct sw_flow_actions *flow_actions_alloc(size_t n_actions) +{ + struct sw_flow_actions *sfa; + + if (n_actions > (PAGE_SIZE - sizeof *sfa) / sizeof(union odp_action)) + return ERR_PTR(-EINVAL); + + sfa = kmalloc(sizeof *sfa + n_actions * sizeof(union odp_action), + GFP_KERNEL); + if (!sfa) + return ERR_PTR(-ENOMEM); + + sfa->n_actions = n_actions; + return sfa; +} + + +/* Frees 'flow' immediately. */ +void flow_free(struct sw_flow *flow) +{ + if (unlikely(!flow)) + return; + kfree(flow->sf_acts); + kmem_cache_free(flow_cache, flow); +} + +/* RCU callback used by flow_deferred_free. */ +static void rcu_free_flow_callback(struct rcu_head *rcu) +{ + struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); + flow_free(flow); +} + +/* Schedules 'flow' to be freed after the next RCU grace period. + * The caller must hold rcu_read_lock for this to be sensible. */ +void flow_deferred_free(struct sw_flow *flow) +{ + call_rcu(&flow->rcu, rcu_free_flow_callback); +} + +/* RCU callback used by flow_deferred_free_acts. */ +static void rcu_free_acts_callback(struct rcu_head *rcu) +{ + struct sw_flow_actions *sf_acts = container_of(rcu, + struct sw_flow_actions, rcu); + kfree(sf_acts); +} + +/* Schedules 'sf_acts' to be freed after the next RCU grace period. + * The caller must hold rcu_read_lock for this to be sensible. 
*/ +void flow_deferred_free_acts(struct sw_flow_actions *sf_acts) +{ + call_rcu(&sf_acts->rcu, rcu_free_acts_callback); +} + +#define SNAP_OUI_LEN 3 + +struct eth_snap_hdr +{ + struct ethhdr eth; + u8 dsap; /* Always 0xAA */ + u8 ssap; /* Always 0xAA */ + u8 ctrl; + u8 oui[SNAP_OUI_LEN]; + u16 ethertype; +} __attribute__ ((packed)); + +static int is_snap(const struct eth_snap_hdr *esh) +{ + return (esh->dsap == LLC_SAP_SNAP + && esh->ssap == LLC_SAP_SNAP + && !memcmp(esh->oui, "\0\0\0", 3)); +} + +/* Parses the Ethernet frame in 'skb', which was received on 'in_port', + * and initializes 'key' to match. Returns 1 if 'skb' contains an IP + * fragment, 0 otherwise. */ +int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key) +{ + struct ethhdr *eth; + struct eth_snap_hdr *esh; + int retval = 0; + int nh_ofs; + + memset(key, 0, sizeof *key); + key->dl_vlan = htons(ODP_VLAN_NONE); + key->in_port = in_port; + + if (skb->len < sizeof *eth) + return 0; + if (!pskb_may_pull(skb, skb->len >= 64 ? 
64 : skb->len)) { + return 0; + } + + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + esh = (struct eth_snap_hdr *) eth; + nh_ofs = sizeof *eth; + if (likely(ntohs(eth->h_proto) >= ODP_DL_TYPE_ETH2_CUTOFF)) + key->dl_type = eth->h_proto; + else if (skb->len >= sizeof *esh && is_snap(esh)) { + key->dl_type = esh->ethertype; + nh_ofs = sizeof *esh; + } else { + key->dl_type = htons(ODP_DL_TYPE_NOT_ETH_TYPE); + if (skb->len >= nh_ofs + sizeof(struct llc_pdu_un)) { + nh_ofs += sizeof(struct llc_pdu_un); + } + } + + /* Check for a VLAN tag */ + if (key->dl_type == htons(ETH_P_8021Q) && + skb->len >= nh_ofs + sizeof(struct vlan_hdr)) { + struct vlan_hdr *vh = (struct vlan_hdr*)(skb->data + nh_ofs); + key->dl_type = vh->h_vlan_encapsulated_proto; + key->dl_vlan = vh->h_vlan_TCI & htons(VLAN_VID_MASK); + nh_ofs += sizeof(struct vlan_hdr); + } + memcpy(key->dl_src, eth->h_source, ETH_ALEN); + memcpy(key->dl_dst, eth->h_dest, ETH_ALEN); + skb_set_network_header(skb, nh_ofs); + + /* Network layer. */ + if (key->dl_type == htons(ETH_P_IP) && iphdr_ok(skb)) { + struct iphdr *nh = ip_hdr(skb); + int th_ofs = nh_ofs + nh->ihl * 4; + key->nw_src = nh->saddr; + key->nw_dst = nh->daddr; + key->nw_proto = nh->protocol; + skb_set_transport_header(skb, th_ofs); + + /* Transport layer. */ + if (!(nh->frag_off & htons(IP_MF | IP_OFFSET))) { + if (key->nw_proto == IPPROTO_TCP) { + if (tcphdr_ok(skb)) { + struct tcphdr *tcp = tcp_hdr(skb); + key->tp_src = tcp->source; + key->tp_dst = tcp->dest; + } else { + /* Avoid tricking other code into + * thinking that this packet has an L4 + * header. */ + key->nw_proto = 0; + } + } else if (key->nw_proto == IPPROTO_UDP) { + if (udphdr_ok(skb)) { + struct udphdr *udp = udp_hdr(skb); + key->tp_src = udp->source; + key->tp_dst = udp->dest; + } else { + /* Avoid tricking other code into + * thinking that this packet has an L4 + * header. 
*/ + key->nw_proto = 0; + } + } else if (key->nw_proto == IPPROTO_ICMP) { + if (icmphdr_ok(skb)) { + struct icmphdr *icmp = icmp_hdr(skb); + /* The ICMP type and code fields use the 16-bit + * transport port fields, so we need to store them + * in 16-bit network byte order. */ + key->tp_src = htons(icmp->type); + key->tp_dst = htons(icmp->code); + } else { + /* Avoid tricking other code into + * thinking that this packet has an L4 + * header. */ + key->nw_proto = 0; + } + } + } else { + retval = 1; + } + } else { + skb_reset_transport_header(skb); + } + return retval; +} + +/* Initializes the flow module. + * Returns zero if successful or a negative error code. */ +int flow_init(void) +{ + flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, + 0, NULL); + if (flow_cache == NULL) + return -ENOMEM; + + return 0; +} + +/* Uninitializes the flow module. */ +void flow_exit(void) +{ + kmem_cache_destroy(flow_cache); +} + +void print_flow(const struct odp_flow_key *key) +{ +#define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x" +#define MAC_ARG(x) ((u8*)(x))[0],((u8*)(x))[1],((u8*)(x))[2],((u8*)(x))[3],((u8*)(x))[4],((u8*)(x))[5] + printk("port%04x:vlan%d mac"MAC_FMT"->"MAC_FMT" " + "type%04x proto%d ip%x->%x port%d->%d\n", + key->in_port, ntohs(key->dl_vlan), + MAC_ARG(key->dl_src), MAC_ARG(key->dl_dst), + ntohs(key->dl_type), key->nw_proto, + key->nw_src, key->nw_dst, + ntohs(key->tp_src), ntohs(key->tp_dst)); +} diff --git a/datapath/flow.h b/datapath/flow.h new file mode 100644 index 00000000..55efede1 --- /dev/null +++ b/datapath/flow.h @@ -0,0 +1,49 @@ +#ifndef FLOW_H +#define FLOW_H 1 + +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/types.h> +#include <linux/rcupdate.h> +#include <linux/gfp.h> + +#include "openvswitch/datapath-protocol.h" + +struct sk_buff; + +struct sw_flow_actions { + struct rcu_head rcu; + unsigned int n_actions; + union odp_action actions[]; +}; + +struct sw_flow { + struct rcu_head rcu; + struct odp_flow_key 
key; + struct sw_flow_actions *sf_acts; + + struct timespec used; /* Last used time. */ + + u8 ip_tos; /* IP TOS value. */ + + spinlock_t lock; /* Lock for values below. */ + u64 packet_count; /* Number of packets matched. */ + u64 byte_count; /* Number of bytes matched. */ + u8 tcp_flags; /* Union of seen TCP flags. */ +}; + +extern struct kmem_cache *flow_cache; + +struct sw_flow_actions *flow_actions_alloc(size_t n_actions); +void flow_free(struct sw_flow *); +void flow_deferred_free(struct sw_flow *); +void flow_deferred_free_acts(struct sw_flow_actions *); +int flow_extract(struct sk_buff *, u16 in_port, struct odp_flow_key *); +void flow_used(struct sw_flow *, struct sk_buff *); + +void print_flow(const struct odp_flow_key *); + +int flow_init(void); +void flow_exit(void); + +#endif /* flow.h */ diff --git a/datapath/linux-2.6/.gitignore b/datapath/linux-2.6/.gitignore new file mode 100644 index 00000000..af5821a2 --- /dev/null +++ b/datapath/linux-2.6/.gitignore @@ -0,0 +1,20 @@ +/Kbuild +/Makefile +/Makefile.main +/actions.c +/brcompat.c +/brc_sysfs_dp.c +/brc_sysfs_if.c +/datapath.c +/dp_dev.c +/dp_notify.c +/flow.c +/genetlink-brcompat.c +/genetlink-openvswitch.c +/kcompat.h +/linux-2.6 +/modules.order +/random32.c +/table.c +/tmp +/veth.c diff --git a/datapath/linux-2.6/Kbuild.in b/datapath/linux-2.6/Kbuild.in new file mode 100644 index 00000000..f08eb9c5 --- /dev/null +++ b/datapath/linux-2.6/Kbuild.in @@ -0,0 +1,34 @@ +# -*- makefile -*- +export builddir = @abs_builddir@ +export srcdir = @abs_srcdir@ +export top_srcdir = @abs_top_srcdir@ +export VERSION = @VERSION@ +export BUILDNR = @BUILDNR@ + +include $(srcdir)/../Modules.mk +include $(srcdir)/Modules.mk + +EXTRA_CFLAGS := -DVERSION=\"$(VERSION)\" +EXTRA_CFLAGS += -I$(srcdir)/.. +EXTRA_CFLAGS += -I$(builddir)/.. 
+EXTRA_CFLAGS += -I$(top_srcdir)/include +ifeq '$(BUILDNR)' '0' +EXTRA_CFLAGS += -DBUILDNR=\"\" +else +EXTRA_CFLAGS += -DBUILDNR=\"+build$(BUILDNR)\" +endif +EXTRA_CFLAGS += -g +EXTRA_CFLAGS += -include $(builddir)/kcompat.h + +# These include directories have to go before -I$(KSRC)/include. +# NOSTDINC_FLAGS just happens to be a variable that goes in the +# right place, even though it's conceptually incorrect. +NOSTDINC_FLAGS += -I$(srcdir)/compat-2.6 -I$(srcdir)/compat-2.6/include + +obj-m := $(patsubst %,%_mod.o,$(build_modules)) + +define module_template +$(1)_mod-y = $$(notdir $$(patsubst %.c,%.o,$($(1)_sources))) +endef + +$(foreach module,$(build_modules),$(eval $(call module_template,$(module)))) diff --git a/datapath/linux-2.6/Makefile.in b/datapath/linux-2.6/Makefile.in new file mode 100644 index 00000000..efc1663e --- /dev/null +++ b/datapath/linux-2.6/Makefile.in @@ -0,0 +1,9 @@ +ifeq ($(KERNELRELEASE),) +# We're being called directly by running make in this directory. +include Makefile.main +else +# We're being included by the Linux kernel build system +include Kbuild +endif + + diff --git a/datapath/linux-2.6/Makefile.main.in b/datapath/linux-2.6/Makefile.main.in new file mode 100644 index 00000000..0005ec4f --- /dev/null +++ b/datapath/linux-2.6/Makefile.main.in @@ -0,0 +1,82 @@ +# -*- makefile -*- +export builddir = @abs_builddir@ +export srcdir = @abs_srcdir@ +export top_srcdir = @abs_top_srcdir@ +export KSRC = @KSRC26@ +export VERSION = @VERSION@ +export BUILD_VETH = @BUILD_VETH@ + +include $(srcdir)/../Modules.mk +include $(srcdir)/Modules.mk + +default: $(build_links) + +$(foreach s,$(sort $(foreach m,$(build_modules),$($(m)_sources))), \ + $(eval $(notdir $(s)): ; ln -s $(srcdir)/../$(s) $@)) + +distclean: clean + rm -f kcompat.h +distdir: clean +install: +all: default +check: all +clean: + rm -f *.o *.ko *_mod.* Module.symvers *.cmd kcompat.h.new + for d in $(build_links); do if test -h $$d; then rm $$d; fi; done + +ifneq ($(KSRC),) + +ifeq 
(/lib/modules/$(shell uname -r)/source, $(KSRC)) + KOBJ := /lib/modules/$(shell uname -r)/build +else + KOBJ := $(KSRC) +endif + +ifneq ($(shell grep -c 'PATCHLEVEL = 6' $(KSRC)/Makefile),1) + $(error Linux kernel source in $(KSRC) not 2.6) +endif + +VERSION_FILE := $(KOBJ)/include/linux/version.h +ifeq (,$(wildcard $(VERSION_FILE))) + $(error Linux kernel source not configured - missing version.h) +endif + +CONFIG_FILE := $(KSRC)/include/linux/autoconf.h +ifeq (,$(wildcard $(CONFIG_FILE))) + $(error Linux kernel source not configured - missing autoconf.h) +endif + +default: + $(MAKE) -C $(KSRC) M=$(builddir) modules +endif + +# Much of the kernel build system in this file is derived from Intel's +# e1000 distribution, with the following license: + +################################################################################ +# +# Intel PRO/1000 Linux driver +# Copyright(c) 1999 - 2007, 2009 Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. +# +# The full GNU General Public License is included in this distribution in +# the file called "COPYING". +# +# Contact Information: +# Linux NICS <linux.nics@intel.com> +# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> +# Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 +# +################################################################################ diff --git a/datapath/linux-2.6/Modules.mk b/datapath/linux-2.6/Modules.mk new file mode 100644 index 00000000..bbc4c72f --- /dev/null +++ b/datapath/linux-2.6/Modules.mk @@ -0,0 +1,50 @@ +openvswitch_sources += \ + linux-2.6/compat-2.6/genetlink-openvswitch.c \ + linux-2.6/compat-2.6/random32.c +openvswitch_headers += \ + linux-2.6/compat-2.6/compat26.h \ + linux-2.6/compat-2.6/include/asm-generic/bug.h \ + linux-2.6/compat-2.6/include/linux/dmi.h \ + linux-2.6/compat-2.6/include/linux/err.h \ + linux-2.6/compat-2.6/include/linux/icmp.h \ + linux-2.6/compat-2.6/include/linux/if_arp.h \ + linux-2.6/compat-2.6/include/linux/ip.h \ + linux-2.6/compat-2.6/include/linux/ipv6.h \ + linux-2.6/compat-2.6/include/linux/jiffies.h \ + linux-2.6/compat-2.6/include/linux/kernel.h \ + linux-2.6/compat-2.6/include/linux/log2.h \ + linux-2.6/compat-2.6/include/linux/lockdep.h \ + linux-2.6/compat-2.6/include/linux/mutex.h \ + linux-2.6/compat-2.6/include/linux/netdevice.h \ + linux-2.6/compat-2.6/include/linux/netfilter_bridge.h \ + linux-2.6/compat-2.6/include/linux/netfilter_ipv4.h \ + linux-2.6/compat-2.6/include/linux/netlink.h \ + linux-2.6/compat-2.6/include/linux/percpu.h \ + linux-2.6/compat-2.6/include/linux/random.h \ + linux-2.6/compat-2.6/include/linux/rculist.h \ + linux-2.6/compat-2.6/include/linux/rtnetlink.h \ + linux-2.6/compat-2.6/include/linux/skbuff.h \ + linux-2.6/compat-2.6/include/linux/tcp.h \ + linux-2.6/compat-2.6/include/linux/timer.h \ + linux-2.6/compat-2.6/include/linux/types.h \ + linux-2.6/compat-2.6/include/linux/udp.h \ + linux-2.6/compat-2.6/include/linux/workqueue.h \ + linux-2.6/compat-2.6/include/net/checksum.h \ + linux-2.6/compat-2.6/include/net/genetlink.h \ + linux-2.6/compat-2.6/include/net/netlink.h + +both_modules += brcompat +brcompat_sources = \ + linux-2.6/compat-2.6/genetlink-brcompat.c \ + brcompat.c \ 
+ brc_procfs.c \ + brc_sysfs_dp.c \ + brc_sysfs_if.c +brcompat_headers = \ + brc_procfs.h \ + brc_sysfs.h + +dist_modules += veth +build_modules += $(if $(BUILD_VETH),veth) +veth_sources = linux-2.6/compat-2.6/veth.c +veth_headers = diff --git a/datapath/linux-2.6/compat-2.6/compat26.h b/datapath/linux-2.6/compat-2.6/compat26.h new file mode 100644 index 00000000..61448d63 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/compat26.h @@ -0,0 +1,37 @@ +#ifndef __COMPAT26_H +#define __COMPAT26_H 1 + +#include <linux/version.h> + +#if defined(CONFIG_PREEMPT) && LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,21) +#error "CONFIG_PREEMPT is broken with 2.6.x before 2.6.21--see commit 4498121ca3, \"[NET]: Handle disabled preemption in gfp_any()\"" +#endif + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) +/*---------------------------------------------------------------------------- + * In 2.6.24, a namespace argument became required for dev_get_by_name. */ + +#define dev_get_by_name(net, name) \ + dev_get_by_name((name)) + +#define dev_get_by_index(net, ifindex) \ + dev_get_by_index((ifindex)) + +#define __dev_get_by_name(net, name) \ + __dev_get_by_name((name)) + +#define __dev_get_by_index(net, ifindex) \ + __dev_get_by_index((ifindex)) + +#endif /* linux kernel <= 2.6.23 */ + + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,22) +/*---------------------------------------------------------------------------- + * In 2.6.23, the last argument was dropped from kmem_cache_create. 
*/ +#define kmem_cache_create(n, s, a, f, c) \ + kmem_cache_create((n), (s), (a), (f), (c), NULL) + +#endif /* linux kernel <= 2.6.22 */ + +#endif /* compat26.h */ diff --git a/datapath/linux-2.6/compat-2.6/genetlink-brcompat.c b/datapath/linux-2.6/compat-2.6/genetlink-brcompat.c new file mode 100644 index 00000000..c43b3ce4 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/genetlink-brcompat.c @@ -0,0 +1,20 @@ +#include "net/genetlink.h" + +#include <linux/version.h> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) + +/* We fix grp->id to 32 so that it doesn't collide with any of the multicast + * groups selected by openvswitch_mod, which uses groups 16 through 31. + * Collision isn't fatal--multicast listeners should check that the family is + * the one that they want and discard others--but it wastes time and memory to + * receive unwanted messages. */ +int genl_register_mc_group(struct genl_family *family, + struct genl_multicast_group *grp) +{ + grp->id = 32; + grp->family = family; + + return 0; +} + +#endif /* kernel < 2.6.23 */ diff --git a/datapath/linux-2.6/compat-2.6/genetlink-openvswitch.c b/datapath/linux-2.6/compat-2.6/genetlink-openvswitch.c new file mode 100644 index 00000000..9e09215f --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/genetlink-openvswitch.c @@ -0,0 +1,22 @@ +#include "net/genetlink.h" + +#include <linux/version.h> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) + +/* We use multicast groups 16 through 31 to avoid colliding with the multicast + * group selected by brcompat_mod, which uses group 32. Collision isn't + * fatal--multicast listeners should check that the family is the one that they + * want and discard others--but it wastes time and memory to receive unwanted + * messages. */ +int genl_register_mc_group(struct genl_family *family, + struct genl_multicast_group *grp) +{ + /* This code is called single-threaded.
*/ + static unsigned int next_id = 0; + grp->id = next_id++ % 16 + 16; + grp->family = family; + + return 0; +} + +#endif /* kernel < 2.6.23 */ diff --git a/datapath/linux-2.6/compat-2.6/include/asm-generic/bug.h b/datapath/linux-2.6/compat-2.6/include/asm-generic/bug.h new file mode 100644 index 00000000..1d9b3140 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/asm-generic/bug.h @@ -0,0 +1,19 @@ +#ifndef __ASM_GENERIC_BUG_WRAPPER_H +#define __ASM_GENERIC_BUG_WRAPPER_H + +#include_next <asm-generic/bug.h> + +#ifndef WARN_ON_ONCE +#define WARN_ON_ONCE(condition) ({ \ + static int __warned; \ + int __ret_warn_once = !!(condition); \ + \ + if (unlikely(__ret_warn_once) && !__warned) { \ + WARN_ON(1); \ + __warned = 1; \ + } \ + unlikely(__ret_warn_once); \ +}) +#endif + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/cpumask.h b/datapath/linux-2.6/compat-2.6/include/linux/cpumask.h new file mode 100644 index 00000000..48c73aa8 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/cpumask.h @@ -0,0 +1,11 @@ +#ifndef __LINUX_CPUMASK_WRAPPER_H +#define __LINUX_CPUMASK_WRAPPER_H + +#include_next <linux/cpumask.h> + +/* for_each_cpu was renamed for_each_possible_cpu in 2.6.18. 
*/ +#ifndef for_each_possible_cpu +#define for_each_possible_cpu for_each_cpu +#endif + +#endif /* linux/cpumask.h wrapper */ diff --git a/datapath/linux-2.6/compat-2.6/include/linux/dmi.h b/datapath/linux-2.6/compat-2.6/include/linux/dmi.h new file mode 100644 index 00000000..52916fec --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/dmi.h @@ -0,0 +1,114 @@ +#ifndef __LINUX_DMI_WRAPPER_H +#define __LINUX_DMI_WRAPPER_H 1 + +#include <linux/version.h> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) + +#include_next <linux/dmi.h> + +#else /* linux version >= 2.6.23 */ + +#ifndef __DMI_H__ +#define __DMI_H__ + +#include <linux/list.h> + +enum dmi_field { + DMI_NONE, + DMI_BIOS_VENDOR, + DMI_BIOS_VERSION, + DMI_BIOS_DATE, + DMI_SYS_VENDOR, + DMI_PRODUCT_NAME, + DMI_PRODUCT_VERSION, + DMI_PRODUCT_SERIAL, + DMI_PRODUCT_UUID, + DMI_BOARD_VENDOR, + DMI_BOARD_NAME, + DMI_BOARD_VERSION, + DMI_BOARD_SERIAL, + DMI_BOARD_ASSET_TAG, + DMI_CHASSIS_VENDOR, + DMI_CHASSIS_TYPE, + DMI_CHASSIS_VERSION, + DMI_CHASSIS_SERIAL, + DMI_CHASSIS_ASSET_TAG, + DMI_STRING_MAX, +}; + +enum dmi_device_type { + DMI_DEV_TYPE_ANY = 0, + DMI_DEV_TYPE_OTHER, + DMI_DEV_TYPE_UNKNOWN, + DMI_DEV_TYPE_VIDEO, + DMI_DEV_TYPE_SCSI, + DMI_DEV_TYPE_ETHERNET, + DMI_DEV_TYPE_TOKENRING, + DMI_DEV_TYPE_SOUND, + DMI_DEV_TYPE_IPMI = -1, + DMI_DEV_TYPE_OEM_STRING = -2 +}; + +struct dmi_header { + u8 type; + u8 length; + u16 handle; +}; + +/* + * DMI callbacks for problem boards + */ +struct dmi_strmatch { + u8 slot; + char *substr; +}; + +struct dmi_system_id { + int (*callback)(struct dmi_system_id *); + const char *ident; + struct dmi_strmatch matches[4]; + void *driver_data; +}; + +#define DMI_MATCH(a, b) { a, b } + +struct dmi_device { + struct list_head list; + int type; + const char *name; + void *device_data; /* Type specific data */ +}; + +/* No CONFIG_DMI before 2.6.16 */ +#if defined(CONFIG_DMI) || defined(CONFIG_X86_32) + +extern int dmi_check_system(struct dmi_system_id *list); +extern char 
* dmi_get_system_info(int field); +extern struct dmi_device * dmi_find_device(int type, const char *name, + struct dmi_device *from); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) +extern void dmi_scan_machine(void); +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,17) +extern int dmi_get_year(int field); +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19) +extern int dmi_name_in_vendors(char *str); +#endif + +#else + +static inline int dmi_check_system(struct dmi_system_id *list) { return 0; } +static inline char * dmi_get_system_info(int field) { return NULL; } +static inline struct dmi_device * dmi_find_device(int type, const char *name, + struct dmi_device *from) { return NULL; } +static inline int dmi_get_year(int year) { return 0; } +static inline int dmi_name_in_vendors(char *s) { return 0; } + +#endif + +#endif /* __DMI_H__ */ + +#endif /* linux kernel < 2.6.23 */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/err.h b/datapath/linux-2.6/compat-2.6/include/linux/err.h new file mode 100644 index 00000000..50faf2a1 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/err.h @@ -0,0 +1,21 @@ +#ifndef __LINUX_ERR_WRAPPER_H +#define __LINUX_ERR_WRAPPER_H 1 + +#include_next <linux/err.h> + +#ifndef HAVE_ERR_CAST +/** + * ERR_CAST - Explicitly cast an error-valued pointer to another pointer type + * @ptr: The pointer to cast. + * + * Explicitly cast an error-valued pointer to another pointer type in such a + * way as to make it clear that's what's going on.
+ */ +static inline void *ERR_CAST(const void *ptr) +{ + /* cast away the const */ + return (void *) ptr; +} +#endif /* HAVE_ERR_CAST */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/icmp.h b/datapath/linux-2.6/compat-2.6/include/linux/icmp.h new file mode 100644 index 00000000..89b354e4 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/icmp.h @@ -0,0 +1,13 @@ +#ifndef __LINUX_ICMP_WRAPPER_H +#define __LINUX_ICMP_WRAPPER_H 1 + +#include_next <linux/icmp.h> + +#ifndef HAVE_SKBUFF_HEADER_HELPERS +static inline struct icmphdr *icmp_hdr(const struct sk_buff *skb) +{ + return (struct icmphdr *)skb_transport_header(skb); +} +#endif + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/if_arp.h b/datapath/linux-2.6/compat-2.6/include/linux/if_arp.h new file mode 100644 index 00000000..e48d6ba0 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/if_arp.h @@ -0,0 +1,15 @@ +#ifndef __LINUX_IF_ARP_WRAPPER_H +#define __LINUX_IF_ARP_WRAPPER_H 1 + +#include_next <linux/if_arp.h> + +#ifndef HAVE_SKBUFF_HEADER_HELPERS +#include <linux/skbuff.h> + +static inline struct arphdr *arp_hdr(const struct sk_buff *skb) +{ + return (struct arphdr *)skb_network_header(skb); +} +#endif /* !HAVE_SKBUFF_HEADER_HELPERS */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/ip.h b/datapath/linux-2.6/compat-2.6/include/linux/ip.h new file mode 100644 index 00000000..36765396 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/ip.h @@ -0,0 +1,18 @@ +#ifndef __LINUX_IP_WRAPPER_H +#define __LINUX_IP_WRAPPER_H 1 + +#include_next <linux/ip.h> + +#ifndef HAVE_SKBUFF_HEADER_HELPERS +static inline struct iphdr *ip_hdr(const struct sk_buff *skb) +{ + return (struct iphdr *)skb_network_header(skb); +} + +static inline unsigned int ip_hdrlen(const struct sk_buff *skb) +{ + return ip_hdr(skb)->ihl * 4; +} +#endif /* !HAVE_SKBUFF_HEADER_HELPERS */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/ipv6.h 
b/datapath/linux-2.6/compat-2.6/include/linux/ipv6.h new file mode 100644 index 00000000..25a5431a --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/ipv6.h @@ -0,0 +1,13 @@ +#ifndef __LINUX_IPV6_WRAPPER_H +#define __LINUX_IPV6_WRAPPER_H 1 + +#include_next <linux/ipv6.h> + +#ifndef HAVE_SKBUFF_HEADER_HELPERS +static inline struct ipv6hdr *ipv6_hdr(const struct sk_buff *skb) +{ + return (struct ipv6hdr *)skb_network_header(skb); +} +#endif + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/jiffies.h b/datapath/linux-2.6/compat-2.6/include/linux/jiffies.h new file mode 100644 index 00000000..3286e634 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/jiffies.h @@ -0,0 +1,26 @@ +#ifndef __LINUX_JIFFIES_WRAPPER_H +#define __LINUX_JIFFIES_WRAPPER_H 1 + +#include_next <linux/jiffies.h> + +#include <linux/version.h> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) + +/* Same as above, but does so with platform independent 64bit types. + * These must be used when utilizing jiffies_64 (i.e. 
return value of
+ * get_jiffies_64()) */
+#define time_after64(a,b)	\
+	(typecheck(__u64, a) &&	\
+	 typecheck(__u64, b) &&	\
+	 ((__s64)(b) - (__s64)(a) < 0))
+#define time_before64(a,b)	time_after64(b,a)	/* same test, operands swapped */
+
+#define time_after_eq64(a,b)	\
+	(typecheck(__u64, a) &&	\
+	 typecheck(__u64, b) &&	\
+	 ((__s64)(a) - (__s64)(b) >= 0))
+#define time_before_eq64(a,b)	time_after_eq64(b,a)	/* same test, operands swapped */
+
+#endif /* linux kernel < 2.6.19 */
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/kernel.h b/datapath/linux-2.6/compat-2.6/include/linux/kernel.h
new file mode 100644
index 00000000..9459155d
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/kernel.h
@@ -0,0 +1,9 @@
+#ifndef __KERNEL_H_WRAPPER
+#define __KERNEL_H_WRAPPER 1
+
+#include_next <linux/kernel.h>
+#ifndef HAVE_LOG2_H
+#include <linux/log2.h>	/* presumably resolves to the compat log2.h wrapper */
+#endif
+
+#endif /* linux/kernel.h */
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/lockdep.h b/datapath/linux-2.6/compat-2.6/include/linux/lockdep.h
new file mode 100644
index 00000000..1c839423
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/lockdep.h
@@ -0,0 +1,450 @@
+/*
+ * Runtime locking correctness validator
+ *
+ * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * see Documentation/lockdep-design.txt for more details.
+ */ +#ifndef __LINUX_LOCKDEP_WRAPPER_H +#define __LINUX_LOCKDEP_WRAPPER_H + +#include_next <linux/lockdep.h> + +#include <linux/version.h> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) + +struct task_struct; +struct lockdep_map; + +#ifdef CONFIG_LOCKDEP + +#include <linux/linkage.h> +#include <linux/list.h> +#include <linux/debug_locks.h> +#include <linux/stacktrace.h> + +/* + * Lock-class usage-state bits: + */ +enum lock_usage_bit +{ + LOCK_USED = 0, + LOCK_USED_IN_HARDIRQ, + LOCK_USED_IN_SOFTIRQ, + LOCK_ENABLED_SOFTIRQS, + LOCK_ENABLED_HARDIRQS, + LOCK_USED_IN_HARDIRQ_READ, + LOCK_USED_IN_SOFTIRQ_READ, + LOCK_ENABLED_SOFTIRQS_READ, + LOCK_ENABLED_HARDIRQS_READ, + LOCK_USAGE_STATES +}; + +/* + * Usage-state bitmasks: + */ +#define LOCKF_USED (1 << LOCK_USED) +#define LOCKF_USED_IN_HARDIRQ (1 << LOCK_USED_IN_HARDIRQ) +#define LOCKF_USED_IN_SOFTIRQ (1 << LOCK_USED_IN_SOFTIRQ) +#define LOCKF_ENABLED_HARDIRQS (1 << LOCK_ENABLED_HARDIRQS) +#define LOCKF_ENABLED_SOFTIRQS (1 << LOCK_ENABLED_SOFTIRQS) + +#define LOCKF_ENABLED_IRQS (LOCKF_ENABLED_HARDIRQS | LOCKF_ENABLED_SOFTIRQS) +#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ) + +#define LOCKF_USED_IN_HARDIRQ_READ (1 << LOCK_USED_IN_HARDIRQ_READ) +#define LOCKF_USED_IN_SOFTIRQ_READ (1 << LOCK_USED_IN_SOFTIRQ_READ) +#define LOCKF_ENABLED_HARDIRQS_READ (1 << LOCK_ENABLED_HARDIRQS_READ) +#define LOCKF_ENABLED_SOFTIRQS_READ (1 << LOCK_ENABLED_SOFTIRQS_READ) + +#define LOCKF_ENABLED_IRQS_READ \ + (LOCKF_ENABLED_HARDIRQS_READ | LOCKF_ENABLED_SOFTIRQS_READ) +#define LOCKF_USED_IN_IRQ_READ \ + (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) + +#define MAX_LOCKDEP_SUBCLASSES 8UL + +/* + * Lock-classes are keyed via unique addresses, by embedding the + * lockclass-key into the kernel (or module) .data section. (For + * static locks we use the lock address itself as the key.) 
+ */ +struct lockdep_subclass_key { + char __one_byte; +} __attribute__ ((__packed__)); + +struct lock_class_key { + struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; +}; + +/* + * The lock-class itself: + */ +struct lock_class { + /* + * class-hash: + */ + struct list_head hash_entry; + + /* + * global list of all lock-classes: + */ + struct list_head lock_entry; + + struct lockdep_subclass_key *key; + unsigned int subclass; + + /* + * IRQ/softirq usage tracking bits: + */ + unsigned long usage_mask; + struct stack_trace usage_traces[LOCK_USAGE_STATES]; + + /* + * These fields represent a directed graph of lock dependencies, + * to every node we attach a list of "forward" and a list of + * "backward" graph nodes. + */ + struct list_head locks_after, locks_before; + + /* + * Generation counter, when doing certain classes of graph walking, + * to ensure that we check one node only once: + */ + unsigned int version; + + /* + * Statistics counter: + */ + unsigned long ops; + + const char *name; + int name_version; + +#ifdef CONFIG_LOCK_STAT + unsigned long contention_point[4]; +#endif +}; + +#ifdef CONFIG_LOCK_STAT +struct lock_time { + s64 min; + s64 max; + s64 total; + unsigned long nr; +}; + +enum bounce_type { + bounce_acquired_write, + bounce_acquired_read, + bounce_contended_write, + bounce_contended_read, + nr_bounce_types, + + bounce_acquired = bounce_acquired_write, + bounce_contended = bounce_contended_write, +}; + +struct lock_class_stats { + unsigned long contention_point[4]; + struct lock_time read_waittime; + struct lock_time write_waittime; + struct lock_time read_holdtime; + struct lock_time write_holdtime; + unsigned long bounces[nr_bounce_types]; +}; + +struct lock_class_stats lock_stats(struct lock_class *class); +void clear_lock_stats(struct lock_class *class); +#endif + +/* + * Map the lock object (the lock instance) to the lock-class object. 
+ * This is embedded into specific lock instances:
+ */
+struct lockdep_map {
+	struct lock_class_key		*key;
+	struct lock_class		*class_cache;
+	const char			*name;
+#ifdef CONFIG_LOCK_STAT
+	int				cpu;
+#endif
+};
+
+/*
+ * Every lock has a list of other locks that were taken after it.
+ * We only grow the list, never remove from it:
+ */
+struct lock_list {
+	struct list_head		entry;
+	struct lock_class		*class;
+	struct stack_trace		trace;
+	int				distance;
+};
+
+/*
+ * We record lock dependency chains, so that we can cache them:
+ */
+struct lock_chain {
+	struct list_head		entry;
+	u64				chain_key;
+};
+
+struct held_lock {
+	/*
+	 * One-way hash of the dependency chain up to this point. We
+	 * hash the hashes step by step as the dependency chain grows.
+	 *
+	 * We use it for dependency-caching and we skip detection
+	 * passes and dependency-updates if there is a cache-hit, so
+	 * it is absolutely critical for 100% coverage of the validator
+	 * to have a unique key value for every unique dependency path
+	 * that can occur in the system, to make a unique hash value
+	 * as likely as possible - hence the 64-bit width.
+	 *
+	 * The task struct holds the current hash value (initialized
+	 * with zero), here we store the previous hash value:
+	 */
+	u64				prev_chain_key;
+	struct lock_class		*class;
+	unsigned long			acquire_ip;
+	struct lockdep_map		*instance;
+
+#ifdef CONFIG_LOCK_STAT
+	u64				waittime_stamp;
+	u64				holdtime_stamp;
+#endif
+	/*
+	 * The lock-stack is unified in that the lock chains of interrupt
+	 * contexts nest on top of process context chains, but we 'separate'
+	 * the hashes by starting with 0 if we cross into an interrupt
+	 * context, and we also do not add cross-context lock
+	 * dependencies - the lock usage graph walking covers that area
+	 * anyway, and we'd just unnecessarily increase the number of
+	 * dependencies otherwise. [Note: hardirq and softirq contexts
+	 * are separated from each other too.]
+ * + * The following field is used to detect when we cross into an + * interrupt context: + */ + int irq_context; + int trylock; + int read; + int check; + int hardirqs_off; +}; + +/* + * Initialization, self-test and debugging-output methods: + */ +extern void lockdep_init(void); +extern void lockdep_info(void); +extern void lockdep_reset(void); +extern void lockdep_reset_lock(struct lockdep_map *lock); +extern void lockdep_free_key_range(void *start, unsigned long size); + +extern void lockdep_off(void); +extern void lockdep_on(void); + +/* + * These methods are used by specific locking variants (spinlocks, + * rwlocks, mutexes and rwsems) to pass init/acquire/release events + * to lockdep: + */ + +extern void lockdep_init_map(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, int subclass); + +/* + * Reinitialize a lock key - for cases where there is special locking or + * special initialization of locks so that the validator gets the scope + * of dependencies wrong: they are either too broad (they need a class-split) + * or they are too narrow (they suffer from a false class-split): + */ +#define lockdep_set_class(lock, key) \ + lockdep_init_map(&(lock)->dep_map, #key, key, 0) +#define lockdep_set_class_and_name(lock, key, name) \ + lockdep_init_map(&(lock)->dep_map, name, key, 0) +#define lockdep_set_class_and_subclass(lock, key, sub) \ + lockdep_init_map(&(lock)->dep_map, #key, key, sub) +#define lockdep_set_subclass(lock, sub) \ + lockdep_init_map(&(lock)->dep_map, #lock, \ + (lock)->dep_map.key, sub) + +/* + * Acquire a lock. + * + * Values for "read": + * + * 0: exclusive (write) acquire + * 1: read-acquire (no recursion allowed) + * 2: read-acquire with same-instance recursion allowed + * + * Values for check: + * + * 0: disabled + * 1: simple checks (freeing, held-at-exit-time, etc.) 
+ * 2: full validation + */ +extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, unsigned long ip); + +extern void lock_release(struct lockdep_map *lock, int nested, + unsigned long ip); + +# define INIT_LOCKDEP .lockdep_recursion = 0, + +#define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0) + +#else /* !LOCKDEP */ + +static inline void lockdep_off(void) +{ +} + +static inline void lockdep_on(void) +{ +} + +# define lock_acquire(l, s, t, r, c, i) do { } while (0) +# define lock_release(l, n, i) do { } while (0) +# define lockdep_init() do { } while (0) +# define lockdep_info() do { } while (0) +# define lockdep_init_map(lock, name, key, sub) do { (void)(key); } while (0) +# define lockdep_set_class(lock, key) do { (void)(key); } while (0) +# define lockdep_set_class_and_name(lock, key, name) \ + do { (void)(key); } while (0) +#define lockdep_set_class_and_subclass(lock, key, sub) \ + do { (void)(key); } while (0) +#define lockdep_set_subclass(lock, sub) do { } while (0) + +# define INIT_LOCKDEP +# define lockdep_reset() do { debug_locks = 1; } while (0) +# define lockdep_free_key_range(start, size) do { } while (0) +/* + * The class key takes no space if lockdep is disabled: + */ +struct lock_class_key { }; + +#define lockdep_depth(tsk) (0) + +#endif /* !LOCKDEP */ + +#ifdef CONFIG_LOCK_STAT + +extern void lock_contended(struct lockdep_map *lock, unsigned long ip); +extern void lock_acquired(struct lockdep_map *lock); + +#define LOCK_CONTENDED(_lock, try, lock) \ +do { \ + if (!try(_lock)) { \ + lock_contended(&(_lock)->dep_map, _RET_IP_); \ + lock(_lock); \ + } \ + lock_acquired(&(_lock)->dep_map); \ +} while (0) + +#else /* CONFIG_LOCK_STAT */ + +#define lock_contended(lockdep_map, ip) do {} while (0) +#define lock_acquired(lockdep_map) do {} while (0) + +#define LOCK_CONTENDED(_lock, try, lock) \ + lock(_lock) + +#endif /* CONFIG_LOCK_STAT */ + +#if defined(CONFIG_TRACE_IRQFLAGS) && 
defined(CONFIG_GENERIC_HARDIRQS) +extern void early_init_irq_lock_class(void); +#else +static inline void early_init_irq_lock_class(void) +{ +} +#endif + +#ifdef CONFIG_TRACE_IRQFLAGS +extern void early_boot_irqs_off(void); +extern void early_boot_irqs_on(void); +extern void print_irqtrace_events(struct task_struct *curr); +#else +static inline void early_boot_irqs_off(void) +{ +} +static inline void early_boot_irqs_on(void) +{ +} +static inline void print_irqtrace_events(struct task_struct *curr) +{ +} +#endif + +/* + * For trivial one-depth nesting of a lock-class, the following + * global define can be used. (Subsystems with multiple levels + * of nesting should define their own lock-nesting subclasses.) + */ +#define SINGLE_DEPTH_NESTING 1 + +/* + * Map the dependency ops to NOP or to real lockdep ops, depending + * on the per lock-class debug mode: + */ + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# else +# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# endif +# define spin_release(l, n, i) lock_release(l, n, i) +#else +# define spin_acquire(l, s, t, i) do { } while (0) +# define spin_release(l, n, i) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 2, i) +# else +# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 1, i) +# endif +# define rwlock_release(l, n, i) lock_release(l, n, i) +#else +# define rwlock_acquire(l, s, t, i) do { } while (0) +# define rwlock_acquire_read(l, s, t, i) do { } while (0) +# define rwlock_release(l, n, i) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# else +# 
define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# endif +# define mutex_release(l, n, i) lock_release(l, n, i) +#else +# define mutex_acquire(l, s, t, i) do { } while (0) +# define mutex_release(l, n, i) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 2, i) +# else +# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 1, i) +# endif +# define rwsem_release(l, n, i) lock_release(l, n, i) +#else +# define rwsem_acquire(l, s, t, i) do { } while (0) +# define rwsem_acquire_read(l, s, t, i) do { } while (0) +# define rwsem_release(l, n, i) do { } while (0) +#endif + +#endif /* linux kernel < 2.6.18 */ + +#endif /* __LINUX_LOCKDEP_WRAPPER_H */ diff --git a/datapath/linux-2.6/compat-2.6/include/linux/log2.h b/datapath/linux-2.6/compat-2.6/include/linux/log2.h new file mode 100644 index 00000000..69abae5e --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/log2.h @@ -0,0 +1,17 @@ +#ifndef __LINUX_LOG2_WRAPPER +#define __LINUX_LOG2_WRAPPER + +#ifdef HAVE_LOG2_H +#include_next <linux/log2.h> +#else +/* This is very stripped down because log2.h has far too many dependencies. */ + +extern __attribute__((const, noreturn)) +int ____ilog2_NaN(void); + +#define ilog2(n) ((n) == 4 ? 2 : \ + (n) == 8 ? 
3 : \ + ____ilog2_NaN()) +#endif + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/mutex.h b/datapath/linux-2.6/compat-2.6/include/linux/mutex.h new file mode 100644 index 00000000..93dfa3b2 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/mutex.h @@ -0,0 +1,59 @@ +#ifndef __LINUX_MUTEX_WRAPPER_H +#define __LINUX_MUTEX_WRAPPER_H + + +#include <linux/version.h> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) + +#include <asm/semaphore.h> + +struct mutex { + struct semaphore sema; +}; + +#define mutex_init(mutex) init_MUTEX(&(mutex)->sema) +#define mutex_destroy(mutex) do { } while (0) + +#define __MUTEX_INITIALIZER(name) \ + __SEMAPHORE_INITIALIZER(name,1) + +#define DEFINE_MUTEX(mutexname) \ + struct mutex mutexname = { __MUTEX_INITIALIZER(mutexname.sema) } + +/* + * See kernel/mutex.c for detailed documentation of these APIs. + * Also see Documentation/mutex-design.txt. + */ +static inline void mutex_lock(struct mutex *lock) +{ + down(&lock->sema); +} + +static inline int mutex_lock_interruptible(struct mutex *lock) +{ + return down_interruptible(&lock->sema); +} + +#define mutex_lock_nested(lock, subclass) mutex_lock(lock) +#define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) + +/* + * NOTE: mutex_trylock() follows the spin_trylock() convention, + * not the down_trylock() convention! 
+ */ +static inline int mutex_trylock(struct mutex *lock) +{ + return !down_trylock(&lock->sema); +} + +static inline void mutex_unlock(struct mutex *lock) +{ + up(&lock->sema); +} +#else + +#include_next <linux/mutex.h> + +#endif /* linux version < 2.6.16 */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/netdevice.h b/datapath/linux-2.6/compat-2.6/include/linux/netdevice.h new file mode 100644 index 00000000..32e1735d --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/netdevice.h @@ -0,0 +1,35 @@ +#ifndef __LINUX_NETDEVICE_WRAPPER_H +#define __LINUX_NETDEVICE_WRAPPER_H 1 + +#include_next <linux/netdevice.h> + +struct net; + +#ifndef to_net_dev +#define to_net_dev(class) container_of(class, struct net_device, class_dev) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) +static inline +struct net *dev_net(const struct net_device *dev) +{ + return NULL; +} +#endif /* linux kernel < 2.6.26 */ + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24) +#define proc_net init_net.proc_net +#endif + +#ifndef for_each_netdev +/* Linux before 2.6.22 didn't have for_each_netdev at all. */ +#define for_each_netdev(net, d) for (d = dev_base; d; d = d->next) +#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) +/* Linux 2.6.24 added a network namespace pointer to the macro. 
*/ +#undef for_each_netdev +#define for_each_netdev(net,d) list_for_each_entry(d, &dev_base_head, dev_list) +#endif + + + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/netfilter_bridge.h b/datapath/linux-2.6/compat-2.6/include/linux/netfilter_bridge.h new file mode 100644 index 00000000..1c8183c8 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/netfilter_bridge.h @@ -0,0 +1,24 @@ +#ifndef __LINUX_NETFILTER_BRIDGE_WRAPPER_H +#define __LINUX_NETFILTER_BRIDGE_WRAPPER_H + +#include_next <linux/netfilter_bridge.h> + +#include <linux/version.h> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + +#include <linux/if_vlan.h> +#include <linux/if_pppox.h> + +static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) +{ + switch (skb->protocol) { + case __constant_htons(ETH_P_8021Q): + return VLAN_HLEN; + default: + return 0; + } +} + +#endif /* linux version < 2.6.22 */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/netfilter_ipv4.h b/datapath/linux-2.6/compat-2.6/include/linux/netfilter_ipv4.h new file mode 100644 index 00000000..ed8a5d94 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/netfilter_ipv4.h @@ -0,0 +1,19 @@ +#ifndef __LINUX_NETFILTER_IPV4_WRAPPER_H +#define __LINUX_NETFILTER_IPV4_WRAPPER_H 1 + +#include_next <linux/netfilter_ipv4.h> + +#include <linux/version.h> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) + +#ifdef __KERNEL__ + +#define NF_INET_PRE_ROUTING NF_IP_PRE_ROUTING +#define NF_INET_POST_ROUTING NF_IP_POST_ROUTING +#define NF_INET_FORWARD NF_IP_FORWARD + +#endif /* __KERNEL__ */ + +#endif /* linux kernel < 2.6.25 */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/netlink.h b/datapath/linux-2.6/compat-2.6/include/linux/netlink.h new file mode 100644 index 00000000..c5f83bd0 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/netlink.h @@ -0,0 +1,24 @@ +#ifndef __LINUX_NETLINK_WRAPPER_H +#define __LINUX_NETLINK_WRAPPER_H 1 + 
+#include <linux/skbuff.h> +#include_next <linux/netlink.h> +#include <net/netlink.h> + +#include <linux/version.h> + +#ifndef NLMSG_DEFAULT_SIZE +#define NLMSG_DEFAULT_SIZE (NLMSG_GOODSIZE - NLMSG_HDRLEN) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) +#define nlmsg_new(s, f) nlmsg_new_proper((s), (f)) +static inline struct sk_buff *nlmsg_new_proper(int size, gfp_t flags) +{ + return alloc_skb(size, flags); +} + +#endif /* linux kernel < 2.6.19 */ + + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/percpu.h b/datapath/linux-2.6/compat-2.6/include/linux/percpu.h new file mode 100644 index 00000000..0f68bb25 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/percpu.h @@ -0,0 +1,10 @@ +#ifndef __LINUX_PERCPU_H_WRAPPER +#define __LINUX_PERCPU_H_WRAPPER 1 + +#include_next <linux/percpu.h> + +#ifndef percpu_ptr +#define percpu_ptr per_cpu_ptr +#endif + +#endif /* linux/percpu.h wrapper */ diff --git a/datapath/linux-2.6/compat-2.6/include/linux/random.h b/datapath/linux-2.6/compat-2.6/include/linux/random.h new file mode 100644 index 00000000..4e4932c9 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/random.h @@ -0,0 +1,17 @@ +#ifndef __LINUX_RANDOM_WRAPPER_H +#define __LINUX_RANDOM_WRAPPER_H 1 + +#include_next <linux/random.h> + +#include <linux/version.h> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) + +#ifdef __KERNEL__ +u32 random32(void); +void srandom32(u32 seed); +#endif /* __KERNEL__ */ + +#endif /* linux kernel < 2.6.19 */ + + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/rculist.h b/datapath/linux-2.6/compat-2.6/include/linux/rculist.h new file mode 100644 index 00000000..4164c0e9 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/rculist.h @@ -0,0 +1,12 @@ +#ifndef __LINUX_RCULIST_WRAPPER_H +#define __LINUX_RCULIST_WRAPPER_H + +#include <linux/version.h> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) +#include_next <linux/rculist.h> +#else +/* Prior to 2.6.26, the 
contents of rculist.h were part of list.h. */ +#include <linux/list.h> +#endif + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/rtnetlink.h b/datapath/linux-2.6/compat-2.6/include/linux/rtnetlink.h new file mode 100644 index 00000000..8bc51560 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/rtnetlink.h @@ -0,0 +1,29 @@ +#ifndef __RTNETLINK_WRAPPER_H +#define __RTNETLINK_WRAPPER_H 1 + +#include_next <linux/rtnetlink.h> + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) +static inline int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, + u32 group, struct nlmsghdr *nlh, gfp_t flags) +{ + BUG_ON(nlh); /* not implemented */ + if (group) { + /* errors reported via destination sk->sk_err */ + nlmsg_multicast(rtnl, skb, 0, group); + } + return 0; +} + +static inline void rtnl_set_sk_err(struct net *net, u32 group, int error) +{ + netlink_set_err(rtnl, 0, group, error); +} +#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) +#define rtnl_notify(skb, net, pid, group, nlh, flags) \ + ((void) (net), rtnl_notify(skb, pid, group, nlh, flags)) +#define rtnl_set_sk_err(net, group, error) \ + ((void) (net), rtnl_set_sk_err(group, error)) +#endif /* linux kernel < 2.6.25 */ + +#endif /* linux/rtnetlink.h wrapper */ diff --git a/datapath/linux-2.6/compat-2.6/include/linux/skbuff.h b/datapath/linux-2.6/compat-2.6/include/linux/skbuff.h new file mode 100644 index 00000000..666ef850 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/skbuff.h @@ -0,0 +1,170 @@ +#ifndef __LINUX_SKBUFF_WRAPPER_H +#define __LINUX_SKBUFF_WRAPPER_H 1 + +#include_next <linux/skbuff.h> + +#include <linux/version.h> + +#ifndef HAVE_SKB_COPY_FROM_LINEAR_DATA_OFFSET +static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb, + const int offset, void *to, + const unsigned int len) +{ + memcpy(to, skb->data + offset, len); +} + +static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb, + const int offset, + const void *from, + 
const unsigned int len) +{ + memcpy(skb->data + offset, from, len); +} + +#endif /* !HAVE_SKB_COPY_FROM_LINEAR_DATA_OFFSET */ + +/* + * The networking layer reserves some headroom in skb data (via + * dev_alloc_skb). This is used to avoid having to reallocate skb data when + * the header has to grow. In the default case, if the header has to grow + * 16 bytes or less we avoid the reallocation. + * + * Unfortunately this headroom changes the DMA alignment of the resulting + * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive + * on some architectures. An architecture can override this value, + * perhaps setting it to a cacheline in size (since that will maintain + * cacheline alignment of the DMA). It must be a power of 2. + * + * Various parts of the networking layer expect at least 16 bytes of + * headroom, you should not reduce this. + */ +#ifndef NET_SKB_PAD +#define NET_SKB_PAD 16 +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) +static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom, + int cloned) +{ + int delta = 0; + + if (headroom < NET_SKB_PAD) + headroom = NET_SKB_PAD; + if (headroom > skb_headroom(skb)) + delta = headroom - skb_headroom(skb); + + if (delta || cloned) + return pskb_expand_head(skb, ALIGN(delta, NET_SKB_PAD), 0, + GFP_ATOMIC); + return 0; +} + +static inline int skb_cow_head(struct sk_buff *skb, unsigned int headroom) +{ + return __skb_cow(skb, headroom, skb_header_cloned(skb)); +} +#endif /* linux < 2.6.23 */ + + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) +/* Emulate Linux 2.6.17 and later behavior, in which kfree_skb silently ignores + * null pointer arguments. 
*/ +#define kfree_skb(skb) kfree_skb_maybe_null(skb) +static inline void kfree_skb_maybe_null(struct sk_buff *skb) +{ + if (likely(skb != NULL)) + (kfree_skb)(skb); +} +#endif + + +#ifndef CHECKSUM_PARTIAL +/* Note that CHECKSUM_PARTIAL is not implemented, but this allows us to at + * least test against it: see update_csum() in forward.c. */ +#define CHECKSUM_PARTIAL 3 +#endif +#ifndef CHECKSUM_COMPLETE +#define CHECKSUM_COMPLETE CHECKSUM_HW +#endif + +#ifdef HAVE_MAC_RAW +#define mac_header mac.raw +#define network_header nh.raw +#endif + +#ifndef HAVE_SKBUFF_HEADER_HELPERS +static inline unsigned char *skb_transport_header(const struct sk_buff *skb) +{ + return skb->h.raw; +} + +static inline void skb_reset_transport_header(struct sk_buff *skb) +{ + skb->h.raw = skb->data; +} + +static inline void skb_set_transport_header(struct sk_buff *skb, + const int offset) +{ + skb->h.raw = skb->data + offset; +} + +static inline unsigned char *skb_network_header(const struct sk_buff *skb) +{ + return skb->nh.raw; +} + +static inline void skb_set_network_header(struct sk_buff *skb, const int offset) +{ + skb->nh.raw = skb->data + offset; +} + +static inline unsigned char *skb_mac_header(const struct sk_buff *skb) +{ + return skb->mac.raw; +} + +static inline void skb_reset_mac_header(struct sk_buff *skb) +{ + skb->mac_header = skb->data; +} + +static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) +{ + skb->mac.raw = skb->data + offset; +} + +static inline int skb_transport_offset(const struct sk_buff *skb) +{ + return skb_transport_header(skb) - skb->data; +} + +static inline int skb_network_offset(const struct sk_buff *skb) +{ + return skb_network_header(skb) - skb->data; +} + +static inline void skb_copy_to_linear_data(struct sk_buff *skb, + const void *from, + const unsigned int len) +{ + memcpy(skb->data, from, len); +} +#endif /* !HAVE_SKBUFF_HEADER_HELPERS */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) +#warning "TSO/UFO not supported on 
kernels earlier than 2.6.18" + +static inline int skb_is_gso(const struct sk_buff *skb) +{ + return 0; +} + +static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb, + int features) +{ + return NULL; +} +#endif /* before 2.6.18 */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/tcp.h b/datapath/linux-2.6/compat-2.6/include/linux/tcp.h new file mode 100644 index 00000000..6fad1933 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/tcp.h @@ -0,0 +1,18 @@ +#ifndef __LINUX_TCP_WRAPPER_H +#define __LINUX_TCP_WRAPPER_H 1 + +#include_next <linux/tcp.h> + +#ifndef HAVE_SKBUFF_HEADER_HELPERS +static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb) +{ + return (struct tcphdr *)skb_transport_header(skb); +} + +static inline unsigned int tcp_hdrlen(const struct sk_buff *skb) +{ + return tcp_hdr(skb)->doff * 4; +} +#endif /* !HAVE_SKBUFF_HEADER_HELPERS */ + +#endif diff --git a/datapath/linux-2.6/compat-2.6/include/linux/timer.h b/datapath/linux-2.6/compat-2.6/include/linux/timer.h new file mode 100644 index 00000000..6c3a9b0f --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/linux/timer.h @@ -0,0 +1,96 @@ +#ifndef __LINUX_TIMER_WRAPPER_H +#define __LINUX_TIMER_WRAPPER_H 1 + +#include_next <linux/timer.h> + +#include <linux/version.h> + +#ifndef RHEL_RELEASE_VERSION +#define RHEL_RELEASE_VERSION(X,Y) ( 0 ) +#endif +#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)) && \ + (!defined(RHEL_RELEASE_CODE) || \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,1)))) + +extern unsigned long volatile jiffies; + +/** + * __round_jiffies - function to round jiffies to a full second + * @j: the time in (absolute) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * __round_jiffies() rounds an absolute time in the future (in jiffies) + * up or down to (approximately) full seconds. 
This is useful for timers + * for which the exact time they fire does not matter too much, as long as + * they fire approximately every X seconds. + * + * By rounding these timers to whole seconds, all such timers will fire + * at the same time, rather than at various times spread out. The goal + * of this is to have the CPU wake up less, which saves power. + * + * The exact rounding is skewed for each processor to avoid all + * processors firing at the exact same time, which could lead + * to lock contention or spurious cache line bouncing. + * + * The return value is the rounded version of the @j parameter. + */ +static inline unsigned long __round_jiffies(unsigned long j, int cpu) +{ + int rem; + unsigned long original = j; + + /* + * We don't want all cpus firing their timers at once hitting the + * same lock or cachelines, so we skew each extra cpu with an extra + * 3 jiffies. This 3 jiffies came originally from the mm/ code which + * already did this. + * The skew is done by adding 3*cpunr, then round, then subtract this + * extra offset again. + */ + j += cpu * 3; + + rem = j % HZ; + + /* + * If the target jiffie is just after a whole second (which can happen + * due to delays of the timer irq, long irq off times etc etc) then + * we should round down to the whole second, not up. Use 1/4th second + * as cutoff for this rounding as an extreme upper bound for this. + */ + if (rem < HZ/4) /* round down */ + j = j - rem; + else /* round up */ + j = j - rem + HZ; + + /* now that we have rounded, subtract the extra skew again */ + j -= cpu * 3; + + if (j <= jiffies) /* rounding ate our timeout entirely; */ + return original; + return j; +} + + +/** + * round_jiffies - function to round jiffies to a full second + * @j: the time in (absolute) jiffies that should be rounded + * + * round_jiffies() rounds an absolute time in the future (in jiffies) + * up or down to (approximately) full seconds. 
This is useful for timers
+ * for which the exact time they fire does not matter too much, as long as
+ * they fire approximately every X seconds.
+ *
+ * By rounding these timers to whole seconds, all such timers will fire
+ * at the same time, rather than at various times spread out. The goal
+ * of this is to have the CPU wake up less, which saves power.
+ *
+ * The return value is the rounded version of the @j parameter.
+ */
+static inline unsigned long round_jiffies(unsigned long j)
+{
+	return __round_jiffies(j, 0); // FIXME: always passes CPU 0, so no per-CPU skew is applied
+}
+
+#endif /* linux kernel < 2.6.20 */
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/types.h b/datapath/linux-2.6/compat-2.6/include/linux/types.h
new file mode 100644
index 00000000..c1f375eb
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/types.h
@@ -0,0 +1,14 @@
+#ifndef __LINUX_TYPES_WRAPPER_H
+#define __LINUX_TYPES_WRAPPER_H 1
+
+#include_next <linux/types.h>
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
+
+typedef __u16 __bitwise __sum16;
+typedef __u32 __bitwise __wsum;
+
+#endif /* linux kernel < 2.6.20 */
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/udp.h b/datapath/linux-2.6/compat-2.6/include/linux/udp.h
new file mode 100644
index 00000000..6fe4721b
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/udp.h
@@ -0,0 +1,13 @@
+#ifndef __LINUX_UDP_WRAPPER_H
+#define __LINUX_UDP_WRAPPER_H 1
+
+#include_next <linux/udp.h>
+
+#ifndef HAVE_SKBUFF_HEADER_HELPERS
+static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
+{
+	return (struct udphdr *)skb_transport_header(skb);
+}
+#endif /* !HAVE_SKBUFF_HEADER_HELPERS */
+
+#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/workqueue.h b/datapath/linux-2.6/compat-2.6/include/linux/workqueue.h
new file mode 100644
index 00000000..1ac3b6ec
--- /dev/null
+++ b/datapath/linux-2.6/compat-2.6/include/linux/workqueue.h
@@ -0,0 +1,42 @@
+#ifndef __LINUX_WORKQUEUE_WRAPPER_H
/*
 * initialize a work-struct's func and data pointers:
 *
 * The handler is cast to the void (*)(void *) signature and the work item
 * itself is stored as the handler's data pointer.
 *
 * Both arguments are parenthesized on use so that expression arguments
 * (e.g. a conditional selecting one of two handlers) are cast and assigned
 * as a whole.  Note that @_work is still evaluated more than once.
 */
#undef PREPARE_WORK
#define PREPARE_WORK(_work, _func)				\
	do {							\
		(_work)->func = (void (*)(void *))(_func);	\
		(_work)->data = (_work);			\
	} while (0)

/*
 * initialize all of a work-struct:
 *
 * Resets the list linkage and pending flag, installs the handler through
 * PREPARE_WORK() and initializes the embedded timer.
 */
#undef INIT_WORK
#define INIT_WORK(_work, _func)					\
	do {							\
		INIT_LIST_HEAD(&(_work)->entry);		\
		(_work)->pending = 0;				\
		PREPARE_WORK((_work), (_func));			\
		init_timer(&(_work)->timer);			\
	} while (0)
KERNEL_VERSION(2,6,23) + +#include <linux/genetlink.h> + +/*---------------------------------------------------------------------------- + * In 2.6.23, registering of multicast groups was added. Our compatability + * layer just supports registering a single group, since that's all we + * need. + */ + +/** + * struct genl_multicast_group - generic netlink multicast group + * @name: name of the multicast group, names are per-family + * @id: multicast group ID, assigned by the core, to use with + * genlmsg_multicast(). + * @list: list entry for linking + * @family: pointer to family, need not be set before registering + */ +struct genl_multicast_group +{ + struct genl_family *family; /* private */ + struct list_head list; /* private */ + char name[GENL_NAMSIZ]; + u32 id; +}; + +int genl_register_mc_group(struct genl_family *family, + struct genl_multicast_group *grp); +#endif /* linux kernel < 2.6.23 */ + + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) +/** + * genlmsg_msg_size - length of genetlink message not including padding + * @payload: length of message payload + */ +static inline int genlmsg_msg_size(int payload) +{ + return GENL_HDRLEN + payload; +} + +/** + * genlmsg_total_size - length of genetlink message including padding + * @payload: length of message payload + */ +static inline int genlmsg_total_size(int payload) +{ + return NLMSG_ALIGN(genlmsg_msg_size(payload)); +} + +#define genlmsg_multicast(s, p, g, f) \ + genlmsg_multicast_flags((s), (p), (g), (f)) + +static inline int genlmsg_multicast_flags(struct sk_buff *skb, u32 pid, + unsigned int group, gfp_t flags) +{ + int err; + + NETLINK_CB(skb).dst_group = group; + + err = netlink_broadcast(genl_sock, skb, pid, group, flags); + if (err > 0) + err = 0; + + return err; +} +#endif /* linux kernel < 2.6.19 */ + + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) + +#define genlmsg_put(skb, p, seq, fam, flg, c) \ + genlmsg_put((skb), (p), (seq), (fam)->id, (fam)->hdrsize, \ + (flg), (c), (fam)->version) 
+ +/** + * genlmsg_put_reply - Add generic netlink header to a reply message + * @skb: socket buffer holding the message + * @info: receiver info + * @family: generic netlink family + * @flags: netlink message flags + * @cmd: generic netlink command + * + * Returns pointer to user specific header + */ +static inline void *genlmsg_put_reply(struct sk_buff *skb, + struct genl_info *info, struct genl_family *family, + int flags, u8 cmd) +{ + return genlmsg_put(skb, info->snd_pid, info->snd_seq, family, + flags, cmd); +} + +/** + * genlmsg_reply - reply to a request + * @skb: netlink message to be sent back + * @info: receiver information + */ +static inline int genlmsg_reply(struct sk_buff *skb, struct genl_info *info) +{ + return genlmsg_unicast(skb, info->snd_pid); +} + +/** + * genlmsg_new - Allocate a new generic netlink message + * @payload: size of the message payload + * @flags: the type of memory to allocate. + */ +static inline struct sk_buff *genlmsg_new(size_t payload, gfp_t flags) +{ + return nlmsg_new(genlmsg_total_size(payload), flags); +} +#endif /* linux kernel < 2.6.20 */ + +#endif /* genetlink.h */ diff --git a/datapath/linux-2.6/compat-2.6/include/net/netlink.h b/datapath/linux-2.6/compat-2.6/include/net/netlink.h new file mode 100644 index 00000000..e0d594d7 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/include/net/netlink.h @@ -0,0 +1,22 @@ +#ifndef __NET_NETLINK_WRAPPER_H +#define __NET_NETLINK_WRAPPER_H 1 + +#include_next <net/netlink.h> + +#ifndef HAVE_NLA_NUL_STRING +#define NLA_NUL_STRING NLA_STRING + +static inline int VERIFY_NUL_STRING(struct nlattr *attr) +{ + return (!attr || (nla_len(attr) + && memchr(nla_data(attr), '\0', nla_len(attr))) + ? 
0 : EINVAL); +} +#else +static inline int VERIFY_NUL_STRING(struct nlattr *attr) +{ + return 0; +} +#endif /* !HAVE_NLA_NUL_STRING */ + +#endif /* net/netlink.h */ diff --git a/datapath/linux-2.6/compat-2.6/random32.c b/datapath/linux-2.6/compat-2.6/random32.c new file mode 100644 index 00000000..b0dd2a32 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/random32.c @@ -0,0 +1,144 @@ +#include <linux/version.h> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) + +/* + This is a maximally equidistributed combined Tausworthe generator + based on code from GNU Scientific Library 1.5 (30 Jun 2004) + + x_n = (s1_n ^ s2_n ^ s3_n) + + s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19)) + s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25)) + s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11)) + + The period of this generator is about 2^88. + + From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe + Generators", Mathematics of Computation, 65, 213 (1996), 203--213. + + This is available on the net from L'Ecuyer's home page, + + http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps + ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps + + There is an erratum in the paper "Tables of Maximally + Equidistributed Combined LFSR Generators", Mathematics of + Computation, 68, 225 (1999), 261--269: + http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps + + ... the k_j most significant bits of z_j must be non- + zero, for each j. (Note: this restriction also applies to the + computer code given in [4], but was mistakenly not mentioned in + that paper.) + + This affects the seeding procedure by imposing the requirement + s1 > 1, s2 > 7, s3 > 15. 
+ +*/ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/jiffies.h> +#include <linux/random.h> +#include <linux/smp.h> + +#include "compat26.h" + +struct rnd_state { + u32 s1, s2, s3; +}; + +static struct rnd_state net_rand_state[NR_CPUS]; + +static u32 __random32(struct rnd_state *state) +{ +#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b) + + state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12); + state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4); + state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17); + + return (state->s1 ^ state->s2 ^ state->s3); +} + +static void __set_random32(struct rnd_state *state, unsigned long s) +{ + if (s == 0) + s = 1; /* default seed is 1 */ + +#define LCG(n) (69069 * n) + state->s1 = LCG(s); + state->s2 = LCG(state->s1); + state->s3 = LCG(state->s2); + + /* "warm it up" */ + __random32(state); + __random32(state); + __random32(state); + __random32(state); + __random32(state); + __random32(state); +} + +/** + * random32 - pseudo random number generator + * + * A 32 bit pseudo-random number is generated using a fast + * algorithm suitable for simulation. This algorithm is NOT + * considered safe for cryptographic use. + */ +u32 random32(void) +{ + return __random32(&net_rand_state[smp_processor_id()]); +} + +/** + * srandom32 - add entropy to pseudo random number generator + * @seed: seed value + * + * Add some additional seeding to the random32() pool. + * Note: this pool is per cpu so it only affects current CPU. + */ +void srandom32(u32 entropy) +{ + struct rnd_state *state = &net_rand_state[smp_processor_id()]; + __set_random32(state, state->s1 ^ entropy); +} + +static int __init random32_reseed(void); + +/* + * Generate some initially weak seeding values to allow + * to start the random32() engine. 
+ */ +int __init random32_init(void) +{ + int i; + + for (i = 0; i < NR_CPUS; i++) { + struct rnd_state *state = &net_rand_state[i]; + __set_random32(state, i + jiffies); + } + random32_reseed(); + return 0; +} + +/* + * Generate better values after random number generator + * is fully initalized. + */ +static int __init random32_reseed(void) +{ + int i; + unsigned long seed; + + for (i = 0; i < NR_CPUS; i++) { + struct rnd_state *state = &net_rand_state[i]; + + get_random_bytes(&seed, sizeof(seed)); + __set_random32(state, seed); + } + return 0; +} + +#endif /* kernel < 2.6.19 */ diff --git a/datapath/linux-2.6/compat-2.6/veth.c b/datapath/linux-2.6/compat-2.6/veth.c new file mode 100644 index 00000000..3cda3365 --- /dev/null +++ b/datapath/linux-2.6/compat-2.6/veth.c @@ -0,0 +1,537 @@ +/* veth driver port to Linux 2.6.18 */ + +/* + * drivers/net/veth.c + * + * Copyright (C) 2007, 2009 OpenVZ http://openvz.org, SWsoft Inc + * + * Author: Pavel Emelianov <xemul@openvz.org> + * Ethtool interface from: Eric W. 
Biederman <ebiederm@xmission.com> + * + */ + +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/ethtool.h> +#include <linux/etherdevice.h> + +#include <net/dst.h> +#include <net/xfrm.h> + +#define DRV_NAME "veth" +#define DRV_VERSION "1.0" + +struct veth_net_stats { + unsigned long rx_packets; + unsigned long tx_packets; + unsigned long rx_bytes; + unsigned long tx_bytes; + unsigned long tx_dropped; +}; + +struct veth_priv { + struct net_device *peer; + struct net_device *dev; + struct list_head list; + struct veth_net_stats *stats; + unsigned ip_summed; + struct net_device_stats dev_stats; +}; + +static LIST_HEAD(veth_list); + +/* + * ethtool interface + */ + +static struct { + const char string[ETH_GSTRING_LEN]; +} ethtool_stats_keys[] = { + { "peer_ifindex" }, +}; + +static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + cmd->supported = 0; + cmd->advertising = 0; + cmd->speed = SPEED_10000; + cmd->duplex = DUPLEX_FULL; + cmd->port = PORT_TP; + cmd->phy_address = 0; + cmd->transceiver = XCVR_INTERNAL; + cmd->autoneg = AUTONEG_DISABLE; + cmd->maxtxpkt = 0; + cmd->maxrxpkt = 0; + return 0; +} + +static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) +{ + strcpy(info->driver, DRV_NAME); + strcpy(info->version, DRV_VERSION); + strcpy(info->fw_version, "N/A"); +} + +static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf) +{ + switch(stringset) { + case ETH_SS_STATS: + memcpy(buf, ðtool_stats_keys, sizeof(ethtool_stats_keys)); + break; + } +} + +static void veth_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct veth_priv *priv; + + priv = netdev_priv(dev); + data[0] = priv->peer->ifindex; +} + +static u32 veth_get_rx_csum(struct net_device *dev) +{ + struct veth_priv *priv; + + priv = netdev_priv(dev); + return priv->ip_summed == CHECKSUM_UNNECESSARY; +} + +static int veth_set_rx_csum(struct net_device *dev, u32 data) +{ + 
struct veth_priv *priv; + + priv = netdev_priv(dev); + priv->ip_summed = data ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE; + return 0; +} + +static u32 veth_get_tx_csum(struct net_device *dev) +{ + return (dev->features & NETIF_F_NO_CSUM) != 0; +} + +static int veth_set_tx_csum(struct net_device *dev, u32 data) +{ + if (data) + dev->features |= NETIF_F_NO_CSUM; + else + dev->features &= ~NETIF_F_NO_CSUM; + return 0; +} + +static struct ethtool_ops veth_ethtool_ops = { + .get_settings = veth_get_settings, + .get_drvinfo = veth_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_rx_csum = veth_get_rx_csum, + .set_rx_csum = veth_set_rx_csum, + .get_tx_csum = veth_get_tx_csum, + .set_tx_csum = veth_set_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = ethtool_op_set_sg, + .get_strings = veth_get_strings, + .get_ethtool_stats = veth_get_ethtool_stats, +}; + +/* + * xmit + */ + +static int veth_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct net_device *rcv = NULL; + struct veth_priv *priv, *rcv_priv; + struct veth_net_stats *stats; + int length, cpu; + + skb_orphan(skb); + + priv = netdev_priv(dev); + rcv = priv->peer; + rcv_priv = netdev_priv(rcv); + + cpu = smp_processor_id(); + stats = per_cpu_ptr(priv->stats, cpu); + + if (!(rcv->flags & IFF_UP)) + goto outf; + + skb->dev = rcv; + skb->pkt_type = PACKET_HOST; + skb->protocol = eth_type_trans(skb, rcv); + if (dev->features & NETIF_F_NO_CSUM) + skb->ip_summed = rcv_priv->ip_summed; + + dst_release(skb->dst); + skb->dst = NULL; + secpath_reset(skb); + nf_reset(skb); + + length = skb->len; + + stats->tx_bytes += length; + stats->tx_packets++; + + stats = per_cpu_ptr(rcv_priv->stats, cpu); + stats->rx_bytes += length; + stats->rx_packets++; + + netif_rx(skb); + return 0; + +outf: + kfree_skb(skb); + stats->tx_dropped++; + return 0; +} + +/* + * general routines + */ + +static struct net_device_stats *veth_get_stats(struct net_device *dev) +{ + struct veth_priv *priv; + struct net_device_stats *dev_stats; + 
int cpu; + struct veth_net_stats *stats; + + priv = netdev_priv(dev); + dev_stats = &priv->dev_stats; + + dev_stats->rx_packets = 0; + dev_stats->tx_packets = 0; + dev_stats->rx_bytes = 0; + dev_stats->tx_bytes = 0; + dev_stats->tx_dropped = 0; + + for_each_online_cpu(cpu) { + stats = per_cpu_ptr(priv->stats, cpu); + + dev_stats->rx_packets += stats->rx_packets; + dev_stats->tx_packets += stats->tx_packets; + dev_stats->rx_bytes += stats->rx_bytes; + dev_stats->tx_bytes += stats->tx_bytes; + dev_stats->tx_dropped += stats->tx_dropped; + } + + return dev_stats; +} + +static int veth_open(struct net_device *dev) +{ + struct veth_priv *priv; + + priv = netdev_priv(dev); + if (priv->peer == NULL) + return -ENOTCONN; + + if (priv->peer->flags & IFF_UP) { + netif_carrier_on(dev); + netif_carrier_on(priv->peer); + } + return 0; +} + +static int veth_dev_init(struct net_device *dev) +{ + struct veth_net_stats *stats; + struct veth_priv *priv; + + stats = alloc_percpu(struct veth_net_stats); + if (stats == NULL) + return -ENOMEM; + + priv = netdev_priv(dev); + priv->stats = stats; + return 0; +} + +static void veth_dev_free(struct net_device *dev) +{ + struct veth_priv *priv; + + priv = netdev_priv(dev); + free_percpu(priv->stats); + free_netdev(dev); +} + +static void veth_setup(struct net_device *dev) +{ + ether_setup(dev); + + dev->hard_start_xmit = veth_xmit; + dev->get_stats = veth_get_stats; + dev->open = veth_open; + dev->ethtool_ops = &veth_ethtool_ops; + dev->features |= NETIF_F_LLTX; + dev->init = veth_dev_init; + dev->destructor = veth_dev_free; +} + +static void veth_change_state(struct net_device *dev) +{ + struct net_device *peer; + struct veth_priv *priv; + + priv = netdev_priv(dev); + peer = priv->peer; + + if (netif_carrier_ok(peer)) { + if (!netif_carrier_ok(dev)) + netif_carrier_on(dev); + } else { + if (netif_carrier_ok(dev)) + netif_carrier_off(dev); + } +} + +static int veth_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) 
+{ + struct net_device *dev = ptr; + + if (dev->open != veth_open) + goto out; + + switch (event) { + case NETDEV_CHANGE: + veth_change_state(dev); + break; + } +out: + return NOTIFY_DONE; +} + +static struct notifier_block veth_notifier_block __read_mostly = { + .notifier_call = veth_device_event, +}; + +/* + * netlink interface + */ + +static int veth_newlink(const char *devname, const char *peername) +{ + int err; + const char *names[2]; + struct net_device *devs[2]; + int i; + + names[0] = devname; + names[1] = peername; + devs[0] = devs[1] = NULL; + + for (i = 0; i < 2; i++) { + struct net_device *dev; + + err = -ENOMEM; + devs[i] = alloc_netdev(sizeof(struct veth_priv), + names[i], veth_setup); + if (!devs[i]) { + goto err; + } + + dev = devs[i]; + + if (strchr(dev->name, '%')) { + err = dev_alloc_name(dev, dev->name); + if (err < 0) + goto err; + } + random_ether_addr(dev->dev_addr); + + err = register_netdevice(dev); + if (err < 0) + goto err; + + netif_carrier_off(dev); + } + + /* + * tie the devices together + */ + + for (i = 0; i < 2; i++) { + struct veth_priv *priv = netdev_priv(devs[i]); + priv->dev = devs[i]; + priv->peer = devs[!i]; + if (!i) + list_add(&priv->list, &veth_list); + else + INIT_LIST_HEAD(&priv->list); + } + return 0; + +err: + for (i = 0; i < 2; i++) { + if (devs[i]) { + if (devs[i]->reg_state != NETREG_UNINITIALIZED) + unregister_netdevice(devs[i]); + else + free_netdev(devs[i]); + } + } + return err; +} + +static void veth_dellink(struct net_device *dev) +{ + struct veth_priv *priv; + struct net_device *peer; + + priv = netdev_priv(dev); + peer = priv->peer; + + if (!list_empty(&priv->list)) + list_del(&priv->list); + + priv = netdev_priv(peer); + if (!list_empty(&priv->list)) + list_del(&priv->list); + + unregister_netdevice(dev); + unregister_netdevice(peer); +} + +/* + * sysfs + */ + +/* + * "show" function for the veth_pairs attribute. + * The class parameter is ignored. 
+ */ +static ssize_t veth_show_veth_pairs(struct class *cls, char *buffer) +{ + int res = 0; + struct veth_priv *priv; + + list_for_each_entry(priv, &veth_list, list) { + if (res > (PAGE_SIZE - (IFNAMSIZ * 2 + 1))) { + /* not enough space for another interface name */ + if ((PAGE_SIZE - res) > 10) + res = PAGE_SIZE - 10; + res += sprintf(buffer + res, "++more++"); + break; + } + res += sprintf(buffer + res, "%s,%s ", + priv->dev->name, priv->peer->name); + } + res += sprintf(buffer + res, "\n"); + res++; + return res; +} + +/* + * "store" function for the veth_pairs attribute. This is what + * creates and deletes veth pairs. + * + * The class parameter is ignored. + * + */ +static ssize_t veth_store_veth_pairs(struct class *cls, const char *buffer, + size_t count) +{ + int c = *buffer++; + int retval; + printk("1\n"); + if (c == '+') { + char devname[IFNAMSIZ + 1] = ""; + char peername[IFNAMSIZ + 1] = ""; + char *comma = strchr(buffer, ','); + printk("2\n"); + if (!comma) + goto err_no_cmd; + strncat(devname, buffer, + min_t(int, sizeof devname, comma - buffer)); + strncat(peername, comma + 1, + min_t(int, sizeof peername, strcspn(comma + 1, "\n"))); + printk("3 '%s' '%s'\n", devname, peername); + if (!dev_valid_name(devname) || !dev_valid_name(peername)) + goto err_no_cmd; + printk("4\n"); + rtnl_lock(); + retval = veth_newlink(devname, peername); + rtnl_unlock(); + return retval ? retval : count; + } else if (c == '-') { + struct net_device *dev; + + rtnl_lock(); + dev = dev_get_by_name(buffer); + if (!dev) + retval = -ENODEV; + else if (dev->init != veth_dev_init) + retval = -EINVAL; + else { + veth_dellink(dev); + retval = count; + } + rtnl_unlock(); + + return retval; + } + +err_no_cmd: + printk(KERN_ERR DRV_NAME ": no command found in veth_pairs. Use +ifname,peername or -ifname.\n"); + return -EPERM; +} + +/* class attribute for veth_pairs file. 
This ends up in /sys/class/net */ +static CLASS_ATTR(veth_pairs, S_IWUSR | S_IRUGO, + veth_show_veth_pairs, veth_store_veth_pairs); + +static struct class *netdev_class; + +/* + * Initialize sysfs. This sets up the veth_pairs file in + * /sys/class/net. + */ +int veth_create_sysfs(void) +{ + struct net_device *dev = dev_get_by_name("lo"); + if (!dev) + return -ESRCH; + netdev_class = dev->class_dev.class; + if (!netdev_class) + return -ENODEV; + + return class_create_file(netdev_class, &class_attr_veth_pairs); +} + +/* + * Remove /sys/class/net/veth_pairs. + */ +void veth_destroy_sysfs(void) +{ + class_remove_file(netdev_class, &class_attr_veth_pairs); +} + + + +/* + * init/fini + */ + +static __init int veth_init(void) +{ + int retval = veth_create_sysfs(); + if (retval) + return retval; + register_netdevice_notifier(&veth_notifier_block); + return 0; +} + +static __exit void veth_exit(void) +{ + unregister_netdevice_notifier(&veth_notifier_block); +} + +module_init(veth_init); +module_exit(veth_exit); + +MODULE_DESCRIPTION("Virtual Ethernet Tunnel"); +MODULE_LICENSE("GPL v2"); diff --git a/datapath/linux-2.6/config/config-linux-2.6.23-rc9-kvm b/datapath/linux-2.6/config/config-linux-2.6.23-rc9-kvm new file mode 100644 index 00000000..f287cf72 --- /dev/null +++ b/datapath/linux-2.6/config/config-linux-2.6.23-rc9-kvm @@ -0,0 +1,1408 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.23-rc9 +# Fri Oct 19 15:08:37 2007 +# +CONFIG_X86_32=y +CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CMOS_UPDATE=y +CONFIG_CLOCKSOURCE_WATCHDOG=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_SEMAPHORE_SLEEPERS=y +CONFIG_X86=y +CONFIG_MMU=y +CONFIG_ZONE_DMA=y +CONFIG_QUICKLIST=y +CONFIG_GENERIC_ISA_DMA=y +CONFIG_GENERIC_IOMAP=y +CONFIG_GENERIC_BUG=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y +CONFIG_DMI=y 
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 +CONFIG_LOCALVERSION="" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +CONFIG_POSIX_MQUEUE=y +CONFIG_BSD_PROCESS_ACCT=y +# CONFIG_BSD_PROCESS_ACCT_V3 is not set +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +# CONFIG_USER_NS is not set +# CONFIG_AUDIT is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_CPUSETS is not set +CONFIG_SYSFS_DEPRECATED=y +CONFIG_RELAY=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y +# CONFIG_EMBEDDED is not set +CONFIG_UID16=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y +CONFIG_KALLSYMS_EXTRA_PASS=y +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_ANON_INODES=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set +CONFIG_RT_MUTEXES=y +# CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +CONFIG_KMOD=y +CONFIG_STOP_MACHINE=y +CONFIG_BLOCK=y +CONFIG_LBD=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_LSF=y +# CONFIG_BLK_DEV_BSG is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_DEFAULT_AS=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" + +# +# Processor type and features +# +# CONFIG_TICK_ONESHOT is not set +# CONFIG_NO_HZ is not set +# CONFIG_HIGH_RES_TIMERS is not set 
+CONFIG_SMP=y +CONFIG_X86_PC=y +# CONFIG_X86_ELAN is not set +# CONFIG_X86_VOYAGER is not set +# CONFIG_X86_NUMAQ is not set +# CONFIG_X86_SUMMIT is not set +# CONFIG_X86_BIGSMP is not set +# CONFIG_X86_VISWS is not set +# CONFIG_X86_GENERICARCH is not set +# CONFIG_X86_ES7000 is not set +# CONFIG_PARAVIRT is not set +# CONFIG_M386 is not set +CONFIG_M486=y +# CONFIG_M586 is not set +# CONFIG_M586TSC is not set +# CONFIG_M586MMX is not set +# CONFIG_M686 is not set +# CONFIG_MPENTIUMII is not set +# CONFIG_MPENTIUMIII is not set +# CONFIG_MPENTIUMM is not set +# CONFIG_MCORE2 is not set +# CONFIG_MPENTIUM4 is not set +# CONFIG_MK6 is not set +# CONFIG_MK7 is not set +# CONFIG_MK8 is not set +# CONFIG_MCRUSOE is not set +# CONFIG_MEFFICEON is not set +# CONFIG_MWINCHIPC6 is not set +# CONFIG_MWINCHIP2 is not set +# CONFIG_MWINCHIP3D is not set +# CONFIG_MGEODEGX1 is not set +# CONFIG_MGEODE_LX is not set +# CONFIG_MCYRIXIII is not set +# CONFIG_MVIAC3_2 is not set +# CONFIG_MVIAC7 is not set +CONFIG_X86_GENERIC=y +CONFIG_X86_CMPXCHG=y +CONFIG_X86_L1_CACHE_SHIFT=7 +CONFIG_X86_XADD=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_X86_PPRO_FENCE=y +CONFIG_X86_F00F_BUG=y +CONFIG_X86_WP_WORKS_OK=y +CONFIG_X86_INVLPG=y +CONFIG_X86_BSWAP=y +CONFIG_X86_POPAD_OK=y +CONFIG_X86_ALIGNMENT_16=y +CONFIG_X86_INTEL_USERCOPY=y +CONFIG_X86_MINIMUM_CPU_FAMILY=4 +# CONFIG_HPET_TIMER is not set +CONFIG_NR_CPUS=8 +# CONFIG_SCHED_SMT is not set +CONFIG_SCHED_MC=y +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +CONFIG_PREEMPT_BKL=y +CONFIG_X86_LOCAL_APIC=y +CONFIG_X86_IO_APIC=y +# CONFIG_X86_MCE is not set +CONFIG_VM86=y +# CONFIG_TOSHIBA is not set +# CONFIG_I8K is not set +# CONFIG_X86_REBOOTFIXUPS is not set +# CONFIG_MICROCODE is not set +# CONFIG_X86_MSR is not set +# CONFIG_X86_CPUID is not set + +# +# Firmware Drivers +# +# 
CONFIG_EDD is not set +# CONFIG_DELL_RBU is not set +# CONFIG_DCDBAS is not set +CONFIG_DMIID=y +# CONFIG_NOHIGHMEM is not set +CONFIG_HIGHMEM4G=y +# CONFIG_HIGHMEM64G is not set +CONFIG_PAGE_OFFSET=0xC0000000 +CONFIG_HIGHMEM=y +CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_SELECT_MEMORY_MODEL=y +CONFIG_ARCH_POPULATES_NODE_MAP=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +CONFIG_SPARSEMEM_STATIC=y +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_RESOURCES_64BIT is not set +CONFIG_ZONE_DMA_FLAG=1 +CONFIG_BOUNCE=y +CONFIG_NR_QUICK=1 +CONFIG_VIRT_TO_BUS=y +# CONFIG_HIGHPTE is not set +# CONFIG_MATH_EMULATION is not set +# CONFIG_MTRR is not set +CONFIG_IRQBALANCE=y +CONFIG_SECCOMP=y +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_300 is not set +# CONFIG_HZ_1000 is not set +CONFIG_HZ=250 +# CONFIG_KEXEC is not set +# CONFIG_CRASH_DUMP is not set +CONFIG_PHYSICAL_START=0x100000 +# CONFIG_RELOCATABLE is not set +CONFIG_PHYSICAL_ALIGN=0x100000 +CONFIG_HOTPLUG_CPU=y +CONFIG_COMPAT_VDSO=y +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y + +# +# Power management options (ACPI, APM) +# +CONFIG_PM=y +# CONFIG_PM_LEGACY is not set +# CONFIG_PM_DEBUG is not set +CONFIG_PM_SLEEP_SMP=y +CONFIG_PM_SLEEP=y +CONFIG_SUSPEND_SMP_POSSIBLE=y +CONFIG_SUSPEND=y +CONFIG_HIBERNATION_SMP_POSSIBLE=y +# CONFIG_HIBERNATION is not set +# CONFIG_ACPI is not set +CONFIG_APM=y +# CONFIG_APM_IGNORE_USER_SUSPEND is not set +# CONFIG_APM_DO_ENABLE is not set +# CONFIG_APM_CPU_IDLE is not set +# CONFIG_APM_DISPLAY_BLANK is not set +# CONFIG_APM_ALLOW_INTS is not set +# CONFIG_APM_REAL_MODE_POWER_OFF is not set + +# +# CPU Frequency scaling +# +# CONFIG_CPU_FREQ is not set + +# +# Bus options (PCI, PCMCIA, EISA, MCA, ISA) +# +CONFIG_PCI=y +# CONFIG_PCI_GOBIOS is not set +# CONFIG_PCI_GOMMCONFIG is not set +# CONFIG_PCI_GODIRECT is not set 
+CONFIG_PCI_GOANY=y +CONFIG_PCI_BIOS=y +CONFIG_PCI_DIRECT=y +# CONFIG_PCIEPORTBUS is not set +CONFIG_ARCH_SUPPORTS_MSI=y +# CONFIG_PCI_MSI is not set +# CONFIG_PCI_DEBUG is not set +CONFIG_HT_IRQ=y +CONFIG_ISA_DMA_API=y +CONFIG_ISA=y +# CONFIG_EISA is not set +# CONFIG_MCA is not set +# CONFIG_SCx200 is not set + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set +# CONFIG_HOTPLUG_PCI is not set + +# +# Executable file formats +# +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_AOUT is not set +CONFIG_BINFMT_MISC=m + +# +# Networking +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +CONFIG_UNIX=y +CONFIG_XFRM=y +CONFIG_XFRM_USER=m +CONFIG_XFRM_SUB_POLICY=y +CONFIG_XFRM_MIGRATE=y +CONFIG_NET_KEY=m +CONFIG_NET_KEY_MIGRATE=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_ASK_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set +CONFIG_IP_FIB_HASH=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +# CONFIG_IP_ROUTE_VERBOSE is not set +# CONFIG_IP_PNP is not set +CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +# CONFIG_ARPD is not set +CONFIG_SYN_COOKIES=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +CONFIG_INET_XFRM_TUNNEL=m +CONFIG_INET_TUNNEL=m +CONFIG_INET_XFRM_MODE_TRANSPORT=m +CONFIG_INET_XFRM_MODE_TUNNEL=m +CONFIG_INET_XFRM_MODE_BEET=m +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_BIC=m +CONFIG_TCP_CONG_CUBIC=y +CONFIG_TCP_CONG_WESTWOOD=m +CONFIG_TCP_CONG_HTCP=m +CONFIG_TCP_CONG_HSTCP=m +CONFIG_TCP_CONG_HYBLA=m +CONFIG_TCP_CONG_VEGAS=m +CONFIG_TCP_CONG_SCALABLE=m +CONFIG_TCP_CONG_LP=m +CONFIG_TCP_CONG_VENO=m +CONFIG_TCP_CONG_YEAH=m +CONFIG_TCP_CONG_ILLINOIS=m +# CONFIG_DEFAULT_BIC is not set +CONFIG_DEFAULT_CUBIC=y +# CONFIG_DEFAULT_HTCP is not set +# CONFIG_DEFAULT_VEGAS is not set +# CONFIG_DEFAULT_WESTWOOD is not set +# CONFIG_DEFAULT_RENO is 
not set +CONFIG_DEFAULT_TCP_CONG="cubic" +CONFIG_TCP_MD5SIG=y +# CONFIG_IP_VS is not set +CONFIG_IPV6=m +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +# CONFIG_IPV6_OPTIMISTIC_DAD is not set +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +# CONFIG_IPV6_MIP6 is not set +CONFIG_INET6_XFRM_TUNNEL=m +CONFIG_INET6_TUNNEL=m +CONFIG_INET6_XFRM_MODE_TRANSPORT=m +CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m +# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set +CONFIG_IPV6_SIT=m +CONFIG_IPV6_TUNNEL=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_SUBTREES=y +CONFIG_NETWORK_SECMARK=y +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +CONFIG_BRIDGE_NETFILTER=y + +# +# Core Netfilter Configuration +# +CONFIG_NETFILTER_NETLINK=m +CONFIG_NETFILTER_NETLINK_QUEUE=m +CONFIG_NETFILTER_NETLINK_LOG=m +CONFIG_NF_CONNTRACK_ENABLED=m +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CT_ACCT=y +CONFIG_NF_CONNTRACK_MARK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_GRE=m +CONFIG_NF_CT_PROTO_SCTP=m +# CONFIG_NF_CT_PROTO_UDPLITE is not set +CONFIG_NF_CONNTRACK_AMANDA=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_H323=m +CONFIG_NF_CONNTRACK_IRC=m +CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_PPTP=m +CONFIG_NF_CONNTRACK_SANE=m +CONFIG_NF_CONNTRACK_SIP=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NF_CT_NETLINK=m +CONFIG_NETFILTER_XTABLES=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +# CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set +# CONFIG_NETFILTER_XT_TARGET_DSCP is not set +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +# CONFIG_NETFILTER_XT_TARGET_NOTRACK is not set +# CONFIG_NETFILTER_XT_TARGET_TRACE is not set +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +# CONFIG_NETFILTER_XT_MATCH_CONNLIMIT 
is not set +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m +CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +# CONFIG_NETFILTER_XT_MATCH_PHYSDEV is not set +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_SCTP=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +# CONFIG_NETFILTER_XT_MATCH_U32 is not set +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m + +# +# IP: Netfilter Configuration +# +CONFIG_NF_CONNTRACK_IPV4=m +CONFIG_NF_CONNTRACK_PROC_COMPAT=y +# CONFIG_IP_NF_QUEUE is not set +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_IPRANGE=m +CONFIG_IP_NF_MATCH_TOS=m +CONFIG_IP_NF_MATCH_RECENT=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_MATCH_OWNER=m +CONFIG_IP_NF_MATCH_ADDRTYPE=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_TARGET_LOG=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_NF_NAT=m +CONFIG_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_TARGET_NETMAP=m +CONFIG_IP_NF_TARGET_SAME=m +CONFIG_NF_NAT_SNMP_BASIC=m +CONFIG_NF_NAT_PROTO_GRE=m +CONFIG_NF_NAT_FTP=m +CONFIG_NF_NAT_IRC=m +CONFIG_NF_NAT_TFTP=m +CONFIG_NF_NAT_AMANDA=m +CONFIG_NF_NAT_PPTP=m +CONFIG_NF_NAT_H323=m +CONFIG_NF_NAT_SIP=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_TOS=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m + +# +# IPv6: Netfilter Configuration 
(EXPERIMENTAL) +# +CONFIG_NF_CONNTRACK_IPV6=m +# CONFIG_IP6_NF_QUEUE is not set +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_OWNER=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_AH=m +CONFIG_IP6_NF_MATCH_MH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_LOG=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_TARGET_HL=m +CONFIG_IP6_NF_RAW=m + +# +# DECnet: Netfilter Configuration +# +# CONFIG_DECNET_NF_GRABULATOR is not set + +# +# Bridge: Netfilter Configuration +# +# CONFIG_BRIDGE_NF_EBTABLES is not set +CONFIG_IP_DCCP=m +CONFIG_INET_DCCP_DIAG=m +CONFIG_IP_DCCP_ACKVEC=y + +# +# DCCP CCIDs Configuration (EXPERIMENTAL) +# +CONFIG_IP_DCCP_CCID2=m +# CONFIG_IP_DCCP_CCID2_DEBUG is not set +CONFIG_IP_DCCP_CCID3=m +CONFIG_IP_DCCP_TFRC_LIB=m +# CONFIG_IP_DCCP_CCID3_DEBUG is not set +CONFIG_IP_DCCP_CCID3_RTO=100 + +# +# DCCP Kernel Hacking +# +# CONFIG_IP_DCCP_DEBUG is not set +CONFIG_IP_SCTP=m +# CONFIG_SCTP_DBG_MSG is not set +# CONFIG_SCTP_DBG_OBJCNT is not set +# CONFIG_SCTP_HMAC_NONE is not set +# CONFIG_SCTP_HMAC_SHA1 is not set +CONFIG_SCTP_HMAC_MD5=y +CONFIG_TIPC=m +CONFIG_TIPC_ADVANCED=y +CONFIG_TIPC_ZONES=3 +CONFIG_TIPC_CLUSTERS=1 +CONFIG_TIPC_NODES=255 +CONFIG_TIPC_SLAVE_NODES=0 +CONFIG_TIPC_PORTS=8191 +CONFIG_TIPC_LOG=0 +# CONFIG_TIPC_DEBUG is not set +CONFIG_ATM=m +CONFIG_ATM_CLIP=m +# CONFIG_ATM_CLIP_NO_ICMP is not set +CONFIG_ATM_LANE=m +# CONFIG_ATM_MPOA is not set +CONFIG_ATM_BR2684=m +CONFIG_ATM_BR2684_IPFILTER=y +CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m +CONFIG_DECNET=m +# CONFIG_DECNET_ROUTER is not set +CONFIG_LLC=m +CONFIG_LLC2=m +CONFIG_IPX=m +CONFIG_IPX_INTERN=y +CONFIG_ATALK=m +CONFIG_DEV_APPLETALK=m +# CONFIG_LTPC is not set +# CONFIG_COPS is not set +CONFIG_IPDDP=m +CONFIG_IPDDP_ENCAP=y +CONFIG_IPDDP_DECAP=y +CONFIG_X25=m +CONFIG_LAPB=m +CONFIG_ECONET=m +CONFIG_ECONET_AUNUDP=y 
+CONFIG_ECONET_NATIVE=y +CONFIG_WAN_ROUTER=m + +# +# QoS and/or fair queueing +# +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_FIFO=y + +# +# Queueing/Scheduling +# +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_HFSC=m +CONFIG_NET_SCH_ATM=m +CONFIG_NET_SCH_PRIO=m +# CONFIG_NET_SCH_RR is not set +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_INGRESS=m + +# +# Classification +# +CONFIG_NET_CLS=y +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_ROUTE=y +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_CLS_U32_PERF=y +CONFIG_CLS_U32_MARK=y +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_STACK=32 +CONFIG_NET_EMATCH_CMP=m +CONFIG_NET_EMATCH_NBYTE=m +CONFIG_NET_EMATCH_U32=m +CONFIG_NET_EMATCH_META=m +CONFIG_NET_EMATCH_TEXT=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_GACT=m +CONFIG_GACT_PROB=y +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_SIMP=m +# CONFIG_NET_CLS_POLICE is not set +CONFIG_NET_CLS_IND=y + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +CONFIG_AF_RXRPC=m +# CONFIG_AF_RXRPC_DEBUG is not set +CONFIG_RXKAD=m +CONFIG_FIB_RULES=y + +# +# Wireless +# +# CONFIG_CFG80211 is not set +# CONFIG_WIRELESS_EXT is not set +# CONFIG_MAC80211 is not set +# CONFIG_IEEE80211 is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +# CONFIG_FW_LOADER is not set +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set +# CONFIG_SYS_HYPERVISOR is not set +CONFIG_CONNECTOR=m +# CONFIG_MTD is not set +CONFIG_PARPORT=m +CONFIG_PARPORT_PC=m +# CONFIG_PARPORT_SERIAL is not set +# 
CONFIG_PARPORT_PC_FIFO is not set +# CONFIG_PARPORT_PC_SUPERIO is not set +# CONFIG_PARPORT_GSC is not set +# CONFIG_PARPORT_AX88796 is not set +# CONFIG_PARPORT_1284 is not set +# CONFIG_PNP is not set +CONFIG_BLK_DEV=y +# CONFIG_BLK_DEV_FD is not set +# CONFIG_BLK_DEV_XD is not set +# CONFIG_PARIDE is not set +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +# CONFIG_BLK_DEV_SX8 is not set +CONFIG_BLK_DEV_RAM=m +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set +CONFIG_MISC_DEVICES=y +# CONFIG_IBM_ASM is not set +# CONFIG_PHANTOM is not set +# CONFIG_EEPROM_93CX6 is not set +# CONFIG_SGI_IOC4 is not set +# CONFIG_TIFM_CORE is not set +CONFIG_IDE=y +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_IDE_SATA is not set +# CONFIG_BLK_DEV_HD_IDE is not set +CONFIG_BLK_DEV_IDEDISK=y +# CONFIG_IDEDISK_MULTI_MODE is not set +CONFIG_BLK_DEV_IDECD=y +# CONFIG_BLK_DEV_IDETAPE is not set +# CONFIG_BLK_DEV_IDEFLOPPY is not set +# CONFIG_IDE_TASK_IOCTL is not set +CONFIG_IDE_PROC_FS=y + +# +# IDE chipset support/bugfixes +# +CONFIG_IDE_GENERIC=y +# CONFIG_BLK_DEV_CMD640 is not set +CONFIG_BLK_DEV_IDEPCI=y +# CONFIG_IDEPCI_SHARE_IRQ is not set +CONFIG_IDEPCI_PCIBUS_ORDER=y +# CONFIG_BLK_DEV_OFFBOARD is not set +# CONFIG_BLK_DEV_GENERIC is not set +# CONFIG_BLK_DEV_OPTI621 is not set +# CONFIG_BLK_DEV_RZ1000 is not set +# CONFIG_BLK_DEV_IDEDMA_PCI is not set +# CONFIG_IDE_ARM is not set +# CONFIG_IDE_CHIPSETS is not set +# CONFIG_BLK_DEV_IDEDMA is not set +# CONFIG_BLK_DEV_HD is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +# CONFIG_SCSI is not set +# CONFIG_SCSI_DMA is not set +# 
CONFIG_SCSI_NETLINK is not set +# CONFIG_ATA is not set +# CONFIG_MD is not set + +# +# Fusion MPT device support +# +# CONFIG_FUSION is not set + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_FIREWIRE is not set +# CONFIG_IEEE1394 is not set +# CONFIG_I2O is not set +# CONFIG_MACINTOSH_DRIVERS is not set +CONFIG_NETDEVICES=y +# CONFIG_NETDEVICES_MULTIQUEUE is not set +# CONFIG_IFB is not set +CONFIG_DUMMY=m +# CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set +# CONFIG_EQUALIZER is not set +CONFIG_TUN=m +# CONFIG_ARCNET is not set +# CONFIG_PHYLIB is not set +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_LANCE is not set +# CONFIG_NET_VENDOR_SMC is not set +# CONFIG_NET_VENDOR_RACAL is not set +# CONFIG_NET_TULIP is not set +# CONFIG_AT1700 is not set +# CONFIG_DEPCA is not set +# CONFIG_HP100 is not set +# CONFIG_NET_ISA is not set +CONFIG_NET_PCI=y +CONFIG_PCNET32=y +# CONFIG_PCNET32_NAPI is not set +# CONFIG_AMD8111_ETH is not set +# CONFIG_ADAPTEC_STARFIRE is not set +# CONFIG_AC3200 is not set +# CONFIG_APRICOT is not set +# CONFIG_B44 is not set +# CONFIG_FORCEDETH is not set +# CONFIG_CS89x0 is not set +# CONFIG_DGRS is not set +# CONFIG_EEPRO100 is not set +# CONFIG_E100 is not set +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +CONFIG_NE2K_PCI=y +CONFIG_8139CP=y +# CONFIG_8139TOO is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_TLAN is not set +# CONFIG_VIA_RHINE is not set +# CONFIG_SC92031 is not set +# CONFIG_NET_POCKET is not set +# CONFIG_NETDEV_1000 is not set +# CONFIG_NETDEV_10000 is not set +# CONFIG_TR is not set + +# +# Wireless LAN +# +# CONFIG_WLAN_PRE80211 is not set +# CONFIG_WLAN_80211 is not set +# CONFIG_WAN is not set +CONFIG_ATM_DRIVERS=y +# CONFIG_ATM_DUMMY is not set +# CONFIG_ATM_TCP is not set +# CONFIG_ATM_LANAI is not set +# 
CONFIG_ATM_ENI is not set +# CONFIG_ATM_FIRESTREAM is not set +# CONFIG_ATM_ZATM is not set +# CONFIG_ATM_NICSTAR is not set +# CONFIG_ATM_IDT77252 is not set +# CONFIG_ATM_AMBASSADOR is not set +# CONFIG_ATM_HORIZON is not set +# CONFIG_ATM_IA is not set +# CONFIG_ATM_FORE200E_MAYBE is not set +# CONFIG_ATM_HE is not set +# CONFIG_FDDI is not set +CONFIG_HIPPI=y +# CONFIG_ROADRUNNER is not set +# CONFIG_PLIP is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_SHAPER is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set +# CONFIG_ISDN is not set +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set +# CONFIG_INPUT_POLLDEV is not set + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_TSDEV is not set +# CONFIG_INPUT_EVDEV is not set +# CONFIG_INPUT_EVBUG is not set + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +CONFIG_KEYBOARD_ATKBD=y +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_LKKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_KEYBOARD_NEWTON is not set +# CONFIG_KEYBOARD_STOWAWAY is not set +CONFIG_INPUT_MOUSE=y +CONFIG_MOUSE_PS2=y +CONFIG_MOUSE_PS2_ALPS=y +CONFIG_MOUSE_PS2_LOGIPS2PP=y +CONFIG_MOUSE_PS2_SYNAPTICS=y +CONFIG_MOUSE_PS2_LIFEBOOK=y +CONFIG_MOUSE_PS2_TRACKPOINT=y +# CONFIG_MOUSE_PS2_TOUCHKIT is not set +# CONFIG_MOUSE_SERIAL is not set +# CONFIG_MOUSE_APPLETOUCH is not set +# CONFIG_MOUSE_INPORT is not set +# CONFIG_MOUSE_LOGIBM is not set +# CONFIG_MOUSE_PC110PAD is not set +# CONFIG_MOUSE_VSXXXAA is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +# CONFIG_INPUT_MISC is not set + +# +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_SERIO_I8042=y 
+CONFIG_SERIO_SERPORT=y +# CONFIG_SERIO_CT82C710 is not set +# CONFIG_SERIO_PARKBD is not set +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +# CONFIG_VT_HW_CONSOLE_BINDING is not set +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_FIX_EARLYCON_MEM=y +CONFIG_SERIAL_8250_PCI=y +CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_RUNTIME_UARTS=4 +# CONFIG_SERIAL_8250_EXTENDED is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set +CONFIG_UNIX98_PTYS=y +CONFIG_LEGACY_PTYS=y +CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_PRINTER is not set +# CONFIG_PPDEV is not set +# CONFIG_TIPAR is not set +# CONFIG_IPMI_HANDLER is not set +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_NOWAYOUT=y + +# +# Watchdog Device Drivers +# +CONFIG_SOFT_WATCHDOG=y +# CONFIG_ACQUIRE_WDT is not set +# CONFIG_ADVANTECH_WDT is not set +# CONFIG_ALIM1535_WDT is not set +# CONFIG_ALIM7101_WDT is not set +# CONFIG_SC520_WDT is not set +# CONFIG_EUROTECH_WDT is not set +# CONFIG_IB700_WDT is not set +# CONFIG_IBMASR is not set +# CONFIG_WAFER_WDT is not set +# CONFIG_I6300ESB_WDT is not set +# CONFIG_ITCO_WDT is not set +# CONFIG_SC1200_WDT is not set +# CONFIG_PC87413_WDT is not set +# CONFIG_60XX_WDT is not set +# CONFIG_SBC8360_WDT is not set +# CONFIG_CPU5_WDT is not set +# CONFIG_SMSC37B787_WDT is not set +# CONFIG_W83627HF_WDT is not set +# CONFIG_W83697HF_WDT is not set +# CONFIG_W83877F_WDT is not set +# CONFIG_W83977F_WDT is not set +# CONFIG_MACHZ_WDT is not set +# CONFIG_SBC_EPX_C3_WATCHDOG is not set + +# +# ISA-based Watchdog Cards +# +# CONFIG_PCWATCHDOG is not set +# CONFIG_MIXCOMWD is not set +# CONFIG_WDT is not set + +# +# PCI-based Watchdog Cards +# +# CONFIG_PCIPCWATCHDOG is not set +# 
CONFIG_WDTPCI is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_NVRAM is not set +# CONFIG_RTC is not set +# CONFIG_GEN_RTC is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set +# CONFIG_SONYPI is not set +# CONFIG_AGP is not set +# CONFIG_DRM is not set +# CONFIG_MWAVE is not set +# CONFIG_PC8736x_GPIO is not set +# CONFIG_NSC_GPIO is not set +# CONFIG_CS5535_GPIO is not set +CONFIG_RAW_DRIVER=m +CONFIG_MAX_RAW_DEVS=256 +# CONFIG_HANGCHECK_TIMER is not set +# CONFIG_TCG_TPM is not set +# CONFIG_TELCLOCK is not set +CONFIG_DEVPORT=y +# CONFIG_I2C is not set + +# +# SPI support +# +# CONFIG_SPI is not set +# CONFIG_SPI_MASTER is not set +# CONFIG_W1 is not set +# CONFIG_POWER_SUPPLY is not set +CONFIG_HWMON=y +# CONFIG_HWMON_VID is not set +# CONFIG_SENSORS_ABITUGURU is not set +# CONFIG_SENSORS_ABITUGURU3 is not set +# CONFIG_SENSORS_K8TEMP is not set +# CONFIG_SENSORS_F71805F is not set +# CONFIG_SENSORS_CORETEMP is not set +# CONFIG_SENSORS_IT87 is not set +# CONFIG_SENSORS_PC87360 is not set +# CONFIG_SENSORS_PC87427 is not set +# CONFIG_SENSORS_SIS5595 is not set +# CONFIG_SENSORS_SMSC47M1 is not set +# CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_VIA686A is not set +# CONFIG_SENSORS_VT1211 is not set +# CONFIG_SENSORS_VT8231 is not set +# CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set +# CONFIG_SENSORS_HDAPS is not set +# CONFIG_SENSORS_APPLESMC is not set +# CONFIG_HWMON_DEBUG_CHIP is not set + +# +# Multifunction device drivers +# +# CONFIG_MFD_SM501 is not set + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set +# CONFIG_DVB_CORE is not set +# CONFIG_DAB is not set + +# +# Graphics support +# +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set +# CONFIG_VGASTATE is not set +CONFIG_VIDEO_OUTPUT_CONTROL=m +# CONFIG_FB is not set + +# +# Console display driver support +# +CONFIG_VGA_CONSOLE=y +# 
CONFIG_VGACON_SOFT_SCROLLBACK is not set +# CONFIG_VIDEO_SELECT is not set +# CONFIG_MDA_CONSOLE is not set +CONFIG_DUMMY_CONSOLE=y + +# +# Sound +# +# CONFIG_SOUND is not set +CONFIG_HID_SUPPORT=y +# CONFIG_HID is not set +CONFIG_USB_SUPPORT=y +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y +# CONFIG_USB is not set + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set +# CONFIG_MMC is not set +# CONFIG_NEW_LEDS is not set +# CONFIG_INFINIBAND is not set +# CONFIG_EDAC is not set +# CONFIG_RTC_CLASS is not set + +# +# DMA Engine support +# +# CONFIG_DMA_ENGINE is not set + +# +# DMA Clients +# + +# +# DMA Devices +# +# CONFIG_AUXDISPLAY is not set +CONFIG_VIRTUALIZATION=y +# CONFIG_KVM is not set + +# +# Userspace I/O +# +# CONFIG_UIO is not set + +# +# File systems +# +# CONFIG_EXT2_FS is not set +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +# CONFIG_EXT4DEV_FS is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +CONFIG_FS_POSIX_ACL=y +# CONFIG_XFS_FS is not set +# CONFIG_GFS2_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_MINIX_FS is not set +CONFIG_ROMFS_FS=m +CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y +# CONFIG_QUOTA is not set +CONFIG_DNOTIFY=y +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set +CONFIG_GENERIC_ACL=y + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=y +CONFIG_UDF_NLS=y + +# +# DOS/FAT/NT Filesystems +# +# CONFIG_MSDOS_FS is not set +# CONFIG_VFAT_FS is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +# CONFIG_HUGETLBFS is not set +# CONFIG_HUGETLB_PAGE is not set 
+CONFIG_RAMFS=y +CONFIG_CONFIGFS_FS=m + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_ECRYPT_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +CONFIG_CRAMFS=m +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +# CONFIG_NFS_V4 is not set +# CONFIG_NFS_DIRECTIO is not set +# CONFIG_NFSD is not set +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +# CONFIG_SUNRPC_BIND34 is not set +# CONFIG_RPCSEC_GSS_KRB5 is not set +# CONFIG_RPCSEC_GSS_SPKM3 is not set +# CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y + +# +# Native Language Support +# +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ASCII=m +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m 
+CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m + +# +# Distributed Lock Manager +# +# CONFIG_DLM is not set +CONFIG_INSTRUMENTATION=y +# CONFIG_PROFILING is not set +# CONFIG_KPROBES is not set + +# +# Kernel hacking +# +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +# CONFIG_PRINTK_TIME is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_MAGIC_SYSRQ=y +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_FS=y +# CONFIG_HEADERS_CHECK is not set +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_SHIRQ is not set +CONFIG_DETECT_SOFTLOCKUP=y +CONFIG_SCHED_DEBUG=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_TIMER_STATS is not set +CONFIG_DEBUG_SLAB=y +CONFIG_DEBUG_SLAB_LEAK=y +CONFIG_DEBUG_RT_MUTEXES=y +CONFIG_DEBUG_PI_LIST=y +# CONFIG_RT_MUTEX_TESTER is not set +CONFIG_DEBUG_SPINLOCK=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_PROVE_LOCKING=y +CONFIG_LOCKDEP=y +CONFIG_LOCK_STAT=y +# CONFIG_DEBUG_LOCKDEP is not set +CONFIG_TRACE_IRQFLAGS=y +CONFIG_DEBUG_SPINLOCK_SLEEP=y +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +CONFIG_STACKTRACE=y +CONFIG_DEBUG_KOBJECT=y +CONFIG_DEBUG_HIGHMEM=y +CONFIG_DEBUG_BUGVERBOSE=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_VM=y +CONFIG_DEBUG_LIST=y +CONFIG_FRAME_POINTER=y +CONFIG_FORCED_INLINING=y +CONFIG_RCU_TORTURE_TEST=m +# CONFIG_FAULT_INJECTION is not set +CONFIG_EARLY_PRINTK=y +CONFIG_DEBUG_STACKOVERFLOW=y +# CONFIG_DEBUG_STACK_USAGE is not set +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DEBUG_RODATA=y +CONFIG_4KSTACKS=y +CONFIG_X86_FIND_SMP_CONFIG=y +CONFIG_X86_MPPARSE=y +CONFIG_DOUBLEFAULT=y + +# +# Security options +# +CONFIG_KEYS=y +# CONFIG_KEYS_DEBUG_PROC_KEYS is not set +# CONFIG_SECURITY is not set +CONFIG_CRYPTO=y +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_BLKCIPHER=m +CONFIG_CRYPTO_HASH=m +CONFIG_CRYPTO_MANAGER=m +CONFIG_CRYPTO_HMAC=m +# CONFIG_CRYPTO_XCBC is not set +CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_SHA1=m 
+CONFIG_CRYPTO_SHA256=m +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_WP512 is not set +# CONFIG_CRYPTO_TGR192 is not set +CONFIG_CRYPTO_GF128MUL=m +# CONFIG_CRYPTO_ECB is not set +CONFIG_CRYPTO_CBC=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_LRW=m +# CONFIG_CRYPTO_CRYPTD is not set +CONFIG_CRYPTO_DES=m +CONFIG_CRYPTO_FCRYPT=m +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_TWOFISH is not set +# CONFIG_CRYPTO_TWOFISH_586 is not set +# CONFIG_CRYPTO_SERPENT is not set +CONFIG_CRYPTO_AES=m +# CONFIG_CRYPTO_AES_586 is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +CONFIG_CRYPTO_TEA=m +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_ANUBIS is not set +CONFIG_CRYPTO_DEFLATE=m +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_TEST is not set +CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_DEV_PADLOCK is not set +# CONFIG_CRYPTO_DEV_GEODE is not set + +# +# Library routines +# +CONFIG_BITREVERSE=y +CONFIG_CRC_CCITT=m +CONFIG_CRC16=m +CONFIG_CRC_ITU_T=m +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +CONFIG_LIBCRC32C=m +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m +CONFIG_PLIST=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_PENDING_IRQ=y +CONFIG_X86_SMP=y +CONFIG_X86_HT=y +CONFIG_X86_BIOS_REBOOT=y +CONFIG_X86_TRAMPOLINE=y +CONFIG_KTIME_SCALAR=y
diff --git a/datapath/table.c b/datapath/table.c
new file mode 100644
index 00000000..c0885b70
--- /dev/null
+++ b/datapath/table.c
@@ -0,0 +1,320 @@
+#include "flow.h"
+#include "datapath.h"
+
+#include <linux/gfp.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <asm/pgtable.h>
+
+/*
+ * Release one two-level bucket array: for each of the
+ * (n_buckets >> DP_L1_BITS) second-level pages, optionally flow_free()
+ * every non-NULL entry, free the page, then kfree() the top-level array.
+ */
+static void free_table(struct sw_flow ***flows, unsigned int n_buckets,
+		       int free_flows)
+{
+	unsigned int i;
+
+	for (i = 0; i < n_buckets >> DP_L1_BITS; i++) {
+		struct sw_flow **l2 = flows[i];
+		if (free_flows) {
+			unsigned int j;
+			for (j = 0; j < DP_L1_SIZE; j++) {
+				if (l2[j])
+					flow_free(l2[j]);
+			}
+		}
+		free_page((unsigned long)l2);
+	}
+	kfree(flows);
+}
+
+/*
+ * Allocate a two-level bucket array for n_buckets buckets: a kmalloc()ed
+ * top-level array whose entries each point to one zeroed page.  Returns
+ * NULL on allocation failure, after releasing whatever was already
+ * allocated.
+ */
+static struct sw_flow ***alloc_table(unsigned int n_buckets)
+{
+	struct sw_flow ***flows;
+	unsigned int i;
+
+	flows = kmalloc((n_buckets >> DP_L1_BITS) * sizeof(struct sw_flow**),
+			GFP_KERNEL);
+	if (!flows)
+		return NULL;
+	for (i = 0; i < n_buckets >> DP_L1_BITS; i++) {
+		flows[i] = (struct sw_flow **)get_zeroed_page(GFP_KERNEL);
+		if (!flows[i]) {
+			/* Only the first i pages exist; free just those. */
+			free_table(flows, i << DP_L1_BITS, 0);
+			return NULL;
+		}
+	}
+	return flows;
+}
+
+/*
+ * Create a flow table with n_buckets buckets in each of its two bucket
+ * arrays.  Returns the new table, or NULL if any allocation fails (in
+ * which case everything allocated so far has been released).
+ */
+struct dp_table *dp_table_create(unsigned int n_buckets)
+{
+	struct dp_table *table;
+
+	table = kzalloc(sizeof *table, GFP_KERNEL);
+	if (!table)
+		goto err;
+
+	table->n_buckets = n_buckets;
+	table->flows[0] = alloc_table(n_buckets);
+	/*
+	 * Test the pointer just stored, not the flows[] array member itself:
+	 * the latter never compares equal to NULL, so a failed allocation
+	 * would go unnoticed.
+	 */
+	if (!table->flows[0])
+		goto err_free_tables;
+
+	table->flows[1] = alloc_table(n_buckets);
+	if (!table->flows[1])
+		goto err_free_flows0;
+
+	return table;
+
+err_free_flows0:
+	free_table(table->flows[0], table->n_buckets, 0);
+err_free_tables:
+	kfree(table);
+err:
+	return NULL;
+}
+
+/*
+ * Free both bucket arrays of 'table' and the table itself.  If free_flows
+ * is nonzero, every flow still stored in the table is flow_free()d too.
+ */
+void dp_table_destroy(struct dp_table *table, int free_flows)
+{
+	int i;
+	for (i = 0; i < 2; i++)
+		free_table(table->flows[i], table->n_buckets, free_flows);
+	kfree(table);
+}
+
+/*
+ * Map 'hash' to its bucket in 'flows'.  The hash is masked with
+ * n_buckets - 1 and shifted by DP_L1_SHIFT to pick the second-level page;
+ * its low DP_L2_BITS bits pick the slot within that page.
+ * NOTE(review): the mask presumes n_buckets is a power of two -- confirm
+ * against callers.
+ */
+static struct sw_flow **find_bucket(struct dp_table *table,
+				    struct sw_flow ***flows, u32 hash)
+{
+	unsigned int l1 = (hash & (table->n_buckets - 1)) >> DP_L1_SHIFT;
+	unsigned int l2 = hash & ((1 << DP_L2_BITS) - 1);
+	return &flows[l1][l2];
+}
+
+/*
+ * Return the flow in the bucket selected by 'hash' if its key is
+ * byte-for-byte equal to 'key', otherwise NULL.
+ */
+static struct sw_flow *lookup_table(struct dp_table *table,
+				    struct sw_flow ***flows, u32 hash,
+				    const struct odp_flow_key *key)
+{
+	struct sw_flow **bucket = find_bucket(table, flows, hash);
+	struct sw_flow *flow = rcu_dereference(*bucket);
+	if (flow && !memcmp(&flow->key, key, sizeof(struct odp_flow_key)))
+		return flow;
+	return NULL;
+}
+
+/* Hash of 'key' used to index table->flows[0]. */
+static u32 flow_hash0(const struct odp_flow_key *key)
+{
+	return jhash2((u32*)key, sizeof *key / sizeof(u32), 0xaaaaaaaa);
+}
+
+/* Like flow_hash0() but seeded differently; indexes table->flows[1]. */
+static u32 flow_hash1(const struct odp_flow_key *key)
+{
+	return jhash2((u32*)key, sizeof *key / sizeof(u32), 0x55555555);
+}
+
+/* Store in buckets[i] the candidate bucket for 'key' in table->flows[i]. */
+static void find_buckets(struct dp_table *table,
+			 const struct odp_flow_key *key,
+			 struct sw_flow **buckets[2])
+{
+	buckets[0] = find_bucket(table, table->flows[0], flow_hash0(key));
+	buckets[1] = find_bucket(table, table->flows[1], flow_hash1(key));
+}
+
+/*
+ * Look up 'key' in 'table', trying flows[0] first and then flows[1].
+ * Returns the matching flow, or NULL if neither candidate bucket holds it.
+ */
+struct sw_flow *dp_table_lookup(struct dp_table *table,
+				const struct odp_flow_key *key)
+{
+	struct sw_flow *flow;
+	flow = lookup_table(table, table->flows[0], flow_hash0(key), key);
+	if (!flow)
+		flow = lookup_table(table, table->flows[1],
+				    flow_hash1(key), key);
+	return flow;
+}
+
+/*
+ * Call 'callback' with 'aux' for every flow in 'table'.  Stops at the
+ * first nonzero return value from 'callback' and returns it; returns 0
+ * once every flow has been visited.
+ */
+int dp_table_foreach(struct dp_table *table,
+		     int (*callback)(struct sw_flow *flow, void *aux),
+		     void *aux)
+{
+	unsigned int i, j, k;
+	for (i = 0; i < 2; i++) {
+		for (j = 0; j < table->n_buckets >> DP_L1_BITS; j++) {
+			struct sw_flow **l2 = table->flows[i][j];
+			for (k = 0; k < DP_L1_SIZE; k++) {
+				struct sw_flow *flow = rcu_dereference(l2[k]);
+				if (flow) {
+					int error = callback(flow, aux);
+					if (error)
+						return error;
+				}
+			}
+		}
+	}
+	return 0;
+}
+
+/*
+ * dp_table_foreach() callback used by dp_table_expand(): stores 'flow' in
+ * the first empty candidate bucket of the table passed as 'new_table_'.
+ * If both candidates are occupied it warns once and leaves the flow out
+ * of the new table; it always returns 0 so the iteration continues.
+ */
+static int insert_flow(struct sw_flow *flow, void *new_table_)
+{
+	struct dp_table *new_table = new_table_;
+	struct sw_flow **buckets[2];
+	int i;
+
+	find_buckets(new_table, &flow->key, buckets);
+	for (i = 0; i < 2; i++) {
+		if (!*buckets[i]) {
+			rcu_assign_pointer(*buckets[i], flow);
+			return 0;
+		}
+	}
+	WARN_ON_ONCE(1);
+	return 0;
+}
+
+/* RCU callback: destroy the table but keep the flows it pointed to. */
+static void dp_free_table_rcu(struct rcu_head *rcu)
+{
+	struct dp_table *table = container_of(rcu, struct dp_table, rcu);
+	dp_table_destroy(table, 0);
+}
+
+/*
+ * Replace dp->table with a table that has twice as many buckets,
+ * re-inserting the existing flows with insert_flow().  The old table (but
+ * not its flows) is freed through call_rcu().  Returns 0 on success or
+ * -ENOMEM if the new table cannot be allocated (dp->table is unchanged).
+ */
+int dp_table_expand(struct datapath *dp)
+{
+	struct dp_table *old_table = rcu_dereference(dp->table);
+	struct dp_table *new_table = dp_table_create(old_table->n_buckets * 2);
+	if (!new_table)
+		return -ENOMEM;
+	dp_table_foreach(old_table, insert_flow, new_table);
+	rcu_assign_pointer(dp->table, new_table);
+	call_rcu(&old_table->rcu, dp_free_table_rcu);
+	return 0;
+}
+
+/* RCU callback: destroy the table together with every flow still in it. */
+static void dp_free_table_and_flows_rcu(struct rcu_head *rcu)
+{
+	struct dp_table *table = container_of(rcu, struct dp_table, rcu);
+	dp_table_destroy(table, 1);
+}
+
+/*
+ * Replace dp->table with a new, empty table of DP_L1_SIZE buckets.  The
+ * old table and all of its flows are freed through call_rcu().  Returns 0
+ * on success or -ENOMEM if the new table cannot be allocated, in which
+ * case dp->table is left unchanged.
+ */
+int dp_table_flush(struct datapath *dp)
+{
+	struct dp_table *old_table = rcu_dereference(dp->table);
+	struct dp_table *new_table = dp_table_create(DP_L1_SIZE);
+	if (!new_table)
+		return -ENOMEM;
+	rcu_assign_pointer(dp->table, new_table);
+	call_rcu(&old_table->rcu, dp_free_table_and_flows_rcu);
+	return 0;
+}
+
+/*
+ * Find where a flow with key 'target' belongs in 'table'.  Returns the
+ * bucket that already holds a flow with that key if there is one,
+ * otherwise the first empty candidate bucket, otherwise NULL (both
+ * candidate buckets hold flows with other keys).
+ */
+struct sw_flow **
+dp_table_lookup_for_insert(struct dp_table *table,
+			   const struct odp_flow_key *target)
+{
+	struct sw_flow **buckets[2];
+	struct sw_flow **empty_bucket = NULL;
+	int i;
+
+	find_buckets(table, target, buckets);
+	for (i = 0; i < 2; i++) {
+		struct sw_flow *f = rcu_dereference(*buckets[i]);
+		if (f) {
+			if (!memcmp(&f->key, target, sizeof(struct odp_flow_key)))
+				return buckets[i];
+		} else if (!empty_bucket)
+			empty_bucket = buckets[i];
+	}
+	return empty_bucket;
+}
+
+/*
+ * Remove 'target' from 'table' by clearing whichever candidate bucket
+ * points to it.  The flow itself is not freed here.  Returns 0 on success
+ * or -ENOENT if neither candidate bucket holds 'target'.
+ */
+int dp_table_delete(struct dp_table *table, struct sw_flow *target)
+{
+	struct sw_flow **buckets[2];
+	int i;
+
+	find_buckets(table, &target->key, buckets);
+	for (i = 0; i < 2; i++) {
+		struct sw_flow *flow = rcu_dereference(*buckets[i]);
+		if (flow == target) {
+			rcu_assign_pointer(*buckets[i], NULL);
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
|