aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--datapath/tunnel.c56
-rw-r--r--datapath/tunnel.h27
-rw-r--r--datapath/vport-gre.c15
3 files changed, 76 insertions, 22 deletions
diff --git a/datapath/tunnel.c b/datapath/tunnel.c
index 3f25c9b4..6fa369be 100644
--- a/datapath/tunnel.c
+++ b/datapath/tunnel.c
@@ -680,29 +680,48 @@ static int build_packet(struct vport *vport, const struct tnl_mutable_config *mu
new_iph->frag_off = frag_off;
ip_select_ident(new_iph, &rt_dst(rt), NULL);
- tnl_vport->tnl_ops->build_header(skb, vport, mutable);
+ memset(&IPCB(skb)->opt, 0, sizeof(IPCB(skb)->opt));
+ IPCB(skb)->flags = 0;
- /* Allow our local IP stack to fragment the outer packet even if the
- * DF bit is set as a last resort. */
- skb->local_df = 1;
+ skb = tnl_vport->tnl_ops->build_header(skb, vport, mutable, &rt_dst(rt));
+ if (unlikely(!skb))
+ goto error;
- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- IPCB(skb)->flags = 0;
+ while (skb) {
+ struct sk_buff *next = skb->next;
+ int frag_len = skb->len - mutable->tunnel_hlen;
- err = ip_local_out(skb);
- if (likely(net_xmit_eval(err) == 0))
- return orig_len;
- else {
- vport_record_error(vport, VPORT_E_TX_ERROR);
- return 0;
- }
+ skb->next = NULL;
+
+ err = ip_local_out(skb);
+ if (unlikely(net_xmit_eval(err) != 0)) {
+ orig_len -= frag_len;
+ skb = next;
+ goto free_frags;
+ }
+
+ skb = next;
+ };
+
+ return orig_len;
error_free:
kfree_skb(skb);
error:
- vport_record_error(vport, VPORT_E_TX_DROPPED);
-
return 0;
+free_frags:
+ /*
+ * There's no point in continuing to send fragments once one has been
+ * dropped so just free the rest. This may help improve the congestion
+ * that caused the first packet to be dropped.
+ */
+ while (skb) {
+ struct sk_buff *next = skb->next;
+ orig_len -= skb->len - mutable->tunnel_hlen;
+ kfree_skb(skb);
+ skb = next;
+ };
+ return orig_len;
}
int tnl_send(struct vport *vport, struct sk_buff *skb)
@@ -847,6 +866,9 @@ int tnl_send(struct vport *vport, struct sk_buff *skb)
skb = next_skb;
} while (skb);
+ if (unlikely(orig_len == 0))
+ vport_record_error(vport, VPORT_E_TX_DROPPED);
+
return orig_len;
error_free:
@@ -914,6 +936,7 @@ struct vport *tnl_create(const char *name, const void __user *config,
{
struct vport *vport;
struct tnl_vport *tnl_vport;
+ int initial_frag_id;
int err;
vport = vport_alloc(sizeof(struct tnl_vport), vport_ops);
@@ -936,6 +959,9 @@ struct vport *tnl_create(const char *name, const void __user *config,
vport_gen_rand_ether_addr(tnl_vport->mutable->eth_addr);
tnl_vport->mutable->mtu = ETH_DATA_LEN;
+ get_random_bytes(&initial_frag_id, sizeof(int));
+ atomic_set(&tnl_vport->frag_id, initial_frag_id);
+
err = set_config(config, tnl_ops, NULL, tnl_vport->mutable);
if (err)
goto error_free_mutable;
diff --git a/datapath/tunnel.h b/datapath/tunnel.h
index 89e73bac..92963d76 100644
--- a/datapath/tunnel.h
+++ b/datapath/tunnel.h
@@ -13,8 +13,10 @@
#include "table.h"
#include "vport.h"
-/* The absolute minimum fragment size. Note that there are many other
- * definitions of the minimum MTU. */
+/*
+ * The absolute minimum fragment size. Note that there are many other
+ * definitions of the minimum MTU.
+ */
#define IP_MIN_MTU 68
/*
@@ -47,9 +49,24 @@ struct tnl_ops {
u32 tunnel_type;
u8 ipproto;
+ /*
+ * Returns the length of the tunnel header you will add in
+ * build_header() (i.e. excludes the IP header). Returns a negative
+ * error code if the configuration is invalid.
+ */
int (*hdr_len)(const struct tnl_port_config *);
- void (*build_header)(struct sk_buff *, const struct vport *,
- const struct tnl_mutable_config *);
+
+ /*
+ * Returns a linked list of SKBs with tunnel headers (multiple
+ * packets may be generated in the event of fragmentation). Space
+ * will have already been allocated at the start of the packet equal
+ * to sizeof(struct iphdr) + value returned by hdr_len(). The IP
+ * header will have already been constructed.
+ */
+ struct sk_buff *(*build_header)(struct sk_buff *,
+ const struct vport *,
+ const struct tnl_mutable_config *,
+ struct dst_entry *);
};
struct tnl_vport {
@@ -61,6 +78,8 @@ struct tnl_vport {
/* Protected by RCU. */
struct tnl_mutable_config *mutable;
+
+ atomic_t frag_id;
};
int tnl_init(void);
diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
index 31d2d4f6..223644e2 100644
--- a/datapath/vport-gre.c
+++ b/datapath/vport-gre.c
@@ -48,9 +48,10 @@ static int gre_hdr_len(const struct tnl_port_config *port_config)
return len;
}
-static void gre_build_header(struct sk_buff *skb,
- const struct vport *vport,
- const struct tnl_mutable_config *mutable)
+static struct sk_buff *gre_build_header(struct sk_buff *skb,
+ const struct vport *vport,
+ const struct tnl_mutable_config *mutable,
+ struct dst_entry *dst)
{
struct gre_base_hdr *greh = (struct gre_base_hdr *)skb_transport_header(skb);
__be32 *options = (__be32 *)(skb_network_header(skb) + mutable->tunnel_hlen
@@ -81,6 +82,14 @@ static void gre_build_header(struct sk_buff *skb,
skb->len - sizeof(struct iphdr),
0));
}
+
+ /*
+ * Allow our local IP stack to fragment the outer packet even if the
+ * DF bit is set as a last resort.
+ */
+ skb->local_df = 1;
+
+ return skb;
}
static int parse_header(struct iphdr *iph, __be16 *flags, __be32 *key)