diff options
-rw-r--r-- | datapath/tunnel.c | 56 | ||||
-rw-r--r-- | datapath/tunnel.h | 27 | ||||
-rw-r--r-- | datapath/vport-gre.c | 15 |
3 files changed, 76 insertions, 22 deletions
diff --git a/datapath/tunnel.c b/datapath/tunnel.c index 3f25c9b4..6fa369be 100644 --- a/datapath/tunnel.c +++ b/datapath/tunnel.c @@ -680,29 +680,48 @@ static int build_packet(struct vport *vport, const struct tnl_mutable_config *mu new_iph->frag_off = frag_off; ip_select_ident(new_iph, &rt_dst(rt), NULL); - tnl_vport->tnl_ops->build_header(skb, vport, mutable); + memset(&IPCB(skb)->opt, 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags = 0; - /* Allow our local IP stack to fragment the outer packet even if the - * DF bit is set as a last resort. */ - skb->local_df = 1; + skb = tnl_vport->tnl_ops->build_header(skb, vport, mutable, &rt_dst(rt)); + if (unlikely(!skb)) + goto error; - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags = 0; + while (skb) { + struct sk_buff *next = skb->next; + int frag_len = skb->len - mutable->tunnel_hlen; - err = ip_local_out(skb); - if (likely(net_xmit_eval(err) == 0)) - return orig_len; - else { - vport_record_error(vport, VPORT_E_TX_ERROR); - return 0; - } + skb->next = NULL; + + err = ip_local_out(skb); + if (unlikely(net_xmit_eval(err) != 0)) { + orig_len -= frag_len; + skb = next; + goto free_frags; + } + + skb = next; + }; + + return orig_len; error_free: kfree_skb(skb); error: - vport_record_error(vport, VPORT_E_TX_DROPPED); - return 0; +free_frags: + /* + * There's no point in continuing to send fragments once one has been + * dropped so just free the rest. This may help improve the congestion + * that caused the first packet to be dropped. + */ + while (skb) { + struct sk_buff *next = skb->next; + orig_len -= skb->len - mutable->tunnel_hlen; + kfree_skb(skb); + skb = next; + }; + return orig_len; } int tnl_send(struct vport *vport, struct sk_buff *skb) @@ -847,6 +866,9 @@ int tnl_send(struct vport *vport, struct sk_buff *skb) skb = next_skb; } while (skb); + if (unlikely(orig_len == 0)) + vport_record_error(vport, VPORT_E_TX_DROPPED); + return orig_len; error_free: @@ -914,6 +936,7 @@ struct vport *tnl_create(const char *name, const void __user *config, { struct vport *vport; struct tnl_vport *tnl_vport; + int initial_frag_id; int err; vport = vport_alloc(sizeof(struct tnl_vport), vport_ops); @@ -936,6 +959,9 @@ struct vport *tnl_create(const char *name, const void __user *config, vport_gen_rand_ether_addr(tnl_vport->mutable->eth_addr); tnl_vport->mutable->mtu = ETH_DATA_LEN; + get_random_bytes(&initial_frag_id, sizeof(int)); + atomic_set(&tnl_vport->frag_id, initial_frag_id); + err = set_config(config, tnl_ops, NULL, tnl_vport->mutable); if (err) goto error_free_mutable; diff --git a/datapath/tunnel.h b/datapath/tunnel.h index 89e73bac..92963d76 100644 --- a/datapath/tunnel.h +++ b/datapath/tunnel.h @@ -13,8 +13,10 @@ #include "table.h" #include "vport.h" -/* The absolute minimum fragment size. Note that there are many other - * definitions of the minimum MTU. */ +/* + * The absolute minimum fragment size. Note that there are many other + * definitions of the minimum MTU. + */ #define IP_MIN_MTU 68 /* @@ -47,9 +49,24 @@ struct tnl_ops { u32 tunnel_type; u8 ipproto; + /* + * Returns the length of the tunnel header you will add in + * build_header() (i.e. excludes the IP header). Returns a negative + * error code if the configuration is invalid. + */ int (*hdr_len)(const struct tnl_port_config *); - void (*build_header)(struct sk_buff *, const struct vport *, - const struct tnl_mutable_config *); + + /* + * Returns a linked list of SKBs with tunnel headers (multiple + * packets may be generated in the event of fragmentation). Space + * will have already been allocated at the start of the packet equal + * to sizeof(struct iphdr) + value returned by hdr_len(). The IP + * header will have already been constructed. + */ + struct sk_buff *(*build_header)(struct sk_buff *, + const struct vport *, + const struct tnl_mutable_config *, + struct dst_entry *); }; struct tnl_vport { @@ -61,6 +78,8 @@ struct tnl_vport { /* Protected by RCU. */ struct tnl_mutable_config *mutable; + + atomic_t frag_id; }; int tnl_init(void); diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c index 31d2d4f6..223644e2 100644 --- a/datapath/vport-gre.c +++ b/datapath/vport-gre.c @@ -48,9 +48,10 @@ static int gre_hdr_len(const struct tnl_port_config *port_config) return len; } -static void gre_build_header(struct sk_buff *skb, - const struct vport *vport, - const struct tnl_mutable_config *mutable) +static struct sk_buff *gre_build_header(struct sk_buff *skb, + const struct vport *vport, + const struct tnl_mutable_config *mutable, + struct dst_entry *dst) { struct gre_base_hdr *greh = (struct gre_base_hdr *)skb_transport_header(skb); __be32 *options = (__be32 *)(skb_network_header(skb) + mutable->tunnel_hlen @@ -81,6 +82,14 @@ static void gre_build_header(struct sk_buff *skb, skb->len - sizeof(struct iphdr), 0)); } + + /* + * Allow our local IP stack to fragment the outer packet even if the + * DF bit is set as a last resort. + */ + skb->local_df = 1; + + return skb; } static int parse_header(struct iphdr *iph, __be16 *flags, __be32 *key) |