Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/devinet.c      3
-rw-r--r--  net/ipv4/ip_forward.c  71
-rw-r--r--  net/ipv4/route.c        4
-rw-r--r--  net/ipv4/tcp_output.c  13
4 files changed, 84 insertions, 7 deletions
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 9e38217c3931..e40eef4ac697 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1433,7 +1433,8 @@ static size_t inet_nlmsg_size(void)
+ nla_total_size(4) /* IFA_ADDRESS */
+ nla_total_size(4) /* IFA_LOCAL */
+ nla_total_size(4) /* IFA_BROADCAST */
- + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
+ + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
+ + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
}
static inline u32 cstamp_delta(unsigned long cstamp)
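The devinet.c change grows the size estimate returned by inet_nlmsg_size() so the netlink skb allocated for an RTM_NEWADDR message also has room for the IFA_CACHEINFO attribute that the fill routine emits. As a rough illustration of how that per-attribute accounting adds up, here is a standalone userspace sketch (not kernel code); the macros and structs are re-declared locally to mirror the uapi headers, and the printed numbers are only illustrative.

/*
 * Userspace sketch: nla_total_size() style accounting per attribute.
 * Definitions below mirror include/uapi/linux/netlink.h, linux/if.h
 * and linux/if_addr.h.
 */
#include <stdint.h>
#include <stdio.h>

struct nlattr { uint16_t nla_len; uint16_t nla_type; };
struct ifa_cacheinfo { uint32_t ifa_prefered, ifa_valid, cstamp, tstamp; };

#define IFNAMSIZ	16
#define NLA_ALIGNTO	4
#define NLA_ALIGN(len)	(((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
#define NLA_HDRLEN	((int)NLA_ALIGN(sizeof(struct nlattr)))

/* same formula as the kernel helper: attribute header plus payload, aligned */
static int nla_total_size(int payload)
{
	return NLA_ALIGN(NLA_HDRLEN + payload);
}

int main(void)
{
	printf("IFA_LABEL     needs %d bytes\n", nla_total_size(IFNAMSIZ));
	printf("IFA_CACHEINFO needs %d bytes\n",
	       nla_total_size((int)sizeof(struct ifa_cacheinfo)));
	return 0;
}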
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 694de3b7aebf..98d7e53d2afd 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -39,6 +39,71 @@
#include <net/route.h>
#include <net/xfrm.h>
+static bool ip_may_fragment(const struct sk_buff *skb)
+{
+ return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
+ !skb->local_df;
+}
+
+static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu || skb->local_df)
+ return false;
+
+ if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+ return false;
+
+ return true;
+}
+
+static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb)
+{
+ unsigned int mtu;
+
+ if (skb->local_df || !skb_is_gso(skb))
+ return false;
+
+ mtu = dst_mtu(skb_dst(skb));
+
+ /* if seglen > mtu, do software segmentation for IP fragmentation on
+ * output. DF bit cannot be set since ip_forward would have sent
+ * icmp error.
+ */
+ return skb_gso_network_seglen(skb) > mtu;
+}
+
+/* called if GSO skb needs to be fragmented on forward */
+static int ip_forward_finish_gso(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ netdev_features_t features;
+ struct sk_buff *segs;
+ int ret = 0;
+
+ features = netif_skb_dev_features(skb, dst->dev);
+ segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+ if (IS_ERR(segs)) {
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+
+ consume_skb(skb);
+
+ do {
+ struct sk_buff *nskb = segs->next;
+ int err;
+
+ segs->next = NULL;
+ err = dst_output(segs);
+
+ if (err && ret == 0)
+ ret = err;
+ segs = nskb;
+ } while (segs);
+
+ return ret;
+}
+
static int ip_forward_finish(struct sk_buff *skb)
{
struct ip_options *opt = &(IPCB(skb)->opt);
@@ -49,6 +114,9 @@ static int ip_forward_finish(struct sk_buff *skb)
if (unlikely(opt->optlen))
ip_forward_options(skb);
+ if (ip_gso_exceeds_dst_mtu(skb))
+ return ip_forward_finish_gso(skb);
+
return dst_output(skb);
}
@@ -88,8 +156,7 @@ int ip_forward(struct sk_buff *skb)
if (opt->is_strictroute && rt->rt_uses_gateway)
goto sr_failed;
- if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) &&
- (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
+ if (!ip_may_fragment(skb) && ip_exceeds_mtu(skb, dst_mtu(&rt->dst))) {
IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(dst_mtu(&rt->dst)));
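With this change, a GSO skb whose individual segments would still exceed the route MTU is handed to ip_forward_finish_gso(), which segments it in software and pushes each resulting skb through dst_output() so per-segment IP fragmentation can happen on output (the DF case has already been rejected above with ICMP_FRAG_NEEDED). The following is a standalone userspace model of that detach-and-send loop, not kernel code: struct seg and send_one() are stand-ins for sk_buff and dst_output().

/* Userspace model of the loop in ip_forward_finish_gso(): walk the singly
 * linked list returned by segmentation, detach each node before handing it
 * to the output path, and report the first error seen.
 */
#include <stdio.h>
#include <stdlib.h>

struct seg {
	struct seg *next;
	int id;
};

static int send_one(struct seg *s)	/* stand-in for dst_output() */
{
	printf("output segment %d\n", s->id);
	free(s);			/* the output path consumes the segment */
	return 0;
}

static int send_all(struct seg *segs)
{
	int ret = 0;

	while (segs) {
		struct seg *next = segs->next;
		int err;

		segs->next = NULL;	/* detach before sending */
		err = send_one(segs);
		if (err && ret == 0)	/* remember only the first error */
			ret = err;
		segs = next;
	}
	return ret;
}

int main(void)
{
	struct seg *head = NULL, **tail = &head;

	for (int i = 0; i < 3; i++) {
		struct seg *s = calloc(1, sizeof(*s));

		if (!s)
			return 1;
		s->id = i;
		*tail = s;
		tail = &s->next;
	}
	return send_all(head);
}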
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f6c6ab14da41..1a362f375e67 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1544,6 +1544,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_gateway = 0;
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
+ RT_CACHE_STAT_INC(in_slow_tot);
rth->dst.input = ip_forward;
rth->dst.output = ip_output;
@@ -1645,8 +1646,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (err != 0)
goto no_route;
- RT_CACHE_STAT_INC(in_slow_tot);
-
if (res.type == RTN_BROADCAST)
goto brd_input;
@@ -1715,6 +1714,7 @@ local_input:
rth->rt_gateway = 0;
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
+ RT_CACHE_STAT_INC(in_slow_tot);
if (res.type == RTN_UNREACHABLE) {
rth->dst.input= ip_error;
rth->dst.error= -err;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5560abfe6d30..d2df17940e07 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -686,7 +686,8 @@ static void tcp_tsq_handler(struct sock *sk)
if ((1 << sk->sk_state) &
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
- tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
+ tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
+ 0, GFP_ATOMIC);
}
/*
* One tasklet per cpu tries to send more skbs.
@@ -1875,7 +1876,15 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
if (atomic_read(&sk->sk_wmem_alloc) > limit) {
set_bit(TSQ_THROTTLED, &tp->tsq_flags);
- break;
+ /* It is possible TX completion already happened
+ * before we set TSQ_THROTTLED, so we must
+ * test again the condition.
+ * We abuse smp_mb__after_clear_bit() because
+ * there is no smp_mb__after_set_bit() yet
+ */
+ smp_mb__after_clear_bit();
+ if (atomic_read(&sk->sk_wmem_alloc) > limit)
+ break;
}
limit = mss_now;
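The second tcp_output.c hunk closes a race with TX completion: sk_wmem_alloc can drop below the limit after the first test but before TSQ_THROTTLED is set, in which case nothing would ever reschedule the transmit. Making the flag globally visible first (set_bit plus a full barrier, here borrowed from smp_mb__after_clear_bit()) and only then re-reading the counter guarantees that either the re-check sees the lower value and transmission continues, or the completion path sees the flag and reschedules. Below is a C11-atomics sketch of the same ordering pattern, not kernel code: wmem, throttled and reschedule() are simplified stand-ins for sk_wmem_alloc, TSQ_THROTTLED and the TSQ tasklet.

/* C11 sketch of the "set flag, full barrier, re-check counter" pattern. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int  wmem      = 4096;	/* bytes still queued for transmit */
static atomic_bool throttled = false;

static void reschedule(void)		/* stand-in for scheduling the tasklet */
{
	printf("xmit rescheduled\n");
}

/* transmit path: decide whether to stop because too much data is in flight */
static bool should_stop(int limit)
{
	if (atomic_load(&wmem) <= limit)
		return false;

	/* seq_cst store plays the role of set_bit() plus the full barrier:
	 * the flag is visible before wmem is read again.
	 */
	atomic_store(&throttled, true);

	/* re-check: a completion that ran in between either lowered wmem
	 * (we keep sending) or will observe the flag and call reschedule(),
	 * so progress is guaranteed either way.
	 */
	return atomic_load(&wmem) > limit;
}

/* TX completion path: release bytes, then wake the transmit path if needed */
static void tx_complete(int bytes)
{
	atomic_fetch_sub(&wmem, bytes);
	if (atomic_exchange(&throttled, false))
		reschedule();
}

int main(void)
{
	printf("stop? %d\n", should_stop(1024));	/* 4096 > 1024: stop */
	tx_complete(4096);				/* reschedules xmit  */
	printf("stop? %d\n", should_stop(1024));	/* 0 <= 1024: go on  */
	return 0;
}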