summaryrefslogtreecommitdiff
path: root/net/ipv6
diff options
context:
space:
mode:
authorJohn Stultz <john.stultz@linaro.org>2013-10-16 11:56:25 -0700
committerJohn Stultz <john.stultz@linaro.org>2013-10-16 11:56:25 -0700
commit74d90f8a9859e741344b68849efbeb216042285f (patch)
treef43f7ba8345d28044b2d90820b475c63a10308bb /net/ipv6
parent763b334bd467a1f9453fe19ab19354e3b8c351e2 (diff)
parent4a10c2ac2f368583138b774ca41fac4207911983 (diff)
Merge tag 'v3.12-rc2' into linaro-android-3.12
Merged android/3.10 branch w/ Linux 3.12-rc2 Lots and lots of conflicts. Reverted most of the android bluetooth changes. Also EVIOCGSUSPENDBLOCK/EVIOCSSUSPENDBLOCK have new ioctl numbers, due to a collision with EVIOCREVOKE which may cause some problems with existing userland. Signed-off-by: John Stultz <john.stultz@linaro.org> Conflicts: arch/arm/common/Kconfig arch/arm/kernel/process.c arch/arm/mm/cache-l2x0.c arch/arm/mm/mmu.c drivers/cpufreq/cpufreq_stats.c drivers/hid/hid-multitouch.c drivers/input/evdev.c drivers/mmc/card/block.c drivers/mmc/core/core.c drivers/mmc/core/sd.c drivers/power/power_supply_core.c drivers/usb/gadget/f_rndis.c drivers/usb/phy/Kconfig drivers/usb/phy/Makefile fs/fat/fat.h fs/fat/inode.c fs/select.c include/linux/cgroup.h include/net/bluetooth/sco.h include/uapi/linux/msdos_fs.h kernel/futex.c kernel/printk.c kernel/sched/core.c kernel/time/alarmtimer.c kernel/trace/trace_functions_graph.c lib/Kconfig.debug mm/page_alloc.c net/Makefile net/bluetooth/hci_conn.c net/bluetooth/hci_event.c net/bluetooth/l2cap_core.c net/bluetooth/sco.c net/ipv4/netfilter/Kconfig net/ipv6/netfilter/Kconfig net/netfilter/xt_socket.c net/wireless/sme.c scripts/kconfig/confdata.c scripts/kconfig/lkc.h
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/Makefile2
-rw-r--r--net/ipv6/addrconf.c502
-rw-r--r--net/ipv6/addrconf_core.c51
-rw-r--r--net/ipv6/addrlabel.c48
-rw-r--r--net/ipv6/af_inet6.c33
-rw-r--r--net/ipv6/ah6.c2
-rw-r--r--net/ipv6/datagram.c27
-rw-r--r--net/ipv6/esp6.c4
-rw-r--r--net/ipv6/exthdrs.c6
-rw-r--r--net/ipv6/exthdrs_core.c2
-rw-r--r--net/ipv6/fib6_rules.c37
-rw-r--r--net/ipv6/icmp.c35
-rw-r--r--net/ipv6/ip6_fib.c74
-rw-r--r--net/ipv6/ip6_gre.c19
-rw-r--r--net/ipv6/ip6_input.c6
-rw-r--r--net/ipv6/ip6_offload.c5
-rw-r--r--net/ipv6/ip6_output.c44
-rw-r--r--net/ipv6/ip6_tunnel.c56
-rw-r--r--net/ipv6/ip6mr.c21
-rw-r--r--net/ipv6/ipcomp6.c2
-rw-r--r--net/ipv6/mcast.c354
-rw-r--r--net/ipv6/mip6.c6
-rw-r--r--net/ipv6/ndisc.c86
-rw-r--r--net/ipv6/netfilter/Kconfig13
-rw-r--r--net/ipv6/netfilter/Makefile3
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c8
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c20
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c499
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c2
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c2
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c2
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c2
-rw-r--r--net/ipv6/netfilter/ip6table_security.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c7
-rw-r--r--net/ipv6/netfilter/nf_nat_proto_icmpv6.c4
-rw-r--r--net/ipv6/output_core.c51
-rw-r--r--net/ipv6/ping.c277
-rw-r--r--net/ipv6/proc.c4
-rw-r--r--net/ipv6/raw.c58
-rw-r--r--net/ipv6/reassembly.c5
-rw-r--r--net/ipv6/route.c220
-rw-r--r--net/ipv6/sit.c211
-rw-r--r--net/ipv6/syncookies.c25
-rw-r--r--net/ipv6/sysctl_net_ipv6.c4
-rw-r--r--net/ipv6/tcp_ipv6.c17
-rw-r--r--net/ipv6/udp.c62
-rw-r--r--net/ipv6/udp_offload.c108
-rw-r--r--net/ipv6/xfrm6_output.c21
-rw-r--r--net/ipv6/xfrm6_state.c1
49 files changed, 2296 insertions, 754 deletions
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 9af088d2cdaa..470a9c008e9b 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_IPV6) += ipv6.o
ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
addrlabel.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
- raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
+ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o
ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4ab4c38958c6..d6ff12617f36 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -99,9 +99,9 @@
#define ACONF_DEBUG 2
#if ACONF_DEBUG >= 3
-#define ADBG(x) printk x
+#define ADBG(fmt, ...) printk(fmt, ##__VA_ARGS__)
#else
-#define ADBG(x)
+#define ADBG(fmt, ...) do { if (0) printk(fmt, ##__VA_ARGS__); } while (0)
#endif
#define INFINITY_LIFE_TIME 0xFFFFFFFF
@@ -177,6 +177,8 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.accept_redirects = 1,
.autoconf = 1,
.force_mld_version = 0,
+ .mldv1_unsolicited_report_interval = 10 * HZ,
+ .mldv2_unsolicited_report_interval = HZ,
.dad_transmits = 1,
.rtr_solicits = MAX_RTR_SOLICITATIONS,
.rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
@@ -202,6 +204,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.accept_source_route = 0, /* we do not accept RH0 by default. */
.disable_ipv6 = 0,
.accept_dad = 1,
+ .suppress_frag_ndisc = 1,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -211,6 +214,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.accept_ra = 1,
.accept_redirects = 1,
.autoconf = 1,
+ .force_mld_version = 0,
+ .mldv1_unsolicited_report_interval = 10 * HZ,
+ .mldv2_unsolicited_report_interval = HZ,
.dad_transmits = 1,
.rtr_solicits = MAX_RTR_SOLICITATIONS,
.rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
@@ -236,54 +242,41 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.accept_source_route = 0, /* we do not accept RH0 by default. */
.disable_ipv6 = 0,
.accept_dad = 1,
+ .suppress_frag_ndisc = 1,
};
-/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
-const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
-const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
-const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
-const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
-const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
-const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
-const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
-
/* Check if a valid qdisc is available */
static inline bool addrconf_qdisc_ok(const struct net_device *dev)
{
return !qdisc_tx_is_noop(dev);
}
-static void addrconf_del_timer(struct inet6_ifaddr *ifp)
+static void addrconf_del_rs_timer(struct inet6_dev *idev)
+{
+ if (del_timer(&idev->rs_timer))
+ __in6_dev_put(idev);
+}
+
+static void addrconf_del_dad_timer(struct inet6_ifaddr *ifp)
{
- if (del_timer(&ifp->timer))
+ if (del_timer(&ifp->dad_timer))
__in6_ifa_put(ifp);
}
-enum addrconf_timer_t {
- AC_NONE,
- AC_DAD,
- AC_RS,
-};
+static void addrconf_mod_rs_timer(struct inet6_dev *idev,
+ unsigned long when)
+{
+ if (!timer_pending(&idev->rs_timer))
+ in6_dev_hold(idev);
+ mod_timer(&idev->rs_timer, jiffies + when);
+}
-static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
- enum addrconf_timer_t what,
- unsigned long when)
+static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp,
+ unsigned long when)
{
- if (!del_timer(&ifp->timer))
+ if (!timer_pending(&ifp->dad_timer))
in6_ifa_hold(ifp);
-
- switch (what) {
- case AC_DAD:
- ifp->timer.function = addrconf_dad_timer;
- break;
- case AC_RS:
- ifp->timer.function = addrconf_rs_timer;
- break;
- default:
- break;
- }
- ifp->timer.expires = jiffies + when;
- add_timer(&ifp->timer);
+ mod_timer(&ifp->dad_timer, jiffies + when);
}
static int snmp6_alloc_dev(struct inet6_dev *idev)
@@ -311,35 +304,6 @@ err_ip:
return -ENOMEM;
}
-static void snmp6_free_dev(struct inet6_dev *idev)
-{
- kfree(idev->stats.icmpv6msgdev);
- kfree(idev->stats.icmpv6dev);
- snmp_mib_free((void __percpu **)idev->stats.ipv6);
-}
-
-/* Nobody refers to this device, we may destroy it. */
-
-void in6_dev_finish_destroy(struct inet6_dev *idev)
-{
- struct net_device *dev = idev->dev;
-
- WARN_ON(!list_empty(&idev->addr_list));
- WARN_ON(idev->mc_list != NULL);
-
-#ifdef NET_REFCNT_DEBUG
- pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL");
-#endif
- dev_put(dev);
- if (!idev->dead) {
- pr_warn("Freeing alive inet6 device %p\n", idev);
- return;
- }
- snmp6_free_dev(idev);
- kfree_rcu(idev, rcu);
-}
-EXPORT_SYMBOL(in6_dev_finish_destroy);
-
static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
{
struct inet6_dev *ndev;
@@ -357,7 +321,8 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
rwlock_init(&ndev->lock);
ndev->dev = dev;
INIT_LIST_HEAD(&ndev->addr_list);
-
+ setup_timer(&ndev->rs_timer, addrconf_rs_timer,
+ (unsigned long)ndev);
memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf));
ndev->cnf.mtu6 = dev->mtu;
ndev->cnf.sysctl = NULL;
@@ -372,9 +337,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
dev_hold(dev);
if (snmp6_alloc_dev(ndev) < 0) {
- ADBG((KERN_WARNING
+ ADBG(KERN_WARNING
"%s: cannot allocate memory for statistics; dev=%s.\n",
- __func__, dev->name));
+ __func__, dev->name);
neigh_parms_release(&nd_tbl, ndev->nd_parms);
dev_put(dev);
kfree(ndev);
@@ -382,9 +347,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
}
if (snmp6_register_dev(ndev) < 0) {
- ADBG((KERN_WARNING
+ ADBG(KERN_WARNING
"%s: cannot create /proc/net/dev_snmp6/%s\n",
- __func__, dev->name));
+ __func__, dev->name);
neigh_parms_release(&nd_tbl, ndev->nd_parms);
ndev->dead = 1;
in6_dev_finish_destroy(ndev);
@@ -776,7 +741,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
in6_dev_put(ifp->idev);
- if (del_timer(&ifp->timer))
+ if (del_timer(&ifp->dad_timer))
pr_notice("Timer is still running, when freeing ifa=%p\n", ifp);
if (ifp->state != INET6_IFADDR_STATE_DEAD) {
@@ -816,8 +781,9 @@ static u32 inet6_addr_hash(const struct in6_addr *addr)
/* On success it returns ifp with increased reference count */
static struct inet6_ifaddr *
-ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
- int scope, u32 flags)
+ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
+ const struct in6_addr *peer_addr, int pfxlen,
+ int scope, u32 flags, u32 valid_lft, u32 prefered_lft)
{
struct inet6_ifaddr *ifa = NULL;
struct rt6_info *rt;
@@ -846,7 +812,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
/* Ignore adding duplicate addresses on an interface */
if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) {
- ADBG(("ipv6_add_addr: already assigned\n"));
+ ADBG("ipv6_add_addr: already assigned\n");
err = -EEXIST;
goto out;
}
@@ -854,7 +820,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
if (ifa == NULL) {
- ADBG(("ipv6_add_addr: malloc failed\n"));
+ ADBG("ipv6_add_addr: malloc failed\n");
err = -ENOBUFS;
goto out;
}
@@ -866,15 +832,19 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
}
ifa->addr = *addr;
+ if (peer_addr)
+ ifa->peer_addr = *peer_addr;
spin_lock_init(&ifa->lock);
spin_lock_init(&ifa->state_lock);
- init_timer(&ifa->timer);
+ setup_timer(&ifa->dad_timer, addrconf_dad_timer,
+ (unsigned long)ifa);
INIT_HLIST_NODE(&ifa->addr_lst);
- ifa->timer.data = (unsigned long) ifa;
ifa->scope = scope;
ifa->prefix_len = pfxlen;
ifa->flags = flags | IFA_F_TENTATIVE;
+ ifa->valid_lft = valid_lft;
+ ifa->prefered_lft = prefered_lft;
ifa->cstamp = ifa->tstamp = jiffies;
ifa->tokenized = false;
@@ -994,7 +964,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
}
write_unlock_bh(&idev->lock);
- addrconf_del_timer(ifp);
+ addrconf_del_dad_timer(ifp);
ipv6_ifa_notify(RTM_DELADDR, ifp);
@@ -1052,7 +1022,6 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i
unsigned long regen_advance;
int tmp_plen;
int ret = 0;
- int max_addresses;
u32 addr_flags;
unsigned long now = jiffies;
@@ -1098,7 +1067,6 @@ retry:
idev->cnf.temp_prefered_lft + age -
idev->cnf.max_desync_factor);
tmp_plen = ifp->prefix_len;
- max_addresses = idev->cnf.max_addresses;
tmp_tstamp = ifp->tstamp;
spin_unlock_bh(&ifp->lock);
@@ -1124,12 +1092,10 @@ retry:
if (ifp->flags & IFA_F_OPTIMISTIC)
addr_flags |= IFA_F_OPTIMISTIC;
- ift = !max_addresses ||
- ipv6_count_addresses(idev) < max_addresses ?
- ipv6_add_addr(idev, &addr, tmp_plen,
- ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK,
- addr_flags) : NULL;
- if (IS_ERR_OR_NULL(ift)) {
+ ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen,
+ ipv6_addr_scope(&addr), addr_flags,
+ tmp_valid_lft, tmp_prefered_lft);
+ if (IS_ERR(ift)) {
in6_ifa_put(ifp);
in6_dev_put(idev);
pr_info("%s: retry temporary address regeneration\n", __func__);
@@ -1140,8 +1106,6 @@ retry:
spin_lock_bh(&ift->lock);
ift->ifpub = ifp;
- ift->valid_lft = tmp_valid_lft;
- ift->prefered_lft = tmp_prefered_lft;
ift->cstamp = now;
ift->tstamp = tmp_tstamp;
spin_unlock_bh(&ift->lock);
@@ -1448,6 +1412,23 @@ try_nextdev:
}
EXPORT_SYMBOL(ipv6_dev_get_saddr);
+int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
+ unsigned char banned_flags)
+{
+ struct inet6_ifaddr *ifp;
+ int err = -EADDRNOTAVAIL;
+
+ list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ if (ifp->scope == IFA_LINK &&
+ !(ifp->flags & banned_flags)) {
+ *addr = ifp->addr;
+ err = 0;
+ break;
+ }
+ }
+ return err;
+}
+
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
unsigned char banned_flags)
{
@@ -1457,17 +1438,8 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
rcu_read_lock();
idev = __in6_dev_get(dev);
if (idev) {
- struct inet6_ifaddr *ifp;
-
read_lock_bh(&idev->lock);
- list_for_each_entry(ifp, &idev->addr_list, if_list) {
- if (ifp->scope == IFA_LINK &&
- !(ifp->flags & banned_flags)) {
- *addr = ifp->addr;
- err = 0;
- break;
- }
- }
+ err = __ipv6_get_lladdr(idev, addr, banned_flags);
read_unlock_bh(&idev->lock);
}
rcu_read_unlock();
@@ -1581,7 +1553,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
{
if (ifp->flags&IFA_F_PERMANENT) {
spin_lock_bh(&ifp->lock);
- addrconf_del_timer(ifp);
+ addrconf_del_dad_timer(ifp);
ifp->flags |= IFA_F_TENTATIVE;
if (dad_failed)
ifp->flags |= IFA_F_DADFAILED;
@@ -1801,6 +1773,16 @@ static int addrconf_ifid_gre(u8 *eui, struct net_device *dev)
return __ipv6_isatap_ifid(eui, *(__be32 *)dev->dev_addr);
}
+static int addrconf_ifid_ip6tnl(u8 *eui, struct net_device *dev)
+{
+ memcpy(eui, dev->perm_addr, 3);
+ memcpy(eui + 5, dev->perm_addr + 3, 3);
+ eui[3] = 0xFF;
+ eui[4] = 0xFE;
+ eui[0] ^= 2;
+ return 0;
+}
+
static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
{
switch (dev->type) {
@@ -1819,6 +1801,8 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
return addrconf_ifid_eui64(eui, dev);
case ARPHRD_IEEE1394:
return addrconf_ifid_ieee1394(eui, dev);
+ case ARPHRD_TUNNEL6:
+ return addrconf_ifid_ip6tnl(eui, dev);
}
return -1;
}
@@ -2044,7 +2028,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
pinfo = (struct prefix_info *) opt;
if (len < sizeof(struct prefix_info)) {
- ADBG(("addrconf: prefix option too short\n"));
+ ADBG("addrconf: prefix option too short\n");
return;
}
@@ -2175,16 +2159,19 @@ ok:
*/
if (!max_addresses ||
ipv6_count_addresses(in6_dev) < max_addresses)
- ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len,
+ ifp = ipv6_add_addr(in6_dev, &addr, NULL,
+ pinfo->prefix_len,
addr_type&IPV6_ADDR_SCOPE_MASK,
- addr_flags);
+ addr_flags, valid_lft,
+ prefered_lft);
if (IS_ERR_OR_NULL(ifp)) {
in6_dev_put(in6_dev);
return;
}
- update_lft = create = 1;
+ update_lft = 0;
+ create = 1;
ifp->cstamp = jiffies;
ifp->tokenized = tokenized;
addrconf_dad_start(ifp);
@@ -2205,7 +2192,7 @@ ok:
stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
else
stored_lft = 0;
- if (!update_lft && stored_lft) {
+ if (!update_lft && !create && stored_lft) {
if (valid_lft > MIN_VALID_LIFETIME ||
valid_lft > stored_lft)
update_lft = 1;
@@ -2402,6 +2389,7 @@ err_exit:
* Manual configuration of address on an interface
*/
static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *pfx,
+ const struct in6_addr *peer_pfx,
unsigned int plen, __u8 ifa_flags, __u32 prefered_lft,
__u32 valid_lft)
{
@@ -2450,15 +2438,10 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p
prefered_lft = timeout;
}
- ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags);
+ ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags,
+ valid_lft, prefered_lft);
if (!IS_ERR(ifp)) {
- spin_lock_bh(&ifp->lock);
- ifp->valid_lft = valid_lft;
- ifp->prefered_lft = prefered_lft;
- ifp->tstamp = jiffies;
- spin_unlock_bh(&ifp->lock);
-
addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
expires, flags);
/*
@@ -2500,12 +2483,6 @@ static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *p
read_unlock_bh(&idev->lock);
ipv6_del_addr(ifp);
-
- /* If the last address is deleted administratively,
- disable IPv6 on this interface.
- */
- if (list_empty(&idev->addr_list))
- addrconf_ifdown(idev->dev, 1);
return 0;
}
}
@@ -2526,7 +2503,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
return -EFAULT;
rtnl_lock();
- err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr,
+ err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, NULL,
ireq.ifr6_prefixlen, IFA_F_PERMANENT,
INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
rtnl_unlock();
@@ -2556,7 +2533,8 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
{
struct inet6_ifaddr *ifp;
- ifp = ipv6_add_addr(idev, addr, plen, scope, IFA_F_PERMANENT);
+ ifp = ipv6_add_addr(idev, addr, NULL, plen,
+ scope, IFA_F_PERMANENT, 0, 0);
if (!IS_ERR(ifp)) {
spin_lock_bh(&ifp->lock);
ifp->flags &= ~IFA_F_TENTATIVE;
@@ -2682,7 +2660,7 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr
#endif
- ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags);
+ ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, 0, 0);
if (!IS_ERR(ifp)) {
addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
addrconf_dad_start(ifp);
@@ -2702,7 +2680,8 @@ static void addrconf_dev_config(struct net_device *dev)
(dev->type != ARPHRD_ARCNET) &&
(dev->type != ARPHRD_INFINIBAND) &&
(dev->type != ARPHRD_IEEE802154) &&
- (dev->type != ARPHRD_IEEE1394)) {
+ (dev->type != ARPHRD_IEEE1394) &&
+ (dev->type != ARPHRD_TUNNEL6)) {
/* Alas, we support only Ethernet autoconfiguration. */
return;
}
@@ -2761,8 +2740,6 @@ static void addrconf_gre_config(struct net_device *dev)
struct inet6_dev *idev;
struct in6_addr addr;
- pr_info("%s(%s)\n", __func__, dev->name);
-
ASSERT_RTNL();
if ((idev = ipv6_find_idev(dev)) == NULL) {
@@ -2790,48 +2767,10 @@ ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
return -1;
}
-static void ip6_tnl_add_linklocal(struct inet6_dev *idev)
-{
- struct net_device *link_dev;
- struct net *net = dev_net(idev->dev);
-
- /* first try to inherit the link-local address from the link device */
- if (idev->dev->iflink &&
- (link_dev = __dev_get_by_index(net, idev->dev->iflink))) {
- if (!ipv6_inherit_linklocal(idev, link_dev))
- return;
- }
- /* then try to inherit it from any device */
- for_each_netdev(net, link_dev) {
- if (!ipv6_inherit_linklocal(idev, link_dev))
- return;
- }
- pr_debug("init ip6-ip6: add_linklocal failed\n");
-}
-
-/*
- * Autoconfigure tunnel with a link-local address so routing protocols,
- * DHCPv6, MLD etc. can be run over the virtual link
- */
-
-static void addrconf_ip6_tnl_config(struct net_device *dev)
-{
- struct inet6_dev *idev;
-
- ASSERT_RTNL();
-
- idev = addrconf_add_dev(dev);
- if (IS_ERR(idev)) {
- pr_debug("init ip6-ip6: add_dev failed\n");
- return;
- }
- ip6_tnl_add_linklocal(idev);
-}
-
static int addrconf_notify(struct notifier_block *this, unsigned long event,
- void *data)
+ void *ptr)
{
- struct net_device *dev = (struct net_device *) data;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct inet6_dev *idev = __in6_dev_get(dev);
int run_pending = 0;
int err;
@@ -2895,9 +2834,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
addrconf_gre_config(dev);
break;
#endif
- case ARPHRD_TUNNEL6:
- addrconf_ip6_tnl_config(dev);
- break;
case ARPHRD_LOOPBACK:
init_loopback(dev);
break;
@@ -3039,7 +2975,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
hlist_for_each_entry_rcu(ifa, h, addr_lst) {
if (ifa->idev == idev) {
hlist_del_init_rcu(&ifa->addr_lst);
- addrconf_del_timer(ifa);
+ addrconf_del_dad_timer(ifa);
goto restart;
}
}
@@ -3048,6 +2984,8 @@ static int addrconf_ifdown(struct net_device *dev, int how)
write_lock_bh(&idev->lock);
+ addrconf_del_rs_timer(idev);
+
/* Step 2: clear flags for stateless addrconf */
if (!how)
idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
@@ -3077,7 +3015,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
while (!list_empty(&idev->addr_list)) {
ifa = list_first_entry(&idev->addr_list,
struct inet6_ifaddr, if_list);
- addrconf_del_timer(ifa);
+ addrconf_del_dad_timer(ifa);
list_del(&ifa->if_list);
@@ -3119,10 +3057,11 @@ static int addrconf_ifdown(struct net_device *dev, int how)
static void addrconf_rs_timer(unsigned long data)
{
- struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
- struct inet6_dev *idev = ifp->idev;
+ struct inet6_dev *idev = (struct inet6_dev *)data;
+ struct net_device *dev = idev->dev;
+ struct in6_addr lladdr;
- read_lock(&idev->lock);
+ write_lock(&idev->lock);
if (idev->dead || !(idev->if_flags & IF_READY))
goto out;
@@ -3133,18 +3072,21 @@ static void addrconf_rs_timer(unsigned long data)
if (idev->if_flags & IF_RA_RCVD)
goto out;
- spin_lock(&ifp->lock);
- if (ifp->probes++ < idev->cnf.rtr_solicits) {
- /* The wait after the last probe can be shorter */
- addrconf_mod_timer(ifp, AC_RS,
- (ifp->probes == idev->cnf.rtr_solicits) ?
- idev->cnf.rtr_solicit_delay :
- idev->cnf.rtr_solicit_interval);
- spin_unlock(&ifp->lock);
+ if (idev->rs_probes++ < idev->cnf.rtr_solicits) {
+ write_unlock(&idev->lock);
+ if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE))
+ ndisc_send_rs(dev, &lladdr,
+ &in6addr_linklocal_allrouters);
+ else
+ goto put;
- ndisc_send_rs(idev->dev, &ifp->addr, &in6addr_linklocal_allrouters);
+ write_lock(&idev->lock);
+ /* The wait after the last probe can be shorter */
+ addrconf_mod_rs_timer(idev, (idev->rs_probes ==
+ idev->cnf.rtr_solicits) ?
+ idev->cnf.rtr_solicit_delay :
+ idev->cnf.rtr_solicit_interval);
} else {
- spin_unlock(&ifp->lock);
/*
* Note: we do not support deprecated "all on-link"
* assumption any longer.
@@ -3153,8 +3095,9 @@ static void addrconf_rs_timer(unsigned long data)
}
out:
- read_unlock(&idev->lock);
- in6_ifa_put(ifp);
+ write_unlock(&idev->lock);
+put:
+ in6_dev_put(idev);
}
/*
@@ -3170,8 +3113,8 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
else
rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
- ifp->probes = idev->cnf.dad_transmits;
- addrconf_mod_timer(ifp, AC_DAD, rand_num);
+ ifp->dad_probes = idev->cnf.dad_transmits;
+ addrconf_mod_dad_timer(ifp, rand_num);
}
static void addrconf_dad_start(struct inet6_ifaddr *ifp)
@@ -3232,40 +3175,40 @@ static void addrconf_dad_timer(unsigned long data)
struct inet6_dev *idev = ifp->idev;
struct in6_addr mcaddr;
- if (!ifp->probes && addrconf_dad_end(ifp))
+ if (!ifp->dad_probes && addrconf_dad_end(ifp))
goto out;
- read_lock(&idev->lock);
+ write_lock(&idev->lock);
if (idev->dead || !(idev->if_flags & IF_READY)) {
- read_unlock(&idev->lock);
+ write_unlock(&idev->lock);
goto out;
}
spin_lock(&ifp->lock);
if (ifp->state == INET6_IFADDR_STATE_DEAD) {
spin_unlock(&ifp->lock);
- read_unlock(&idev->lock);
+ write_unlock(&idev->lock);
goto out;
}
- if (ifp->probes == 0) {
+ if (ifp->dad_probes == 0) {
/*
* DAD was successful
*/
ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
spin_unlock(&ifp->lock);
- read_unlock(&idev->lock);
+ write_unlock(&idev->lock);
addrconf_dad_completed(ifp);
goto out;
}
- ifp->probes--;
- addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time);
+ ifp->dad_probes--;
+ addrconf_mod_dad_timer(ifp, ifp->idev->nd_parms->retrans_time);
spin_unlock(&ifp->lock);
- read_unlock(&idev->lock);
+ write_unlock(&idev->lock);
/* send a neighbour solicitation for our addr */
addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
@@ -3277,6 +3220,10 @@ out:
static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
{
struct net_device *dev = ifp->idev->dev;
+ struct in6_addr lladdr;
+ bool send_rs, send_mld;
+
+ addrconf_del_dad_timer(ifp);
/*
* Configure the address for reception. Now it is valid.
@@ -3288,22 +3235,41 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
router advertisements, start sending router solicitations.
*/
- if (ipv6_accept_ra(ifp->idev) &&
- ifp->idev->cnf.rtr_solicits > 0 &&
- (dev->flags&IFF_LOOPBACK) == 0 &&
- (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
+ read_lock_bh(&ifp->idev->lock);
+ spin_lock(&ifp->lock);
+ send_mld = ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL &&
+ ifp->idev->valid_ll_addr_cnt == 1;
+ send_rs = send_mld &&
+ ipv6_accept_ra(ifp->idev) &&
+ ifp->idev->cnf.rtr_solicits > 0 &&
+ (dev->flags&IFF_LOOPBACK) == 0;
+ spin_unlock(&ifp->lock);
+ read_unlock_bh(&ifp->idev->lock);
+
+ /* While dad is in progress mld report's source address is in6_addrany.
+ * Resend with proper ll now.
+ */
+ if (send_mld)
+ ipv6_mc_dad_complete(ifp->idev);
+
+ if (send_rs) {
/*
* If a host as already performed a random delay
* [...] as part of DAD [...] there is no need
* to delay again before sending the first RS
*/
- ndisc_send_rs(ifp->idev->dev, &ifp->addr, &in6addr_linklocal_allrouters);
+ if (ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE))
+ return;
+ ndisc_send_rs(dev, &lladdr, &in6addr_linklocal_allrouters);
- spin_lock_bh(&ifp->lock);
- ifp->probes = 1;
+ write_lock_bh(&ifp->idev->lock);
+ spin_lock(&ifp->lock);
+ ifp->idev->rs_probes = 1;
ifp->idev->if_flags |= IF_RS_SENT;
- addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval);
- spin_unlock_bh(&ifp->lock);
+ addrconf_mod_rs_timer(ifp->idev,
+ ifp->idev->cnf.rtr_solicit_interval);
+ spin_unlock(&ifp->lock);
+ write_unlock_bh(&ifp->idev->lock);
}
}
@@ -3606,8 +3572,8 @@ restart:
if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX))
next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX;
- ADBG((KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
- now, next, next_sec, next_sched));
+ ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
+ now, next, next_sec, next_sched);
addr_chk_timer.expires = next_sched;
add_timer(&addr_chk_timer);
@@ -3615,18 +3581,20 @@ restart:
rcu_read_unlock_bh();
}
-static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local)
+static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local,
+ struct in6_addr **peer_pfx)
{
struct in6_addr *pfx = NULL;
+ *peer_pfx = NULL;
+
if (addr)
pfx = nla_data(addr);
if (local) {
if (pfx && nla_memcmp(local, pfx, sizeof(*pfx)))
- pfx = NULL;
- else
- pfx = nla_data(local);
+ *peer_pfx = pfx;
+ pfx = nla_data(local);
}
return pfx;
@@ -3644,7 +3612,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
struct net *net = sock_net(skb->sk);
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
- struct in6_addr *pfx;
+ struct in6_addr *pfx, *peer_pfx;
int err;
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
@@ -3652,7 +3620,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
return err;
ifm = nlmsg_data(nlh);
- pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
+ pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx);
if (pfx == NULL)
return -EINVAL;
@@ -3710,7 +3678,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
struct net *net = sock_net(skb->sk);
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
- struct in6_addr *pfx;
+ struct in6_addr *pfx, *peer_pfx;
struct inet6_ifaddr *ifa;
struct net_device *dev;
u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
@@ -3722,7 +3690,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
return err;
ifm = nlmsg_data(nlh);
- pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
+ pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx);
if (pfx == NULL)
return -EINVAL;
@@ -3750,7 +3718,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
* It would be best to check for !NLM_F_CREATE here but
* userspace alreay relies on not having to provide this.
*/
- return inet6_addr_add(net, ifm->ifa_index, pfx,
+ return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx,
ifm->ifa_prefixlen, ifa_flags,
preferred_lft, valid_lft);
}
@@ -3807,6 +3775,7 @@ static inline int rt_scope(int ifa_scope)
static inline int inet6_ifaddr_msgsize(void)
{
return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
+ + nla_total_size(16) /* IFA_LOCAL */
+ nla_total_size(16) /* IFA_ADDRESS */
+ nla_total_size(sizeof(struct ifa_cacheinfo));
}
@@ -3845,13 +3814,22 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
valid = INFINITY_LIFE_TIME;
}
- if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 ||
- put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) {
- nlmsg_cancel(skb, nlh);
- return -EMSGSIZE;
- }
+ if (!ipv6_addr_any(&ifa->peer_addr)) {
+ if (nla_put(skb, IFA_LOCAL, 16, &ifa->addr) < 0 ||
+ nla_put(skb, IFA_ADDRESS, 16, &ifa->peer_addr) < 0)
+ goto error;
+ } else
+ if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0)
+ goto error;
+
+ if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0)
+ goto error;
return nlmsg_end(skb, nlh);
+
+error:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
}
static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
@@ -4051,7 +4029,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh)
struct net *net = sock_net(in_skb->sk);
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
- struct in6_addr *addr = NULL;
+ struct in6_addr *addr = NULL, *peer;
struct net_device *dev = NULL;
struct inet6_ifaddr *ifa;
struct sk_buff *skb;
@@ -4061,7 +4039,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh)
if (err < 0)
goto errout;
- addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
+ addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
if (addr == NULL) {
err = -EINVAL;
goto errout;
@@ -4141,6 +4119,10 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_RTR_SOLICIT_DELAY] =
jiffies_to_msecs(cnf->rtr_solicit_delay);
array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
+ array[DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL] =
+ jiffies_to_msecs(cnf->mldv1_unsolicited_report_interval);
+ array[DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] =
+ jiffies_to_msecs(cnf->mldv2_unsolicited_report_interval);
#ifdef CONFIG_IPV6_PRIVACY
array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;
@@ -4171,6 +4153,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify;
+ array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
}
static inline size_t inet6_ifla6_size(void)
@@ -4339,8 +4322,11 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
write_lock_bh(&idev->lock);
- if (update_rs)
+ if (update_rs) {
idev->if_flags |= IF_RS_SENT;
+ idev->rs_probes = 1;
+ addrconf_mod_rs_timer(idev, idev->cnf.rtr_solicit_interval);
+ }
/* Well, that's kinda nasty ... */
list_for_each_entry(ifp, &idev->addr_list, if_list) {
@@ -4353,6 +4339,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
}
write_unlock_bh(&idev->lock);
+ addrconf_verify(0);
return 0;
}
@@ -4550,6 +4537,19 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err);
}
+static void update_valid_ll_addr_cnt(struct inet6_ifaddr *ifp, int count)
+{
+ write_lock_bh(&ifp->idev->lock);
+ spin_lock(&ifp->lock);
+ if (((ifp->flags & (IFA_F_PERMANENT|IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|
+ IFA_F_DADFAILED)) == IFA_F_PERMANENT) &&
+ (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL))
+ ifp->idev->valid_ll_addr_cnt += count;
+ WARN_ON(ifp->idev->valid_ll_addr_cnt < 0);
+ spin_unlock(&ifp->lock);
+ write_unlock_bh(&ifp->idev->lock);
+}
+
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
{
struct net *net = dev_net(ifp->idev->dev);
@@ -4558,6 +4558,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
switch (event) {
case RTM_NEWADDR:
+ update_valid_ll_addr_cnt(ifp, 1);
+
/*
* If the address was optimistic
* we inserted the route at the start of
@@ -4568,11 +4570,28 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
ip6_ins_rt(ifp->rt);
if (ifp->idev->cnf.forwarding)
addrconf_join_anycast(ifp);
+ if (!ipv6_addr_any(&ifp->peer_addr))
+ addrconf_prefix_route(&ifp->peer_addr, 128,
+ ifp->idev->dev, 0, 0);
break;
case RTM_DELADDR:
+ update_valid_ll_addr_cnt(ifp, -1);
+
if (ifp->idev->cnf.forwarding)
addrconf_leave_anycast(ifp);
addrconf_leave_solict(ifp->idev, &ifp->addr);
+ if (!ipv6_addr_any(&ifp->peer_addr)) {
+ struct rt6_info *rt;
+ struct net_device *dev = ifp->idev->dev;
+
+ rt = rt6_lookup(dev_net(dev), &ifp->peer_addr, NULL,
+ dev->ifindex, 1);
+ if (rt) {
+ dst_hold(&rt->dst);
+ if (ip6_del_rt(rt))
+ dst_free(&rt->dst);
+ }
+ }
dst_hold(&ifp->rt->dst);
if (ip6_del_rt(ifp->rt))
@@ -4580,6 +4599,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
break;
}
atomic_inc(&net->ipv6.dev_addr_genid);
+ rt_genid_bump_ipv6(net);
}
static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
@@ -4593,13 +4613,13 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
#ifdef CONFIG_SYSCTL
static
-int addrconf_sysctl_forward(ctl_table *ctl, int write,
+int addrconf_sysctl_forward(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
int val = *valp;
loff_t pos = *ppos;
- ctl_table lctl;
+ struct ctl_table lctl;
int ret;
/*
@@ -4620,13 +4640,16 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write,
static void dev_disable_change(struct inet6_dev *idev)
{
+ struct netdev_notifier_info info;
+
if (!idev || !idev->dev)
return;
+ netdev_notifier_info_init(&info, idev->dev);
if (idev->cnf.disable_ipv6)
- addrconf_notify(NULL, NETDEV_DOWN, idev->dev);
+ addrconf_notify(NULL, NETDEV_DOWN, &info);
else
- addrconf_notify(NULL, NETDEV_UP, idev->dev);
+ addrconf_notify(NULL, NETDEV_UP, &info);
}
static void addrconf_disable_change(struct net *net, __s32 newf)
@@ -4675,13 +4698,13 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf)
}
static
-int addrconf_sysctl_disable(ctl_table *ctl, int write,
+int addrconf_sysctl_disable(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
int val = *valp;
loff_t pos = *ppos;
- ctl_table lctl;
+ struct ctl_table lctl;
int ret;
/*
@@ -4703,7 +4726,7 @@ int addrconf_sysctl_disable(ctl_table *ctl, int write,
static struct addrconf_sysctl_table
{
struct ctl_table_header *sysctl_header;
- ctl_table addrconf_vars[DEVCONF_MAX+1];
+ struct ctl_table addrconf_vars[DEVCONF_MAX+1];
} addrconf_sysctl __read_mostly = {
.sysctl_header = NULL,
.addrconf_vars = {
@@ -4784,6 +4807,22 @@ static struct addrconf_sysctl_table
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "mldv1_unsolicited_report_interval",
+ .data =
+ &ipv6_devconf.mldv1_unsolicited_report_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ },
+ {
+ .procname = "mldv2_unsolicited_report_interval",
+ .data =
+ &ipv6_devconf.mldv2_unsolicited_report_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ },
#ifdef CONFIG_IPV6_PRIVACY
{
.procname = "use_tempaddr",
@@ -4929,6 +4968,13 @@ static struct addrconf_sysctl_table
.proc_handler = proc_dointvec
},
{
+ .procname = "suppress_frag_ndisc",
+ .data = &ipv6_devconf.suppress_frag_ndisc,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
/* sentinel */
}
},
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 72104562c864..4c11cbcf8308 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -5,6 +5,8 @@
#include <linux/export.h>
#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <net/ip.h>
#define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16)
@@ -97,3 +99,52 @@ int inet6addr_notifier_call_chain(unsigned long val, void *v)
return atomic_notifier_call_chain(&inet6addr_chain, val, v);
}
EXPORT_SYMBOL(inet6addr_notifier_call_chain);
+
+const struct ipv6_stub *ipv6_stub __read_mostly;
+EXPORT_SYMBOL_GPL(ipv6_stub);
+
+/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
+const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
+EXPORT_SYMBOL(in6addr_loopback);
+const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
+EXPORT_SYMBOL(in6addr_any);
+const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
+EXPORT_SYMBOL(in6addr_linklocal_allnodes);
+const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
+EXPORT_SYMBOL(in6addr_linklocal_allrouters);
+const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
+EXPORT_SYMBOL(in6addr_interfacelocal_allnodes);
+const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
+EXPORT_SYMBOL(in6addr_interfacelocal_allrouters);
+const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
+EXPORT_SYMBOL(in6addr_sitelocal_allrouters);
+
+static void snmp6_free_dev(struct inet6_dev *idev)
+{
+ kfree(idev->stats.icmpv6msgdev);
+ kfree(idev->stats.icmpv6dev);
+ snmp_mib_free((void __percpu **)idev->stats.ipv6);
+}
+
+/* Nobody refers to this device, we may destroy it. */
+
+void in6_dev_finish_destroy(struct inet6_dev *idev)
+{
+ struct net_device *dev = idev->dev;
+
+ WARN_ON(!list_empty(&idev->addr_list));
+ WARN_ON(idev->mc_list != NULL);
+ WARN_ON(timer_pending(&idev->rs_timer));
+
+#ifdef NET_REFCNT_DEBUG
+ pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL");
+#endif
+ dev_put(dev);
+ if (!idev->dead) {
+ pr_warn("Freeing alive inet6 device %p\n", idev);
+ return;
+ }
+ snmp6_free_dev(idev);
+ kfree_rcu(idev, rcu);
+}
+EXPORT_SYMBOL(in6_dev_finish_destroy);
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index f083a583a05c..b30ad3741b46 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -251,38 +251,36 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
/* add a label */
static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
{
+ struct hlist_node *n;
+ struct ip6addrlbl_entry *last = NULL, *p = NULL;
int ret = 0;
- ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n",
- __func__,
- newp, replace);
+ ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp,
+ replace);
- if (hlist_empty(&ip6addrlbl_table.head)) {
- hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
- } else {
- struct hlist_node *n;
- struct ip6addrlbl_entry *p = NULL;
- hlist_for_each_entry_safe(p, n,
- &ip6addrlbl_table.head, list) {
- if (p->prefixlen == newp->prefixlen &&
- net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
- p->ifindex == newp->ifindex &&
- ipv6_addr_equal(&p->prefix, &newp->prefix)) {
- if (!replace) {
- ret = -EEXIST;
- goto out;
- }
- hlist_replace_rcu(&p->list, &newp->list);
- ip6addrlbl_put(p);
- goto out;
- } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
- (p->prefixlen < newp->prefixlen)) {
- hlist_add_before_rcu(&newp->list, &p->list);
+ hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
+ if (p->prefixlen == newp->prefixlen &&
+ net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
+ p->ifindex == newp->ifindex &&
+ ipv6_addr_equal(&p->prefix, &newp->prefix)) {
+ if (!replace) {
+ ret = -EEXIST;
goto out;
}
+ hlist_replace_rcu(&p->list, &newp->list);
+ ip6addrlbl_put(p);
+ goto out;
+ } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
+ (p->prefixlen < newp->prefixlen)) {
+ hlist_add_before_rcu(&newp->list, &p->list);
+ goto out;
}
- hlist_add_after_rcu(&p->list, &newp->list);
+ last = p;
}
+ if (last)
+ hlist_add_after_rcu(&last->list, &newp->list);
+ else
+ hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
out:
if (!ret)
ip6addrlbl_table.seq++;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index ebf231016b1f..4309bee83d3b 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -49,12 +49,14 @@
#include <net/udp.h>
#include <net/udplite.h>
#include <net/tcp.h>
+#include <net/ping.h>
#include <net/protocol.h>
#include <net/inet_common.h>
#include <net/route.h>
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
+#include <net/ndisc.h>
#ifdef CONFIG_IPV6_TUNNEL
#include <net/ip6_tunnel.h>
#endif
@@ -798,6 +800,7 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.bindv6only = 0;
net->ipv6.sysctl.icmpv6_time = 1*HZ;
+ atomic_set(&net->ipv6.rt_genid, 0);
err = ipv6_init_mibs(net);
if (err)
@@ -841,6 +844,15 @@ static struct pernet_operations inet6_net_ops = {
.exit = inet6_net_exit,
};
+static const struct ipv6_stub ipv6_stub_impl = {
+ .ipv6_sock_mc_join = ipv6_sock_mc_join,
+ .ipv6_sock_mc_drop = ipv6_sock_mc_drop,
+ .ipv6_dst_lookup = ip6_dst_lookup,
+ .udpv6_encap_enable = udpv6_encap_enable,
+ .ndisc_send_na = ndisc_send_na,
+ .nd_tbl = &nd_tbl,
+};
+
static int __init inet6_init(void)
{
struct list_head *r;
@@ -873,6 +885,9 @@ static int __init inet6_init(void)
if (err)
goto out_unregister_udplite_proto;
+ err = proto_register(&pingv6_prot, 1);
+ if (err)
+ goto out_unregister_ping_proto;
/* We MUST register RAW sockets before we create the ICMP6,
* IGMP6, or NDISC control sockets.
@@ -912,6 +927,9 @@ static int __init inet6_init(void)
err = igmp6_init();
if (err)
goto igmp_fail;
+
+ ipv6_stub = &ipv6_stub_impl;
+
err = ipv6_netfilter_init();
if (err)
goto netfilter_fail;
@@ -930,6 +948,9 @@ static int __init inet6_init(void)
err = ip6_route_init();
if (err)
goto ip6_route_fail;
+ err = ndisc_late_init();
+ if (err)
+ goto ndisc_late_fail;
err = ip6_flowlabel_init();
if (err)
goto ip6_flowlabel_fail;
@@ -963,6 +984,10 @@ static int __init inet6_init(void)
if (err)
goto ipv6_packet_fail;
+ err = pingv6_init();
+ if (err)
+ goto pingv6_fail;
+
#ifdef CONFIG_SYSCTL
err = ipv6_sysctl_register();
if (err)
@@ -975,6 +1000,8 @@ out:
sysctl_fail:
ipv6_packet_cleanup();
#endif
+pingv6_fail:
+ pingv6_exit();
ipv6_packet_fail:
tcpv6_exit();
tcpv6_fail:
@@ -990,6 +1017,8 @@ ipv6_exthdrs_fail:
addrconf_fail:
ip6_flowlabel_cleanup();
ip6_flowlabel_fail:
+ ndisc_late_cleanup();
+ndisc_late_fail:
ip6_route_cleanup();
ip6_route_fail:
#ifdef CONFIG_PROC_FS
@@ -1018,6 +1047,8 @@ register_pernet_fail:
rtnl_unregister_all(PF_INET6);
out_sock_register_fail:
rawv6_exit();
+out_unregister_ping_proto:
+ proto_unregister(&pingv6_prot);
out_unregister_raw_proto:
proto_unregister(&rawv6_prot);
out_unregister_udplite_proto:
@@ -1050,6 +1081,7 @@ static void __exit inet6_exit(void)
ipv6_exthdrs_exit();
addrconf_cleanup();
ip6_flowlabel_cleanup();
+ ndisc_late_cleanup();
ip6_route_cleanup();
#ifdef CONFIG_PROC_FS
@@ -1060,6 +1092,7 @@ static void __exit inet6_exit(void)
raw6_proc_exit();
#endif
ipv6_netfilter_fini();
+ ipv6_stub = NULL;
igmp6_cleanup();
ndisc_cleanup();
ip6_mr_cleanup();
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index bb02e176cb70..73784c3d4642 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -628,7 +628,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return;
if (type == NDISC_REDIRECT)
- ip6_redirect(skb, net, 0, 0);
+ ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 4b56cbbc7890..48b6bd2a9a14 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -879,3 +879,30 @@ exit_f:
return err;
}
EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl);
+
+void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
+ __u16 srcp, __u16 destp, int bucket)
+{
+ struct ipv6_pinfo *np = inet6_sk(sp);
+ const struct in6_addr *dest, *src;
+
+ dest = &np->daddr;
+ src = &np->rcv_saddr;
+ seq_printf(seq,
+ "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+ "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n",
+ bucket,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3], srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3], destp,
+ sp->sk_state,
+ sk_wmem_alloc_get(sp),
+ sk_rmem_alloc_get(sp),
+ 0, 0L, 0,
+ from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
+ 0,
+ sock_i_ino(sp),
+ atomic_read(&sp->sk_refcnt), sp,
+ atomic_read(&sp->sk_drops));
+}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 40ffd72243a4..d3618a78fcac 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -425,7 +425,7 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
net_adj = 0;
return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) -
- net_adj) & ~(align - 1)) + (net_adj - 2);
+ net_adj) & ~(align - 1)) + net_adj - 2;
}
static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
@@ -447,7 +447,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return;
if (type == NDISC_REDIRECT)
- ip6_redirect(skb, net, 0, 0);
+ ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 07a7d65a7cb6..8d67900aa003 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -162,12 +162,6 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
off += optlen;
len -= optlen;
}
- /* This case will not be caught by above check since its padding
- * length is smaller than 7:
- * 1 byte NH + 1 byte Length + 6 bytes Padding
- */
- if ((padlen == 6) && ((off - skb_network_header_len(skb)) == 8))
- goto bad;
if (len == 0)
return true;
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 23eed2365fe7..2ba1ba974c3f 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -115,7 +115,7 @@ EXPORT_SYMBOL(ipv6_skip_exthdr);
int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
{
const unsigned char *nh = skb_network_header(skb);
- int packet_len = skb->tail - skb->network_header;
+ int packet_len = skb_tail_pointer(skb) - skb_network_header(skb);
struct ipv6_opt_hdr *hdr;
int len;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 2e1a432867c0..e27591635f92 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -55,26 +55,33 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
struct fib6_table *table;
struct net *net = rule->fr_net;
pol_lookup_t lookup = arg->lookup_ptr;
+ int err = 0;
switch (rule->action) {
case FR_ACT_TO_TBL:
break;
case FR_ACT_UNREACHABLE:
+ err = -ENETUNREACH;
rt = net->ipv6.ip6_null_entry;
goto discard_pkt;
default:
case FR_ACT_BLACKHOLE:
+ err = -EINVAL;
rt = net->ipv6.ip6_blk_hole_entry;
goto discard_pkt;
case FR_ACT_PROHIBIT:
+ err = -EACCES;
rt = net->ipv6.ip6_prohibit_entry;
goto discard_pkt;
}
table = fib6_get_table(net, rule->table);
- if (table)
- rt = lookup(net, table, flp6, flags);
+ if (!table) {
+ err = -EAGAIN;
+ goto out;
+ }
+ rt = lookup(net, table, flp6, flags);
if (rt != net->ipv6.ip6_null_entry) {
struct fib6_rule *r = (struct fib6_rule *)rule;
@@ -101,6 +108,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
}
again:
ip6_rt_put(rt);
+ err = -EAGAIN;
rt = NULL;
goto out;
@@ -108,9 +116,31 @@ discard_pkt:
dst_hold(&rt->dst);
out:
arg->result = rt;
- return rt == NULL ? -EAGAIN : 0;
+ return err;
}
+static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
+{
+ struct rt6_info *rt = (struct rt6_info *) arg->result;
+ struct net_device *dev = rt->rt6i_idev->dev;
+ /* do not accept result if the route does
+ * not meet the required prefix length
+ */
+ if (rt->rt6i_dst.plen <= rule->suppress_prefixlen)
+ goto suppress_route;
+
+ /* do not accept result if the route uses a device
+ * belonging to a forbidden interface group
+ */
+ if (rule->suppress_ifgroup != -1 && dev && dev->group == rule->suppress_ifgroup)
+ goto suppress_route;
+
+ return false;
+
+suppress_route:
+ ip6_rt_put(rt);
+ return true;
+}
static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
@@ -244,6 +274,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
.addr_size = sizeof(struct in6_addr),
.action = fib6_rule_action,
.match = fib6_rule_match,
+ .suppress = fib6_rule_suppress,
.configure = fib6_rule_configure,
.compare = fib6_rule_compare,
.fill = fib6_rule_fill,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index b4ff0a42b8c7..eef8d945b362 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -57,6 +57,7 @@
#include <net/ipv6.h>
#include <net/ip6_checksum.h>
+#include <net/ping.h>
#include <net/protocol.h>
#include <net/raw.h>
#include <net/rawv6.h>
@@ -84,12 +85,18 @@ static inline struct sock *icmpv6_sk(struct net *net)
static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
+ /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
+ struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
struct net *net = dev_net(skb->dev);
if (type == ICMPV6_PKT_TOOBIG)
ip6_update_pmtu(skb, net, info, 0, 0);
else if (type == NDISC_REDIRECT)
- ip6_redirect(skb, net, 0, 0);
+ ip6_redirect(skb, net, skb->dev->ifindex, 0);
+
+ if (!(type & ICMPV6_INFOMSG_MASK))
+ if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
+ ping_err(skb, offset, info);
}
static int icmpv6_rcv(struct sk_buff *skb);
@@ -224,7 +231,8 @@ static bool opt_unrec(struct sk_buff *skb, __u32 offset)
return (*op & 0xC0) == 0x80;
}
-static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len)
+int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
+ struct icmp6hdr *thdr, int len)
{
struct sk_buff *skb;
struct icmp6hdr *icmp6h;
@@ -307,8 +315,8 @@ static void mip6_addr_swap(struct sk_buff *skb)
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif
-static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
- struct sock *sk, struct flowi6 *fl6)
+struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
+ struct sock *sk, struct flowi6 *fl6)
{
struct dst_entry *dst, *dst2;
struct flowi6 fl2;
@@ -391,7 +399,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
int err = 0;
if ((u8 *)hdr < skb->head ||
- (skb->network_header + sizeof(*hdr)) > skb->tail)
+ (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
return;
/*
@@ -697,7 +705,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
IPPROTO_ICMPV6, 0));
if (__skb_checksum_complete(skb)) {
- LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n",
+ LIMIT_NETDEBUG(KERN_DEBUG
+ "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
saddr, daddr);
goto csum_error;
}
@@ -718,7 +727,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
break;
case ICMPV6_ECHO_REPLY:
- /* we couldn't care less */
+ ping_rcv(skb);
break;
case ICMPV6_PKT_TOOBIG:
@@ -931,6 +940,14 @@ static const struct icmp6_err {
.err = ECONNREFUSED,
.fatal = 1,
},
+ { /* POLICY_FAIL */
+ .err = EACCES,
+ .fatal = 1,
+ },
+ { /* REJECT_ROUTE */
+ .err = EACCES,
+ .fatal = 1,
+ },
};
int icmpv6_err_convert(u8 type, u8 code, int *err)
@@ -942,7 +959,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err)
switch (type) {
case ICMPV6_DEST_UNREACH:
fatal = 1;
- if (code <= ICMPV6_PORT_UNREACH) {
+ if (code < ARRAY_SIZE(tab_unreach)) {
*err = tab_unreach[code].err;
fatal = tab_unreach[code].fatal;
}
@@ -967,7 +984,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err)
EXPORT_SYMBOL(icmpv6_err_convert);
#ifdef CONFIG_SYSCTL
-ctl_table ipv6_icmp_table_template[] = {
+struct ctl_table ipv6_icmp_table_template[] = {
{
.procname = "ratelimit",
.data = &init_net.ipv6.sysctl.icmpv6_time,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 192dd1a0e188..5bec666aba61 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -425,8 +425,8 @@ out:
* node.
*/
-static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
- int addrlen, int plen,
+static struct fib6_node *fib6_add_1(struct fib6_node *root,
+ struct in6_addr *addr, int plen,
int offset, int allow_create,
int replace_required)
{
@@ -543,7 +543,7 @@ insert_above:
but if it is >= plen, the value is ignored in any case.
*/
- bit = __ipv6_addr_diff(addr, &key->addr, addrlen);
+ bit = __ipv6_addr_diff(addr, &key->addr, sizeof(*addr));
/*
* (intermediate)[in]
@@ -632,6 +632,12 @@ insert_above:
return ln;
}
+static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt)
+{
+ return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
+ RTF_GATEWAY;
+}
+
/*
* Insert routing information in a node.
*/
@@ -646,6 +652,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
int add = (!info->nlh ||
(info->nlh->nlmsg_flags & NLM_F_CREATE));
int found = 0;
+ bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
ins = &fn->leaf;
@@ -691,9 +698,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
* To avoid long list, we only had siblings if the
* route have a gateway.
*/
- if (rt->rt6i_flags & RTF_GATEWAY &&
- !(rt->rt6i_flags & RTF_EXPIRES) &&
- !(iter->rt6i_flags & RTF_EXPIRES))
+ if (rt_can_ecmp &&
+ rt6_qualify_for_ecmp(iter))
rt->rt6i_nsiblings++;
}
@@ -715,7 +721,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
/* Find the first route that have the same metric */
sibling = fn->leaf;
while (sibling) {
- if (sibling->rt6i_metric == rt->rt6i_metric) {
+ if (sibling->rt6i_metric == rt->rt6i_metric &&
+ rt6_qualify_for_ecmp(sibling)) {
list_add_tail(&rt->rt6i_siblings,
&sibling->rt6i_siblings);
break;
@@ -815,12 +822,12 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
if (!allow_create && !replace_required)
pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
- fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
- rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst),
- allow_create, replace_required);
-
+ fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
+ offsetof(struct rt6_info, rt6i_dst), allow_create,
+ replace_required);
if (IS_ERR(fn)) {
err = PTR_ERR(fn);
+ fn = NULL;
goto out;
}
@@ -856,7 +863,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
/* Now add the first leaf node to new subtree */
sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
- sizeof(struct in6_addr), rt->rt6i_src.plen,
+ rt->rt6i_src.plen,
offsetof(struct rt6_info, rt6i_src),
allow_create, replace_required);
@@ -875,7 +882,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
fn->subtree = sfn;
} else {
sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
- sizeof(struct in6_addr), rt->rt6i_src.plen,
+ rt->rt6i_src.plen,
offsetof(struct rt6_info, rt6i_src),
allow_create, replace_required);
@@ -986,14 +993,22 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
#ifdef CONFIG_IPV6_SUBTREES
- if (fn->subtree)
- fn = fib6_lookup_1(fn->subtree, args + 1);
+ if (fn->subtree) {
+ struct fib6_node *sfn;
+ sfn = fib6_lookup_1(fn->subtree,
+ args + 1);
+ if (!sfn)
+ goto backtrack;
+ fn = sfn;
+ }
#endif
- if (!fn || fn->fn_flags & RTN_RTINFO)
+ if (fn->fn_flags & RTN_RTINFO)
return fn;
}
}
-
+#ifdef CONFIG_IPV6_SUBTREES
+backtrack:
+#endif
if (fn->fn_flags & RTN_ROOT)
break;
@@ -1625,27 +1640,28 @@ static int fib6_age(struct rt6_info *rt, void *arg)
static DEFINE_SPINLOCK(fib6_gc_lock);
-void fib6_run_gc(unsigned long expires, struct net *net)
+void fib6_run_gc(unsigned long expires, struct net *net, bool force)
{
- if (expires != ~0UL) {
+ unsigned long now;
+
+ if (force) {
spin_lock_bh(&fib6_gc_lock);
- gc_args.timeout = expires ? (int)expires :
- net->ipv6.sysctl.ip6_rt_gc_interval;
- } else {
- if (!spin_trylock_bh(&fib6_gc_lock)) {
- mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
- return;
- }
- gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval;
+ } else if (!spin_trylock_bh(&fib6_gc_lock)) {
+ mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
+ return;
}
+ gc_args.timeout = expires ? (int)expires :
+ net->ipv6.sysctl.ip6_rt_gc_interval;
gc_args.more = icmp6_dst_gc();
fib6_clean_all(net, fib6_age, 0, NULL);
+ now = jiffies;
+ net->ipv6.ip6_rt_last_gc = now;
if (gc_args.more)
mod_timer(&net->ipv6.ip6_fib_timer,
- round_jiffies(jiffies
+ round_jiffies(now
+ net->ipv6.sysctl.ip6_rt_gc_interval));
else
del_timer(&net->ipv6.ip6_fib_timer);
@@ -1654,7 +1670,7 @@ void fib6_run_gc(unsigned long expires, struct net *net)
static void fib6_gc_timer_cb(unsigned long arg)
{
- fib6_run_gc(0, (struct net *)arg);
+ fib6_run_gc(0, (struct net *)arg, true);
}
static int __net_init fib6_net_init(struct net *net)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index ecd60733e5e2..6b26e9feafb9 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -335,6 +335,7 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
dev->rtnl_link_ops = &ip6gre_link_ops;
nt->dev = dev;
+ nt->net = dev_net(dev);
ip6gre_tnl_link_config(nt, 1);
if (register_netdevice(dev) < 0)
@@ -508,8 +509,6 @@ static int ip6gre_rcv(struct sk_buff *skb)
goto drop;
}
- secpath_reset(skb);
-
skb->protocol = gre_proto;
/* WCCP version 1 and 2 protocol decoding.
* - Change protocol to IP
@@ -524,7 +523,6 @@ static int ip6gre_rcv(struct sk_buff *skb)
skb->mac_header = skb->network_header;
__pskb_pull(skb, offset);
skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
- skb->pkt_type = PACKET_HOST;
if (((flags&GRE_CSUM) && csum) ||
(!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
@@ -556,7 +554,7 @@ static int ip6gre_rcv(struct sk_buff *skb)
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
}
- __skb_tunnel_rx(skb, tunnel->dev);
+ __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
skb_reset_network_header(skb);
@@ -693,6 +691,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
tunnel->err_count = 0;
}
+ skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
+
max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
@@ -709,8 +709,6 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
skb = new_skb;
}
- skb_dst_drop(skb);
-
if (fl6->flowi6_mark) {
skb_dst_set(skb, dst);
ndst = NULL;
@@ -724,6 +722,11 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
}
+ if (likely(!skb->encapsulation)) {
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
+ }
+
skb_push(skb, gre_hlen);
skb_reset_network_header(skb);
skb_set_transport_header(skb, sizeof(*ipv6h));
@@ -1255,6 +1258,7 @@ static int ip6gre_tunnel_init(struct net_device *dev)
tunnel = netdev_priv(dev);
tunnel->dev = dev;
+ tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
@@ -1275,6 +1279,7 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev)
struct ip6_tnl *tunnel = netdev_priv(dev);
tunnel->dev = dev;
+ tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
tunnel->hlen = sizeof(struct ipv6hdr) + 4;
@@ -1450,6 +1455,7 @@ static int ip6gre_tap_init(struct net_device *dev)
tunnel = netdev_priv(dev);
tunnel->dev = dev;
+ tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
ip6gre_tnl_link_config(tunnel, 1);
@@ -1501,6 +1507,7 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
eth_hw_addr_random(dev);
nt->dev = dev;
+ nt->net = dev_net(dev);
ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
/* Can use a lockless transmit, unless we generate output sequences */
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 2bab2aa59745..302d6fb1ff2b 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -44,7 +44,7 @@
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/xfrm.h>
-
+#include <net/inet_ecn.h>
int ip6_rcv_finish(struct sk_buff *skb)
@@ -109,6 +109,10 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
if (hdr->version != 6)
goto err;
+ IP6_ADD_STATS_BH(dev_net(dev), idev,
+ IPSTATS_MIB_NOECTPKTS +
+ (ipv6_get_dsfield(hdr) & INET_ECN_MASK),
+ max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
/*
* RFC4291 2.5.3
* A packet received on an interface with a destination address
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 71b766ee821d..d82de7228100 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -91,6 +91,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
unsigned int unfrag_ip6hlen;
u8 *prevhdr;
int offset = 0;
+ bool tunnel;
if (unlikely(skb_shinfo(skb)->gso_type &
~(SKB_GSO_UDP |
@@ -98,6 +99,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_MPLS |
SKB_GSO_TCPV6 |
0)))
goto out;
@@ -105,6 +107,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
goto out;
+ tunnel = skb->encapsulation;
ipv6h = ipv6_hdr(skb);
__skb_pull(skb, sizeof(*ipv6h));
segs = ERR_PTR(-EPROTONOSUPPORT);
@@ -125,7 +128,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
ipv6h = ipv6_hdr(skb);
ipv6h->payload_len = htons(skb->len - skb->mac_len -
sizeof(*ipv6h));
- if (proto == IPPROTO_UDP) {
+ if (!tunnel && proto == IPPROTO_UDP) {
unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
fptr = (struct frag_hdr *)(skb_network_header(skb) +
unfrag_ip6hlen);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index d5d20cde8d92..3a692d529163 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -56,31 +56,6 @@
#include <net/checksum.h>
#include <linux/mroute6.h>
-int __ip6_local_out(struct sk_buff *skb)
-{
- int len;
-
- len = skb->len - sizeof(struct ipv6hdr);
- if (len > IPV6_MAXPLEN)
- len = 0;
- ipv6_hdr(skb)->payload_len = htons(len);
-
- return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
- skb_dst(skb)->dev, dst_output);
-}
-
-int ip6_local_out(struct sk_buff *skb)
-{
- int err;
-
- err = __ip6_local_out(skb);
- if (likely(err == 1))
- err = dst_output(skb);
-
- return err;
-}
-EXPORT_SYMBOL_GPL(ip6_local_out);
-
static int ip6_finish_output2(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -238,6 +213,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
hdr->saddr = fl6->saddr;
hdr->daddr = *first_hop;
+ skb->protocol = htons(ETH_P_IPV6);
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
@@ -1057,6 +1033,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
/* initialize protocol header pointer */
skb->transport_header = skb->network_header + fragheaderlen;
+ skb->protocol = htons(ETH_P_IPV6);
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
}
@@ -1098,11 +1075,12 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}
-static void ip6_append_data_mtu(int *mtu,
+static void ip6_append_data_mtu(unsigned int *mtu,
int *maxfraglen,
unsigned int fragheaderlen,
struct sk_buff *skb,
- struct rt6_info *rt)
+ struct rt6_info *rt,
+ bool pmtuprobe)
{
if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
if (skb == NULL) {
@@ -1114,7 +1092,9 @@ static void ip6_append_data_mtu(int *mtu,
* this fragment is not first, the headers
* space is regarded as data space.
*/
- *mtu = dst_mtu(rt->dst.path);
+ *mtu = min(*mtu, pmtuprobe ?
+ rt->dst.dev->mtu :
+ dst_mtu(rt->dst.path));
}
*maxfraglen = ((*mtu - fragheaderlen) & ~7)
+ fragheaderlen - sizeof(struct frag_hdr);
@@ -1131,11 +1111,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_cork *cork;
struct sk_buff *skb, *skb_prev = NULL;
- unsigned int maxfraglen, fragheaderlen;
+ unsigned int maxfraglen, fragheaderlen, mtu;
int exthdrlen;
int dst_exthdrlen;
int hh_len;
- int mtu;
int copy;
int err;
int offset = 0;
@@ -1292,7 +1271,9 @@ alloc_new_skb:
/* update mtu and maxfraglen if necessary */
if (skb == NULL || skb_prev == NULL)
ip6_append_data_mtu(&mtu, &maxfraglen,
- fragheaderlen, skb, rt);
+ fragheaderlen, skb, rt,
+ np->pmtudisc ==
+ IPV6_PMTUDISC_PROBE);
skb_prev = skb;
@@ -1355,6 +1336,7 @@ alloc_new_skb:
/*
* Fill in the control structures
*/
+ skb->protocol = htons(ETH_P_IPV6);
skb->ip_summed = CHECKSUM_NONE;
skb->csum = 0;
/* reserve for fragmentation and ipsec header */
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 1e55866cead7..2d8f4829575b 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -41,6 +41,7 @@
#include <linux/netfilter_ipv6.h>
#include <linux/slab.h>
#include <linux/hash.h>
+#include <linux/etherdevice.h>
#include <asm/uaccess.h>
#include <linux/atomic.h>
@@ -315,6 +316,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
t = netdev_priv(dev);
t->parms = *p;
+ t->net = dev_net(dev);
err = ip6_tnl_create2(dev);
if (err < 0)
goto failed_free;
@@ -374,7 +376,7 @@ static void
ip6_tnl_dev_uninit(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- struct net *net = dev_net(dev);
+ struct net *net = t->net;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
if (dev == ip6n->fb_tnl_dev)
@@ -741,7 +743,7 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
{
struct __ip6_tnl_parm *p = &t->parms;
int ret = 0;
- struct net *net = dev_net(t->dev);
+ struct net *net = t->net;
if ((p->flags & IP6_TNL_F_CAP_RCV) ||
((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
@@ -800,14 +802,12 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
rcu_read_unlock();
goto discard;
}
- secpath_reset(skb);
skb->mac_header = skb->network_header;
skb_reset_network_header(skb);
skb->protocol = htons(protocol);
- skb->pkt_type = PACKET_HOST;
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
- __skb_tunnel_rx(skb, t->dev);
+ __skb_tunnel_rx(skb, t->dev, t->net);
err = dscp_ecn_decapsulate(t, ipv6h, skb);
if (unlikely(err)) {
@@ -895,7 +895,7 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
{
struct __ip6_tnl_parm *p = &t->parms;
int ret = 0;
- struct net *net = dev_net(t->dev);
+ struct net *net = t->net;
if (p->flags & IP6_TNL_F_CAP_XMIT) {
struct net_device *ldev = NULL;
@@ -945,8 +945,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
int encap_limit,
__u32 *pmtu)
{
- struct net *net = dev_net(dev);
struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = t->net;
struct net_device_stats *stats = &t->dev->stats;
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct ipv6_tel_txoption opt;
@@ -996,6 +996,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
goto tx_err_dst_release;
}
+ skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
+
/*
* Okay, now see if we can stuff it in the buffer as-is.
*/
@@ -1013,7 +1015,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
consume_skb(skb);
skb = new_skb;
}
- skb_dst_drop(skb);
if (fl6->flowi6_mark) {
skb_dst_set(skb, dst);
ndst = NULL;
@@ -1027,6 +1028,12 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
init_tel_txopt(&opt, encap_limit);
ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
}
+
+ if (likely(!skb->encapsulation)) {
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
+ }
+
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
ipv6h = ipv6_hdr(skb);
@@ -1202,7 +1209,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
int strict = (ipv6_addr_type(&p->raddr) &
(IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
- struct rt6_info *rt = rt6_lookup(dev_net(dev),
+ struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
p->link, strict);
@@ -1251,7 +1258,7 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
{
- struct net *net = dev_net(t->dev);
+ struct net *net = t->net;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
int err;
@@ -1463,8 +1470,10 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
dev->mtu-=8;
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
- dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ /* This perm addr will be used as interface identifier by IPv6 */
+ dev->addr_assign_type = NET_ADDR_RANDOM;
+ eth_random_addr(dev->perm_addr);
}
@@ -1479,6 +1488,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
struct ip6_tnl *t = netdev_priv(dev);
t->dev = dev;
+ t->net = dev_net(dev);
dev->tstats = alloc_percpu(struct pcpu_tstats);
if (!dev->tstats)
return -ENOMEM;
@@ -1596,9 +1606,9 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
- struct ip6_tnl *t;
+ struct ip6_tnl *t = netdev_priv(dev);
struct __ip6_tnl_parm p;
- struct net *net = dev_net(dev);
+ struct net *net = t->net;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
if (dev == ip6n->fb_tnl_dev)
@@ -1646,9 +1656,9 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr),
- &parm->raddr) ||
- nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr),
&parm->laddr) ||
+ nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr),
+ &parm->raddr) ||
nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
@@ -1699,14 +1709,24 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
{
+ struct net *net = dev_net(ip6n->fb_tnl_dev);
+ struct net_device *dev, *aux;
int h;
struct ip6_tnl *t;
LIST_HEAD(list);
+ for_each_netdev_safe(net, dev, aux)
+ if (dev->rtnl_link_ops == &ip6_link_ops)
+ unregister_netdevice_queue(dev, &list);
+
for (h = 0; h < HASH_SIZE; h++) {
t = rtnl_dereference(ip6n->tnls_r_l[h]);
while (t != NULL) {
- unregister_netdevice_queue(t->dev, &list);
+ /* If dev is in the same netns, it has already
+ * been added to the list by the previous loop.
+ */
+ if (!net_eq(dev_net(t->dev), net))
+ unregister_netdevice_queue(t->dev, &list);
t = rtnl_dereference(t->next);
}
}
@@ -1732,6 +1752,10 @@ static int __net_init ip6_tnl_init_net(struct net *net)
if (!ip6n->fb_tnl_dev)
goto err_alloc_dev;
dev_net_set(ip6n->fb_tnl_dev, net);
+ /* FB netdevice is special: we have one, and only one per netns.
+ * Allowing to move it to another netns is clearly unsafe.
+ */
+ ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL;
err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
if (err < 0)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 241fb8ad9fcf..f365310bfcca 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -110,8 +110,8 @@ static struct kmem_cache *mrt_cachep __read_mostly;
static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr6_table *mrt);
-static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
- struct sk_buff *skb, struct mfc6_cache *cache);
+static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+ struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert);
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
@@ -259,10 +259,12 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
{
struct mr6_table *mrt, *next;
+ rtnl_lock();
list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
list_del(&mrt->list);
ip6mr_free_table(mrt);
}
+ rtnl_unlock();
fib_rules_unregister(net->ipv6.mr6_rules_ops);
}
#else
@@ -289,7 +291,10 @@ static int __net_init ip6mr_rules_init(struct net *net)
static void __net_exit ip6mr_rules_exit(struct net *net)
{
+ rtnl_lock();
ip6mr_free_table(net->ipv6.mrt6);
+ net->ipv6.mrt6 = NULL;
+ rtnl_unlock();
}
#endif
@@ -667,9 +672,8 @@ static int pim6_rcv(struct sk_buff *skb)
skb_reset_network_header(skb);
skb->protocol = htons(ETH_P_IPV6);
skb->ip_summed = CHECKSUM_NONE;
- skb->pkt_type = PACKET_HOST;
- skb_tunnel_rx(skb, reg_dev);
+ skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
netif_rx(skb);
@@ -1319,7 +1323,7 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
static int ip6mr_device_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
struct mr6_table *mrt;
struct mif_device *v;
@@ -2069,8 +2073,8 @@ static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
return ct;
}
-static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
- struct sk_buff *skb, struct mfc6_cache *cache)
+static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+ struct sk_buff *skb, struct mfc6_cache *cache)
{
int psend = -1;
int vif, ct;
@@ -2151,12 +2155,11 @@ forward:
last_forward:
if (psend != -1) {
ip6mr_forward2(net, mrt, skb, cache, psend);
- return 0;
+ return;
}
dont_forward:
kfree_skb(skb);
- return 0;
}
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 7af5aee75d98..5636a912074a 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -76,7 +76,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return;
if (type == NDISC_REDIRECT)
- ip6_redirect(skb, net, 0, 0);
+ ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index bfa6cc36ef2a..096cd67b737c 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -44,6 +44,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
+#include <linux/pkt_sched.h>
#include <net/mld.h>
#include <linux/netfilter.h>
@@ -94,6 +95,7 @@ static void mld_ifc_event(struct inet6_dev *idev);
static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *addr);
static void mld_clear_delrec(struct inet6_dev *idev);
+static bool mld_in_v1_mode(const struct inet6_dev *idev);
static int sf_setstate(struct ifmcaddr6 *pmc);
static void sf_markstate(struct ifmcaddr6 *pmc);
static void ip6_mc_clear_src(struct ifmcaddr6 *pmc);
@@ -106,14 +108,15 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
struct inet6_dev *idev);
-
-#define IGMP6_UNSOLICITED_IVAL (10*HZ)
#define MLD_QRV_DEFAULT 2
+/* RFC3810, 9.2. Query Interval */
+#define MLD_QI_DEFAULT (125 * HZ)
+/* RFC3810, 9.3. Query Response Interval */
+#define MLD_QRI_DEFAULT (10 * HZ)
-#define MLD_V1_SEEN(idev) (dev_net((idev)->dev)->ipv6.devconf_all->force_mld_version == 1 || \
- (idev)->cnf.force_mld_version == 1 || \
- ((idev)->mc_v1_seen && \
- time_before(jiffies, (idev)->mc_v1_seen)))
+/* RFC3810, 8.1 Query Version Distinctions */
+#define MLD_V1_QUERY_LEN 24
+#define MLD_V2_QUERY_LEN_MIN 28
#define IPV6_MLD_MAX_MSF 64
@@ -128,6 +131,18 @@ int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
pmc != NULL; \
pmc = rcu_dereference(pmc->next))
+static int unsolicited_report_interval(struct inet6_dev *idev)
+{
+ int iv;
+
+ if (mld_in_v1_mode(idev))
+ iv = idev->cnf.mldv1_unsolicited_report_interval;
+ else
+ iv = idev->cnf.mldv2_unsolicited_report_interval;
+
+ return iv > 0 ? iv : 1;
+}
+
int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
{
struct net_device *dev = NULL;
@@ -676,7 +691,7 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
if (!(dev->flags & IFF_UP) || (mc->mca_flags & MAF_NOREPORT))
return;
- if (MLD_V1_SEEN(mc->idev)) {
+ if (mld_in_v1_mode(mc->idev)) {
igmp6_join_group(mc);
return;
}
@@ -984,21 +999,49 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
static void mld_gq_start_timer(struct inet6_dev *idev)
{
- int tv = net_random() % idev->mc_maxdelay;
+ unsigned long tv = net_random() % idev->mc_maxdelay;
idev->mc_gq_running = 1;
if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2))
in6_dev_hold(idev);
}
-static void mld_ifc_start_timer(struct inet6_dev *idev, int delay)
+static void mld_gq_stop_timer(struct inet6_dev *idev)
+{
+ idev->mc_gq_running = 0;
+ if (del_timer(&idev->mc_gq_timer))
+ __in6_dev_put(idev);
+}
+
+static void mld_ifc_start_timer(struct inet6_dev *idev, unsigned long delay)
{
- int tv = net_random() % delay;
+ unsigned long tv = net_random() % delay;
if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2))
in6_dev_hold(idev);
}
+static void mld_ifc_stop_timer(struct inet6_dev *idev)
+{
+ idev->mc_ifc_count = 0;
+ if (del_timer(&idev->mc_ifc_timer))
+ __in6_dev_put(idev);
+}
+
+static void mld_dad_start_timer(struct inet6_dev *idev, unsigned long delay)
+{
+ unsigned long tv = net_random() % delay;
+
+ if (!mod_timer(&idev->mc_dad_timer, jiffies+tv+2))
+ in6_dev_hold(idev);
+}
+
+static void mld_dad_stop_timer(struct inet6_dev *idev)
+{
+ if (del_timer(&idev->mc_dad_timer))
+ __in6_dev_put(idev);
+}
+
/*
* IGMP handling (alias multicast ICMPv6 messages)
*/
@@ -1017,12 +1060,9 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
delay = ma->mca_timer.expires - jiffies;
}
- if (delay >= resptime) {
- if (resptime)
- delay = net_random() % resptime;
- else
- delay = 1;
- }
+ if (delay >= resptime)
+ delay = net_random() % resptime;
+
ma->mca_timer.expires = jiffies + delay;
if (!mod_timer(&ma->mca_timer, jiffies + delay))
atomic_inc(&ma->mca_refcnt);
@@ -1089,6 +1129,158 @@ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
return true;
}
+static int mld_force_mld_version(const struct inet6_dev *idev)
+{
+ /* Normally, both are 0 here. If enforcement to a particular is
+ * being used, individual device enforcement will have a lower
+ * precedence over 'all' device (.../conf/all/force_mld_version).
+ */
+
+ if (dev_net(idev->dev)->ipv6.devconf_all->force_mld_version != 0)
+ return dev_net(idev->dev)->ipv6.devconf_all->force_mld_version;
+ else
+ return idev->cnf.force_mld_version;
+}
+
+static bool mld_in_v2_mode_only(const struct inet6_dev *idev)
+{
+ return mld_force_mld_version(idev) == 2;
+}
+
+static bool mld_in_v1_mode_only(const struct inet6_dev *idev)
+{
+ return mld_force_mld_version(idev) == 1;
+}
+
+static bool mld_in_v1_mode(const struct inet6_dev *idev)
+{
+ if (mld_in_v2_mode_only(idev))
+ return false;
+ if (mld_in_v1_mode_only(idev))
+ return true;
+ if (idev->mc_v1_seen && time_before(jiffies, idev->mc_v1_seen))
+ return true;
+
+ return false;
+}
+
+static void mld_set_v1_mode(struct inet6_dev *idev)
+{
+ /* RFC3810, relevant sections:
+ * - 9.1. Robustness Variable
+ * - 9.2. Query Interval
+ * - 9.3. Query Response Interval
+ * - 9.12. Older Version Querier Present Timeout
+ */
+ unsigned long switchback;
+
+ switchback = (idev->mc_qrv * idev->mc_qi) + idev->mc_qri;
+
+ idev->mc_v1_seen = jiffies + switchback;
+}
+
+static void mld_update_qrv(struct inet6_dev *idev,
+ const struct mld2_query *mlh2)
+{
+ /* RFC3810, relevant sections:
+ * - 5.1.8. QRV (Querier's Robustness Variable)
+ * - 9.1. Robustness Variable
+ */
+
+ /* The value of the Robustness Variable MUST NOT be zero,
+ * and SHOULD NOT be one. Catch this here if we ever run
+ * into such a case in future.
+ */
+ WARN_ON(idev->mc_qrv == 0);
+
+ if (mlh2->mld2q_qrv > 0)
+ idev->mc_qrv = mlh2->mld2q_qrv;
+
+ if (unlikely(idev->mc_qrv < 2)) {
+ net_warn_ratelimited("IPv6: MLD: clamping QRV from %u to %u!\n",
+ idev->mc_qrv, MLD_QRV_DEFAULT);
+ idev->mc_qrv = MLD_QRV_DEFAULT;
+ }
+}
+
+static void mld_update_qi(struct inet6_dev *idev,
+ const struct mld2_query *mlh2)
+{
+ /* RFC3810, relevant sections:
+ * - 5.1.9. QQIC (Querier's Query Interval Code)
+ * - 9.2. Query Interval
+ * - 9.12. Older Version Querier Present Timeout
+ * (the [Query Interval] in the last Query received)
+ */
+ unsigned long mc_qqi;
+
+ if (mlh2->mld2q_qqic < 128) {
+ mc_qqi = mlh2->mld2q_qqic;
+ } else {
+ unsigned long mc_man, mc_exp;
+
+ mc_exp = MLDV2_QQIC_EXP(mlh2->mld2q_qqic);
+ mc_man = MLDV2_QQIC_MAN(mlh2->mld2q_qqic);
+
+ mc_qqi = (mc_man | 0x10) << (mc_exp + 3);
+ }
+
+ idev->mc_qi = mc_qqi * HZ;
+}
+
+static void mld_update_qri(struct inet6_dev *idev,
+ const struct mld2_query *mlh2)
+{
+ /* RFC3810, relevant sections:
+ * - 5.1.3. Maximum Response Code
+ * - 9.3. Query Response Interval
+ */
+ idev->mc_qri = msecs_to_jiffies(mldv2_mrc(mlh2));
+}
+
+static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld,
+ unsigned long *max_delay)
+{
+ unsigned long mldv1_md;
+
+ /* Ignore v1 queries */
+ if (mld_in_v2_mode_only(idev))
+ return -EINVAL;
+
+ /* MLDv1 router present */
+ mldv1_md = ntohs(mld->mld_maxdelay);
+ *max_delay = max(msecs_to_jiffies(mldv1_md), 1UL);
+
+ mld_set_v1_mode(idev);
+
+ /* cancel MLDv2 report timer */
+ mld_gq_stop_timer(idev);
+ /* cancel the interface change timer */
+ mld_ifc_stop_timer(idev);
+ /* clear deleted report items */
+ mld_clear_delrec(idev);
+
+ return 0;
+}
+
+static int mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
+ unsigned long *max_delay)
+{
+ /* hosts need to stay in MLDv1 mode, discard MLDv2 queries */
+ if (mld_in_v1_mode(idev))
+ return -EINVAL;
+
+ *max_delay = max(msecs_to_jiffies(mldv2_mrc(mld)), 1UL);
+
+ mld_update_qrv(idev, mld);
+ mld_update_qi(idev, mld);
+ mld_update_qri(idev, mld);
+
+ idev->mc_maxdelay = *max_delay;
+
+ return 0;
+}
+
/* called with rcu_read_lock() */
int igmp6_event_query(struct sk_buff *skb)
{
@@ -1100,7 +1292,7 @@ int igmp6_event_query(struct sk_buff *skb)
struct mld_msg *mld;
int group_type;
int mark = 0;
- int len;
+ int len, err;
if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
return -EINVAL;
@@ -1114,7 +1306,6 @@ int igmp6_event_query(struct sk_buff *skb)
return -EINVAL;
idev = __in6_dev_get(skb->dev);
-
if (idev == NULL)
return 0;
@@ -1126,35 +1317,23 @@ int igmp6_event_query(struct sk_buff *skb)
!(group_type&IPV6_ADDR_MULTICAST))
return -EINVAL;
- if (len == 24) {
- int switchback;
- /* MLDv1 router present */
-
- /* Translate milliseconds to jiffies */
- max_delay = (ntohs(mld->mld_maxdelay)*HZ)/1000;
-
- switchback = (idev->mc_qrv + 1) * max_delay;
- idev->mc_v1_seen = jiffies + switchback;
-
- /* cancel the interface change timer */
- idev->mc_ifc_count = 0;
- if (del_timer(&idev->mc_ifc_timer))
- __in6_dev_put(idev);
- /* clear deleted report items */
- mld_clear_delrec(idev);
- } else if (len >= 28) {
+ if (len == MLD_V1_QUERY_LEN) {
+ err = mld_process_v1(idev, mld, &max_delay);
+ if (err < 0)
+ return err;
+ } else if (len >= MLD_V2_QUERY_LEN_MIN) {
int srcs_offset = sizeof(struct mld2_query) -
sizeof(struct icmp6hdr);
+
if (!pskb_may_pull(skb, srcs_offset))
return -EINVAL;
mlh2 = (struct mld2_query *)skb_transport_header(skb);
- max_delay = (MLDV2_MRC(ntohs(mlh2->mld2q_mrc))*HZ)/1000;
- if (!max_delay)
- max_delay = 1;
- idev->mc_maxdelay = max_delay;
- if (mlh2->mld2q_qrv)
- idev->mc_qrv = mlh2->mld2q_qrv;
+
+ err = mld_process_v2(idev, mlh2, &max_delay);
+ if (err < 0)
+ return err;
+
if (group_type == IPV6_ADDR_ANY) { /* general query */
if (mlh2->mld2q_nsrcs)
return -EINVAL; /* no sources allowed */
@@ -1343,8 +1522,9 @@ static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb,
hdr->daddr = *daddr;
}
-static struct sk_buff *mld_newpack(struct net_device *dev, int size)
+static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size)
{
+ struct net_device *dev = idev->dev;
struct net *net = dev_net(dev);
struct sock *sk = net->ipv6.igmp_sk;
struct sk_buff *skb;
@@ -1367,9 +1547,10 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
if (!skb)
return NULL;
+ skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, hlen);
- if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
+ if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
* use unspecified address as the source address
* when a valid link-local address is not available.
@@ -1409,8 +1590,9 @@ static void mld_sendpack(struct sk_buff *skb)
idev = __in6_dev_get(skb->dev);
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
- payload_len = (skb->tail - skb->network_header) - sizeof(*pip6);
- mldlen = skb->tail - skb->transport_header;
+ payload_len = (skb_tail_pointer(skb) - skb_network_header(skb)) -
+ sizeof(*pip6);
+ mldlen = skb_tail_pointer(skb) - skb_transport_header(skb);
pip6->payload_len = htons(payload_len);
pmr->mld2r_cksum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
@@ -1465,7 +1647,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
struct mld2_grec *pgr;
if (!skb)
- skb = mld_newpack(dev, dev->mtu);
+ skb = mld_newpack(pmc->idev, dev->mtu);
if (!skb)
return NULL;
pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec));
@@ -1485,7 +1667,8 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
int type, int gdeleted, int sdeleted)
{
- struct net_device *dev = pmc->idev->dev;
+ struct inet6_dev *idev = pmc->idev;
+ struct net_device *dev = idev->dev;
struct mld2_report *pmr;
struct mld2_grec *pgr = NULL;
struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
@@ -1514,7 +1697,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
if (skb)
mld_sendpack(skb);
- skb = mld_newpack(dev, dev->mtu);
+ skb = mld_newpack(idev, dev->mtu);
}
}
first = 1;
@@ -1541,7 +1724,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
pgr->grec_nsrcs = htons(scount);
if (skb)
mld_sendpack(skb);
- skb = mld_newpack(dev, dev->mtu);
+ skb = mld_newpack(idev, dev->mtu);
first = 1;
scount = 0;
}
@@ -1596,8 +1779,8 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
struct sk_buff *skb = NULL;
int type;
+ read_lock_bh(&idev->lock);
if (!pmc) {
- read_lock_bh(&idev->lock);
for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
if (pmc->mca_flags & MAF_NOREPORT)
continue;
@@ -1609,7 +1792,6 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
skb = add_grec(skb, pmc, type, 0, 0);
spin_unlock_bh(&pmc->mca_lock);
}
- read_unlock_bh(&idev->lock);
} else {
spin_lock_bh(&pmc->mca_lock);
if (pmc->mca_sfcount[MCAST_EXCLUDE])
@@ -1619,6 +1801,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
skb = add_grec(skb, pmc, type, 0, 0);
spin_unlock_bh(&pmc->mca_lock);
}
+ read_unlock_bh(&idev->lock);
if (skb)
mld_sendpack(skb);
}
@@ -1758,7 +1941,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
rcu_read_unlock();
return;
}
-
+ skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, hlen);
if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
@@ -1814,6 +1997,46 @@ err_out:
goto out;
}
+static void mld_resend_report(struct inet6_dev *idev)
+{
+ if (mld_in_v1_mode(idev)) {
+ struct ifmcaddr6 *mcaddr;
+ read_lock_bh(&idev->lock);
+ for (mcaddr = idev->mc_list; mcaddr; mcaddr = mcaddr->next) {
+ if (!(mcaddr->mca_flags & MAF_NOREPORT))
+ igmp6_send(&mcaddr->mca_addr, idev->dev,
+ ICMPV6_MGM_REPORT);
+ }
+ read_unlock_bh(&idev->lock);
+ } else {
+ mld_send_report(idev, NULL);
+ }
+}
+
+void ipv6_mc_dad_complete(struct inet6_dev *idev)
+{
+ idev->mc_dad_count = idev->mc_qrv;
+ if (idev->mc_dad_count) {
+ mld_resend_report(idev);
+ idev->mc_dad_count--;
+ if (idev->mc_dad_count)
+ mld_dad_start_timer(idev, idev->mc_maxdelay);
+ }
+}
+
+static void mld_dad_timer_expire(unsigned long data)
+{
+ struct inet6_dev *idev = (struct inet6_dev *)data;
+
+ mld_resend_report(idev);
+ if (idev->mc_dad_count) {
+ idev->mc_dad_count--;
+ if (idev->mc_dad_count)
+ mld_dad_start_timer(idev, idev->mc_maxdelay);
+ }
+ __in6_dev_put(idev);
+}
+
static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
const struct in6_addr *psfsrc)
{
@@ -1840,7 +2063,7 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
else
pmc->mca_sources = psf->sf_next;
if (psf->sf_oldin && !(pmc->mca_flags & MAF_NOREPORT) &&
- !MLD_V1_SEEN(idev)) {
+ !mld_in_v1_mode(idev)) {
psf->sf_crcount = idev->mc_qrv;
psf->sf_next = pmc->mca_tomb;
pmc->mca_tomb = psf;
@@ -2105,7 +2328,7 @@ static void igmp6_join_group(struct ifmcaddr6 *ma)
igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
- delay = net_random() % IGMP6_UNSOLICITED_IVAL;
+ delay = net_random() % unsolicited_report_interval(ma->idev);
spin_lock_bh(&ma->mca_lock);
if (del_timer(&ma->mca_timer)) {
@@ -2140,7 +2363,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
static void igmp6_leave_group(struct ifmcaddr6 *ma)
{
- if (MLD_V1_SEEN(ma->idev)) {
+ if (mld_in_v1_mode(ma->idev)) {
if (ma->mca_flags & MAF_LAST_REPORTER)
igmp6_send(&ma->mca_addr, ma->idev->dev,
ICMPV6_MGM_REDUCTION);
@@ -2174,7 +2397,7 @@ static void mld_ifc_timer_expire(unsigned long data)
static void mld_ifc_event(struct inet6_dev *idev)
{
- if (MLD_V1_SEEN(idev))
+ if (mld_in_v1_mode(idev))
return;
idev->mc_ifc_count = idev->mc_qrv;
mld_ifc_start_timer(idev, 1);
@@ -2185,7 +2408,7 @@ static void igmp6_timer_handler(unsigned long data)
{
struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data;
- if (MLD_V1_SEEN(ma->idev))
+ if (mld_in_v1_mode(ma->idev))
igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
else
mld_send_report(ma->idev, ma);
@@ -2225,12 +2448,9 @@ void ipv6_mc_down(struct inet6_dev *idev)
/* Withdraw multicast list */
read_lock_bh(&idev->lock);
- idev->mc_ifc_count = 0;
- if (del_timer(&idev->mc_ifc_timer))
- __in6_dev_put(idev);
- idev->mc_gq_running = 0;
- if (del_timer(&idev->mc_gq_timer))
- __in6_dev_put(idev);
+ mld_ifc_stop_timer(idev);
+ mld_gq_stop_timer(idev);
+ mld_dad_stop_timer(idev);
for (i = idev->mc_list; i; i=i->next)
igmp6_group_dropped(i);
@@ -2267,8 +2487,14 @@ void ipv6_mc_init_dev(struct inet6_dev *idev)
idev->mc_ifc_count = 0;
setup_timer(&idev->mc_ifc_timer, mld_ifc_timer_expire,
(unsigned long)idev);
+ setup_timer(&idev->mc_dad_timer, mld_dad_timer_expire,
+ (unsigned long)idev);
+
idev->mc_qrv = MLD_QRV_DEFAULT;
- idev->mc_maxdelay = IGMP6_UNSOLICITED_IVAL;
+ idev->mc_qi = MLD_QI_DEFAULT;
+ idev->mc_qri = MLD_QRI_DEFAULT;
+
+ idev->mc_maxdelay = unsolicited_report_interval(idev);
idev->mc_v1_seen = 0;
write_unlock_bh(&idev->lock);
}
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 0f9bdc5ee9f3..9ac01dc9402e 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -268,7 +268,8 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
struct ipv6_opt_hdr *exthdr =
(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
const unsigned char *nh = skb_network_header(skb);
- unsigned int packet_len = skb->tail - skb->network_header;
+ unsigned int packet_len = skb_tail_pointer(skb) -
+ skb_network_header(skb);
int found_rhdr = 0;
*nexthdr = &ipv6_hdr(skb)->nexthdr;
@@ -404,7 +405,8 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
struct ipv6_opt_hdr *exthdr =
(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
const unsigned char *nh = skb_network_header(skb);
- unsigned int packet_len = skb->tail - skb->network_header;
+ unsigned int packet_len = skb_tail_pointer(skb) -
+ skb_network_header(skb);
int found_rhdr = 0;
*nexthdr = &ipv6_hdr(skb)->nexthdr;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index ca4ffcc287f1..f8a55ff1971b 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -372,14 +372,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
int tlen = dev->needed_tailroom;
struct sock *sk = dev_net(dev)->ipv6.ndisc_sk;
struct sk_buff *skb;
- int err;
- skb = sock_alloc_send_skb(sk,
- hlen + sizeof(struct ipv6hdr) + len + tlen,
- 1, &err);
+ skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC);
if (!skb) {
- ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb, err=%d\n",
- __func__, err);
+ ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n",
+ __func__);
return NULL;
}
@@ -389,6 +386,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
skb_reserve(skb, hlen + sizeof(struct ipv6hdr));
skb_reset_transport_header(skb);
+ /* Manually assign socket ownership as we avoid calling
+ * sock_alloc_send_pskb() to bypass wmem buffer limits
+ */
+ skb_set_owner_w(skb, sk);
+
return skb;
}
@@ -428,7 +430,6 @@ static void ndisc_send_skb(struct sk_buff *skb,
type = icmp6h->icmp6_type;
if (!dst) {
- struct sock *sk = net->ipv6.ndisc_sk;
struct flowi6 fl6;
icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex);
@@ -462,10 +463,10 @@ static void ndisc_send_skb(struct sk_buff *skb,
rcu_read_unlock();
}
-static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
- const struct in6_addr *daddr,
- const struct in6_addr *solicited_addr,
- bool router, bool solicited, bool override, bool inc_opt)
+void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
+ const struct in6_addr *daddr,
+ const struct in6_addr *solicited_addr,
+ bool router, bool solicited, bool override, bool inc_opt)
{
struct sk_buff *skb;
struct in6_addr tmpaddr;
@@ -479,7 +480,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
if (ifp) {
src_addr = solicited_addr;
if (ifp->flags & IFA_F_OPTIMISTIC)
- override = 0;
+ override = false;
inc_opt |= ifp->idev->cnf.force_tllao;
in6_ifa_put(ifp);
} else {
@@ -557,7 +558,7 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
}
if (ipv6_addr_any(saddr))
- inc_opt = 0;
+ inc_opt = false;
if (inc_opt)
optlen += ndisc_opt_addr_space(dev);
@@ -663,9 +664,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
}
ndisc_send_ns(dev, neigh, target, target, saddr);
} else if ((probes -= neigh->parms->app_probes) < 0) {
-#ifdef CONFIG_ARPD
neigh_app_ns(neigh);
-#endif
} else {
addrconf_addr_solict_mult(target, &mcaddr);
ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
@@ -693,7 +692,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
u8 *lladdr = NULL;
- u32 ndoptlen = skb->tail - (skb->transport_header +
+ u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
offsetof(struct nd_msg, opt));
struct ndisc_options ndopts;
struct net_device *dev = skb->dev;
@@ -790,7 +789,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
(is_router = pndisc_is_router(&msg->target, dev)) >= 0)) {
if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
skb->pkt_type != PACKET_HOST &&
- inc != 0 &&
+ inc &&
idev->nd_parms->proxy_delay != 0) {
/*
* for anycast or proxy,
@@ -853,7 +852,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
u8 *lladdr = NULL;
- u32 ndoptlen = skb->tail - (skb->transport_header +
+ u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
offsetof(struct nd_msg, opt));
struct ndisc_options ndopts;
struct net_device *dev = skb->dev;
@@ -1069,7 +1068,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
__u8 * opt = (__u8 *)(ra_msg + 1);
- optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg);
+ optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) -
+ sizeof(struct ra_msg);
if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
ND_PRINTK(2, warn, "RA: source address is not link-local\n");
@@ -1346,7 +1346,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
u8 *hdr;
struct ndisc_options ndopts;
struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb);
- u32 ndoptlen = skb->tail - (skb->transport_header +
+ u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
offsetof(struct rd_msg, opt));
#ifdef CONFIG_IPV6_NDISC_NODETYPE
@@ -1368,8 +1368,11 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts))
return;
- if (!ndopts.nd_opts_rh)
+ if (!ndopts.nd_opts_rh) {
+ ip6_redirect_no_header(skb, dev_net(skb->dev),
+ skb->dev->ifindex, 0);
return;
+ }
hdr = (u8 *)ndopts.nd_opts_rh;
hdr += 8;
@@ -1516,10 +1519,27 @@ static void pndisc_redo(struct sk_buff *skb)
kfree_skb(skb);
}
+static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
+{
+ struct inet6_dev *idev = __in6_dev_get(skb->dev);
+
+ if (!idev)
+ return true;
+ if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED &&
+ idev->cnf.suppress_frag_ndisc) {
+ net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n");
+ return true;
+ }
+ return false;
+}
+
int ndisc_rcv(struct sk_buff *skb)
{
struct nd_msg *msg;
+ if (ndisc_suppress_frag_ndisc(skb))
+ return 0;
+
if (skb_linearize(skb))
return 0;
@@ -1568,14 +1588,14 @@ int ndisc_rcv(struct sk_buff *skb)
static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
struct inet6_dev *idev;
switch (event) {
case NETDEV_CHANGEADDR:
neigh_changeaddr(&nd_tbl, dev);
- fib6_run_gc(~0UL, net);
+ fib6_run_gc(0, net, false);
idev = in6_dev_get(dev);
if (!idev)
break;
@@ -1585,7 +1605,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
break;
case NETDEV_DOWN:
neigh_ifdown(&nd_tbl, dev);
- fib6_run_gc(~0UL, net);
+ fib6_run_gc(0, net, false);
break;
case NETDEV_NOTIFY_PEERS:
ndisc_send_unsol_na(dev);
@@ -1707,24 +1727,28 @@ int __init ndisc_init(void)
if (err)
goto out_unregister_pernet;
#endif
- err = register_netdevice_notifier(&ndisc_netdev_notifier);
- if (err)
- goto out_unregister_sysctl;
out:
return err;
-out_unregister_sysctl:
#ifdef CONFIG_SYSCTL
- neigh_sysctl_unregister(&nd_tbl.parms);
out_unregister_pernet:
-#endif
unregister_pernet_subsys(&ndisc_net_ops);
goto out;
+#endif
}
-void ndisc_cleanup(void)
+int __init ndisc_late_init(void)
+{
+ return register_netdevice_notifier(&ndisc_netdev_notifier);
+}
+
+void ndisc_late_cleanup(void)
{
unregister_netdevice_notifier(&ndisc_netdev_notifier);
+}
+
+void ndisc_cleanup(void)
+{
#ifdef CONFIG_SYSCTL
neigh_sysctl_unregister(&nd_tbl.parms);
#endif
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 7f45af3e8128..3edca795aebb 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -165,6 +165,19 @@ config IP6_NF_TARGET_REJECT_SKERR
If unsure, say N.
+config IP6_NF_TARGET_SYNPROXY
+ tristate "SYNPROXY target support"
+ depends on NF_CONNTRACK && NETFILTER_ADVANCED
+ select NETFILTER_SYNPROXY
+ select SYN_COOKIES
+ help
+ The SYNPROXY target allows you to intercept TCP connections and
+ establish them using syncookies before they are passed on to the
+ server. This allows to avoid conntrack and server resource usage
+ during SYN-flood attacks.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config IP6_NF_MANGLE
tristate "Packet mangling"
default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 2d11fcc2cf3c..2b53738f798c 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_NF_NAT_IPV6) += ip6table_nat.o
nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
# l3 independent conntrack
-obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
+obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o
nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
@@ -37,3 +37,4 @@ obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o
obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o
obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
+obj-$(CONFIG_IP6_NF_TARGET_SYNPROXY) += ip6t_SYNPROXY.o
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 60e9053bab05..3e4e92d5e157 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -71,12 +71,12 @@ static int device_cmp(struct nf_conn *ct, void *ifindex)
static int masq_device_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
- const struct net_device *dev = ptr;
+ const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
if (event == NETDEV_DOWN)
nf_ct_iterate_cleanup(net, device_cmp,
- (void *)(long)dev->ifindex);
+ (void *)(long)dev->ifindex, 0, 0);
return NOTIFY_DONE;
}
@@ -89,8 +89,10 @@ static int masq_inet_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct inet6_ifaddr *ifa = ptr;
+ struct netdev_notifier_info info;
- return masq_device_event(this, event, ifa->idev->dev);
+ netdev_notifier_info_init(&info, ifa->idev->dev);
+ return masq_device_event(this, event, &info);
}
static struct notifier_block masq_inet_notifier = {
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 573c232239b0..4eaa3b98e81f 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -169,7 +169,25 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
nf_ct_attach(nskb, oldskb);
- ip6_local_out(nskb);
+#ifdef CONFIG_BRIDGE_NETFILTER
+ /* If we use ip6_local_out for bridged traffic, the MAC source on
+ * the RST will be ours, instead of the destination's. This confuses
+ * some routers/firewalls, and they drop the packet. So we need to
+ * build the eth header using the original destination's MAC as the
+ * source, and send the RST packet directly.
+ */
+ if (oldskb->nf_bridge) {
+ struct ethhdr *oeth = eth_hdr(oldskb);
+ nskb->dev = oldskb->nf_bridge->physindev;
+ nskb->protocol = htons(ETH_P_IPV6);
+ ip6h->payload_len = htons(sizeof(struct tcphdr));
+ if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
+ oeth->h_source, oeth->h_dest, nskb->len) < 0)
+ return;
+ dev_queue_xmit(nskb);
+ } else
+#endif
+ ip6_local_out(nskb);
}
static inline void
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
new file mode 100644
index 000000000000..19cfea8dbcaa
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -0,0 +1,499 @@
+/*
+ * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/ip6_checksum.h>
+#include <net/ip6_route.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_SYNPROXY.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+
+static struct ipv6hdr *
+synproxy_build_ip(struct sk_buff *skb, const struct in6_addr *saddr,
+ const struct in6_addr *daddr)
+{
+ struct ipv6hdr *iph;
+
+ skb_reset_network_header(skb);
+ iph = (struct ipv6hdr *)skb_put(skb, sizeof(*iph));
+ ip6_flow_hdr(iph, 0, 0);
+ iph->hop_limit = 64; //XXX
+ iph->nexthdr = IPPROTO_TCP;
+ iph->saddr = *saddr;
+ iph->daddr = *daddr;
+
+ return iph;
+}
+
+static void
+synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
+ struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
+ struct ipv6hdr *niph, struct tcphdr *nth,
+ unsigned int tcp_hdr_size)
+{
+ struct net *net = nf_ct_net((struct nf_conn *)nfct);
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+
+ nth->check = ~tcp_v6_check(tcp_hdr_size, &niph->saddr, &niph->daddr, 0);
+ nskb->ip_summed = CHECKSUM_PARTIAL;
+ nskb->csum_start = (unsigned char *)nth - nskb->head;
+ nskb->csum_offset = offsetof(struct tcphdr, check);
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_TCP;
+ fl6.saddr = niph->saddr;
+ fl6.daddr = niph->daddr;
+ fl6.fl6_sport = nth->source;
+ fl6.fl6_dport = nth->dest;
+ security_skb_classify_flow((struct sk_buff *)skb, flowi6_to_flowi(&fl6));
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (dst == NULL || dst->error) {
+ dst_release(dst);
+ goto free_nskb;
+ }
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
+ if (IS_ERR(dst))
+ goto free_nskb;
+
+ skb_dst_set(nskb, dst);
+
+ if (nfct) {
+ nskb->nfct = nfct;
+ nskb->nfctinfo = ctinfo;
+ nf_conntrack_get(nfct);
+ }
+
+ ip6_local_out(nskb);
+ return;
+
+free_nskb:
+ kfree_skb(nskb);
+}
+
+static void
+synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct ipv6hdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+ u16 mss = opts->mss;
+
+ iph = ipv6_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, &iph->daddr, &iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(__cookie_v6_init_sequence(iph, th, &mss));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
+ if (opts->options & XT_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = 0;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+ niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_server_syn(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts, u32 recv_seq)
+{
+ struct sk_buff *nskb;
+ struct ipv6hdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ipv6_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(recv_seq - 1);
+ /* ack_seq is used to relay our ISN to the synproxy hook to initialize
+ * sequence number translation once a connection tracking entry exists.
+ */
+ nth->ack_seq = htonl(ntohl(th->ack_seq) - 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN;
+ if (opts->options & XT_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = th->window;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
+ niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_server_ack(const struct synproxy_net *snet,
+ const struct ip_ct_tcp *state,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct ipv6hdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ipv6_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, &iph->daddr, &iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(ntohl(th->ack_seq));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_client_ack(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct ipv6hdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ipv6_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(ntohl(th->seq) + 1);
+ nth->ack_seq = th->ack_seq;
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = ntohs(htons(th->window) >> opts->wscale);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+static bool
+synproxy_recv_client_ack(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ struct synproxy_options *opts, u32 recv_seq)
+{
+ int mss;
+
+ mss = __cookie_v6_check(ipv6_hdr(skb), th, ntohl(th->ack_seq) - 1);
+ if (mss == 0) {
+ this_cpu_inc(snet->stats->cookie_invalid);
+ return false;
+ }
+
+ this_cpu_inc(snet->stats->cookie_valid);
+ opts->mss = mss;
+
+ if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy_check_timestamp_cookie(opts);
+
+ synproxy_send_server_syn(snet, skb, th, opts, recv_seq);
+ return true;
+}
+
+static unsigned int
+synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_synproxy_info *info = par->targinfo;
+ struct synproxy_net *snet = synproxy_pernet(dev_net(par->in));
+ struct synproxy_options opts = {};
+ struct tcphdr *th, _th;
+
+ if (nf_ip6_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
+ return NF_DROP;
+
+ th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
+ if (th == NULL)
+ return NF_DROP;
+
+ synproxy_parse_options(skb, par->thoff, th, &opts);
+
+ if (th->syn && !(th->ack || th->fin || th->rst)) {
+ /* Initial SYN from client */
+ this_cpu_inc(snet->stats->syn_received);
+
+ if (th->ece && th->cwr)
+ opts.options |= XT_SYNPROXY_OPT_ECN;
+
+ opts.options &= info->options;
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy_init_timestamp_cookie(info, &opts);
+ else
+ opts.options &= ~(XT_SYNPROXY_OPT_WSCALE |
+ XT_SYNPROXY_OPT_SACK_PERM |
+ XT_SYNPROXY_OPT_ECN);
+
+ synproxy_send_client_synack(skb, th, &opts);
+ return NF_DROP;
+
+ } else if (th->ack && !(th->fin || th->rst || th->syn)) {
+ /* ACK from client */
+ synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq));
+ return NF_DROP;
+ }
+
+ return XT_CONTINUE;
+}
+
+static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out));
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ struct nf_conn_synproxy *synproxy;
+ struct synproxy_options opts = {};
+ const struct ip_ct_tcp *state;
+ struct tcphdr *th, _th;
+ __be16 frag_off;
+ u8 nexthdr;
+ int thoff;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct == NULL)
+ return NF_ACCEPT;
+
+ synproxy = nfct_synproxy(ct);
+ if (synproxy == NULL)
+ return NF_ACCEPT;
+
+ if (nf_is_loopback_packet(skb))
+ return NF_ACCEPT;
+
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+ &frag_off);
+ if (thoff < 0)
+ return NF_ACCEPT;
+
+ th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
+ if (th == NULL)
+ return NF_DROP;
+
+ state = &ct->proto.tcp;
+ switch (state->state) {
+ case TCP_CONNTRACK_CLOSE:
+ if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
+ ntohl(th->seq) + 1);
+ break;
+ }
+
+ if (!th->syn || th->ack ||
+ CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+ break;
+
+ /* Reopened connection - reset the sequence number and timestamp
+ * adjustments, they will get initialized once the connection is
+ * reestablished.
+ */
+ nf_ct_seqadj_init(ct, ctinfo, 0);
+ synproxy->tsoff = 0;
+ this_cpu_inc(snet->stats->conn_reopened);
+
+ /* fall through */
+ case TCP_CONNTRACK_SYN_SENT:
+ synproxy_parse_options(skb, thoff, th, &opts);
+
+ if (!th->syn && th->ack &&
+ CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+ /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
+ * therefore we need to add 1 to make the SYN sequence
+ * number match the one of first SYN.
+ */
+ if (synproxy_recv_client_ack(snet, skb, th, &opts,
+ ntohl(th->seq) + 1))
+ this_cpu_inc(snet->stats->cookie_retrans);
+
+ return NF_DROP;
+ }
+
+ synproxy->isn = ntohl(th->ack_seq);
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy->its = opts.tsecr;
+ break;
+ case TCP_CONNTRACK_SYN_RECV:
+ if (!th->syn || !th->ack)
+ break;
+
+ synproxy_parse_options(skb, thoff, th, &opts);
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy->tsoff = opts.tsval - synproxy->its;
+
+ opts.options &= ~(XT_SYNPROXY_OPT_MSS |
+ XT_SYNPROXY_OPT_WSCALE |
+ XT_SYNPROXY_OPT_SACK_PERM);
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_server_ack(snet, state, skb, th, &opts);
+
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_client_ack(snet, skb, th, &opts);
+
+ consume_skb(skb);
+ return NF_STOLEN;
+ default:
+ break;
+ }
+
+ synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
+ return NF_ACCEPT;
+}
+
+static int synproxy_tg6_check(const struct xt_tgchk_param *par)
+{
+ const struct ip6t_entry *e = par->entryinfo;
+
+ if (!(e->ipv6.flags & IP6T_F_PROTO) ||
+ e->ipv6.proto != IPPROTO_TCP ||
+ e->ipv6.invflags & XT_INV_PROTO)
+ return -EINVAL;
+
+ return nf_ct_l3proto_try_module_get(par->family);
+}
+
+static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_l3proto_module_put(par->family);
+}
+
+static struct xt_target synproxy_tg6_reg __read_mostly = {
+ .name = "SYNPROXY",
+ .family = NFPROTO_IPV6,
+ .target = synproxy_tg6,
+ .targetsize = sizeof(struct xt_synproxy_info),
+ .checkentry = synproxy_tg6_check,
+ .destroy = synproxy_tg6_destroy,
+ .me = THIS_MODULE,
+};
+
+static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = {
+ {
+ .hook = ipv6_synproxy_hook,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+ {
+ .hook = ipv6_synproxy_hook,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+};
+
+static int __init synproxy_tg6_init(void)
+{
+ int err;
+
+ err = nf_register_hooks(ipv6_synproxy_ops,
+ ARRAY_SIZE(ipv6_synproxy_ops));
+ if (err < 0)
+ goto err1;
+
+ err = xt_register_target(&synproxy_tg6_reg);
+ if (err < 0)
+ goto err2;
+
+ return 0;
+
+err2:
+ nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops));
+err1:
+ return err;
+}
+
+static void __exit synproxy_tg6_exit(void)
+{
+ xt_unregister_target(&synproxy_tg6_reg);
+ nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops));
+}
+
+module_init(synproxy_tg6_init);
+module_exit(synproxy_tg6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index beb5777d2043..29b44b14c5ea 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -61,7 +61,7 @@ static int __net_init ip6table_filter_net_init(struct net *net)
net->ipv6.ip6table_filter =
ip6t_register_table(net, &packet_filter, repl);
kfree(repl);
- return PTR_RET(net->ipv6.ip6table_filter);
+ return PTR_ERR_OR_ZERO(net->ipv6.ip6table_filter);
}
static void __net_exit ip6table_filter_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index e075399d8b72..c705907ae6ab 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -101,7 +101,7 @@ static int __net_init ip6table_mangle_net_init(struct net *net)
net->ipv6.ip6table_mangle =
ip6t_register_table(net, &packet_mangler, repl);
kfree(repl);
- return PTR_RET(net->ipv6.ip6table_mangle);
+ return PTR_ERR_OR_ZERO(net->ipv6.ip6table_mangle);
}
static void __net_exit ip6table_mangle_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 6383f90efda8..9b076d2d3a7b 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -293,7 +293,7 @@ static int __net_init ip6table_nat_net_init(struct net *net)
return -ENOMEM;
net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl);
kfree(repl);
- return PTR_RET(net->ipv6.ip6table_nat);
+ return PTR_ERR_OR_ZERO(net->ipv6.ip6table_nat);
}
static void __net_exit ip6table_nat_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 60d1bddff7a0..9a626d86720f 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -40,7 +40,7 @@ static int __net_init ip6table_raw_net_init(struct net *net)
net->ipv6.ip6table_raw =
ip6t_register_table(net, &packet_raw, repl);
kfree(repl);
- return PTR_RET(net->ipv6.ip6table_raw);
+ return PTR_ERR_OR_ZERO(net->ipv6.ip6table_raw);
}
static void __net_exit ip6table_raw_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index db155351339c..ce88d1d7e525 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -58,7 +58,7 @@ static int __net_init ip6table_security_net_init(struct net *net)
net->ipv6.ip6table_security =
ip6t_register_table(net, &security_table, repl);
kfree(repl);
- return PTR_RET(net->ipv6.ip6table_security);
+ return PTR_ERR_OR_ZERO(net->ipv6.ip6table_security);
}
static void __net_exit ip6table_security_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index c9b6a6e6a1e8..d6e4dd8b58df 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -28,6 +28,7 @@
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
@@ -158,11 +159,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
/* adjust seqs for loopback traffic only in outgoing direction */
if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
!nf_is_loopback_packet(skb)) {
- typeof(nf_nat_seq_adjust_hook) seq_adjust;
-
- seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
- if (!seq_adjust ||
- !seq_adjust(skb, ct, ctinfo, protoff)) {
+ if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
return NF_DROP;
}
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
index 61aaf70f376e..2205e8eeeacf 100644
--- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
@@ -69,8 +69,8 @@ icmpv6_manip_pkt(struct sk_buff *skb,
hdr = (struct icmp6hdr *)(skb->data + hdroff);
l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum,
tuple, maniptype);
- if (hdr->icmp6_code == ICMPV6_ECHO_REQUEST ||
- hdr->icmp6_code == ICMPV6_ECHO_REPLY) {
+ if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
+ hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
hdr->icmp6_identifier,
tuple->src.u.icmp.id, 0);
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index c2e73e647e44..827f795209cf 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -5,6 +5,7 @@
#include <linux/export.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
+#include <net/addrconf.h>
void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
{
@@ -40,7 +41,8 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
u16 offset = sizeof(struct ipv6hdr);
struct ipv6_opt_hdr *exthdr =
(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
- unsigned int packet_len = skb->tail - skb->network_header;
+ unsigned int packet_len = skb_tail_pointer(skb) -
+ skb_network_header(skb);
int found_rhdr = 0;
*nexthdr = &ipv6_hdr(skb)->nexthdr;
@@ -74,3 +76,50 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
return offset;
}
EXPORT_SYMBOL(ip6_find_1stfragopt);
+
+#if IS_ENABLED(CONFIG_IPV6)
+int ip6_dst_hoplimit(struct dst_entry *dst)
+{
+ int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
+ if (hoplimit == 0) {
+ struct net_device *dev = dst->dev;
+ struct inet6_dev *idev;
+
+ rcu_read_lock();
+ idev = __in6_dev_get(dev);
+ if (idev)
+ hoplimit = idev->cnf.hop_limit;
+ else
+ hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
+ rcu_read_unlock();
+ }
+ return hoplimit;
+}
+EXPORT_SYMBOL(ip6_dst_hoplimit);
+#endif
+
+int __ip6_local_out(struct sk_buff *skb)
+{
+ int len;
+
+ len = skb->len - sizeof(struct ipv6hdr);
+ if (len > IPV6_MAXPLEN)
+ len = 0;
+ ipv6_hdr(skb)->payload_len = htons(len);
+
+ return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
+ skb_dst(skb)->dev, dst_output);
+}
+EXPORT_SYMBOL_GPL(__ip6_local_out);
+
+int ip6_local_out(struct sk_buff *skb)
+{
+ int err;
+
+ err = __ip6_local_out(skb);
+ if (likely(err == 1))
+ err = dst_output(skb);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(ip6_local_out);
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
new file mode 100644
index 000000000000..18f19df4189f
--- /dev/null
+++ b/net/ipv6/ping.c
@@ -0,0 +1,277 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * "Ping" sockets
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Based on ipv4/ping.c code.
+ *
+ * Authors: Lorenzo Colitti (IPv6 support)
+ * Vasiliy Kulikov / Openwall (IPv4 implementation, for Linux 2.6),
+ * Pavel Kankovsky (IPv4 implementation, for Linux 2.4.32)
+ *
+ */
+
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/protocol.h>
+#include <net/udp.h>
+#include <net/transp_v6.h>
+#include <net/ping.h>
+
+struct proto pingv6_prot = {
+ .name = "PINGv6",
+ .owner = THIS_MODULE,
+ .init = ping_init_sock,
+ .close = ping_close,
+ .connect = ip6_datagram_connect,
+ .disconnect = udp_disconnect,
+ .setsockopt = ipv6_setsockopt,
+ .getsockopt = ipv6_getsockopt,
+ .sendmsg = ping_v6_sendmsg,
+ .recvmsg = ping_recvmsg,
+ .bind = ping_bind,
+ .backlog_rcv = ping_queue_rcv_skb,
+ .hash = ping_hash,
+ .unhash = ping_unhash,
+ .get_port = ping_get_port,
+ .obj_size = sizeof(struct raw6_sock),
+};
+EXPORT_SYMBOL_GPL(pingv6_prot);
+
+static struct inet_protosw pingv6_protosw = {
+ .type = SOCK_DGRAM,
+ .protocol = IPPROTO_ICMPV6,
+ .prot = &pingv6_prot,
+ .ops = &inet6_dgram_ops,
+ .no_check = UDP_CSUM_DEFAULT,
+ .flags = INET_PROTOSW_REUSE,
+};
+
+
+/* Compatibility glue so we can support IPv6 when it's compiled as a module */
+static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
+{
+ return -EAFNOSUPPORT;
+}
+static int dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
+ struct sk_buff *skb)
+{
+ return -EAFNOSUPPORT;
+}
+static int dummy_icmpv6_err_convert(u8 type, u8 code, int *err)
+{
+ return -EAFNOSUPPORT;
+}
+static void dummy_ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
+ __be16 port, u32 info, u8 *payload) {}
+static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
+ const struct net_device *dev, int strict)
+{
+ return 0;
+}
+
+int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct icmp6hdr user_icmph;
+ int addr_type;
+ struct in6_addr *daddr;
+ int iif = 0;
+ struct flowi6 fl6;
+ int err;
+ int hlimit;
+ struct dst_entry *dst;
+ struct rt6_info *rt;
+ struct pingfakehdr pfh;
+
+ pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
+
+ err = ping_common_sendmsg(AF_INET6, msg, len, &user_icmph,
+ sizeof(user_icmph));
+ if (err)
+ return err;
+
+ if (msg->msg_name) {
+ struct sockaddr_in6 *u = (struct sockaddr_in6 *) msg->msg_name;
+ if (msg->msg_namelen < sizeof(struct sockaddr_in6) ||
+ u->sin6_family != AF_INET6) {
+ return -EINVAL;
+ }
+ if (sk->sk_bound_dev_if &&
+ sk->sk_bound_dev_if != u->sin6_scope_id) {
+ return -EINVAL;
+ }
+ daddr = &(u->sin6_addr);
+ iif = u->sin6_scope_id;
+ } else {
+ if (sk->sk_state != TCP_ESTABLISHED)
+ return -EDESTADDRREQ;
+ daddr = &np->daddr;
+ }
+
+ if (!iif)
+ iif = sk->sk_bound_dev_if;
+
+ addr_type = ipv6_addr_type(daddr);
+ if (__ipv6_addr_needs_scope_id(addr_type) && !iif)
+ return -EINVAL;
+ if (addr_type & IPV6_ADDR_MAPPED)
+ return -EINVAL;
+
+ /* TODO: use ip6_datagram_send_ctl to get options from cmsg */
+
+ memset(&fl6, 0, sizeof(fl6));
+
+ fl6.flowi6_proto = IPPROTO_ICMPV6;
+ fl6.saddr = np->saddr;
+ fl6.daddr = *daddr;
+ fl6.fl6_icmp_type = user_icmph.icmp6_type;
+ fl6.fl6_icmp_code = user_icmph.icmp6_code;
+ security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+
+ if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+ fl6.flowi6_oif = np->mcast_oif;
+ else if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->ucast_oif;
+
+ dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, 1);
+ if (IS_ERR(dst))
+ return PTR_ERR(dst);
+ rt = (struct rt6_info *) dst;
+
+ np = inet6_sk(sk);
+ if (!np)
+ return -EBADF;
+
+ if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+ fl6.flowi6_oif = np->mcast_oif;
+ else if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->ucast_oif;
+
+ pfh.icmph.type = user_icmph.icmp6_type;
+ pfh.icmph.code = user_icmph.icmp6_code;
+ pfh.icmph.checksum = 0;
+ pfh.icmph.un.echo.id = inet->inet_sport;
+ pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence;
+ pfh.iov = msg->msg_iov;
+ pfh.wcheck = 0;
+ pfh.family = AF_INET6;
+
+ if (ipv6_addr_is_multicast(&fl6.daddr))
+ hlimit = np->mcast_hops;
+ else
+ hlimit = np->hop_limit;
+ if (hlimit < 0)
+ hlimit = ip6_dst_hoplimit(dst);
+
+ lock_sock(sk);
+ err = ip6_append_data(sk, ping_getfrag, &pfh, len,
+ 0, hlimit,
+ np->tclass, NULL, &fl6, rt,
+ MSG_DONTWAIT, np->dontfrag);
+
+ if (err) {
+ ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev,
+ ICMP6_MIB_OUTERRORS);
+ ip6_flush_pending_frames(sk);
+ } else {
+ err = icmpv6_push_pending_frames(sk, &fl6,
+ (struct icmp6hdr *) &pfh.icmph,
+ len);
+ }
+ release_sock(sk);
+
+ if (err)
+ return err;
+
+ return len;
+}
+
+#ifdef CONFIG_PROC_FS
+static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ return ping_seq_start(seq, pos, AF_INET6);
+}
+
+static int ping_v6_seq_show(struct seq_file *seq, void *v)
+{
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq, IPV6_SEQ_DGRAM_HEADER);
+ } else {
+ int bucket = ((struct ping_iter_state *) seq->private)->bucket;
+ struct inet_sock *inet = inet_sk(v);
+ __u16 srcp = ntohs(inet->inet_sport);
+ __u16 destp = ntohs(inet->inet_dport);
+ ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket);
+ }
+ return 0;
+}
+
+static struct ping_seq_afinfo ping_v6_seq_afinfo = {
+ .name = "icmp6",
+ .family = AF_INET6,
+ .seq_fops = &ping_seq_fops,
+ .seq_ops = {
+ .start = ping_v6_seq_start,
+ .show = ping_v6_seq_show,
+ .next = ping_seq_next,
+ .stop = ping_seq_stop,
+ },
+};
+
+static int __net_init ping_v6_proc_init_net(struct net *net)
+{
+ return ping_proc_register(net, &ping_v6_seq_afinfo);
+}
+
+static void __net_init ping_v6_proc_exit_net(struct net *net)
+{
+ return ping_proc_unregister(net, &ping_v6_seq_afinfo);
+}
+
+static struct pernet_operations ping_v6_net_ops = {
+ .init = ping_v6_proc_init_net,
+ .exit = ping_v6_proc_exit_net,
+};
+#endif
+
+int __init pingv6_init(void)
+{
+#ifdef CONFIG_PROC_FS
+ int ret = register_pernet_subsys(&ping_v6_net_ops);
+ if (ret)
+ return ret;
+#endif
+ pingv6_ops.ipv6_recv_error = ipv6_recv_error;
+ pingv6_ops.ip6_datagram_recv_ctl = ip6_datagram_recv_ctl;
+ pingv6_ops.icmpv6_err_convert = icmpv6_err_convert;
+ pingv6_ops.ipv6_icmp_error = ipv6_icmp_error;
+ pingv6_ops.ipv6_chk_addr = ipv6_chk_addr;
+ return inet6_register_protosw(&pingv6_protosw);
+}
+
+/* This never gets called because it's not possible to unload the ipv6 module,
+ * but just in case.
+ */
+void pingv6_exit(void)
+{
+ pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error;
+ pingv6_ops.ip6_datagram_recv_ctl = dummy_ip6_datagram_recv_ctl;
+ pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert;
+ pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error;
+ pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr;
+#ifdef CONFIG_PROC_FS
+ unregister_pernet_subsys(&ping_v6_net_ops);
+#endif
+ inet6_unregister_protosw(&pingv6_protosw);
+}
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 51c3285b5d9b..091d066a57b3 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -91,6 +91,10 @@ static const struct snmp_mib snmp6_ipstats_list[] = {
SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),
SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS),
/* IPSTATS_MIB_CSUMERRORS is not relevant in IPv6 (no checksum) */
+ SNMP_MIB_ITEM("Ip6InNoECTPkts", IPSTATS_MIB_NOECTPKTS),
+ SNMP_MIB_ITEM("Ip6InECT1Pkts", IPSTATS_MIB_ECT1PKTS),
+ SNMP_MIB_ITEM("Ip6InECT0Pkts", IPSTATS_MIB_ECT0PKTS),
+ SNMP_MIB_ITEM("Ip6InCEPkts", IPSTATS_MIB_CEPKTS),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index eedff8ccded5..58916bbb1728 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -63,6 +63,8 @@
#include <linux/seq_file.h>
#include <linux/export.h>
+#define ICMPV6_HDRLEN 4 /* ICMPv6 header, RFC 4443 Section 2.1 */
+
static struct raw_hashinfo raw_v6_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock),
};
@@ -108,11 +110,14 @@ found:
*/
static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb)
{
- struct icmp6hdr *_hdr;
+ struct icmp6hdr _hdr;
const struct icmp6hdr *hdr;
+ /* We require only the four bytes of the ICMPv6 header, not any
+ * additional bytes of message body in "struct icmp6hdr".
+ */
hdr = skb_header_pointer(skb, skb_transport_offset(skb),
- sizeof(_hdr), &_hdr);
+ ICMPV6_HDRLEN, &_hdr);
if (hdr) {
const __u32 *data = &raw6_sk(sk)->filter.data[0];
unsigned int type = hdr->icmp6_type;
@@ -628,6 +633,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
goto error;
skb_reserve(skb, hlen);
+ skb->protocol = htons(ETH_P_IPV6);
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
skb_dst_set(skb, &rt->dst);
@@ -1132,7 +1138,8 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
spin_lock_bh(&sk->sk_receive_queue.lock);
skb = skb_peek(&sk->sk_receive_queue);
if (skb != NULL)
- amount = skb->tail - skb->transport_header;
+ amount = skb_tail_pointer(skb) -
+ skb_transport_header(skb);
spin_unlock_bh(&sk->sk_receive_queue.lock);
return put_user(amount, (int __user *)arg);
}
@@ -1226,45 +1233,16 @@ struct proto rawv6_prot = {
};
#ifdef CONFIG_PROC_FS
-static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
-{
- struct ipv6_pinfo *np = inet6_sk(sp);
- const struct in6_addr *dest, *src;
- __u16 destp, srcp;
-
- dest = &np->daddr;
- src = &np->rcv_saddr;
- destp = 0;
- srcp = inet_sk(sp)->inet_num;
- seq_printf(seq,
- "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
- "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n",
- i,
- src->s6_addr32[0], src->s6_addr32[1],
- src->s6_addr32[2], src->s6_addr32[3], srcp,
- dest->s6_addr32[0], dest->s6_addr32[1],
- dest->s6_addr32[2], dest->s6_addr32[3], destp,
- sp->sk_state,
- sk_wmem_alloc_get(sp),
- sk_rmem_alloc_get(sp),
- 0, 0L, 0,
- from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
- 0,
- sock_i_ino(sp),
- atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
-}
-
static int raw6_seq_show(struct seq_file *seq, void *v)
{
- if (v == SEQ_START_TOKEN)
- seq_printf(seq,
- " sl "
- "local_address "
- "remote_address "
- "st tx_queue rx_queue tr tm->when retrnsmt"
- " uid timeout inode ref pointer drops\n");
- else
- raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket);
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq, IPV6_SEQ_DGRAM_HEADER);
+ } else {
+ struct sock *sp = v;
+ __u16 srcp = inet_sk(sp)->inet_num;
+ ip6_dgram_sock_seq_show(seq, v, srcp, 0,
+ raw_seq_private(seq)->bucket);
+ }
return 0;
}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 790d9f4b8b0b..1aeb473b2cc6 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -490,6 +490,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
ipv6_hdr(head)->payload_len = htons(payload_len);
ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
IP6CB(head)->nhoff = nhoff;
+ IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
/* Yes, and fold redundant checksum back. 8) */
if (head->ip_summed == CHECKSUM_COMPLETE)
@@ -524,6 +525,9 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
struct net *net = dev_net(skb_dst(skb)->dev);
int evicted;
+ if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
+ goto fail_hdr;
+
IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
/* Jumbo payload inhibits frag. header */
@@ -544,6 +548,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);
IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
+ IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
return 1;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ad0aa6b0b86a..c979dd96d82a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -65,6 +65,12 @@
#include <linux/sysctl.h>
#endif
+enum rt6_nud_state {
+ RT6_NUD_FAIL_HARD = -2,
+ RT6_NUD_FAIL_SOFT = -1,
+ RT6_NUD_SUCCEED = 1
+};
+
static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
const struct in6_addr *dest);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
@@ -83,6 +89,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
+static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
@@ -276,9 +283,8 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
- rt->rt6i_genid = rt_genid(net);
+ rt->rt6i_genid = rt_genid_ipv6(net);
INIT_LIST_HEAD(&rt->rt6i_siblings);
- rt->rt6i_nsiblings = 0;
}
return rt;
}
@@ -394,7 +400,8 @@ static int rt6_info_hash_nhsfn(unsigned int candidate_count,
}
static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
- struct flowi6 *fl6)
+ struct flowi6 *fl6, int oif,
+ int strict)
{
struct rt6_info *sibling, *next_sibling;
int route_choosen;
@@ -408,6 +415,8 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
&match->rt6i_siblings, rt6i_siblings) {
route_choosen--;
if (route_choosen == 0) {
+ if (rt6_score_route(sibling, oif, strict) < 0)
+ break;
match = sibling;
break;
}
@@ -527,26 +536,29 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif)
return 0;
}
-static inline bool rt6_check_neigh(struct rt6_info *rt)
+static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
struct neighbour *neigh;
- bool ret = false;
+ enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
if (rt->rt6i_flags & RTF_NONEXTHOP ||
!(rt->rt6i_flags & RTF_GATEWAY))
- return true;
+ return RT6_NUD_SUCCEED;
rcu_read_lock_bh();
neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
if (neigh) {
read_lock(&neigh->lock);
if (neigh->nud_state & NUD_VALID)
- ret = true;
+ ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
else if (!(neigh->nud_state & NUD_FAILED))
- ret = true;
+ ret = RT6_NUD_SUCCEED;
#endif
read_unlock(&neigh->lock);
+ } else {
+ ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
+ RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT;
}
rcu_read_unlock_bh();
@@ -560,43 +572,52 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
m = rt6_check_dev(rt, oif);
if (!m && (strict & RT6_LOOKUP_F_IFACE))
- return -1;
+ return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
- if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE))
- return -1;
+ if (strict & RT6_LOOKUP_F_REACHABLE) {
+ int n = rt6_check_neigh(rt);
+ if (n < 0)
+ return n;
+ }
return m;
}
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
- int *mpri, struct rt6_info *match)
+ int *mpri, struct rt6_info *match,
+ bool *do_rr)
{
int m;
+ bool match_do_rr = false;
if (rt6_check_expired(rt))
goto out;
m = rt6_score_route(rt, oif, strict);
- if (m < 0)
+ if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) {
+ match_do_rr = true;
+ m = 0; /* lowest valid score */
+ } else if (m < 0) {
goto out;
+ }
+
+ if (strict & RT6_LOOKUP_F_REACHABLE)
+ rt6_probe(rt);
if (m > *mpri) {
- if (strict & RT6_LOOKUP_F_REACHABLE)
- rt6_probe(match);
+ *do_rr = match_do_rr;
*mpri = m;
match = rt;
- } else if (strict & RT6_LOOKUP_F_REACHABLE) {
- rt6_probe(rt);
}
-
out:
return match;
}
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
struct rt6_info *rr_head,
- u32 metric, int oif, int strict)
+ u32 metric, int oif, int strict,
+ bool *do_rr)
{
struct rt6_info *rt, *match;
int mpri = -1;
@@ -604,10 +625,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
match = NULL;
for (rt = rr_head; rt && rt->rt6i_metric == metric;
rt = rt->dst.rt6_next)
- match = find_match(rt, oif, strict, &mpri, match);
+ match = find_match(rt, oif, strict, &mpri, match, do_rr);
for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
rt = rt->dst.rt6_next)
- match = find_match(rt, oif, strict, &mpri, match);
+ match = find_match(rt, oif, strict, &mpri, match, do_rr);
return match;
}
@@ -616,15 +637,16 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
struct rt6_info *match, *rt0;
struct net *net;
+ bool do_rr = false;
rt0 = fn->rr_ptr;
if (!rt0)
fn->rr_ptr = rt0 = fn->leaf;
- match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
+ match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
+ &do_rr);
- if (!match &&
- (strict & RT6_LOOKUP_F_REACHABLE)) {
+ if (do_rr) {
struct rt6_info *next = rt0->dst.rt6_next;
/* no entries matched; do round-robin */
@@ -743,7 +765,7 @@ restart:
rt = fn->leaf;
rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
- rt = rt6_multipath_select(rt, fl6);
+ rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
BACKTRACK(net, &fl6->saddr);
out:
dst_use(&rt->dst, jiffies);
@@ -875,8 +897,8 @@ restart_2:
restart:
rt = rt6_select(fn, oif, strict | reachable);
- if (rt->rt6i_nsiblings && oif == 0)
- rt = rt6_multipath_select(rt, fl6);
+ if (rt->rt6i_nsiblings)
+ rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
BACKTRACK(net, &fl6->saddr);
if (rt == net->ipv6.ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE)
@@ -1039,7 +1061,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
* DST_OBSOLETE_FORCE_CHK which forces validation calls down
* into this function always.
*/
- if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
+ if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
return NULL;
if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
@@ -1074,10 +1096,13 @@ static void ip6_link_failure(struct sk_buff *skb)
rt = (struct rt6_info *) skb_dst(skb);
if (rt) {
- if (rt->rt6i_flags & RTF_CACHE)
- rt6_update_expires(rt, 0);
- else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
+ if (rt->rt6i_flags & RTF_CACHE) {
+ dst_hold(&rt->dst);
+ if (ip6_del_rt(rt))
+ dst_free(&rt->dst);
+ } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
rt->rt6i_node->fn_sernum = -1;
+ }
}
}
@@ -1131,6 +1156,77 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
+/* Handle redirects */
+struct ip6rd_flowi {
+ struct flowi6 fl6;
+ struct in6_addr gateway;
+};
+
+static struct rt6_info *__ip6_route_redirect(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ int flags)
+{
+ struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
+ struct rt6_info *rt;
+ struct fib6_node *fn;
+
+ /* Get the "current" route for this destination and
+ * check if the redirect has come from approriate router.
+ *
+ * RFC 4861 specifies that redirects should only be
+ * accepted if they come from the nexthop to the target.
+ * Due to the way the routes are chosen, this notion
+ * is a bit fuzzy and one might need to check all possible
+ * routes.
+ */
+
+ read_lock_bh(&table->tb6_lock);
+ fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+restart:
+ for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ if (rt6_check_expired(rt))
+ continue;
+ if (rt->dst.error)
+ break;
+ if (!(rt->rt6i_flags & RTF_GATEWAY))
+ continue;
+ if (fl6->flowi6_oif != rt->dst.dev->ifindex)
+ continue;
+ if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
+ continue;
+ break;
+ }
+
+ if (!rt)
+ rt = net->ipv6.ip6_null_entry;
+ else if (rt->dst.error) {
+ rt = net->ipv6.ip6_null_entry;
+ goto out;
+ }
+ BACKTRACK(net, &fl6->saddr);
+out:
+ dst_hold(&rt->dst);
+
+ read_unlock_bh(&table->tb6_lock);
+
+ return rt;
+};
+
+static struct dst_entry *ip6_route_redirect(struct net *net,
+ const struct flowi6 *fl6,
+ const struct in6_addr *gateway)
+{
+ int flags = RT6_LOOKUP_F_HAS_SADDR;
+ struct ip6rd_flowi rdfl;
+
+ rdfl.fl6 = *fl6;
+ rdfl.gateway = *gateway;
+
+ return fib6_rule_lookup(net, &rdfl.fl6,
+ flags, __ip6_route_redirect);
+}
+
void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
{
const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
@@ -1145,13 +1241,32 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
fl6.saddr = iph->saddr;
fl6.flowlabel = ip6_flowinfo(iph);
- dst = ip6_route_output(net, NULL, &fl6);
- if (!dst->error)
- rt6_do_redirect(dst, NULL, skb);
+ dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
+ rt6_do_redirect(dst, NULL, skb);
dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_redirect);
+void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
+ u32 mark)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_oif = oif;
+ fl6.flowi6_mark = mark;
+ fl6.flowi6_flags = 0;
+ fl6.daddr = msg->dest;
+ fl6.saddr = iph->daddr;
+
+ dst = ip6_route_redirect(net, &fl6, &iph->saddr);
+ rt6_do_redirect(dst, NULL, skb);
+ dst_release(dst);
+}
+
void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
@@ -1285,7 +1400,6 @@ static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
static int ip6_dst_gc(struct dst_ops *ops)
{
- unsigned long now = jiffies;
struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
@@ -1295,13 +1409,12 @@ static int ip6_dst_gc(struct dst_ops *ops)
int entries;
entries = dst_entries_get_fast(ops);
- if (time_after(rt_last_gc + rt_min_interval, now) &&
+ if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
entries <= rt_max_size)
goto out;
net->ipv6.ip6_rt_gc_expire++;
- fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
- net->ipv6.ip6_rt_last_gc = now;
+ fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
entries = dst_entries_get_slow(ops);
if (entries < ops->gc_thresh)
net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
@@ -1310,25 +1423,6 @@ out:
return entries > rt_max_size;
}
-int ip6_dst_hoplimit(struct dst_entry *dst)
-{
- int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
- if (hoplimit == 0) {
- struct net_device *dev = dst->dev;
- struct inet6_dev *idev;
-
- rcu_read_lock();
- idev = __in6_dev_get(dev);
- if (idev)
- hoplimit = idev->cnf.hop_limit;
- else
- hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
- rcu_read_unlock();
- }
- return hoplimit;
-}
-EXPORT_SYMBOL(ip6_dst_hoplimit);
-
/*
*
*/
@@ -1649,7 +1743,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
int optlen, on_link;
u8 *lladdr;
- optlen = skb->tail - skb->transport_header;
+ optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
optlen -= sizeof(*msg);
if (optlen < 0) {
@@ -2681,9 +2775,9 @@ errout:
}
static int ip6_route_dev_notify(struct notifier_block *this,
- unsigned long event, void *data)
+ unsigned long event, void *ptr)
{
- struct net_device *dev = (struct net_device *)data;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
@@ -2790,7 +2884,7 @@ static const struct file_operations rt6_stats_seq_fops = {
#ifdef CONFIG_SYSCTL
static
-int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
+int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net;
@@ -2801,11 +2895,11 @@ int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
net = (struct net *)ctl->extra1;
delay = net->ipv6.sysctl.flush_delay;
proc_dointvec(ctl, write, buffer, lenp, ppos);
- fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
+ fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
return 0;
}
-ctl_table ipv6_route_table_template[] = {
+struct ctl_table ipv6_route_table_template[] = {
{
.procname = "flush",
.data = &init_net.ipv6.sysctl.flush_delay,
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 335363478bbf..7ee5cb96db34 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -466,14 +466,14 @@ isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t)
static void ipip6_tunnel_uninit(struct net_device *dev)
{
- struct net *net = dev_net(dev);
- struct sit_net *sitn = net_generic(net, sit_net_id);
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct sit_net *sitn = net_generic(tunnel->net, sit_net_id);
if (dev == sitn->fb_tunnel_dev) {
RCU_INIT_POINTER(sitn->tunnels_wc[0], NULL);
} else {
- ipip6_tunnel_unlink(sitn, netdev_priv(dev));
- ipip6_tunnel_del_prl(netdev_priv(dev), NULL);
+ ipip6_tunnel_unlink(sitn, tunnel);
+ ipip6_tunnel_del_prl(tunnel, NULL);
}
dev_put(dev);
}
@@ -577,19 +577,21 @@ static int ipip6_rcv(struct sk_buff *skb)
if (tunnel != NULL) {
struct pcpu_tstats *tstats;
- secpath_reset(skb);
+ if (tunnel->parms.iph.protocol != IPPROTO_IPV6 &&
+ tunnel->parms.iph.protocol != 0)
+ goto out;
+
skb->mac_header = skb->network_header;
skb_reset_network_header(skb);
IPCB(skb)->flags = 0;
skb->protocol = htons(ETH_P_IPV6);
- skb->pkt_type = PACKET_HOST;
if (tunnel->dev->priv_flags & IFF_ISATAP) {
if (!isatap_chksrc(skb, iph, tunnel)) {
tunnel->dev->stats.rx_errors++;
goto out;
}
- } else {
+ } else if (!(tunnel->dev->flags&IFF_POINTOPOINT)) {
if (is_spoofed_6rd(tunnel, iph->saddr,
&ipv6_hdr(skb)->saddr) ||
is_spoofed_6rd(tunnel, iph->daddr,
@@ -599,7 +601,7 @@ static int ipip6_rcv(struct sk_buff *skb)
}
}
- __skb_tunnel_rx(skb, tunnel->dev);
+ __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
err = IP_ECN_decapsulate(iph, skb);
if (unlikely(err)) {
@@ -629,6 +631,38 @@ out:
return 0;
}
+static const struct tnl_ptk_info tpi = {
+ /* no tunnel info required for ipip. */
+ .proto = htons(ETH_P_IP),
+};
+
+static int ipip_rcv(struct sk_buff *skb)
+{
+ const struct iphdr *iph;
+ struct ip_tunnel *tunnel;
+
+ iph = ip_hdr(skb);
+ tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
+ iph->saddr, iph->daddr);
+ if (tunnel != NULL) {
+ if (tunnel->parms.iph.protocol != IPPROTO_IPIP &&
+ tunnel->parms.iph.protocol != 0)
+ goto drop;
+
+ if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto drop;
+ if (iptunnel_pull_header(skb, 0, tpi.proto))
+ goto drop;
+ return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
+ }
+
+ return 1;
+
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
/*
* If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function
* stores the embedded IPv4 address in v4dst and returns true.
@@ -690,13 +724,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
__be16 df = tiph->frag_off;
struct rtable *rt; /* Route to the other host */
struct net_device *tdev; /* Device to other host */
- struct iphdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
__be32 dst = tiph->daddr;
struct flowi4 fl4;
int mtu;
const struct in6_addr *addr6;
int addr_type;
+ u8 ttl;
+ int err;
if (skb->protocol != htons(ETH_P_IPV6))
goto tx_error;
@@ -764,7 +799,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
goto tx_error;
}
- rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
+ rt = ip_route_output_ports(tunnel->net, &fl4, NULL,
dst, tiph->saddr,
0, 0,
IPPROTO_IPV6, RT_TOS(tos),
@@ -839,34 +874,19 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
skb = new_skb;
iph6 = ipv6_hdr(skb);
}
+ ttl = tiph->ttl;
+ if (ttl == 0)
+ ttl = iph6->hop_limit;
+ tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
+
+ if (likely(!skb->encapsulation)) {
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
+ }
- skb->transport_header = skb->network_header;
- skb_push(skb, sizeof(struct iphdr));
- skb_reset_network_header(skb);
- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- IPCB(skb)->flags = 0;
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
-
- /*
- * Push down and install the IPIP header.
- */
-
- iph = ip_hdr(skb);
- iph->version = 4;
- iph->ihl = sizeof(struct iphdr)>>2;
- iph->frag_off = df;
- iph->protocol = IPPROTO_IPV6;
- iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
- iph->daddr = fl4.daddr;
- iph->saddr = fl4.saddr;
-
- if ((iph->ttl = tiph->ttl) == 0)
- iph->ttl = iph6->hop_limit;
-
- skb->ip_summed = CHECKSUM_NONE;
- ip_select_ident(iph, skb_dst(skb), NULL);
- iptunnel_xmit(skb, dev);
+ err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos,
+ ttl, df, !net_eq(tunnel->net, dev_net(dev)));
+ iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
return NETDEV_TX_OK;
tx_error_icmp:
@@ -877,6 +897,43 @@ tx_error:
return NETDEV_TX_OK;
}
+static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ const struct iphdr *tiph = &tunnel->parms.iph;
+
+ if (likely(!skb->encapsulation)) {
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
+ }
+
+ ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
+ return NETDEV_TX_OK;
+}
+
+static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ ipip_tunnel_xmit(skb, dev);
+ break;
+ case htons(ETH_P_IPV6):
+ ipip6_tunnel_xmit(skb, dev);
+ break;
+ default:
+ goto tx_err;
+ }
+
+ return NETDEV_TX_OK;
+
+tx_err:
+ dev->stats.tx_errors++;
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+
+}
+
static void ipip6_tunnel_bind_dev(struct net_device *dev)
{
struct net_device *tdev = NULL;
@@ -888,7 +945,8 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
iph = &tunnel->parms.iph;
if (iph->daddr) {
- struct rtable *rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
+ struct rtable *rt = ip_route_output_ports(tunnel->net, &fl4,
+ NULL,
iph->daddr, iph->saddr,
0, 0,
IPPROTO_IPV6,
@@ -903,7 +961,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
}
if (!tdev && tunnel->parms.link)
- tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
+ tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
if (tdev) {
dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
@@ -916,7 +974,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
{
- struct net *net = dev_net(t->dev);
+ struct net *net = t->net;
struct sit_net *sitn = net_generic(net, sit_net_id);
ipip6_tunnel_unlink(sitn, t);
@@ -1027,7 +1085,11 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
goto done;
err = -EINVAL;
- if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 ||
+ if (p.iph.protocol != IPPROTO_IPV6 &&
+ p.iph.protocol != IPPROTO_IPIP &&
+ p.iph.protocol != 0)
+ goto done;
+ if (p.iph.version != 4 ||
p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
goto done;
if (p.iph.ttl)
@@ -1164,7 +1226,7 @@ static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
static const struct net_device_ops ipip6_netdev_ops = {
.ndo_uninit = ipip6_tunnel_uninit,
- .ndo_start_xmit = ipip6_tunnel_xmit,
+ .ndo_start_xmit = sit_tunnel_xmit,
.ndo_do_ioctl = ipip6_tunnel_ioctl,
.ndo_change_mtu = ipip6_tunnel_change_mtu,
.ndo_get_stats64 = ip_tunnel_get_stats64,
@@ -1188,7 +1250,6 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
dev->iflink = 0;
dev->addr_len = 4;
- dev->features |= NETIF_F_NETNS_LOCAL;
dev->features |= NETIF_F_LLTX;
}
@@ -1197,6 +1258,7 @@ static int ipip6_tunnel_init(struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev);
tunnel->dev = dev;
+ tunnel->net = dev_net(dev);
memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
@@ -1217,6 +1279,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
struct sit_net *sitn = net_generic(net, sit_net_id);
tunnel->dev = dev;
+ tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
iph->version = 4;
@@ -1232,6 +1295,22 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
return 0;
}
+static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+ u8 proto;
+
+ if (!data || !data[IFLA_IPTUN_PROTO])
+ return 0;
+
+ proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+ if (proto != IPPROTO_IPV6 &&
+ proto != IPPROTO_IPIP &&
+ proto != 0)
+ return -EINVAL;
+
+ return 0;
+}
+
static void ipip6_netlink_parms(struct nlattr *data[],
struct ip_tunnel_parm *parms)
{
@@ -1268,6 +1347,10 @@ static void ipip6_netlink_parms(struct nlattr *data[],
if (data[IFLA_IPTUN_FLAGS])
parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]);
+
+ if (data[IFLA_IPTUN_PROTO])
+ parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+
}
#ifdef CONFIG_IPV6_SIT_6RD
@@ -1339,9 +1422,9 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev,
static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
- struct ip_tunnel *t;
+ struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_parm p;
- struct net *net = dev_net(dev);
+ struct net *net = t->net;
struct sit_net *sitn = net_generic(net, sit_net_id);
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd ip6rd;
@@ -1391,6 +1474,8 @@ static size_t ipip6_get_size(const struct net_device *dev)
nla_total_size(1) +
/* IFLA_IPTUN_FLAGS */
nla_total_size(2) +
+ /* IFLA_IPTUN_PROTO */
+ nla_total_size(1) +
#ifdef CONFIG_IPV6_SIT_6RD
/* IFLA_IPTUN_6RD_PREFIX */
nla_total_size(sizeof(struct in6_addr)) +
@@ -1416,6 +1501,7 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
!!(parm->iph.frag_off & htons(IP_DF))) ||
+ nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags))
goto nla_put_failure;
@@ -1445,6 +1531,7 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_TOS] = { .type = NLA_U8 },
[IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
[IFLA_IPTUN_FLAGS] = { .type = NLA_U16 },
+ [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
#ifdef CONFIG_IPV6_SIT_6RD
[IFLA_IPTUN_6RD_PREFIX] = { .len = sizeof(struct in6_addr) },
[IFLA_IPTUN_6RD_RELAY_PREFIX] = { .type = NLA_U32 },
@@ -1459,6 +1546,7 @@ static struct rtnl_link_ops sit_link_ops __read_mostly = {
.policy = ipip6_policy,
.priv_size = sizeof(struct ip_tunnel),
.setup = ipip6_tunnel_setup,
+ .validate = ipip6_validate,
.newlink = ipip6_newlink,
.changelink = ipip6_changelink,
.get_size = ipip6_get_size,
@@ -1471,10 +1559,22 @@ static struct xfrm_tunnel sit_handler __read_mostly = {
.priority = 1,
};
+static struct xfrm_tunnel ipip_handler __read_mostly = {
+ .handler = ipip_rcv,
+ .err_handler = ipip6_err,
+ .priority = 2,
+};
+
static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head)
{
+ struct net *net = dev_net(sitn->fb_tunnel_dev);
+ struct net_device *dev, *aux;
int prio;
+ for_each_netdev_safe(net, dev, aux)
+ if (dev->rtnl_link_ops == &sit_link_ops)
+ unregister_netdevice_queue(dev, head);
+
for (prio = 1; prio < 4; prio++) {
int h;
for (h = 0; h < HASH_SIZE; h++) {
@@ -1482,7 +1582,12 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea
t = rtnl_dereference(sitn->tunnels[prio][h]);
while (t != NULL) {
- unregister_netdevice_queue(t->dev, head);
+ /* If dev is in the same netns, it has already
+ * been added to the list by the previous loop.
+ */
+ if (!net_eq(dev_net(t->dev), net))
+ unregister_netdevice_queue(t->dev,
+ head);
t = rtnl_dereference(t->next);
}
}
@@ -1507,6 +1612,10 @@ static int __net_init sit_init_net(struct net *net)
goto err_alloc_dev;
}
dev_net_set(sitn->fb_tunnel_dev, net);
+ /* FB netdevice is special: we have one, and only one per netns.
+ * Allowing to move it to another netns is clearly unsafe.
+ */
+ sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
if (err)
@@ -1553,6 +1662,7 @@ static void __exit sit_cleanup(void)
{
rtnl_link_unregister(&sit_link_ops);
xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
+ xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
unregister_pernet_device(&sit_net_ops);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
@@ -1569,9 +1679,14 @@ static int __init sit_init(void)
return err;
err = xfrm4_tunnel_register(&sit_handler, AF_INET6);
if (err < 0) {
- pr_info("%s: can't add protocol\n", __func__);
+ pr_info("%s: can't register ip6ip4\n", __func__);
goto xfrm_tunnel_failed;
}
+ err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
+ if (err < 0) {
+ pr_info("%s: can't register ip4ip4\n", __func__);
+ goto xfrm_tunnel4_failed;
+ }
err = rtnl_link_register(&sit_link_ops);
if (err < 0)
goto rtnl_link_failed;
@@ -1580,6 +1695,8 @@ out:
return err;
rtnl_link_failed:
+ xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
+xfrm_tunnel4_failed:
xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
xfrm_tunnel_failed:
unregister_pernet_device(&sit_net_ops);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index d5dda20bd717..bf63ac8a49b9 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -112,32 +112,38 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, const struct in6_addr *saddr,
& COOKIEMASK;
}
-__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp)
+u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
+ const struct tcphdr *th, __u16 *mssp)
{
- const struct ipv6hdr *iph = ipv6_hdr(skb);
- const struct tcphdr *th = tcp_hdr(skb);
int mssind;
const __u16 mss = *mssp;
- tcp_synq_overflow(sk);
-
for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--)
if (mss >= msstab[mssind])
break;
*mssp = msstab[mssind];
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
-
return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source,
th->dest, ntohl(th->seq),
jiffies / (HZ * 60), mssind);
}
+EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence);
-static inline int cookie_check(const struct sk_buff *skb, __u32 cookie)
+__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
const struct tcphdr *th = tcp_hdr(skb);
+
+ tcp_synq_overflow(sk);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
+
+ return __cookie_v6_init_sequence(iph, th, mssp);
+}
+
+int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th,
+ __u32 cookie)
+{
__u32 seq = ntohl(th->seq) - 1;
__u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr,
th->source, th->dest, seq,
@@ -145,6 +151,7 @@ static inline int cookie_check(const struct sk_buff *skb, __u32 cookie)
return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
}
+EXPORT_SYMBOL_GPL(__cookie_v6_check);
struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
{
@@ -167,7 +174,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out;
if (tcp_synq_no_recent_overflow(sk) ||
- (mss = cookie_check(skb, cookie)) == 0) {
+ (mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie)) == 0) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
goto out;
}
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index e85c48bd404f..107b2f1d90ae 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -16,7 +16,7 @@
#include <net/addrconf.h>
#include <net/inet_frag.h>
-static ctl_table ipv6_table_template[] = {
+static struct ctl_table ipv6_table_template[] = {
{
.procname = "bindv6only",
.data = &init_net.ipv6.sysctl.bindv6only,
@@ -27,7 +27,7 @@ static ctl_table ipv6_table_template[] = {
{ }
};
-static ctl_table ipv6_rotable[] = {
+static struct ctl_table ipv6_rotable[] = {
{
.procname = "mld_max_msf",
.data = &sysctl_mld_max_msf,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0a17ed9eaf39..5c71501fc917 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -63,6 +63,7 @@
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>
+#include <net/busy_poll.h>
#include <asm/uaccess.h>
@@ -962,7 +963,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (!ipv6_unicast_destination(skb))
goto drop;
- if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
+ if ((sysctl_tcp_syncookies == 2 ||
+ inet_csk_reqsk_queue_is_full(sk)) && !isn) {
want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
if (!want_cookie)
goto drop;
@@ -1236,8 +1238,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
tcp_initialize_rcv_mss(newsk);
- tcp_synack_rtt_meas(newsk, req);
- newtp->total_retrans = req->num_retrans;
newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
@@ -1360,8 +1360,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
}
}
- if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
- goto reset;
+ tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
if (opt_skb)
goto ipv6_pktoptions;
return 0;
@@ -1426,7 +1425,7 @@ ipv6_pktoptions:
if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
if (np->rxopt.bits.rxtclass)
- np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb));
+ np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(opt_skb));
if (ipv6_opt_accepted(sk, opt_skb)) {
skb_set_owner_r(opt_skb, sk);
opt_skb = xchg(&np->pktoptions, opt_skb);
@@ -1498,6 +1497,7 @@ process:
if (sk_filter(sk, skb))
goto discard_and_relse;
+ sk_mark_napi_id(sk, skb);
skb->dev = NULL;
bh_lock_sock_nested(sk);
@@ -1730,7 +1730,7 @@ static void get_openreq6(struct seq_file *seq,
seq_printf(seq,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
- "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+ "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
i,
src->s6_addr32[0], src->s6_addr32[1],
src->s6_addr32[2], src->s6_addr32[3],
@@ -1781,7 +1781,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
seq_printf(seq,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
- "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %lu %lu %u %u %d\n",
+ "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
i,
src->s6_addr32[0], src->s6_addr32[1],
src->s6_addr32[2], src->s6_addr32[3], srcp,
@@ -1924,6 +1924,7 @@ struct proto tcpv6_prot = {
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
.enter_memory_pressure = tcp_enter_memory_pressure,
+ .stream_memory_free = tcp_stream_memory_free,
.sockets_allocated = &tcp_sockets_allocated,
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 42923b14dfa6..f4058150262b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -46,6 +46,7 @@
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
#include <net/inet6_hashtables.h>
+#include <net/busy_poll.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -841,7 +842,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
*/
sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
if (sk != NULL) {
- int ret = udpv6_queue_rcv_skb(sk, skb);
+ int ret;
+
+ sk_mark_napi_id(sk, skb);
+ ret = udpv6_queue_rcv_skb(sk, skb);
sock_put(sk);
/* a return value > 0 means to resubmit the input, but
@@ -955,11 +959,16 @@ static int udp_v6_push_pending_frames(struct sock *sk)
struct udphdr *uh;
struct udp_sock *up = udp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
- struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
+ struct flowi6 *fl6;
int err = 0;
int is_udplite = IS_UDPLITE(sk);
__wsum csum = 0;
+ if (up->pending == AF_INET)
+ return udp_push_pending_frames(sk);
+
+ fl6 = &inet->cork.fl.u.ip6;
+
/* Grab the skbuff where UDP header space exists. */
if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
goto out;
@@ -1359,48 +1368,17 @@ static const struct inet6_protocol udpv6_protocol = {
/* ------------------------------------------------------------------------ */
#ifdef CONFIG_PROC_FS
-
-static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket)
-{
- struct inet_sock *inet = inet_sk(sp);
- struct ipv6_pinfo *np = inet6_sk(sp);
- const struct in6_addr *dest, *src;
- __u16 destp, srcp;
-
- dest = &np->daddr;
- src = &np->rcv_saddr;
- destp = ntohs(inet->inet_dport);
- srcp = ntohs(inet->inet_sport);
- seq_printf(seq,
- "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
- "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n",
- bucket,
- src->s6_addr32[0], src->s6_addr32[1],
- src->s6_addr32[2], src->s6_addr32[3], srcp,
- dest->s6_addr32[0], dest->s6_addr32[1],
- dest->s6_addr32[2], dest->s6_addr32[3], destp,
- sp->sk_state,
- sk_wmem_alloc_get(sp),
- sk_rmem_alloc_get(sp),
- 0, 0L, 0,
- from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
- 0,
- sock_i_ino(sp),
- atomic_read(&sp->sk_refcnt), sp,
- atomic_read(&sp->sk_drops));
-}
-
int udp6_seq_show(struct seq_file *seq, void *v)
{
- if (v == SEQ_START_TOKEN)
- seq_printf(seq,
- " sl "
- "local_address "
- "remote_address "
- "st tx_queue rx_queue tr tm->when retrnsmt"
- " uid timeout inode ref pointer drops\n");
- else
- udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket);
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq, IPV6_SEQ_DGRAM_HEADER);
+ } else {
+ int bucket = ((struct udp_iter_state *)seq->private)->bucket;
+ struct inet_sock *inet = inet_sk(v);
+ __u16 srcp = ntohs(inet->inet_sport);
+ __u16 destp = ntohs(inet->inet_dport);
+ ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket);
+ }
return 0;
}
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index d3cfaf9c7a08..60559511bd9c 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -21,26 +21,25 @@ static int udp6_ufo_send_check(struct sk_buff *skb)
const struct ipv6hdr *ipv6h;
struct udphdr *uh;
- /* UDP Tunnel offload on ipv6 is not yet supported. */
- if (skb->encapsulation)
- return -EINVAL;
-
if (!pskb_may_pull(skb, sizeof(*uh)))
return -EINVAL;
- ipv6h = ipv6_hdr(skb);
- uh = udp_hdr(skb);
+ if (likely(!skb->encapsulation)) {
+ ipv6h = ipv6_hdr(skb);
+ uh = udp_hdr(skb);
+
+ uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
+ IPPROTO_UDP, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ }
- uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
- IPPROTO_UDP, 0);
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct udphdr, check);
- skb->ip_summed = CHECKSUM_PARTIAL;
return 0;
}
static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
- netdev_features_t features)
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
unsigned int mss;
@@ -64,7 +63,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
if (unlikely(type & ~(SKB_GSO_UDP |
SKB_GSO_DODGY |
SKB_GSO_UDP_TUNNEL |
- SKB_GSO_GRE) ||
+ SKB_GSO_GRE |
+ SKB_GSO_MPLS) ||
!(type & (SKB_GSO_UDP))))
goto out;
@@ -74,47 +74,51 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
goto out;
}
- /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
- * do checksum of UDP packets sent as multiple IP fragments.
- */
- offset = skb_checksum_start_offset(skb);
- csum = skb_checksum(skb, offset, skb->len - offset, 0);
- offset += skb->csum_offset;
- *(__sum16 *)(skb->data + offset) = csum_fold(csum);
- skb->ip_summed = CHECKSUM_NONE;
-
- /* Check if there is enough headroom to insert fragment header. */
- tnl_hlen = skb_tnl_header_len(skb);
- if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) {
- if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
- goto out;
+ if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)
+ segs = skb_udp_tunnel_segment(skb, features);
+ else {
+ /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
+ * do checksum of UDP packets sent as multiple IP fragments.
+ */
+ offset = skb_checksum_start_offset(skb);
+ csum = skb_checksum(skb, offset, skb->len - offset, 0);
+ offset += skb->csum_offset;
+ *(__sum16 *)(skb->data + offset) = csum_fold(csum);
+ skb->ip_summed = CHECKSUM_NONE;
+
+ /* Check if there is enough headroom to insert fragment header. */
+ tnl_hlen = skb_tnl_header_len(skb);
+ if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) {
+ if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
+ goto out;
+ }
+
+ /* Find the unfragmentable header and shift it left by frag_hdr_sz
+ * bytes to insert fragment header.
+ */
+ unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
+ nexthdr = *prevhdr;
+ *prevhdr = NEXTHDR_FRAGMENT;
+ unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
+ unfrag_ip6hlen + tnl_hlen;
+ packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
+ memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
+
+ SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
+ skb->mac_header -= frag_hdr_sz;
+ skb->network_header -= frag_hdr_sz;
+
+ fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
+ fptr->nexthdr = nexthdr;
+ fptr->reserved = 0;
+ ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
+
+ /* Fragment the skb. ipv6 header and the remaining fields of the
+ * fragment header are updated in ipv6_gso_segment()
+ */
+ segs = skb_segment(skb, features);
}
- /* Find the unfragmentable header and shift it left by frag_hdr_sz
- * bytes to insert fragment header.
- */
- unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
- nexthdr = *prevhdr;
- *prevhdr = NEXTHDR_FRAGMENT;
- unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
- unfrag_ip6hlen + tnl_hlen;
- packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
- memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
-
- SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
- skb->mac_header -= frag_hdr_sz;
- skb->network_header -= frag_hdr_sz;
-
- fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
- fptr->nexthdr = nexthdr;
- fptr->reserved = 0;
- ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
-
- /* Fragment the skb. ipv6 header and the remaining fields of the
- * fragment header are updated in ipv6_gso_segment()
- */
- segs = skb_segment(skb, features);
-
out:
return segs;
}
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 8755a3079d0f..6cd625e37706 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -34,8 +34,10 @@ static int xfrm6_local_dontfrag(struct sk_buff *skb)
struct sock *sk = skb->sk;
if (sk) {
- proto = sk->sk_protocol;
+ if (sk->sk_family != AF_INET6)
+ return 0;
+ proto = sk->sk_protocol;
if (proto == IPPROTO_UDP || proto == IPPROTO_RAW)
return inet6_sk(sk)->dontfrag;
}
@@ -54,13 +56,15 @@ static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu)
ipv6_local_rxpmtu(sk, &fl6, mtu);
}
-static void xfrm6_local_error(struct sk_buff *skb, u32 mtu)
+void xfrm6_local_error(struct sk_buff *skb, u32 mtu)
{
struct flowi6 fl6;
+ const struct ipv6hdr *hdr;
struct sock *sk = skb->sk;
+ hdr = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
fl6.fl6_dport = inet_sk(sk)->inet_dport;
- fl6.daddr = ipv6_hdr(skb)->daddr;
+ fl6.daddr = hdr->daddr;
ipv6_local_error(sk, EMSGSIZE, &fl6, mtu);
}
@@ -80,7 +84,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
if (xfrm6_local_dontfrag(skb))
xfrm6_local_rxpmtu(skb, mtu);
else if (skb->sk)
- xfrm6_local_error(skb, mtu);
+ xfrm_local_error(skb, mtu);
else
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
ret = -EMSGSIZE;
@@ -136,13 +140,18 @@ static int __xfrm6_output(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct xfrm_state *x = dst->xfrm;
- int mtu = ip6_skb_dst_mtu(skb);
+ int mtu;
+
+ if (skb->protocol == htons(ETH_P_IPV6))
+ mtu = ip6_skb_dst_mtu(skb);
+ else
+ mtu = dst_mtu(skb_dst(skb));
if (skb->len > mtu && xfrm6_local_dontfrag(skb)) {
xfrm6_local_rxpmtu(skb, mtu);
return -EMSGSIZE;
} else if (!skb->local_df && skb->len > mtu && skb->sk) {
- xfrm6_local_error(skb, mtu);
+ xfrm_local_error(skb, mtu);
return -EMSGSIZE;
}
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index d8c70b8efc24..3fc970135fc6 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -183,6 +183,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = {
.extract_input = xfrm6_extract_input,
.extract_output = xfrm6_extract_output,
.transport_finish = xfrm6_transport_finish,
+ .local_error = xfrm6_local_error,
};
int __init xfrm6_state_init(void)