From f440ea85d429e59f63d626e017403cb09d9adbdb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 May 2018 08:23:01 -0400 Subject: do d_instantiate/unlock_new_inode combinations safely commit 1e2e547a93a00ebc21582c06ca3c6cfea2a309ee upstream. For anything NFS-exported we do _not_ want to unlock new inode before it has grown an alias; original set of fixes got the ordering right, but missed the nasty complication in case of lockdep being enabled - unlock_new_inode() does lockdep_annotate_inode_mutex_key(inode) which can only be done before anyone gets a chance to touch ->i_mutex. Unfortunately, flipping the order and doing unlock_new_inode() before d_instantiate() opens a window when mkdir can race with open-by-fhandle on a guessed fhandle, leading to multiple aliases for a directory inode and all the breakage that follows from that. Correct solution: a new primitive (d_instantiate_new()) combining these two in the right order - lockdep annotate, then d_instantiate(), then the rest of unlock_new_inode(). All combinations of d_instantiate() with unlock_new_inode() should be converted to that. Cc: stable@kernel.org # 2.6.29 and later Tested-by: Mike Marshall Reviewed-by: Andreas Dilger Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- include/linux/dcache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index f05a659cdf34..006f4ccda5f5 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -226,6 +226,7 @@ extern seqlock_t rename_lock; * These are the low-level FS interfaces to the dcache.. */ extern void d_instantiate(struct dentry *, struct inode *); +extern void d_instantiate_new(struct dentry *, struct inode *); extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); extern int d_instantiate_no_diralias(struct dentry *, struct inode *); extern void __d_drop(struct dentry *dentry); -- cgit v1.2.3 From a59bd819576d9dc0ca279f2c1a4b3903ca786d12 Mon Sep 17 00:00:00 2001 From: Lidong Chen Date: Tue, 8 May 2018 16:50:16 +0800 Subject: IB/umem: Use the correct mm during ib_umem_release commit 8e907ed4882714fd13cfe670681fc6cb5284c780 upstream. User-space may invoke ibv_reg_mr and ibv_dereg_mr in different threads. If ibv_dereg_mr is called after the thread which invoked ibv_reg_mr has exited, get_pid_task will return NULL and ib_umem_release will not decrease mm->pinned_vm. Instead of using threads to locate the mm, use the overall tgid from the ib_ucontext struct instead. This matches the behavior of ODP and disassociate in handling the mm of the process that called ibv_reg_mr. 
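Since the diff below is limited to 'include', it only shows the per-umem pid field going away; the matching change in drivers/infiniband/core/umem.c is not visible here. As a rough sketch of the release path after this fix (reconstructed from the changelog, details such as locking may differ), ib_umem_release() now resolves the mm through the tgid stored in the ib_ucontext rather than through a pid captured from the registering thread:

    /* Sketch only: how the mm is found once the per-umem pid is gone. */
    struct task_struct *task;
    struct mm_struct *mm;

    /* The tgid was recorded in the ucontext when it was created, so it
     * remains usable even if the thread that called ibv_reg_mr has
     * already exited by the time ibv_dereg_mr runs.
     */
    task = get_pid_task(umem->context->tgid, PIDTYPE_PID);
    if (!task)
        goto out;
    mm = get_task_mm(task);
    put_task_struct(task);
    if (!mm)
        goto out;

    down_write(&mm->mmap_sem);
    mm->pinned_vm -= diff;    /* no longer skipped when the thread is gone */
    up_write(&mm->mmap_sem);
    mmput(mm);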
Cc: Fixes: 87773dd56d54 ("IB: ib_umem_release() should decrement mm->pinned_vm from ib_umem_get") Signed-off-by: Lidong Chen Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- include/rdma/ib_umem.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 23159dd5be18..a1fd63871d17 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -48,7 +48,6 @@ struct ib_umem { int writable; int hugetlb; struct work_struct work; - struct pid *pid; struct mm_struct *mm; unsigned long diff; struct ib_umem_odp *odp_data; -- cgit v1.2.3 From 6fc72fd1565bcd7f872ad89cd0c7a60a7cf68c96 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Sun, 11 Feb 2018 11:28:12 +0800 Subject: ptr_ring: prevent integer overflow when calculating size [ Upstream commit 54e02162d4454a99227f520948bf4494c3d972d0 ] Switch to use dividing to prevent integer overflow when size is too big to calculate allocation size properly. Reported-by: Eric Biggers Fixes: 6e6e41c31122 ("ptr_ring: fail early if queue occupies more than KMALLOC_MAX_SIZE") Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/ptr_ring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 35d125569e68..e8b12b79a0de 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -450,7 +450,7 @@ static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r, */ static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp) { - if (size * sizeof(void *) > KMALLOC_MAX_SIZE) + if (size > KMALLOC_MAX_SIZE / sizeof(void *)) return NULL; return kvmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO); } -- cgit v1.2.3 From f49e3a9acc524b8641bcacf8e33a1cac27371213 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 10 Feb 2018 13:20:34 +0100 Subject: mac80211: round IEEE80211_TX_STATUS_HEADROOM up to multiple of 4 [ Upstream commit 651b9920d7a694ffb1f885aef2bbb068a25d9d66 ] This ensures that mac80211 allocated management frames are properly aligned, which makes copying them more efficient. For instance, mt76 uses iowrite32_copy to copy beacon frames to beacon template memory on the chip. Misaligned 32-bit accesses cause CPU exceptions on MIPS and should be avoided. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/net/mac80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 4f1d2dec43ce..87b62bae20af 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4141,7 +4141,7 @@ void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *sta, u8 tid); * The TX headroom reserved by mac80211 for its own tx_status functions. * This is enough for the radiotap header. 
*/ -#define IEEE80211_TX_STATUS_HEADROOM 14 +#define IEEE80211_TX_STATUS_HEADROOM ALIGN(14, 4) /** * ieee80211_sta_set_buffered - inform mac80211 about driver-buffered frames -- cgit v1.2.3 From 305eb32d45f00730f8f61a1cff111e3c4b07a766 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Feb 2018 14:45:54 -0800 Subject: bug.h: work around GCC PR82365 in BUG() [ Upstream commit 173a3efd3edb2ef6ef07471397c5f542a360e9c1 ] Looking at functions with large stack frames across all architectures led me discovering that BUG() suffers from the same problem as fortify_panic(), which I've added a workaround for already. In short, variables that go out of scope by calling a noreturn function or __builtin_unreachable() keep using stack space in functions afterwards. A workaround that was identified is to insert an empty assembler statement just before calling the function that doesn't return. I'm adding a macro "barrier_before_unreachable()" to document this, and insert calls to that in all instances of BUG() that currently suffer from this problem. The files that saw the largest change from this had these frame sizes before, and much less with my patch: fs/ext4/inode.c:82:1: warning: the frame size of 1672 bytes is larger than 800 bytes [-Wframe-larger-than=] fs/ext4/namei.c:434:1: warning: the frame size of 904 bytes is larger than 800 bytes [-Wframe-larger-than=] fs/ext4/super.c:2279:1: warning: the frame size of 1160 bytes is larger than 800 bytes [-Wframe-larger-than=] fs/ext4/xattr.c:146:1: warning: the frame size of 1168 bytes is larger than 800 bytes [-Wframe-larger-than=] fs/f2fs/inode.c:152:1: warning: the frame size of 1424 bytes is larger than 800 bytes [-Wframe-larger-than=] net/netfilter/ipvs/ip_vs_core.c:1195:1: warning: the frame size of 1068 bytes is larger than 800 bytes [-Wframe-larger-than=] net/netfilter/ipvs/ip_vs_core.c:395:1: warning: the frame size of 1084 bytes is larger than 800 bytes [-Wframe-larger-than=] net/netfilter/ipvs/ip_vs_ftp.c:298:1: warning: the frame size of 928 bytes is larger than 800 bytes [-Wframe-larger-than=] net/netfilter/ipvs/ip_vs_ftp.c:418:1: warning: the frame size of 908 bytes is larger than 800 bytes [-Wframe-larger-than=] net/netfilter/ipvs/ip_vs_lblcr.c:718:1: warning: the frame size of 960 bytes is larger than 800 bytes [-Wframe-larger-than=] drivers/net/xen-netback/netback.c:1500:1: warning: the frame size of 1088 bytes is larger than 800 bytes [-Wframe-larger-than=] In case of ARC and CRIS, it turns out that the BUG() implementation actually does return (or at least the compiler thinks it does), resulting in lots of warnings about uninitialized variable use and leaving noreturn functions, such as: block/cfq-iosched.c: In function 'cfq_async_queue_prio': block/cfq-iosched.c:3804:1: error: control reaches end of non-void function [-Werror=return-type] include/linux/dmaengine.h: In function 'dma_maxpq': include/linux/dmaengine.h:1123:1: error: control reaches end of non-void function [-Werror=return-type] This makes them call __builtin_trap() instead, which should normally dump the stack and kill the current process, like some of the other architectures already do. I tried adding barrier_before_unreachable() to panic() and fortify_panic() as well, but that had very little effect, so I'm not submitting that patch. Vineet said: : For ARC, it is double win. : : 1. 
Fixes 3 -Wreturn-type warnings : : | ../net/core/ethtool.c:311:1: warning: control reaches end of non-void function : [-Wreturn-type] : | ../kernel/sched/core.c:3246:1: warning: control reaches end of non-void function : [-Wreturn-type] : | ../include/linux/sunrpc/svc_xprt.h:180:1: warning: control reaches end of : non-void function [-Wreturn-type] : : 2. bloat-o-meter reports code size improvements as gcc elides the : generated code for stack return. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365 Link: http://lkml.kernel.org/r/20171219114112.939391-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Acked-by: Vineet Gupta [arch/arc] Tested-by: Vineet Gupta [arch/arc] Cc: Mikael Starvik Cc: Jesper Nilsson Cc: Tony Luck Cc: Fenghua Yu Cc: Geert Uytterhoeven Cc: "David S. Miller" Cc: Christopher Li Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Kees Cook Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Will Deacon Cc: "Steven Rostedt (VMware)" Cc: Mark Rutland Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/bug.h | 1 + include/linux/compiler-gcc.h | 15 ++++++++++++++- include/linux/compiler.h | 5 +++++ 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index af2cc94a61bf..ae1a33aa8955 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -50,6 +50,7 @@ struct bug_entry { #ifndef HAVE_ARCH_BUG #define BUG() do { \ printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \ + barrier_before_unreachable(); \ panic("BUG!"); \ } while (0) #endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index b78b31af36f8..f43113b8890b 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -211,6 +211,15 @@ #endif #endif +/* + * calling noreturn functions, __builtin_unreachable() and __builtin_trap() + * confuse the stack allocation in gcc, leading to overly large stack + * frames, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365 + * + * Adding an empty inline assembly before it works around the problem + */ +#define barrier_before_unreachable() asm volatile("") + /* * Mark a position in code as unreachable. This can be used to * suppress control flow warnings after asm blocks that transfer @@ -221,7 +230,11 @@ * unreleased. Really, we need to have autoconf for the kernel. */ #define unreachable() \ - do { annotate_unreachable(); __builtin_unreachable(); } while (0) + do { \ + annotate_unreachable(); \ + barrier_before_unreachable(); \ + __builtin_unreachable(); \ + } while (0) /* Mark a function definition as prohibited from being cloned. 
*/ #define __noclone __attribute__((__noclone__, __optimize__("no-tracer"))) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index e8c9cd18bb05..853929f98962 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -86,6 +86,11 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, # define barrier_data(ptr) barrier() #endif +/* workaround for GCC PR82365 if needed */ +#ifndef barrier_before_unreachable +# define barrier_before_unreachable() do { } while (0) +#endif + /* Unreachable code */ #ifdef CONFIG_STACK_VALIDATION #define annotate_reachable() ({ \ -- cgit v1.2.3 From 0b9f26e97f2bd557cc41de670cffd0c594ebf6b9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 22 Feb 2018 20:55:28 +0100 Subject: regulatory: add NUL to request alpha2 [ Upstream commit 657308f73e674e86b60509a430a46e569bf02846 ] Similar to the ancient commit a5fe8e7695dc ("regulatory: add NUL to alpha2"), add another byte to alpha2 in the request struct so that when we use nla_put_string(), we don't overrun anything. Fixes: 73d54c9e74c4 ("cfg80211: add regulatory netlink multicast group") Reported-by: Kees Cook Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/net/regulatory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/regulatory.h b/include/net/regulatory.h index ebc5a2ed8631..f83cacce3308 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -78,7 +78,7 @@ struct regulatory_request { int wiphy_idx; enum nl80211_reg_initiator initiator; enum nl80211_user_reg_hint_type user_reg_hint_type; - char alpha2[2]; + char alpha2[3]; enum nl80211_dfs_regions dfs_region; bool intersect; bool processed; -- cgit v1.2.3 From 5f95541a0db52dd4edf2dcca3d3e3ba54b97d3cf Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 22 Feb 2018 13:05:41 +0100 Subject: kvm: fix warning for CONFIG_HAVE_KVM_EVENTFD builds [ Upstream commit 076467490b8176eb96eddc548a14d4135c7b5852 ] Move the kvm_arch_irq_routing_update() prototype outside of ifdef CONFIG_HAVE_KVM_EVENTFD guards to fix the following sparse warning: arch/s390/kvm/../../../virt/kvm/irqchip.c:171:28: warning: symbol 'kvm_arch_irq_routing_update' was not declared. Should it be static? Signed-off-by: Sebastian Ott Acked-by: Christian Borntraeger Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/kvm_host.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5a8019befafd..39f0489eb137 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1104,7 +1104,6 @@ static inline void kvm_irq_routing_update(struct kvm *kvm) { } #endif -void kvm_arch_irq_routing_update(struct kvm *kvm); static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { @@ -1113,6 +1112,8 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) #endif /* CONFIG_HAVE_KVM_EVENTFD */ +void kvm_arch_irq_routing_update(struct kvm *kvm); + static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) { /* -- cgit v1.2.3 From 3c08f8140a9a67c48aa133eb0962af71076487ce Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 21 Feb 2018 11:50:03 +1000 Subject: virtio-gpu: fix ioctl and expose the fixed status to userspace. 
[ Upstream commit 9a191b114906457c4b2494c474f58ae4142d4e67 ] This exposes to mesa that it can use the fixed ioctl for querying later cap sets; cap set 1 is forever frozen in time. Signed-off-by: Dave Airlie Link: http://patchwork.freedesktop.org/patch/msgid/20180221015003.22884-1-airlied@gmail.com Signed-off-by: Gerd Hoffmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/uapi/drm/virtgpu_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h index 91a31ffed828..9a781f0611df 100644 --- a/include/uapi/drm/virtgpu_drm.h +++ b/include/uapi/drm/virtgpu_drm.h @@ -63,6 +63,7 @@ struct drm_virtgpu_execbuffer { }; #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */ +#define VIRTGPU_PARAM_CAPSET_QUERY_FIX 2 /* do we have the capset fix */ struct drm_virtgpu_getparam { __u64 param; -- cgit v1.2.3 From 50de7f4347cf0dbe7b9c28e273a8cf498067790e Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Fri, 23 Feb 2018 19:41:52 +0300 Subject: ARC: mcip: halt GFRC counter when ARC cores halt [ Upstream commit 07423d00a2b2a71a97e4287d9262cb83c4c4c89f ] In SMP systems, GFRC is used as the clocksource. However, by default the counter keeps running even when a core is halted (say when debugging via a JTAG debugger). This confuses Linux timekeeping and triggers false RCU stall splats such as the one below: | [ARCLinux]# while true; do ./shm_open_23-1.run-test ; done | Running with 1000 processes for 1000 objects | hrtimer: interrupt took 485060 ns | | create_cnt: 1000 | Running with 1000 processes for 1000 objects | [ARCLinux]# INFO: rcu_preempt self-detected stall on CPU | 2-...: (1 GPs behind) idle=a01/1/0 softirq=135770/135773 fqs=0 | INFO: rcu_preempt detected stalls on CPUs/tasks: | 0-...: (1 GPs behind) idle=71e/0/0 softirq=135264/135264 fqs=0 | 2-...: (1 GPs behind) idle=a01/1/0 softirq=135770/135773 fqs=0 | 3-...: (1 GPs behind) idle=4e0/0/0 softirq=134304/134304 fqs=0 | (detected by 1, t=13648 jiffies, g=31493, c=31492, q=1) Starting from ARC HS v3.0 it is possible to tie GFRC to the state of up to 4 ARC cores with the help of GFRC's CORE register, where we set a mask of the cores whose state we need to rely on. We update the cpu mask every time a new cpu comes online, instead of using a hardcoded one or a mask generated from "possible_cpus", as we want it set correctly even if we run the kernel on HW which has fewer cores than expected (or we launch the kernel via a debugger and kick fewer cores than the HW has). Note that GFRC halts when all cores have halted and thus relies on programming of the Inter-Core-dEbug register to halt all cores when one halts.
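The hunk below only adds the GFRC build register and the new SET_CORE/READ_CORE command numbers; the arch/arc/kernel/mcip.c code that programs them is outside this include-only diff. A minimal sketch of the per-cpu update described above, assuming the usual mcip helpers (helper name, version check and locking are assumptions, not taken from this series):

    /* Sketch only: gate GFRC on the halt state of every online core. */
    static void mcip_update_gfrc_halt_mask(int cpu)
    {
            struct bcr_generic gfrc;
            unsigned long flags;
            u32 gfrc_halt_mask;

            READ_BCR(ARC_REG_GFRC_BUILD, gfrc);

            /* The SET_CORE/READ_CORE commands only exist in GFRC >= 0x3
             * (ARC HS v3.0); older cores keep the free-running behaviour.
             */
            if (gfrc.ver < 0x3)
                    return;

            raw_spin_lock_irqsave(&mcip_lock, flags);

            /* Read the current core mask and add the cpu that just came
             * online, so the counter tracks the halt state of all of them.
             */
            __mcip_cmd(CMD_GFRC_READ_CORE, 0);
            gfrc_halt_mask = read_aux_reg(ARC_REG_MCIP_READBACK);
            gfrc_halt_mask |= BIT(cpu);
            __mcip_cmd_data(CMD_GFRC_SET_CORE, 0, gfrc_halt_mask);

            raw_spin_unlock_irqrestore(&mcip_lock, flags);
    }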
Signed-off-by: Alexey Brodkin Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta [vgupta: rewrote changelog] Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/soc/arc/mcip.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/soc/arc/mcip.h b/include/soc/arc/mcip.h index c2d1b15da136..1138da57baaf 100644 --- a/include/soc/arc/mcip.h +++ b/include/soc/arc/mcip.h @@ -15,6 +15,7 @@ #define ARC_REG_MCIP_BCR 0x0d0 #define ARC_REG_MCIP_IDU_BCR 0x0D5 +#define ARC_REG_GFRC_BUILD 0x0D6 #define ARC_REG_MCIP_CMD 0x600 #define ARC_REG_MCIP_WDATA 0x601 #define ARC_REG_MCIP_READBACK 0x602 @@ -40,6 +41,8 @@ struct mcip_cmd { #define CMD_GFRC_READ_LO 0x42 #define CMD_GFRC_READ_HI 0x43 +#define CMD_GFRC_SET_CORE 0x47 +#define CMD_GFRC_READ_CORE 0x48 #define CMD_IDU_ENABLE 0x71 #define CMD_IDU_DISABLE 0x72 -- cgit v1.2.3 From f7f78191c910d741c5971094003dfe7233c29743 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Fri, 23 Feb 2018 19:41:53 +0300 Subject: ARC: mcip: update MCIP debug mask when the new cpu came online [ Upstream commit f3205de98db2fc8083796dd5ad81b191e436fab8 ] As of today we use a hardcoded MCIP debug mask, so if we launch the kernel via a debugger and kick fewer cores than the HW has, all cpus hang at the moment the MCIP debug mask is set up. So update the MCIP debug mask when a new cpu comes online, instead of using a hardcoded MCIP debug mask. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/soc/arc/mcip.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/soc/arc/mcip.h b/include/soc/arc/mcip.h index 1138da57baaf..a91f25151a5b 100644 --- a/include/soc/arc/mcip.h +++ b/include/soc/arc/mcip.h @@ -37,7 +37,9 @@ struct mcip_cmd { #define CMD_SEMA_RELEASE 0x12 #define CMD_DEBUG_SET_MASK 0x34 +#define CMD_DEBUG_READ_MASK 0x35 #define CMD_DEBUG_SET_SELECT 0x36 +#define CMD_DEBUG_READ_SELECT 0x37 #define CMD_GFRC_READ_LO 0x42 #define CMD_GFRC_READ_HI 0x43 -- cgit v1.2.3 From 3c84b5aaf7a5ebbdf08ebe08d79c911670712d20 Mon Sep 17 00:00:00 2001 From: Jiufei Xue Date: Tue, 27 Feb 2018 20:10:18 +0800 Subject: block: display the correct diskname for bio [ Upstream commit 9c0fb1e313aaf4e8edec22433c8b22dd308e466c ] bio_devname uses __bdevname to display the device name, and can only show the major and minor of part0. Fix this by using disk_name to display the correct name.
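The include-only diff below just swaps the __bdevname-based macro for an out-of-line helper prototype; the definition added in block/bio.c is not shown here. It amounts to roughly the following (a sketch based on the changelog; the in-tree version may differ in detail):

    /* block/bio.c - sketch of the helper behind the new prototype */
    const char *bio_devname(struct bio *bio, char *buf)
    {
        /* disk_name() is partition-aware, so a bio aimed at a partition
         * reports that partition's name instead of whatever the old
         * __bdevname(bio_dev(bio), buf) derived from part0's dev_t.
         */
        return disk_name(bio->bi_disk, bio->bi_partno, buf);
    }
    EXPORT_SYMBOL(bio_devname);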
Fixes: 74d46992e0d9 ("block: replace bi_bdev with a gendisk pointer and partitions index") Reviewed-by: Omar Sandoval Reviewed-by: Christoph Hellwig Signed-off-by: Jiufei Xue Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/bio.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 45f00dd6323c..5aa40f4712ff 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -501,6 +501,7 @@ void zero_fill_bio(struct bio *bio); extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *); extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int); extern unsigned int bvec_nr_vecs(unsigned short idx); +extern const char *bio_devname(struct bio *bio, char *buffer); #define bio_set_dev(bio, bdev) \ do { \ @@ -519,9 +520,6 @@ do { \ #define bio_dev(bio) \ disk_devt((bio)->bi_disk) -#define bio_devname(bio, buf) \ - __bdevname(bio_dev(bio), (buf)) - #ifdef CONFIG_BLK_CGROUP int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); int bio_associate_current(struct bio *bio); -- cgit v1.2.3 From 8387fbac8e18e26a60559adc63e0b7067303b0a4 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 14 Mar 2018 10:21:14 +0100 Subject: ipv4: lock mtu in fnhe when received PMTU < net.ipv4.route.min_pmtu [ Upstream commit d52e5a7e7ca49457dd31fc8b42fb7c0d58a31221 ] Prior to the rework of PMTU information storage in commit 2c8cec5c10bc ("ipv4: Cache learned PMTU information in inetpeer."), when a PMTU event advertising a PMTU smaller than net.ipv4.route.min_pmtu was received, we would disable setting the DF flag on packets by locking the MTU metric, and set the PMTU to net.ipv4.route.min_pmtu. Since then, we don't disable DF, and set PMTU to net.ipv4.route.min_pmtu, so the intermediate router that has this link with a small MTU will have to drop the packets. This patch reestablishes pre-2.6.39 behavior by splitting rtable->rt_pmtu into a bitfield with rt_mtu_locked and rt_pmtu. rt_mtu_locked indicates that we shouldn't set the DF bit on that path, and is checked in ip_dont_fragment(). One possible workaround is to set net.ipv4.route.min_pmtu to a value low enough to accommodate the lowest MTU encountered. Fixes: 2c8cec5c10bc ("ipv4: Cache learned PMTU information in inetpeer.") Signed-off-by: Sabrina Dubroca Reviewed-by: Stefano Brivio Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/net/ip.h | 11 +++++++++-- include/net/ip_fib.h | 1 + include/net/route.h | 3 ++- 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index af8addbaa3c1..81da1123fc8e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -326,6 +326,13 @@ int ip_decrease_ttl(struct iphdr *iph) return --iph->ttl; } +static inline int ip_mtu_locked(const struct dst_entry *dst) +{ + const struct rtable *rt = (const struct rtable *)dst; + + return rt->rt_mtu_locked || dst_metric_locked(dst, RTAX_MTU); +} + static inline int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst) { @@ -333,7 +340,7 @@ int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst) return pmtudisc == IP_PMTUDISC_DO || (pmtudisc == IP_PMTUDISC_WANT && - !(dst_metric_locked(dst, RTAX_MTU))); + !ip_mtu_locked(dst)); } static inline bool ip_sk_accept_pmtu(const struct sock *sk) @@ -359,7 +366,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, struct net *net = dev_net(dst->dev); if (net->ipv4.sysctl_ip_fwd_use_pmtu || - dst_metric_locked(dst, RTAX_MTU) || + ip_mtu_locked(dst) || !forwarding) return dst_mtu(dst); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 1a7f7e424320..5c5d344c0629 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -59,6 +59,7 @@ struct fib_nh_exception { int fnhe_genid; __be32 fnhe_daddr; u32 fnhe_pmtu; + bool fnhe_mtu_locked; __be32 fnhe_gw; unsigned long fnhe_expires; struct rtable __rcu *fnhe_rth_input; diff --git a/include/net/route.h b/include/net/route.h index d538e6db1afe..6077a0fb3044 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -63,7 +63,8 @@ struct rtable { __be32 rt_gateway; /* Miscellaneous cached information */ - u32 rt_pmtu; + u32 rt_mtu_locked:1, + rt_pmtu:31; u32 rt_table_id; -- cgit v1.2.3 From 01a68a265ef506e95058c3cd0a8d9e4264eb9233 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 13 Mar 2018 14:51:27 +0900 Subject: net: Fix vlan untag for bridge and vlan_dev with reorder_hdr off [ Upstream commit 4bbb3e0e8239f9079bf1fe20b3c0cb598714ae61 ] When we have a bridge with vlan_filtering on and a vlan device on top of it, packets would be corrupted in skb_vlan_untag() called from br_dev_xmit(). The problem sits in skb_reorder_vlan_header() used in skb_vlan_untag(), which makes use of skb->mac_len. In this function mac_len is meant for handling rx path with vlan devices with reorder_header disabled, but in tx path mac_len is typically 0 and cannot be used, which is the problem in this case. The current code even does not properly handle rx path (skb_vlan_untag() called from __netif_receive_skb_core()) with reorder_header off actually. 
In rx path single tag case, it works as follows:

- Before skb_reorder_vlan_header()

   mac_header                        data
     v                                 v
   +-------------------+-------------+------+----
   |        ETH        |    VLAN     | ETH  |
   |       ADDRS       | TPID |  TCI | TYPE |
   +-------------------+-------------+------+----
   <-------- mac_len --------->
                       <------------->
                        to be removed

- After skb_reorder_vlan_header()

   mac_header          data
     v                   v
   +-------------------+------+----
   |        ETH        | ETH  |
   |       ADDRS       | TYPE |
   +-------------------+------+----
   <-------- mac_len --------->

This is ok, but in rx double tag case, it corrupts packets:

- Before skb_reorder_vlan_header()

   mac_header                                      data
     v                                               v
   +-------------------+-------------+-------------+------+----
   |        ETH        |    VLAN     |    VLAN     | ETH  |
   |       ADDRS       | TPID |  TCI | TPID |  TCI | TYPE |
   +-------------------+-------------+-------------+------+----
   <--------------- mac_len ---------------->
                                     <------------->
                                     should be removed
                       <--------------------------->
                          actually will be removed

- After skb_reorder_vlan_header()

   mac_header          data
     v                   v
   +-------------------+------+----
   |        ETH        | ETH  |
   |       ADDRS       | TYPE |
   +-------------------+------+----
   <--------------- mac_len ---------------->

So, both vlan tags are removed while only the inner one should be, and mac_header (and mac_len) are broken. skb_vlan_untag() is meant for removing the vlan header at (skb->data - 2), so use skb->data and skb->mac_header to calculate the right offset. Reported-by: Brandon Carpenter Fixes: a6e18ff11170 ("vlan: Fix untag operations of stacked vlans with REORDER_HEADER off") Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 3ee3bf7c8526..244e3213ecb0 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -30,6 +30,7 @@ */ #define ETH_ALEN 6 /* Octets in one ethernet addr */ +#define ETH_TLEN 2 /* Octets in ethernet type field */ #define ETH_HLEN 14 /* Total octets in header. */ #define ETH_ZLEN 60 /* Min. octets in frame sans FCS */ #define ETH_DATA_LEN 1500 /* Max. octets in payload */ -- cgit v1.2.3 From 99ba9a9728707888b113f00ac8eee4faa6d60431 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 13 Mar 2018 14:51:28 +0900 Subject: vlan: Fix out of order vlan headers with reorder header off [ Upstream commit cbe7128c4b92e2004984f477fd38dfa81662f02e ] With reorder header off, received packets are untagged in skb_vlan_untag() called from within __netif_receive_skb_core(), and later the tag will be inserted back in vlan_do_receive(). This caused out of order vlan headers when we create a vlan device on top of another vlan device, because vlan_do_receive() inserts a tag as the outermost vlan tag. E.g. the outer tag is first removed in skb_vlan_untag() and inserted back in vlan_do_receive(), then the inner tag is next removed and inserted back as the outermost tag. This patch fixes the behaviour by inserting the inner tag at the right position. Signed-off-by: Toshiaki Makita Signed-off-by: David S.
Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/if_vlan.h | 66 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index ab927383c99d..7b49e1759bf1 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -300,30 +300,34 @@ static inline bool vlan_hw_offload_capable(netdev_features_t features, } /** - * __vlan_insert_tag - regular VLAN tag inserting + * __vlan_insert_inner_tag - inner VLAN tag inserting * @skb: skbuff to tag * @vlan_proto: VLAN encapsulation protocol * @vlan_tci: VLAN TCI to insert + * @mac_len: MAC header length including outer vlan headers * - * Inserts the VLAN tag into @skb as part of the payload + * Inserts the VLAN tag into @skb as part of the payload at offset mac_len * Returns error if skb_cow_head failes. * * Does not change skb->protocol so this function can be used during receive. */ -static inline int __vlan_insert_tag(struct sk_buff *skb, - __be16 vlan_proto, u16 vlan_tci) +static inline int __vlan_insert_inner_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci, + unsigned int mac_len) { struct vlan_ethhdr *veth; if (skb_cow_head(skb, VLAN_HLEN) < 0) return -ENOMEM; - veth = skb_push(skb, VLAN_HLEN); + skb_push(skb, VLAN_HLEN); - /* Move the mac addresses to the beginning of the new header. */ - memmove(skb->data, skb->data + VLAN_HLEN, 2 * ETH_ALEN); + /* Move the mac header sans proto to the beginning of the new header. */ + memmove(skb->data, skb->data + VLAN_HLEN, mac_len - ETH_TLEN); skb->mac_header -= VLAN_HLEN; + veth = (struct vlan_ethhdr *)(skb->data + mac_len - ETH_HLEN); + /* first, the ethernet type */ veth->h_vlan_proto = vlan_proto; @@ -334,12 +338,30 @@ static inline int __vlan_insert_tag(struct sk_buff *skb, } /** - * vlan_insert_tag - regular VLAN tag inserting + * __vlan_insert_tag - regular VLAN tag inserting * @skb: skbuff to tag * @vlan_proto: VLAN encapsulation protocol * @vlan_tci: VLAN TCI to insert * * Inserts the VLAN tag into @skb as part of the payload + * Returns error if skb_cow_head failes. + * + * Does not change skb->protocol so this function can be used during receive. + */ +static inline int __vlan_insert_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci) +{ + return __vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, ETH_HLEN); +} + +/** + * vlan_insert_inner_tag - inner VLAN tag inserting + * @skb: skbuff to tag + * @vlan_proto: VLAN encapsulation protocol + * @vlan_tci: VLAN TCI to insert + * @mac_len: MAC header length including outer vlan headers + * + * Inserts the VLAN tag into @skb as part of the payload at offset mac_len * Returns a VLAN tagged skb. If a new skb is created, @skb is freed. * * Following the skb_unshare() example, in case of error, the calling function @@ -347,12 +369,14 @@ static inline int __vlan_insert_tag(struct sk_buff *skb, * * Does not change skb->protocol so this function can be used during receive. 
*/ -static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, - __be16 vlan_proto, u16 vlan_tci) +static inline struct sk_buff *vlan_insert_inner_tag(struct sk_buff *skb, + __be16 vlan_proto, + u16 vlan_tci, + unsigned int mac_len) { int err; - err = __vlan_insert_tag(skb, vlan_proto, vlan_tci); + err = __vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, mac_len); if (err) { dev_kfree_skb_any(skb); return NULL; @@ -360,6 +384,26 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, return skb; } +/** + * vlan_insert_tag - regular VLAN tag inserting + * @skb: skbuff to tag + * @vlan_proto: VLAN encapsulation protocol + * @vlan_tci: VLAN TCI to insert + * + * Inserts the VLAN tag into @skb as part of the payload + * Returns a VLAN tagged skb. If a new skb is created, @skb is freed. + * + * Following the skb_unshare() example, in case of error, the calling function + * doesn't have to worry about freeing the original skb. + * + * Does not change skb->protocol so this function can be used during receive. + */ +static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci) +{ + return vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, ETH_HLEN); +} + /** * vlan_insert_tag_set_proto - regular VLAN tag inserting * @skb: skbuff to tag -- cgit v1.2.3 From 5b5f4fd97d8fdc5d41ad632ed5154989b38b613f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 26 Mar 2018 15:08:33 -0700 Subject: llc: properly handle dev_queue_xmit() return value [ Upstream commit b85ab56c3f81c5a24b5a5213374f549df06430da ] llc_conn_send_pdu() pushes the skb into the write queue and calls llc_conn_send_pdus() to flush them out. However, the status of dev_queue_xmit() is not returned to the caller, in this case, llc_conn_state_process(). llc_conn_state_process() needs to hold the skb regardless of success or failure, because it still uses it after that; therefore we should hold the skb before dev_queue_xmit() when that skb is the one being processed by llc_conn_state_process(). The other callers can just pass NULL and ignore the return value, as they do now. Reported-by: Noam Rathaus Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/net/llc_conn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index ea985aa7a6c5..df528a623548 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -104,7 +104,7 @@ void llc_sk_reset(struct sock *sk); /* Access to a connection */ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb); -void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb); +int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb); void llc_conn_rtn_pdu(struct sock *sk, struct sk_buff *skb); void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit); void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit); -- cgit v1.2.3 From 206199412baeba06745b03a7950895f26f95c14b Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Thu, 29 Mar 2018 19:05:30 +0900 Subject: vlan: Fix vlan insertion for packets without ethernet header [ Upstream commit c769accdf3d8a103940bea2979b65556718567e9 ] In some situations vlan packets do not have ethernet headers. One example is packets from tun devices. Users can specify vlan protocol in tun_pi field instead of IP protocol.
When we have a vlan device with reorder_hdr disabled on top of the tun device, such packets from tun devices are untagged in skb_vlan_untag() and vlan headers will be inserted back in vlan_insert_inner_tag(). vlan_insert_inner_tag() however did not expect packets without ethernet headers, so in such a case size argument for memmove() underflowed. We don't need to copy headers for packets which do not have preceding headers of vlan headers, so skip memmove() in that case. Also don't write vlan protocol in skb->data when it does not have enough room for it. Fixes: cbe7128c4b92 ("vlan: Fix out of order vlan headers with reorder header off") Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/if_vlan.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 7b49e1759bf1..87b8c20d5b27 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -323,13 +323,24 @@ static inline int __vlan_insert_inner_tag(struct sk_buff *skb, skb_push(skb, VLAN_HLEN); /* Move the mac header sans proto to the beginning of the new header. */ - memmove(skb->data, skb->data + VLAN_HLEN, mac_len - ETH_TLEN); + if (likely(mac_len > ETH_TLEN)) + memmove(skb->data, skb->data + VLAN_HLEN, mac_len - ETH_TLEN); skb->mac_header -= VLAN_HLEN; veth = (struct vlan_ethhdr *)(skb->data + mac_len - ETH_HLEN); /* first, the ethernet type */ - veth->h_vlan_proto = vlan_proto; + if (likely(mac_len >= ETH_TLEN)) { + /* h_vlan_encapsulated_proto should already be populated, and + * skb->data has space for h_vlan_proto + */ + veth->h_vlan_proto = vlan_proto; + } else { + /* h_vlan_encapsulated_proto should not be populated, and + * skb->data has no space for h_vlan_proto + */ + veth->h_vlan_encapsulated_proto = skb->protocol; + } /* now, the TCI */ veth->h_vlan_TCI = htons(vlan_tci); -- cgit v1.2.3
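Taken together, the three vlan changes above move the receive-path re-tagging onto the new inner-tag helpers. The caller side in net/8021q/vlan_core.c is not part of these include-only diffs; a sketch of how vlan_do_receive() is expected to use the helper when reorder_header is off (assumed from the changelogs, not shown in the hunks above):

    /* Sketch only: re-insert the tag inside any outer tags still present. */
    if (unlikely(!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR))) {
        unsigned int offset = skb->data - skb_mac_header(skb);

        /* The helper expects skb->data to point at the mac header, so
         * push it back temporarily; skb->mac_len tells it how much
         * header (including outer vlan tags) precedes the insert point.
         */
        skb_push(skb, offset);
        skb = *skbp = vlan_insert_inner_tag(skb, skb->vlan_proto,
                                            skb->vlan_tci, skb->mac_len);
        if (!skb)
            return false;
        skb_pull(skb, offset + VLAN_HLEN);
        skb_reset_mac_len(skb);
    }

This keeps stacked vlan devices' headers in order, and the final patch above makes the same helper safe for frames whose mac_len is smaller than ETH_HLEN, such as tun-originated packets.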