From bce47648977d3c846f3ade30ddd78af67f80308f Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 1 Dec 2014 17:56:54 +0100 Subject: drm/i915: Unlock panel even when LVDS is disabled commit b0616c5306b342ceca07044dbc4f917d95c4f825 upstream. Otherwise we'll have backtraces in assert_panel_unlocked because the BIOS locks the register. In the reporter's case this regression was introduced in commit c31407a3672aaebb4acddf90944a114fa5c8af7b Author: Chris Wilson Date: Thu Oct 18 21:07:01 2012 +0100 drm/i915: Add no-lvds quirk for Supermicro X7SPA-H Reported-by: Alexey Orishko Cc: Alexey Orishko Cc: Chris Wilson Cc: Francois Tigeot Signed-off-by: Daniel Vetter Tested-by: Alexey Orishko Signed-off-by: Jani Nikula [bwh: Backported to 3.2: adjust context; comment was duplicated] Signed-off-by: Ben Hutchings --- drivers/gpu/drm/i915/intel_lvds.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index fadd02116a6f..4da8182662c8 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -914,6 +914,18 @@ bool intel_lvds_init(struct drm_device *dev) int pipe; u8 pin; + /* + * Unlock registers and just leave them unlocked. Do this before + * checking quirk lists to avoid bogus WARNINGs. + */ + if (HAS_PCH_SPLIT(dev)) { + I915_WRITE(PCH_PP_CONTROL, + I915_READ(PCH_PP_CONTROL) | PANEL_UNLOCK_REGS); + } else { + I915_WRITE(PP_CONTROL, + I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS); + } + /* Skip init on machines we know falsely report LVDS */ if (dmi_check_system(intel_no_lvds)) return false; @@ -1088,19 +1100,6 @@ out: pwm = I915_READ(BLC_PWM_PCH_CTL1); pwm |= PWM_PCH_ENABLE; I915_WRITE(BLC_PWM_PCH_CTL1, pwm); - /* - * Unlock registers and just - * leave them unlocked - */ - I915_WRITE(PCH_PP_CONTROL, - I915_READ(PCH_PP_CONTROL) | PANEL_UNLOCK_REGS); - } else { - /* - * Unlock registers and just - * leave them unlocked - */ - I915_WRITE(PP_CONTROL, - I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS); } dev_priv->lid_notifier.notifier_call = intel_lid_notify; if (acpi_lid_notifier_register(&dev_priv->lid_notifier)) { -- cgit v1.2.3 From c310193a1731d74b5b95d8608a79eb4c08685284 Mon Sep 17 00:00:00 2001 From: Devin Ryles Date: Fri, 7 Nov 2014 17:59:05 -0500 Subject: AHCI: Add DeviceIDs for Sunrise Point-LP SATA controller commit 249cd0a187ed4ef1d0af7f74362cc2791ec5581b upstream. This patch adds DeviceIDs for Sunrise Point-LP. 
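For reference, each PCI_VDEVICE(INTEL, ...) entry in the table below is shorthand for a pci_device_id that wildcards the subsystem IDs; roughly (a sketch of the standard macro expansion, not code from this patch):

	/* PCI_VDEVICE(INTEL, 0x9d03) expands to approximately: */
	{ .vendor = PCI_VENDOR_ID_INTEL, .device = 0x9d03,
	  .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID }
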
Signed-off-by: Devin Ryles Signed-off-by: Tejun Heo Signed-off-by: Ben Hutchings --- drivers/ata/ahci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 923ac15603ba..e86822291803 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -331,6 +331,9 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x8c87), board_ahci }, /* 9 Series RAID */ { PCI_VDEVICE(INTEL, 0x8c8e), board_ahci }, /* 9 Series RAID */ { PCI_VDEVICE(INTEL, 0x8c8f), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0x9d03), board_ahci }, /* Sunrise Point-LP AHCI */ + { PCI_VDEVICE(INTEL, 0x9d05), board_ahci }, /* Sunrise Point-LP RAID */ + { PCI_VDEVICE(INTEL, 0x9d07), board_ahci }, /* Sunrise Point-LP RAID */ { PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H AHCI */ { PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H RAID */ { PCI_VDEVICE(INTEL, 0xa105), board_ahci }, /* Sunrise Point-H RAID */ -- cgit v1.2.3 From 3e6b2cfaf3aec080122fc0eecf2573364beaba61 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 14 Nov 2014 13:39:05 -0800 Subject: sata_fsl: fix error handling of irq_of_parse_and_map commit aad0b624129709c94c2e19e583b6053520353fa8 upstream. irq_of_parse_and_map() returns 0 on error (the result is unsigned int), so testing for negative result never works. Signed-off-by: Dmitry Torokhov Signed-off-by: Tejun Heo Signed-off-by: Ben Hutchings --- drivers/ata/sata_fsl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index 78ae7b67b09e..54702f84dbc3 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -1338,7 +1338,7 @@ static int sata_fsl_probe(struct platform_device *ofdev) host_priv->csr_base = csr_base; irq = irq_of_parse_and_map(ofdev->dev.of_node, 0); - if (irq < 0) { + if (!irq) { dev_err(&ofdev->dev, "invalid irq from platform\n"); goto error_exit_with_cleanup; } -- cgit v1.2.3 From d528656f0d33ff97a4aea48f2f1b66535fdfa880 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 2 Dec 2014 15:59:39 -0800 Subject: mm: fix swapoff hang after page migration and fork commit 2022b4d18a491a578218ce7a4eca8666db895a73 upstream. I've been seeing swapoff hangs in recent testing: it's cycling around trying unsuccessfully to find an mm for some remaining pages of swap. I have been exercising swap and page migration more heavily recently, and now notice a long-standing error in copy_one_pte(): it's trying to add dst_mm to swapoff's mmlist when it finds a swap entry, but is doing so even when it's a migration entry or an hwpoison entry. Which wouldn't matter much, except it adds dst_mm next to src_mm, assuming src_mm is already on the mmlist: which may not be so. Then if pages are later swapped out from dst_mm, swapoff won't be able to find where to replace them. There's already a !non_swap_entry() test for stats: move that up before the swap_duplicate() and the addition to mmlist. 
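A minimal sketch of the resulting control flow in copy_one_pte(), abbreviated from the diff that follows (not the literal patch text):

	swp_entry_t entry = pte_to_swp_entry(pte);

	if (likely(!non_swap_entry(entry))) {
		/* a true swap entry: take a swap reference and make
		 * dst_mm findable by swapoff via the mmlist */
		if (swap_duplicate(entry) < 0)
			return entry.val;
		/* ...mmlist linkage under mmlist_lock... */
		rss[MM_SWAPENTS]++;
	}
	/* migration and hwpoison entries now skip both steps */
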
Signed-off-by: Hugh Dickins Cc: Kelley Nielsen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds [bwh: Backported to 3.2: adjust context] Signed-off-by: Ben Hutchings --- mm/memory.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 5a7f3140a678..628cadca97ef 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -870,20 +870,20 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, if (!pte_file(pte)) { swp_entry_t entry = pte_to_swp_entry(pte); - if (swap_duplicate(entry) < 0) - return entry.val; - - /* make sure dst_mm is on swapoff's mmlist. */ - if (unlikely(list_empty(&dst_mm->mmlist))) { - spin_lock(&mmlist_lock); - if (list_empty(&dst_mm->mmlist)) - list_add(&dst_mm->mmlist, - &src_mm->mmlist); - spin_unlock(&mmlist_lock); - } - if (likely(!non_swap_entry(entry))) + if (likely(!non_swap_entry(entry))) { + if (swap_duplicate(entry) < 0) + return entry.val; + + /* make sure dst_mm is on swapoff's mmlist. */ + if (unlikely(list_empty(&dst_mm->mmlist))) { + spin_lock(&mmlist_lock); + if (list_empty(&dst_mm->mmlist)) + list_add(&dst_mm->mmlist, + &src_mm->mmlist); + spin_unlock(&mmlist_lock); + } rss[MM_SWAPENTS]++; - else if (is_write_migration_entry(entry) && + } else if (is_write_migration_entry(entry) && is_cow_mapping(vm_flags)) { /* * COW mappings require pages in both parent -- cgit v1.2.3 From aff08aba9cdff851082d49ff3906c43fa2d06a2d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 4 Dec 2014 13:13:28 -0500 Subject: ahci: disable MSI on SAMSUNG 0xa800 SSD commit 2b21ef0aae65f22f5ba86b13c4588f6f0c2dbefb upstream. Just like 0x1600 which got blacklisted by 66a7cbc303f4 ("ahci: disable MSI instead of NCQ on Samsung pci-e SSDs on macbooks"), 0xa800 chokes on NCQ commands if MSI is enabled. Disable MSI. Signed-off-by: Tejun Heo Reported-by: Dominik Mierzejewski Link: https://bugzilla.kernel.org/show_bug.cgi?id=89171 Signed-off-by: Ben Hutchings --- drivers/ata/ahci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index e86822291803..81f32e5465bb 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -502,6 +502,7 @@ static const struct pci_device_id ahci_pci_tbl[] = { * enabled. https://bugzilla.kernel.org/show_bug.cgi?id=60731 */ { PCI_VDEVICE(SAMSUNG, 0x1600), board_ahci_nomsi }, + { PCI_VDEVICE(SAMSUNG, 0xa800), board_ahci_nomsi }, /* Enmotus */ { PCI_DEVICE(0x1c44, 0x8000), board_ahci }, -- cgit v1.2.3 From eff3ef9a83145917ccbabc07e0289c388b24600c Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Mon, 1 Dec 2014 17:34:04 +0200 Subject: i2c: davinci: generate STP always when NACK is received MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9ea359f7314132cbcb5a502d2d8ef095be1f45e4 upstream. According to the I2C specification, the NACK should be handled as follows: "When SDA remains HIGH during this ninth clock pulse, this is defined as the Not Acknowledge signal. The master can then generate either a STOP condition to abort the transfer, or a repeated START condition to start a new transfer." [I2C spec Rev. 6, 3.1.6: http://www.nxp.com/documents/user_manual/UM10204.pdf] Currently the Davinci I2C driver interrupts the transfer on receipt of a NACK but fails to send a STOP in some situations, leaving the bus stuck until the next I2C IP reset (idle/enable).
For example, the issue will happen during an SMBus read transfer, which consists of two i2c messages, a write of the command/address followed by a read of the data:

S Slave Address Wr A Command Code A Sr Slave Address Rd A D1..Dn A P
  <--- write -----------------------> <--- read --------------------->

The I2C client device will send a NACK if it can't recognize the "Command Code", and the I2C master is then expected to generate a STP. But currently the Davinci I2C driver will just exit with -EREMOTEIO and no STP will be generated. Hence, fix it by always generating a Stop condition (STP) when a NACK is received. This patch fixes Davinci I2C in the same way it was done for OMAP I2C commit cda2109a26eb ("i2c: omap: query STP always when NACK is received"). Reviewed-by: Uwe Kleine-König Reported-by: Hein Tibosch Signed-off-by: Grygorii Strashko Signed-off-by: Wolfram Sang Signed-off-by: Ben Hutchings --- drivers/i2c/busses/i2c-davinci.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c index 79b4bcb3b85c..1837fe6677b9 100644 --- a/drivers/i2c/busses/i2c-davinci.c +++ b/drivers/i2c/busses/i2c-davinci.c @@ -416,11 +416,9 @@ i2c_davinci_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, int stop) if (dev->cmd_err & DAVINCI_I2C_STR_NACK) { if (msg->flags & I2C_M_IGNORE_NAK) return msg->len; - if (stop) { - w = davinci_i2c_read_reg(dev, DAVINCI_I2C_MDR_REG); - w |= DAVINCI_I2C_MDR_STP; - davinci_i2c_write_reg(dev, DAVINCI_I2C_MDR_REG, w); - } + w = davinci_i2c_read_reg(dev, DAVINCI_I2C_MDR_REG); + w |= DAVINCI_I2C_MDR_STP; + davinci_i2c_write_reg(dev, DAVINCI_I2C_MDR_REG, w); return -EREMOTEIO; } return -EIO; -- cgit v1.2.3 From 7ecef8c8b70c21c944ccdf8b8406292e71038a98 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 4 Sep 2014 14:06:55 +0200 Subject: udf: Avoid infinite loop when processing indirect ICBs commit c03aa9f6e1f938618e6db2e23afef0574efeeb65 upstream. We did not implement any bound on the number of indirect ICBs we follow when loading an inode, so a corrupted medium could cause the kernel to go into an infinite loop, possibly causing a stack overflow. Fix the possible stack overflow by removing the recursion from __udf_read_inode() and limiting the number of indirect ICBs we follow to avoid infinite loops. Signed-off-by: Jan Kara [bwh: Backported to 3.2: adjust context] Signed-off-by: Ben Hutchings --- fs/udf/inode.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index af37ce33ffef..a0f6dedb14c3 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1176,13 +1176,22 @@ update_time: return 0; } +/* + * Maximum length of linked list formed by ICB hierarchy. The chosen number is + * arbitrary - just that we hopefully don't limit any real use of rewritten + * inode on write-once media but avoid looping for too long on corrupted media. + */ +#define UDF_MAX_ICB_NESTING 1024 + static void __udf_read_inode(struct inode *inode) { struct buffer_head *bh = NULL; struct fileEntry *fe; uint16_t ident; struct udf_inode_info *iinfo = UDF_I(inode); + unsigned int indirections = 0; +reread: /* * Set defaults, but the inode is still incomplete!
* Note: get_new_inode() sets the following on a new inode: @@ -1219,28 +1228,26 @@ static void __udf_read_inode(struct inode *inode) ibh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 1, &ident); if (ident == TAG_IDENT_IE && ibh) { - struct buffer_head *nbh = NULL; struct kernel_lb_addr loc; struct indirectEntry *ie; ie = (struct indirectEntry *)ibh->b_data; loc = lelb_to_cpu(ie->indirectICB.extLocation); - if (ie->indirectICB.extLength && - (nbh = udf_read_ptagged(inode->i_sb, &loc, 0, - &ident))) { - if (ident == TAG_IDENT_FE || - ident == TAG_IDENT_EFE) { - memcpy(&iinfo->i_location, - &loc, - sizeof(struct kernel_lb_addr)); - brelse(bh); - brelse(ibh); - brelse(nbh); - __udf_read_inode(inode); + if (ie->indirectICB.extLength) { + brelse(bh); + brelse(ibh); + memcpy(&iinfo->i_location, &loc, + sizeof(struct kernel_lb_addr)); + if (++indirections > UDF_MAX_ICB_NESTING) { + udf_err(inode->i_sb, + "too many ICBs in ICB hierarchy" + " (max %d supported)\n", + UDF_MAX_ICB_NESTING); + make_bad_inode(inode); return; } - brelse(nbh); + goto reread; } } brelse(ibh); -- cgit v1.2.3 From 590461b16c5464b9d4377898abc057239a6afc3a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 10 Nov 2014 17:54:26 +0100 Subject: net: sctp: fix NULL pointer dereference in af->from_addr_param on malformed packet commit e40607cbe270a9e8360907cb1e62ddf0736e4864 upstream. An SCTP server doing ASCONF will panic on a malformed INIT ping-of-death in the form of: ------------ INIT[PARAM: SET_PRIMARY_IP] ------------> While the INIT chunk parameter verification dissects through many things in order to detect malformed input, it fails to actually check parameters inside of parameters. E.g. RFC5061, section 4.2.4 proposes a 'set primary IP address' parameter in ASCONF, which has as a subparameter an address parameter. So an attacker may send a parameter type other than SCTP_PARAM_IPV4_ADDRESS or SCTP_PARAM_IPV6_ADDRESS, param_type2af() will subsequently return 0 and thus sctp_get_af_specific() returns NULL, too, which we then happily dereference unconditionally through af->from_addr_param(). The trace for the log: BUG: unable to handle kernel NULL pointer dereference at 0000000000000078 IP: [] sctp_process_init+0x492/0x990 [sctp] PGD 0 Oops: 0000 [#1] SMP [...] Pid: 0, comm: swapper Not tainted 2.6.32-504.el6.x86_64 #1 Bochs Bochs RIP: 0010:[] [] sctp_process_init+0x492/0x990 [sctp] [...] Call Trace: [] ? sctp_bind_addr_copy+0x5d/0xe0 [sctp] [] sctp_sf_do_5_1B_init+0x21b/0x340 [sctp] [] sctp_do_sm+0x71/0x1210 [sctp] [] ? sctp_endpoint_lookup_assoc+0xc9/0xf0 [sctp] [] sctp_endpoint_bh_rcv+0x116/0x230 [sctp] [] sctp_inq_push+0x56/0x80 [sctp] [] sctp_rcv+0x982/0xa10 [sctp] [] ? ipt_local_in_hook+0x23/0x28 [iptable_filter] [] ? nf_iterate+0x69/0xb0 [] ? ip_local_deliver_finish+0x0/0x2d0 [] ? nf_hook_slow+0x76/0x120 [] ? ip_local_deliver_finish+0x0/0x2d0 [...] A minimal way to address this is to check for NULL as we do on all other such occasions where we know sctp_get_af_specific() could possibly return with NULL. Fixes: d6de3097592b ("[SCTP]: Add the handling of "Set Primary IP Address" parameter to INIT") Signed-off-by: Daniel Borkmann Cc: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S.
Miller Signed-off-by: Ben Hutchings --- net/sctp/sm_make_chunk.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index d8d470460090..c40952cb826a 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2570,6 +2570,9 @@ do_addr_param: addr_param = param.v + sizeof(sctp_addip_param_t); af = sctp_get_af_specific(param_type2af(param.p->type)); + if (af == NULL) + break; + af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0); -- cgit v1.2.3 From 1aded21661bda559a407cfb7c69d0e53b72bc671 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 17 Sep 2014 02:50:50 +0300 Subject: KVM: x86: Don't report guest userspace emulation error to userspace commit a2b9e6c1a35afcc0973acb72e591c714e78885ff upstream. Commit fc3a9157d314 ("KVM: X86: Don't report L2 emulation failures to user-space") disabled the reporting of L2 (nested guest) emulation failures to userspace due to a race condition between a vmexit and the instruction emulator. The same rationale also applies to userspace applications that are permitted by the guest OS to access an MMIO area or perform PIO. This patch extends the current behavior - injecting a #UD instead of reporting the failure to userspace - to guest userspace code as well. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini Signed-off-by: Ben Hutchings --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2d7d0df41b6e..bb179cccdbb5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4846,7 +4846,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) ++vcpu->stat.insn_emulation_fail; trace_kvm_emulate_insn_failed(vcpu); - if (!is_guest_mode(vcpu)) { + if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; -- cgit v1.2.3 From 106ed96d46fcaf9f2e72555035fa585403cf4dd3 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 4 Dec 2014 16:48:16 -0800 Subject: x86/tls: Validate TLS entries to protect espfix commit 41bdc78544b8a93a9c6814b8bbbfef966272abbe upstream. Installing a 16-bit RW data segment into the GDT defeats espfix. AFAICT this will not affect glibc, Wine, or dosemu at all. Signed-off-by: Andy Lutomirski Acked-by: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: security@kernel.org Cc: Willy Tarreau Signed-off-by: Ingo Molnar Signed-off-by: Ben Hutchings --- arch/x86/kernel/tls.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index bcfec2d23769..7af73380ae4b 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -28,6 +28,21 @@ static int get_free_idx(void) return -ESRCH; } +static bool tls_desc_okay(const struct user_desc *info) +{ + if (LDT_empty(info)) + return true; + + /* + * espfix is required for 16-bit data segments, but espfix + * only works for LDT segments.
+ */ + if (!info->seg_32bit) + return false; + + return true; +} + static void set_tls_desc(struct task_struct *p, int idx, const struct user_desc *info, int n) { @@ -67,6 +82,9 @@ int do_set_thread_area(struct task_struct *p, int idx, if (copy_from_user(&info, u_info, sizeof(info))) return -EFAULT; + if (!tls_desc_okay(&info)) + return -EINVAL; + if (idx == -1) idx = info.entry_number; @@ -197,6 +215,7 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset, { struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES]; const struct user_desc *info; + int i; if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || (pos % sizeof(struct user_desc)) != 0 || @@ -210,6 +229,10 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset, else info = infobuf; + for (i = 0; i < count / sizeof(struct user_desc); i++) + if (!tls_desc_okay(info + i)) + return -EINVAL; + set_tls_desc(target, GDT_ENTRY_TLS_MIN + (pos / sizeof(struct user_desc)), info, count / sizeof(struct user_desc)); -- cgit v1.2.3 From 2f67670174ad4bd1c48e8b97cc107e3232d422ce Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 5 Sep 2014 09:09:28 -0300 Subject: ttusb-dec: buffer overflow in ioctl commit f2e323ec96077642d397bb1c355def536d489d16 upstream. We need to add a limit check here so we don't overflow the buffer. Signed-off-by: Dan Carpenter Signed-off-by: Mauro Carvalho Chehab [bwh: Backported to 3.2: adjust filename] Signed-off-by: Ben Hutchings --- drivers/media/dvb/ttusb-dec/ttusbdecfe.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/dvb/ttusb-dec/ttusbdecfe.c b/drivers/media/dvb/ttusb-dec/ttusbdecfe.c index 21260aad1e54..852870b80df3 100644 --- a/drivers/media/dvb/ttusb-dec/ttusbdecfe.c +++ b/drivers/media/dvb/ttusb-dec/ttusbdecfe.c @@ -154,6 +154,9 @@ static int ttusbdecfe_dvbs_diseqc_send_master_cmd(struct dvb_frontend* fe, struc 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + if (cmd->msg_len > sizeof(b) - 4) + return -EINVAL; + memcpy(&b[4], cmd->msg, cmd->msg_len); state->config->send_command(fe, 0x72, -- cgit v1.2.3 From 060d11323f35afb752a7ba6c5bead732c204de55 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 5 Dec 2014 19:03:28 -0800 Subject: x86, kvm: Clear paravirt_enabled on KVM guests for espfix32's benefit commit 29fa6825463c97e5157284db80107d1bfac5d77b upstream. paravirt_enabled has the following effects: - Disables the F00F bug workaround warning. There is no F00F bug workaround any more because Linux's standard IDT handling already works around the F00F bug, but the warning still exists. This is only cosmetic, and, in any event, there is no such thing as KVM on a CPU with the F00F bug. - Disables 32-bit APM BIOS detection. On a KVM paravirt system, there should be no APM BIOS anyway. - Disables tboot. I think that the tboot code should check the CPUID hypervisor bit directly if it matters. - paravirt_enabled disables espfix32. espfix32 should *not* be disabled under KVM paravirt. The last point is the purpose of this patch. It fixes a leak of the high 16 bits of the kernel stack address on 32-bit KVM paravirt guests. Fixes CVE-2014-8134. 
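As an aside, the "CPUID hypervisor bit" mentioned for tboot above is CPUID leaf 1, ECX bit 31, which hypervisors such as KVM set for their guests. A hedged userspace sketch of checking it directly (illustrative only, not part of this patch):

	#include <cpuid.h>
	#include <stdbool.h>

	static bool running_under_hypervisor(void)
	{
		unsigned int eax, ebx, ecx, edx;

		if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
			return false;
		return ecx & (1u << 31);	/* CPUID.01H:ECX[31] */
	}
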
Suggested-by: Konrad Rzeszutek Wilk Signed-off-by: Andy Lutomirski Signed-off-by: Paolo Bonzini Signed-off-by: Ben Hutchings --- arch/x86/kernel/kvm.c | 9 ++++++++- arch/x86/kernel/kvmclock.c | 1 - 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a9c2116001d6..4b6701e95ae7 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -419,7 +419,14 @@ static void kvm_leave_lazy_mmu(void) static void __init paravirt_ops_setup(void) { pv_info.name = "KVM"; - pv_info.paravirt_enabled = 1; + + /* + * KVM isn't paravirt in the sense of paravirt_enabled. A KVM + * guest kernel works like a bare metal kernel with additional + * features, and paravirt_enabled is about features that are + * missing. + */ + pv_info.paravirt_enabled = 0; if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) pv_cpu_ops.io_delay = kvm_io_delay; diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 44842d756b29..e90eca04216e 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -203,7 +203,6 @@ void __init kvmclock_init(void) #endif kvm_get_preset_lpj(); clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); - pv_info.paravirt_enabled = 1; pv_info.name = "KVM"; if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) -- cgit v1.2.3 From 026181647a6262f4ba6d60c0847d306ad685468c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Oct 2014 19:19:16 -0400 Subject: move d_rcu from overlapping d_child to overlapping d_alias commit 946e51f2bf37f1656916eb75bd0742ba33983c28 upstream. Signed-off-by: Al Viro [bwh: Backported to 3.2: - Apply name changes in all the different places we use d_alias and d_child - Move the WARN_ON() in __d_free() to d_free() as we don't have dentry_free()] Signed-off-by: Ben Hutchings --- arch/powerpc/platforms/cell/spufs/inode.c | 4 +- drivers/usb/core/inode.c | 6 +-- fs/9p/vfs_inode_dotl.c | 2 +- fs/affs/amigaffs.c | 2 +- fs/autofs4/expire.c | 10 ++-- fs/autofs4/root.c | 2 +- fs/ceph/dir.c | 8 ++-- fs/ceph/inode.c | 6 +-- fs/cifs/inode.c | 2 +- fs/coda/cache.c | 2 +- fs/dcache.c | 80 +++++++++++++++---------------- fs/debugfs/inode.c | 2 +- fs/exportfs/expfs.c | 2 +- fs/ext4/fsync.c | 2 +- fs/libfs.c | 12 ++--- fs/ncpfs/dir.c | 2 +- fs/ncpfs/ncplib_kernel.h | 4 +- fs/nfs/getroot.c | 2 +- fs/notify/fsnotify.c | 4 +- fs/ocfs2/dcache.c | 2 +- include/linux/dcache.h | 8 ++-- kernel/cgroup.c | 4 +- security/selinux/selinuxfs.c | 6 +-- 23 files changed, 87 insertions(+), 87 deletions(-) diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 70ec4e992f54..941d5cbd8357 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -165,7 +165,7 @@ static void spufs_prune_dir(struct dentry *dir) struct dentry *dentry, *tmp; mutex_lock(&dir->d_inode->i_mutex); - list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) { + list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) { spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry)) && dentry->d_inode) { dget_dlock(dentry); @@ -223,7 +223,7 @@ out: * - free child's inode if possible * - free child */ - list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) { + list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) { dput(dentry); } diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index 2278dad886e2..5babd94c9fbc 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -212,7 +212,7 @@ 
static void update_bus(struct dentry *bus) mutex_lock(&bus->d_inode->i_mutex); - list_for_each_entry(dev, &bus->d_subdirs, d_u.d_child) + list_for_each_entry(dev, &bus->d_subdirs, d_child) if (dev->d_inode) update_dev(dev); @@ -229,7 +229,7 @@ static void update_sb(struct super_block *sb) mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_PARENT); - list_for_each_entry(bus, &root->d_subdirs, d_u.d_child) { + list_for_each_entry(bus, &root->d_subdirs, d_child) { if (bus->d_inode) { switch (S_IFMT & bus->d_inode->i_mode) { case S_IFDIR: @@ -345,7 +345,7 @@ static int usbfs_empty (struct dentry *dentry) spin_lock(&dentry->d_lock); list_for_each(list, &dentry->d_subdirs) { - struct dentry *de = list_entry(list, struct dentry, d_u.d_child); + struct dentry *de = list_entry(list, struct dentry, d_child); spin_lock_nested(&de->d_lock, DENTRY_D_LOCK_NESTED); if (usbfs_positive(de)) { diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 0b5745e21946..30d4fa826e7c 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -81,7 +81,7 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode) spin_lock(&inode->i_lock); /* Directory should have only one entry. */ BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry)); - dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias); + dentry = list_entry(inode->i_dentry.next, struct dentry, d_u.d_alias); spin_unlock(&inode->i_lock); return dentry; } diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index de37ec842340..43c05d8fd82c 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -132,7 +132,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino) head = &inode->i_dentry; next = head->next; while (next != head) { - dentry = list_entry(next, struct dentry, d_alias); + dentry = list_entry(next, struct dentry, d_u.d_alias); if (entry_ino == (u32)(long)dentry->d_fsdata) { dentry->d_fsdata = data; break; diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 2c69d1257314..7fc03715d5a8 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -100,7 +100,7 @@ static struct dentry *get_next_positive_subdir(struct dentry *prev, p = prev; spin_lock(&p->d_lock); again: - next = p->d_u.d_child.next; + next = p->d_child.next; start: if (next == &root->d_subdirs) { spin_unlock(&p->d_lock); @@ -109,7 +109,7 @@ start: return NULL; } - q = list_entry(next, struct dentry, d_u.d_child); + q = list_entry(next, struct dentry, d_child); spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED); /* Negative dentry - try next */ @@ -165,13 +165,13 @@ again: goto relock; } spin_unlock(&p->d_lock); - next = p->d_u.d_child.next; + next = p->d_child.next; p = parent; if (next != &parent->d_subdirs) break; } } - ret = list_entry(next, struct dentry, d_u.d_child); + ret = list_entry(next, struct dentry, d_child); spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED); /* Negative dentry - try next */ @@ -455,7 +455,7 @@ found: spin_lock(&sbi->lookup_lock); spin_lock(&expired->d_parent->d_lock); spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED); - list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); + list_move(&expired->d_parent->d_subdirs, &expired->d_child); spin_unlock(&expired->d_lock); spin_unlock(&expired->d_parent->d_lock); spin_unlock(&sbi->lookup_lock); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 790fa6387644..2e936c6bfc7e 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -651,7 +651,7 @@ static void autofs_clear_leaf_automount_flags(struct dentry *dentry) /* 
only consider parents below dentrys in the root */ if (IS_ROOT(parent->d_parent)) return; - d_child = &dentry->d_u.d_child; + d_child = &dentry->d_child; /* Set parent managed if it's becoming empty */ if (d_child->next == &parent->d_subdirs && d_child->prev == &parent->d_subdirs) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 98954003a8d3..7903e62473cf 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -104,7 +104,7 @@ static unsigned fpos_off(loff_t p) /* * When possible, we try to satisfy a readdir by peeking at the * dcache. We make this work by carefully ordering dentries on - * d_u.d_child when we initially get results back from the MDS, and + * d_child when we initially get results back from the MDS, and * falling back to a "normal" sync readdir if any dentries in the dir * are dropped. * @@ -140,11 +140,11 @@ static int __dcache_readdir(struct file *filp, p = parent->d_subdirs.prev; dout(" initial p %p/%p\n", p->prev, p->next); } else { - p = last->d_u.d_child.prev; + p = last->d_child.prev; } more: - dentry = list_entry(p, struct dentry, d_u.d_child); + dentry = list_entry(p, struct dentry, d_child); di = ceph_dentry(dentry); while (1) { dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next, @@ -166,7 +166,7 @@ more: !dentry->d_inode ? " null" : ""); spin_unlock(&dentry->d_lock); p = p->prev; - dentry = list_entry(p, struct dentry, d_u.d_child); + dentry = list_entry(p, struct dentry, d_child); di = ceph_dentry(dentry); } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 87fb132fb330..8e889b773d24 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -868,9 +868,9 @@ static void ceph_set_dentry_offset(struct dentry *dn) spin_lock(&dir->d_lock); spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); - list_move(&dn->d_u.d_child, &dir->d_subdirs); + list_move(&dn->d_child, &dir->d_subdirs); dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, - dn->d_u.d_child.prev, dn->d_u.d_child.next); + dn->d_child.prev, dn->d_child.next); spin_unlock(&dn->d_lock); spin_unlock(&dir->d_lock); } @@ -1256,7 +1256,7 @@ retry_lookup: /* reorder parent's d_subdirs */ spin_lock(&parent->d_lock); spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); - list_move(&dn->d_u.d_child, &parent->d_subdirs); + list_move(&dn->d_child, &parent->d_subdirs); spin_unlock(&dn->d_lock); spin_unlock(&parent->d_lock); } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index c0c51e11db7e..710dd05ccd9c 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -823,7 +823,7 @@ inode_has_hashed_dentries(struct inode *inode) struct dentry *dentry; spin_lock(&inode->i_lock); - list_for_each_entry(dentry, &inode->i_dentry, d_alias) { + list_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { if (!d_unhashed(dentry) || IS_ROOT(dentry)) { spin_unlock(&inode->i_lock); return true; diff --git a/fs/coda/cache.c b/fs/coda/cache.c index 690157876184..4b2e5cb502f6 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c @@ -95,7 +95,7 @@ static void coda_flag_children(struct dentry *parent, int flag) spin_lock(&parent->d_lock); list_for_each(child, &parent->d_subdirs) { - de = list_entry(child, struct dentry, d_u.d_child); + de = list_entry(child, struct dentry, d_child); /* don't know what to do with negative dentries */ if ( ! 
de->d_inode ) continue; diff --git a/fs/dcache.c b/fs/dcache.c index d3229296a0bc..226a93a3f7bd 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -42,7 +42,7 @@ /* * Usage: * dcache->d_inode->i_lock protects: - * - i_dentry, d_alias, d_inode of aliases + * - i_dentry, d_u.d_alias, d_inode of aliases * dcache_hash_bucket lock protects: * - the dcache hash table * s_anon bl list spinlock protects: @@ -57,7 +57,7 @@ * - d_unhashed() * - d_parent and d_subdirs * - childrens' d_child and d_parent - * - d_alias, d_inode + * - d_u.d_alias, d_inode * * Ordering: * dentry->d_inode->i_lock @@ -140,7 +140,6 @@ static void __d_free(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); - WARN_ON(!list_empty(&dentry->d_alias)); if (dname_external(dentry)) kfree(dentry->d_name.name); kmem_cache_free(dentry_cache, dentry); @@ -151,6 +150,7 @@ static void __d_free(struct rcu_head *head) */ static void d_free(struct dentry *dentry) { + WARN_ON(!list_empty(&dentry->d_u.d_alias)); BUG_ON(dentry->d_count); this_cpu_dec(nr_dentry); if (dentry->d_op && dentry->d_op->d_release) @@ -189,7 +189,7 @@ static void dentry_iput(struct dentry * dentry) struct inode *inode = dentry->d_inode; if (inode) { dentry->d_inode = NULL; - list_del_init(&dentry->d_alias); + list_del_init(&dentry->d_u.d_alias); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); if (!inode->i_nlink) @@ -213,7 +213,7 @@ static void dentry_unlink_inode(struct dentry * dentry) { struct inode *inode = dentry->d_inode; dentry->d_inode = NULL; - list_del_init(&dentry->d_alias); + list_del_init(&dentry->d_u.d_alias); dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); @@ -306,7 +306,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) __releases(parent->d_lock) __releases(dentry->d_inode->i_lock) { - list_del(&dentry->d_u.d_child); + list_del(&dentry->d_child); /* * Inform try_to_ascend() that we are no longer attached to the * dentry tree @@ -624,7 +624,7 @@ static struct dentry *__d_find_alias(struct inode *inode, int want_discon) again: discon_alias = NULL; - list_for_each_entry(alias, &inode->i_dentry, d_alias) { + list_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { spin_lock(&alias->d_lock); if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { if (IS_ROOT(alias) && @@ -677,7 +677,7 @@ void d_prune_aliases(struct inode *inode) struct dentry *dentry; restart: spin_lock(&inode->i_lock); - list_for_each_entry(dentry, &inode->i_dentry, d_alias) { + list_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { spin_lock(&dentry->d_lock); if (!dentry->d_count) { __dget_dlock(dentry); @@ -857,7 +857,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* descend to the first leaf in the current subtree */ while (!list_empty(&dentry->d_subdirs)) dentry = list_entry(dentry->d_subdirs.next, - struct dentry, d_u.d_child); + struct dentry, d_child); /* consume the dentries from this leaf up through its parents * until we find one with children or run out altogether */ @@ -889,17 +889,17 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) if (IS_ROOT(dentry)) { parent = NULL; - list_del(&dentry->d_u.d_child); + list_del(&dentry->d_child); } else { parent = dentry->d_parent; parent->d_count--; - list_del(&dentry->d_u.d_child); + list_del(&dentry->d_child); } inode = dentry->d_inode; if (inode) { dentry->d_inode = NULL; - list_del_init(&dentry->d_alias); + list_del_init(&dentry->d_u.d_alias); if (dentry->d_op && 
dentry->d_op->d_iput) dentry->d_op->d_iput(dentry, inode); else @@ -917,7 +917,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) } while (list_empty(&dentry->d_subdirs)); dentry = list_entry(dentry->d_subdirs.next, - struct dentry, d_u.d_child); + struct dentry, d_child); } } @@ -1010,7 +1010,7 @@ repeat: resume: while (next != &this_parent->d_subdirs) { struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); + struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); @@ -1037,7 +1037,7 @@ resume: this_parent = try_to_ascend(this_parent, locked, seq); if (!this_parent) goto rename_retry; - next = child->d_u.d_child.next; + next = child->d_child.next; goto resume; } spin_unlock(&this_parent->d_lock); @@ -1093,7 +1093,7 @@ repeat: resume: while (next != &this_parent->d_subdirs) { struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); + struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); @@ -1144,7 +1144,7 @@ resume: this_parent = try_to_ascend(this_parent, locked, seq); if (!this_parent) goto rename_retry; - next = child->d_u.d_child.next; + next = child->d_child.next; goto resume; } out: @@ -1230,8 +1230,8 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) INIT_HLIST_BL_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); - INIT_LIST_HEAD(&dentry->d_alias); - INIT_LIST_HEAD(&dentry->d_u.d_child); + INIT_LIST_HEAD(&dentry->d_u.d_alias); + INIT_LIST_HEAD(&dentry->d_child); d_set_d_op(dentry, dentry->d_sb->s_d_op); this_cpu_inc(nr_dentry); @@ -1261,7 +1261,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) */ __dget_dlock(parent); dentry->d_parent = parent; - list_add(&dentry->d_u.d_child, &parent->d_subdirs); + list_add(&dentry->d_child, &parent->d_subdirs); spin_unlock(&parent->d_lock); return dentry; @@ -1318,7 +1318,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) if (inode) { if (unlikely(IS_AUTOMOUNT(inode))) dentry->d_flags |= DCACHE_NEED_AUTOMOUNT; - list_add(&dentry->d_alias, &inode->i_dentry); + list_add(&dentry->d_u.d_alias, &inode->i_dentry); } dentry->d_inode = inode; dentry_rcuwalk_barrier(dentry); @@ -1343,7 +1343,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) void d_instantiate(struct dentry *entry, struct inode * inode) { - BUG_ON(!list_empty(&entry->d_alias)); + BUG_ON(!list_empty(&entry->d_u.d_alias)); if (inode) spin_lock(&inode->i_lock); __d_instantiate(entry, inode); @@ -1382,7 +1382,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, return NULL; } - list_for_each_entry(alias, &inode->i_dentry, d_alias) { + list_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { struct qstr *qstr = &alias->d_name; /* @@ -1408,7 +1408,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) { struct dentry *result; - BUG_ON(!list_empty(&entry->d_alias)); + BUG_ON(!list_empty(&entry->d_u.d_alias)); if (inode) spin_lock(&inode->i_lock); @@ -1458,7 +1458,7 @@ static struct dentry * __d_find_any_alias(struct inode *inode) if (list_empty(&inode->i_dentry)) return NULL; - alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias); + alias = list_first_entry(&inode->i_dentry, struct dentry, d_u.d_alias); 
__dget(alias); return alias; } @@ -1525,7 +1525,7 @@ struct dentry *d_obtain_alias(struct inode *inode) spin_lock(&tmp->d_lock); tmp->d_inode = inode; tmp->d_flags |= DCACHE_DISCONNECTED; - list_add(&tmp->d_alias, &inode->i_dentry); + list_add(&tmp->d_u.d_alias, &inode->i_dentry); hlist_bl_lock(&tmp->d_sb->s_anon); hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); hlist_bl_unlock(&tmp->d_sb->s_anon); @@ -1931,7 +1931,7 @@ int d_validate(struct dentry *dentry, struct dentry *dparent) struct dentry *child; spin_lock(&dparent->d_lock); - list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) { + list_for_each_entry(child, &dparent->d_subdirs, d_child) { if (dentry == child) { spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); __dget_dlock(dentry); @@ -2178,8 +2178,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target) /* Unhash the target: dput() will then get rid of it */ __d_drop(target); - list_del(&dentry->d_u.d_child); - list_del(&target->d_u.d_child); + list_del(&dentry->d_child); + list_del(&target->d_child); /* Switch the names.. */ switch_names(dentry, target); @@ -2189,15 +2189,15 @@ static void __d_move(struct dentry * dentry, struct dentry * target) if (IS_ROOT(dentry)) { dentry->d_parent = target->d_parent; target->d_parent = target; - INIT_LIST_HEAD(&target->d_u.d_child); + INIT_LIST_HEAD(&target->d_child); } else { swap(dentry->d_parent, target->d_parent); /* And add them back to the (new) parent lists */ - list_add(&target->d_u.d_child, &target->d_parent->d_subdirs); + list_add(&target->d_child, &target->d_parent->d_subdirs); } - list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); + list_add(&dentry->d_child, &dentry->d_parent->d_subdirs); write_seqcount_end(&target->d_seq); write_seqcount_end(&dentry->d_seq); @@ -2304,18 +2304,18 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) swap(dentry->d_name.hash, anon->d_name.hash); dentry->d_parent = (aparent == anon) ? dentry : aparent; - list_del(&dentry->d_u.d_child); + list_del(&dentry->d_child); if (!IS_ROOT(dentry)) - list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); + list_add(&dentry->d_child, &dentry->d_parent->d_subdirs); else - INIT_LIST_HEAD(&dentry->d_u.d_child); + INIT_LIST_HEAD(&dentry->d_child); anon->d_parent = (dparent == dentry) ? anon : dparent; - list_del(&anon->d_u.d_child); + list_del(&anon->d_child); if (!IS_ROOT(anon)) - list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs); + list_add(&anon->d_child, &anon->d_parent->d_subdirs); else - INIT_LIST_HEAD(&anon->d_u.d_child); + INIT_LIST_HEAD(&anon->d_child); write_seqcount_end(&dentry->d_seq); write_seqcount_end(&anon->d_seq); @@ -2893,7 +2893,7 @@ repeat: resume: while (next != &this_parent->d_subdirs) { struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); + struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); @@ -2923,7 +2923,7 @@ resume: this_parent = try_to_ascend(this_parent, locked, seq); if (!this_parent) goto rename_retry; - next = child->d_u.d_child.next; + next = child->d_child.next; goto resume; } spin_unlock(&this_parent->d_lock); diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 01951c6b6632..6ac0893ca9fe 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -399,7 +399,7 @@ void debugfs_remove_recursive(struct dentry *dentry) * use the d_u.d_child as the rcu head and corrupt this list. 
*/ spin_lock(&parent->d_lock); - list_for_each_entry(child, &parent->d_subdirs, d_u.d_child) { + list_for_each_entry(child, &parent->d_subdirs, d_child) { if (!debugfs_positive(child)) continue; diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index b05acb796135..3bbf5e75df0a 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -50,7 +50,7 @@ find_acceptable_alias(struct dentry *result, inode = result->d_inode; spin_lock(&inode->i_lock); - list_for_each_entry(dentry, &inode->i_dentry, d_alias) { + list_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { dget(dentry); spin_unlock(&inode->i_lock); if (toput) diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index a8d03a4d3f8b..019c29c89824 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -139,7 +139,7 @@ static int ext4_sync_parent(struct inode *inode) spin_lock(&inode->i_lock); if (!list_empty(&inode->i_dentry)) { dentry = list_first_entry(&inode->i_dentry, - struct dentry, d_alias); + struct dentry, d_u.d_alias); dget(dentry); } spin_unlock(&inode->i_lock); diff --git a/fs/libfs.c b/fs/libfs.c index f6d411eef1e7..ce85edf8aca5 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -104,18 +104,18 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) spin_lock(&dentry->d_lock); /* d_lock not required for cursor */ - list_del(&cursor->d_u.d_child); + list_del(&cursor->d_child); p = dentry->d_subdirs.next; while (n && p != &dentry->d_subdirs) { struct dentry *next; - next = list_entry(p, struct dentry, d_u.d_child); + next = list_entry(p, struct dentry, d_child); spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); if (simple_positive(next)) n--; spin_unlock(&next->d_lock); p = p->next; } - list_add_tail(&cursor->d_u.d_child, p); + list_add_tail(&cursor->d_child, p); spin_unlock(&dentry->d_lock); } } @@ -139,7 +139,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) { struct dentry *dentry = filp->f_path.dentry; struct dentry *cursor = filp->private_data; - struct list_head *p, *q = &cursor->d_u.d_child; + struct list_head *p, *q = &cursor->d_child; ino_t ino; int i = filp->f_pos; @@ -165,7 +165,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) for (p=q->next; p != &dentry->d_subdirs; p=p->next) { struct dentry *next; - next = list_entry(p, struct dentry, d_u.d_child); + next = list_entry(p, struct dentry, d_child); spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); if (!simple_positive(next)) { spin_unlock(&next->d_lock); @@ -282,7 +282,7 @@ int simple_empty(struct dentry *dentry) int ret = 0; spin_lock(&dentry->d_lock); - list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) { + list_for_each_entry(child, &dentry->d_subdirs, d_child) { spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); if (simple_positive(child)) { spin_unlock(&child->d_lock); diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 844bd64807d4..efa38a92858c 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -391,7 +391,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dent = list_entry(next, struct dentry, d_u.d_child); + dent = list_entry(next, struct dentry, d_child); if ((unsigned long)dent->d_fsdata == fpos) { if (dent->d_inode) dget(dent); diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index 09881e6aa5ad..64a817af666c 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -194,7 +194,7 @@ 
ncp_renew_dentries(struct dentry *parent) spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dentry = list_entry(next, struct dentry, d_u.d_child); + dentry = list_entry(next, struct dentry, d_child); if (dentry->d_fsdata == NULL) ncp_age_dentry(server, dentry); @@ -216,7 +216,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent) spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dentry = list_entry(next, struct dentry, d_u.d_child); + dentry = list_entry(next, struct dentry, d_child); dentry->d_fsdata = NULL; ncp_age_dentry(server, dentry); next = next->next; diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index dcb61548887f..d7abf9ed4d4d 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -65,7 +65,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i */ spin_lock(&sb->s_root->d_inode->i_lock); spin_lock(&sb->s_root->d_lock); - list_del_init(&sb->s_root->d_alias); + list_del_init(&sb->s_root->d_u.d_alias); spin_unlock(&sb->s_root->d_lock); spin_unlock(&sb->s_root->d_inode->i_lock); } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 79b47cbb5cd8..f8ea28fc5d43 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -62,14 +62,14 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) spin_lock(&inode->i_lock); /* run all of the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ - list_for_each_entry(alias, &inode->i_dentry, d_alias) { + list_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { struct dentry *child; /* run all of the children of the original inode and fix their * d_flags to indicate parental interest (their parent is the * original inode) */ spin_lock(&alias->d_lock); - list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { + list_for_each_entry(child, &alias->d_subdirs, d_child) { if (!child->d_inode) continue; diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index e5ba34818332..26977cc14ae4 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -175,7 +175,7 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, spin_lock(&inode->i_lock); list_for_each(p, &inode->i_dentry) { - dentry = list_entry(p, struct dentry, d_alias); + dentry = list_entry(p, struct dentry, d_u.d_alias); spin_lock(&dentry->d_lock); if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 1dfe974fb9fd..99374de00c14 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -133,15 +133,15 @@ struct dentry { void *d_fsdata; /* fs-specific data */ struct list_head d_lru; /* LRU list */ + struct list_head d_child; /* child of parent list */ + struct list_head d_subdirs; /* our children */ /* - * d_child and d_rcu can share memory + * d_alias and d_rcu can share memory */ union { - struct list_head d_child; /* child of parent list */ + struct list_head d_alias; /* inode alias list */ struct rcu_head d_rcu; } d_u; - struct list_head d_subdirs; /* our children */ - struct list_head d_alias; /* inode alias list */ }; /* diff --git a/kernel/cgroup.c b/kernel/cgroup.c index ffcf896ddfc0..eafb6dd3e19c 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -881,7 +881,7 @@ static void cgroup_clear_directory(struct dentry *dentry) spin_lock(&dentry->d_lock); node = dentry->d_subdirs.next; while (node != &dentry->d_subdirs) { - struct dentry *d = list_entry(node, struct dentry, 
d_u.d_child); + struct dentry *d = list_entry(node, struct dentry, d_child); spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); list_del_init(node); @@ -915,7 +915,7 @@ static void cgroup_d_remove_dir(struct dentry *dentry) parent = dentry->d_parent; spin_lock(&parent->d_lock); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - list_del_init(&dentry->d_u.d_child); + list_del_init(&dentry->d_child); spin_unlock(&dentry->d_lock); spin_unlock(&parent->d_lock); remove_dir(dentry); diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index b4f802d2c1d4..4dd8dcf14689 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1197,7 +1197,7 @@ static void sel_remove_entries(struct dentry *de) spin_lock(&de->d_lock); node = de->d_subdirs.next; while (node != &de->d_subdirs) { - struct dentry *d = list_entry(node, struct dentry, d_u.d_child); + struct dentry *d = list_entry(node, struct dentry, d_child); spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); list_del_init(node); @@ -1704,12 +1704,12 @@ static void sel_remove_classes(void) list_for_each(class_node, &class_dir->d_subdirs) { struct dentry *class_subdir = list_entry(class_node, - struct dentry, d_u.d_child); + struct dentry, d_child); struct list_head *class_subdir_node; list_for_each(class_subdir_node, &class_subdir->d_subdirs) { struct dentry *d = list_entry(class_subdir_node, - struct dentry, d_u.d_child); + struct dentry, d_child); if (d->d_inode) if (d->d_inode->i_mode & S_IFDIR) -- cgit v1.2.3 From 2d5a2e6775fadc4ac5b7a1a5cbcdec1bffc0b142 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Oct 2014 19:31:10 -0400 Subject: deal with deadlock in d_walk() commit ca5358ef75fc69fee5322a38a340f5739d997c10 upstream. ... by not hitting rename_retry for reasons other than rename having happened. In other words, do _not_ restart when finding that between unlocking the child and locking the parent the former got into __dentry_kill(). Skip the killed siblings instead... Signed-off-by: Al Viro [bwh: Backported to 3.2: - As we only have try_to_ascend() and not d_walk(), apply this change to all callers of try_to_ascend() - Adjust context to make __dentry_kill() apply to d_kill()] Signed-off-by: Ben Hutchings --- fs/dcache.c | 101 +++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 62 insertions(+), 39 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 226a93a3f7bd..3f657424e46d 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -306,9 +306,9 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) __releases(parent->d_lock) __releases(dentry->d_inode->i_lock) { - list_del(&dentry->d_child); + __list_del_entry(&dentry->d_child); /* - * Inform try_to_ascend() that we are no longer attached to the + * Inform ascending readers that we are no longer attached to the * dentry tree */ dentry->d_flags |= DCACHE_DENTRY_KILLED; @@ -949,34 +949,6 @@ void shrink_dcache_for_umount(struct super_block *sb) } } -/* - * This tries to ascend one level of parenthood, but - * we can race with renaming, so we need to re-check - * the parenthood after dropping the lock and check - * that the sequence number still matches. 
- */ -static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq) -{ - struct dentry *new = old->d_parent; - - rcu_read_lock(); - spin_unlock(&old->d_lock); - spin_lock(&new->d_lock); - - /* - * might go back up the wrong parent if we have had a rename - * or deletion - */ - if (new != old->d_parent || - (old->d_flags & DCACHE_DENTRY_KILLED) || - (!locked && read_seqretry(&rename_lock, seq))) { - spin_unlock(&new->d_lock); - new = NULL; - } - rcu_read_unlock(); - return new; -} - /* * Search for at least 1 mount point in the dentry's subdirs. @@ -1032,17 +1004,32 @@ resume: /* * All done at this level ... ascend and resume the search. */ + rcu_read_lock(); +ascend: if (this_parent != parent) { struct dentry *child = this_parent; - this_parent = try_to_ascend(this_parent, locked, seq); - if (!this_parent) + this_parent = child->d_parent; + + spin_unlock(&child->d_lock); + spin_lock(&this_parent->d_lock); + + /* might go back up the wrong parent if we have had a rename */ + if (!locked && read_seqretry(&rename_lock, seq)) goto rename_retry; next = child->d_child.next; + while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) { + if (next == &this_parent->d_subdirs) + goto ascend; + child = list_entry(next, struct dentry, d_child); + next = next->next; + } + rcu_read_unlock(); goto resume; } - spin_unlock(&this_parent->d_lock); if (!locked && read_seqretry(&rename_lock, seq)) goto rename_retry; + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); if (locked) write_sequnlock(&rename_lock); return 0; /* No mount points found in tree */ @@ -1054,6 +1041,8 @@ positive: return 1; rename_retry: + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); if (locked) goto again; locked = 1; @@ -1139,23 +1128,40 @@ resume: /* * All done at this level ... ascend and resume the search. 
*/ + rcu_read_lock(); +ascend: if (this_parent != parent) { struct dentry *child = this_parent; - this_parent = try_to_ascend(this_parent, locked, seq); - if (!this_parent) + this_parent = child->d_parent; + + spin_unlock(&child->d_lock); + spin_lock(&this_parent->d_lock); + + /* might go back up the wrong parent if we have had a rename */ + if (!locked && read_seqretry(&rename_lock, seq)) goto rename_retry; next = child->d_child.next; + while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) { + if (next == &this_parent->d_subdirs) + goto ascend; + child = list_entry(next, struct dentry, d_child); + next = next->next; + } + rcu_read_unlock(); goto resume; } out: - spin_unlock(&this_parent->d_lock); if (!locked && read_seqretry(&rename_lock, seq)) goto rename_retry; + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); if (locked) write_sequnlock(&rename_lock); return found; rename_retry: + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); if (found) return found; if (locked) @@ -2914,26 +2920,43 @@ resume: } spin_unlock(&dentry->d_lock); } + rcu_read_lock(); +ascend: if (this_parent != root) { struct dentry *child = this_parent; if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { this_parent->d_flags |= DCACHE_GENOCIDE; this_parent->d_count--; } - this_parent = try_to_ascend(this_parent, locked, seq); - if (!this_parent) + this_parent = child->d_parent; + + spin_unlock(&child->d_lock); + spin_lock(&this_parent->d_lock); + + /* might go back up the wrong parent if we have had a rename */ + if (!locked && read_seqretry(&rename_lock, seq)) goto rename_retry; next = child->d_child.next; + while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) { + if (next == &this_parent->d_subdirs) + goto ascend; + child = list_entry(next, struct dentry, d_child); + next = next->next; + } + rcu_read_unlock(); goto resume; } - spin_unlock(&this_parent->d_lock); if (!locked && read_seqretry(&rename_lock, seq)) goto rename_retry; + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); if (locked) write_sequnlock(&rename_lock); return; rename_retry: + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); if (locked) goto again; locked = 1; -- cgit v1.2.3 From 6d9f360c00512f6a6fded2efcfcbe78ec73e5b1b Mon Sep 17 00:00:00 2001 From: Anatol Pomozov Date: Tue, 18 Sep 2012 13:38:59 -0400 Subject: ext4: make orphan functions be no-op in no-journal mode commit c9b92530a723ac5ef8e352885a1862b18f31b2f5 upstream. Instead of checking whether the handle is valid, we check if journal is enabled. This avoids taking the s_orphan_lock mutex in all cases when there is no journal in use, including the error paths where ext4_orphan_del() is called with a handle set to NULL. 
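In code terms the change is a one-line gate; a minimal sketch of the new check, taken from the shape of the diff below (names as in ext4_orphan_add()):

	/* no journal: orphan list maintenance is a no-op, so bail out
	 * before touching s_orphan_lock */
	if (!EXT4_SB(sb)->s_journal || is_bad_inode(inode))
		return 0;
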
Signed-off-by: Anatol Pomozov Signed-off-by: "Theodore Ts'o" [bwh: Adjust context to apply after commit 0e9a9a1ad619 ('ext4: avoid hang when mounting non-journal filesystems with orphan list') and commit e2bfb088fac0 ('ext4: don't orphan or truncate the boot loader inode')] Signed-off-by: Ben Hutchings --- fs/ext4/namei.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index cd39fa744709..c9f2e3d04a14 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1986,7 +1986,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) struct ext4_iloc iloc; int err = 0, rc; - if (!ext4_handle_valid(handle) || is_bad_inode(inode)) + if (!EXT4_SB(sb)->s_journal || is_bad_inode(inode)) return 0; mutex_lock(&EXT4_SB(sb)->s_orphan_lock); @@ -2060,8 +2060,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) struct ext4_iloc iloc; int err = 0; - /* ext4_handle_valid() assumes a valid handle_t pointer */ - if (handle && !ext4_handle_valid(handle) && + if (!EXT4_SB(inode->i_sb)->s_journal && !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) return 0; @@ -2081,7 +2080,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) * transaction handle with which to update the orphan list on * disk, but we still need to remove the inode from the linked * list in memory. */ - if (sbi->s_journal && !handle) + if (!handle) goto out; err = ext4_reserve_inode_write(handle, inode, &iloc); -- cgit v1.2.3 From 17d1b500792b382169883c1e2415f1e930236128 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 22 Nov 2013 12:04:18 +0100 Subject: s390,time: revert direct ktime path for s390 clockevent device commit 8adbf78ec4839c1dc4ff20c9a1f332a7bc99e6e6 upstream. Git commit 4f37a68cdaf6dea833cfdded2a3e0c47c0f006da "s390: Use direct ktime path for s390 clockevent device" makes use of the CLOCK_EVT_FEAT_KTIME clockevent option to avoid the delta calculation with ktime_get() in clockevents_program_event and the get_tod_clock() in s390_next_event. This is based on the assumption that the difference between the internal ktime and the hardware clock is reflected in the wall_to_monotonic delta. But this is not true, the ntp corrections are applied via changes to the tk->mult multiplier and this is not reflected in wall_to_monotonic. In theory this could be solved by using the raw monotonic clock but it is simpler to switch back to the standard clock delta calculation. 
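A note on the arithmetic in the deleted conversion: the s390 TOD clock advances 4096 units per microsecond, so nanoseconds convert to TOD units as ns * 4096 / 1000, and that is exactly what the removed do_div(nsecs, 125) followed by << 9 computed, since 512/125 == 4096/1000. A self-contained check of that identity, using an arbitrarily chosen test value:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t ns  = 1000000000ULL;    /* one second, arbitrary test value */
            uint64_t tod = (ns / 125) << 9;  /* the deleted driver formula */

            printf("%llu ns -> %llu TOD units (expect 4096000000)\n",
                   (unsigned long long)ns, (unsigned long long)tod);
            return 0;
    }

The delta path no longer needs this conversion in the driver: the clockevents core measures the remaining time with ktime_get(), which does reflect the NTP-adjusted multiplier, and scales it to device units through the device's own mult/shift pair (16777 >> 12, about 4.096 TOD units per nanosecond, as registered in the diff below).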
Signed-off-by: Martin Schwidefsky [bwh: Backported to 3.2: s/get_tod_clock()/get_clock()/] Signed-off-by: Ben Hutchings --- arch/s390/kernel/time.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index b2f44de079b9..fd65e5e2c7b0 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -110,20 +110,10 @@ static void fixup_clock_comparator(unsigned long long delta) set_clock_comparator(S390_lowcore.clock_comparator); } -static int s390_next_ktime(ktime_t expires, +static int s390_next_event(unsigned long delta, struct clock_event_device *evt) { - struct timespec ts; - u64 nsecs; - - ts.tv_sec = ts.tv_nsec = 0; - monotonic_to_bootbased(&ts); - nsecs = ktime_to_ns(ktime_add(timespec_to_ktime(ts), expires)); - do_div(nsecs, 125); - S390_lowcore.clock_comparator = sched_clock_base_cc + (nsecs << 9); - /* Program the maximum value if we have an overflow (== year 2042) */ - if (unlikely(S390_lowcore.clock_comparator < sched_clock_base_cc)) - S390_lowcore.clock_comparator = -1ULL; + S390_lowcore.clock_comparator = get_clock() + delta; set_clock_comparator(S390_lowcore.clock_comparator); return 0; } @@ -148,15 +138,14 @@ void init_cpu_timer(void) cpu = smp_processor_id(); cd = &per_cpu(comparators, cpu); cd->name = "comparator"; - cd->features = CLOCK_EVT_FEAT_ONESHOT | - CLOCK_EVT_FEAT_KTIME; + cd->features = CLOCK_EVT_FEAT_ONESHOT; cd->mult = 16777; cd->shift = 12; cd->min_delta_ns = 1; cd->max_delta_ns = LONG_MAX; cd->rating = 400; cd->cpumask = cpumask_of(cpu); - cd->set_next_ktime = s390_next_ktime; + cd->set_next_event = s390_next_event; cd->set_mode = s390_set_mode; clockevents_register_device(cd); -- cgit v1.2.3 From 125f21f38d448ae04aae1a665147faf1263a4736 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Mon, 26 Aug 2013 15:16:49 +0200 Subject: drm: fix DRM_IOCTL_MODE_GETFB handle-leak commit 101b96f32956ee99bf1468afaf572b88cda9f88b upstream. DRM_IOCTL_MODE_GETFB is used to retrieve information about a given framebuffer ID. It is a read-only helper and was thus declassified for unprivileged access in: commit a14b1b42477c5ef089fcda88cbaae50d979eb8f9 Author: Mandeep Singh Baines Date: Fri Jan 20 12:11:16 2012 -0800 drm: remove master fd restriction on mode setting getters However, alongside width, height and stride information, DRM_IOCTL_MODE_GETFB also passes back a handle to the underlying buffer of the framebuffer. This handle allows users to mmap() it and read or write into it. Obviously, this should be restricted to DRM-Master. With the current setup, *any* process with access to /dev/dri/card0 (which means any process with access to hardware-accelerated rendering) can access the current screen framebuffer and modify it ad libitum. For backwards-compatibility reasons we want to keep the DRM_IOCTL_MODE_GETFB call unprivileged. Besides, it provides quite useful information regarding screen setup. So we simply test whether the caller is the current DRM-Master and if not, we return 0 as handle, which is always invalid. A following DRM_IOCTL_GEM_CLOSE on this handle will fail with EINVAL, but we accept this. Users shouldn't test for errors during GEM_CLOSE, anyway. And it is still better as a failing MODE_GETFB call. 
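The resulting policy is compact enough to restate as a sketch. The following is a simplified stand-alone rendering with invented types, not the DRM code itself (the real check sits in drm_mode_getfb() and also has to propagate create_handle()'s return value), and it folds in the CAP_SYS_ADMIN escape hatch added in v2 below:

    #include <stdio.h>

    /* Invented stand-in for the ioctl caller's credentials. */
    struct caller { int is_master; int has_cap_sys_admin; };

    /* Only DRM-Master or CAP_SYS_ADMIN callers receive a real GEM
     * handle; everyone else gets 0, which no lookup matches, so a
     * later GEM_CLOSE on it fails cleanly with EINVAL. */
    static unsigned int getfb_handle(const struct caller *c,
                                     unsigned int real_handle)
    {
            if (c->is_master || c->has_cap_sys_admin)
                    return real_handle;
            return 0;
    }

    int main(void)
    {
            struct caller master = { 1, 0 }, plain = { 0, 0 };

            printf("master gets %u, non-master gets %u\n",
                   getfb_handle(&master, 42), getfb_handle(&plain, 42));
            return 0;
    }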
v2: add capable(CAP_SYS_ADMIN) check for compatibility with i-g-t Signed-off-by: David Herrmann Reviewed-by: Chris Wilson Signed-off-by: Dave Airlie [bwh: Backported to 3.2: - drm_framebuffer_funcs::create_handle must be non-null - Adjust context, indentation] Signed-off-by: Ben Hutchings --- drivers/gpu/drm/drm_crtc.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 3f1799b52d06..09851ce2500e 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -1815,7 +1815,17 @@ int drm_mode_getfb(struct drm_device *dev, r->depth = fb->depth; r->bpp = fb->bits_per_pixel; r->pitch = fb->pitch; - fb->funcs->create_handle(fb, file_priv, &r->handle); + if (file_priv->is_master || capable(CAP_SYS_ADMIN)) { + ret = fb->funcs->create_handle(fb, file_priv, &r->handle); + } else { + /* GET_FB() is an unprivileged ioctl so we must not + * return a buffer-handle to non-master processes! For + * backwards-compatibility reasons, we cannot make + * GET_FB() privileged, so just return an invalid handle + * for non-masters. */ + r->handle = 0; + ret = 0; + } out: mutex_unlock(&dev->mode_config.mutex); -- cgit v1.2.3 From 543563d72f5b098ab719f296b6357d88701c1a1e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 27 Mar 2014 18:14:40 +0100 Subject: crypto: ghash-clmulni-intel - use C implementation for setkey() commit 8ceee72808d1ae3fb191284afc2257a2be964725 upstream. The GHASH setkey() function uses SSE registers but fails to call kernel_fpu_begin()/kernel_fpu_end(). Instead of adding these calls, and then having to deal with the restriction that they cannot be called from interrupt context, move the setkey() implementation to the C domain. Note that setkey() does not use any particular SSE features and is not expected to become a performance bottleneck. Signed-off-by: Ard Biesheuvel Acked-by: H. 
Peter Anvin Fixes: 0e1227d356e9b (crypto: ghash - Add PCLMULQDQ accelerated implementation) Signed-off-by: Herbert Xu [bwh: Backported to 3.2: adjust context] Signed-off-by: Ben Hutchings --- arch/x86/crypto/ghash-clmulni-intel_asm.S | 28 ---------------------------- arch/x86/crypto/ghash-clmulni-intel_glue.c | 14 +++++++++++--- 2 files changed, 11 insertions(+), 31 deletions(-) diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index 1eb7f90cb7b9..eb4d2a254b35 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -24,10 +24,6 @@ .align 16 .Lbswap_mask: .octa 0x000102030405060708090a0b0c0d0e0f -.Lpoly: - .octa 0xc2000000000000000000000000000001 -.Ltwo_one: - .octa 0x00000001000000000000000000000001 #define DATA %xmm0 #define SHASH %xmm1 @@ -131,27 +127,3 @@ ENTRY(clmul_ghash_update) movups DATA, (%rdi) .Lupdate_just_ret: ret - -/* - * void clmul_ghash_setkey(be128 *shash, const u8 *key); - * - * Calculate hash_key << 1 mod poly - */ -ENTRY(clmul_ghash_setkey) - movaps .Lbswap_mask, BSWAP - movups (%rsi), %xmm0 - PSHUFB_XMM BSWAP %xmm0 - movaps %xmm0, %xmm1 - psllq $1, %xmm0 - psrlq $63, %xmm1 - movaps %xmm1, %xmm2 - pslldq $8, %xmm1 - psrldq $8, %xmm2 - por %xmm1, %xmm0 - # reduction - pshufd $0b00100100, %xmm2, %xmm1 - pcmpeqd .Ltwo_one, %xmm1 - pand .Lpoly, %xmm1 - pxor %xmm1, %xmm0 - movups %xmm0, (%rdi) - ret diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 976aa64d9a20..294a264656f1 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -29,8 +29,6 @@ void clmul_ghash_mul(char *dst, const be128 *shash); void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, const be128 *shash); -void clmul_ghash_setkey(be128 *shash, const u8 *key); - struct ghash_async_ctx { struct cryptd_ahash *cryptd_tfm; }; @@ -57,13 +55,23 @@ static int ghash_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) { struct ghash_ctx *ctx = crypto_shash_ctx(tfm); + be128 *x = (be128 *)key; + u64 a, b; if (keylen != GHASH_BLOCK_SIZE) { crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } - clmul_ghash_setkey(&ctx->shash, key); + /* perform multiplication by 'x' in GF(2^128) */ + a = be64_to_cpu(x->a); + b = be64_to_cpu(x->b); + + ctx->shash.a = (__be64)((b << 1) | (a >> 63)); + ctx->shash.b = (__be64)((a << 1) | (b >> 63)); + + if (a >> 63) + ctx->shash.b ^= cpu_to_be64(0xc2); return 0; } -- cgit v1.2.3 From 540aa5b743b3ba2c7651f3e311a0fc8d4865534e Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 30 Oct 2014 18:27:17 +0000 Subject: drivers/net, ipv6: Select IPv6 fragment idents for virtio UFO packets commit 5188cd44c55db3e92cd9e77a40b5baa7ed4340f7 upstream. UFO is now disabled on all drivers that work with virtio net headers, but userland may try to send UFO/IPv6 packets anyway. Instead of sending with ID=0, we should select identifiers on their behalf (as we used to). Signed-off-by: Ben Hutchings Fixes: 916e4cf46d02 ("ipv6: reuse ip6_frag_id from ip6_ufo_append_data") Signed-off-by: David S. 
Miller [bwh: For 3.2, net/ipv6/output_core.c is a completely new file] --- drivers/net/macvtap.c | 3 +++ drivers/net/tun.c | 5 +++++ include/net/ipv6.h | 1 + net/ipv6/Makefile | 2 +- net/ipv6/output_core.c | 38 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 net/ipv6/output_core.c diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index b0f901518b76..0e6e57ed1a2f 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -577,6 +578,8 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb, break; case VIRTIO_NET_HDR_GSO_UDP: gso_type = SKB_GSO_UDP; + if (skb->protocol == htons(ETH_P_IPV6)) + ipv6_proxy_select_ident(skb); break; default: return -EINVAL; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index ee1aab0805d1..2fbbca670457 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -64,6 +64,7 @@ #include #include #include +#include #include #include #include @@ -695,6 +696,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, break; } + skb_reset_network_header(skb); + if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) { pr_debug("GSO!\n"); switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { @@ -706,6 +709,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, break; case VIRTIO_NET_HDR_GSO_UDP: skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + if (skb->protocol == htons(ETH_P_IPV6)) + ipv6_proxy_select_ident(skb); break; default: tun->dev->stats.rx_frame_errors++; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 00a2eb609c33..ab2e6d724cf8 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -481,6 +481,7 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add } extern void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt); +void ipv6_proxy_select_ident(struct sk_buff *skb); /* * Prototypes exported by ipv6 diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 686934acfac1..4b20d5606f6d 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -37,6 +37,6 @@ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o -obj-y += addrconf_core.o exthdrs_core.o +obj-y += addrconf_core.o exthdrs_core.o output_core.o obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c new file mode 100644 index 000000000000..a6126c62a9be --- /dev/null +++ b/net/ipv6/output_core.c @@ -0,0 +1,38 @@ +#include +#include +#include +#include + +/* This function exists only for tap drivers that must support broken + * clients requesting UFO without specifying an IPv6 fragment ID. + * + * This is similar to ipv6_select_ident() but we use an independent hash + * seed to limit information leakage. 
+ */ +void ipv6_proxy_select_ident(struct sk_buff *skb) +{ + static u32 ip6_proxy_idents_hashrnd __read_mostly; + static bool hashrnd_initialized = false; + struct in6_addr buf[2]; + struct in6_addr *addrs; + u32 hash, id; + + addrs = skb_header_pointer(skb, + skb_network_offset(skb) + + offsetof(struct ipv6hdr, saddr), + sizeof(buf), buf); + if (!addrs) + return; + + if (unlikely(!hashrnd_initialized)) { + hashrnd_initialized = true; + get_random_bytes(&ip6_proxy_idents_hashrnd, + sizeof(ip6_proxy_idents_hashrnd)); + } + hash = __ipv6_addr_jhash(&addrs[1], ip6_proxy_idents_hashrnd); + hash = __ipv6_addr_jhash(&addrs[0], hash); + + id = ip_idents_reserve(hash, 1); + skb_shinfo(skb)->ip6_frag_id = htonl(id); +} +EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); -- cgit v1.2.3 From 3af10169145c8eed7b3591c0644da4298405efbc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 10 Nov 2014 18:00:09 +0100 Subject: net: sctp: fix memory leak in auth key management commit 4184b2a79a7612a9272ce20d639934584a1f3786 upstream. A very minimal and simple user space application allocating an SCTP socket, setting SCTP_AUTH_KEY setsockopt(2) on it and then closing the socket again will leak the memory containing the authentication key from user space: unreferenced object 0xffff8800837047c0 (size 16): comm "a.out", pid 2789, jiffies 4296954322 (age 192.258s) hex dump (first 16 bytes): 01 00 00 00 04 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x4e/0xb0 [] __kmalloc+0xe8/0x270 [] sctp_auth_create_key+0x23/0x50 [sctp] [] sctp_auth_set_key+0xa1/0x140 [sctp] [] sctp_setsockopt+0xd03/0x1180 [sctp] [] sock_common_setsockopt+0x14/0x20 [] SyS_setsockopt+0x71/0xd0 [] system_call_fastpath+0x12/0x17 [] 0xffffffffffffffff This is bad because of two things, we can bring down a machine from user space when auth_enable=1, but also we would leave security sensitive keying material in memory without clearing it after use. The issue is that sctp_auth_create_key() already sets the refcount to 1, but after allocation sctp_auth_set_key() does an additional refcount on it, and thus leaving it around when we free the socket. Fixes: 65b07e5d0d0 ("[SCTP]: API updates to suport SCTP-AUTH extensions.") Signed-off-by: Daniel Borkmann Cc: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Ben Hutchings --- net/sctp/auth.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 333926d4d356..53d455c09ce5 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -866,8 +866,6 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, list_add(&cur_key->key_list, sh_keys); cur_key->key = key; - sctp_auth_key_hold(key); - return 0; nomem: if (!replace) -- cgit v1.2.3 From 0aba46add2915b344580569e87d9c41274b9c475 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 13 Oct 2014 16:34:10 +0200 Subject: ipv4: fix nexthop attlen check in fib_nh_match commit f76936d07c4eeb36d8dbb64ebd30ab46ff85d9f7 upstream. fib_nh_match does not match nexthops correctly. Example: ip route add 172.16.10/24 nexthop via 192.168.122.12 dev eth0 \ nexthop via 192.168.122.13 dev eth0 ip route del 172.16.10/24 nexthop via 192.168.122.14 dev eth0 \ nexthop via 192.168.122.15 dev eth0 Del command is successful and route is removed. After this patch applied, the route is correctly matched and result is: RTNETLINK answers: No such process Please consider this for stable trees as well. 
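The fix is a single comparison, so its shape is easy to miss in the hunk. In outline: rtnh_attrlen() returns the nexthop record's length minus its fixed header, which is never negative for a well-formed request, so the old "attrlen < 0" gate in front of the RTA_GATEWAY comparison was dead code and deletion matched regardless of the gateway given. A self-contained sketch, using a cut-down stand-in for struct rtnexthop rather than the uapi definition:

    #include <stdio.h>

    struct nh_hdr { unsigned short rtnh_len; };  /* cut-down stand-in */
    #define NH_HDR_LEN ((int)sizeof(struct nh_hdr))

    static int attrlen_of(const struct nh_hdr *nh)
    {
            return nh->rtnh_len - NH_HDR_LEN;    /* >= 0 when well-formed */
    }

    int main(void)
    {
            struct nh_hdr with_gateway = { NH_HDR_LEN + 8 };
            int attrlen = attrlen_of(&with_gateway);

            printf("attrlen=%d: old gate (attrlen < 0) compares gateway: %s\n",
                   attrlen, attrlen < 0 ? "yes" : "no, the bug");
            printf("attrlen=%d: new gate (attrlen > 0) compares gateway: %s\n",
                   attrlen, attrlen > 0 ? "yes" : "no");
            return 0;
    }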
Fixes: 4e902c57417c4 ("[IPv4]: FIB configuration using struct fib_config") Signed-off-by: Jiri Pirko Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Ben Hutchings --- net/ipv4/fib_semantics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 76da979baed0..1cdb4a9ac713 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -467,7 +467,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) return 1; attrlen = rtnh_attrlen(rtnh); - if (attrlen < 0) { + if (attrlen > 0) { struct nlattr *nla, *attrs = rtnh_attrs(rtnh); nla = nla_find(attrs, attrlen, RTA_GATEWAY); -- cgit v1.2.3 From 10f2216850e5955d102f8a052f5f3621e1aca328 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 May 2013 06:52:26 +0000 Subject: tcp: md5: remove spinlock usage in fast path commit 71cea17ed39fdf1c0634f530ddc6a2c2fc601c2b upstream. TCP md5 code uses per cpu variables but protects access to them with a shared spinlock, which is a contention point. [ tcp_md5sig_pool_lock is locked twice per incoming packet ] Makes things much simpler, by allocating crypto structures once, first time a socket needs md5 keys, and not deallocating them as they are really small. Next step would be to allow crypto allocations being done in a NUMA aware way. Signed-off-by: Eric Dumazet Cc: Herbert Xu Signed-off-by: David S. Miller [bwh: Backported to 3.2: - Adjust context - Conditions for alloc/free are quite different] Signed-off-by: Ben Hutchings --- include/net/tcp.h | 8 ++-- net/ipv4/tcp.c | 98 +++++++++++------------------------------------- net/ipv4/tcp_ipv4.c | 7 +--- net/ipv4/tcp_minisocks.c | 7 +--- net/ipv6/tcp_ipv6.c | 7 +--- 5 files changed, 29 insertions(+), 98 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 238255b27106..e90235fb1b18 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1214,11 +1214,13 @@ extern int tcp_v4_md5_do_del(struct sock *sk, __be32 addr); #define tcp_twsk_md5_key(twsk) NULL #endif -extern struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *); -extern void tcp_free_md5sig_pool(void); +extern bool tcp_alloc_md5sig_pool(void); extern struct tcp_md5sig_pool *tcp_get_md5sig_pool(void); -extern void tcp_put_md5sig_pool(void); +static inline void tcp_put_md5sig_pool(void) +{ + local_bh_enable(); +} extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, const struct tcphdr *); extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ec8b4b7ee2e2..573d786e3bf9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2863,9 +2863,8 @@ int tcp_gro_complete(struct sk_buff *skb) EXPORT_SYMBOL(tcp_gro_complete); #ifdef CONFIG_TCP_MD5SIG -static unsigned long tcp_md5sig_users; -static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool; -static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); +static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly; +static DEFINE_MUTEX(tcp_md5sig_mutex); static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) { @@ -2880,30 +2879,14 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) free_percpu(pool); } -void tcp_free_md5sig_pool(void) -{ - struct tcp_md5sig_pool __percpu *pool = NULL; - - spin_lock_bh(&tcp_md5sig_pool_lock); - if (--tcp_md5sig_users == 0) { - pool = tcp_md5sig_pool; - tcp_md5sig_pool = NULL; - } - spin_unlock_bh(&tcp_md5sig_pool_lock); - if (pool) - __tcp_free_md5sig_pool(pool); 
-} -EXPORT_SYMBOL(tcp_free_md5sig_pool); - -static struct tcp_md5sig_pool __percpu * -__tcp_alloc_md5sig_pool(struct sock *sk) +static void __tcp_alloc_md5sig_pool(void) { int cpu; struct tcp_md5sig_pool __percpu *pool; pool = alloc_percpu(struct tcp_md5sig_pool); if (!pool) - return NULL; + return; for_each_possible_cpu(cpu) { struct crypto_hash *hash; @@ -2914,53 +2897,27 @@ __tcp_alloc_md5sig_pool(struct sock *sk) per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash; } - return pool; + /* before setting tcp_md5sig_pool, we must commit all writes + * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool() + */ + smp_wmb(); + tcp_md5sig_pool = pool; + return; out_free: __tcp_free_md5sig_pool(pool); - return NULL; } -struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk) +bool tcp_alloc_md5sig_pool(void) { - struct tcp_md5sig_pool __percpu *pool; - int alloc = 0; - -retry: - spin_lock_bh(&tcp_md5sig_pool_lock); - pool = tcp_md5sig_pool; - if (tcp_md5sig_users++ == 0) { - alloc = 1; - spin_unlock_bh(&tcp_md5sig_pool_lock); - } else if (!pool) { - tcp_md5sig_users--; - spin_unlock_bh(&tcp_md5sig_pool_lock); - cpu_relax(); - goto retry; - } else - spin_unlock_bh(&tcp_md5sig_pool_lock); - - if (alloc) { - /* we cannot hold spinlock here because this may sleep. */ - struct tcp_md5sig_pool __percpu *p; - - p = __tcp_alloc_md5sig_pool(sk); - spin_lock_bh(&tcp_md5sig_pool_lock); - if (!p) { - tcp_md5sig_users--; - spin_unlock_bh(&tcp_md5sig_pool_lock); - return NULL; - } - pool = tcp_md5sig_pool; - if (pool) { - /* oops, it has already been assigned. */ - spin_unlock_bh(&tcp_md5sig_pool_lock); - __tcp_free_md5sig_pool(p); - } else { - tcp_md5sig_pool = pool = p; - spin_unlock_bh(&tcp_md5sig_pool_lock); - } + if (unlikely(!tcp_md5sig_pool)) { + mutex_lock(&tcp_md5sig_mutex); + + if (!tcp_md5sig_pool) + __tcp_alloc_md5sig_pool(); + + mutex_unlock(&tcp_md5sig_mutex); } - return pool; + return tcp_md5sig_pool != NULL; } EXPORT_SYMBOL(tcp_alloc_md5sig_pool); @@ -2977,28 +2934,15 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) struct tcp_md5sig_pool __percpu *p; local_bh_disable(); - - spin_lock(&tcp_md5sig_pool_lock); - p = tcp_md5sig_pool; - if (p) - tcp_md5sig_users++; - spin_unlock(&tcp_md5sig_pool_lock); - + p = ACCESS_ONCE(tcp_md5sig_pool); if (p) - return this_cpu_ptr(p); + return __this_cpu_ptr(p); local_bh_enable(); return NULL; } EXPORT_SYMBOL(tcp_get_md5sig_pool); -void tcp_put_md5sig_pool(void) -{ - local_bh_enable(); - tcp_free_md5sig_pool(); -} -EXPORT_SYMBOL(tcp_put_md5sig_pool); - int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, const struct tcphdr *th) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 26eb8e2a54e2..b4e0eb49f56d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -938,8 +938,7 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, } md5sig = tp->md5sig_info; - if (md5sig->entries4 == 0 && - tcp_alloc_md5sig_pool(sk) == NULL) { + if (md5sig->entries4 == 0 && !tcp_alloc_md5sig_pool()) { kfree(newkey); return -ENOMEM; } @@ -949,8 +948,6 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, (md5sig->entries4 + 1)), GFP_ATOMIC); if (!keys) { kfree(newkey); - if (md5sig->entries4 == 0) - tcp_free_md5sig_pool(); return -ENOMEM; } @@ -994,7 +991,6 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) kfree(tp->md5sig_info->keys4); tp->md5sig_info->keys4 = NULL; tp->md5sig_info->alloced4 = 0; - tcp_free_md5sig_pool(); } else if (tp->md5sig_info->entries4 != i) { /* Need to do some manipulation */ memmove(&tp->md5sig_info->keys4[i], @@ 
-1022,7 +1018,6 @@ static void tcp_v4_clear_md5_list(struct sock *sk) for (i = 0; i < tp->md5sig_info->entries4; i++) kfree(tp->md5sig_info->keys4[i].base.key); tp->md5sig_info->entries4 = 0; - tcp_free_md5sig_pool(); } if (tp->md5sig_info->keys4) { kfree(tp->md5sig_info->keys4); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 66363b689ad6..00e1530af8ac 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -365,7 +365,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) if (key != NULL) { memcpy(&tcptw->tw_md5_key, key->key, key->keylen); tcptw->tw_md5_keylen = key->keylen; - if (tcp_alloc_md5sig_pool(sk) == NULL) + if (!tcp_alloc_md5sig_pool()) BUG(); } } while (0); @@ -403,11 +403,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) void tcp_twsk_destructor(struct sock *sk) { -#ifdef CONFIG_TCP_MD5SIG - struct tcp_timewait_sock *twsk = tcp_twsk(sk); - if (twsk->tw_md5_keylen) - tcp_free_md5sig_pool(); -#endif } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 057a9d21e6f8..655cc6081cf0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -592,7 +592,7 @@ static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer, sk_nocaps_add(sk, NETIF_F_GSO_MASK); } if (tp->md5sig_info->entries6 == 0 && - tcp_alloc_md5sig_pool(sk) == NULL) { + !tcp_alloc_md5sig_pool()) { kfree(newkey); return -ENOMEM; } @@ -602,8 +602,6 @@ static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer, if (!keys) { kfree(newkey); - if (tp->md5sig_info->entries6 == 0) - tcp_free_md5sig_pool(); return -ENOMEM; } @@ -649,7 +647,6 @@ static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer) kfree(tp->md5sig_info->keys6); tp->md5sig_info->keys6 = NULL; tp->md5sig_info->alloced6 = 0; - tcp_free_md5sig_pool(); } else { /* shrink the database */ if (tp->md5sig_info->entries6 != i) @@ -673,7 +670,6 @@ static void tcp_v6_clear_md5_list (struct sock *sk) for (i = 0; i < tp->md5sig_info->entries6; i++) kfree(tp->md5sig_info->keys6[i].base.key); tp->md5sig_info->entries6 = 0; - tcp_free_md5sig_pool(); } kfree(tp->md5sig_info->keys6); @@ -684,7 +680,6 @@ static void tcp_v6_clear_md5_list (struct sock *sk) for (i = 0; i < tp->md5sig_info->entries4; i++) kfree(tp->md5sig_info->keys4[i].base.key); tp->md5sig_info->entries4 = 0; - tcp_free_md5sig_pool(); } kfree(tp->md5sig_info->keys4); -- cgit v1.2.3 From 724035313906d1569c81524132ef494ce1a112af Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Oct 2014 12:58:58 -0700 Subject: tcp: md5: do not use alloc_percpu() commit 349ce993ac706869d553a1816426d3a4bfda02b1 upstream. percpu tcp_md5sig_pool contains memory blobs that ultimately go through sg_set_buf(). -> sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf)); This requires that whole area is in a physically contiguous portion of memory. And that @buf is not backed by vmalloc(). Given that alloc_percpu() can use vmalloc() areas, this does not fit the requirements. Replace alloc_percpu() by a static DEFINE_PER_CPU() as tcp_md5sig_pool is small anyway, there is no gain to dynamically allocate it. Signed-off-by: Eric Dumazet Fixes: 765cf9976e93 ("tcp: md5: remove one indirection level in tcp_md5sig_pool") Reported-by: Crestez Dan Leonard Signed-off-by: David S. 
Miller [bwh: Backported to 3.2: the deleted code differs slightly due to API changes] Signed-off-by: Ben Hutchings --- net/ipv4/tcp.c | 59 ++++++++++++++++++++-------------------------------------- 1 file changed, 20 insertions(+), 39 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 573d786e3bf9..32c9e83f44a6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2863,61 +2863,42 @@ int tcp_gro_complete(struct sk_buff *skb) EXPORT_SYMBOL(tcp_gro_complete); #ifdef CONFIG_TCP_MD5SIG -static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly; +static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool); static DEFINE_MUTEX(tcp_md5sig_mutex); - -static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu); - - if (p->md5_desc.tfm) - crypto_free_hash(p->md5_desc.tfm); - } - free_percpu(pool); -} +static bool tcp_md5sig_pool_populated = false; static void __tcp_alloc_md5sig_pool(void) { int cpu; - struct tcp_md5sig_pool __percpu *pool; - - pool = alloc_percpu(struct tcp_md5sig_pool); - if (!pool) - return; for_each_possible_cpu(cpu) { - struct crypto_hash *hash; - - hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); - if (!hash || IS_ERR(hash)) - goto out_free; + if (!per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm) { + struct crypto_hash *hash; - per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash; + hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR_OR_NULL(hash)) + return; + per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash; + } } - /* before setting tcp_md5sig_pool, we must commit all writes - * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool() + /* before setting tcp_md5sig_pool_populated, we must commit all writes + * to memory. See smp_rmb() in tcp_get_md5sig_pool() */ smp_wmb(); - tcp_md5sig_pool = pool; - return; -out_free: - __tcp_free_md5sig_pool(pool); + tcp_md5sig_pool_populated = true; } bool tcp_alloc_md5sig_pool(void) { - if (unlikely(!tcp_md5sig_pool)) { + if (unlikely(!tcp_md5sig_pool_populated)) { mutex_lock(&tcp_md5sig_mutex); - if (!tcp_md5sig_pool) + if (!tcp_md5sig_pool_populated) __tcp_alloc_md5sig_pool(); mutex_unlock(&tcp_md5sig_mutex); } - return tcp_md5sig_pool != NULL; + return tcp_md5sig_pool_populated; } EXPORT_SYMBOL(tcp_alloc_md5sig_pool); @@ -2931,13 +2912,13 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool); */ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) { - struct tcp_md5sig_pool __percpu *p; - local_bh_disable(); - p = ACCESS_ONCE(tcp_md5sig_pool); - if (p) - return __this_cpu_ptr(p); + if (tcp_md5sig_pool_populated) { + /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */ + smp_rmb(); + return this_cpu_ptr(&tcp_md5sig_pool); + } local_bh_enable(); return NULL; } -- cgit v1.2.3 From b136685fdeae304667ae14d59d51a965b0412c62 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 15 Oct 2014 16:24:02 +0400 Subject: ipv4: dst_entry leak in ip_send_unicast_reply() commit 4062090e3e5caaf55bed4523a69f26c3265cc1d2 upstream. ip_setup_cork() called inside ip_append_data() steals dst entry from rt to cork and in case errors in __ip_append_data() nobody frees stolen dst entry Fixes: 2e77d89b2fa8 ("net: avoid a pair of dst_hold()/dst_release() in ip_append_data()") Signed-off-by: Vasily Averin Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller [bwh: Backported to 3.2: adjust context] Signed-off-by: Ben Hutchings --- net/ipv4/ip_output.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 115157b355d9..bf2e54b4f299 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1472,6 +1472,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, struct ipcm_cookie ipc; struct flowi4 fl4; struct rtable *rt = skb_rtable(skb); + int err; if (ip_options_echo(&replyopts.opt.opt, skb)) return; @@ -1509,8 +1510,13 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, sk->sk_priority = skb->priority; sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; - ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0, - &ipc, &rt, MSG_DONTWAIT); + err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, + len, 0, &ipc, &rt, MSG_DONTWAIT); + if (unlikely(err)) { + ip_flush_pending_frames(sk); + goto out; + } + if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { if (arg->csumoffset >= 0) *((__sum16 *)skb_transport_header(skb) + @@ -1519,7 +1525,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, skb->ip_summed = CHECKSUM_NONE; ip_push_pending_frames(sk, &fl4); } - +out: bh_unlock_sock(sk); ip_rt_put(rt); -- cgit v1.2.3 From ab25611227cffac1b1bc8e115e0719158ac8568c Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 31 Oct 2014 03:10:31 +0000 Subject: drivers/net: macvtap and tun depend on INET commit de11b0e8c569b96c2cf6a811e3805b7aeef498a3 upstream. These drivers now call ipv6_proxy_select_ident(), which is defined only if CONFIG_INET is enabled. However, they have really depended on CONFIG_INET for as long as they have allowed sending GSO packets from userland. Reported-by: kbuild test robot Signed-off-by: Ben Hutchings Fixes: f43798c27684 ("tun: Allow GSO using virtio_net_hdr") Fixes: b9fb9ee07e67 ("macvtap: add GSO/csum offload support") Fixes: 5188cd44c55d ("drivers/net, ipv6: Select IPv6 fragment idents for virtio UFO packets") Signed-off-by: David S. Miller --- drivers/net/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 654a5e94e0e7..61d3d1f185d4 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -143,6 +143,7 @@ config MACVLAN config MACVTAP tristate "MAC-VLAN based tap driver (EXPERIMENTAL)" depends on MACVLAN + depends on INET help This adds a specialized tap character device driver that is based on the MAC-VLAN network interface, called macvtap. A macvtap device @@ -195,6 +196,7 @@ config RIONET_RX_SIZE config TUN tristate "Universal TUN/TAP device driver support" + depends on INET select CRC32 ---help--- TUN/TAP provides packet reception and transmission for user space -- cgit v1.2.3 From 85c3fe917f5459427c4ba5797332f9910400afeb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 3 Dec 2014 12:13:58 +0100 Subject: net: sctp: use MAX_HEADER for headroom reserve in output path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9772b54c55266ce80c639a80aa68eeb908f8ecf5 upstream. To accomodate for enough headroom for tunnels, use MAX_HEADER instead of LL_MAX_HEADER. Robert reported that he has hit after roughly 40hrs of trinity an skb_under_panic() via SCTP output path (see reference). 
I couldn't reproduce it from here, but not using MAX_HEADER as elsewhere in other protocols might be one possible cause for this. In any case, it looks like accounting on chunks themself seems to look good as the skb already passed the SCTP output path and did not hit any skb_over_panic(). Given tunneling was enabled in his .config, the headroom would have been expanded by MAX_HEADER in this case. Reported-by: Robert Święcki Reference: https://lkml.org/lkml/2014/12/1/507 Fixes: 594ccc14dfe4d ("[SCTP] Replace incorrect use of dev_alloc_skb with alloc_skb in sctp_packet_transmit().") Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Ben Hutchings --- net/sctp/output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index 5bd9aa93139d..c3b85497d0ab 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -384,12 +384,12 @@ int sctp_packet_transmit(struct sctp_packet *packet) sk = chunk->skb->sk; /* Allocate the new skb. */ - nskb = alloc_skb(packet->size + LL_MAX_HEADER, GFP_ATOMIC); + nskb = alloc_skb(packet->size + MAX_HEADER, GFP_ATOMIC); if (!nskb) goto nomem; /* Make sure the outbound skb has enough header room reserved. */ - skb_reserve(nskb, packet->overhead + LL_MAX_HEADER); + skb_reserve(nskb, packet->overhead + MAX_HEADER); /* Set the owning socket so that we know where to get the * destination IP address. -- cgit v1.2.3 From 9a8d305befe3218c7523179c0d406d876b5cbbed Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 22 Sep 2014 13:17:48 +0200 Subject: x86: kvm: use alternatives for VMCALL vs. VMMCALL if kernel text is read-only commit c1118b3602c2329671ad5ec8bdf8e374323d6343 upstream. On x86_64, kernel text mappings are mapped read-only with CONFIG_DEBUG_RODATA. In that case, KVM will fail to patch VMCALL instructions to VMMCALL as required on AMD processors. The failure mode is currently a divide-by-zero exception, which obviously is a KVM bug that has to be fixed. However, picking the right instruction between VMCALL and VMMCALL will be faster and will help if you cannot upgrade the hypervisor. Reported-by: Chris Webb Tested-by: Chris Webb Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: x86@kernel.org Acked-by: Borislav Petkov Signed-off-by: Paolo Bonzini [bwh: Backported to 3.2: adjust context] Signed-off-by: Ben Hutchings --- arch/x86/include/asm/cpufeature.h | 1 + arch/x86/include/asm/kvm_para.h | 10 ++++++++-- arch/x86/kernel/cpu/amd.c | 7 +++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index a315f1ccbf93..b8a5fe545f95 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -193,6 +193,7 @@ #define X86_FEATURE_DECODEASSISTS (8*32+12) /* AMD Decode Assists support */ #define X86_FEATURE_PAUSEFILTER (8*32+13) /* AMD filtered pause intercept */ #define X86_FEATURE_PFTHRESHOLD (8*32+14) /* AMD pause filter threshold */ +#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 734c3767cfac..9f0a680a7e12 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -91,15 +91,21 @@ struct kvm_vcpu_pv_apf_data { #ifdef __KERNEL__ #include +#include extern void kvmclock_init(void); extern int kvm_register_clock(char *txt); -/* This instruction is vmcall. On non-VT architectures, it will generate a - * trap that we will then rewrite to the appropriate instruction. +#ifdef CONFIG_DEBUG_RODATA +#define KVM_HYPERCALL \ + ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL) +#else +/* On AMD processors, vmcall will generate a trap that we will + * then rewrite to the appropriate instruction. */ #define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" +#endif /* For KVM hypercalls, a three-byte sequence of either the vmrun or the vmmrun * instruction. The hypervisor may replace it with something else but only the diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f07becc715bb..2d44a28be0ea 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -469,6 +469,13 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_EXTD_APICID); } #endif + + /* + * This is only needed to tell the kernel whether to use VMCALL + * and VMMCALL. VMMCALL is never executed except under virt, so + * we can set it unconditionally. + */ + set_cpu_cap(c, X86_FEATURE_VMMCALL); } static void __cpuinit init_amd(struct cpuinfo_x86 *c) -- cgit v1.2.3 From ac4619ec85e120ecbcd8c96e518cd99aa41c8012 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 1 Jan 2015 01:27:54 +0000 Subject: Linux 3.2.66 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1433109ccf88..f08f8bfbd788 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 2 -SUBLEVEL = 65 +SUBLEVEL = 66 EXTRAVERSION = NAME = Saber-toothed Squirrel -- cgit v1.2.3