From a58d37bce0d21cf7fbd589384c619e465ef2f927 Mon Sep 17 00:00:00 2001
From: Frederic Barrat <fbarrat@linux.ibm.com>
Date: Mon, 24 Jun 2019 16:41:48 +0200
Subject: ocxl: Fix concurrent AFU open and device removal

If an ocxl device is unbound through sysfs at the same time its AFU is
being opened by a user process, the open code may dereference freed
stuctures, which can lead to kernel oops messages. You'd have to hit a
tiny time window, but it's possible. It's fairly easy to test by
making the time window bigger artificially.

Fix it with a combination of 2 changes:
  - when an AFU device is found in the IDR by looking for the device
    minor number, we should hold a reference on the device until after
    the context is allocated. A reference on the AFU structure is kept
    when the context is allocated, so we can release the reference on
    the device after the context allocation.
  - with the fix above, there's still another even tinier window,
    between the time the AFU device is found in the IDR and the
    reference on the device is taken. We can fix this one by removing
    the IDR entry earlier, when the device setup is removed, instead
    of waiting for the 'release' device callback. With proper locking
    around the IDR.

Fixes: 75ca758adbaf ("ocxl: Create a clear delineation between ocxl backend & frontend")
Cc: stable@vger.kernel.org # v5.2+
Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20190624144148.32022-1-fbarrat@linux.ibm.com
---
 drivers/misc/ocxl/file.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
index 2870c25da166..4d1b44de1492 100644
--- a/drivers/misc/ocxl/file.c
+++ b/drivers/misc/ocxl/file.c
@@ -18,18 +18,15 @@ static struct class *ocxl_class;
 static struct mutex minors_idr_lock;
 static struct idr minors_idr;
 
-static struct ocxl_file_info *find_file_info(dev_t devno)
+static struct ocxl_file_info *find_and_get_file_info(dev_t devno)
 {
 	struct ocxl_file_info *info;
 
-	/*
-	 * We don't declare an RCU critical section here, as our AFU
-	 * is protected by a reference counter on the device. By the time the
-	 * info reference is removed from the idr, the ref count of
-	 * the device is already at 0, so no user API will access that AFU and
-	 * this function can't return it.
-	 */
+	mutex_lock(&minors_idr_lock);
 	info = idr_find(&minors_idr, MINOR(devno));
+	if (info)
+		get_device(&info->dev);
+	mutex_unlock(&minors_idr_lock);
 	return info;
 }
 
@@ -58,14 +55,16 @@ static int afu_open(struct inode *inode, struct file *file)
 
 	pr_debug("%s for device %x\n", __func__, inode->i_rdev);
 
-	info = find_file_info(inode->i_rdev);
+	info = find_and_get_file_info(inode->i_rdev);
 	if (!info)
 		return -ENODEV;
 
 	rc = ocxl_context_alloc(&ctx, info->afu, inode->i_mapping);
-	if (rc)
+	if (rc) {
+		put_device(&info->dev);
 		return rc;
-
+	}
+	put_device(&info->dev);
 	file->private_data = ctx;
 	return 0;
 }
@@ -487,7 +486,6 @@ static void info_release(struct device *dev)
 {
 	struct ocxl_file_info *info = container_of(dev, struct ocxl_file_info, dev);
 
-	free_minor(info);
 	ocxl_afu_put(info->afu);
 	kfree(info);
 }
@@ -577,6 +575,7 @@ void ocxl_file_unregister_afu(struct ocxl_afu *afu)
 
 	ocxl_file_make_invisible(info);
 	ocxl_sysfs_unregister_afu(info);
+	free_minor(info);
 	device_unregister(&info->dev);
 }
 
-- 
cgit v1.2.3


From 14c73bd344da60abaf7da3ea2e7733ddda35bbac Mon Sep 17 00:00:00 2001
From: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Date: Thu, 5 Dec 2019 14:02:17 +0530
Subject: powerpc/vcpu: Assume dedicated processors as non-preempt

With commit 247f2f6f3c70 ("sched/core: Don't schedule threads on
pre-empted vCPUs"), the scheduler avoids preempted vCPUs to schedule
tasks on wakeup. This leads to wrong choice of CPU, which in-turn
leads to larger wakeup latencies. Eventually, it leads to performance
regression in latency sensitive benchmarks like soltp, schbench etc.

On Powerpc, vcpu_is_preempted() only looks at yield_count. If the
yield_count is odd, the vCPU is assumed to be preempted. However
yield_count is increased whenever the LPAR enters CEDE state (idle).
So any CPU that has entered CEDE state is assumed to be preempted.

Even if vCPU of dedicated LPAR is preempted/donated, it should have
right of first-use since they are supposed to own the vCPU.

On a Power9 System with 32 cores:
  # lscpu
  Architecture:        ppc64le
  Byte Order:          Little Endian
  CPU(s):              128
  On-line CPU(s) list: 0-127
  Thread(s) per core:  8
  Core(s) per socket:  1
  Socket(s):           16
  NUMA node(s):        2
  Model:               2.2 (pvr 004e 0202)
  Model name:          POWER9 (architected), altivec supported
  Hypervisor vendor:   pHyp
  Virtualization type: para
  L1d cache:           32K
  L1i cache:           32K
  L2 cache:            512K
  L3 cache:            10240K
  NUMA node0 CPU(s):   0-63
  NUMA node1 CPU(s):   64-127

  # perf stat -a -r 5 ./schbench
  v5.4                               v5.4 + patch
  Latency percentiles (usec)         Latency percentiles (usec)
        50.0000th: 45                      50.0th: 45
        75.0000th: 62                      75.0th: 63
        90.0000th: 71                      90.0th: 74
        95.0000th: 77                      95.0th: 78
        *99.0000th: 91                     *99.0th: 82
        99.5000th: 707                     99.5th: 83
        99.9000th: 6920                    99.9th: 86
        min=0, max=10048                   min=0, max=96
  Latency percentiles (usec)         Latency percentiles (usec)
        50.0000th: 45                      50.0th: 46
        75.0000th: 61                      75.0th: 64
        90.0000th: 72                      90.0th: 75
        95.0000th: 79                      95.0th: 79
        *99.0000th: 691                    *99.0th: 83
        99.5000th: 3972                    99.5th: 85
        99.9000th: 8368                    99.9th: 91
        min=0, max=16606                   min=0, max=117
  Latency percentiles (usec)         Latency percentiles (usec)
        50.0000th: 45                      50.0th: 46
        75.0000th: 61                      75.0th: 64
        90.0000th: 71                      90.0th: 75
        95.0000th: 77                      95.0th: 79
        *99.0000th: 106                    *99.0th: 83
        99.5000th: 2364                    99.5th: 84
        99.9000th: 7480                    99.9th: 90
        min=0, max=10001                   min=0, max=95
  Latency percentiles (usec)         Latency percentiles (usec)
        50.0000th: 45                      50.0th: 47
        75.0000th: 62                      75.0th: 65
        90.0000th: 72                      90.0th: 75
        95.0000th: 78                      95.0th: 79
        *99.0000th: 93                     *99.0th: 84
        99.5000th: 108                     99.5th: 85
        99.9000th: 6792                    99.9th: 90
        min=0, max=17681                   min=0, max=117
  Latency percentiles (usec)         Latency percentiles (usec)
        50.0000th: 46                      50.0th: 45
        75.0000th: 62                      75.0th: 64
        90.0000th: 73                      90.0th: 75
        95.0000th: 79                      95.0th: 79
        *99.0000th: 113                    *99.0th: 82
        99.5000th: 2724                    99.5th: 83
        99.9000th: 6184                    99.9th: 93
        min=0, max=9887                    min=0, max=111

   Performance counter stats for 'system wide' (5 runs):

  context-switches    43,373  ( +-  0.40% )   44,597 ( +-  0.55% )
  cpu-migrations       1,211  ( +-  5.04% )      220 ( +-  6.23% )
  page-faults         15,983  ( +-  5.21% )   15,360 ( +-  3.38% )

Waiman Long suggested using static_keys.

Fixes: 247f2f6f3c70 ("sched/core: Don't schedule threads on pre-empted vCPUs")
Cc: stable@vger.kernel.org # v4.18+
Reported-by: Parth Shah <parth@linux.ibm.com>
Reported-by: Ihor Pasichnyk <Ihor.Pasichnyk@ibm.com>
Tested-by: Juri Lelli <juri.lelli@redhat.com>
Acked-by: Waiman Long <longman@redhat.com>
Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Phil Auld <pauld@redhat.com>
Reviewed-by: Vaidyanathan Srinivasan <svaidy@linux.ibm.com>
Tested-by: Parth Shah <parth@linux.ibm.com>
[mpe: Move the key and setting of the key to pseries/setup.c]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20191213035036.6913-1-mpe@ellerman.id.au
---
 arch/powerpc/include/asm/spinlock.h    | 4 +++-
 arch/powerpc/platforms/pseries/setup.c | 7 +++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index e9a960e28f3c..cac95a3f30c2 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -36,10 +36,12 @@
 #endif
 
 #ifdef CONFIG_PPC_PSERIES
+DECLARE_STATIC_KEY_FALSE(shared_processor);
+
 #define vcpu_is_preempted vcpu_is_preempted
 static inline bool vcpu_is_preempted(int cpu)
 {
-	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+	if (!static_branch_unlikely(&shared_processor))
 		return false;
 	return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1);
 }
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 0a40201f315f..0c8421dd01ab 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -74,6 +74,9 @@
 #include "pseries.h"
 #include "../../../../drivers/pci/pci.h"
 
+DEFINE_STATIC_KEY_FALSE(shared_processor);
+EXPORT_SYMBOL_GPL(shared_processor);
+
 int CMO_PrPSP = -1;
 int CMO_SecPSP = -1;
 unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);
@@ -758,6 +761,10 @@ static void __init pSeries_setup_arch(void)
 
 	if (firmware_has_feature(FW_FEATURE_LPAR)) {
 		vpa_init(boot_cpuid);
+
+		if (lppaca_shared_proc(get_lppaca()))
+			static_branch_enable(&shared_processor);
+
 		ppc_md.power_save = pseries_lpar_idle;
 		ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
 #ifdef CONFIG_PCI_IOV
-- 
cgit v1.2.3


From 656c21d6af5d9279dd7b51ca7a4a71008127044b Mon Sep 17 00:00:00 2001
From: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Date: Thu, 5 Dec 2019 14:02:18 +0530
Subject: powerpc/shared: Use static key to detect shared processor

With the static key shared processor available, is_shared_processor()
can return without having to query the lppaca structure.

Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Phil Auld <pauld@redhat.com>
Acked-by: Waiman Long <longman@redhat.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20191213035036.6913-2-mpe@ellerman.id.au
---
 arch/powerpc/include/asm/spinlock.h | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index cac95a3f30c2..1b55fc08f853 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -112,13 +112,8 @@ static inline void splpar_rw_yield(arch_rwlock_t *lock) {};
 
 static inline bool is_shared_processor(void)
 {
-/*
- * LPPACA is only available on Pseries so guard anything LPPACA related to
- * allow other platforms (which include this common header) to compile.
- */
-#ifdef CONFIG_PPC_PSERIES
-	return (IS_ENABLED(CONFIG_PPC_SPLPAR) &&
-		lppaca_shared_proc(local_paca->lppaca_ptr));
+#ifdef CONFIG_PPC_SPLPAR
+	return static_branch_unlikely(&shared_processor);
 #else
 	return false;
 #endif
-- 
cgit v1.2.3


From 8fabc623238e68b3ac63c0dd1657bf86c1fa33af Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Wed, 4 Dec 2019 14:35:24 +0200
Subject: powerpc: Ensure that swiotlb buffer is allocated from low memory

Some powerpc platforms (e.g. 85xx) limit DMA-able memory way below 4G.
If a system has more physical memory than this limit, the swiotlb
buffer is not addressable because it is allocated from memblock using
top-down mode.

Force memblock to bottom-up mode before calling swiotlb_init() to
ensure that the swiotlb buffer is DMA-able.

Reported-by: Christian Zigotzky <chzigotzky@xenosoft.de>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20191204123524.22919-1-rppt@kernel.org
---
 arch/powerpc/mm/mem.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 9488b63dfc87..617c2777926f 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -289,6 +289,14 @@ void __init mem_init(void)
 	BUILD_BUG_ON(MMU_PAGE_COUNT > 16);
 
 #ifdef CONFIG_SWIOTLB
+	/*
+	 * Some platforms (e.g. 85xx) limit DMA-able memory way below
+	 * 4G. We force memblock to bottom-up mode to ensure that the
+	 * memory allocated in swiotlb_init() is DMA-able.
+	 * As it's the last memblock allocation, no need to reset it
+	 * back to to-down.
+	 */
+	memblock_set_bottom_up(true);
 	swiotlb_init(0);
 #endif
 
-- 
cgit v1.2.3


From 099bc4812f09155da77eeb960a983470249c9ce1 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Mon, 9 Dec 2019 06:19:08 +0000
Subject: powerpc/irq: fix stack overflow verification

Before commit 0366a1c70b89 ("powerpc/irq: Run softirqs off the top of
the irq stack"), check_stack_overflow() was called by do_IRQ(), before
switching to the irq stack.
In that commit, do_IRQ() was renamed __do_irq(), and is now executing
on the irq stack, so check_stack_overflow() has just become almost
useless.

Move check_stack_overflow() call in do_IRQ() to do the check while
still on the current stack.

Fixes: 0366a1c70b89 ("powerpc/irq: Run softirqs off the top of the irq stack")
Cc: stable@vger.kernel.org
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/e033aa8116ab12b7ca9a9c75189ad0741e3b9b5f.1575872340.git.christophe.leroy@c-s.fr
---
 arch/powerpc/kernel/irq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 5645bc9cbc09..add67498c126 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -619,8 +619,6 @@ void __do_irq(struct pt_regs *regs)
 
 	trace_irq_entry(regs);
 
-	check_stack_overflow();
-
 	/*
 	 * Query the platform PIC for the interrupt & ack it.
 	 *
@@ -652,6 +650,8 @@ void do_IRQ(struct pt_regs *regs)
 	irqsp = hardirq_ctx[raw_smp_processor_id()];
 	sirqsp = softirq_ctx[raw_smp_processor_id()];
 
+	check_stack_overflow();
+
 	/* Already there ? */
 	if (unlikely(cursp == irqsp || cursp == sirqsp)) {
 		__do_irq(regs);
-- 
cgit v1.2.3


From 913e73c77d48aeeb50c16450a653dca9c71ae2e2 Mon Sep 17 00:00:00 2001
From: Frederic Barrat <fbarrat@linux.ibm.com>
Date: Mon, 9 Dec 2019 11:55:13 +0100
Subject: ocxl: Fix potential memory leak on context creation

If we couldn't fully init a context, we were leaking memory.

Fixes: b9721d275cc2 ("ocxl: Allow external drivers to use OpenCAPI contexts")
Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
Acked-by: Andrew Donnellan <ajd@linux.ibm.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20191209105513.8566-1-fbarrat@linux.ibm.com
---
 drivers/misc/ocxl/context.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c
index 994563a078eb..de8a66b9d76b 100644
--- a/drivers/misc/ocxl/context.c
+++ b/drivers/misc/ocxl/context.c
@@ -10,18 +10,17 @@ int ocxl_context_alloc(struct ocxl_context **context, struct ocxl_afu *afu,
 	int pasid;
 	struct ocxl_context *ctx;
 
-	*context = kzalloc(sizeof(struct ocxl_context), GFP_KERNEL);
-	if (!*context)
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
 		return -ENOMEM;
 
-	ctx = *context;
-
 	ctx->afu = afu;
 	mutex_lock(&afu->contexts_lock);
 	pasid = idr_alloc(&afu->contexts_idr, ctx, afu->pasid_base,
 			afu->pasid_base + afu->pasid_max, GFP_KERNEL);
 	if (pasid < 0) {
 		mutex_unlock(&afu->contexts_lock);
+		kfree(ctx);
 		return pasid;
 	}
 	afu->pasid_count++;
@@ -43,6 +42,7 @@ int ocxl_context_alloc(struct ocxl_context **context, struct ocxl_afu *afu,
 	 * duration of the life of the context
 	 */
 	ocxl_afu_get(afu);
+	*context = ctx;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(ocxl_context_alloc);
-- 
cgit v1.2.3


From 0601546f23fb70d84b807e73cfe8e789d054c98d Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Sat, 14 Dec 2019 08:10:29 +0000
Subject: powerpc/8xx: fix bogus __init on mmu_mapin_ram_chunk()

Remove __init qualifier for mmu_mapin_ram_chunk() as it is called by
mmu_mark_initmem_nx() and mmu_mark_rodata_ro() which are not __init
functions.

At the same time, mark it static as it is only used in this file.

Reported-by: kbuild test robot <lkp@intel.com>
Fixes: a2227a277743 ("powerpc/32: Don't populate page tables for block mapped pages except on the 8xx")
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/56648921986a6b3e7315b1fbbf4684f21bd2dea8.1576310997.git.christophe.leroy@c-s.fr
---
 arch/powerpc/mm/nohash/8xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 090af2d2d3e4..96eb8e43f39b 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -103,7 +103,7 @@ static void mmu_patch_addis(s32 *site, long simm)
 	patch_instruction_site(site, instr);
 }
 
-void __init mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, pgprot_t prot)
+static void mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, pgprot_t prot)
 {
 	unsigned long s = offset;
 	unsigned long v = PAGE_OFFSET + s;
-- 
cgit v1.2.3


From e352f576d345e5bf1fb62c8559851448a6c1d9cd Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Mon, 16 Dec 2019 11:30:58 +0100
Subject: powerpc/pseries/cmm: fix managed page counts when migrating pages
 between zones

Commit 63341ab03706 (virtio-balloon: fix managed page counts when migrating
pages between zones) fixed a long existing BUG in the virtio-balloon
driver when pages would get migrated between zones.  I did not try to
reproduce on powerpc, but looking at the code, the same should apply to
powerpc/cmm ever since it started using the balloon compaction
infrastructure (luckily just recently).

In case we have to migrate a ballon page to a newpage of another zone, the
managed page count of both zones is wrong. Paired with memory offlining
(which will adjust the managed page count), we can trigger kernel crashes
and all kinds of different symptoms.

Fix it by properly adjusting the managed page count when migrating if
the zone changed.

We'll temporarily modify the totalram page count. If this ever becomes a
problem, we can fine tune by providing helpers that don't touch
the totalram pages (e.g., adjust_zone_managed_page_count()).

Fixes: fe030c9b85e6 ("powerpc/pseries/cmm: Implement balloon compaction")
Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20191216103058.4958-1-david@redhat.com
---
 arch/powerpc/platforms/pseries/cmm.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index 91571841df8a..9dba7e880885 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -539,6 +539,16 @@ static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
 	/* balloon page list reference */
 	get_page(newpage);
 
+	/*
+	 * When we migrate a page to a different zone, we have to fixup the
+	 * count of both involved zones as we adjusted the managed page count
+	 * when inflating.
+	 */
+	if (page_zone(page) != page_zone(newpage)) {
+		adjust_managed_page_count(page, 1);
+		adjust_managed_page_count(newpage, -1);
+	}
+
 	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
 	balloon_page_insert(b_dev_info, newpage);
 	balloon_page_delete(page);
-- 
cgit v1.2.3


From 61e3acd8c693a14fc69b824cb5b08d02cb90a6e7 Mon Sep 17 00:00:00 2001
From: Andrew Donnellan <ajd@linux.ibm.com>
Date: Tue, 10 Dec 2019 00:22:21 +1100
Subject: powerpc: Fix __clear_user() with KUAP enabled

The KUAP implementation adds calls in clear_user() to enable and
disable access to userspace memory. However, it doesn't add these to
__clear_user(), which is used in the ptrace regset code.

As there's only one direct user of __clear_user() (the regset code),
and the time taken to set the AMR for KUAP purposes is going to
dominate the cost of a quick access_ok(), there's not much point
having a separate path.

Rename __clear_user() to __arch_clear_user(), and make __clear_user()
just call clear_user().

Reported-by: syzbot+f25ecf4b2982d8c7a640@syzkaller-ppc64.appspotmail.com
Reported-by: Daniel Axtens <dja@axtens.net>
Suggested-by: Michael Ellerman <mpe@ellerman.id.au>
Fixes: de78a9c42a79 ("powerpc: Add a framework for Kernel Userspace Access Protection")
Signed-off-by: Andrew Donnellan <ajd@linux.ibm.com>
[mpe: Use __arch_clear_user() for the asm version like arm64 & nds32]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20191209132221.15328-1-ajd@linux.ibm.com
---
 arch/powerpc/include/asm/uaccess.h | 9 +++++++--
 arch/powerpc/lib/string_32.S       | 4 ++--
 arch/powerpc/lib/string_64.S       | 6 +++---
 3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 15002b51ff18..c92fe7fe9692 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -401,7 +401,7 @@ copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n)
 	return n;
 }
 
-extern unsigned long __clear_user(void __user *addr, unsigned long size);
+unsigned long __arch_clear_user(void __user *addr, unsigned long size);
 
 static inline unsigned long clear_user(void __user *addr, unsigned long size)
 {
@@ -409,12 +409,17 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size)
 	might_fault();
 	if (likely(access_ok(addr, size))) {
 		allow_write_to_user(addr, size);
-		ret = __clear_user(addr, size);
+		ret = __arch_clear_user(addr, size);
 		prevent_write_to_user(addr, size);
 	}
 	return ret;
 }
 
+static inline unsigned long __clear_user(void __user *addr, unsigned long size)
+{
+	return clear_user(addr, size);
+}
+
 extern long strncpy_from_user(char *dst, const char __user *src, long count);
 extern __must_check long strnlen_user(const char __user *str, long n);
 
diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S
index f69a6aab7bfb..1ddb26394e8a 100644
--- a/arch/powerpc/lib/string_32.S
+++ b/arch/powerpc/lib/string_32.S
@@ -17,7 +17,7 @@ CACHELINE_BYTES = L1_CACHE_BYTES
 LG_CACHELINE_BYTES = L1_CACHE_SHIFT
 CACHELINE_MASK = (L1_CACHE_BYTES-1)
 
-_GLOBAL(__clear_user)
+_GLOBAL(__arch_clear_user)
 /*
  * Use dcbz on the complete cache lines in the destination
  * to set them to zero.  This requires that the destination
@@ -87,4 +87,4 @@ _GLOBAL(__clear_user)
 	EX_TABLE(8b, 91b)
 	EX_TABLE(9b, 91b)
 
-EXPORT_SYMBOL(__clear_user)
+EXPORT_SYMBOL(__arch_clear_user)
diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S
index 507b18b1660e..169872bc0892 100644
--- a/arch/powerpc/lib/string_64.S
+++ b/arch/powerpc/lib/string_64.S
@@ -17,7 +17,7 @@ PPC64_CACHES:
 	.section	".text"
 
 /**
- * __clear_user: - Zero a block of memory in user space, with less checking.
+ * __arch_clear_user: - Zero a block of memory in user space, with less checking.
  * @to:   Destination address, in user space.
  * @n:    Number of bytes to zero.
  *
@@ -58,7 +58,7 @@ err3;	stb	r0,0(r3)
 	mr	r3,r4
 	blr
 
-_GLOBAL_TOC(__clear_user)
+_GLOBAL_TOC(__arch_clear_user)
 	cmpdi	r4,32
 	neg	r6,r3
 	li	r0,0
@@ -181,4 +181,4 @@ err1;	dcbz	0,r3
 	cmpdi	r4,32
 	blt	.Lshort_clear
 	b	.Lmedium_clear
-EXPORT_SYMBOL(__clear_user)
+EXPORT_SYMBOL(__arch_clear_user)
-- 
cgit v1.2.3


From 228b607d8ea1b7d4561945058d5692709099d432 Mon Sep 17 00:00:00 2001
From: Marcus Comstedt <marcus@mc.pp.se>
Date: Sun, 15 Dec 2019 10:49:00 +0100
Subject: KVM: PPC: Book3S HV: Fix regression on big endian hosts

VCPU_CR is the offset of arch.regs.ccr in kvm_vcpu.
arch/powerpc/include/asm/kvm_host.h defines arch.regs as a struct
pt_regs, and arch/powerpc/include/asm/ptrace.h defines the ccr field
of pt_regs as "unsigned long ccr".  Since unsigned long is 64 bits, a
64-bit load needs to be used to load it, unless an endianness specific
correction offset is added to access the desired subpart.  In this
case there is no reason to _not_ use a 64 bit load though.

Fixes: 6c85b7bc637b ("powerpc/kvm: Use UV_RETURN ucall to return to ultravisor")
Cc: stable@vger.kernel.org # v5.4+
Signed-off-by: Marcus Comstedt <marcus@mc.pp.se>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20191215094900.46740-1-marcus@mc.pp.se
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 0496e66aaa56..c6fbbd29bd87 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1117,7 +1117,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	ld	r7, VCPU_GPR(R7)(r4)
 	bne	ret_to_ultra
 
-	lwz	r0, VCPU_CR(r4)
+	ld	r0, VCPU_CR(r4)
 	mtcr	r0
 
 	ld	r0, VCPU_GPR(R0)(r4)
@@ -1137,7 +1137,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
  *   R3 = UV_RETURN
  */
 ret_to_ultra:
-	lwz	r0, VCPU_CR(r4)
+	ld	r0, VCPU_CR(r4)
 	mtcr	r0
 
 	ld	r0, VCPU_GPR(R3)(r4)
-- 
cgit v1.2.3