Diffstat (limited to 'arch')
81 files changed, 533 insertions, 184 deletions
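The most repeated change in the per-architecture mm/fault.c hunks below is a single substitution: the fault handler bails out when pagefault_disabled() is true rather than when in_atomic() is true. On PREEMPT_RT, spinlock-held sections no longer raise preempt_count, so in_atomic() can no longer stand in for "must not handle a fault here". A minimal sketch of the helpers those checks rely on, assuming the RT tree's explicit per-task counter (the field name is illustrative):

	static inline void pagefault_disable(void)
	{
		current->pagefault_disabled++;
		/* Order the counter update against the faulting access. */
		barrier();
	}

	static inline void pagefault_enable(void)
	{
		barrier();
		current->pagefault_disabled--;
	}

	static inline bool pagefault_disabled(void)
	{
		/* True inside kmap_atomic()/copy_*_inatomic() style regions. */
		return current->pagefault_disabled != 0;
	}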
diff --git a/arch/Kconfig b/arch/Kconfig index 00e3702ec79b..8b1a614cb58a 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -6,6 +6,7 @@ config OPROFILE tristate "OProfile system profiling" depends on PROFILING depends on HAVE_OPROFILE + depends on !PREEMPT_RT_FULL select RING_BUFFER select RING_BUFFER_ALLOW_SWAP help diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 0c4132dd3507..fb9eaa4dc109 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -108,7 +108,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, /* If we're in an interrupt context, or have no user context, we must not take the fault. */ - if (!mm || in_atomic()) + if (!mm || pagefault_disabled()) goto no_context; #ifdef CONFIG_ALPHA_LARGE_VMALLOC diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 2d345d2c3042..7736fcfbdde9 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -19,6 +19,7 @@ config ARM select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select HARDIRQS_SW_RESEND + select IRQ_FORCED_THREADING select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_KGDB select HAVE_ARCH_SECCOMP_FILTER @@ -47,6 +48,7 @@ config ARM select HAVE_MEMBLOCK select HAVE_OPROFILE if (HAVE_PERF_EVENTS) select HAVE_PERF_EVENTS + select HAVE_PREEMPT_LAZY select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_SYSCALL_TRACEPOINTS select HAVE_UID16 diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h index fa09e6b49bf1..fbd0ba76c1dd 100644 --- a/arch/arm/include/asm/switch_to.h +++ b/arch/arm/include/asm/switch_to.h @@ -3,6 +3,14 @@ #include <linux/thread_info.h> +#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p); +#else +static inline void +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } +#endif + + /* * switch_to(prev, next) should switch from task `prev' to `next' * `prev' will never be the same as `next'. 
schedule() itself @@ -12,6 +20,7 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info #define switch_to(prev,next,last) \ do { \ + switch_kmaps(prev, next); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ } while (0) diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index f00b5692cd9d..1f24211f7182 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -50,6 +50,7 @@ struct cpu_context_save { struct thread_info { unsigned long flags; /* low level flags */ int preempt_count; /* 0 => preemptable, <0 => bug */ + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ mm_segment_t addr_limit; /* address limit */ struct task_struct *task; /* main task structure */ struct exec_domain *exec_domain; /* execution domain */ @@ -148,6 +149,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, #define TIF_SIGPENDING 0 #define TIF_NEED_RESCHED 1 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ +#define TIF_NEED_RESCHED_LAZY 3 #define TIF_SYSCALL_TRACE 8 #define TIF_SYSCALL_AUDIT 9 #define TIF_SYSCALL_TRACEPOINT 10 @@ -160,6 +162,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index ee68cce6b48e..b609861fbe40 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -53,6 +53,7 @@ int main(void) BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_EXEC_DOMAIN, offsetof(struct thread_info, exec_domain)); diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index d43c7e54ec6c..d33a745c40ae 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -204,11 +204,18 @@ __irq_svc: #ifdef CONFIG_PREEMPT get_thread_info tsk ldr r8, [tsk, #TI_PREEMPT] @ get preempt count - ldr r0, [tsk, #TI_FLAGS] @ get flags teq r8, #0 @ if preempt count != 0 + bne 1f @ return from exception + ldr r0, [tsk, #TI_FLAGS] @ get flags + tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set + blne svc_preempt @ preempt!
+ + ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count + teq r8, #0 @ if preempt lazy count != 0 movne r0, #0 @ force flags to 0 - tst r0, #_TIF_NEED_RESCHED + tst r0, #_TIF_NEED_RESCHED_LAZY blne svc_preempt +1: #endif svc_exit r5, irq = 1 @ return from exception @@ -223,6 +230,8 @@ svc_preempt: 1: bl preempt_schedule_irq @ irq en/disable is done inside ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS tst r0, #_TIF_NEED_RESCHED + bne 1b + tst r0, #_TIF_NEED_RESCHED_LAZY moveq pc, r8 @ go again b 1b #endif diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 1f2740e3dbc0..b32b043b5b6d 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -118,7 +118,8 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) continue; } - err = request_irq(irq, handler, IRQF_NOBALANCING, "arm-pmu", + err = request_irq(irq, handler, + IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", cpu_pmu); if (err) { pr_err("unable to request IRQ%d for ARM PMU counters\n", diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index ab1fe3bb31f0..c4405c6c1a20 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -433,6 +433,30 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) } #ifdef CONFIG_MMU +/* + * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not + * initialized by pgtable_page_ctor() then a coredump of the vector page will + * fail. + */ +static int __init vectors_user_mapping_init_page(void) +{ + struct page *page; + unsigned long addr = 0xffff0000; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + page = pmd_page(*(pmd)); + + pgtable_page_ctor(page); + + return 0; +} +late_initcall(vectors_user_mapping_init_page); + #ifdef CONFIG_KUSER_HELPERS /* * The vectors page is always readable from user space for the diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 5a42c12767af..fbbe3ffdbb15 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -584,7 +584,8 @@ asmlinkage int do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) { do { - if (likely(thread_flags & _TIF_NEED_RESCHED)) { + if (likely(thread_flags & (_TIF_NEED_RESCHED | + _TIF_NEED_RESCHED_LAZY))) { schedule(); } else { if (unlikely(!user_mode(regs))) diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c index 180b3024bec3..54dd6afb17b9 100644 --- a/arch/arm/mach-at91/at91rm9200_time.c +++ b/arch/arm/mach-at91/at91rm9200_time.c @@ -134,6 +134,7 @@ clkevt32k_mode(enum clock_event_mode mode, struct clock_event_device *dev) break; case CLOCK_EVT_MODE_SHUTDOWN: case CLOCK_EVT_MODE_UNUSED: + remove_irq(AT91_ID_SYS, &at91rm9200_timer_irq); case CLOCK_EVT_MODE_RESUME: irqmask = 0; break; diff --git a/arch/arm/mach-at91/at91sam926x_time.c b/arch/arm/mach-at91/at91sam926x_time.c index 3a4bc2e1a65e..4f6af15aef78 100644 --- a/arch/arm/mach-at91/at91sam926x_time.c +++ b/arch/arm/mach-at91/at91sam926x_time.c @@ -77,7 +77,7 @@ static struct clocksource pit_clk = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; - +static struct irqaction at91sam926x_pit_irq; /* * Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16) */ @@ -86,6 +86,8 @@ pit_clkevt_mode(enum clock_event_mode mode, struct clock_event_device *dev) { switch (mode) { case CLOCK_EVT_MODE_PERIODIC: + /* Set up irq handler */ + setup_irq(AT91_ID_SYS, &at91sam926x_pit_irq); 
/* update clocksource counter */ pit_cnt += pit_cycle * PIT_PICNT(pit_read(AT91_PIT_PIVR)); pit_write(AT91_PIT_MR, (pit_cycle - 1) | AT91_PIT_PITEN @@ -98,6 +100,7 @@ pit_clkevt_mode(enum clock_event_mode mode, struct clock_event_device *dev) case CLOCK_EVT_MODE_UNUSED: /* disable irq, leaving the clocksource active */ pit_write(AT91_PIT_MR, (pit_cycle - 1) | AT91_PIT_PITEN); + remove_irq(AT91_ID_SYS, &at91sam926x_pit_irq); break; case CLOCK_EVT_MODE_RESUME: break; diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c index a0e8ff7758a4..68c80285e9e6 100644 --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c @@ -71,7 +71,7 @@ static void __iomem *scu_base_addr(void) return (void __iomem *)(S5P_VA_SCU); } -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static void __cpuinit exynos_secondary_init(unsigned int cpu) { @@ -84,8 +84,8 @@ static void __cpuinit exynos_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static int __cpuinit exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -97,7 +97,7 @@ static int __cpuinit exynos_boot_secondary(unsigned int cpu, struct task_struct * Set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -126,7 +126,7 @@ static int __cpuinit exynos_boot_secondary(unsigned int cpu, struct task_struct if (timeout == 0) { printk(KERN_ERR "cpu1 power enable failed"); - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return -ETIMEDOUT; } } @@ -165,7 +165,7 @@ static int __cpuinit exynos_boot_secondary(unsigned int cpu, struct task_struct * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm/mach-msm/platsmp.c b/arch/arm/mach-msm/platsmp.c index 00cdb0a5dac8..4da2f47cb296 100644 --- a/arch/arm/mach-msm/platsmp.c +++ b/arch/arm/mach-msm/platsmp.c @@ -30,7 +30,7 @@ extern void msm_secondary_startup(void); -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static inline int get_core_count(void) { @@ -50,8 +50,8 @@ static void __cpuinit msm_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static __cpuinit void prepare_cold_cpu(unsigned int cpu) @@ -88,7 +88,7 @@ static int __cpuinit msm_boot_secondary(unsigned int cpu, struct task_struct *id * set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -122,7 +122,7 @@ static int __cpuinit msm_boot_secondary(unsigned int cpu, struct task_struct *id * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? 
-ENOSYS : 0; } diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index 2a551f997aea..6c9a3d5ef669 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -44,7 +44,7 @@ u16 pm44xx_errata; /* SCU base address */ static void __iomem *scu_base; -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); void __iomem *omap4_get_scu_base(void) { @@ -68,8 +68,8 @@ static void __cpuinit omap4_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static int __cpuinit omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -83,7 +83,7 @@ static int __cpuinit omap4_boot_secondary(unsigned int cpu, struct task_struct * * Set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * Update the AuxCoreBoot0 with boot state for secondary core. @@ -160,7 +160,7 @@ static int __cpuinit omap4_boot_secondary(unsigned int cpu, struct task_struct * * Now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return 0; } diff --git a/arch/arm/mach-prima2/platsmp.c b/arch/arm/mach-prima2/platsmp.c index 1c3de7bed841..3f1371316fc0 100644 --- a/arch/arm/mach-prima2/platsmp.c +++ b/arch/arm/mach-prima2/platsmp.c @@ -23,7 +23,7 @@ static void __iomem *scu_base; static void __iomem *rsc_base; -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static struct map_desc scu_io_desc __initdata = { .length = SZ_4K, @@ -56,8 +56,8 @@ static void __cpuinit sirfsoc_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static struct of_device_id rsc_ids[] = { @@ -95,7 +95,7 @@ static int __cpuinit sirfsoc_boot_secondary(unsigned int cpu, struct task_struct /* make sure write buffer is drained */ mb(); - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -128,7 +128,7 @@ static int __cpuinit sirfsoc_boot_secondary(unsigned int cpu, struct task_struct * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm/mach-spear/platsmp.c b/arch/arm/mach-spear/platsmp.c index 9c4c722c954e..ec5437dec3a7 100644 --- a/arch/arm/mach-spear/platsmp.c +++ b/arch/arm/mach-spear/platsmp.c @@ -20,7 +20,7 @@ #include <mach/spear.h> #include "generic.h" -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static void __iomem *scu_base = IOMEM(VA_SCU_BASE); @@ -36,8 +36,8 @@ static void __cpuinit spear13xx_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. 
*/ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static int __cpuinit spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -48,7 +48,7 @@ static int __cpuinit spear13xx_boot_secondary(unsigned int cpu, struct task_stru * set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -75,7 +75,7 @@ static int __cpuinit spear13xx_boot_secondary(unsigned int cpu, struct task_stru * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm/mach-ux500/platsmp.c b/arch/arm/mach-ux500/platsmp.c index 14d90469392f..85d6d1493459 100644 --- a/arch/arm/mach-ux500/platsmp.c +++ b/arch/arm/mach-ux500/platsmp.c @@ -52,7 +52,7 @@ static void __iomem *scu_base_addr(void) return NULL; } -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static void __cpuinit ux500_secondary_init(unsigned int cpu) { @@ -65,8 +65,8 @@ static void __cpuinit ux500_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static int __cpuinit ux500_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -77,7 +77,7 @@ static int __cpuinit ux500_boot_secondary(unsigned int cpu, struct task_struct * * set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -98,7 +98,7 @@ static int __cpuinit ux500_boot_secondary(unsigned int cpu, struct task_struct * * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index c97f7940cb95..a7e70899c9a0 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -279,7 +279,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (!mm || pagefault_disabled()) goto no_context; /* diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index 21b9e1bf9b77..bd41dd8cc561 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -38,6 +38,7 @@ EXPORT_SYMBOL(kunmap); void *kmap_atomic(struct page *page) { + pte_t pte = mk_pte(page, kmap_prot); unsigned int idx; unsigned long vaddr; void *kmap; @@ -76,7 +77,10 @@ void *kmap_atomic(struct page *page) * in place, so the contained TLB flush ensures the TLB is updated * with the new mapping. 
*/ - set_top_pte(vaddr, mk_pte(page, kmap_prot)); +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = pte; +#endif + set_top_pte(vaddr, pte); return (void *)vaddr; } @@ -93,12 +97,15 @@ void __kunmap_atomic(void *kvaddr) if (cache_is_vivt()) __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = __pte(0); +#endif #ifdef CONFIG_DEBUG_HIGHMEM BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); - set_top_pte(vaddr, __pte(0)); #else (void) idx; /* to kill a warning */ #endif + set_top_pte(vaddr, __pte(0)); kmap_atomic_idx_pop(); } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { /* this address was obtained through kmap_high_get() */ @@ -110,6 +117,7 @@ EXPORT_SYMBOL(__kunmap_atomic); void *kmap_atomic_pfn(unsigned long pfn) { + pte_t pte = pfn_pte(pfn, kmap_prot); unsigned long vaddr; int idx, type; @@ -121,7 +129,10 @@ void *kmap_atomic_pfn(unsigned long pfn) #ifdef CONFIG_DEBUG_HIGHMEM BUG_ON(!pte_none(get_top_pte(vaddr))); #endif - set_top_pte(vaddr, pfn_pte(pfn, kmap_prot)); +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = pte; +#endif + set_top_pte(vaddr, pte); return (void *)vaddr; } @@ -135,3 +146,29 @@ struct page *kmap_atomic_to_page(const void *ptr) return pte_page(get_top_pte(vaddr)); } + +#if defined CONFIG_PREEMPT_RT_FULL +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) +{ + int i; + + /* + * Clear @prev's kmap_atomic mappings + */ + for (i = 0; i < prev_p->kmap_idx; i++) { + int idx = i + KM_TYPE_NR * smp_processor_id(); + + set_top_pte(__fix_to_virt(FIX_KMAP_BEGIN + idx), __pte(0)); + } + /* + * Restore @next_p's kmap_atomic mappings + */ + for (i = 0; i < next_p->kmap_idx; i++) { + int idx = i + KM_TYPE_NR * smp_processor_id(); + + if (!pte_none(next_p->kmap_pte[i])) + set_top_pte(__fix_to_virt(FIX_KMAP_BEGIN + idx), + next_p->kmap_pte[i]); + } +} +#endif diff --git a/arch/arm/plat-versatile/platsmp.c b/arch/arm/plat-versatile/platsmp.c index 1e1b2d769748..82c366b222cb 100644 --- a/arch/arm/plat-versatile/platsmp.c +++ b/arch/arm/plat-versatile/platsmp.c @@ -31,7 +31,7 @@ static void __cpuinit write_pen_release(int val) outer_clean_range(__pa(&pen_release), __pa(&pen_release + 1)); } -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); void __cpuinit versatile_secondary_init(unsigned int cpu) { @@ -44,8 +44,8 @@ void __cpuinit versatile_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } int __cpuinit versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -56,7 +56,7 @@ int __cpuinit versatile_boot_secondary(unsigned int cpu, struct task_struct *idl * Set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * This is really belt and braces; we hold unintended secondary @@ -86,7 +86,7 @@ int __cpuinit versatile_boot_secondary(unsigned int cpu, struct task_struct *idl * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? 
-ENOSYS : 0; } diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c index b2f2d2d66849..9577e6930b39 100644 --- a/arch/avr32/mm/fault.c +++ b/arch/avr32/mm/fault.c @@ -81,7 +81,7 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs) * If we're in an interrupt or have no user context, we must * not take the fault... */ - if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM)) + if (!mm || regs->sr & SYSREG_BIT(GM) || pagefault_disabled()) goto no_context; local_irq_enable(); diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c index 73312ab6c696..1a403d9124e4 100644 --- a/arch/cris/mm/fault.c +++ b/arch/cris/mm/fault.c @@ -114,7 +114,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs, * user context, we must not take the fault. */ - if (in_atomic() || !mm) + if (!mm || pagefault_disabled()) goto no_context; retry: diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c index 331c1e2cfb67..e87972c1d7d1 100644 --- a/arch/frv/mm/fault.c +++ b/arch/frv/mm/fault.c @@ -78,7 +78,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (!mm || pagefault_disabled()) goto no_context; down_read(&mm->mmap_sem); diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 6cf0341f978e..dd8841559c43 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -98,7 +98,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re /* * If we're in an interrupt or have no user context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (!mm || pagefault_disabled()) goto no_context; #ifdef CONFIG_VIRTUAL_MEM_MAP diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c index 3cdfa9c1d091..69450568dbf6 100644 --- a/arch/m32r/mm/fault.c +++ b/arch/m32r/mm/fault.c @@ -114,7 +114,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, * If we're in an interrupt or have no user context or are running in an * atomic region then we must not take the fault.. */ - if (in_atomic() || !mm) + if (!mm || pagefault_disabled()) goto bad_area_nosemaphore; /* When running in the kernel we expect faults to occur only to diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index a563727806bf..9ea40dbb1ca0 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -85,7 +85,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, * If we're in an interrupt or have no user * context, we must not take the fault.. 
*/ - if (in_atomic() || !mm) + if (!mm || pagefault_disabled()) goto no_context; retry: diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index 731f739d17a1..81bb8462d4c5 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -108,7 +108,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11) is_write = 0; - if (unlikely(in_atomic() || !mm)) { + if (unlikely(!mm || pagefault_disabled())) { if (kernel_mode(regs)) goto bad_area_nosemaphore; diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index e53e2b40d695..a8cc8c35d378 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2115,7 +2115,7 @@ config CPU_R4400_WORKAROUNDS # config HIGHMEM bool "High Memory Support" - depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM + depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !PREEMPT_RT_FULL config CPU_SUPPORTS_HIGHMEM bool diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index fd3ef2c2afbc..85cf081ba3c2 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -572,6 +572,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { local_irq_enable(); + preempt_check_resched(); /* deal with pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 0fead53d1c26..cd030058e927 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -89,7 +89,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long writ * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (!mm || pagefault_disabled()) goto bad_area_nosemaphore; retry: diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c index d48a84fd7fae..34a83b972f38 100644 --- a/arch/mn10300/mm/fault.c +++ b/arch/mn10300/mm/fault.c @@ -168,7 +168,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long fault_code, * If we're in an interrupt or have no user * context, we must not take the fault.. 
*/ - if (in_atomic() || !mm) + if (!mm || pagefault_disabled()) goto no_context; retry: diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index f247a3480e8e..39037babb183 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -177,7 +177,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, int fault; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; - if (in_atomic() || !mm) + if (!mm || pagefault_disabled()) goto no_context; retry: diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index fe404e77246e..95e5d5c0e6ce 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -60,10 +60,11 @@ config LOCKDEP_SUPPORT config RWSEM_GENERIC_SPINLOCK bool + default y if PREEMPT_RT_FULL config RWSEM_XCHGADD_ALGORITHM bool - default y + default y if !PREEMPT_RT_FULL config GENERIC_LOCKBREAK bool @@ -132,6 +133,7 @@ config PPC select GENERIC_CLOCKEVENTS select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER + select HAVE_PREEMPT_LAZY select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA select CLONE_BACKWARDS @@ -285,7 +287,7 @@ menu "Kernel options" config HIGHMEM bool "High memory support" - depends on PPC32 + depends on PPC32 && !PREEMPT_RT_FULL source kernel/Kconfig.hz source kernel/Kconfig.preempt diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index ba7b1973866e..f50711f4c7cd 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -43,6 +43,8 @@ struct thread_info { int cpu; /* cpu we're on */ int preempt_count; /* 0 => preemptable, <0 => BUG */ + int preempt_lazy_count; /* 0 => preemptable, + <0 => BUG */ struct restart_block restart_block; unsigned long local_flags; /* private flags for thread */ @@ -90,8 +92,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling - TIF_NEED_RESCHED */ +#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling necessary */ #define TIF_32BIT 4 /* 32 bit binary */ #define TIF_PERFMON_WORK 5 /* work for pfm_handle_work() */ #define TIF_PERFMON_CTXSW 6 /* perfmon needs ctxsw calls */ @@ -107,6 +108,8 @@ static inline struct thread_info *current_thread_info(void) #define TIF_EMULATE_STACK_STORE 16 /* Is an instruction emulation for stack store? 
*/ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ +#define TIF_POLLING_NRFLAG 18 /* true if poll_idle() is polling + TIF_NEED_RESCHED */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) @@ -126,13 +129,16 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE) #define _TIF_NOHZ (1<<TIF_NOHZ) +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY) #define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ _TIF_NOHZ) #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ - _TIF_NOTIFY_RESUME | _TIF_UPROBE) + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ + _TIF_NEED_RESCHED_LAZY) #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR) +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) /* Bits in local_flags */ /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 302886b77de2..3da556dca648 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -165,6 +165,7 @@ int main(void) DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 22b45a4955cd..081f926193f2 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -890,7 +890,14 @@ resume_kernel: cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ bne restore andi. r8,r8,_TIF_NEED_RESCHED + bne+ 1f + lwz r0,TI_PREEMPT_LAZY(r9) + cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ + bne restore + lwz r0,TI_FLAGS(r9) + andi. r0,r0,_TIF_NEED_RESCHED_LAZY beq+ restore +1: lwz r3,_MSR(r1) andi. r0,r3,MSR_EE /* interrupts off? */ beq restore /* don't schedule if so */ @@ -901,11 +908,11 @@ resume_kernel: */ bl trace_hardirqs_off #endif -1: bl preempt_schedule_irq +2: bl preempt_schedule_irq CURRENT_THREAD_INFO(r9, r1) lwz r3,TI_FLAGS(r9) - andi. r0,r3,_TIF_NEED_RESCHED - bne- 1b + andi. r0,r3,_TIF_NEED_RESCHED_MASK + bne- 2b #ifdef CONFIG_TRACE_IRQFLAGS /* And now, to properly rebalance the above, we tell lockdep they * are being turned back on, which will happen when we return @@ -1226,7 +1233,7 @@ global_dbcr0: #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ do_work: /* r10 contains MSR_KERNEL here */ - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,_TIF_NEED_RESCHED_MASK beq do_user_signal do_resched: /* r10 contains MSR_KERNEL here */ @@ -1247,7 +1254,7 @@ recheck: MTMSRD(r10) /* disable interrupts */ CURRENT_THREAD_INFO(r9, r1) lwz r9,TI_FLAGS(r9) - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,_TIF_NEED_RESCHED_MASK bne- do_resched andi. 
r0,r9,_TIF_USER_WORK_MASK beq restore_user diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 38847767012d..3f53a9a7bce5 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -1,5 +1,5 @@ /* - * PowerPC version + * PowerPC version * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu> @@ -239,12 +239,12 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) RFI b . /* prevent speculative execution */ -syscall_error: +syscall_error: oris r5,r5,0x1000 /* Set SO bit in CR */ neg r3,r3 std r5,_CCR(r1) b .Lsyscall_error_cont - + /* Traced system call support */ syscall_dotrace: bl .save_nvgprs @@ -270,7 +270,7 @@ syscall_dotrace: syscall_enosys: li r3,-ENOSYS b syscall_exit - + syscall_exit_work: #ifdef CONFIG_PPC_BOOK3S mtmsrd r10,1 /* Restore RI */ @@ -333,7 +333,7 @@ _GLOBAL(save_nvgprs) std r0,_TRAP(r1) blr - + /* * The sigsuspend and rt_sigsuspend system calls can call do_signal * and thus put the process into the stopped state where we might @@ -406,7 +406,7 @@ DSCR_DEFAULT: * the fork code also. * * The code which creates the new task context is in 'copy_thread' - * in arch/powerpc/kernel/process.c + * in arch/powerpc/kernel/process.c */ .align 7 _GLOBAL(_switch) @@ -653,7 +653,7 @@ _GLOBAL(ret_from_except_lite) andi. r0,r4,_TIF_USER_WORK_MASK beq restore - andi. r0,r4,_TIF_NEED_RESCHED + andi. r0,r4,_TIF_NEED_RESCHED_MASK beq 1f bl .restore_interrupts SCHEDULE_USER @@ -703,10 +703,18 @@ resume_kernel: #ifdef CONFIG_PREEMPT /* Check if we need to preempt */ + lwz r8,TI_PREEMPT(r9) + cmpwi 0,r8,0 /* if non-zero, just restore regs and return */ + bne restore andi. r0,r4,_TIF_NEED_RESCHED + bne+ check_count + + andi. r0,r4,_TIF_NEED_RESCHED_LAZY beq+ restore + lwz r8,TI_PREEMPT_LAZY(r9) + /* Check that preempt_count() == 0 and interrupts are enabled */ - lwz r8,TI_PREEMPT(r9) +check_count: cmpwi cr1,r8,0 ld r0,SOFTE(r1) cmpdi r0,0 @@ -723,7 +731,7 @@ resume_kernel: /* Re-test flags and eventually loop */ CURRENT_THREAD_INFO(r9, r1) ld r4,TI_FLAGS(r9) - andi. r0,r4,_TIF_NEED_RESCHED + andi. r0,r4,_TIF_NEED_RESCHED_MASK bne 1b /* @@ -890,7 +898,7 @@ restore_check_irq_replay: bl .__check_irq_replay cmpwi cr0,r3,0 beq restore_no_replay - + /* * We need to re-emit an interrupt. We do so by re-using our * existing exception frame. We first change the trap value, @@ -932,7 +940,7 @@ restore_check_irq_replay: b .ret_from_except #endif /* CONFIG_PPC_DOORBELL */ 1: b .ret_from_except /* What else to do here ? */ - + unrecov_restore: addi r3,r1,STACK_FRAME_OVERHEAD bl .unrecoverable_exception @@ -944,7 +952,7 @@ unrecov_restore: * called with the MMU off. * * In addition, we need to be in 32b mode, at least for now. - * + * * Note: r3 is an input parameter to rtas, so don't trash it... 
*/ _GLOBAL(enter_rtas) @@ -978,7 +986,7 @@ _GLOBAL(enter_rtas) li r0,0 mtcr r0 -#ifdef CONFIG_BUG +#ifdef CONFIG_BUG /* There is no way it is acceptable to get here with interrupts enabled, * check it with the asm equivalent of WARN_ON */ @@ -986,7 +994,7 @@ _GLOBAL(enter_rtas) 1: tdnei r0,0 EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING #endif - + /* Hard-disable interrupts */ mfmsr r6 rldicl r7,r6,48,1 @@ -1000,7 +1008,7 @@ _GLOBAL(enter_rtas) std r1,PACAR1(r13) std r6,PACASAVEDMSR(r13) - /* Setup our real return addr */ + /* Setup our real return addr */ LOAD_REG_ADDR(r4,.rtas_return_loc) clrldi r4,r4,2 /* convert to realmode address */ mtlr r4 @@ -1008,7 +1016,7 @@ _GLOBAL(enter_rtas) li r0,0 ori r0,r0,MSR_EE|MSR_SE|MSR_BE|MSR_RI andc r0,r6,r0 - + li r9,1 rldicr r9,r9,MSR_SF_LG,(63-MSR_SF_LG) ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI @@ -1019,7 +1027,7 @@ _GLOBAL(enter_rtas) LOAD_REG_ADDR(r4, rtas) ld r5,RTASENTRY(r4) /* get the rtas->entry value */ ld r4,RTASBASE(r4) /* get the rtas->base value */ - + mtspr SPRN_SRR0,r5 mtspr SPRN_SRR1,r6 rfid @@ -1037,9 +1045,9 @@ _STATIC(rtas_return_loc) mfmsr r6 li r0,MSR_RI andc r6,r6,r0 - sync + sync mtmsrd r6 - + ld r1,PACAR1(r4) /* Restore our SP */ ld r4,PACASAVEDMSR(r4) /* Restore our MSR */ @@ -1137,7 +1145,7 @@ _GLOBAL(enter_prom) REST_10GPRS(22, r1) ld r4,_CCR(r1) mtcr r4 - + addi r1,r1,PROM_FRAME_SIZE ld r0,16(r1) mtlr r0 diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index ea185e0b3cae..7514bc4a77b4 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -603,6 +603,7 @@ void irq_ctx_init(void) } } +#ifndef CONFIG_PREEMPT_RT_FULL static inline void do_softirq_onstack(void) { struct thread_info *curtp, *irqtp; @@ -639,6 +640,7 @@ void do_softirq(void) local_irq_restore(flags); } +#endif irq_hw_number_t virq_to_hw(unsigned int virq) { diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index e469f30e6eeb..7deab8a2c9a6 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -36,6 +36,7 @@ .text +#ifndef CONFIG_PREEMPT_RT_FULL _GLOBAL(call_do_softirq) mflr r0 stw r0,4(r1) @@ -46,6 +47,7 @@ _GLOBAL(call_do_softirq) lwz r0,4(r1) mtlr r0 blr +#endif _GLOBAL(call_handle_irq) mflr r0 diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 6820e45f557b..9ff6d245387d 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -29,6 +29,7 @@ .text +#ifndef CONFIG_PREEMPT_RT_FULL _GLOBAL(call_do_softirq) mflr r0 std r0,16(r1) @@ -39,6 +40,7 @@ _GLOBAL(call_do_softirq) ld r0,16(r1) mtlr r0 blr +#endif _GLOBAL(call_handle_irq) ld r8,0(r6) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 8726779e1409..87f730e6197a 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -264,7 +264,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, if (!arch_irq_disabled_regs(regs)) local_irq_enable(); - if (in_atomic() || mm == NULL) { + if (in_atomic() || mm == NULL || pagefault_disabled()) { if (!user_mode(regs)) { rc = SIGSEGV; goto bail; diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c index b89ef65392dc..2898b737deb7 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c @@ -340,7 +340,7 @@ static int mpc52xx_irqhost_map(struct irq_domain *h, unsigned int virq, { int l1irq; int l2irq; - struct irq_chip *irqchip; + struct irq_chip *uninitialized_var(irqchip); 
void *hndlr; int type; u32 reg; diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index 1e121088826f..806cbbd86ec6 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -43,6 +43,7 @@ static irqreturn_t timebase_interrupt(int irq, void *dev) static struct irqaction tbint_irqaction = { .handler = timebase_interrupt, + .flags = IRQF_NO_THREAD, .name = "tbint", }; diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c index d4fa03f2b6ac..5e6ff38ea69f 100644 --- a/arch/powerpc/sysdev/cpm1.c +++ b/arch/powerpc/sysdev/cpm1.c @@ -120,6 +120,7 @@ static irqreturn_t cpm_error_interrupt(int irq, void *dev) static struct irqaction cpm_error_irqaction = { .handler = cpm_error_interrupt, + .flags = IRQF_NO_THREAD, .name = "error", }; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 047c3e4c59a2..d4f4acf8f7fe 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -296,7 +296,8 @@ static inline int do_exception(struct pt_regs *regs, int access) * user context. */ fault = VM_FAULT_BADCONTEXT; - if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) + if (unlikely(!user_space_fault(trans_exc_code) || + !mm || pagefault_disabled())) goto out; address = trans_exc_code & __FAIL_ADDR_MASK; @@ -440,7 +441,8 @@ void __kprobes do_asce_exception(struct pt_regs *regs) clear_tsk_thread_flag(current, TIF_PER_TRAP); trans_exc_code = regs->int_parm_long; - if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) + if (unlikely(!user_space_fault(trans_exc_code) || !mm || + pagefault_disabled())) goto no_context; down_read(&mm->mmap_sem); diff --git a/arch/score/mm/fault.c b/arch/score/mm/fault.c index 47b600e4b2c5..59fccbe9777b 100644 --- a/arch/score/mm/fault.c +++ b/arch/score/mm/fault.c @@ -72,7 +72,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, * If we're in an interrupt or have no user * context, we must not take the fault.. 
*/ - if (in_atomic() || !mm) + if (!mm || pagefault_disabled()) goto bad_area_nosemaphore; down_read(&mm->mmap_sem); diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index 063af10ff3c1..ae4b14181b80 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -149,6 +149,7 @@ void irq_ctx_exit(int cpu) hardirq_ctx[cpu] = NULL; } +#ifndef CONFIG_PREEMPT_RT_FULL asmlinkage void do_softirq(void) { unsigned long flags; @@ -191,6 +192,7 @@ asmlinkage void do_softirq(void) local_irq_restore(flags); } +#endif #else static inline void handle_one_irq(unsigned int irq) { diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 1f49c28affa9..8ff1613008dd 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -440,7 +440,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, * If we're in an interrupt, have no user context or are running * in an atomic region then we must not take the fault: */ - if (unlikely(in_atomic() || !mm)) { + if (unlikely(!mm || pagefault_disabled())) { bad_area_nosemaphore(regs, error_code, address); return; } diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 9ac9f1666339..6787bd3507ab 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -521,6 +521,10 @@ menu "Executable file formats" source "fs/Kconfig.binfmt" +config EARLY_PRINTK + bool + default y + config COMPAT bool depends on SPARC64 diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index 9bcbbe2c4e7e..1bc5cd808ff8 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -698,6 +698,7 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs) set_irq_regs(old_regs); } +#ifndef CONFIG_PREEMPT_RT_FULL void do_softirq(void) { unsigned long flags; @@ -723,6 +724,7 @@ void do_softirq(void) local_irq_restore(flags); } +#endif #ifdef CONFIG_HOTPLUG_CPU void fixup_irqs(void) diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index 1434526970a6..0884ccd78fc3 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -309,6 +309,7 @@ void __init setup_arch(char **cmdline_p) boot_flags_init(*cmdline_p); + early_console = &prom_early_console; register_console(&prom_early_console); printk("ARCH: "); diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 13785547e435..6482b8738408 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -551,6 +551,12 @@ static void __init init_sparc64_elf_hwcap(void) pause_patch(); } +static inline void register_prom_console(void) +{ + early_console = &prom_early_console; + register_console(&prom_early_console); +} + void __init setup_arch(char **cmdline_p) { /* Initialize PROM console and command line. */ @@ -562,7 +568,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_EARLYFB if (btext_find_display()) #endif - register_console(&prom_early_console); + register_prom_console(); if (tlb_type == hypervisor) printk("ARCH: SUN4V\n"); diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index e98bfda205a2..18cbe13ff42f 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -200,7 +200,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, * If we're in an interrupt or have no user * context, we must not take the fault.. 
*/ - if (in_atomic() || !mm) + if (!mm || pagefault_disabled()) goto no_context; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 5062ff389e83..2764ac69289a 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -321,7 +321,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (!mm || pagefault_disabled()) goto intr_or_no_mm; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 3d2b81c163a6..1ba0ccc717f3 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -360,7 +360,7 @@ static int handle_page_fault(struct pt_regs *regs, * If we're in an interrupt, have no user context or are running in an * atomic region then we must not take the fault. */ - if (in_atomic() || !mm) { + if (!mm || pagefault_disabled()) { vma = NULL; /* happy compiler */ goto bad_area_nosemaphore; } diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 089f3987e273..991b33af992b 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -39,7 +39,7 @@ int handle_page_fault(unsigned long address, unsigned long ip, * If the fault was during atomic operation, don't take the fault, just * fail. */ - if (in_atomic()) + if (pagefault_disabled()) goto out_nosemaphore; retry: diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fe120da25625..8dc65e2beeec 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -121,6 +121,7 @@ config X86 select OLD_SIGACTION if X86_32 select COMPAT_OLD_SIGACTION if IA32_EMULATION select RTC_LIB + select HAVE_PREEMPT_LAZY config INSTRUCTION_DECODER def_bool y @@ -177,8 +178,11 @@ config ARCH_MAY_HAVE_PC_FDC def_bool y depends on ISA_DMA_API +config RWSEM_GENERIC_SPINLOCK + def_bool PREEMPT_RT_FULL + config RWSEM_XCHGADD_ALGORITHM - def_bool y + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL config GENERIC_CALIBRATE_DELAY def_bool y @@ -460,7 +464,7 @@ config X86_MDFLD select MFD_INTEL_MSIC ---help--- Medfield is Intel's Low Power Intel Architecture (LPIA) based Moblin - Internet Device(MID) platform. + Internet Device(MID) platform. Unlike standard x86 PCs, Medfield does not have many legacy devices nor standard legacy replacement devices/features. e.g. Medfield does not contain i8259, i8254, HPET, legacy BIOS, most of the io ports. @@ -796,7 +800,7 @@ config IOMMU_HELPER config MAXSMP bool "Enable Maximum number of SMP Processors and NUMA Nodes" depends on X86_64 && SMP && DEBUG_KERNEL - select CPUMASK_OFFSTACK + select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL ---help--- Enable maximum number of CPUS and NUMA Nodes for this architecture. If unsure, say N. 
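The aesni-intel_glue.c hunks that follow all apply one transformation: kernel_fpu_begin()/kernel_fpu_end() move from around the blkcipher walk loop to inside it. Because kernel_fpu_begin() disables preemption while the FPU is in kernel use, this bounds the preempt-off region to a single chunk of the walk instead of the whole request. A condensed schematic of the ecb_encrypt() hunk below (declarations omitted; sketch only):

	/* Before: preemption disabled across the entire request. */
	kernel_fpu_begin();
	while ((nbytes = walk.nbytes)) {
		aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
			      nbytes & AES_BLOCK_MASK);
		nbytes &= AES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	kernel_fpu_end();

	/* After: preemption disabled only per chunk, so a waiting RT task
	 * can run between chunks, at the cost of extra FPU save/restore. */
	while ((nbytes = walk.nbytes)) {
		kernel_fpu_begin();
		aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
			      nbytes & AES_BLOCK_MASK);
		kernel_fpu_end();
		nbytes &= AES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}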
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index f80e668785c0..3fbe870b5c97 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -252,14 +252,14 @@ static int ecb_encrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, - nbytes & AES_BLOCK_MASK); + nbytes & AES_BLOCK_MASK); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - kernel_fpu_end(); return err; } @@ -276,14 +276,14 @@ static int ecb_decrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - kernel_fpu_end(); return err; } @@ -300,14 +300,14 @@ static int cbc_encrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - kernel_fpu_end(); return err; } @@ -324,14 +324,14 @@ static int cbc_decrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - kernel_fpu_end(); return err; } @@ -364,18 +364,20 @@ static int ctr_crypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { + kernel_fpu_begin(); aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } if (walk.nbytes) { + kernel_fpu_begin(); ctr_crypt_final(ctx, &walk); + kernel_fpu_end(); err = blkcipher_walk_done(desc, &walk, 0); } - kernel_fpu_end(); return err; } diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 6c896fbe21db..3cedb221a774 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -14,12 +14,21 @@ #define IRQ_STACK_ORDER 2 #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) -#define STACKFAULT_STACK 1 -#define DOUBLEFAULT_STACK 2 -#define NMI_STACK 3 -#define DEBUG_STACK 4 -#define MCE_STACK 5 -#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ +#ifdef CONFIG_PREEMPT_RT_FULL +# define STACKFAULT_STACK 0 +# define DOUBLEFAULT_STACK 1 +# define NMI_STACK 2 +# define DEBUG_STACK 0 +# define MCE_STACK 3 +# define N_EXCEPTION_STACKS 3 /* hw limit: 7 */ +#else +# define STACKFAULT_STACK 1 +# define DOUBLEFAULT_STACK 2 +# define NMI_STACK 3 +# define DEBUG_STACK 4 +# define MCE_STACK 5 +# define N_EXCEPTION_STACKS 5 
/* hw limit: 7 */ +#endif #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 35e67a457182..6ec0792b3b9f 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -23,6 +23,19 @@ typedef struct { unsigned long sig[_NSIG_WORDS]; } sigset_t; +/* + * Because some traps use the IST stack, we must keep preemption + * disabled while calling do_trap(), but do_trap() may call + * force_sig_info() which will grab the signal spin_locks for the + * task, which in PREEMPT_RT_FULL are mutexes. By defining + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the + * trap. + */ +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_X86_64) +#define ARCH_RT_DELAYS_SIGNAL_SEND +#endif + #ifndef CONFIG_COMPAT typedef sigset_t compat_sigset_t; #endif diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 6a998598f172..64fb5cbe54fa 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -57,7 +57,7 @@ */ static __always_inline void boot_init_stack_canary(void) { - u64 canary; + u64 uninitialized_var(canary); u64 tsc; #ifdef CONFIG_X86_64 @@ -68,8 +68,16 @@ static __always_inline void boot_init_stack_canary(void) * of randomness. The TSC only matters for very early init, * there it already has some randomness on most systems. Later * on during the bootup the random pool has true entropy too. + * + * For preempt-rt we need to weaken the randomness a bit, as + * we can't call into the random generator from atomic context + * due to locking constraints. We just leave canary + * uninitialized and use the TSC based randomness on top of + * it. 
*/ +#ifndef CONFIG_PREEMPT_RT_FULL get_random_bytes(&canary, sizeof(canary)); +#endif tsc = __native_read_tsc(); canary += tsc + (tsc << 32UL); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index a1df6e84691f..0365caf31d49 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -30,6 +30,8 @@ struct thread_info { __u32 cpu; /* current CPU */ int preempt_count; /* 0 => preemptable, <0 => BUG */ + int preempt_lazy_count; /* 0 => lazy preemptable, + <0 => BUG */ mm_segment_t addr_limit; struct restart_block restart_block; void __user *sysenter_return; @@ -81,6 +83,7 @@ struct thread_info { #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ +#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ @@ -106,6 +109,7 @@ struct thread_info { #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) @@ -156,6 +160,8 @@ struct thread_info { #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) + #define PREEMPT_ACTIVE 0x10000000 #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 9ed796ccc32c..7f116f21c3bf 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2391,7 +2391,8 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg) { /* If we are moving the irq we need to mask it */ - if (unlikely(irqd_is_setaffinity_pending(data))) { + if (unlikely(irqd_is_setaffinity_pending(data) && + !irqd_irq_inprogress(data))) { mask_ioapic(cfg); return true; } diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 28610822fb3c..a36d9cfb71ea 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -33,6 +33,7 @@ void common(void) { OFFSET(TI_status, thread_info, status); OFFSET(TI_addr_limit, thread_info, addr_limit); OFFSET(TI_preempt_count, thread_info, preempt_count); + OFFSET(TI_preempt_lazy_count, thread_info, preempt_lazy_count); BLANK(); OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 22018f70a671..4a309545c0c5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1104,7 +1104,9 @@ DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); */ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { [0 ... 
N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, +#if DEBUG_STACK > 0 [DEBUG_STACK - 1] = DEBUG_STKSZ +#endif }; static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 9239504b41cb..aaf4b9b94f38 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -18,6 +18,7 @@ #include <linux/rcupdate.h> #include <linux/kobject.h> #include <linux/uaccess.h> +#include <linux/kthread.h> #include <linux/kdebug.h> #include <linux/kernel.h> #include <linux/percpu.h> @@ -41,6 +42,7 @@ #include <linux/debugfs.h> #include <linux/irq_work.h> #include <linux/export.h> +#include <linux/jiffies.h> #include <asm/processor.h> #include <asm/mce.h> @@ -1256,7 +1258,7 @@ void mce_log_therm_throt_event(__u64 status) static unsigned long check_interval = 5 * 60; /* 5 minutes */ static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ -static DEFINE_PER_CPU(struct timer_list, mce_timer); +static DEFINE_PER_CPU(struct hrtimer, mce_timer); static unsigned long mce_adjust_timer_default(unsigned long interval) { @@ -1266,13 +1268,10 @@ static unsigned long mce_adjust_timer_default(unsigned long interval) static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default; -static void mce_timer_fn(unsigned long data) +static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer) { - struct timer_list *t = &__get_cpu_var(mce_timer); unsigned long iv; - WARN_ON(smp_processor_id() != data); - if (mce_available(__this_cpu_ptr(&cpu_info))) { machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_poll_banks)); @@ -1293,9 +1292,11 @@ static void mce_timer_fn(unsigned long data) __this_cpu_write(mce_next_interval, iv); /* Might have become 0 after CMCI storm subsided */ if (iv) { - t->expires = jiffies + iv; - add_timer_on(t, smp_processor_id()); + hrtimer_forward_now(timer, ns_to_ktime( + jiffies_to_usecs(iv) * 1000ULL)); + return HRTIMER_RESTART; } + return HRTIMER_NORESTART; } /* @@ -1303,28 +1304,37 @@ static void mce_timer_fn(unsigned long data) */ void mce_timer_kick(unsigned long interval) { - struct timer_list *t = &__get_cpu_var(mce_timer); - unsigned long when = jiffies + interval; + struct hrtimer *t = &__get_cpu_var(mce_timer); unsigned long iv = __this_cpu_read(mce_next_interval); - if (timer_pending(t)) { - if (time_before(when, t->expires)) - mod_timer_pinned(t, when); + if (hrtimer_active(t)) { + s64 exp; + s64 intv_us; + + intv_us = jiffies_to_usecs(interval); + exp = ktime_to_us(hrtimer_expires_remaining(t)); + if (intv_us < exp) { + hrtimer_cancel(t); + hrtimer_start_range_ns(t, + ns_to_ktime(intv_us * 1000), + 0, HRTIMER_MODE_REL_PINNED); + } } else { - t->expires = round_jiffies(when); - add_timer_on(t, smp_processor_id()); + hrtimer_start_range_ns(t, + ns_to_ktime(jiffies_to_usecs(interval) * 1000ULL), + 0, HRTIMER_MODE_REL_PINNED); } if (interval < iv) __this_cpu_write(mce_next_interval, interval); } -/* Must not be called in IRQ context where del_timer_sync() can deadlock */ +/* Must not be called in IRQ context where hrtimer_cancel() can deadlock */ static void mce_timer_delete_all(void) { int cpu; for_each_online_cpu(cpu) - del_timer_sync(&per_cpu(mce_timer, cpu)); + hrtimer_cancel(&per_cpu(mce_timer, cpu)); } static void mce_do_trigger(struct work_struct *work) @@ -1334,6 +1344,63 @@ static void mce_do_trigger(struct work_struct *work) static DECLARE_WORK(mce_trigger_work, mce_do_trigger); +static void __mce_notify_work(void) +{ + /* Not more than two 
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index addb207dab92..52b4bcd33b4f 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -21,10 +21,14 @@
		(N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)

 static char x86_stack_ids[][8] = {
+#if DEBUG_STACK > 0
		[ DEBUG_STACK-1			]	= "#DB",
+#endif
		[ NMI_STACK-1			]	= "NMI",
		[ DOUBLEFAULT_STACK-1		]	= "#DF",
+#if STACKFAULT_STACK > 0
		[ STACKFAULT_STACK-1		]	= "#SS",
+#endif
		[ MCE_STACK-1			]	= "#MC",
 #if DEBUG_STKSZ > EXCEPTION_STKSZ
		[ N_EXCEPTION_STACKS ...
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 8f3e2dec1df3..393a09a70524 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -364,14 +364,22 @@ ENTRY(resume_kernel)
	DISABLE_INTERRUPTS(CLBR_ANY)
	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
	jnz restore_all
-need_resched:
	movl TI_flags(%ebp), %ecx	# need_resched set ?
	testb $_TIF_NEED_RESCHED, %cl
+	jnz 1f
+
+	cmpl $0,TI_preempt_lazy_count(%ebp)	# non-zero preempt_lazy_count ?
+	jnz restore_all
+	testl $_TIF_NEED_RESCHED_LAZY, %ecx
	jz restore_all
-	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)	# interrupts off (exception path) ?
+
+1:	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)	# interrupts off (exception path) ?
	jz restore_all
	call preempt_schedule_irq
-	jmp need_resched
+	movl TI_flags(%ebp), %ecx	# need_resched set ?
+	testl $_TIF_NEED_RESCHED_MASK, %ecx
+	jnz 1b
+	jmp restore_all
 END(resume_kernel)
 #endif
	CFI_ENDPROC
@@ -607,7 +615,7 @@ ENDPROC(system_call)
	ALIGN
	RING0_PTREGS_FRAME		# can't unwind into user space anyway
 work_pending:
-	testb $_TIF_NEED_RESCHED, %cl
+	testl $_TIF_NEED_RESCHED_MASK, %ecx
	jz work_notifysig
 work_resched:
	call schedule
@@ -620,7 +628,7 @@ work_resched:
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done other
					# than syscall tracing?
	jz restore_all
-	testb $_TIF_NEED_RESCHED, %cl
+	testl $_TIF_NEED_RESCHED_MASK, %ecx
	jnz work_resched

 work_notifysig:			# deal with pending signals and
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 727208941030..f005eb455cab 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -673,8 +673,8 @@ sysret_check:
	/* Handle reschedules */
	/* edx:	work, edi: workmask */
 sysret_careful:
-	bt $TIF_NEED_RESCHED,%edx
-	jnc sysret_signal
+	testl $_TIF_NEED_RESCHED_MASK,%edx
+	jz sysret_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq_cfi %rdi
@@ -786,8 +786,8 @@ GLOBAL(int_with_check)
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
 int_careful:
-	bt $TIF_NEED_RESCHED,%edx
-	jnc int_very_careful
+	testl $_TIF_NEED_RESCHED_MASK,%edx
+	jz int_very_careful
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq_cfi %rdi
@@ -1086,8 +1086,8 @@ bad_iret:
	/* edi: workmask, edx: work */
 retint_careful:
	CFI_RESTORE_STATE
-	bt $TIF_NEED_RESCHED,%edx
-	jnc retint_signal
+	testl $_TIF_NEED_RESCHED_MASK,%edx
+	jz retint_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq_cfi %rdi
@@ -1120,9 +1120,15 @@ retint_signal:
 ENTRY(retint_kernel)
	cmpl $0,TI_preempt_count(%rcx)
	jnz retint_restore_args
-	bt $TIF_NEED_RESCHED,TI_flags(%rcx)
+	bt $TIF_NEED_RESCHED,TI_flags(%rcx)
+	jc 1f
+
+	cmpl $0,TI_preempt_lazy_count(%rcx)
+	jnz retint_restore_args
+	bt $TIF_NEED_RESCHED_LAZY,TI_flags(%rcx)
	jnc retint_restore_args
-	bt $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
+
+1:	bt $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
@@ -1334,6 +1340,7 @@ bad_gs:
	jmp  2b
	.previous

+#ifndef CONFIG_PREEMPT_RT_FULL
 /* Call softirq on interrupt stack. Interrupts are off. */
 ENTRY(call_softirq)
	CFI_STARTPROC
@@ -1353,6 +1360,7 @@ ENTRY(call_softirq)
	ret
	CFI_ENDPROC
 END(call_softirq)
+#endif

 #ifdef CONFIG_XEN
 zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
@@ -1522,7 +1530,7 @@ paranoid_userspace:
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
-	testl $_TIF_NEED_RESCHED,%ebx
+	testl $_TIF_NEED_RESCHED_MASK,%ebx
	jnz paranoid_schedule
	movl %ebx,%edx			/* arg3: thread flags */
	TRACE_IRQS_ON
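The entry-code hunks above implement the "lazy preemption" check: _TIF_NEED_RESCHED_MASK is taken here to be the OR of the ordinary and the lazy reschedule flags, and the lazy flag is additionally gated by preempt_lazy_count. A C-level sketch of the decision the assembly encodes follows; it is illustrative pseudocode for the control flow, not kernel API.

#define _TIF_NEED_RESCHED_MASK	(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)

/* Mirrors resume_kernel (32-bit) / retint_kernel (64-bit) above */
static bool should_preempt_irq(struct thread_info *ti, struct pt_regs *regs)
{
	if (ti->preempt_count)			/* preemption disabled */
		return false;

	if (!(ti->flags & _TIF_NEED_RESCHED)) {
		/* Only the lazy flag may apply: honour preempt_lazy_count */
		if (ti->preempt_lazy_count)
			return false;
		if (!(ti->flags & _TIF_NEED_RESCHED_LAZY))
			return false;
	}

	/* The interrupted context must have had interrupts enabled */
	if (!(regs->flags & X86_EFLAGS_IF))
		return false;

	return true;	/* caller invokes preempt_schedule_irq() */
}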
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 344faf8d0d62..f60ecc0d4db4 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -149,6 +149,7 @@ void __cpuinit irq_ctx_init(int cpu)
	       cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu));
 }

+#ifndef CONFIG_PREEMPT_RT_FULL
 asmlinkage void do_softirq(void)
 {
	unsigned long flags;
@@ -179,6 +180,7 @@ asmlinkage void do_softirq(void)

	local_irq_restore(flags);
 }
+#endif

 bool handle_irq(unsigned irq, struct pt_regs *regs)
 {
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index d04d3ecded62..831f247b5798 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -88,7 +88,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
	return true;
 }

-
+#ifndef CONFIG_PREEMPT_RT_FULL
 extern void call_softirq(void);

 asmlinkage void do_softirq(void)
@@ -108,3 +108,4 @@ asmlinkage void do_softirq(void)
	}
	local_irq_restore(flags);
 }
+#endif
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index ca8f703a1e70..129b8bb73de2 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -18,6 +18,7 @@ void smp_irq_work_interrupt(struct pt_regs *regs)
	irq_exit();
 }

+#ifndef CONFIG_PREEMPT_RT_FULL
 void arch_irq_work_raise(void)
 {
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -28,3 +29,4 @@ void arch_irq_work_raise(void)
	apic_wait_icr_idle();
 #endif
 }
+#endif
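On PREEMPT_RT_FULL, softirqs are processed in task context, so the arch-specific do_softirq()/call_softirq stack switching and the irq_work self-IPI above are compiled out. The loop below is a conceptual sketch of thread-context softirq processing, loosely modelled on ksoftirqd; the demo_* name is invented, and this is not the actual RT kernel/softirq.c implementation.

#include <linux/kthread.h>
#include <linux/interrupt.h>

static int demo_softirqd(void *unused)
{
	while (!kthread_should_stop()) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (!local_softirq_pending())
			schedule();		/* wait for raised softirqs */
		__set_current_state(TASK_RUNNING);

		local_irq_disable();
		if (local_softirq_pending())
			__do_softirq();		/* handlers run in task context */
		local_irq_enable();
		cond_resched();
	}
	return 0;
}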
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 7305f7dfc7ab..ee29e6c11f40 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -36,6 +36,7 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/kdebug.h>
+#include <linux/highmem.h>

 #include <asm/pgtable.h>
 #include <asm/ldt.h>
@@ -214,6 +215,35 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 }
 EXPORT_SYMBOL_GPL(start_thread);

+#ifdef CONFIG_PREEMPT_RT_FULL
+static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
+{
+	int i;
+
+	/*
+	 * Clear @prev's kmap_atomic mappings
+	 */
+	for (i = 0; i < prev_p->kmap_idx; i++) {
+		int idx = i + KM_TYPE_NR * smp_processor_id();
+		pte_t *ptep = kmap_pte - idx;
+
+		kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx));
+	}
+	/*
+	 * Restore @next_p's kmap_atomic mappings
+	 */
+	for (i = 0; i < next_p->kmap_idx; i++) {
+		int idx = i + KM_TYPE_NR * smp_processor_id();
+
+		if (!pte_none(next_p->kmap_pte[i]))
+			set_pte(kmap_pte - idx, next_p->kmap_pte[i]);
+	}
+}
+#else
+static inline void
+switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
+#endif
+
 /*
  * switch_to(x,y) should switch tasks from x to y.
@@ -293,6 +323,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
		       task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
		__switch_to_xtra(prev_p, next_p, tss);

+	switch_kmaps(prev_p, next_p);
+
	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
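switch_kmaps() exists because kmap_atomic() no longer disables preemption on RT: a task can be scheduled out while holding atomic kmaps, and another task may reuse the same per-CPU fixmap slots in the meantime. The shadow copies in current->kmap_pte[] (written in the highmem_32.c/iomap_32.c hunks further down) let the scheduler tear down and replay the slots. A hedged illustration of the situation this handles; the demo function name is invented.

#include <linux/highmem.h>
#include <linux/string.h>

/*
 * On RT this copy can be preempted between kmap_atomic() and
 * kunmap_atomic(). Each mapping is recorded in current->kmap_pte[],
 * so switch_kmaps() restores 'dst' and 'src' when the task runs again
 * (the RT series also disables migration inside kmap_atomic(), so the
 * task stays on the same CPU's fixmap window).
 */
static void demo_copy_highpage(struct page *to, struct page *from)
{
	char *dst = kmap_atomic(to);	/* pte shadowed in current->kmap_pte */
	char *src = kmap_atomic(from);

	memcpy(dst, src, PAGE_SIZE);	/* may be preempted here on RT */

	kunmap_atomic(src);
	kunmap_atomic(dst);		/* shadow slot cleared again */
}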
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 087ab2af381a..d827bf98fd60 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -743,6 +743,14 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
		mce_notify_process();
 #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */

+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
+	if (unlikely(current->forced_info.si_signo)) {
+		struct task_struct *t = current;
+		force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
+		t->forced_info.si_signo = 0;
+	}
+#endif
+
	if (thread_info_flags & _TIF_UPROBE)
		uprobe_notify_resume(regs);

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 772e2a846dec..cbd25b2caf6e 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -85,9 +85,21 @@ static inline void conditional_sti(struct pt_regs *regs)
		local_irq_enable();
 }

-static inline void preempt_conditional_sti(struct pt_regs *regs)
+static inline void conditional_sti_ist(struct pt_regs *regs)
 {
+#ifdef CONFIG_X86_64
+	/*
+	 * X86_64 uses a per CPU stack on the IST for certain traps
+	 * like int3. The task can not be preempted when using one
+	 * of these stacks, thus preemption must be disabled, otherwise
+	 * the stack can be corrupted if the task is scheduled out,
+	 * and another task comes in and uses this stack.
+	 *
+	 * On x86_32 the task keeps its own stack and it is OK if the
+	 * task schedules out.
+	 */
	inc_preempt_count();
+#endif
	if (regs->flags & X86_EFLAGS_IF)
		local_irq_enable();
 }
@@ -98,11 +110,13 @@ static inline void conditional_cli(struct pt_regs *regs)
		local_irq_disable();
 }

-static inline void preempt_conditional_cli(struct pt_regs *regs)
+static inline void conditional_cli_ist(struct pt_regs *regs)
 {
	if (regs->flags & X86_EFLAGS_IF)
		local_irq_disable();
+#ifdef CONFIG_X86_64
	dec_preempt_count();
+#endif
 }

 static int __kprobes
@@ -235,9 +249,9 @@ dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
	prev_state = exception_enter();
	if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
		       X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
-		preempt_conditional_sti(regs);
+		conditional_sti_ist(regs);
		do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs,
			error_code, NULL);
-		preempt_conditional_cli(regs);
+		conditional_cli_ist(regs);
	}
	exception_exit(prev_state);
 }
@@ -340,9 +354,9 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
	 * as we may switch to the interrupt stack.
	 */
	debug_stack_usage_inc();
-	preempt_conditional_sti(regs);
+	conditional_sti_ist(regs);
	do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
-	preempt_conditional_cli(regs);
+	conditional_cli_ist(regs);
	debug_stack_usage_dec();
 exit:
	exception_exit(prev_state);
@@ -448,12 +462,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
	debug_stack_usage_inc();

	/* It's safe to allow irq's after DR6 has been saved */
-	preempt_conditional_sti(regs);
+	conditional_sti_ist(regs);

	if (regs->flags & X86_VM_MASK) {
		handle_vm86_trap((struct kernel_vm86_regs *) regs,
				error_code, X86_TRAP_DB);
-		preempt_conditional_cli(regs);
+		conditional_cli_ist(regs);
		debug_stack_usage_dec();
		goto exit;
	}
@@ -473,7 +487,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
	si_code = get_si_code(tsk->thread.debugreg6);
	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
		send_sigtrap(tsk, regs, error_code, si_code);
-	preempt_conditional_cli(regs);
+	conditional_cli_ist(regs);
	debug_stack_usage_dec();

 exit:
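The rename from preempt_conditional_sti/cli to conditional_sti_ist makes the intent explicit: preemption is disabled only where the per-CPU IST stack actually requires it (x86_64), which matters on RT because a preempt-disabled region must not take sleeping locks. A minimal sketch of how the pair brackets a trap handler, mirroring do_int3() above; the handler name is invented and the arguments are elided.

static void demo_trap_handler(struct pt_regs *regs, long error_code)
{
	conditional_sti_ist(regs);	/* irqs back on if they were on;
					 * on x86_64 also preempt-disable
					 * while on the IST stack */

	/* ... handle the trap; must not sleep on x86_64 ... */

	conditional_cli_ist(regs);	/* undo, in reverse order */
}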
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e8ba99c34180..d34574282d9b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5326,6 +5326,13 @@ int kvm_arch_init(void *opaque)
		goto out;
	}

+#ifdef CONFIG_PREEMPT_RT_FULL
+	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+		printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n");
+		return -EOPNOTSUPP;
+	}
+#endif
+
	r = kvm_mmu_module_init();
	if (r)
		goto out_free_percpu;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 654be4ae3047..a71d337b8955 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1104,7 +1104,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
	 * If we're in an interrupt, have no user context or are running
	 * in an atomic region then we must not take the fault:
	 */
-	if (unlikely(in_atomic() || !mm)) {
+	if (unlikely(!mm || pagefault_disabled())) {
		bad_area_nosemaphore(regs, error_code, address);
		return;
	}
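As in the alpha and xtensa fault handlers in this series, x86 stops using in_atomic() as the "may not fault" test: on RT, much formerly-atomic code becomes preemptible, so the condition is tracked explicitly per task. Below is a simplified sketch of the assumed semantics; the pagefault_disabled field is the per-task counter the RT series introduces, and the details may differ from the actual patch.

#include <linux/sched.h>
#include <linux/hardirq.h>

static inline void demo_pagefault_disable(void)
{
	current->pagefault_disabled++;
	barrier();	/* order against the faulting access */
}

static inline void demo_pagefault_enable(void)
{
	barrier();
	current->pagefault_disabled--;
}

/* What the fault handlers above test instead of in_atomic() */
static inline bool demo_pagefault_disabled(void)
{
	return in_atomic() || current->pagefault_disabled;
}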
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 252b8f5489ba..0f00d9746604 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -31,6 +31,7 @@ EXPORT_SYMBOL(kunmap);
  */
 void *kmap_atomic_prot(struct page *page, pgprot_t prot)
 {
+	pte_t pte = mk_pte(page, prot);
	unsigned long vaddr;
	int idx, type;

@@ -44,7 +45,10 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
	idx = type + KM_TYPE_NR*smp_processor_id();
	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
	BUG_ON(!pte_none(*(kmap_pte-idx)));
-	set_pte(kmap_pte-idx, mk_pte(page, prot));
+#ifdef CONFIG_PREEMPT_RT_FULL
+	current->kmap_pte[type] = pte;
+#endif
+	set_pte(kmap_pte-idx, pte);
	arch_flush_lazy_mmu_mode();

	return (void *)vaddr;
@@ -87,6 +91,9 @@ void __kunmap_atomic(void *kvaddr)
		 * is a bad idea also, in case the page changes cacheability
		 * attributes or becomes a protected page in a hypervisor.
		 */
+#ifdef CONFIG_PREEMPT_RT_FULL
+		current->kmap_pte[type] = __pte(0);
+#endif
		kpte_clear_flush(kmap_pte-idx, vaddr);
		kmap_atomic_idx_pop();
		arch_flush_lazy_mmu_mode();
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 7b179b499fa3..62377d67ab07 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -56,6 +56,7 @@ EXPORT_SYMBOL_GPL(iomap_free);

 void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
 {
+	pte_t pte = pfn_pte(pfn, prot);
	unsigned long vaddr;
	int idx, type;

@@ -64,7 +65,12 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
	type = kmap_atomic_idx_push();
	idx = type + KM_TYPE_NR * smp_processor_id();
	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-	set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
+	WARN_ON(!pte_none(*(kmap_pte - idx)));
+
+#ifdef CONFIG_PREEMPT_RT_FULL
+	current->kmap_pte[type] = pte;
+#endif
+	set_pte(kmap_pte - idx, pte);
	arch_flush_lazy_mmu_mode();

	return (void *)vaddr;
@@ -110,6 +116,9 @@ iounmap_atomic(void __iomem *kvaddr)
		 * is a bad idea also, in case the page changes cacheability
		 * attributes or becomes a protected page in a hypervisor.
		 */
+#ifdef CONFIG_PREEMPT_RT_FULL
+		current->kmap_pte[type] = __pte(0);
+#endif
		kpte_clear_flush(kmap_pte-idx, vaddr);
		kmap_atomic_idx_pop();
	}
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 4b7bc8db170f..d57c257b469c 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -57,7 +57,7 @@ void do_page_fault(struct pt_regs *regs)
	/* If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
-	if (in_atomic() || !mm) {
+	if (!mm || pagefault_disabled()) {
		bad_page_fault(regs, address, SIGSEGV);
		return;
	}
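The current->kmap_pte[] stores added in highmem_32.c and iomap_32.c above are the other half of switch_kmaps() from process_32.c: every pte installed in a per-CPU fixmap slot is shadowed in the owning task, so it can be cleared for the outgoing task and replayed for the incoming one. A debug-style sketch of the invariant this maintains; the kmap_idx and kmap_pte fields are the per-task state assumed by the RT series.

/*
 * For each live slot i < current->kmap_idx, the task's shadow entry
 * must match the pte actually installed in this CPU's fixmap window.
 */
static void demo_assert_kmap_shadow_in_sync(void)
{
	int cpu = smp_processor_id();
	int i;

	for (i = 0; i < current->kmap_idx; i++) {
		int idx = i + KM_TYPE_NR * cpu;

		WARN_ON(pte_val(current->kmap_pte[i]) !=
			pte_val(*(kmap_pte - idx)));
	}
}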