diff options
author | Steven Rostedt (Red Hat) <rostedt@goodmis.org> | 2014-10-31 14:44:50 -0400 |
---|---|---|
committer | Steven Rostedt <rostedt@goodmis.org> | 2014-10-31 14:44:50 -0400 |
commit | 1bd930d84ef6c382e9e18f16cbbf7c120b28cb21 (patch) | |
tree | d33796bef5dff5fbdedf213e88b15698bd62f2c8 /arch/x86 | |
parent | 3448c4ac838aa08a8e4400c56d7d613b3f3d3c8e (diff) | |
parent | e21af7dfe5795138dc2466dc5684d3297acd48a9 (diff) |
Merge tag 'v3.14.16' into v3.14-rt
This is the 3.14.16 stable release
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/Kconfig | 25 | ||||
-rw-r--r-- | arch/x86/include/asm/espfix.h | 16 | ||||
-rw-r--r-- | arch/x86/include/asm/irqflags.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable_64_types.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/setup.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/kernel/entry_32.S | 12 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 77 | ||||
-rw-r--r-- | arch/x86/kernel/espfix_64.c | 208 | ||||
-rw-r--r-- | arch/x86/kernel/ldt.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/paravirt_patch_64.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 7 | ||||
-rw-r--r-- | arch/x86/mm/dump_pagetables.c | 31 | ||||
-rw-r--r-- | arch/x86/vdso/vdso32-setup.c | 8 | ||||
-rw-r--r-- | arch/x86/xen/setup.c | 9 |
15 files changed, 363 insertions, 49 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5df95226c2af..5677140d7b82 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -970,10 +970,27 @@ config VM86 default y depends on X86_32 ---help--- - This option is required by programs like DOSEMU to run 16-bit legacy - code on X86 processors. It also may be needed by software like - XFree86 to initialize some video cards via BIOS. Disabling this - option saves about 6k. + This option is required by programs like DOSEMU to run + 16-bit real mode legacy code on x86 processors. It also may + be needed by software like XFree86 to initialize some video + cards via BIOS. Disabling this option saves about 6K. + +config X86_16BIT + bool "Enable support for 16-bit segments" if EXPERT + default y + ---help--- + This option is required by programs like Wine to run 16-bit + protected mode legacy code on x86 processors. Disabling + this option saves about 300 bytes on i386, or around 6K text + plus 16K runtime memory on x86-64, + +config X86_ESPFIX32 + def_bool y + depends on X86_16BIT && X86_32 + +config X86_ESPFIX64 + def_bool y + depends on X86_16BIT && X86_64 config TOSHIBA tristate "Toshiba Laptop support" diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h new file mode 100644 index 000000000000..99efebb2f69d --- /dev/null +++ b/arch/x86/include/asm/espfix.h @@ -0,0 +1,16 @@ +#ifndef _ASM_X86_ESPFIX_H +#define _ASM_X86_ESPFIX_H + +#ifdef CONFIG_X86_64 + +#include <asm/percpu.h> + +DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); +DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr); + +extern void init_espfix_bsp(void); +extern void init_espfix_ap(void); + +#endif /* CONFIG_X86_64 */ + +#endif /* _ASM_X86_ESPFIX_H */ diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index bba3cf88e624..0a8b519226b8 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -129,7 +129,7 @@ static inline notrace unsigned long arch_local_irq_save(void) #define PARAVIRT_ADJUST_EXCEPTION_FRAME /* */ -#define INTERRUPT_RETURN iretq +#define INTERRUPT_RETURN jmp native_iret #define USERGS_SYSRET64 \ swapgs; \ sysretq; diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index c883bf726398..7166e25ecb57 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -61,6 +61,8 @@ typedef struct { pteval_t pte; } pte_t; #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) #define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) +#define ESPFIX_PGD_ENTRY _AC(-2, UL) +#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT) #define EARLY_DYNAMIC_PAGE_TABLES 64 diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index d62c9f809bc5..75b14ca135be 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -65,6 +65,8 @@ static inline void x86_ce4100_early_setup(void) { } #ifndef _SETUP +#include <asm/espfix.h> + /* * This is set up by the setup-routine at boot-time */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index cb648c84b327..56bac868cb91 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-y += syscall_$(BITS).o obj-$(CONFIG_X86_64) += vsyscall_64.o obj-$(CONFIG_X86_64) += vsyscall_emu_64.o +obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o obj-$(CONFIG_SYSFS) += ksysfs.o obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o topology.o kdebugfs.o diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 8c73349c654d..1dfe3373e838 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -545,6 +545,7 @@ syscall_exit: restore_all: TRACE_IRQS_IRET restore_all_notrace: +#ifdef CONFIG_X86_ESPFIX32 movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS # Warning: PT_OLDSS(%esp) contains the wrong/random values if we # are returning to the kernel. @@ -555,6 +556,7 @@ restore_all_notrace: cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax CFI_REMEMBER_STATE je ldt_ss # returning to user-space with LDT SS +#endif restore_nocheck: RESTORE_REGS 4 # skip orig_eax/error_code irq_return: @@ -567,6 +569,7 @@ ENTRY(iret_exc) .previous _ASM_EXTABLE(irq_return,iret_exc) +#ifdef CONFIG_X86_ESPFIX32 CFI_RESTORE_STATE ldt_ss: #ifdef CONFIG_PARAVIRT @@ -610,6 +613,7 @@ ldt_ss: lss (%esp), %esp /* switch to espfix segment */ CFI_ADJUST_CFA_OFFSET -8 jmp restore_nocheck +#endif CFI_ENDPROC ENDPROC(system_call) @@ -722,6 +726,7 @@ END(syscall_badsys) * the high word of the segment base from the GDT and swiches to the * normal stack and adjusts ESP with the matching offset. */ +#ifdef CONFIG_X86_ESPFIX32 /* fixup the stack */ mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */ mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ @@ -731,8 +736,10 @@ END(syscall_badsys) pushl_cfi %eax lss (%esp), %esp /* switch to the normal stack segment */ CFI_ADJUST_CFA_OFFSET -8 +#endif .endm .macro UNWIND_ESPFIX_STACK +#ifdef CONFIG_X86_ESPFIX32 movl %ss, %eax /* see if on espfix stack */ cmpw $__ESPFIX_SS, %ax @@ -743,6 +750,7 @@ END(syscall_badsys) /* switch to normal stack */ FIXUP_ESPFIX_STACK 27: +#endif .endm /* @@ -1373,11 +1381,13 @@ END(debug) ENTRY(nmi) RING0_INT_FRAME ASM_CLAC +#ifdef CONFIG_X86_ESPFIX32 pushl_cfi %eax movl %ss, %eax cmpw $__ESPFIX_SS, %ax popl_cfi %eax je nmi_espfix_stack +#endif cmpl $ia32_sysenter_target,(%esp) je nmi_stack_fixup pushl_cfi %eax @@ -1417,6 +1427,7 @@ nmi_debug_stack_check: FIX_STACK 24, nmi_stack_correct, 1 jmp nmi_stack_correct +#ifdef CONFIG_X86_ESPFIX32 nmi_espfix_stack: /* We have a RING0_INT_FRAME here. * @@ -1438,6 +1449,7 @@ nmi_espfix_stack: lss 12+4(%esp), %esp # back to espfix stack CFI_ADJUST_CFA_OFFSET -24 jmp irq_return +#endif CFI_ENDPROC END(nmi) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index d893814a0c18..9d702f33f745 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -58,6 +58,7 @@ #include <asm/asm.h> #include <asm/context_tracking.h> #include <asm/smap.h> +#include <asm/pgtable_types.h> #include <linux/err.h> /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ @@ -1041,12 +1042,45 @@ restore_args: irq_return: INTERRUPT_RETURN - _ASM_EXTABLE(irq_return, bad_iret) -#ifdef CONFIG_PARAVIRT ENTRY(native_iret) + /* + * Are we returning to a stack segment from the LDT? Note: in + * 64-bit mode SS:RSP on the exception stack is always valid. + */ +#ifdef CONFIG_X86_ESPFIX64 + testb $4,(SS-RIP)(%rsp) + jnz native_irq_return_ldt +#endif + +native_irq_return_iret: iretq - _ASM_EXTABLE(native_iret, bad_iret) + _ASM_EXTABLE(native_irq_return_iret, bad_iret) + +#ifdef CONFIG_X86_ESPFIX64 +native_irq_return_ldt: + pushq_cfi %rax + pushq_cfi %rdi + SWAPGS + movq PER_CPU_VAR(espfix_waddr),%rdi + movq %rax,(0*8)(%rdi) /* RAX */ + movq (2*8)(%rsp),%rax /* RIP */ + movq %rax,(1*8)(%rdi) + movq (3*8)(%rsp),%rax /* CS */ + movq %rax,(2*8)(%rdi) + movq (4*8)(%rsp),%rax /* RFLAGS */ + movq %rax,(3*8)(%rdi) + movq (6*8)(%rsp),%rax /* SS */ + movq %rax,(5*8)(%rdi) + movq (5*8)(%rsp),%rax /* RSP */ + movq %rax,(4*8)(%rdi) + andl $0xffff0000,%eax + popq_cfi %rdi + orq PER_CPU_VAR(espfix_stack),%rax + SWAPGS + movq %rax,%rsp + popq_cfi %rax + jmp native_irq_return_iret #endif .section .fixup,"ax" @@ -1125,9 +1159,40 @@ check_int_off: call preempt_schedule_irq jmp exit_intr #endif - CFI_ENDPROC END(common_interrupt) + + /* + * If IRET takes a fault on the espfix stack, then we + * end up promoting it to a doublefault. In that case, + * modify the stack to make it look like we just entered + * the #GP handler from user space, similar to bad_iret. + */ +#ifdef CONFIG_X86_ESPFIX64 + ALIGN +__do_double_fault: + XCPT_FRAME 1 RDI+8 + movq RSP(%rdi),%rax /* Trap on the espfix stack? */ + sarq $PGDIR_SHIFT,%rax + cmpl $ESPFIX_PGD_ENTRY,%eax + jne do_double_fault /* No, just deliver the fault */ + cmpl $__KERNEL_CS,CS(%rdi) + jne do_double_fault + movq RIP(%rdi),%rax + cmpq $native_irq_return_iret,%rax + jne do_double_fault /* This shouldn't happen... */ + movq PER_CPU_VAR(kernel_stack),%rax + subq $(6*8-KERNEL_STACK_OFFSET),%rax /* Reset to original stack */ + movq %rax,RSP(%rdi) + movq $0,(%rax) /* Missing (lost) #GP error code */ + movq $general_protection,RIP(%rdi) + retq + CFI_ENDPROC +END(__do_double_fault) +#else +# define __do_double_fault do_double_fault +#endif + /* * End of kprobes section */ @@ -1329,7 +1394,7 @@ zeroentry overflow do_overflow zeroentry bounds do_bounds zeroentry invalid_op do_invalid_op zeroentry device_not_available do_device_not_available -paranoiderrorentry double_fault do_double_fault +paranoiderrorentry double_fault __do_double_fault zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun errorentry invalid_TSS do_invalid_TSS errorentry segment_not_present do_segment_not_present @@ -1618,7 +1683,7 @@ error_sti: */ error_kernelspace: incl %ebx - leaq irq_return(%rip),%rcx + leaq native_irq_return_iret(%rip),%rcx cmpq %rcx,RIP+8(%rsp) je error_swapgs movl %ecx,%eax /* zero extend */ diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c new file mode 100644 index 000000000000..94d857fb1033 --- /dev/null +++ b/arch/x86/kernel/espfix_64.c @@ -0,0 +1,208 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 2014 Intel Corporation; author: H. Peter Anvin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * ----------------------------------------------------------------------- */ + +/* + * The IRET instruction, when returning to a 16-bit segment, only + * restores the bottom 16 bits of the user space stack pointer. This + * causes some 16-bit software to break, but it also leaks kernel state + * to user space. + * + * This works around this by creating percpu "ministacks", each of which + * is mapped 2^16 times 64K apart. When we detect that the return SS is + * on the LDT, we copy the IRET frame to the ministack and use the + * relevant alias to return to userspace. The ministacks are mapped + * readonly, so if the IRET fault we promote #GP to #DF which is an IST + * vector and thus has its own stack; we then do the fixup in the #DF + * handler. + * + * This file sets up the ministacks and the related page tables. The + * actual ministack invocation is in entry_64.S. + */ + +#include <linux/init.h> +#include <linux/init_task.h> +#include <linux/kernel.h> +#include <linux/percpu.h> +#include <linux/gfp.h> +#include <linux/random.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/setup.h> +#include <asm/espfix.h> + +/* + * Note: we only need 6*8 = 48 bytes for the espfix stack, but round + * it up to a cache line to avoid unnecessary sharing. + */ +#define ESPFIX_STACK_SIZE (8*8UL) +#define ESPFIX_STACKS_PER_PAGE (PAGE_SIZE/ESPFIX_STACK_SIZE) + +/* There is address space for how many espfix pages? */ +#define ESPFIX_PAGE_SPACE (1UL << (PGDIR_SHIFT-PAGE_SHIFT-16)) + +#define ESPFIX_MAX_CPUS (ESPFIX_STACKS_PER_PAGE * ESPFIX_PAGE_SPACE) +#if CONFIG_NR_CPUS > ESPFIX_MAX_CPUS +# error "Need more than one PGD for the ESPFIX hack" +#endif + +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) + +/* This contains the *bottom* address of the espfix stack */ +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr); + +/* Initialization mutex - should this be a spinlock? */ +static DEFINE_MUTEX(espfix_init_mutex); + +/* Page allocation bitmap - each page serves ESPFIX_STACKS_PER_PAGE CPUs */ +#define ESPFIX_MAX_PAGES DIV_ROUND_UP(CONFIG_NR_CPUS, ESPFIX_STACKS_PER_PAGE) +static void *espfix_pages[ESPFIX_MAX_PAGES]; + +static __page_aligned_bss pud_t espfix_pud_page[PTRS_PER_PUD] + __aligned(PAGE_SIZE); + +static unsigned int page_random, slot_random; + +/* + * This returns the bottom address of the espfix stack for a specific CPU. + * The math allows for a non-power-of-two ESPFIX_STACK_SIZE, in which case + * we have to account for some amount of padding at the end of each page. + */ +static inline unsigned long espfix_base_addr(unsigned int cpu) +{ + unsigned long page, slot; + unsigned long addr; + + page = (cpu / ESPFIX_STACKS_PER_PAGE) ^ page_random; + slot = (cpu + slot_random) % ESPFIX_STACKS_PER_PAGE; + addr = (page << PAGE_SHIFT) + (slot * ESPFIX_STACK_SIZE); + addr = (addr & 0xffffUL) | ((addr & ~0xffffUL) << 16); + addr += ESPFIX_BASE_ADDR; + return addr; +} + +#define PTE_STRIDE (65536/PAGE_SIZE) +#define ESPFIX_PTE_CLONES (PTRS_PER_PTE/PTE_STRIDE) +#define ESPFIX_PMD_CLONES PTRS_PER_PMD +#define ESPFIX_PUD_CLONES (65536/(ESPFIX_PTE_CLONES*ESPFIX_PMD_CLONES)) + +#define PGTABLE_PROT ((_KERNPG_TABLE & ~_PAGE_RW) | _PAGE_NX) + +static void init_espfix_random(void) +{ + unsigned long rand; + + /* + * This is run before the entropy pools are initialized, + * but this is hopefully better than nothing. + */ + if (!arch_get_random_long(&rand)) { + /* The constant is an arbitrary large prime */ + rdtscll(rand); + rand *= 0xc345c6b72fd16123UL; + } + + slot_random = rand % ESPFIX_STACKS_PER_PAGE; + page_random = (rand / ESPFIX_STACKS_PER_PAGE) + & (ESPFIX_PAGE_SPACE - 1); +} + +void __init init_espfix_bsp(void) +{ + pgd_t *pgd_p; + pteval_t ptemask; + + ptemask = __supported_pte_mask; + + /* Install the espfix pud into the kernel page directory */ + pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)]; + pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page); + + /* Randomize the locations */ + init_espfix_random(); + + /* The rest is the same as for any other processor */ + init_espfix_ap(); +} + +void init_espfix_ap(void) +{ + unsigned int cpu, page; + unsigned long addr; + pud_t pud, *pud_p; + pmd_t pmd, *pmd_p; + pte_t pte, *pte_p; + int n; + void *stack_page; + pteval_t ptemask; + + /* We only have to do this once... */ + if (likely(this_cpu_read(espfix_stack))) + return; /* Already initialized */ + + cpu = smp_processor_id(); + addr = espfix_base_addr(cpu); + page = cpu/ESPFIX_STACKS_PER_PAGE; + + /* Did another CPU already set this up? */ + stack_page = ACCESS_ONCE(espfix_pages[page]); + if (likely(stack_page)) + goto done; + + mutex_lock(&espfix_init_mutex); + + /* Did we race on the lock? */ + stack_page = ACCESS_ONCE(espfix_pages[page]); + if (stack_page) + goto unlock_done; + + ptemask = __supported_pte_mask; + + pud_p = &espfix_pud_page[pud_index(addr)]; + pud = *pud_p; + if (!pud_present(pud)) { + pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP); + pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask)); + paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT); + for (n = 0; n < ESPFIX_PUD_CLONES; n++) + set_pud(&pud_p[n], pud); + } + + pmd_p = pmd_offset(&pud, addr); + pmd = *pmd_p; + if (!pmd_present(pmd)) { + pte_p = (pte_t *)__get_free_page(PGALLOC_GFP); + pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask)); + paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT); + for (n = 0; n < ESPFIX_PMD_CLONES; n++) + set_pmd(&pmd_p[n], pmd); + } + + pte_p = pte_offset_kernel(&pmd, addr); + stack_page = (void *)__get_free_page(GFP_KERNEL); + pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask)); + for (n = 0; n < ESPFIX_PTE_CLONES; n++) + set_pte(&pte_p[n*PTE_STRIDE], pte); + + /* Job is done for this CPU and any CPU which shares this page */ + ACCESS_ONCE(espfix_pages[page]) = stack_page; + +unlock_done: + mutex_unlock(&espfix_init_mutex); +done: + this_cpu_write(espfix_stack, addr); + this_cpu_write(espfix_waddr, (unsigned long)stack_page + + (addr & ~PAGE_MASK)); +} diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index dcbbaa165bde..c37886d759cc 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -20,8 +20,6 @@ #include <asm/mmu_context.h> #include <asm/syscalls.h> -int sysctl_ldt16 = 0; - #ifdef CONFIG_SMP static void flush_ldt(void *current_mm) { @@ -231,16 +229,10 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) } } - /* - * On x86-64 we do not support 16-bit segments due to - * IRET leaking the high bits of the kernel stack address. - */ -#ifdef CONFIG_X86_64 - if (!ldt_info.seg_32bit && !sysctl_ldt16) { + if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) { error = -EINVAL; goto out_unlock; } -#endif fill_ldt(&ldt, &ldt_info); if (oldmode) diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 3f08f34f93eb..a1da6737ba5b 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -6,7 +6,6 @@ DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq"); DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); -DEF_NATIVE(pv_cpu_ops, iret, "iretq"); DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); @@ -50,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, save_fl); PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, irq_disable); - PATCH_SITE(pv_cpu_ops, iret); PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); PATCH_SITE(pv_cpu_ops, usergs_sysret32); PATCH_SITE(pv_cpu_ops, usergs_sysret64); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a32da804252e..395be6d8bbde 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -243,6 +243,13 @@ static void notrace start_secondary(void *unused) check_tsc_sync_target(); /* + * Enable the espfix hack for this CPU + */ +#ifdef CONFIG_X86_ESPFIX64 + init_espfix_ap(); +#endif + + /* * We need to hold vector_lock so there the set of online cpus * does not change while we are assigning vectors to cpus. Holding * this lock ensures we don't half assign or remove an irq from a cpu. diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 0002a3a33081..3620928631ce 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -30,11 +30,13 @@ struct pg_state { unsigned long start_address; unsigned long current_address; const struct addr_marker *marker; + unsigned long lines; }; struct addr_marker { unsigned long start_address; const char *name; + unsigned long max_lines; }; /* indices for address_markers; keep sync'd w/ address_markers below */ @@ -45,6 +47,7 @@ enum address_markers_idx { LOW_KERNEL_NR, VMALLOC_START_NR, VMEMMAP_START_NR, + ESPFIX_START_NR, HIGH_KERNEL_NR, MODULES_VADDR_NR, MODULES_END_NR, @@ -67,6 +70,7 @@ static struct addr_marker address_markers[] = { { PAGE_OFFSET, "Low Kernel Mapping" }, { VMALLOC_START, "vmalloc() Area" }, { VMEMMAP_START, "Vmemmap" }, + { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, { __START_KERNEL_map, "High Kernel Mapping" }, { MODULES_VADDR, "Modules" }, { MODULES_END, "End Modules" }, @@ -163,7 +167,7 @@ static void note_page(struct seq_file *m, struct pg_state *st, pgprot_t new_prot, int level) { pgprotval_t prot, cur; - static const char units[] = "KMGTPE"; + static const char units[] = "BKMGTPE"; /* * If we have a "break" in the series, we need to flush the state that @@ -178,6 +182,7 @@ static void note_page(struct seq_file *m, struct pg_state *st, st->current_prot = new_prot; st->level = level; st->marker = address_markers; + st->lines = 0; seq_printf(m, "---[ %s ]---\n", st->marker->name); } else if (prot != cur || level != st->level || st->current_address >= st->marker[1].start_address) { @@ -188,17 +193,21 @@ static void note_page(struct seq_file *m, struct pg_state *st, /* * Now print the actual finished series */ - seq_printf(m, "0x%0*lx-0x%0*lx ", - width, st->start_address, - width, st->current_address); - - delta = (st->current_address - st->start_address) >> 10; - while (!(delta & 1023) && unit[1]) { - delta >>= 10; - unit++; + if (!st->marker->max_lines || + st->lines < st->marker->max_lines) { + seq_printf(m, "0x%0*lx-0x%0*lx ", + width, st->start_address, + width, st->current_address); + + delta = (st->current_address - st->start_address) >> 10; + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; + } + seq_printf(m, "%9lu%c ", delta, *unit); + printk_prot(m, st->current_prot, st->level); } - seq_printf(m, "%9lu%c ", delta, *unit); - printk_prot(m, st->current_prot, st->level); + st->lines++; /* * We print markers for special areas of address space, diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index f1d633a43f8e..d6bfb876cfb0 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -41,7 +41,6 @@ enum { #ifdef CONFIG_X86_64 #define vdso_enabled sysctl_vsyscall32 #define arch_setup_additional_pages syscall32_setup_pages -extern int sysctl_ldt16; #endif /* @@ -381,13 +380,6 @@ static struct ctl_table abi_table2[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "ldt16", - .data = &sysctl_ldt16, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, {} }; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 0982233b9b84..a6a72ce8630f 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -574,13 +574,7 @@ void xen_enable_syscall(void) } #endif /* CONFIG_X86_64 */ } -void xen_enable_nmi(void) -{ -#ifdef CONFIG_X86_64 - if (register_callback(CALLBACKTYPE_nmi, (char *)nmi)) - BUG(); -#endif -} + void __init xen_pvmmu_arch_setup(void) { HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); @@ -595,7 +589,6 @@ void __init xen_pvmmu_arch_setup(void) xen_enable_sysenter(); xen_enable_syscall(); - xen_enable_nmi(); } /* This function is not called for HVM domains */ |