aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig20
-rw-r--r--arch/x86/Kconfig.assembler4
-rw-r--r--arch/x86/Kconfig.debug9
-rw-r--r--arch/x86/Makefile6
-rw-r--r--arch/x86/boot/Makefile7
-rw-r--r--arch/x86/boot/compressed/acpi.c7
-rw-r--r--arch/x86/boot/compressed/efi_thunk_64.S19
-rw-r--r--arch/x86/boot/compressed/head_32.S5
-rw-r--r--arch/x86/boot/compressed/head_64.S9
-rw-r--r--arch/x86/boot/compressed/vmlinux.lds.S3
-rw-r--r--arch/x86/boot/string.c43
-rw-r--r--arch/x86/boot/string.h1
-rw-r--r--arch/x86/boot/tools/build.c16
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S4
-rw-r--r--arch/x86/crypto/blake2s-glue.c10
-rw-r--r--arch/x86/crypto/camellia-aesni-avx-asm_64.S2
-rw-r--r--arch/x86/crypto/camellia-aesni-avx2-asm_64.S2
-rw-r--r--arch/x86/crypto/chacha_glue.c14
-rw-r--r--arch/x86/crypto/crc32c-pcl-intel-asm_64.S26
-rw-r--r--arch/x86/crypto/nhpoly1305-avx2-glue.c2
-rw-r--r--arch/x86/crypto/nhpoly1305-sse2-glue.c2
-rw-r--r--arch/x86/crypto/poly1305_glue.c13
-rw-r--r--arch/x86/crypto/sha1_ssse3_glue.c1
-rw-r--r--arch/x86/crypto/sha256_ssse3_glue.c1
-rw-r--r--arch/x86/crypto/sha512_ssse3_glue.c1
-rw-r--r--arch/x86/entry/calling.h40
-rw-r--r--arch/x86/entry/entry_32.S8
-rw-r--r--arch/x86/entry/entry_64.S16
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl1
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl1
-rw-r--r--arch/x86/entry/vdso/Makefile15
-rw-r--r--arch/x86/entry/vdso/vdso2c.c4
-rw-r--r--arch/x86/entry/vdso/vdso2c.h16
-rw-r--r--arch/x86/events/Kconfig6
-rw-r--r--arch/x86/events/Makefile3
-rw-r--r--arch/x86/events/core.c4
-rw-r--r--arch/x86/events/intel/Makefile2
-rw-r--r--arch/x86/events/intel/bts.c2
-rw-r--r--arch/x86/events/intel/core.c4
-rw-r--r--arch/x86/events/intel/cstate.c1
-rw-r--r--arch/x86/events/intel/pt.c2
-rw-r--r--arch/x86/events/intel/uncore.h2
-rw-r--r--arch/x86/events/perf_event.h10
-rw-r--r--arch/x86/events/probe.c13
-rw-r--r--arch/x86/events/rapl.c (renamed from arch/x86/events/intel/rapl.c)69
-rw-r--r--arch/x86/events/zhaoxin/Makefile2
-rw-r--r--arch/x86/events/zhaoxin/core.c613
-rw-r--r--arch/x86/hyperv/hv_init.c36
-rw-r--r--arch/x86/ia32/audit.c1
-rw-r--r--arch/x86/ia32/ia32_signal.c2
-rw-r--r--arch/x86/include/asm/GEN-for-each-reg.h25
-rw-r--r--arch/x86/include/asm/apb_timer.h5
-rw-r--r--arch/x86/include/asm/archrandom.h26
-rw-r--r--arch/x86/include/asm/asm-prototypes.h35
-rw-r--r--arch/x86/include/asm/audit.h7
-rw-r--r--arch/x86/include/asm/bitops.h12
-rw-r--r--arch/x86/include/asm/checksum.h2
-rw-r--r--arch/x86/include/asm/checksum_32.h21
-rw-r--r--arch/x86/include/asm/checksum_64.h12
-rw-r--r--arch/x86/include/asm/compat.h8
-rw-r--r--arch/x86/include/asm/cpu_device_id.h31
-rw-r--r--arch/x86/include/asm/delay.h4
-rw-r--r--arch/x86/include/asm/device.h2
-rw-r--r--arch/x86/include/asm/dma.h2
-rw-r--r--arch/x86/include/asm/doublefault.h2
-rw-r--r--arch/x86/include/asm/efi.h50
-rw-r--r--arch/x86/include/asm/floppy.h19
-rw-r--r--arch/x86/include/asm/fpu/internal.h10
-rw-r--r--arch/x86/include/asm/fpu/xstate.h52
-rw-r--r--arch/x86/include/asm/ftrace.h11
-rw-r--r--arch/x86/include/asm/intel-mid.h9
-rw-r--r--arch/x86/include/asm/intel_pmc_ipc.h59
-rw-r--r--arch/x86/include/asm/intel_scu_ipc.h114
-rw-r--r--arch/x86/include/asm/intel_scu_ipc_legacy.h91
-rw-r--r--arch/x86/include/asm/intel_telemetry.h6
-rw-r--r--arch/x86/include/asm/invpcid.h7
-rw-r--r--arch/x86/include/asm/io_bitmap.h4
-rw-r--r--arch/x86/include/asm/kvm_host.h11
-rw-r--r--arch/x86/include/asm/mmzone_32.h39
-rw-r--r--arch/x86/include/asm/module.h60
-rw-r--r--arch/x86/include/asm/mshyperv.h2
-rw-r--r--arch/x86/include/asm/msr-index.h3
-rw-r--r--arch/x86/include/asm/mwait.h24
-rw-r--r--arch/x86/include/asm/nospec-branch.h97
-rw-r--r--arch/x86/include/asm/orc_types.h3
-rw-r--r--arch/x86/include/asm/pgtable-2level_types.h2
-rw-r--r--arch/x86/include/asm/pgtable-3level_types.h2
-rw-r--r--arch/x86/include/asm/pgtable_32.h3
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h2
-rw-r--r--arch/x86/include/asm/pgtable_types.h8
-rw-r--r--arch/x86/include/asm/processor.h5
-rw-r--r--arch/x86/include/asm/resctrl.h (renamed from arch/x86/include/asm/resctrl_sched.h)9
-rw-r--r--arch/x86/include/asm/smap.h11
-rw-r--r--arch/x86/include/asm/spinlock_types.h22
-rw-r--r--arch/x86/include/asm/stackprotector.h7
-rw-r--r--arch/x86/include/asm/switch_to.h23
-rw-r--r--arch/x86/include/asm/traps.h7
-rw-r--r--arch/x86/include/asm/unwind.h2
-rw-r--r--arch/x86/include/asm/unwind_hints.h31
-rw-r--r--arch/x86/include/asm/uv/bios.h7
-rw-r--r--arch/x86/include/asm/uv/uv.h2
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h54
-rw-r--r--arch/x86/include/asm/uv/uv_mmrs.h7
-rw-r--r--arch/x86/include/asm/vermagic.h68
-rw-r--r--arch/x86/include/uapi/asm/unistd.h11
-rw-r--r--arch/x86/kernel/Makefile4
-rw-r--r--arch/x86/kernel/apb_timer.c53
-rw-r--r--arch/x86/kernel/apic/apic.c76
-rw-r--r--arch/x86/kernel/apic/io_apic.c13
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c91
-rw-r--r--arch/x86/kernel/audit_64.c2
-rw-r--r--arch/x86/kernel/cpu/amd.c6
-rw-r--r--arch/x86/kernel/cpu/common.c40
-rw-r--r--arch/x86/kernel/cpu/intel.c7
-rw-r--r--arch/x86/kernel/cpu/match.c7
-rw-r--r--arch/x86/kernel/cpu/mce/core.c65
-rw-r--r--arch/x86/kernel/cpu/mce/p5.c5
-rw-r--r--arch/x86/kernel/cpu/mce/winchip.c5
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c15
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c8
-rw-r--r--arch/x86/kernel/cpu/resctrl/core.c32
-rw-r--r--arch/x86/kernel/cpu/resctrl/ctrlmondata.c8
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h15
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c27
-rw-r--r--arch/x86/kernel/cpu/resctrl/pseudo_lock.c2
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c4
-rw-r--r--arch/x86/kernel/dumpstack_32.c4
-rw-r--r--arch/x86/kernel/dumpstack_64.c3
-rw-r--r--arch/x86/kernel/e820.c8
-rw-r--r--arch/x86/kernel/early_printk.c3
-rw-r--r--arch/x86/kernel/fpu/core.c53
-rw-r--r--arch/x86/kernel/fpu/init.c3
-rw-r--r--arch/x86/kernel/fpu/regset.c2
-rw-r--r--arch/x86/kernel/fpu/signal.c144
-rw-r--r--arch/x86/kernel/fpu/xstate.c287
-rw-r--r--arch/x86/kernel/ftrace.c41
-rw-r--r--arch/x86/kernel/ftrace_32.S2
-rw-r--r--arch/x86/kernel/ftrace_64.S44
-rw-r--r--arch/x86/kernel/ioport.c22
-rw-r--r--arch/x86/kernel/irq_64.c2
-rw-r--r--arch/x86/kernel/nmi.c4
-rw-r--r--arch/x86/kernel/process.c6
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/process_64.c2
-rw-r--r--arch/x86/kernel/setup.c6
-rw-r--r--arch/x86/kernel/setup_percpu.c6
-rw-r--r--arch/x86/kernel/signal.c30
-rw-r--r--arch/x86/kernel/smpboot.c76
-rw-r--r--arch/x86/kernel/tboot.c8
-rw-r--r--arch/x86/kernel/time.c3
-rw-r--r--arch/x86/kernel/traps.c110
-rw-r--r--arch/x86/kernel/tsc.c12
-rw-r--r--arch/x86/kernel/unwind_frame.c3
-rw-r--r--arch/x86/kernel/unwind_orc.c130
-rw-r--r--arch/x86/kvm/Makefile4
-rw-r--r--arch/x86/kvm/hyperv.c2
-rw-r--r--arch/x86/kvm/ioapic.c10
-rw-r--r--arch/x86/kvm/mmu/mmu.c2
-rw-r--r--arch/x86/kvm/svm/nested.c39
-rw-r--r--arch/x86/kvm/svm/sev.c11
-rw-r--r--arch/x86/kvm/svm/svm.c48
-rw-r--r--arch/x86/kvm/svm/vmenter.S10
-rw-r--r--arch/x86/kvm/vmx/nested.c23
-rw-r--r--arch/x86/kvm/vmx/vmenter.S3
-rw-r--r--arch/x86/kvm/vmx/vmx.c43
-rw-r--r--arch/x86/kvm/x86.c102
-rw-r--r--arch/x86/lib/checksum_32.S4
-rw-r--r--arch/x86/lib/csum-wrappers_64.c35
-rw-r--r--arch/x86/lib/delay.c114
-rw-r--r--arch/x86/lib/retpoline.S63
-rw-r--r--arch/x86/mm/cpu_entry_area.c4
-rw-r--r--arch/x86/mm/dump_pagetables.c35
-rw-r--r--arch/x86/mm/fault.c176
-rw-r--r--arch/x86/mm/init.c2
-rw-r--r--arch/x86/mm/init_64.c8
-rw-r--r--arch/x86/mm/mmap.c2
-rw-r--r--arch/x86/mm/mmio-mod.c4
-rw-r--r--arch/x86/mm/numa_32.c34
-rw-r--r--arch/x86/mm/pat/set_memory.c12
-rw-r--r--arch/x86/mm/pti.c8
-rw-r--r--arch/x86/mm/tlb.c37
-rw-r--r--arch/x86/net/bpf_jit_comp.c18
-rw-r--r--arch/x86/net/bpf_jit_comp32.c28
-rw-r--r--arch/x86/platform/efi/efi.c8
-rw-r--r--arch/x86/platform/efi/efi_stub_64.S2
-rw-r--r--arch/x86/platform/uv/bios_uv.c16
-rw-r--r--arch/x86/platform/uv/uv_sysfs.c2
-rw-r--r--arch/x86/power/cpu.c2
-rw-r--r--arch/x86/um/asm/checksum.h20
-rw-r--r--arch/x86/xen/efi.c2
-rw-r--r--arch/x86/xen/smp_pv.c1
191 files changed, 2710 insertions, 1973 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1d6104ea8af0..70c668d976cc 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -60,6 +60,7 @@ config X86
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEVMEM_IS_ALLOWED
+ select ARCH_HAS_EARLY_DEBUG if KGDB
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_FILTER_PGPROT
@@ -68,6 +69,7 @@ config X86
select ARCH_HAS_KCOV if X86_64
select ARCH_HAS_MEM_ENCRYPT
select ARCH_HAS_MEMBARRIER_SYNC_CORE
+ select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PMEM_API if X86_64
select ARCH_HAS_PTE_DEVMAP if X86_64
select ARCH_HAS_PTE_SPECIAL
@@ -91,6 +93,7 @@ config X86
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
+ select ARCH_USE_SYM_ANNOTATIONS
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
select ARCH_WANT_DEFAULT_BPF_JIT if X86_64
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
@@ -149,7 +152,7 @@ config X86
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
- select HAVE_ARCH_USERFAULTFD_WP if USERFAULTFD
+ select HAVE_ARCH_USERFAULTFD_WP if X86_64 && USERFAULTFD
select HAVE_ARCH_VMAP_STACK if X86_64
select HAVE_ARCH_WITHIN_STACK_FRAMES
select HAVE_ASM_MODVERSIONS
@@ -595,7 +598,7 @@ config X86_INTEL_MID
select I2C
select DW_APB_TIMER
select APB_TIMER
- select INTEL_SCU_IPC
+ select INTEL_SCU_PCI
select MFD_INTEL_MSIC
---help---
Select to build a kernel capable of supporting Intel MID (Mobile
@@ -1610,19 +1613,10 @@ config NODES_SHIFT
Specify the maximum number of NUMA Nodes available on the target
system. Increases memory reserved to accommodate various tables.
-config ARCH_HAVE_MEMORY_PRESENT
- def_bool y
- depends on X86_32 && DISCONTIGMEM
-
config ARCH_FLATMEM_ENABLE
def_bool y
depends on X86_32 && !NUMA
-config ARCH_DISCONTIGMEM_ENABLE
- def_bool n
- depends on NUMA && X86_32
- depends on BROKEN
-
config ARCH_SPARSEMEM_ENABLE
def_bool y
depends on X86_64 || NUMA || X86_32 || X86_32_NON_STANDARD
@@ -1887,10 +1881,10 @@ config X86_UMIP
results are dummy.
config X86_INTEL_MEMORY_PROTECTION_KEYS
- prompt "Intel Memory Protection Keys"
+ prompt "Memory Protection Keys"
def_bool y
# Note: only available in 64-bit mode
- depends on CPU_SUP_INTEL && X86_64
+ depends on X86_64 && (CPU_SUP_INTEL || CPU_SUP_AMD)
select ARCH_USES_HIGH_VMA_FLAGS
select ARCH_HAS_PKEYS
---help---
diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler
index 13de0db38d4e..26b8c08e2fc4 100644
--- a/arch/x86/Kconfig.assembler
+++ b/arch/x86/Kconfig.assembler
@@ -15,3 +15,7 @@ config AS_SHA256_NI
def_bool $(as-instr,sha256msg1 %xmm0$(comma)%xmm1)
help
Supported by binutils >= 2.24 and LLVM integrated assembler
+config AS_TPAUSE
+ def_bool $(as-instr,tpause %ecx)
+ help
+ Supported by binutils >= 2.31.1 and LLVM integrated assembler >= V7
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2e74690b028a..f909d3ce36e6 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -99,15 +99,6 @@ config DEBUG_WX
If in doubt, say "Y".
-config DOUBLEFAULT
- default y
- bool "Enable doublefault exception handler" if EXPERT && X86_32
- ---help---
- This option allows trapping of rare doublefault exceptions that
- would otherwise cause a system to silently reboot. Disabling this
- option saves about 4k and might cause you much additional grey
- hair.
-
config DEBUG_TLBFLUSH
bool "Set upper limit of TLB entries to flush one-by-one"
depends on DEBUG_KERNEL
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index b65ec63c7db7..00e378de8bc0 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -246,7 +246,7 @@ drivers-$(CONFIG_FB) += arch/x86/video/
boot := arch/x86/boot
-BOOT_TARGETS = bzlilo bzdisk fdimage fdimage144 fdimage288 isoimage
+BOOT_TARGETS = bzdisk fdimage fdimage144 fdimage288 isoimage
PHONY += bzImage $(BOOT_TARGETS)
@@ -267,8 +267,8 @@ endif
$(BOOT_TARGETS): vmlinux
$(Q)$(MAKE) $(build)=$(boot) $@
-PHONY += install
-install:
+PHONY += install bzlilo
+install bzlilo:
$(Q)$(MAKE) $(build)=$(boot) $@
PHONY += vdso_install
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index e17be90ab312..4c5355684321 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -57,11 +57,10 @@ $(obj)/cpu.o: $(obj)/cpustr.h
quiet_cmd_cpustr = CPUSTR $@
cmd_cpustr = $(obj)/mkcpustr > $@
-targets += cpustr.h
$(obj)/cpustr.h: $(obj)/mkcpustr FORCE
$(call if_changed,cpustr)
endif
-clean-files += cpustr.h
+targets += cpustr.h
# ---------------------------------------------------------------------------
@@ -129,6 +128,8 @@ quiet_cmd_genimage = GENIMAGE $3
cmd_genimage = sh $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \
$(obj)/mtools.conf '$(image_cmdline)' $(FDINITRD)
+PHONY += bzdisk fdimage fdimage144 fdimage288 isoimage bzlilo install
+
# This requires write access to /dev/fd0
bzdisk: $(obj)/bzImage $(obj)/mtools.conf
$(call cmd,genimage,bzdisk,/dev/fd0)
@@ -146,7 +147,7 @@ isoimage: $(obj)/bzImage
$(call cmd,genimage,isoimage,$(obj)/image.iso)
@$(kecho) 'Kernel: $(obj)/image.iso is ready'
-bzlilo: $(obj)/bzImage
+bzlilo:
if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi
if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi
cat $(obj)/bzImage > $(INSTALL_PATH)/vmlinuz
diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index ef2ad7253cd5..8bcbcee54aa1 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -280,9 +280,9 @@ acpi_physical_address get_rsdp_addr(void)
*/
#define MAX_ADDR_LEN 19
-static acpi_physical_address get_cmdline_acpi_rsdp(void)
+static unsigned long get_cmdline_acpi_rsdp(void)
{
- acpi_physical_address addr = 0;
+ unsigned long addr = 0;
#ifdef CONFIG_KEXEC
char val[MAX_ADDR_LEN] = { };
@@ -292,7 +292,7 @@ static acpi_physical_address get_cmdline_acpi_rsdp(void)
if (ret < 0)
return 0;
- if (kstrtoull(val, 16, &addr))
+ if (boot_kstrtoul(val, 16, &addr))
return 0;
#endif
return addr;
@@ -314,7 +314,6 @@ static unsigned long get_acpi_srat_table(void)
* different ideas about whether to trust a command-line parameter.
*/
rsdp = (struct acpi_table_rsdp *)get_cmdline_acpi_rsdp();
-
if (!rsdp)
rsdp = (struct acpi_table_rsdp *)(long)
boot_params->acpi_rsdp_addr;
diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S
index 2b2049259619..c4bb0f9363f5 100644
--- a/arch/x86/boot/compressed/efi_thunk_64.S
+++ b/arch/x86/boot/compressed/efi_thunk_64.S
@@ -28,8 +28,6 @@ SYM_FUNC_START(__efi64_thunk)
push %rbx
leaq 1f(%rip), %rbp
- leaq efi_gdt64(%rip), %rbx
- movl %ebx, 2(%rbx) /* Fixup the gdt base address */
movl %ds, %eax
push %rax
@@ -48,7 +46,8 @@ SYM_FUNC_START(__efi64_thunk)
movl %r8d, 0xc(%rsp)
movl %r9d, 0x10(%rsp)
- sgdt 0x14(%rsp)
+ leaq 0x14(%rsp), %rbx
+ sgdt (%rbx)
/*
* Switch to gdt with 32-bit segments. This is the firmware GDT
@@ -68,8 +67,7 @@ SYM_FUNC_START(__efi64_thunk)
pushq %rax
lretq
-1: lgdt 0x14(%rsp)
- addq $32, %rsp
+1: addq $32, %rsp
movq %rdi, %rax
pop %rbx
@@ -175,14 +173,3 @@ SYM_DATA_END(efi32_boot_cs)
SYM_DATA_START(efi32_boot_ds)
.word 0
SYM_DATA_END(efi32_boot_ds)
-
-SYM_DATA_START(efi_gdt64)
- .word efi_gdt64_end - efi_gdt64
- .long 0 /* Filled out by user */
- .word 0
- .quad 0x0000000000000000 /* NULL descriptor */
- .quad 0x00af9a000000ffff /* __KERNEL_CS */
- .quad 0x00cf92000000ffff /* __KERNEL_DS */
- .quad 0x0080890000000000 /* TS descriptor */
- .quad 0x0000000000000000 /* TS continued */
-SYM_DATA_END_LABEL(efi_gdt64, SYM_L_LOCAL, efi_gdt64_end)
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index ab3307036ba4..03557f2174bf 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -49,16 +49,17 @@
* Position Independent Executable (PIE) so that linker won't optimize
* R_386_GOT32X relocation to its fixed symbol address. Older
* linkers generate R_386_32 relocations against locally defined symbols,
- * _bss, _ebss, _got and _egot, in PIE. It isn't wrong, just less
+ * _bss, _ebss, _got, _egot and _end, in PIE. It isn't wrong, just less
* optimal than R_386_RELATIVE. But the x86 kernel fails to properly handle
* R_386_32 relocations when relocating the kernel. To generate
- * R_386_RELATIVE relocations, we mark _bss, _ebss, _got and _egot as
+ * R_386_RELATIVE relocations, we mark _bss, _ebss, _got, _egot and _end as
* hidden:
*/
.hidden _bss
.hidden _ebss
.hidden _got
.hidden _egot
+ .hidden _end
__HEAD
SYM_FUNC_START(startup_32)
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 4f7e6b84be07..e821a7d7d5c4 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -42,6 +42,7 @@
.hidden _ebss
.hidden _got
.hidden _egot
+ .hidden _end
__HEAD
.code32
@@ -393,6 +394,14 @@ SYM_CODE_START(startup_64)
addq %rax, 2(%rax)
lgdt (%rax)
+ /* Reload CS so IRET returns to a CS actually in the GDT */
+ pushq $__KERNEL_CS
+ leaq .Lon_kernel_cs(%rip), %rax
+ pushq %rax
+ lretq
+
+.Lon_kernel_cs:
+
/*
* paging_prepare() sets up the trampoline and checks if we need to
* enable 5-level paging.
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index 508cfa6828c5..8f1025d1f681 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -52,6 +52,7 @@ SECTIONS
_data = . ;
*(.data)
*(.data.*)
+ *(.bss.efistub)
_edata = . ;
}
. = ALIGN(L1_CACHE_BYTES);
@@ -73,4 +74,6 @@ SECTIONS
#endif
. = ALIGN(PAGE_SIZE); /* keep ZO size page aligned */
_end = .;
+
+ DISCARDS
}
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 8272a4492844..8a3fff9128bb 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -117,7 +117,6 @@ static unsigned int simple_guess_base(const char *cp)
* @endp: A pointer to the end of the parsed string will be placed here
* @base: The number base to use
*/
-
unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
{
unsigned long long result = 0;
@@ -335,3 +334,45 @@ int kstrtoull(const char *s, unsigned int base, unsigned long long *res)
s++;
return _kstrtoull(s, base, res);
}
+
+static int _kstrtoul(const char *s, unsigned int base, unsigned long *res)
+{
+ unsigned long long tmp;
+ int rv;
+
+ rv = kstrtoull(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (unsigned long)tmp)
+ return -ERANGE;
+ *res = tmp;
+ return 0;
+}
+
+/**
+ * kstrtoul - convert a string to an unsigned long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ * include a single newline before its terminating null. The first character
+ * may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ * given as 0, then the base of the string is automatically detected with the
+ * conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ * parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the simple_strtoull.
+ */
+int boot_kstrtoul(const char *s, unsigned int base, unsigned long *res)
+{
+ /*
+ * We want to shortcut function call, but
+ * __builtin_types_compatible_p(unsigned long, unsigned long long) = 0.
+ */
+ if (sizeof(unsigned long) == sizeof(unsigned long long) &&
+ __alignof__(unsigned long) == __alignof__(unsigned long long))
+ return kstrtoull(s, base, (unsigned long long *)res);
+ else
+ return _kstrtoul(s, base, res);
+}
diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h
index 38d8f2f5e47e..995f7b7ad512 100644
--- a/arch/x86/boot/string.h
+++ b/arch/x86/boot/string.h
@@ -30,4 +30,5 @@ extern unsigned long long simple_strtoull(const char *cp, char **endp,
unsigned int base);
int kstrtoull(const char *s, unsigned int base, unsigned long long *res);
+int boot_kstrtoul(const char *s, unsigned int base, unsigned long *res);
#endif /* BOOT_STRING_H */
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index 8f8c8e386cea..c8b8c1a8d1fc 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -59,14 +59,14 @@ u8 buf[SETUP_SECT_MAX*512];
#define PECOFF_COMPAT_RESERVE 0x0
#endif
-unsigned long efi32_stub_entry;
-unsigned long efi64_stub_entry;
-unsigned long efi_pe_entry;
-unsigned long efi32_pe_entry;
-unsigned long kernel_info;
-unsigned long startup_64;
-unsigned long _ehead;
-unsigned long _end;
+static unsigned long efi32_stub_entry;
+static unsigned long efi64_stub_entry;
+static unsigned long efi_pe_entry;
+static unsigned long efi32_pe_entry;
+static unsigned long kernel_info;
+static unsigned long startup_64;
+static unsigned long _ehead;
+static unsigned long _end;
/*----------------------------------------------------------------------*/
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index cad6e1bfa7d5..54e7d15dbd0d 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -2758,7 +2758,7 @@ SYM_FUNC_START(aesni_xts_crypt8)
pxor INC, STATE4
movdqu IV, 0x30(OUTP)
- CALL_NOSPEC %r11
+ CALL_NOSPEC r11
movdqu 0x00(OUTP), INC
pxor INC, STATE1
@@ -2803,7 +2803,7 @@ SYM_FUNC_START(aesni_xts_crypt8)
_aesni_gf128mul_x_ble()
movups IV, (IVP)
- CALL_NOSPEC %r11
+ CALL_NOSPEC r11
movdqu 0x40(OUTP), INC
pxor INC, STATE1
diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c
index 06ef2d4a4701..6737bcea1fa1 100644
--- a/arch/x86/crypto/blake2s-glue.c
+++ b/arch/x86/crypto/blake2s-glue.c
@@ -32,16 +32,16 @@ void blake2s_compress_arch(struct blake2s_state *state,
const u32 inc)
{
/* SIMD disables preemption, so relax after processing each page. */
- BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8);
+ BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);
if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
blake2s_compress_generic(state, block, nblocks, inc);
return;
}
- for (;;) {
+ do {
const size_t blocks = min_t(size_t, nblocks,
- PAGE_SIZE / BLAKE2S_BLOCK_SIZE);
+ SZ_4K / BLAKE2S_BLOCK_SIZE);
kernel_fpu_begin();
if (IS_ENABLED(CONFIG_AS_AVX512) &&
@@ -52,10 +52,8 @@ void blake2s_compress_arch(struct blake2s_state *state,
kernel_fpu_end();
nblocks -= blocks;
- if (!nblocks)
- break;
block += blocks * BLAKE2S_BLOCK_SIZE;
- }
+ } while (nblocks);
}
EXPORT_SYMBOL(blake2s_compress_arch);
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index d01ddd73de65..ecc0a9a905c4 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -1228,7 +1228,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_16way)
vpxor 14 * 16(%rax), %xmm15, %xmm14;
vpxor 15 * 16(%rax), %xmm15, %xmm15;
- CALL_NOSPEC %r9;
+ CALL_NOSPEC r9;
addq $(16 * 16), %rsp;
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index 563ef6e83cdd..0907243c501c 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -1339,7 +1339,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_32way)
vpxor 14 * 32(%rax), %ymm15, %ymm14;
vpxor 15 * 32(%rax), %ymm15, %ymm15;
- CALL_NOSPEC %r9;
+ CALL_NOSPEC r9;
addq $(16 * 32), %rsp;
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index b412c21ee06e..22250091cdbe 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -153,9 +153,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
bytes <= CHACHA_BLOCK_SIZE)
return chacha_crypt_generic(state, dst, src, bytes, nrounds);
- kernel_fpu_begin();
- chacha_dosimd(state, dst, src, bytes, nrounds);
- kernel_fpu_end();
+ do {
+ unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
+
+ kernel_fpu_begin();
+ chacha_dosimd(state, dst, src, todo, nrounds);
+ kernel_fpu_end();
+
+ bytes -= todo;
+ src += todo;
+ dst += todo;
+ } while (bytes);
}
EXPORT_SYMBOL(chacha_crypt_arch);
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 0e6690e3618c..8501ec4532f4 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -75,7 +75,7 @@
.text
SYM_FUNC_START(crc_pcl)
-#define bufp %rdi
+#define bufp rdi
#define bufp_dw %edi
#define bufp_w %di
#define bufp_b %dil
@@ -105,9 +105,9 @@ SYM_FUNC_START(crc_pcl)
## 1) ALIGN:
################################################################
- mov bufp, bufptmp # rdi = *buf
- neg bufp
- and $7, bufp # calculate the unalignment amount of
+ mov %bufp, bufptmp # rdi = *buf
+ neg %bufp
+ and $7, %bufp # calculate the unalignment amount of
# the address
je proc_block # Skip if aligned
@@ -123,13 +123,13 @@ SYM_FUNC_START(crc_pcl)
do_align:
#### Calculate CRC of unaligned bytes of the buffer (if any)
movq (bufptmp), tmp # load a quadward from the buffer
- add bufp, bufptmp # align buffer pointer for quadword
+ add %bufp, bufptmp # align buffer pointer for quadword
# processing
- sub bufp, len # update buffer length
+ sub %bufp, len # update buffer length
align_loop:
crc32b %bl, crc_init_dw # compute crc32 of 1-byte
shr $8, tmp # get next byte
- dec bufp
+ dec %bufp
jne align_loop
proc_block:
@@ -169,10 +169,10 @@ continue_block:
xor crc2, crc2
## branch into array
- lea jump_table(%rip), bufp
- movzxw (bufp, %rax, 2), len
- lea crc_array(%rip), bufp
- lea (bufp, len, 1), bufp
+ lea jump_table(%rip), %bufp
+ movzxw (%bufp, %rax, 2), len
+ lea crc_array(%rip), %bufp
+ lea (%bufp, len, 1), %bufp
JMP_NOSPEC bufp
################################################################
@@ -218,9 +218,9 @@ LABEL crc_ %i
## 4) Combine three results:
################################################################
- lea (K_table-8)(%rip), bufp # first entry is for idx 1
+ lea (K_table-8)(%rip), %bufp # first entry is for idx 1
shlq $3, %rax # rax *= 8
- pmovzxdq (bufp,%rax), %xmm0 # 2 consts: K1:K2
+ pmovzxdq (%bufp,%rax), %xmm0 # 2 consts: K1:K2
leal (%eax,%eax,2), %eax # rax *= 3 (total *24)
subq %rax, tmp # tmp -= rax*24
diff --git a/arch/x86/crypto/nhpoly1305-avx2-glue.c b/arch/x86/crypto/nhpoly1305-avx2-glue.c
index f7567cbd35b6..80fcb85736e1 100644
--- a/arch/x86/crypto/nhpoly1305-avx2-glue.c
+++ b/arch/x86/crypto/nhpoly1305-avx2-glue.c
@@ -29,7 +29,7 @@ static int nhpoly1305_avx2_update(struct shash_desc *desc,
return crypto_nhpoly1305_update(desc, src, srclen);
do {
- unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE);
+ unsigned int n = min_t(unsigned int, srclen, SZ_4K);
kernel_fpu_begin();
crypto_nhpoly1305_update_helper(desc, src, n, _nh_avx2);
diff --git a/arch/x86/crypto/nhpoly1305-sse2-glue.c b/arch/x86/crypto/nhpoly1305-sse2-glue.c
index a661ede3b5cf..cc6b7c1a2705 100644
--- a/arch/x86/crypto/nhpoly1305-sse2-glue.c
+++ b/arch/x86/crypto/nhpoly1305-sse2-glue.c
@@ -29,7 +29,7 @@ static int nhpoly1305_sse2_update(struct shash_desc *desc,
return crypto_nhpoly1305_update(desc, src, srclen);
do {
- unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE);
+ unsigned int n = min_t(unsigned int, srclen, SZ_4K);
kernel_fpu_begin();
crypto_nhpoly1305_update_helper(desc, src, n, _nh_sse2);
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index 6dfec19f7d57..dfe921efa9b2 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -91,8 +91,8 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
struct poly1305_arch_internal *state = ctx;
/* SIMD disables preemption, so relax after processing each page. */
- BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
- PAGE_SIZE % POLY1305_BLOCK_SIZE);
+ BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE ||
+ SZ_4K % POLY1305_BLOCK_SIZE);
if (!static_branch_likely(&poly1305_use_avx) ||
(len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) ||
@@ -102,8 +102,8 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
return;
}
- for (;;) {
- const size_t bytes = min_t(size_t, len, PAGE_SIZE);
+ do {
+ const size_t bytes = min_t(size_t, len, SZ_4K);
kernel_fpu_begin();
if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512))
@@ -113,11 +113,10 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
else
poly1305_blocks_avx(ctx, inp, bytes, padbit);
kernel_fpu_end();
+
len -= bytes;
- if (!len)
- break;
inp += bytes;
- }
+ } while (len);
}
static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index a801ffc10cbb..18200135603f 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -21,7 +21,6 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
-#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <crypto/sha1_base.h>
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index 6394b5fe8db6..dd06249229e1 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -34,7 +34,6 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
-#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <crypto/sha256_base.h>
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index 82cc1b3ced1d..b0b05c93409e 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -32,7 +32,6 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
-#include <linux/cryptohash.h>
#include <linux/string.h>
#include <linux/types.h>
#include <crypto/sha.h>
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 0789e13ece90..1c7f13bb6728 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -98,13 +98,6 @@ For 32-bit we have the following conventions - kernel is built with
#define SIZEOF_PTREGS 21*8
.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
- /*
- * Push registers and sanitize registers of values that a
- * speculation attack might otherwise want to exploit. The
- * lower registers are likely clobbered well before they
- * could be put to use in a speculative execution gadget.
- * Interleave XOR with PUSH for better uop scheduling:
- */
.if \save_ret
pushq %rsi /* pt_regs->si */
movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */
@@ -114,34 +107,43 @@ For 32-bit we have the following conventions - kernel is built with
pushq %rsi /* pt_regs->si */
.endif
pushq \rdx /* pt_regs->dx */
- xorl %edx, %edx /* nospec dx */
pushq %rcx /* pt_regs->cx */
- xorl %ecx, %ecx /* nospec cx */
pushq \rax /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 */
- xorl %r8d, %r8d /* nospec r8 */
pushq %r9 /* pt_regs->r9 */
- xorl %r9d, %r9d /* nospec r9 */
pushq %r10 /* pt_regs->r10 */
- xorl %r10d, %r10d /* nospec r10 */
pushq %r11 /* pt_regs->r11 */
- xorl %r11d, %r11d /* nospec r11*/
pushq %rbx /* pt_regs->rbx */
- xorl %ebx, %ebx /* nospec rbx*/
pushq %rbp /* pt_regs->rbp */
- xorl %ebp, %ebp /* nospec rbp*/
pushq %r12 /* pt_regs->r12 */
- xorl %r12d, %r12d /* nospec r12*/
pushq %r13 /* pt_regs->r13 */
- xorl %r13d, %r13d /* nospec r13*/
pushq %r14 /* pt_regs->r14 */
- xorl %r14d, %r14d /* nospec r14*/
pushq %r15 /* pt_regs->r15 */
- xorl %r15d, %r15d /* nospec r15*/
UNWIND_HINT_REGS
+
.if \save_ret
pushq %rsi /* return address on top of stack */
.endif
+
+ /*
+ * Sanitize registers of values that a speculation attack might
+ * otherwise want to exploit. The lower registers are likely clobbered
+ * well before they could be put to use in a speculative execution
+ * gadget.
+ */
+ xorl %edx, %edx /* nospec dx */
+ xorl %ecx, %ecx /* nospec cx */
+ xorl %r8d, %r8d /* nospec r8 */
+ xorl %r9d, %r9d /* nospec r9 */
+ xorl %r10d, %r10d /* nospec r10 */
+ xorl %r11d, %r11d /* nospec r11 */
+ xorl %ebx, %ebx /* nospec rbx */
+ xorl %ebp, %ebp /* nospec rbp */
+ xorl %r12d, %r12d /* nospec r12 */
+ xorl %r13d, %r13d /* nospec r13 */
+ xorl %r14d, %r14d /* nospec r14 */
+ xorl %r15d, %r15d /* nospec r15 */
+
.endm
.macro POP_REGS pop_rdi=1 skip_r11rcx=0
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index b67bae7091d7..ac232f456396 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -816,7 +816,7 @@ SYM_CODE_START(ret_from_fork)
/* kernel thread */
1: movl %edi, %eax
- CALL_NOSPEC %ebx
+ CALL_NOSPEC ebx
/*
* A kernel thread is allowed to return here after successfully
* calling do_execve(). Exit to userspace to complete the execve()
@@ -1501,7 +1501,7 @@ SYM_CODE_START_LOCAL_NOALIGN(common_exception_read_cr2)
TRACE_IRQS_OFF
movl %esp, %eax # pt_regs pointer
- CALL_NOSPEC %edi
+ CALL_NOSPEC edi
jmp ret_from_exception
SYM_CODE_END(common_exception_read_cr2)
@@ -1522,7 +1522,7 @@ SYM_CODE_START_LOCAL_NOALIGN(common_exception)
TRACE_IRQS_OFF
movl %esp, %eax # pt_regs pointer
- CALL_NOSPEC %edi
+ CALL_NOSPEC edi
jmp ret_from_exception
SYM_CODE_END(common_exception)
@@ -1536,7 +1536,6 @@ SYM_CODE_START(debug)
jmp common_exception
SYM_CODE_END(debug)
-#ifdef CONFIG_DOUBLEFAULT
SYM_CODE_START(double_fault)
1:
/*
@@ -1576,7 +1575,6 @@ SYM_CODE_START(double_fault)
hlt
jmp 1b
SYM_CODE_END(double_fault)
-#endif
/*
* NMI is doubly nasty. It can happen on the first instruction of
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 0e9504fabe52..64fe3d82157e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -249,7 +249,6 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
*/
syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
- UNWIND_HINT_EMPTY
POP_REGS pop_rdi=0 skip_r11rcx=1
/*
@@ -258,6 +257,7 @@ syscall_return_via_sysret:
*/
movq %rsp, %rdi
movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+ UNWIND_HINT_EMPTY
pushq RSP-RDI(%rdi) /* RSP */
pushq (%rdi) /* RDI */
@@ -279,8 +279,7 @@ SYM_CODE_END(entry_SYSCALL_64)
* %rdi: prev task
* %rsi: next task
*/
-SYM_CODE_START(__switch_to_asm)
- UNWIND_HINT_FUNC
+SYM_FUNC_START(__switch_to_asm)
/*
* Save callee-saved registers
* This must match the order in inactive_task_frame
@@ -321,7 +320,7 @@ SYM_CODE_START(__switch_to_asm)
popq %rbp
jmp __switch_to
-SYM_CODE_END(__switch_to_asm)
+SYM_FUNC_END(__switch_to_asm)
/*
* A newly forked process directly context switches into this address.
@@ -349,7 +348,7 @@ SYM_CODE_START(ret_from_fork)
/* kernel thread */
UNWIND_HINT_EMPTY
movq %r12, %rdi
- CALL_NOSPEC %rbx
+ CALL_NOSPEC rbx
/*
* A kernel thread is allowed to return here after successfully
* calling do_execve(). Exit to userspace to complete the execve()
@@ -512,7 +511,7 @@ SYM_CODE_END(spurious_entries_start)
* +----------------------------------------------------+
*/
SYM_CODE_START(interrupt_entry)
- UNWIND_HINT_FUNC
+ UNWIND_HINT_IRET_REGS offset=16
ASM_CLAC
cld
@@ -544,9 +543,9 @@ SYM_CODE_START(interrupt_entry)
pushq 5*8(%rdi) /* regs->eflags */
pushq 4*8(%rdi) /* regs->cs */
pushq 3*8(%rdi) /* regs->ip */
+ UNWIND_HINT_IRET_REGS
pushq 2*8(%rdi) /* regs->orig_ax */
pushq 8(%rdi) /* return address */
- UNWIND_HINT_FUNC
movq (%rdi), %rdi
jmp 2f
@@ -637,6 +636,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
*/
movq %rsp, %rdi
movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+ UNWIND_HINT_EMPTY
/* Copy the IRET frame to the trampoline stack. */
pushq 6*8(%rdi) /* SS */
@@ -1739,7 +1739,7 @@ SYM_CODE_START(rewind_stack_do_exit)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rax
leaq -PTREGS_SIZE(%rax), %rsp
- UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE
+ UNWIND_HINT_REGS
call do_exit
SYM_CODE_END(rewind_stack_do_exit)
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 54581ac671b4..d8f8a1a69ed1 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -442,3 +442,4 @@
435 i386 clone3 sys_clone3
437 i386 openat2 sys_openat2
438 i386 pidfd_getfd sys_pidfd_getfd
+439 i386 faccessat2 sys_faccessat2
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 37b844f839bc..78847b32e137 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -359,6 +359,7 @@
435 common clone3 sys_clone3
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 433a1259f61d..54e03ab26ff3 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -24,6 +24,8 @@ VDSO32-$(CONFIG_IA32_EMULATION) := y
# files to link into the vdso
vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
+vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
+vobjs32-y += vdso32/vclock_gettime.o
# files to link into kernel
obj-y += vma.o
@@ -37,10 +39,12 @@ vdso_img-$(VDSO32-y) += 32
obj-$(VDSO32-y) += vdso32-setup.o
vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
+vobjs32 := $(foreach F,$(vobjs32-y),$(obj)/$F)
$(obj)/vdso.o: $(obj)/vdso.so
targets += vdso.lds $(vobjs-y)
+targets += vdso32/vdso32.lds $(vobjs32-y)
# Build the vDSO image C files and link them in.
vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o)
@@ -130,10 +134,6 @@ $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE
CPPFLAGS_vdso32/vdso32.lds = $(CPPFLAGS_vdso.lds)
VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1
-targets += vdso32/vdso32.lds
-targets += vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
-targets += vdso32/vclock_gettime.o
-
KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO
$(obj)/vdso32.so.dbg: KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
$(obj)/vdso32.so.dbg: asflags-$(CONFIG_X86_64) += -m32
@@ -158,12 +158,7 @@ endif
$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
-$(obj)/vdso32.so.dbg: FORCE \
- $(obj)/vdso32/vdso32.lds \
- $(obj)/vdso32/vclock_gettime.o \
- $(obj)/vdso32/note.o \
- $(obj)/vdso32/system_call.o \
- $(obj)/vdso32/sigreturn.o
+$(obj)/vdso32.so.dbg: $(obj)/vdso32/vdso32.lds $(vobjs32) FORCE
$(call if_changed,vdso_and_check)
#
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 3842873b3ae3..7380908045c7 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -187,7 +187,7 @@ static void map_input(const char *name, void **addr, size_t *len, int prot)
int fd = open(name, O_RDONLY);
if (fd == -1)
- err(1, "%s", name);
+ err(1, "open(%s)", name);
tmp_len = lseek(fd, 0, SEEK_END);
if (tmp_len == (off_t)-1)
@@ -240,7 +240,7 @@ int main(int argc, char **argv)
outfilename = argv[3];
outfile = fopen(outfilename, "w");
if (!outfile)
- err(1, "%s", argv[2]);
+ err(1, "fopen(%s)", outfilename);
go(raw_addr, raw_len, stripped_addr, stripped_len, outfile, name);
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index a20b134de2a8..6f46e11ce539 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -13,8 +13,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
unsigned long load_size = -1; /* Work around bogus warning */
unsigned long mapping_size;
ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr;
- int i;
- unsigned long j;
+ unsigned long i, syms_nr;
ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr,
*alt_sec = NULL;
ELF(Dyn) *dyn = 0, *dyn_end = 0;
@@ -86,11 +85,10 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
strtab_hdr = raw_addr + GET_LE(&hdr->e_shoff) +
GET_LE(&hdr->e_shentsize) * GET_LE(&symtab_hdr->sh_link);
+ syms_nr = GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize);
/* Walk the symbol table */
- for (i = 0;
- i < GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize);
- i++) {
- int k;
+ for (i = 0; i < syms_nr; i++) {
+ unsigned int k;
ELF(Sym) *sym = raw_addr + GET_LE(&symtab_hdr->sh_offset) +
GET_LE(&symtab_hdr->sh_entsize) * i;
const char *sym_name = raw_addr +
@@ -150,11 +148,11 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
fprintf(outfile,
"static unsigned char raw_data[%lu] __ro_after_init __aligned(PAGE_SIZE) = {",
mapping_size);
- for (j = 0; j < stripped_len; j++) {
- if (j % 10 == 0)
+ for (i = 0; i < stripped_len; i++) {
+ if (i % 10 == 0)
fprintf(outfile, "\n\t");
fprintf(outfile, "0x%02X, ",
- (int)((unsigned char *)stripped_addr)[j]);
+ (int)((unsigned char *)stripped_addr)[i]);
}
fprintf(outfile, "\n};\n\n");
diff --git a/arch/x86/events/Kconfig b/arch/x86/events/Kconfig
index 9a7a1446cb3a..4a809c6cbd2f 100644
--- a/arch/x86/events/Kconfig
+++ b/arch/x86/events/Kconfig
@@ -10,11 +10,11 @@ config PERF_EVENTS_INTEL_UNCORE
available on NehalemEX and more modern processors.
config PERF_EVENTS_INTEL_RAPL
- tristate "Intel rapl performance events"
- depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+ tristate "Intel/AMD rapl performance events"
+ depends on PERF_EVENTS && (CPU_SUP_INTEL || CPU_SUP_AMD) && PCI
default y
---help---
- Include support for Intel rapl performance events for power
+ Include support for Intel and AMD rapl performance events for power
monitoring on modern processors.
config PERF_EVENTS_INTEL_CSTATE
diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
index 9e07f554333f..12c42eba77ec 100644
--- a/arch/x86/events/Makefile
+++ b/arch/x86/events/Makefile
@@ -1,5 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-y += core.o probe.o
+obj-$(PERF_EVENTS_INTEL_RAPL) += rapl.o
obj-y += amd/
obj-$(CONFIG_X86_LOCAL_APIC) += msr.o
obj-$(CONFIG_CPU_SUP_INTEL) += intel/
+obj-$(CONFIG_CPU_SUP_CENTAUR) += zhaoxin/
+obj-$(CONFIG_CPU_SUP_ZHAOXIN) += zhaoxin/
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index a619763e96e1..9e63ee50b19a 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1839,6 +1839,10 @@ static int __init init_hw_perf_events(void)
err = amd_pmu_init();
x86_pmu.name = "HYGON";
break;
+ case X86_VENDOR_ZHAOXIN:
+ case X86_VENDOR_CENTAUR:
+ err = zhaoxin_pmu_init();
+ break;
default:
err = -ENOTSUPP;
}
diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile
index 3468b0c1dc7c..e67a5886336c 100644
--- a/arch/x86/events/intel/Makefile
+++ b/arch/x86/events/intel/Makefile
@@ -2,8 +2,6 @@
obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o
obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o
obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o
-obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl-perf.o
-intel-rapl-perf-objs := rapl.o
obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o
intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o
obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 6a3b599ee0fe..731dd8d0dbb1 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -58,7 +58,7 @@ struct bts_buffer {
local_t head;
unsigned long end;
void **data_pages;
- struct bts_phys buf[0];
+ struct bts_phys buf[];
};
static struct pmu bts_pmu;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 332954cccece..ca35c8b5ee10 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1892,8 +1892,8 @@ static __initconst const u64 tnt_hw_cache_extra_regs
static struct extra_reg intel_tnt_extra_regs[] __read_mostly = {
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
- INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffffff9fffull, RSP_0),
- INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xffffff9fffull, RSP_1),
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff0ffffff9fffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xff0ffffff9fffull, RSP_1),
EVENT_EXTRA_END
};
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index e4aa20c0426f..442e1ed4acd4 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -643,6 +643,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &glm_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &glm_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &glm_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &glm_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_cstates),
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 1db7a51d9792..e94af4a54d0d 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -226,8 +226,6 @@ static int __init pt_pmu_hw_init(void)
pt_pmu.vmx = true;
}
- attrs = NULL;
-
for (i = 0; i < PT_CPUID_LEAVES; i++) {
cpuid_count(20, i,
&pt_pmu.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM],
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 0da4a4605536..b469ddd45515 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -130,7 +130,7 @@ struct intel_uncore_box {
struct list_head list;
struct list_head active_list;
void __iomem *io_addr;
- struct intel_uncore_extra_reg shared_regs[0];
+ struct intel_uncore_extra_reg shared_regs[];
};
/* CFL uncore 8th cbox MSRs */
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index f1cd1ca1a77b..e17a3d8a47ed 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -618,6 +618,7 @@ struct x86_pmu {
/* PMI handler bits */
unsigned int late_ack :1,
+ enabled_ack :1,
counter_freezing :1;
/*
* sysfs attrs
@@ -1133,3 +1134,12 @@ static inline int is_ht_workaround_enabled(void)
return 0;
}
#endif /* CONFIG_CPU_SUP_INTEL */
+
+#if ((defined CONFIG_CPU_SUP_CENTAUR) || (defined CONFIG_CPU_SUP_ZHAOXIN))
+int zhaoxin_pmu_init(void);
+#else
+static inline int zhaoxin_pmu_init(void)
+{
+ return 0;
+}
+#endif /*CONFIG_CPU_SUP_CENTAUR or CONFIG_CPU_SUP_ZHAOXIN*/
diff --git a/arch/x86/events/probe.c b/arch/x86/events/probe.c
index c2ede2f3b277..136a1e847254 100644
--- a/arch/x86/events/probe.c
+++ b/arch/x86/events/probe.c
@@ -10,6 +10,11 @@ not_visible(struct kobject *kobj, struct attribute *attr, int i)
return 0;
}
+/*
+ * Accepts msr[] array with non populated entries as long as either
+ * msr[i].msr is 0 or msr[i].grp is NULL. Note that the default sysfs
+ * visibility is visible when group->is_visible callback is set.
+ */
unsigned long
perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data)
{
@@ -24,8 +29,16 @@ perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data)
if (!msr[bit].no_check) {
struct attribute_group *grp = msr[bit].grp;
+ /* skip entry with no group */
+ if (!grp)
+ continue;
+
grp->is_visible = not_visible;
+ /* skip unpopulated entry */
+ if (!msr[bit].msr)
+ continue;
+
if (msr[bit].test && !msr[bit].test(bit, data))
continue;
/* Virt sucks; you cannot tell if a R/O MSR is present :/ */
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/rapl.c
index a5dbd25852cb..0f2bf59f4354 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -1,11 +1,14 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Support Intel RAPL energy consumption counters
+ * Support Intel/AMD RAPL energy consumption counters
* Copyright (C) 2013 Google, Inc., Stephane Eranian
*
* Intel RAPL interface is specified in the IA-32 Manual Vol3b
* section 14.7.1 (September 2013)
*
+ * AMD RAPL interface for Fam17h is described in the public PPR:
+ * https://bugzilla.kernel.org/show_bug.cgi?id=206537
+ *
* RAPL provides more controls than just reporting energy consumption
* however here we only expose the 3 energy consumption free running
* counters (pp0, pkg, dram).
@@ -58,8 +61,8 @@
#include <linux/nospec.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
-#include "../perf_event.h"
-#include "../probe.h"
+#include "perf_event.h"
+#include "probe.h"
MODULE_LICENSE("GPL");
@@ -128,7 +131,9 @@ struct rapl_pmus {
};
struct rapl_model {
+ struct perf_msr *rapl_msrs;
unsigned long events;
+ unsigned int msr_power_unit;
bool apply_quirk;
};
@@ -138,7 +143,7 @@ static struct rapl_pmus *rapl_pmus;
static cpumask_t rapl_cpu_mask;
static unsigned int rapl_cntr_mask;
static u64 rapl_timer_ms;
-static struct perf_msr rapl_msrs[];
+static struct perf_msr *rapl_msrs;
static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
{
@@ -455,9 +460,16 @@ static struct attribute *rapl_events_cores[] = {
NULL,
};
+static umode_t
+rapl_not_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return 0;
+}
+
static struct attribute_group rapl_events_cores_group = {
.name = "events",
.attrs = rapl_events_cores,
+ .is_visible = rapl_not_visible,
};
static struct attribute *rapl_events_pkg[] = {
@@ -470,6 +482,7 @@ static struct attribute *rapl_events_pkg[] = {
static struct attribute_group rapl_events_pkg_group = {
.name = "events",
.attrs = rapl_events_pkg,
+ .is_visible = rapl_not_visible,
};
static struct attribute *rapl_events_ram[] = {
@@ -482,6 +495,7 @@ static struct attribute *rapl_events_ram[] = {
static struct attribute_group rapl_events_ram_group = {
.name = "events",
.attrs = rapl_events_ram,
+ .is_visible = rapl_not_visible,
};
static struct attribute *rapl_events_gpu[] = {
@@ -494,6 +508,7 @@ static struct attribute *rapl_events_gpu[] = {
static struct attribute_group rapl_events_gpu_group = {
.name = "events",
.attrs = rapl_events_gpu,
+ .is_visible = rapl_not_visible,
};
static struct attribute *rapl_events_psys[] = {
@@ -506,6 +521,7 @@ static struct attribute *rapl_events_psys[] = {
static struct attribute_group rapl_events_psys_group = {
.name = "events",
.attrs = rapl_events_psys,
+ .is_visible = rapl_not_visible,
};
static bool test_msr(int idx, void *data)
@@ -513,7 +529,7 @@ static bool test_msr(int idx, void *data)
return test_bit(idx, (unsigned long *) data);
}
-static struct perf_msr rapl_msrs[] = {
+static struct perf_msr intel_rapl_msrs[] = {
[PERF_RAPL_PP0] = { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr },
[PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr },
[PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr },
@@ -521,6 +537,16 @@ static struct perf_msr rapl_msrs[] = {
[PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr },
};
+/*
+ * Force to PERF_RAPL_MAX size due to:
+ * - perf_msr_probe(PERF_RAPL_MAX)
+ * - want to use same event codes across both architectures
+ */
+static struct perf_msr amd_rapl_msrs[PERF_RAPL_MAX] = {
+ [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr },
+};
+
+
static int rapl_cpu_offline(unsigned int cpu)
{
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
@@ -575,13 +601,13 @@ static int rapl_cpu_online(unsigned int cpu)
return 0;
}
-static int rapl_check_hw_unit(bool apply_quirk)
+static int rapl_check_hw_unit(struct rapl_model *rm)
{
u64 msr_rapl_power_unit_bits;
int i;
/* protect rdmsrl() to handle virtualization */
- if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
+ if (rdmsrl_safe(rm->msr_power_unit, &msr_rapl_power_unit_bits))
return -1;
for (i = 0; i < NR_RAPL_DOMAINS; i++)
rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
@@ -592,7 +618,7 @@ static int rapl_check_hw_unit(bool apply_quirk)
* "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
* of 2. Datasheet, September 2014, Reference Number: 330784-001 "
*/
- if (apply_quirk)
+ if (rm->apply_quirk)
rapl_hw_unit[PERF_RAPL_RAM] = 16;
/*
@@ -673,6 +699,8 @@ static struct rapl_model model_snb = {
BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_PP1),
.apply_quirk = false,
+ .msr_power_unit = MSR_RAPL_POWER_UNIT,
+ .rapl_msrs = intel_rapl_msrs,
};
static struct rapl_model model_snbep = {
@@ -680,6 +708,8 @@ static struct rapl_model model_snbep = {
BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_RAM),
.apply_quirk = false,
+ .msr_power_unit = MSR_RAPL_POWER_UNIT,
+ .rapl_msrs = intel_rapl_msrs,
};
static struct rapl_model model_hsw = {
@@ -688,6 +718,8 @@ static struct rapl_model model_hsw = {
BIT(PERF_RAPL_RAM) |
BIT(PERF_RAPL_PP1),
.apply_quirk = false,
+ .msr_power_unit = MSR_RAPL_POWER_UNIT,
+ .rapl_msrs = intel_rapl_msrs,
};
static struct rapl_model model_hsx = {
@@ -695,12 +727,16 @@ static struct rapl_model model_hsx = {
BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_RAM),
.apply_quirk = true,
+ .msr_power_unit = MSR_RAPL_POWER_UNIT,
+ .rapl_msrs = intel_rapl_msrs,
};
static struct rapl_model model_knl = {
.events = BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_RAM),
.apply_quirk = true,
+ .msr_power_unit = MSR_RAPL_POWER_UNIT,
+ .rapl_msrs = intel_rapl_msrs,
};
static struct rapl_model model_skl = {
@@ -710,6 +746,15 @@ static struct rapl_model model_skl = {
BIT(PERF_RAPL_PP1) |
BIT(PERF_RAPL_PSYS),
.apply_quirk = false,
+ .msr_power_unit = MSR_RAPL_POWER_UNIT,
+ .rapl_msrs = intel_rapl_msrs,
+};
+
+static struct rapl_model model_amd_fam17h = {
+ .events = BIT(PERF_RAPL_PKG),
+ .apply_quirk = false,
+ .msr_power_unit = MSR_AMD_RAPL_POWER_UNIT,
+ .rapl_msrs = amd_rapl_msrs,
};
static const struct x86_cpu_id rapl_model_match[] __initconst = {
@@ -738,8 +783,11 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &model_hsw),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &model_hsx),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &model_hsx),
X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl),
+ X86_MATCH_VENDOR_FAM(AMD, 0x17, &model_amd_fam17h),
{},
};
MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
@@ -755,10 +803,13 @@ static int __init rapl_pmu_init(void)
return -ENODEV;
rm = (struct rapl_model *) id->driver_data;
+
+ rapl_msrs = rm->rapl_msrs;
+
rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX,
false, (void *) &rm->events);
- ret = rapl_check_hw_unit(rm->apply_quirk);
+ ret = rapl_check_hw_unit(rm);
if (ret)
return ret;
diff --git a/arch/x86/events/zhaoxin/Makefile b/arch/x86/events/zhaoxin/Makefile
new file mode 100644
index 000000000000..642c1174d662
--- /dev/null
+++ b/arch/x86/events/zhaoxin/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-y += core.o
diff --git a/arch/x86/events/zhaoxin/core.c b/arch/x86/events/zhaoxin/core.c
new file mode 100644
index 000000000000..898fa1ae9ceb
--- /dev/null
+++ b/arch/x86/events/zhaoxin/core.c
@@ -0,0 +1,613 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Zhoaxin PMU; like Intel Architectural PerfMon-v2
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/nmi.h>
+
+#include <asm/cpufeature.h>
+#include <asm/hardirq.h>
+#include <asm/apic.h>
+
+#include "../perf_event.h"
+
+/*
+ * Zhaoxin PerfMon, used on zxc and later.
+ */
+static u64 zx_pmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = {
+
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0082,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0515,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x051a,
+ [PERF_COUNT_HW_BUS_CYCLES] = 0x0083,
+};
+
+static struct event_constraint zxc_event_constraints[] __read_mostly = {
+
+ FIXED_EVENT_CONSTRAINT(0x0082, 1), /* unhalted core clock cycles */
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint zxd_event_constraints[] __read_mostly = {
+
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* retired instructions */
+ FIXED_EVENT_CONSTRAINT(0x0082, 1), /* unhalted core clock cycles */
+ FIXED_EVENT_CONSTRAINT(0x0083, 2), /* unhalted bus clock cycles */
+ EVENT_CONSTRAINT_END
+};
+
+static __initconst const u64 zxd_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+[C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0042,
+ [C(RESULT_MISS)] = 0x0538,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0x0043,
+ [C(RESULT_MISS)] = 0x0562,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+[C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0300,
+ [C(RESULT_MISS)] = 0x0301,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0x030a,
+ [C(RESULT_MISS)] = 0x030b,
+ },
+},
+[C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+[C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0042,
+ [C(RESULT_MISS)] = 0x052c,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0x0043,
+ [C(RESULT_MISS)] = 0x0530,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0x0564,
+ [C(RESULT_MISS)] = 0x0565,
+ },
+},
+[C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x00c0,
+ [C(RESULT_MISS)] = 0x0534,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+[C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0700,
+ [C(RESULT_MISS)] = 0x0709,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+[C(NODE)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+};
+
+static __initconst const u64 zxe_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+[C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0568,
+ [C(RESULT_MISS)] = 0x054b,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0x0669,
+ [C(RESULT_MISS)] = 0x0562,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+[C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0300,
+ [C(RESULT_MISS)] = 0x0301,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0x030a,
+ [C(RESULT_MISS)] = 0x030b,
+ },
+},
+[C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0,
+ [C(RESULT_MISS)] = 0x0,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0x0,
+ [C(RESULT_MISS)] = 0x0,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0x0,
+ [C(RESULT_MISS)] = 0x0,
+ },
+},
+[C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0568,
+ [C(RESULT_MISS)] = 0x052c,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0x0669,
+ [C(RESULT_MISS)] = 0x0530,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0x0564,
+ [C(RESULT_MISS)] = 0x0565,
+ },
+},
+[C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x00c0,
+ [C(RESULT_MISS)] = 0x0534,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+[C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0028,
+ [C(RESULT_MISS)] = 0x0029,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+[C(NODE)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+};
+
+static void zhaoxin_pmu_disable_all(void)
+{
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+}
+
+static void zhaoxin_pmu_enable_all(int added)
+{
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
+}
+
+static inline u64 zhaoxin_pmu_get_status(void)
+{
+ u64 status;
+
+ rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+
+ return status;
+}
+
+static inline void zhaoxin_pmu_ack_status(u64 ack)
+{
+ wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
+}
+
+static inline void zxc_pmu_ack_status(u64 ack)
+{
+ /*
+ * ZXC needs global control enabled in order to clear status bits.
+ */
+ zhaoxin_pmu_enable_all(0);
+ zhaoxin_pmu_ack_status(ack);
+ zhaoxin_pmu_disable_all();
+}
+
+static void zhaoxin_pmu_disable_fixed(struct hw_perf_event *hwc)
+{
+ int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
+ u64 ctrl_val, mask;
+
+ mask = 0xfULL << (idx * 4);
+
+ rdmsrl(hwc->config_base, ctrl_val);
+ ctrl_val &= ~mask;
+ wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static void zhaoxin_pmu_disable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+ zhaoxin_pmu_disable_fixed(hwc);
+ return;
+ }
+
+ x86_pmu_disable_event(event);
+}
+
+static void zhaoxin_pmu_enable_fixed(struct hw_perf_event *hwc)
+{
+ int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
+ u64 ctrl_val, bits, mask;
+
+ /*
+ * Enable IRQ generation (0x8),
+ * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
+ * if requested:
+ */
+ bits = 0x8ULL;
+ if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
+ bits |= 0x2;
+ if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
+ bits |= 0x1;
+
+ bits <<= (idx * 4);
+ mask = 0xfULL << (idx * 4);
+
+ rdmsrl(hwc->config_base, ctrl_val);
+ ctrl_val &= ~mask;
+ ctrl_val |= bits;
+ wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static void zhaoxin_pmu_enable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+ zhaoxin_pmu_enable_fixed(hwc);
+ return;
+ }
+
+ __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+}
+
+/*
+ * This handler is triggered by the local APIC, so the APIC IRQ handling
+ * rules apply:
+ */
+static int zhaoxin_pmu_handle_irq(struct pt_regs *regs)
+{
+ struct perf_sample_data data;
+ struct cpu_hw_events *cpuc;
+ int handled = 0;
+ u64 status;
+ int bit;
+
+ cpuc = this_cpu_ptr(&cpu_hw_events);
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ zhaoxin_pmu_disable_all();
+ status = zhaoxin_pmu_get_status();
+ if (!status)
+ goto done;
+
+again:
+ if (x86_pmu.enabled_ack)
+ zxc_pmu_ack_status(status);
+ else
+ zhaoxin_pmu_ack_status(status);
+
+ inc_irq_stat(apic_perf_irqs);
+
+ /*
+ * CondChgd bit 63 doesn't mean any overflow status. Ignore
+ * and clear the bit.
+ */
+ if (__test_and_clear_bit(63, (unsigned long *)&status)) {
+ if (!status)
+ goto done;
+ }
+
+ for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
+ struct perf_event *event = cpuc->events[bit];
+
+ handled++;
+
+ if (!test_bit(bit, cpuc->active_mask))
+ continue;
+
+ x86_perf_event_update(event);
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+
+ if (!x86_perf_event_set_period(event))
+ continue;
+
+ if (perf_event_overflow(event, &data, regs))
+ x86_pmu_stop(event, 0);
+ }
+
+ /*
+ * Repeat if there is more work to be done:
+ */
+ status = zhaoxin_pmu_get_status();
+ if (status)
+ goto again;
+
+done:
+ zhaoxin_pmu_enable_all(0);
+ return handled;
+}
+
+static u64 zhaoxin_pmu_event_map(int hw_event)
+{
+ return zx_pmon_event_map[hw_event];
+}
+
+static struct event_constraint *
+zhaoxin_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct event_constraint *c;
+
+ if (x86_pmu.event_constraints) {
+ for_each_event_constraint(c, x86_pmu.event_constraints) {
+ if ((event->hw.config & c->cmask) == c->code)
+ return c;
+ }
+ }
+
+ return &unconstrained;
+}
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+PMU_FORMAT_ATTR(umask, "config:8-15");
+PMU_FORMAT_ATTR(edge, "config:18");
+PMU_FORMAT_ATTR(inv, "config:23");
+PMU_FORMAT_ATTR(cmask, "config:24-31");
+
+static struct attribute *zx_arch_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_cmask.attr,
+ NULL,
+};
+
+static ssize_t zhaoxin_event_sysfs_show(char *page, u64 config)
+{
+ u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);
+
+ return x86_event_sysfs_show(page, config, event);
+}
+
+static const struct x86_pmu zhaoxin_pmu __initconst = {
+ .name = "zhaoxin",
+ .handle_irq = zhaoxin_pmu_handle_irq,
+ .disable_all = zhaoxin_pmu_disable_all,
+ .enable_all = zhaoxin_pmu_enable_all,
+ .enable = zhaoxin_pmu_enable_event,
+ .disable = zhaoxin_pmu_disable_event,
+ .hw_config = x86_pmu_hw_config,
+ .schedule_events = x86_schedule_events,
+ .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
+ .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
+ .event_map = zhaoxin_pmu_event_map,
+ .max_events = ARRAY_SIZE(zx_pmon_event_map),
+ .apic = 1,
+ /*
+ * For zxd/zxe, read/write operation for PMCx MSR is 48 bits.
+ */
+ .max_period = (1ULL << 47) - 1,
+ .get_event_constraints = zhaoxin_get_event_constraints,
+
+ .format_attrs = zx_arch_formats_attr,
+ .events_sysfs_show = zhaoxin_event_sysfs_show,
+};
+
+static const struct { int id; char *name; } zx_arch_events_map[] __initconst = {
+ { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
+ { PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
+ { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
+ { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
+ { PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
+ { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
+ { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
+};
+
+static __init void zhaoxin_arch_events_quirk(void)
+{
+ int bit;
+
+ /* disable event that reported as not presend by cpuid */
+ for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(zx_arch_events_map)) {
+ zx_pmon_event_map[zx_arch_events_map[bit].id] = 0;
+ pr_warn("CPUID marked event: \'%s\' unavailable\n",
+ zx_arch_events_map[bit].name);
+ }
+}
+
+__init int zhaoxin_pmu_init(void)
+{
+ union cpuid10_edx edx;
+ union cpuid10_eax eax;
+ union cpuid10_ebx ebx;
+ struct event_constraint *c;
+ unsigned int unused;
+ int version;
+
+ pr_info("Welcome to zhaoxin pmu!\n");
+
+ /*
+ * Check whether the Architectural PerfMon supports
+ * hw_event or not.
+ */
+ cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
+
+ if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT - 1)
+ return -ENODEV;
+
+ version = eax.split.version_id;
+ if (version != 2)
+ return -ENODEV;
+
+ x86_pmu = zhaoxin_pmu;
+ pr_info("Version check pass!\n");
+
+ x86_pmu.version = version;
+ x86_pmu.num_counters = eax.split.num_counters;
+ x86_pmu.cntval_bits = eax.split.bit_width;
+ x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
+ x86_pmu.events_maskl = ebx.full;
+ x86_pmu.events_mask_len = eax.split.mask_length;
+
+ x86_pmu.num_counters_fixed = edx.split.num_counters_fixed;
+ x86_add_quirk(zhaoxin_arch_events_quirk);
+
+ switch (boot_cpu_data.x86) {
+ case 0x06:
+ if (boot_cpu_data.x86_model == 0x0f || boot_cpu_data.x86_model == 0x19) {
+
+ x86_pmu.max_period = x86_pmu.cntval_mask >> 1;
+
+ /* Clearing status works only if the global control is enable on zxc. */
+ x86_pmu.enabled_ack = 1;
+
+ x86_pmu.event_constraints = zxc_event_constraints;
+ zx_pmon_event_map[PERF_COUNT_HW_INSTRUCTIONS] = 0;
+ zx_pmon_event_map[PERF_COUNT_HW_CACHE_REFERENCES] = 0;
+ zx_pmon_event_map[PERF_COUNT_HW_CACHE_MISSES] = 0;
+ zx_pmon_event_map[PERF_COUNT_HW_BUS_CYCLES] = 0;
+
+ pr_cont("ZXC events, ");
+ break;
+ }
+ return -ENODEV;
+
+ case 0x07:
+ zx_pmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+ X86_CONFIG(.event = 0x01, .umask = 0x01, .inv = 0x01, .cmask = 0x01);
+
+ zx_pmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+ X86_CONFIG(.event = 0x0f, .umask = 0x04, .inv = 0, .cmask = 0);
+
+ switch (boot_cpu_data.x86_model) {
+ case 0x1b:
+ memcpy(hw_cache_event_ids, zxd_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+
+ x86_pmu.event_constraints = zxd_event_constraints;
+
+ zx_pmon_event_map[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0700;
+ zx_pmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x0709;
+
+ pr_cont("ZXD events, ");
+ break;
+ case 0x3b:
+ memcpy(hw_cache_event_ids, zxe_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+
+ x86_pmu.event_constraints = zxd_event_constraints;
+
+ zx_pmon_event_map[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0028;
+ zx_pmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x0029;
+
+ pr_cont("ZXE events, ");
+ break;
+ default:
+ return -ENODEV;
+ }
+ break;
+
+ default:
+ return -ENODEV;
+ }
+
+ x86_pmu.intel_ctrl = (1 << (x86_pmu.num_counters)) - 1;
+ x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
+
+ if (x86_pmu.event_constraints) {
+ for_each_event_constraint(c, x86_pmu.event_constraints) {
+ c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
+ c->weight += x86_pmu.num_counters;
+ }
+ }
+
+ return 0;
+}
+
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 624f5d9b0f79..e2137070386a 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -73,7 +73,8 @@ static int hv_cpu_init(unsigned int cpu)
struct page *pg;
input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
- pg = alloc_page(GFP_KERNEL);
+ /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
+ pg = alloc_page(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL);
if (unlikely(!pg))
return -ENOMEM;
*input_arg = page_address(pg);
@@ -96,8 +97,7 @@ static int hv_cpu_init(unsigned int cpu)
* not be stopped in the case of CPU offlining and the VM will hang.
*/
if (!*hvp) {
- *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO,
- PAGE_KERNEL);
+ *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
}
if (*hvp) {
@@ -225,10 +225,18 @@ static int hv_cpu_die(unsigned int cpu)
rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
if (re_ctrl.target_vp == hv_vp_index[cpu]) {
- /* Reassign to some other online CPU */
+ /*
+ * Reassign reenlightenment notifications to some other online
+ * CPU or just disable the feature if there are no online CPUs
+ * left (happens on hibernation).
+ */
new_cpu = cpumask_any_but(cpu_online_mask, cpu);
- re_ctrl.target_vp = hv_vp_index[new_cpu];
+ if (new_cpu < nr_cpu_ids)
+ re_ctrl.target_vp = hv_vp_index[new_cpu];
+ else
+ re_ctrl.enabled = 0;
+
wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
}
@@ -254,6 +262,7 @@ static int __init hv_pci_init(void)
static int hv_suspend(void)
{
union hv_x64_msr_hypercall_contents hypercall_msr;
+ int ret;
/*
* Reset the hypercall page as it is going to be invalidated
@@ -270,12 +279,17 @@ static int hv_suspend(void)
hypercall_msr.enable = 0;
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
- return 0;
+ ret = hv_cpu_die(0);
+ return ret;
}
static void hv_resume(void)
{
union hv_x64_msr_hypercall_contents hypercall_msr;
+ int ret;
+
+ ret = hv_cpu_init(0);
+ WARN_ON(ret);
/* Re-enable the hypercall page */
rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
@@ -286,8 +300,16 @@ static void hv_resume(void)
hv_hypercall_pg = hv_hypercall_pg_saved;
hv_hypercall_pg_saved = NULL;
+
+ /*
+ * Reenlightenment notifications are disabled by hv_cpu_die(0),
+ * reenable them here if hv_reenlightenment_cb was previously set.
+ */
+ if (hv_reenlightenment_cb)
+ set_hv_tscchange_cb(hv_reenlightenment_cb);
}
+/* Note: when the ops are called, only CPU0 is online and IRQs are disabled. */
static struct syscore_ops hv_syscore_ops = {
.suspend = hv_suspend,
.resume = hv_resume,
@@ -356,7 +378,7 @@ void __init hyperv_init(void)
guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0);
wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
- hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX);
+ hv_hypercall_pg = vmalloc_exec(PAGE_SIZE);
if (hv_hypercall_pg == NULL) {
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
goto remove_cpuhp_state;
diff --git a/arch/x86/ia32/audit.c b/arch/x86/ia32/audit.c
index 3d21eab7aaed..6efe6cb3768a 100644
--- a/arch/x86/ia32/audit.c
+++ b/arch/x86/ia32/audit.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <asm/unistd_32.h>
+#include <asm/audit.h>
unsigned ia32_dir_class[] = {
#include <asm-generic/audit_dir_write.h>
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index f9d8804144d0..81cf22398cd1 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -350,7 +350,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
unsafe_put_user(*(__u64 *)set, (__u64 *)&frame->uc.uc_sigmask, Efault);
user_access_end();
- if (__copy_siginfo_to_user32(&frame->info, &ksig->info, false))
+ if (__copy_siginfo_to_user32(&frame->info, &ksig->info))
return -EFAULT;
/* Set up registers for signal handler */
diff --git a/arch/x86/include/asm/GEN-for-each-reg.h b/arch/x86/include/asm/GEN-for-each-reg.h
new file mode 100644
index 000000000000..1b07fb102c4e
--- /dev/null
+++ b/arch/x86/include/asm/GEN-for-each-reg.h
@@ -0,0 +1,25 @@
+#ifdef CONFIG_64BIT
+GEN(rax)
+GEN(rbx)
+GEN(rcx)
+GEN(rdx)
+GEN(rsi)
+GEN(rdi)
+GEN(rbp)
+GEN(r8)
+GEN(r9)
+GEN(r10)
+GEN(r11)
+GEN(r12)
+GEN(r13)
+GEN(r14)
+GEN(r15)
+#else
+GEN(eax)
+GEN(ebx)
+GEN(ecx)
+GEN(edx)
+GEN(esi)
+GEN(edi)
+GEN(ebp)
+#endif
diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h
index 99bb207fc04c..87ce8e963215 100644
--- a/arch/x86/include/asm/apb_timer.h
+++ b/arch/x86/include/asm/apb_timer.h
@@ -25,11 +25,7 @@
#define APBT_MIN_FREQ 1000000
#define APBT_MMAP_SIZE 1024
-#define APBT_DEV_USED 1
-
extern void apbt_time_init(void);
-extern unsigned long apbt_quick_calibrate(void);
-extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu);
extern void apbt_setup_secondary_clock(void);
extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint);
@@ -38,7 +34,6 @@ extern int sfi_mtimer_num;
#else /* CONFIG_APB_TIMER */
-static inline unsigned long apbt_quick_calibrate(void) {return 0; }
static inline void apbt_time_init(void) { }
#endif
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index 7a4bb1bd4bdb..ebc248e49549 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -15,16 +15,6 @@
#define RDRAND_RETRY_LOOPS 10
-#define RDRAND_INT ".byte 0x0f,0xc7,0xf0"
-#define RDSEED_INT ".byte 0x0f,0xc7,0xf8"
-#ifdef CONFIG_X86_64
-# define RDRAND_LONG ".byte 0x48,0x0f,0xc7,0xf0"
-# define RDSEED_LONG ".byte 0x48,0x0f,0xc7,0xf8"
-#else
-# define RDRAND_LONG RDRAND_INT
-# define RDSEED_LONG RDSEED_INT
-#endif
-
/* Unconditional execution of RDRAND and RDSEED */
static inline bool __must_check rdrand_long(unsigned long *v)
@@ -32,9 +22,9 @@ static inline bool __must_check rdrand_long(unsigned long *v)
bool ok;
unsigned int retry = RDRAND_RETRY_LOOPS;
do {
- asm volatile(RDRAND_LONG
+ asm volatile("rdrand %[out]"
CC_SET(c)
- : CC_OUT(c) (ok), "=a" (*v));
+ : CC_OUT(c) (ok), [out] "=r" (*v));
if (ok)
return true;
} while (--retry);
@@ -46,9 +36,9 @@ static inline bool __must_check rdrand_int(unsigned int *v)
bool ok;
unsigned int retry = RDRAND_RETRY_LOOPS;
do {
- asm volatile(RDRAND_INT
+ asm volatile("rdrand %[out]"
CC_SET(c)
- : CC_OUT(c) (ok), "=a" (*v));
+ : CC_OUT(c) (ok), [out] "=r" (*v));
if (ok)
return true;
} while (--retry);
@@ -58,18 +48,18 @@ static inline bool __must_check rdrand_int(unsigned int *v)
static inline bool __must_check rdseed_long(unsigned long *v)
{
bool ok;
- asm volatile(RDSEED_LONG
+ asm volatile("rdseed %[out]"
CC_SET(c)
- : CC_OUT(c) (ok), "=a" (*v));
+ : CC_OUT(c) (ok), [out] "=r" (*v));
return ok;
}
static inline bool __must_check rdseed_int(unsigned int *v)
{
bool ok;
- asm volatile(RDSEED_INT
+ asm volatile("rdseed %[out]"
CC_SET(c)
- : CC_OUT(c) (ok), "=a" (*v));
+ : CC_OUT(c) (ok), [out] "=r" (*v));
return ok;
}
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index ce92c4acc913..9bf2620ce817 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -17,24 +17,19 @@ extern void cmpxchg8b_emu(void);
#endif
#ifdef CONFIG_RETPOLINE
-#ifdef CONFIG_X86_32
-#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
-#else
-#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
-INDIRECT_THUNK(8)
-INDIRECT_THUNK(9)
-INDIRECT_THUNK(10)
-INDIRECT_THUNK(11)
-INDIRECT_THUNK(12)
-INDIRECT_THUNK(13)
-INDIRECT_THUNK(14)
-INDIRECT_THUNK(15)
-#endif
-INDIRECT_THUNK(ax)
-INDIRECT_THUNK(bx)
-INDIRECT_THUNK(cx)
-INDIRECT_THUNK(dx)
-INDIRECT_THUNK(si)
-INDIRECT_THUNK(di)
-INDIRECT_THUNK(bp)
+
+#define DECL_INDIRECT_THUNK(reg) \
+ extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
+
+#define DECL_RETPOLINE(reg) \
+ extern asmlinkage void __x86_retpoline_ ## reg (void);
+
+#undef GEN
+#define GEN(reg) DECL_INDIRECT_THUNK(reg)
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) DECL_RETPOLINE(reg)
+#include <asm/GEN-for-each-reg.h>
+
#endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/audit.h b/arch/x86/include/asm/audit.h
new file mode 100644
index 000000000000..36aec57ea7a3
--- /dev/null
+++ b/arch/x86/include/asm/audit.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_AUDIT_H
+#define _ASM_X86_AUDIT_H
+
+int ia32_classify_syscall(unsigned int syscall);
+
+#endif /* _ASM_X86_AUDIT_H */
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 53f246e9df5a..0367efdc5b7a 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -52,9 +52,9 @@ static __always_inline void
arch_set_bit(long nr, volatile unsigned long *addr)
{
if (__builtin_constant_p(nr)) {
- asm volatile(LOCK_PREFIX "orb %1,%0"
+ asm volatile(LOCK_PREFIX "orb %b1,%0"
: CONST_MASK_ADDR(nr, addr)
- : "iq" (CONST_MASK(nr) & 0xff)
+ : "iq" (CONST_MASK(nr))
: "memory");
} else {
asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
@@ -72,9 +72,9 @@ static __always_inline void
arch_clear_bit(long nr, volatile unsigned long *addr)
{
if (__builtin_constant_p(nr)) {
- asm volatile(LOCK_PREFIX "andb %1,%0"
+ asm volatile(LOCK_PREFIX "andb %b1,%0"
: CONST_MASK_ADDR(nr, addr)
- : "iq" (CONST_MASK(nr) ^ 0xff));
+ : "iq" (~CONST_MASK(nr)));
} else {
asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
@@ -123,9 +123,9 @@ static __always_inline void
arch_change_bit(long nr, volatile unsigned long *addr)
{
if (__builtin_constant_p(nr)) {
- asm volatile(LOCK_PREFIX "xorb %1,%0"
+ asm volatile(LOCK_PREFIX "xorb %b1,%0"
: CONST_MASK_ADDR(nr, addr)
- : "iq" ((u8)CONST_MASK(nr)));
+ : "iq" (CONST_MASK(nr)));
} else {
asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
diff --git a/arch/x86/include/asm/checksum.h b/arch/x86/include/asm/checksum.h
index d79d1e622dcf..0ada98d5d09f 100644
--- a/arch/x86/include/asm/checksum.h
+++ b/arch/x86/include/asm/checksum.h
@@ -1,4 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1
+#define HAVE_CSUM_COPY_USER
#ifdef CONFIG_X86_32
# include <asm/checksum_32.h>
#else
diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h
index f57b94e02c57..11624c8a9d8d 100644
--- a/arch/x86/include/asm/checksum_32.h
+++ b/arch/x86/include/asm/checksum_32.h
@@ -44,18 +44,21 @@ static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst,
return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL);
}
-static inline __wsum csum_partial_copy_from_user(const void __user *src,
- void *dst,
- int len, __wsum sum,
- int *err_ptr)
+static inline __wsum csum_and_copy_from_user(const void __user *src,
+ void *dst, int len,
+ __wsum sum, int *err_ptr)
{
__wsum ret;
might_sleep();
- stac();
+ if (!user_access_begin(src, len)) {
+ if (len)
+ *err_ptr = -EFAULT;
+ return sum;
+ }
ret = csum_partial_copy_generic((__force void *)src, dst,
len, sum, err_ptr, NULL);
- clac();
+ user_access_end();
return ret;
}
@@ -173,7 +176,6 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
/*
* Copy and checksum to user
*/
-#define HAVE_CSUM_COPY_USER
static inline __wsum csum_and_copy_to_user(const void *src,
void __user *dst,
int len, __wsum sum,
@@ -182,11 +184,10 @@ static inline __wsum csum_and_copy_to_user(const void *src,
__wsum ret;
might_sleep();
- if (access_ok(dst, len)) {
- stac();
+ if (user_access_begin(dst, len)) {
ret = csum_partial_copy_generic(src, (__force void *)dst,
len, sum, NULL, err_ptr);
- clac();
+ user_access_end();
return ret;
}
diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h
index 3ec6d3267cf9..0a289b87e872 100644
--- a/arch/x86/include/asm/checksum_64.h
+++ b/arch/x86/include/asm/checksum_64.h
@@ -129,27 +129,19 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
*/
extern __wsum csum_partial(const void *buff, int len, __wsum sum);
-#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1
-#define HAVE_CSUM_COPY_USER 1
-
-
/* Do not call this directly. Use the wrappers below */
extern __visible __wsum csum_partial_copy_generic(const void *src, const void *dst,
int len, __wsum sum,
int *src_err_ptr, int *dst_err_ptr);
-extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst,
+extern __wsum csum_and_copy_from_user(const void __user *src, void *dst,
int len, __wsum isum, int *errp);
-extern __wsum csum_partial_copy_to_user(const void *src, void __user *dst,
+extern __wsum csum_and_copy_to_user(const void *src, void __user *dst,
int len, __wsum isum, int *errp);
extern __wsum csum_partial_copy_nocheck(const void *src, void *dst,
int len, __wsum sum);
-/* Old names. To be removed. */
-#define csum_and_copy_to_user csum_partial_copy_to_user
-#define csum_and_copy_from_user csum_partial_copy_from_user
-
/**
* ip_compute_csum - Compute an 16bit IP checksum.
* @buff: buffer address.
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 52e9f3480f69..d4edf281fff4 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -214,7 +214,11 @@ static inline bool in_compat_syscall(void)
#endif
struct compat_siginfo;
-int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
- const kernel_siginfo_t *from, bool x32_ABI);
+
+#ifdef CONFIG_X86_X32_ABI
+int copy_siginfo_to_user32(struct compat_siginfo __user *to,
+ const kernel_siginfo_t *from);
+#define copy_siginfo_to_user32 copy_siginfo_to_user32
+#endif /* CONFIG_X86_X32_ABI */
#endif /* _ASM_X86_COMPAT_H */
diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
index cf3d621c6892..eb8fcede9e3b 100644
--- a/arch/x86/include/asm/cpu_device_id.h
+++ b/arch/x86/include/asm/cpu_device_id.h
@@ -20,12 +20,14 @@
#define X86_CENTAUR_FAM6_C7_D 0xd
#define X86_CENTAUR_FAM6_NANO 0xf
+#define X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins)
/**
- * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Base macro for CPU matching
+ * X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE - Base macro for CPU matching
* @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
* The name is expanded to X86_VENDOR_@_vendor
* @_family: The family number or X86_FAMILY_ANY
* @_model: The model number, model constant or X86_MODEL_ANY
+ * @_steppings: Bitmask for steppings, stepping constant or X86_STEPPING_ANY
* @_feature: A X86_FEATURE bit or X86_FEATURE_ANY
* @_data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
@@ -37,16 +39,35 @@
* into another macro at the usage site for good reasons, then please
* start this local macro with X86_MATCH to allow easy grepping.
*/
-#define X86_MATCH_VENDOR_FAM_MODEL_FEATURE(_vendor, _family, _model, \
- _feature, _data) { \
+#define X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \
+ _steppings, _feature, _data) { \
.vendor = X86_VENDOR_##_vendor, \
.family = _family, \
.model = _model, \
+ .steppings = _steppings, \
.feature = _feature, \
.driver_data = (unsigned long) _data \
}
/**
+ * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Macro for CPU matching
+ * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
+ * The name is expanded to X86_VENDOR_@_vendor
+ * @_family: The family number or X86_FAMILY_ANY
+ * @_model: The model number, model constant or X86_MODEL_ANY
+ * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY
+ * @_data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is casted to unsigned long internally.
+ *
+ * The steppings arguments of X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE() is
+ * set to wildcards.
+ */
+#define X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, feature, data) \
+ X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(vendor, family, model, \
+ X86_STEPPING_ANY, feature, data)
+
+/**
* X86_MATCH_VENDOR_FAM_FEATURE - Macro for matching vendor, family and CPU feature
* @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
* The name is expanded to X86_VENDOR_@vendor
@@ -139,6 +160,10 @@
#define X86_MATCH_INTEL_FAM6_MODEL(model, data) \
X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, INTEL_FAM6_##model, data)
+#define X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(model, steppings, data) \
+ X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, INTEL_FAM6_##model, \
+ steppings, X86_FEATURE_ANY, data)
+
/*
* Match specific microcode revisions.
*
diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h
index de9e7841f953..630891d25819 100644
--- a/arch/x86/include/asm/delay.h
+++ b/arch/x86/include/asm/delay.h
@@ -3,8 +3,10 @@
#define _ASM_X86_DELAY_H
#include <asm-generic/delay.h>
+#include <linux/init.h>
-void use_tsc_delay(void);
+void __init use_tsc_delay(void);
+void __init use_tpause_delay(void);
void use_mwaitx_delay(void);
#endif /* _ASM_X86_DELAY_H */
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index 7e31f7f1bb06..49bd6cf3eec9 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -3,7 +3,7 @@
#define _ASM_X86_DEVICE_H
struct dev_archdata {
-#if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
+#ifdef CONFIG_IOMMU_API
void *iommu; /* hook for IOMMU specific extension */
#endif
};
diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h
index 00f7cf45e699..8e95aa4b0d17 100644
--- a/arch/x86/include/asm/dma.h
+++ b/arch/x86/include/asm/dma.h
@@ -74,7 +74,7 @@
#define MAX_DMA_PFN ((16UL * 1024 * 1024) >> PAGE_SHIFT)
/* 4GB broken PCI/AGP hardware bus master zone */
-#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
+#define MAX_DMA32_PFN (1UL << (32 - PAGE_SHIFT))
#ifdef CONFIG_X86_32
/* The maximum address that we can perform a DMA transfer to on this platform */
diff --git a/arch/x86/include/asm/doublefault.h b/arch/x86/include/asm/doublefault.h
index af9a14ac8962..54a6e4a2e132 100644
--- a/arch/x86/include/asm/doublefault.h
+++ b/arch/x86/include/asm/doublefault.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_DOUBLEFAULT_H
#define _ASM_X86_DOUBLEFAULT_H
-#if defined(CONFIG_X86_32) && defined(CONFIG_DOUBLEFAULT)
+#ifdef CONFIG_X86_32
extern void doublefault_init_cpu_tss(void);
#else
static inline void doublefault_init_cpu_tss(void)
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 8391c115c0ec..89dcc7aa7e2c 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -9,6 +9,7 @@
#include <asm/nospec-branch.h>
#include <asm/mmu_context.h>
#include <linux/build_bug.h>
+#include <linux/kernel.h>
extern unsigned long efi_fw_vendor, efi_config_table;
@@ -225,14 +226,21 @@ efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size,
/* arch specific definitions used by the stub code */
-__attribute_const__ bool efi_is_64bit(void);
+#ifdef CONFIG_EFI_MIXED
+
+#define ARCH_HAS_EFISTUB_WRAPPERS
+
+static inline bool efi_is_64bit(void)
+{
+ extern const bool efi_is64;
+
+ return efi_is64;
+}
static inline bool efi_is_native(void)
{
if (!IS_ENABLED(CONFIG_X86_64))
return true;
- if (!IS_ENABLED(CONFIG_EFI_MIXED))
- return true;
return efi_is_64bit();
}
@@ -286,6 +294,15 @@ static inline u32 efi64_convert_status(efi_status_t status)
#define __efi64_argmap_allocate_pool(type, size, buffer) \
((type), (size), efi64_zero_upper(buffer))
+#define __efi64_argmap_create_event(type, tpl, f, c, event) \
+ ((type), (tpl), (f), (c), efi64_zero_upper(event))
+
+#define __efi64_argmap_set_timer(event, type, time) \
+ ((event), (type), lower_32_bits(time), upper_32_bits(time))
+
+#define __efi64_argmap_wait_for_event(num, event, index) \
+ ((num), (event), efi64_zero_upper(index))
+
#define __efi64_argmap_handle_protocol(handle, protocol, interface) \
((handle), (protocol), efi64_zero_upper(interface))
@@ -307,6 +324,10 @@ static inline u32 efi64_convert_status(efi_status_t status)
#define __efi64_argmap_load_file(protocol, path, policy, bufsize, buf) \
((protocol), (path), (policy), efi64_zero_upper(bufsize), (buf))
+/* Graphics Output Protocol */
+#define __efi64_argmap_query_mode(gop, mode, size, info) \
+ ((gop), (mode), efi64_zero_upper(size), efi64_zero_upper(info))
+
/*
* The macros below handle the plumbing for the argument mapping. To add a
* mapping for a specific EFI method, simply define a macro
@@ -335,15 +356,26 @@ static inline u32 efi64_convert_status(efi_status_t status)
#define efi_bs_call(func, ...) \
(efi_is_native() \
- ? efi_system_table()->boottime->func(__VA_ARGS__) \
- : __efi64_thunk_map(efi_table_attr(efi_system_table(), \
- boottime), func, __VA_ARGS__))
+ ? efi_system_table->boottime->func(__VA_ARGS__) \
+ : __efi64_thunk_map(efi_table_attr(efi_system_table, \
+ boottime), \
+ func, __VA_ARGS__))
#define efi_rt_call(func, ...) \
(efi_is_native() \
- ? efi_system_table()->runtime->func(__VA_ARGS__) \
- : __efi64_thunk_map(efi_table_attr(efi_system_table(), \
- runtime), func, __VA_ARGS__))
+ ? efi_system_table->runtime->func(__VA_ARGS__) \
+ : __efi64_thunk_map(efi_table_attr(efi_system_table, \
+ runtime), \
+ func, __VA_ARGS__))
+
+#else /* CONFIG_EFI_MIXED */
+
+static inline bool efi_is_64bit(void)
+{
+ return IS_ENABLED(CONFIG_X86_64);
+}
+
+#endif /* CONFIG_EFI_MIXED */
extern bool efi_reboot_required(void);
extern bool efi_is_table_address(unsigned long phys_addr);
diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h
index 7ec59edde154..d43717b423cb 100644
--- a/arch/x86/include/asm/floppy.h
+++ b/arch/x86/include/asm/floppy.h
@@ -31,8 +31,8 @@
#define CSW fd_routine[can_use_virtual_dma & 1]
-#define fd_inb(port) inb_p(port)
-#define fd_outb(value, port) outb_p(value, port)
+#define fd_inb(base, reg) inb_p((base) + (reg))
+#define fd_outb(value, base, reg) outb_p(value, (base) + (reg))
#define fd_request_dma() CSW._request_dma(FLOPPY_DMA, "floppy")
#define fd_free_dma() CSW._free_dma(FLOPPY_DMA)
@@ -77,25 +77,26 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id)
st = 1;
for (lcount = virtual_dma_count, lptr = virtual_dma_addr;
lcount; lcount--, lptr++) {
- st = inb(virtual_dma_port + 4) & 0xa0;
- if (st != 0xa0)
+ st = inb(virtual_dma_port + FD_STATUS);
+ st &= STATUS_DMA | STATUS_READY;
+ if (st != (STATUS_DMA | STATUS_READY))
break;
if (virtual_dma_mode)
- outb_p(*lptr, virtual_dma_port + 5);
+ outb_p(*lptr, virtual_dma_port + FD_DATA);
else
- *lptr = inb_p(virtual_dma_port + 5);
+ *lptr = inb_p(virtual_dma_port + FD_DATA);
}
virtual_dma_count = lcount;
virtual_dma_addr = lptr;
- st = inb(virtual_dma_port + 4);
+ st = inb(virtual_dma_port + FD_STATUS);
}
#ifdef TRACE_FLPY_INT
calls++;
#endif
- if (st == 0x20)
+ if (st == STATUS_DMA)
return IRQ_HANDLED;
- if (!(st & 0x20)) {
+ if (!(st & STATUS_DMA)) {
virtual_dma_residue += virtual_dma_count;
virtual_dma_count = 0;
#ifdef TRACE_FLPY_INT
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 44c48e34d799..42159f45bf9c 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -31,7 +31,8 @@ extern void fpu__save(struct fpu *fpu);
extern int fpu__restore_sig(void __user *buf, int ia32_frame);
extern void fpu__drop(struct fpu *fpu);
extern int fpu__copy(struct task_struct *dst, struct task_struct *src);
-extern void fpu__clear(struct fpu *fpu);
+extern void fpu__clear_user_states(struct fpu *fpu);
+extern void fpu__clear_all(struct fpu *fpu);
extern int fpu__exception_code(struct fpu *fpu, int trap_nr);
extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate);
@@ -92,7 +93,7 @@ static inline void fpstate_init_xstate(struct xregs_state *xsave)
* XRSTORS requires these bits set in xcomp_bv, or it will
* trigger #GP:
*/
- xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask;
+ xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask_all;
}
static inline void fpstate_init_fxstate(struct fxregs_state *fx)
@@ -399,7 +400,10 @@ static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask)
u32 hmask = mask >> 32;
int err;
- XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
+ if (static_cpu_has(X86_FEATURE_XSAVES))
+ XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
+ else
+ XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
return err;
}
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index c6136d79f8c0..422d8369012a 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -21,19 +21,29 @@
#define XSAVE_YMM_SIZE 256
#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
-/* Supervisor features */
-#define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT)
-
-/* All currently supported features */
-#define XCNTXT_MASK (XFEATURE_MASK_FP | \
- XFEATURE_MASK_SSE | \
- XFEATURE_MASK_YMM | \
- XFEATURE_MASK_OPMASK | \
- XFEATURE_MASK_ZMM_Hi256 | \
- XFEATURE_MASK_Hi16_ZMM | \
- XFEATURE_MASK_PKRU | \
- XFEATURE_MASK_BNDREGS | \
- XFEATURE_MASK_BNDCSR)
+/* All currently supported user features */
+#define XFEATURE_MASK_USER_SUPPORTED (XFEATURE_MASK_FP | \
+ XFEATURE_MASK_SSE | \
+ XFEATURE_MASK_YMM | \
+ XFEATURE_MASK_OPMASK | \
+ XFEATURE_MASK_ZMM_Hi256 | \
+ XFEATURE_MASK_Hi16_ZMM | \
+ XFEATURE_MASK_PKRU | \
+ XFEATURE_MASK_BNDREGS | \
+ XFEATURE_MASK_BNDCSR)
+
+/* All currently supported supervisor features */
+#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (0)
+
+/*
+ * Unsupported supervisor features. When a supervisor feature in this mask is
+ * supported in the future, move it to the supported supervisor feature mask.
+ */
+#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT)
+
+/* All supervisor states including supported and unsupported states. */
+#define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \
+ XFEATURE_MASK_SUPERVISOR_UNSUPPORTED)
#ifdef CONFIG_X86_64
#define REX_PREFIX "0x48, "
@@ -41,7 +51,18 @@
#define REX_PREFIX
#endif
-extern u64 xfeatures_mask;
+extern u64 xfeatures_mask_all;
+
+static inline u64 xfeatures_mask_supervisor(void)
+{
+ return xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_SUPPORTED;
+}
+
+static inline u64 xfeatures_mask_user(void)
+{
+ return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED;
+}
+
extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
extern void __init update_regset_xstate_info(unsigned int size,
@@ -54,8 +75,9 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
+void copy_supervisor_to_kernel(struct xregs_state *xsave);
/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
-extern int validate_xstate_header(const struct xstate_header *hdr);
+int validate_user_xstate_header(const struct xstate_header *hdr);
#endif
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 85be2f506272..84b9449be080 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -56,16 +56,23 @@ struct dyn_arch_ftrace {
#ifndef __ASSEMBLY__
+#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE)
+extern void set_ftrace_ops_ro(void);
+#else
+static inline void set_ftrace_ops_ro(void) { }
+#endif
+
#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
{
/*
* Compare the symbol name with the system call name. Skip the
- * "__x64_sys", "__ia32_sys" or simple "sys" prefix.
+ * "__x64_sys", "__ia32_sys", "__do_sys" or simple "sys" prefix.
*/
return !strcmp(sym + 3, name + 3) ||
(!strncmp(sym, "__x64_", 6) && !strcmp(sym + 9, name + 3)) ||
- (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3));
+ (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3)) ||
+ (!strncmp(sym, "__do_sys", 8) && !strcmp(sym + 8, name + 3));
}
#ifndef COMPILE_OFFSETS
diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index 8e5af119dc2d..de58391bdee0 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -88,11 +88,17 @@ static inline bool intel_mid_has_msic(void)
return (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_PENWELL);
}
+extern void intel_scu_devices_create(void);
+extern void intel_scu_devices_destroy(void);
+
#else /* !CONFIG_X86_INTEL_MID */
#define intel_mid_identify_cpu() 0
#define intel_mid_has_msic() 0
+static inline void intel_scu_devices_create(void) { }
+static inline void intel_scu_devices_destroy(void) { }
+
#endif /* !CONFIG_X86_INTEL_MID */
enum intel_mid_timer_options {
@@ -115,9 +121,6 @@ extern enum intel_mid_timer_options intel_mid_timer_options;
#define SFI_MTMR_MAX_NUM 8
#define SFI_MRTC_MAX 8
-extern void intel_scu_devices_create(void);
-extern void intel_scu_devices_destroy(void);
-
/* VRTC timer */
#define MRST_VRTC_MAP_SZ 1024
/* #define MRST_VRTC_PGOFFSET 0xc00 */
diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h
deleted file mode 100644
index e6da1ce26256..000000000000
--- a/arch/x86/include/asm/intel_pmc_ipc.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_INTEL_PMC_IPC_H_
-#define _ASM_X86_INTEL_PMC_IPC_H_
-
-/* Commands */
-#define PMC_IPC_PMIC_ACCESS 0xFF
-#define PMC_IPC_PMIC_ACCESS_READ 0x0
-#define PMC_IPC_PMIC_ACCESS_WRITE 0x1
-#define PMC_IPC_USB_PWR_CTRL 0xF0
-#define PMC_IPC_PMIC_BLACKLIST_SEL 0xEF
-#define PMC_IPC_PHY_CONFIG 0xEE
-#define PMC_IPC_NORTHPEAK_CTRL 0xED
-#define PMC_IPC_PM_DEBUG 0xEC
-#define PMC_IPC_PMC_TELEMTRY 0xEB
-#define PMC_IPC_PMC_FW_MSG_CTRL 0xEA
-
-/* IPC return code */
-#define IPC_ERR_NONE 0
-#define IPC_ERR_CMD_NOT_SUPPORTED 1
-#define IPC_ERR_CMD_NOT_SERVICED 2
-#define IPC_ERR_UNABLE_TO_SERVICE 3
-#define IPC_ERR_CMD_INVALID 4
-#define IPC_ERR_CMD_FAILED 5
-#define IPC_ERR_EMSECURITY 6
-#define IPC_ERR_UNSIGNEDKERNEL 7
-
-/* GCR reg offsets from gcr base*/
-#define PMC_GCR_PMC_CFG_REG 0x08
-#define PMC_GCR_TELEM_DEEP_S0IX_REG 0x78
-#define PMC_GCR_TELEM_SHLW_S0IX_REG 0x80
-
-#if IS_ENABLED(CONFIG_INTEL_PMC_IPC)
-
-int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen,
- u32 *out, u32 outlen);
-int intel_pmc_s0ix_counter_read(u64 *data);
-int intel_pmc_gcr_read64(u32 offset, u64 *data);
-
-#else
-
-static inline int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen,
- u32 *out, u32 outlen)
-{
- return -EINVAL;
-}
-
-static inline int intel_pmc_s0ix_counter_read(u64 *data)
-{
- return -EINVAL;
-}
-
-static inline int intel_pmc_gcr_read64(u32 offset, u64 *data)
-{
- return -EINVAL;
-}
-
-#endif /*CONFIG_INTEL_PMC_IPC*/
-
-#endif
diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h
index 2a1442ba6e78..11d457af68c5 100644
--- a/arch/x86/include/asm/intel_scu_ipc.h
+++ b/arch/x86/include/asm/intel_scu_ipc.h
@@ -2,61 +2,69 @@
#ifndef _ASM_X86_INTEL_SCU_IPC_H_
#define _ASM_X86_INTEL_SCU_IPC_H_
-#include <linux/notifier.h>
-
-#define IPCMSG_INDIRECT_READ 0x02
-#define IPCMSG_INDIRECT_WRITE 0x05
-
-#define IPCMSG_COLD_OFF 0x80 /* Only for Tangier */
-
-#define IPCMSG_WARM_RESET 0xF0
-#define IPCMSG_COLD_RESET 0xF1
-#define IPCMSG_SOFT_RESET 0xF2
-#define IPCMSG_COLD_BOOT 0xF3
-
-#define IPCMSG_VRTC 0xFA /* Set vRTC device */
- /* Command id associated with message IPCMSG_VRTC */
- #define IPC_CMD_VRTC_SETTIME 1 /* Set time */
- #define IPC_CMD_VRTC_SETALARM 2 /* Set alarm */
-
-/* Read single register */
-int intel_scu_ipc_ioread8(u16 addr, u8 *data);
-
-/* Read a vector */
-int intel_scu_ipc_readv(u16 *addr, u8 *data, int len);
-
-/* Write single register */
-int intel_scu_ipc_iowrite8(u16 addr, u8 data);
-
-/* Write a vector */
-int intel_scu_ipc_writev(u16 *addr, u8 *data, int len);
-
-/* Update single register based on the mask */
-int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask);
-
-/* Issue commands to the SCU with or without data */
-int intel_scu_ipc_simple_command(int cmd, int sub);
-int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen,
- u32 *out, int outlen);
-
-extern struct blocking_notifier_head intel_scu_notifier;
-
-static inline void intel_scu_notifier_add(struct notifier_block *nb)
-{
- blocking_notifier_chain_register(&intel_scu_notifier, nb);
-}
-
-static inline void intel_scu_notifier_remove(struct notifier_block *nb)
-{
- blocking_notifier_chain_unregister(&intel_scu_notifier, nb);
-}
-
-static inline int intel_scu_notifier_post(unsigned long v, void *p)
+#include <linux/ioport.h>
+
+struct device;
+struct intel_scu_ipc_dev;
+
+/**
+ * struct intel_scu_ipc_data - Data used to configure SCU IPC
+ * @mem: Base address of SCU IPC MMIO registers
+ * @irq: The IRQ number used for SCU (optional)
+ */
+struct intel_scu_ipc_data {
+ struct resource mem;
+ int irq;
+};
+
+struct intel_scu_ipc_dev *
+__intel_scu_ipc_register(struct device *parent,
+ const struct intel_scu_ipc_data *scu_data,
+ struct module *owner);
+
+#define intel_scu_ipc_register(parent, scu_data) \
+ __intel_scu_ipc_register(parent, scu_data, THIS_MODULE)
+
+void intel_scu_ipc_unregister(struct intel_scu_ipc_dev *scu);
+
+struct intel_scu_ipc_dev *
+__devm_intel_scu_ipc_register(struct device *parent,
+ const struct intel_scu_ipc_data *scu_data,
+ struct module *owner);
+
+#define devm_intel_scu_ipc_register(parent, scu_data) \
+ __devm_intel_scu_ipc_register(parent, scu_data, THIS_MODULE)
+
+struct intel_scu_ipc_dev *intel_scu_ipc_dev_get(void);
+void intel_scu_ipc_dev_put(struct intel_scu_ipc_dev *scu);
+struct intel_scu_ipc_dev *devm_intel_scu_ipc_dev_get(struct device *dev);
+
+int intel_scu_ipc_dev_ioread8(struct intel_scu_ipc_dev *scu, u16 addr,
+ u8 *data);
+int intel_scu_ipc_dev_iowrite8(struct intel_scu_ipc_dev *scu, u16 addr,
+ u8 data);
+int intel_scu_ipc_dev_readv(struct intel_scu_ipc_dev *scu, u16 *addr,
+ u8 *data, size_t len);
+int intel_scu_ipc_dev_writev(struct intel_scu_ipc_dev *scu, u16 *addr,
+ u8 *data, size_t len);
+
+int intel_scu_ipc_dev_update(struct intel_scu_ipc_dev *scu, u16 addr,
+ u8 data, u8 mask);
+
+int intel_scu_ipc_dev_simple_command(struct intel_scu_ipc_dev *scu, int cmd,
+ int sub);
+int intel_scu_ipc_dev_command_with_size(struct intel_scu_ipc_dev *scu, int cmd,
+ int sub, const void *in, size_t inlen,
+ size_t size, void *out, size_t outlen);
+
+static inline int intel_scu_ipc_dev_command(struct intel_scu_ipc_dev *scu, int cmd,
+ int sub, const void *in, size_t inlen,
+ void *out, size_t outlen)
{
- return blocking_notifier_call_chain(&intel_scu_notifier, v, p);
+ return intel_scu_ipc_dev_command_with_size(scu, cmd, sub, in, inlen,
+ inlen, out, outlen);
}
-#define SCU_AVAILABLE 1
-#define SCU_DOWN 2
+#include <asm/intel_scu_ipc_legacy.h>
#endif
diff --git a/arch/x86/include/asm/intel_scu_ipc_legacy.h b/arch/x86/include/asm/intel_scu_ipc_legacy.h
new file mode 100644
index 000000000000..4cf13fecb673
--- /dev/null
+++ b/arch/x86/include/asm/intel_scu_ipc_legacy.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_INTEL_SCU_IPC_LEGACY_H_
+#define _ASM_X86_INTEL_SCU_IPC_LEGACY_H_
+
+#include <linux/notifier.h>
+
+#define IPCMSG_INDIRECT_READ 0x02
+#define IPCMSG_INDIRECT_WRITE 0x05
+
+#define IPCMSG_COLD_OFF 0x80 /* Only for Tangier */
+
+#define IPCMSG_WARM_RESET 0xF0
+#define IPCMSG_COLD_RESET 0xF1
+#define IPCMSG_SOFT_RESET 0xF2
+#define IPCMSG_COLD_BOOT 0xF3
+
+#define IPCMSG_VRTC 0xFA /* Set vRTC device */
+/* Command id associated with message IPCMSG_VRTC */
+#define IPC_CMD_VRTC_SETTIME 1 /* Set time */
+#define IPC_CMD_VRTC_SETALARM 2 /* Set alarm */
+
+/* Don't call these in new code - they will be removed eventually */
+
+/* Read single register */
+static inline int intel_scu_ipc_ioread8(u16 addr, u8 *data)
+{
+ return intel_scu_ipc_dev_ioread8(NULL, addr, data);
+}
+
+/* Read a vector */
+static inline int intel_scu_ipc_readv(u16 *addr, u8 *data, int len)
+{
+ return intel_scu_ipc_dev_readv(NULL, addr, data, len);
+}
+
+/* Write single register */
+static inline int intel_scu_ipc_iowrite8(u16 addr, u8 data)
+{
+ return intel_scu_ipc_dev_iowrite8(NULL, addr, data);
+}
+
+/* Write a vector */
+static inline int intel_scu_ipc_writev(u16 *addr, u8 *data, int len)
+{
+ return intel_scu_ipc_dev_writev(NULL, addr, data, len);
+}
+
+/* Update single register based on the mask */
+static inline int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask)
+{
+ return intel_scu_ipc_dev_update(NULL, addr, data, mask);
+}
+
+/* Issue commands to the SCU with or without data */
+static inline int intel_scu_ipc_simple_command(int cmd, int sub)
+{
+ return intel_scu_ipc_dev_simple_command(NULL, cmd, sub);
+}
+
+static inline int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen,
+ u32 *out, int outlen)
+{
+ /* New API takes both inlen and outlen as bytes so convert here */
+ size_t inbytes = inlen * sizeof(u32);
+ size_t outbytes = outlen * sizeof(u32);
+
+ return intel_scu_ipc_dev_command_with_size(NULL, cmd, sub, in, inbytes,
+ inlen, out, outbytes);
+}
+
+extern struct blocking_notifier_head intel_scu_notifier;
+
+static inline void intel_scu_notifier_add(struct notifier_block *nb)
+{
+ blocking_notifier_chain_register(&intel_scu_notifier, nb);
+}
+
+static inline void intel_scu_notifier_remove(struct notifier_block *nb)
+{
+ blocking_notifier_chain_unregister(&intel_scu_notifier, nb);
+}
+
+static inline int intel_scu_notifier_post(unsigned long v, void *p)
+{
+ return blocking_notifier_call_chain(&intel_scu_notifier, v, p);
+}
+
+#define SCU_AVAILABLE 1
+#define SCU_DOWN 2
+
+#endif
diff --git a/arch/x86/include/asm/intel_telemetry.h b/arch/x86/include/asm/intel_telemetry.h
index 2f77e31a1283..8046e70dfd7c 100644
--- a/arch/x86/include/asm/intel_telemetry.h
+++ b/arch/x86/include/asm/intel_telemetry.h
@@ -10,6 +10,8 @@
#define TELEM_MAX_EVENTS_SRAM 28
#define TELEM_MAX_OS_ALLOCATED_EVENTS 20
+#include <asm/intel_scu_ipc.h>
+
enum telemetry_unit {
TELEM_PSS = 0,
TELEM_IOSS,
@@ -51,6 +53,8 @@ struct telemetry_plt_config {
struct telemetry_unit_config ioss_config;
struct mutex telem_trace_lock;
struct mutex telem_lock;
+ struct intel_pmc_dev *pmc;
+ struct intel_scu_ipc_dev *scu;
bool telem_in_use;
};
@@ -92,7 +96,7 @@ int telemetry_set_pltdata(const struct telemetry_core_ops *ops,
int telemetry_clear_pltdata(void);
-int telemetry_pltconfig_valid(void);
+struct telemetry_plt_config *telemetry_get_pltdata(void);
int telemetry_get_evtname(enum telemetry_unit telem_unit,
const char **name, int len);
diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h
index 989cfa86de85..734482afbf81 100644
--- a/arch/x86/include/asm/invpcid.h
+++ b/arch/x86/include/asm/invpcid.h
@@ -12,12 +12,9 @@ static inline void __invpcid(unsigned long pcid, unsigned long addr,
* stale TLB entries and, especially if we're flushing global
* mappings, we don't want the compiler to reorder any subsequent
* memory accesses before the TLB flush.
- *
- * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
- * invpcid (%rcx), %rax in long mode.
*/
- asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
- : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+ asm volatile("invpcid %[desc], %[type]"
+ :: [desc] "m" (desc), [type] "r" (type) : "memory");
}
#define INVPCID_TYPE_INDIV_ADDR 0
diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h
index 07344d82e88e..ac1a99ffbd8d 100644
--- a/arch/x86/include/asm/io_bitmap.h
+++ b/arch/x86/include/asm/io_bitmap.h
@@ -17,7 +17,7 @@ struct task_struct;
#ifdef CONFIG_X86_IOPL_IOPERM
void io_bitmap_share(struct task_struct *tsk);
-void io_bitmap_exit(void);
+void io_bitmap_exit(struct task_struct *tsk);
void native_tss_update_io_bitmap(void);
@@ -29,7 +29,7 @@ void native_tss_update_io_bitmap(void);
#else
static inline void io_bitmap_share(struct task_struct *tsk) { }
-static inline void io_bitmap_exit(void) { }
+static inline void io_bitmap_exit(struct task_struct *tsk) { }
static inline void tss_update_io_bitmap(void) { }
#endif
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4698343b9a05..1c0b62d26962 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -578,6 +578,7 @@ struct kvm_vcpu_arch {
unsigned long cr4;
unsigned long cr4_guest_owned_bits;
unsigned long cr8;
+ u32 host_pkru;
u32 pkru;
u32 hflags;
u64 efer;
@@ -1093,8 +1094,6 @@ struct kvm_x86_ops {
void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
- u64 (*get_dr6)(struct kvm_vcpu *vcpu);
- void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value);
void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
@@ -1280,8 +1279,7 @@ extern struct kmem_cache *x86_fpu_cache;
#define __KVM_HAVE_ARCH_VM_ALLOC
static inline struct kvm *kvm_arch_alloc_vm(void)
{
- return __vmalloc(kvm_x86_ops.vm_size,
- GFP_KERNEL_ACCOUNT | __GFP_ZERO, PAGE_KERNEL);
+ return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
}
void kvm_arch_free_vm(struct kvm *kvm);
@@ -1449,6 +1447,7 @@ bool kvm_rdpmc(struct kvm_vcpu *vcpu);
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
+void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload);
void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
@@ -1663,8 +1662,8 @@ void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
{
/* We can only post Fixed and LowPrio IRQs */
- return (irq->delivery_mode == dest_Fixed ||
- irq->delivery_mode == dest_LowestPrio);
+ return (irq->delivery_mode == APIC_DM_FIXED ||
+ irq->delivery_mode == APIC_DM_LOWEST);
}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 73d8dd14dda2..2d4515e8b7df 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -14,43 +14,4 @@ extern struct pglist_data *node_data[];
#define NODE_DATA(nid) (node_data[nid])
#endif /* CONFIG_NUMA */
-#ifdef CONFIG_DISCONTIGMEM
-
-/*
- * generic node memory support, the following assumptions apply:
- *
- * 1) memory comes in 64Mb contiguous chunks which are either present or not
- * 2) we will not have more than 64Gb in total
- *
- * for now assume that 64Gb is max amount of RAM for whole system
- * 64Gb / 4096bytes/page = 16777216 pages
- */
-#define MAX_NR_PAGES 16777216
-#define MAX_SECTIONS 1024
-#define PAGES_PER_SECTION (MAX_NR_PAGES/MAX_SECTIONS)
-
-extern s8 physnode_map[];
-
-static inline int pfn_to_nid(unsigned long pfn)
-{
-#ifdef CONFIG_NUMA
- return((int) physnode_map[(pfn) / PAGES_PER_SECTION]);
-#else
- return 0;
-#endif
-}
-
-static inline int pfn_valid(int pfn)
-{
- int nid = pfn_to_nid(pfn);
-
- if (nid >= 0)
- return (pfn < node_end_pfn(nid));
- return 0;
-}
-
-#define early_pfn_valid(pfn) pfn_valid((pfn))
-
-#endif /* CONFIG_DISCONTIGMEM */
-
#endif /* _ASM_X86_MMZONE_32_H */
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index c215d2762488..e988bac0a4a1 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -13,64 +13,4 @@ struct mod_arch_specific {
#endif
};
-#ifdef CONFIG_X86_64
-/* X86_64 does not define MODULE_PROC_FAMILY */
-#elif defined CONFIG_M486SX
-#define MODULE_PROC_FAMILY "486SX "
-#elif defined CONFIG_M486
-#define MODULE_PROC_FAMILY "486 "
-#elif defined CONFIG_M586
-#define MODULE_PROC_FAMILY "586 "
-#elif defined CONFIG_M586TSC
-#define MODULE_PROC_FAMILY "586TSC "
-#elif defined CONFIG_M586MMX
-#define MODULE_PROC_FAMILY "586MMX "
-#elif defined CONFIG_MCORE2
-#define MODULE_PROC_FAMILY "CORE2 "
-#elif defined CONFIG_MATOM
-#define MODULE_PROC_FAMILY "ATOM "
-#elif defined CONFIG_M686
-#define MODULE_PROC_FAMILY "686 "
-#elif defined CONFIG_MPENTIUMII
-#define MODULE_PROC_FAMILY "PENTIUMII "
-#elif defined CONFIG_MPENTIUMIII
-#define MODULE_PROC_FAMILY "PENTIUMIII "
-#elif defined CONFIG_MPENTIUMM
-#define MODULE_PROC_FAMILY "PENTIUMM "
-#elif defined CONFIG_MPENTIUM4
-#define MODULE_PROC_FAMILY "PENTIUM4 "
-#elif defined CONFIG_MK6
-#define MODULE_PROC_FAMILY "K6 "
-#elif defined CONFIG_MK7
-#define MODULE_PROC_FAMILY "K7 "
-#elif defined CONFIG_MK8
-#define MODULE_PROC_FAMILY "K8 "
-#elif defined CONFIG_MELAN
-#define MODULE_PROC_FAMILY "ELAN "
-#elif defined CONFIG_MCRUSOE
-#define MODULE_PROC_FAMILY "CRUSOE "
-#elif defined CONFIG_MEFFICEON
-#define MODULE_PROC_FAMILY "EFFICEON "
-#elif defined CONFIG_MWINCHIPC6
-#define MODULE_PROC_FAMILY "WINCHIPC6 "
-#elif defined CONFIG_MWINCHIP3D
-#define MODULE_PROC_FAMILY "WINCHIP3D "
-#elif defined CONFIG_MCYRIXIII
-#define MODULE_PROC_FAMILY "CYRIXIII "
-#elif defined CONFIG_MVIAC3_2
-#define MODULE_PROC_FAMILY "VIAC3-2 "
-#elif defined CONFIG_MVIAC7
-#define MODULE_PROC_FAMILY "VIAC7 "
-#elif defined CONFIG_MGEODEGX1
-#define MODULE_PROC_FAMILY "GEODEGX1 "
-#elif defined CONFIG_MGEODE_LX
-#define MODULE_PROC_FAMILY "GEODE "
-#else
-#error unknown processor family
-#endif
-
-#ifdef CONFIG_X86_32
-# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY
-#endif
-
#endif /* _ASM_X86_MODULE_H */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 1c42ecbe75cb..d30805ed323e 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -35,6 +35,8 @@ typedef int (*hyperv_fill_flush_list_func)(
rdmsrl(HV_X64_MSR_SINT0 + int_num, val)
#define hv_set_synint_state(int_num, val) \
wrmsrl(HV_X64_MSR_SINT0 + int_num, val)
+#define hv_recommend_using_aeoi() \
+ (!(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED))
#define hv_get_crash_ctl(val) \
rdmsrl(HV_X64_MSR_CRASH_CTL, val)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 12c9684d59ba..ef452b817f44 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -301,6 +301,9 @@
#define MSR_PP1_ENERGY_STATUS 0x00000641
#define MSR_PP1_POLICY 0x00000642
+#define MSR_AMD_PKG_ENERGY_STATUS 0xc001029b
+#define MSR_AMD_RAPL_POWER_UNIT 0xc0010299
+
/* Config TDP MSRs */
#define MSR_CONFIG_TDP_NOMINAL 0x00000648
#define MSR_CONFIG_TDP_LEVEL_1 0x00000649
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index b809f117f3f4..73d997aa2966 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -20,8 +20,10 @@
#define MWAIT_ECX_INTERRUPT_BREAK 0x1
#define MWAITX_ECX_TIMER_ENABLE BIT(1)
-#define MWAITX_MAX_LOOPS ((u32)-1)
+#define MWAITX_MAX_WAIT_CYCLES UINT_MAX
#define MWAITX_DISABLE_CSTATES 0xf0
+#define TPAUSE_C01_STATE 1
+#define TPAUSE_C02_STATE 0
u32 get_umwait_control_msr(void);
@@ -122,4 +124,24 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
current_clr_polling();
}
+/*
+ * Caller can specify whether to enter C0.1 (low latency, less
+ * power saving) or C0.2 state (saves more power, but longer wakeup
+ * latency). This may be overridden by the IA32_UMWAIT_CONTROL MSR
+ * which can force requests for C0.2 to be downgraded to C0.1.
+ */
+static inline void __tpause(u32 ecx, u32 edx, u32 eax)
+{
+ /* "tpause %ecx, %edx, %eax;" */
+ #ifdef CONFIG_AS_TPAUSE
+ asm volatile("tpause %%ecx\n"
+ :
+ : "c"(ecx), "d"(edx), "a"(eax));
+ #else
+ asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n"
+ :
+ : "c"(ecx), "d"(edx), "a"(eax));
+ #endif
+}
+
#endif /* _ASM_X86_MWAIT_H */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 07e95dcb40ad..d52d1aacdd97 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -4,20 +4,13 @@
#define _ASM_X86_NOSPEC_BRANCH_H_
#include <linux/static_key.h>
+#include <linux/frame.h>
#include <asm/alternative.h>
#include <asm/alternative-asm.h>
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
-
-/*
- * This should be used immediately before a retpoline alternative. It tells
- * objtool where the retpolines are so that it can make sense of the control
- * flow by just reading the original instruction(s) and ignoring the
- * alternatives.
- */
-#define ANNOTATE_NOSPEC_ALTERNATIVE \
- ANNOTATE_IGNORE_ALTERNATIVE
+#include <asm/unwind_hints.h>
/*
* Fill the CPU return stack buffer.
@@ -46,21 +39,25 @@
#define __FILL_RETURN_BUFFER(reg, nr, sp) \
mov $(nr/2), reg; \
771: \
+ ANNOTATE_INTRA_FUNCTION_CALL; \
call 772f; \
773: /* speculation trap */ \
+ UNWIND_HINT_EMPTY; \
pause; \
lfence; \
jmp 773b; \
772: \
+ ANNOTATE_INTRA_FUNCTION_CALL; \
call 774f; \
775: /* speculation trap */ \
+ UNWIND_HINT_EMPTY; \
pause; \
lfence; \
jmp 775b; \
774: \
+ add $(BITS_PER_LONG/8) * 2, sp; \
dec reg; \
- jnz 771b; \
- add $(BITS_PER_LONG/8) * nr, sp;
+ jnz 771b;
#ifdef __ASSEMBLY__
@@ -77,57 +74,27 @@
.endm
/*
- * These are the bare retpoline primitives for indirect jmp and call.
- * Do not use these directly; they only exist to make the ALTERNATIVE
- * invocation below less ugly.
- */
-.macro RETPOLINE_JMP reg:req
- call .Ldo_rop_\@
-.Lspec_trap_\@:
- pause
- lfence
- jmp .Lspec_trap_\@
-.Ldo_rop_\@:
- mov \reg, (%_ASM_SP)
- ret
-.endm
-
-/*
- * This is a wrapper around RETPOLINE_JMP so the called function in reg
- * returns to the instruction after the macro.
- */
-.macro RETPOLINE_CALL reg:req
- jmp .Ldo_call_\@
-.Ldo_retpoline_jmp_\@:
- RETPOLINE_JMP \reg
-.Ldo_call_\@:
- call .Ldo_retpoline_jmp_\@
-.endm
-
-/*
* JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
* indirect jmp/call which may be susceptible to the Spectre variant 2
* attack.
*/
.macro JMP_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
- ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \
- __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+ __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
#else
- jmp *\reg
+ jmp *%\reg
#endif
.endm
.macro CALL_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
- ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \
- __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD
+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
+ __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD
#else
- call *\reg
+ call *%\reg
#endif
.endm
@@ -137,10 +104,8 @@
*/
.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
#ifdef CONFIG_RETPOLINE
- ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE "jmp .Lskip_rsb_\@", \
- __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
- \ftr
+ ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
+ __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
.Lskip_rsb_\@:
#endif
.endm
@@ -161,16 +126,16 @@
* which is ensured when CONFIG_RETPOLINE is defined.
*/
# define CALL_NOSPEC \
- ANNOTATE_NOSPEC_ALTERNATIVE \
ALTERNATIVE_2( \
ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
- "call __x86_indirect_thunk_%V[thunk_target]\n", \
+ "call __x86_retpoline_%V[thunk_target]\n", \
X86_FEATURE_RETPOLINE, \
"lfence;\n" \
ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
X86_FEATURE_RETPOLINE_AMD)
+
# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
#else /* CONFIG_X86_32 */
@@ -180,7 +145,6 @@
* here, anyway.
*/
# define CALL_NOSPEC \
- ANNOTATE_NOSPEC_ALTERNATIVE \
ALTERNATIVE_2( \
ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
@@ -237,27 +201,6 @@ enum ssb_mitigation {
extern char __indirect_thunk_start[];
extern char __indirect_thunk_end[];
-/*
- * On VMEXIT we must ensure that no RSB predictions learned in the guest
- * can be followed in the host, by overwriting the RSB completely. Both
- * retpoline and IBRS mitigations for Spectre v2 need this; only on future
- * CPUs with IBRS_ALL *might* it be avoided.
- */
-static inline void vmexit_fill_RSB(void)
-{
-#ifdef CONFIG_RETPOLINE
- unsigned long loops;
-
- asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE("jmp 910f",
- __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
- X86_FEATURE_RETPOLINE)
- "910:"
- : "=r" (loops), ASM_CALL_CONSTRAINT
- : : "memory" );
-#endif
-}
-
static __always_inline
void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
{
diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
index 6e060907c163..d25534940bde 100644
--- a/arch/x86/include/asm/orc_types.h
+++ b/arch/x86/include/asm/orc_types.h
@@ -58,8 +58,7 @@
#define ORC_TYPE_CALL 0
#define ORC_TYPE_REGS 1
#define ORC_TYPE_REGS_IRET 2
-#define UNWIND_HINT_TYPE_SAVE 3
-#define UNWIND_HINT_TYPE_RESTORE 4
+#define UNWIND_HINT_TYPE_RET_OFFSET 3
#ifndef __ASSEMBLY__
/*
diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h
index 6deb6cd236e3..7f6ccff0ba72 100644
--- a/arch/x86/include/asm/pgtable-2level_types.h
+++ b/arch/x86/include/asm/pgtable-2level_types.h
@@ -20,6 +20,8 @@ typedef union {
#define SHARED_KERNEL_PMD 0
+#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED
+
/*
* traditional i386 two-level paging structure:
*/
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index 33845d36897c..80fbb4a9ed87 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -27,6 +27,8 @@ typedef union {
#define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI))
#endif
+#define ARCH_PAGE_TABLE_SYNC_MASK (SHARED_KERNEL_PMD ? 0 : PGTBL_PMD_MODIFIED)
+
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
*/
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 0dca7f7aeff2..be7b19646897 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -66,8 +66,7 @@ do { \
#endif /* !__ASSEMBLY__ */
/*
- * kern_addr_valid() is (1) for FLATMEM and (0) for
- * SPARSEMEM and DISCONTIGMEM
+ * kern_addr_valid() is (1) for FLATMEM and (0) for SPARSEMEM
*/
#ifdef CONFIG_FLATMEM
#define kern_addr_valid(addr) (1)
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 52e5f5f2240d..8f63efb2a2cc 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -159,4 +159,6 @@ extern unsigned int ptrs_per_p4d;
#define PGD_KERNEL_START ((PAGE_SIZE / 2) / sizeof(pgd_t))
+#define ARCH_PAGE_TABLE_SYNC_MASK (pgtable_l5_enabled() ? PGTBL_PGD_MODIFIED : PGTBL_P4D_MODIFIED)
+
#endif /* _ASM_X86_PGTABLE_64_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index b6606fe6cfdf..2e7c442cc618 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -194,7 +194,6 @@ enum page_cache_mode {
#define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0)
#define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC)
#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G)
-#define __PAGE_KERNEL_RX (__PP| 0| 0|___A| 0|___D| 0|___G)
#define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC)
#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G)
#define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G)
@@ -220,7 +219,6 @@ enum page_cache_mode {
#define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC)
#define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC)
#define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0)
-#define PAGE_KERNEL_RX __pgprot_mask(__PAGE_KERNEL_RX | _ENC)
#define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC)
#define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC)
#define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC)
@@ -284,6 +282,12 @@ typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
typedef struct { pgdval_t pgd; } pgd_t;
+static inline pgprot_t pgprot_nx(pgprot_t prot)
+{
+ return __pgprot(pgprot_val(prot) | _PAGE_NX);
+}
+#define pgprot_nx pgprot_nx
+
#ifdef CONFIG_X86_PAE
/*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3bcf27caf6c9..29ee0c088009 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -113,9 +113,10 @@ struct cpuinfo_x86 {
/* in KB - valid for CPUS which support this call: */
unsigned int x86_cache_size;
int x86_cache_alignment; /* In bytes */
- /* Cache QoS architectural values: */
+ /* Cache QoS architectural values, valid only on the BSP: */
int x86_cache_max_rmid; /* max index */
int x86_cache_occ_scale; /* scale to bytes */
+ int x86_cache_mbm_width_offset;
int x86_power;
unsigned long loops_per_jiffy;
/* cpuid returned max cores value: */
@@ -727,7 +728,6 @@ static inline void sync_core(void)
unsigned int tmp;
asm volatile (
- UNWIND_HINT_SAVE
"mov %%ss, %0\n\t"
"pushq %q0\n\t"
"pushq %%rsp\n\t"
@@ -737,7 +737,6 @@ static inline void sync_core(void)
"pushq %q0\n\t"
"pushq $1f\n\t"
"iretq\n\t"
- UNWIND_HINT_RESTORE
"1:"
: "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
#endif
diff --git a/arch/x86/include/asm/resctrl_sched.h b/arch/x86/include/asm/resctrl.h
index f6b7fe2833cc..07603064df8f 100644
--- a/arch/x86/include/asm/resctrl_sched.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_RESCTRL_SCHED_H
-#define _ASM_X86_RESCTRL_SCHED_H
+#ifndef _ASM_X86_RESCTRL_H
+#define _ASM_X86_RESCTRL_H
#ifdef CONFIG_X86_CPU_RESCTRL
@@ -84,10 +84,13 @@ static inline void resctrl_sched_in(void)
__resctrl_sched_in();
}
+void resctrl_cpu_detect(struct cpuinfo_x86 *c);
+
#else
static inline void resctrl_sched_in(void) {}
+static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {}
#endif /* CONFIG_X86_CPU_RESCTRL */
-#endif /* _ASM_X86_RESCTRL_SCHED_H */
+#endif /* _ASM_X86_RESCTRL_H */
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index 27c47d183f4b..8b58d6975d5d 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -57,8 +57,10 @@ static __always_inline unsigned long smap_save(void)
{
unsigned long flags;
- asm volatile (ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC,
- X86_FEATURE_SMAP)
+ asm volatile ("# smap_save\n\t"
+ ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP)
+ "pushf; pop %0; " __ASM_CLAC "\n\t"
+ "1:"
: "=rm" (flags) : : "memory", "cc");
return flags;
@@ -66,7 +68,10 @@ static __always_inline unsigned long smap_save(void)
static __always_inline void smap_restore(unsigned long flags)
{
- asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP)
+ asm volatile ("# smap_restore\n\t"
+ ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP)
+ "push %0; popf\n\t"
+ "1:"
: : "g" (flags) : "memory", "cc");
}
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index bf3e34b25afc..323db6c5852a 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -3,29 +3,7 @@
#define _ASM_X86_SPINLOCK_TYPES_H
#include <linux/types.h>
-
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-#define __TICKET_LOCK_INC 2
-#define TICKET_SLOWPATH_FLAG ((__ticket_t)1)
-#else
-#define __TICKET_LOCK_INC 1
-#define TICKET_SLOWPATH_FLAG ((__ticket_t)0)
-#endif
-
-#if (CONFIG_NR_CPUS < (256 / __TICKET_LOCK_INC))
-typedef u8 __ticket_t;
-typedef u16 __ticketpair_t;
-#else
-typedef u16 __ticket_t;
-typedef u32 __ticketpair_t;
-#endif
-
-#define TICKET_LOCK_INC ((__ticket_t)__TICKET_LOCK_INC)
-
-#define TICKET_SHIFT (sizeof(__ticket_t) * 8)
-
#include <asm-generic/qspinlock_types.h>
-
#include <asm-generic/qrwlock_types.h>
#endif /* _ASM_X86_SPINLOCK_TYPES_H */
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 91e29b6a86a5..9804a7957f4e 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -55,8 +55,13 @@
/*
* Initialize the stackprotector canary value.
*
- * NOTE: this must only be called from functions that never return,
+ * NOTE: this must only be called from functions that never return
* and it must always be inlined.
+ *
+ * In addition, it should be called from a compilation unit for which
+ * stack protector is disabled. Alternatively, the caller should not end
+ * with a function call which gets tail-call optimized as that would
+ * lead to checking a modified canary value.
*/
static __always_inline void boot_init_stack_canary(void)
{
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 0e059b73437b..9f69cc497f4b 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -12,27 +12,6 @@ struct task_struct *__switch_to_asm(struct task_struct *prev,
__visible struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *next);
-/* This runs runs on the previous thread's stack. */
-static inline void prepare_switch_to(struct task_struct *next)
-{
-#ifdef CONFIG_VMAP_STACK
- /*
- * If we switch to a stack that has a top-level paging entry
- * that is not present in the current mm, the resulting #PF will
- * will be promoted to a double-fault and we'll panic. Probe
- * the new stack now so that vmalloc_fault can fix up the page
- * tables if needed. This can only happen if we use a stack
- * in vmap space.
- *
- * We assume that the stack is aligned so that it never spans
- * more than one top-level paging entry.
- *
- * To minimize cache pollution, just follow the stack pointer.
- */
- READ_ONCE(*(unsigned char *)next->thread.sp);
-#endif
-}
-
asmlinkage void ret_from_fork(void);
/*
@@ -67,8 +46,6 @@ struct fork_frame {
#define switch_to(prev, next, last) \
do { \
- prepare_switch_to(next); \
- \
((last) = __switch_to_asm((prev), (next))); \
} while (0)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index c26a7e1d8a2c..2ae904bf25e4 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -69,9 +69,7 @@ dotraplinkage void do_overflow(struct pt_regs *regs, long error_code);
dotraplinkage void do_bounds(struct pt_regs *regs, long error_code);
dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code);
dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code);
-#if defined(CONFIG_X86_64) || defined(CONFIG_DOUBLEFAULT)
dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2);
-#endif
dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code);
dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code);
dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code);
@@ -118,11 +116,6 @@ void smp_spurious_interrupt(struct pt_regs *regs);
void smp_error_interrupt(struct pt_regs *regs);
asmlinkage void smp_irq_move_cleanup_interrupt(void);
-extern void ist_enter(struct pt_regs *regs);
-extern void ist_exit(struct pt_regs *regs);
-extern void ist_begin_non_atomic(struct pt_regs *regs);
-extern void ist_end_non_atomic(void);
-
#ifdef CONFIG_VMAP_STACK
void __noreturn handle_stack_overflow(const char *message,
struct pt_regs *regs,
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 499578f7e6d7..70fc159ebe69 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -19,7 +19,7 @@ struct unwind_state {
#if defined(CONFIG_UNWINDER_ORC)
bool signal, full_regs;
unsigned long sp, bp, ip;
- struct pt_regs *regs;
+ struct pt_regs *regs, *prev_regs;
#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
bool got_irq;
unsigned long *bp, *orig_sp, ip;
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
index f5e2eb12cb71..7d903fdb3f43 100644
--- a/arch/x86/include/asm/unwind_hints.h
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -86,32 +86,15 @@
UNWIND_HINT sp_offset=\sp_offset
.endm
-.macro UNWIND_HINT_SAVE
- UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE
-.endm
-
-.macro UNWIND_HINT_RESTORE
- UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE
+/*
+ * RET_OFFSET: Used on instructions that terminate a function; mostly RETURN
+ * and sibling calls. On these, sp_offset denotes the expected offset from
+ * initial_func_cfi.
+ */
+.macro UNWIND_HINT_RET_OFFSET sp_offset=8
+ UNWIND_HINT type=UNWIND_HINT_TYPE_RET_OFFSET sp_offset=\sp_offset
.endm
-#else /* !__ASSEMBLY__ */
-
-#define UNWIND_HINT(sp_reg, sp_offset, type, end) \
- "987: \n\t" \
- ".pushsection .discard.unwind_hints\n\t" \
- /* struct unwind_hint */ \
- ".long 987b - .\n\t" \
- ".short " __stringify(sp_offset) "\n\t" \
- ".byte " __stringify(sp_reg) "\n\t" \
- ".byte " __stringify(type) "\n\t" \
- ".byte " __stringify(end) "\n\t" \
- ".balign 4 \n\t" \
- ".popsection\n\t"
-
-#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE, 0)
-
-#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE, 0)
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_UNWIND_HINTS_H */
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 389174eaec79..2fcc3ac12e76 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -123,12 +123,6 @@ enum uv_memprotect {
UV_MEMPROT_ALLOW_RW
};
-/*
- * bios calls have 6 parameters
- */
-extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64);
-extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64);
-
extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *, long *);
extern s64 uv_bios_freq_base(u64, u64 *);
extern int uv_bios_mq_watchlist_alloc(unsigned long, unsigned int,
@@ -146,7 +140,6 @@ extern long sn_partition_id;
extern long sn_coherency_id;
extern long sn_region_size;
extern long system_serial_number;
-#define uv_partition_coherence_id() (sn_coherency_id)
extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index 45ea95ce79b4..ae587ce544f4 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -31,7 +31,6 @@ static inline bool is_early_uv_system(void)
}
extern int is_uv_system(void);
extern int is_uv_hubbed(int uvtype);
-extern int is_uv_hubless(int uvtype);
extern void uv_cpu_init(void);
extern void uv_nmi_init(void);
extern void uv_system_init(void);
@@ -44,7 +43,6 @@ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
static inline bool is_early_uv_system(void) { return 0; }
static inline int is_uv_system(void) { return 0; }
static inline int is_uv_hubbed(int uv) { return 0; }
-static inline int is_uv_hubless(int uv) { return 0; }
static inline void uv_cpu_init(void) { }
static inline void uv_system_init(void) { }
static inline const struct cpumask *
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 950cd1395d5d..60ca0afdeaf9 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -219,20 +219,6 @@ static inline struct uv_hub_info_s *uv_cpu_hub_info(int cpu)
return (struct uv_hub_info_s *)uv_cpu_info_per(cpu)->p_uv_hub_info;
}
-#define UV_HUB_INFO_VERSION 0x7150
-extern int uv_hub_info_version(void);
-static inline int uv_hub_info_check(int version)
-{
- if (uv_hub_info_version() == version)
- return 0;
-
- pr_crit("UV: uv_hub_info version(%x) mismatch, expecting(%x)\n",
- uv_hub_info_version(), version);
-
- BUG(); /* Catastrophic - cannot continue on unknown UV system */
-}
-#define _uv_hub_info_check() uv_hub_info_check(UV_HUB_INFO_VERSION)
-
/*
* HUB revision ranges for each UV HUB architecture.
* This is a software convention - NOT the hardware revision numbers in
@@ -244,51 +230,32 @@ static inline int uv_hub_info_check(int version)
#define UV4_HUB_REVISION_BASE 7
#define UV4A_HUB_REVISION_BASE 8 /* UV4 (fixed) rev 2 */
-/* WARNING: UVx_HUB_IS_SUPPORTED defines are deprecated and will be removed */
static inline int is_uv1_hub(void)
{
-#ifdef UV1_HUB_IS_SUPPORTED
return is_uv_hubbed(uv(1));
-#else
- return 0;
-#endif
}
static inline int is_uv2_hub(void)
{
-#ifdef UV2_HUB_IS_SUPPORTED
return is_uv_hubbed(uv(2));
-#else
- return 0;
-#endif
}
static inline int is_uv3_hub(void)
{
-#ifdef UV3_HUB_IS_SUPPORTED
return is_uv_hubbed(uv(3));
-#else
- return 0;
-#endif
}
/* First test "is UV4A", then "is UV4" */
static inline int is_uv4a_hub(void)
{
-#ifdef UV4A_HUB_IS_SUPPORTED
if (is_uv_hubbed(uv(4)))
return (uv_hub_info->hub_revision == UV4A_HUB_REVISION_BASE);
-#endif
return 0;
}
static inline int is_uv4_hub(void)
{
-#ifdef UV4_HUB_IS_SUPPORTED
return is_uv_hubbed(uv(4));
-#else
- return 0;
-#endif
}
static inline int is_uvx_hub(void)
@@ -692,7 +659,6 @@ static inline int uv_cpu_blade_processor_id(int cpu)
{
return uv_cpu_info_per(cpu)->blade_cpu_id;
}
-#define _uv_cpu_blade_processor_id 1 /* indicate function available */
/* Blade number to Node number (UV1..UV4 is 1:1) */
static inline int uv_blade_to_node(int blade)
@@ -856,26 +822,6 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
}
extern unsigned int uv_apicid_hibits;
-static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode)
-{
- apicid |= uv_apicid_hibits;
- return (1UL << UVH_IPI_INT_SEND_SHFT) |
- ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) |
- (mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
- (vector << UVH_IPI_INT_VECTOR_SHFT);
-}
-
-static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
-{
- unsigned long val;
- unsigned long dmode = dest_Fixed;
-
- if (vector == NMI_VECTOR)
- dmode = dest_NMI;
-
- val = uv_hub_ipi_value(apicid, vector, dmode);
- uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
-}
/*
* Get the minimum revision number of the hub chips within the partition.
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 62c79e26a59a..9ee5ed6e8b34 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -99,13 +99,6 @@
#define UV3_HUB_PART_NUMBER_X 0x4321
#define UV4_HUB_PART_NUMBER 0x99a1
-/* Compat: Indicate which UV Hubs are supported. */
-#define UV1_HUB_IS_SUPPORTED 1
-#define UV2_HUB_IS_SUPPORTED 1
-#define UV3_HUB_IS_SUPPORTED 1
-#define UV4_HUB_IS_SUPPORTED 1
-#define UV4A_HUB_IS_SUPPORTED 1
-
/* Error function to catch undefined references */
extern unsigned long uv_undefined(char *str);
diff --git a/arch/x86/include/asm/vermagic.h b/arch/x86/include/asm/vermagic.h
new file mode 100644
index 000000000000..75884d2cdec3
--- /dev/null
+++ b/arch/x86/include/asm/vermagic.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_VERMAGIC_H
+#define _ASM_VERMAGIC_H
+
+#ifdef CONFIG_X86_64
+/* X86_64 does not define MODULE_PROC_FAMILY */
+#elif defined CONFIG_M486SX
+#define MODULE_PROC_FAMILY "486SX "
+#elif defined CONFIG_M486
+#define MODULE_PROC_FAMILY "486 "
+#elif defined CONFIG_M586
+#define MODULE_PROC_FAMILY "586 "
+#elif defined CONFIG_M586TSC
+#define MODULE_PROC_FAMILY "586TSC "
+#elif defined CONFIG_M586MMX
+#define MODULE_PROC_FAMILY "586MMX "
+#elif defined CONFIG_MCORE2
+#define MODULE_PROC_FAMILY "CORE2 "
+#elif defined CONFIG_MATOM
+#define MODULE_PROC_FAMILY "ATOM "
+#elif defined CONFIG_M686
+#define MODULE_PROC_FAMILY "686 "
+#elif defined CONFIG_MPENTIUMII
+#define MODULE_PROC_FAMILY "PENTIUMII "
+#elif defined CONFIG_MPENTIUMIII
+#define MODULE_PROC_FAMILY "PENTIUMIII "
+#elif defined CONFIG_MPENTIUMM
+#define MODULE_PROC_FAMILY "PENTIUMM "
+#elif defined CONFIG_MPENTIUM4
+#define MODULE_PROC_FAMILY "PENTIUM4 "
+#elif defined CONFIG_MK6
+#define MODULE_PROC_FAMILY "K6 "
+#elif defined CONFIG_MK7
+#define MODULE_PROC_FAMILY "K7 "
+#elif defined CONFIG_MK8
+#define MODULE_PROC_FAMILY "K8 "
+#elif defined CONFIG_MELAN
+#define MODULE_PROC_FAMILY "ELAN "
+#elif defined CONFIG_MCRUSOE
+#define MODULE_PROC_FAMILY "CRUSOE "
+#elif defined CONFIG_MEFFICEON
+#define MODULE_PROC_FAMILY "EFFICEON "
+#elif defined CONFIG_MWINCHIPC6
+#define MODULE_PROC_FAMILY "WINCHIPC6 "
+#elif defined CONFIG_MWINCHIP3D
+#define MODULE_PROC_FAMILY "WINCHIP3D "
+#elif defined CONFIG_MCYRIXIII
+#define MODULE_PROC_FAMILY "CYRIXIII "
+#elif defined CONFIG_MVIAC3_2
+#define MODULE_PROC_FAMILY "VIAC3-2 "
+#elif defined CONFIG_MVIAC7
+#define MODULE_PROC_FAMILY "VIAC7 "
+#elif defined CONFIG_MGEODEGX1
+#define MODULE_PROC_FAMILY "GEODEGX1 "
+#elif defined CONFIG_MGEODE_LX
+#define MODULE_PROC_FAMILY "GEODE "
+#else
+#error unknown processor family
+#endif
+
+#ifdef CONFIG_X86_32
+# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY
+#else
+# define MODULE_ARCH_VERMAGIC ""
+#endif
+
+#endif /* _ASM_VERMAGIC_H */
diff --git a/arch/x86/include/uapi/asm/unistd.h b/arch/x86/include/uapi/asm/unistd.h
index 196fdd02b8b1..be5e2e747f50 100644
--- a/arch/x86/include/uapi/asm/unistd.h
+++ b/arch/x86/include/uapi/asm/unistd.h
@@ -2,8 +2,15 @@
#ifndef _UAPI_ASM_X86_UNISTD_H
#define _UAPI_ASM_X86_UNISTD_H
-/* x32 syscall flag bit */
-#define __X32_SYSCALL_BIT 0x40000000UL
+/*
+ * x32 syscall flag bit. Some user programs expect syscall NR macros
+ * and __X32_SYSCALL_BIT to have type int, even though syscall numbers
+ * are, for practical purposes, unsigned long.
+ *
+ * Fortunately, expressions like (nr & ~__X32_SYSCALL_BIT) do the right
+ * thing regardless.
+ */
+#define __X32_SYSCALL_BIT 0x40000000
#ifndef __KERNEL__
# ifdef __i386__
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index ba89cabe5fcf..2a7c3afa62e2 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -102,9 +102,7 @@ obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
obj-y += kprobes/
obj-$(CONFIG_MODULES) += module.o
-ifeq ($(CONFIG_X86_32),y)
-obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
-endif
+obj-$(CONFIG_X86_32) += doublefault_32.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_VM86) += vm86_32.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index fe698f96617c..263eeaddb0aa 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -345,56 +345,3 @@ out_noapbt:
apb_timer_block_enabled = 0;
panic("failed to enable APB timer\n");
}
-
-/* called before apb_timer_enable, use early map */
-unsigned long apbt_quick_calibrate(void)
-{
- int i, scale;
- u64 old, new;
- u64 t1, t2;
- unsigned long khz = 0;
- u32 loop, shift;
-
- apbt_set_mapping();
- dw_apb_clocksource_start(clocksource_apbt);
-
- /* check if the timer can count down, otherwise return */
- old = dw_apb_clocksource_read(clocksource_apbt);
- i = 10000;
- while (--i) {
- if (old != dw_apb_clocksource_read(clocksource_apbt))
- break;
- }
- if (!i)
- goto failed;
-
- /* count 16 ms */
- loop = (apbt_freq / 1000) << 4;
-
- /* restart the timer to ensure it won't get to 0 in the calibration */
- dw_apb_clocksource_start(clocksource_apbt);
-
- old = dw_apb_clocksource_read(clocksource_apbt);
- old += loop;
-
- t1 = rdtsc();
-
- do {
- new = dw_apb_clocksource_read(clocksource_apbt);
- } while (new < old);
-
- t2 = rdtsc();
-
- shift = 5;
- if (unlikely(loop >> shift == 0)) {
- printk(KERN_INFO
- "APBT TSC calibration failed, not enough resolution\n");
- return 0;
- }
- scale = (int)div_u64((t2 - t1), loop >> shift);
- khz = (scale * (apbt_freq / 1000)) >> shift;
- printk(KERN_INFO "TSC freq calculated by APB timer is %lu khz\n", khz);
- return khz;
-failed:
- return 0;
-}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 81b9c63dae1b..4b1d31be50b4 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -352,8 +352,6 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
* According to Intel, MFENCE can do the serialization here.
*/
asm volatile("mfence" : : : "memory");
-
- printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
return;
}
@@ -546,46 +544,20 @@ static struct clock_event_device lapic_clockevent = {
};
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
-static u32 hsx_deadline_rev(void)
-{
- switch (boot_cpu_data.x86_stepping) {
- case 0x02: return 0x3a; /* EP */
- case 0x04: return 0x0f; /* EX */
- }
-
- return ~0U;
-}
-
-static u32 bdx_deadline_rev(void)
-{
- switch (boot_cpu_data.x86_stepping) {
- case 0x02: return 0x00000011;
- case 0x03: return 0x0700000e;
- case 0x04: return 0x0f00000c;
- case 0x05: return 0x0e000003;
- }
-
- return ~0U;
-}
+static const struct x86_cpu_id deadline_match[] __initconst = {
+ X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x2, 0x2), 0x3a), /* EP */
+ X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x4, 0x4), 0x0f), /* EX */
-static u32 skx_deadline_rev(void)
-{
- switch (boot_cpu_data.x86_stepping) {
- case 0x03: return 0x01000136;
- case 0x04: return 0x02000014;
- }
-
- if (boot_cpu_data.x86_stepping > 4)
- return 0;
+ X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X, 0x0b000020),
- return ~0U;
-}
+ X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x2, 0x2), 0x00000011),
+ X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x3), 0x0700000e),
+ X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x4, 0x4), 0x0f00000c),
+ X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x5, 0x5), 0x0e000003),
-static const struct x86_cpu_id deadline_match[] = {
- X86_MATCH_INTEL_FAM6_MODEL( HASWELL_X, &hsx_deadline_rev),
- X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X, 0x0b000020),
- X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_D, &bdx_deadline_rev),
- X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE_X, &skx_deadline_rev),
+ X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x3, 0x3), 0x01000136),
+ X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x4, 0x4), 0x02000014),
+ X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x5, 0xf), 0),
X86_MATCH_INTEL_FAM6_MODEL( HASWELL, 0x22),
X86_MATCH_INTEL_FAM6_MODEL( HASWELL_L, 0x20),
@@ -603,34 +575,29 @@ static const struct x86_cpu_id deadline_match[] = {
{},
};
-static void apic_check_deadline_errata(void)
+static __init bool apic_validate_deadline_timer(void)
{
const struct x86_cpu_id *m;
u32 rev;
- if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) ||
- boot_cpu_has(X86_FEATURE_HYPERVISOR))
- return;
+ if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ return false;
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return true;
m = x86_match_cpu(deadline_match);
if (!m)
- return;
+ return true;
- /*
- * Function pointers will have the MSB set due to address layout,
- * immediate revisions will not.
- */
- if ((long)m->driver_data < 0)
- rev = ((u32 (*)(void))(m->driver_data))();
- else
- rev = (u32)m->driver_data;
+ rev = (u32)m->driver_data;
if (boot_cpu_data.microcode >= rev)
- return;
+ return true;
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
"please update microcode to version: 0x%x (or later)\n", rev);
+ return false;
}
/*
@@ -2092,7 +2059,8 @@ void __init init_apic_mappings(void)
{
unsigned int new_apicid;
- apic_check_deadline_errata();
+ if (apic_validate_deadline_timer())
+ pr_debug("TSC deadline timer available\n");
if (x2apic_mode) {
boot_cpu_physical_apicid = read_apic_id();
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 913c88617848..ce61e3e7d399 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -154,19 +154,6 @@ static inline bool mp_is_legacy_irq(int irq)
return irq >= 0 && irq < nr_legacy_irqs();
}
-/*
- * Initialize all legacy IRQs and all pins on the first IOAPIC
- * if we have legacy interrupt controller. Kernel boot option "pirq="
- * may rely on non-legacy pins on the first IOAPIC.
- */
-static inline int mp_init_irq_at_boot(int ioapic, int irq)
-{
- if (!nr_legacy_irqs())
- return 0;
-
- return ioapic == 0 || mp_is_legacy_irq(irq);
-}
-
static inline struct irq_domain *mp_ioapic_irqdomain(int ioapic)
{
return ioapics[ioapic].irqdomain;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index ad53b2abc859..69e70ed0f5e6 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -30,8 +30,6 @@ static enum uv_system_type uv_system_type;
static int uv_hubbed_system;
static int uv_hubless_system;
static u64 gru_start_paddr, gru_end_paddr;
-static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr;
-static u64 gru_dist_lmask, gru_dist_umask;
static union uvh_apicid uvh_apicid;
/* Unpack OEM/TABLE ID's to be NULL terminated strings */
@@ -48,11 +46,9 @@ static struct {
unsigned int gnode_shift;
} uv_cpuid;
-int uv_min_hub_revision_id;
-EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
+static int uv_min_hub_revision_id;
unsigned int uv_apicid_hibits;
-EXPORT_SYMBOL_GPL(uv_apicid_hibits);
static struct apic apic_x2apic_uv_x;
static struct uv_hub_info_s uv_hub_info_node0;
@@ -85,20 +81,7 @@ static unsigned long __init uv_early_read_mmr(unsigned long addr)
static inline bool is_GRU_range(u64 start, u64 end)
{
- if (gru_dist_base) {
- u64 su = start & gru_dist_umask; /* Upper (incl pnode) bits */
- u64 sl = start & gru_dist_lmask; /* Base offset bits */
- u64 eu = end & gru_dist_umask;
- u64 el = end & gru_dist_lmask;
-
- /* Must reside completely within a single GRU range: */
- return (sl == gru_dist_base && el == gru_dist_base &&
- su >= gru_first_node_paddr &&
- su <= gru_last_node_paddr &&
- eu == su);
- } else {
- return start >= gru_start_paddr && end <= gru_end_paddr;
- }
+ return start >= gru_start_paddr && end <= gru_end_paddr;
}
static bool uv_is_untracked_pat_range(u64 start, u64 end)
@@ -385,11 +368,10 @@ int is_uv_hubbed(int uvtype)
}
EXPORT_SYMBOL_GPL(is_uv_hubbed);
-int is_uv_hubless(int uvtype)
+static int is_uv_hubless(int uvtype)
{
return (uv_hubless_system & uvtype);
}
-EXPORT_SYMBOL_GPL(is_uv_hubless);
void **__uv_hub_info_list;
EXPORT_SYMBOL_GPL(__uv_hub_info_list);
@@ -417,12 +399,6 @@ static __initdata struct uv_gam_range_s *_gr_table;
#define SOCK_EMPTY ((unsigned short)~0)
-extern int uv_hub_info_version(void)
-{
- return UV_HUB_INFO_VERSION;
-}
-EXPORT_SYMBOL(uv_hub_info_version);
-
/* Default UV memory block size is 2GB */
static unsigned long mem_block_size __initdata = (2UL << 30);
@@ -590,12 +566,21 @@ static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
static void uv_send_IPI_one(int cpu, int vector)
{
- unsigned long apicid;
- int pnode;
+ unsigned long apicid = per_cpu(x86_cpu_to_apicid, cpu);
+ int pnode = uv_apicid_to_pnode(apicid);
+ unsigned long dmode, val;
+
+ if (vector == NMI_VECTOR)
+ dmode = dest_NMI;
+ else
+ dmode = dest_Fixed;
+
+ val = (1UL << UVH_IPI_INT_SEND_SHFT) |
+ ((apicid | uv_apicid_hibits) << UVH_IPI_INT_APIC_ID_SHFT) |
+ (dmode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
+ (vector << UVH_IPI_INT_VECTOR_SHFT);
- apicid = per_cpu(x86_cpu_to_apicid, cpu);
- pnode = uv_apicid_to_pnode(apicid);
- uv_hub_send_ipi(pnode, apicid, vector);
+ uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
}
static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
@@ -797,42 +782,6 @@ static __init void map_high(char *id, unsigned long base, int pshift, int bshift
init_extra_mapping_wb(paddr, bytes);
}
-static __init void map_gru_distributed(unsigned long c)
-{
- union uvh_rh_gam_gru_overlay_config_mmr_u gru;
- u64 paddr;
- unsigned long bytes;
- int nid;
-
- gru.v = c;
-
- /* Only base bits 42:28 relevant in dist mode */
- gru_dist_base = gru.v & 0x000007fff0000000UL;
- if (!gru_dist_base) {
- pr_info("UV: Map GRU_DIST base address NULL\n");
- return;
- }
-
- bytes = 1UL << UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
- gru_dist_lmask = ((1UL << uv_hub_info->m_val) - 1) & ~(bytes - 1);
- gru_dist_umask = ~((1UL << uv_hub_info->m_val) - 1);
- gru_dist_base &= gru_dist_lmask; /* Clear bits above M */
-
- for_each_online_node(nid) {
- paddr = ((u64)uv_node_to_pnode(nid) << uv_hub_info->m_val) |
- gru_dist_base;
- init_extra_mapping_wb(paddr, bytes);
- gru_first_node_paddr = min(paddr, gru_first_node_paddr);
- gru_last_node_paddr = max(paddr, gru_last_node_paddr);
- }
-
- /* Save upper (63:M) bits of address only for is_GRU_range */
- gru_first_node_paddr &= gru_dist_umask;
- gru_last_node_paddr &= gru_dist_umask;
-
- pr_debug("UV: Map GRU_DIST base 0x%016llx 0x%016llx - 0x%016llx\n", gru_dist_base, gru_first_node_paddr, gru_last_node_paddr);
-}
-
static __init void map_gru_high(int max_pnode)
{
union uvh_rh_gam_gru_overlay_config_mmr_u gru;
@@ -846,12 +795,6 @@ static __init void map_gru_high(int max_pnode)
return;
}
- /* Only UV3 has distributed GRU mode */
- if (is_uv3_hub() && gru.s3.mode) {
- map_gru_distributed(gru.v);
- return;
- }
-
base = (gru.v & mask) >> shift;
map_high("GRU", base, shift, shift, max_pnode, map_wb);
gru_start_paddr = ((u64)base << shift);
diff --git a/arch/x86/kernel/audit_64.c b/arch/x86/kernel/audit_64.c
index e1efe44ebefc..83d9cad4e68b 100644
--- a/arch/x86/kernel/audit_64.c
+++ b/arch/x86/kernel/audit_64.c
@@ -3,6 +3,7 @@
#include <linux/types.h>
#include <linux/audit.h>
#include <asm/unistd.h>
+#include <asm/audit.h>
static unsigned dir_class[] = {
#include <asm-generic/audit_dir_write.h>
@@ -41,7 +42,6 @@ int audit_classify_arch(int arch)
int audit_classify_syscall(int abi, unsigned syscall)
{
#ifdef CONFIG_IA32_EMULATION
- extern int ia32_classify_syscall(unsigned);
if (abi == AUDIT_ARCH_I386)
return ia32_classify_syscall(syscall);
#endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 547ad7bbf0e0..d4806eac9325 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -18,6 +18,7 @@
#include <asm/pci-direct.h>
#include <asm/delay.h>
#include <asm/debugreg.h>
+#include <asm/resctrl.h>
#ifdef CONFIG_X86_64
# include <asm/mmconfig.h>
@@ -597,6 +598,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
x86_amd_ls_cfg_ssbd_mask = 1ULL << bit;
}
}
+
+ resctrl_cpu_detect(c);
}
static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
@@ -1142,8 +1145,7 @@ static const int amd_erratum_383[] =
/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
static const int amd_erratum_1054[] =
- AMD_OSVW_ERRATUM(0, AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
-
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
{
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index bed0cb83fe24..d07809286b95 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -854,30 +854,6 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
}
}
-static void init_cqm(struct cpuinfo_x86 *c)
-{
- if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
- c->x86_cache_max_rmid = -1;
- c->x86_cache_occ_scale = -1;
- return;
- }
-
- /* will be overridden if occupancy monitoring exists */
- c->x86_cache_max_rmid = cpuid_ebx(0xf);
-
- if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
- cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
- cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
- u32 eax, ebx, ecx, edx;
-
- /* QoS sub-leaf, EAX=0Fh, ECX=1 */
- cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
-
- c->x86_cache_max_rmid = ecx;
- c->x86_cache_occ_scale = ebx;
- }
-}
-
void get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
@@ -945,7 +921,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
init_scattered_cpuid_features(c);
init_speculation_control(c);
- init_cqm(c);
/*
* Clear/Set all flags overridden by options, after probe.
@@ -1377,20 +1352,6 @@ static void generic_identify(struct cpuinfo_x86 *c)
#endif
}
-static void x86_init_cache_qos(struct cpuinfo_x86 *c)
-{
- /*
- * The heavy lifting of max_rmid and cache_occ_scale are handled
- * in get_cpu_cap(). Here we just set the max_rmid for the boot_cpu
- * in case CQM bits really aren't there in this CPU.
- */
- if (c != &boot_cpu_data) {
- boot_cpu_data.x86_cache_max_rmid =
- min(boot_cpu_data.x86_cache_max_rmid,
- c->x86_cache_max_rmid);
- }
-}
-
/*
* Validate that ACPI/mptables have the same information about the
* effective APIC id and update the package map.
@@ -1503,7 +1464,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
#endif
x86_init_rdrand(c);
- x86_init_cache_qos(c);
setup_pku(c);
/*
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index a19a680542ce..166d7c355896 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -22,6 +22,7 @@
#include <asm/cpu_device_id.h>
#include <asm/cmdline.h>
#include <asm/traps.h>
+#include <asm/resctrl.h>
#ifdef CONFIG_X86_64
#include <linux/topology.h>
@@ -322,6 +323,11 @@ static void early_init_intel(struct cpuinfo_x86 *c)
detect_ht_early(c);
}
+static void bsp_init_intel(struct cpuinfo_x86 *c)
+{
+ resctrl_cpu_detect(c);
+}
+
#ifdef CONFIG_X86_32
/*
* Early probe support logic for ppro memory erratum #50
@@ -961,6 +967,7 @@ static const struct cpu_dev intel_cpu_dev = {
#endif
.c_detect_tlb = intel_detect_tlb,
.c_early_init = early_init_intel,
+ .c_bsp_init = bsp_init_intel,
.c_init = init_intel,
.c_x86_vendor = X86_VENDOR_INTEL,
};
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index d3482eb43ff3..ad6776081e60 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -39,13 +39,18 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
const struct x86_cpu_id *m;
struct cpuinfo_x86 *c = &boot_cpu_data;
- for (m = match; m->vendor | m->family | m->model | m->feature; m++) {
+ for (m = match;
+ m->vendor | m->family | m->model | m->steppings | m->feature;
+ m++) {
if (m->vendor != X86_VENDOR_ANY && c->x86_vendor != m->vendor)
continue;
if (m->family != X86_FAMILY_ANY && c->x86 != m->family)
continue;
if (m->model != X86_MODEL_ANY && c->x86_model != m->model)
continue;
+ if (m->steppings != X86_STEPPING_ANY &&
+ !(BIT(c->x86_stepping) & m->steppings))
+ continue;
if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature))
continue;
return m;
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 54165f3569e8..e9265e2f28c9 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -42,6 +42,8 @@
#include <linux/export.h>
#include <linux/jump_label.h>
#include <linux/set_memory.h>
+#include <linux/task_work.h>
+#include <linux/hardirq.h>
#include <asm/intel-family.h>
#include <asm/processor.h>
@@ -1086,23 +1088,6 @@ static void mce_clear_state(unsigned long *toclear)
}
}
-static int do_memory_failure(struct mce *m)
-{
- int flags = MF_ACTION_REQUIRED;
- int ret;
-
- pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr);
- if (!(m->mcgstatus & MCG_STATUS_RIPV))
- flags |= MF_MUST_KILL;
- ret = memory_failure(m->addr >> PAGE_SHIFT, flags);
- if (ret)
- pr_err("Memory error not recovered");
- else
- set_mce_nospec(m->addr >> PAGE_SHIFT);
- return ret;
-}
-
-
/*
* Cases where we avoid rendezvous handler timeout:
* 1) If this CPU is offline.
@@ -1204,6 +1189,29 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
*m = *final;
}
+static void kill_me_now(struct callback_head *ch)
+{
+ force_sig(SIGBUS);
+}
+
+static void kill_me_maybe(struct callback_head *cb)
+{
+ struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
+ int flags = MF_ACTION_REQUIRED;
+
+ pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr);
+ if (!(p->mce_status & MCG_STATUS_RIPV))
+ flags |= MF_MUST_KILL;
+
+ if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) {
+ set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
+ return;
+ }
+
+ pr_err("Memory error not recovered");
+ kill_me_now(cb);
+}
+
/*
* The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18.
@@ -1222,7 +1230,7 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
* backing the user stack, tracing that reads the user stack will cause
* potentially infinite recursion.
*/
-void notrace do_machine_check(struct pt_regs *regs, long error_code)
+void noinstr do_machine_check(struct pt_regs *regs, long error_code)
{
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
@@ -1259,7 +1267,7 @@ void notrace do_machine_check(struct pt_regs *regs, long error_code)
if (__mc_check_crashing_cpu(cpu))
return;
- ist_enter(regs);
+ nmi_enter();
this_cpu_inc(mce_exception_count);
@@ -1352,23 +1360,24 @@ void notrace do_machine_check(struct pt_regs *regs, long error_code)
/* Fault was in user mode and we need to take some action */
if ((m.cs & 3) == 3) {
- ist_begin_non_atomic(regs);
- local_irq_enable();
-
- if (kill_it || do_memory_failure(&m))
- force_sig(SIGBUS);
- local_irq_disable();
- ist_end_non_atomic();
+ /* If this triggers there is no way to recover. Die hard. */
+ BUG_ON(!on_thread_stack() || !user_mode(regs));
+
+ current->mce_addr = m.addr;
+ current->mce_status = m.mcgstatus;
+ current->mce_kill_me.func = kill_me_maybe;
+ if (kill_it)
+ current->mce_kill_me.func = kill_me_now;
+ task_work_add(current, &current->mce_kill_me, true);
} else {
if (!fixup_exception(regs, X86_TRAP_MC, error_code, 0))
mce_panic("Failed kernel mode recovery", &m, msg);
}
out_ist:
- ist_exit(regs);
+ nmi_exit();
}
EXPORT_SYMBOL_GPL(do_machine_check);
-NOKPROBE_SYMBOL(do_machine_check);
#ifndef CONFIG_MEMORY_FAILURE
int memory_failure(unsigned long pfn, int flags)
diff --git a/arch/x86/kernel/cpu/mce/p5.c b/arch/x86/kernel/cpu/mce/p5.c
index 4ae6df556526..5ee94aa1b766 100644
--- a/arch/x86/kernel/cpu/mce/p5.c
+++ b/arch/x86/kernel/cpu/mce/p5.c
@@ -7,6 +7,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
+#include <linux/hardirq.h>
#include <asm/processor.h>
#include <asm/traps.h>
@@ -24,7 +25,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
{
u32 loaddr, hi, lotype;
- ist_enter(regs);
+ nmi_enter();
rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
@@ -39,7 +40,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
- ist_exit(regs);
+ nmi_exit();
}
/* Set up machine check reporting for processors with Intel style MCE: */
diff --git a/arch/x86/kernel/cpu/mce/winchip.c b/arch/x86/kernel/cpu/mce/winchip.c
index a30ea13cccc2..b3938c195365 100644
--- a/arch/x86/kernel/cpu/mce/winchip.c
+++ b/arch/x86/kernel/cpu/mce/winchip.c
@@ -6,6 +6,7 @@
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/types.h>
+#include <linux/hardirq.h>
#include <asm/processor.h>
#include <asm/traps.h>
@@ -18,12 +19,12 @@
/* Machine check handler for WinChip C6: */
static void winchip_machine_check(struct pt_regs *regs, long error_code)
{
- ist_enter(regs);
+ nmi_enter();
pr_emerg("CPU0: Machine Check Exception.\n");
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
- ist_exit(regs);
+ nmi_exit();
}
/* Set up machine check reporting on the Winchip C6 series */
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 7019d4b2df0c..baec68b7e010 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -545,8 +545,7 @@ static int __wait_for_cpus(atomic_t *t, long long timeout)
/*
* Returns:
* < 0 - on error
- * 0 - no update done
- * 1 - microcode was updated
+ * 0 - success (no update done or microcode was updated)
*/
static int __reload_late(void *info)
{
@@ -573,11 +572,11 @@ static int __reload_late(void *info)
else
goto wait_for_siblings;
- if (err > UCODE_NFOUND) {
- pr_warn("Error reloading microcode on CPU %d\n", cpu);
+ if (err >= UCODE_NFOUND) {
+ if (err == UCODE_ERROR)
+ pr_warn("Error reloading microcode on CPU %d\n", cpu);
+
ret = -1;
- } else if (err == UCODE_UPDATED || err == UCODE_OK) {
- ret = 1;
}
wait_for_siblings:
@@ -608,7 +607,7 @@ static int microcode_reload_late(void)
atomic_set(&late_cpus_out, 0);
ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
- if (ret > 0)
+ if (ret == 0)
microcode_check();
pr_info("Reload completed, microcode revision: 0x%x\n", boot_cpu_data.microcode);
@@ -649,7 +648,7 @@ static ssize_t reload_store(struct device *dev,
put:
put_online_cpus();
- if (ret >= 0)
+ if (ret == 0)
ret = size;
return ret;
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 9556930cd8c1..a5ee607a3b89 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -63,6 +63,10 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
case 15:
return msr - MSR_P4_BPU_PERFCTR0;
}
+ fallthrough;
+ case X86_VENDOR_ZHAOXIN:
+ case X86_VENDOR_CENTAUR:
+ return msr - MSR_ARCH_PERFMON_PERFCTR0;
}
return 0;
}
@@ -92,6 +96,10 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
case 15:
return msr - MSR_P4_BSU_ESCR0;
}
+ fallthrough;
+ case X86_VENDOR_ZHAOXIN:
+ case X86_VENDOR_CENTAUR:
+ return msr - MSR_ARCH_PERFMON_EVENTSEL0;
}
return 0;
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index d8cc5223b7ce..12f967c6b603 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -22,7 +22,7 @@
#include <linux/cpuhotplug.h>
#include <asm/intel-family.h>
-#include <asm/resctrl_sched.h>
+#include <asm/resctrl.h>
#include "internal.h"
/* Mutex to protect rdtgroup access. */
@@ -958,6 +958,36 @@ static __init void rdt_init_res_defs(void)
static enum cpuhp_state rdt_online;
+/* Runs once on the BSP during boot. */
+void resctrl_cpu_detect(struct cpuinfo_x86 *c)
+{
+ if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
+ c->x86_cache_max_rmid = -1;
+ c->x86_cache_occ_scale = -1;
+ c->x86_cache_mbm_width_offset = -1;
+ return;
+ }
+
+ /* will be overridden if occupancy monitoring exists */
+ c->x86_cache_max_rmid = cpuid_ebx(0xf);
+
+ if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
+ cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
+ cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
+ u32 eax, ebx, ecx, edx;
+
+ /* QoS sub-leaf, EAX=0Fh, ECX=1 */
+ cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
+
+ c->x86_cache_max_rmid = ecx;
+ c->x86_cache_occ_scale = ebx;
+ if (c->x86_vendor == X86_VENDOR_INTEL)
+ c->x86_cache_mbm_width_offset = eax & 0xff;
+ else
+ c->x86_cache_mbm_width_offset = -1;
+ }
+}
+
static int __init resctrl_late_init(void)
{
struct rdt_resource *r;
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 055c8613b531..934c8fb8a64a 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -495,14 +495,16 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
return ret;
}
-void mon_event_read(struct rmid_read *rr, struct rdt_domain *d,
- struct rdtgroup *rdtgrp, int evtid, int first)
+void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
+ struct rdt_domain *d, struct rdtgroup *rdtgrp,
+ int evtid, int first)
{
/*
* setup the parameters to send to the IPI to read the data.
*/
rr->rgrp = rdtgrp;
rr->evtid = evtid;
+ rr->r = r;
rr->d = d;
rr->val = 0;
rr->first = first;
@@ -539,7 +541,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
goto out;
}
- mon_event_read(&rr, d, rdtgrp, evtid, false);
+ mon_event_read(&rr, r, d, rdtgrp, evtid, false);
if (rr.val & RMID_VAL_ERROR)
seq_puts(m, "Error\n");
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 3dd13f3a8b23..f20a47d120b1 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -31,7 +31,7 @@
#define CQM_LIMBOCHECK_INTERVAL 1000
-#define MBM_CNTR_WIDTH 24
+#define MBM_CNTR_WIDTH_BASE 24
#define MBM_OVERFLOW_INTERVAL 1000
#define MAX_MBA_BW 100u
#define MBA_IS_LINEAR 0x4
@@ -40,6 +40,12 @@
#define RMID_VAL_ERROR BIT_ULL(63)
#define RMID_VAL_UNAVAIL BIT_ULL(62)
+/*
+ * With the above fields in use 62 bits remain in MSR_IA32_QM_CTR for
+ * data to be returned. The counter width is discovered from the hardware
+ * as an offset from MBM_CNTR_WIDTH_BASE.
+ */
+#define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE)
struct rdt_fs_context {
@@ -87,6 +93,7 @@ union mon_data_bits {
struct rmid_read {
struct rdtgroup *rgrp;
+ struct rdt_resource *r;
struct rdt_domain *d;
int evtid;
bool first;
@@ -460,6 +467,7 @@ struct rdt_resource {
struct list_head evt_list;
int num_rmid;
unsigned int mon_scale;
+ unsigned int mbm_width;
unsigned long fflags;
};
@@ -587,8 +595,9 @@ void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
unsigned int dom_id);
void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
struct rdt_domain *d);
-void mon_event_read(struct rmid_read *rr, struct rdt_domain *d,
- struct rdtgroup *rdtgrp, int evtid, int first);
+void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
+ struct rdt_domain *d, struct rdtgroup *rdtgrp,
+ int evtid, int first);
void mbm_setup_overflow_handler(struct rdt_domain *dom,
unsigned long delay_ms);
void mbm_handle_overflow(struct work_struct *work);
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 773124b0e18a..837d7d012b7b 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -214,9 +214,9 @@ void free_rmid(u32 rmid)
list_add_tail(&entry->list, &rmid_free_lru);
}
-static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr)
+static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
{
- u64 shift = 64 - MBM_CNTR_WIDTH, chunks;
+ u64 shift = 64 - width, chunks;
chunks = (cur_msr << shift) - (prev_msr << shift);
return chunks >>= shift;
@@ -256,7 +256,7 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
return 0;
}
- chunks = mbm_overflow_count(m->prev_msr, tval);
+ chunks = mbm_overflow_count(m->prev_msr, tval, rr->r->mbm_width);
m->chunks += chunks;
m->prev_msr = tval;
@@ -278,7 +278,7 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
return;
- chunks = mbm_overflow_count(m->prev_bw_msr, tval);
+ chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width);
m->chunks_bw += chunks;
m->chunks = m->chunks_bw;
cur_bw = (chunks * r->mon_scale) >> 20;
@@ -433,11 +433,12 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
}
}
-static void mbm_update(struct rdt_domain *d, int rmid)
+static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
{
struct rmid_read rr;
rr.first = false;
+ rr.r = r;
rr.d = d;
/*
@@ -510,6 +511,7 @@ void mbm_handle_overflow(struct work_struct *work)
struct rdtgroup *prgrp, *crgrp;
int cpu = smp_processor_id();
struct list_head *head;
+ struct rdt_resource *r;
struct rdt_domain *d;
mutex_lock(&rdtgroup_mutex);
@@ -517,16 +519,18 @@ void mbm_handle_overflow(struct work_struct *work)
if (!static_branch_likely(&rdt_mon_enable_key))
goto out_unlock;
- d = get_domain_from_cpu(cpu, &rdt_resources_all[RDT_RESOURCE_L3]);
+ r = &rdt_resources_all[RDT_RESOURCE_L3];
+
+ d = get_domain_from_cpu(cpu, r);
if (!d)
goto out_unlock;
list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
- mbm_update(d, prgrp->mon.rmid);
+ mbm_update(r, d, prgrp->mon.rmid);
head = &prgrp->mon.crdtgrp_list;
list_for_each_entry(crgrp, head, mon.crdtgrp_list)
- mbm_update(d, crgrp->mon.rmid);
+ mbm_update(r, d, crgrp->mon.rmid);
if (is_mba_sc(NULL))
update_mba_bw(prgrp, d);
@@ -614,11 +618,18 @@ static void l3_mon_evt_init(struct rdt_resource *r)
int rdt_get_mon_l3_config(struct rdt_resource *r)
{
+ unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
unsigned int cl_size = boot_cpu_data.x86_cache_size;
int ret;
r->mon_scale = boot_cpu_data.x86_cache_occ_scale;
r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
+ r->mbm_width = MBM_CNTR_WIDTH_BASE;
+
+ if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX)
+ r->mbm_width += mbm_offset;
+ else if (mbm_offset > MBM_CNTR_WIDTH_OFFSET_MAX)
+ pr_warn("Ignoring impossible MBM counter offset\n");
/*
* A reasonable upper limit on the max threshold is the number
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index d7623e1b927d..4bd28b388a1a 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -24,7 +24,7 @@
#include <asm/cacheflush.h>
#include <asm/intel-family.h>
-#include <asm/resctrl_sched.h>
+#include <asm/resctrl.h>
#include <asm/perf_event.h>
#include "../../events/perf_event.h" /* For X86_CONFIG() */
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 5a359d9fcc05..d7cb5ab0d1f0 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -29,7 +29,7 @@
#include <uapi/linux/magic.h>
-#include <asm/resctrl_sched.h>
+#include <asm/resctrl.h>
#include "internal.h"
DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
@@ -2472,7 +2472,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
goto out_destroy;
if (is_mbm_event(mevt->evtid))
- mon_event_read(&rr, d, prgrp, mevt->evtid, true);
+ mon_event_read(&rr, r, d, prgrp, mevt->evtid, true);
}
kernfs_activate(kn);
return 0;
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 8e3a8fedfa4d..722fd712e1cf 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -87,7 +87,6 @@ static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
static bool in_doublefault_stack(unsigned long *stack, struct stack_info *info)
{
-#ifdef CONFIG_DOUBLEFAULT
struct cpu_entry_area *cea = get_cpu_entry_area(raw_smp_processor_id());
struct doublefault_stack *ss = &cea->doublefault_stack;
@@ -103,9 +102,6 @@ static bool in_doublefault_stack(unsigned long *stack, struct stack_info *info)
info->next_sp = (unsigned long *)this_cpu_read(cpu_tss_rw.x86_tss.sp);
return true;
-#else
- return false;
-#endif
}
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 87b97897a881..460ae7f66818 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -183,7 +183,8 @@ recursion_check:
*/
if (visit_mask) {
if (*visit_mask & (1UL << info->type)) {
- printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
+ if (task == current)
+ printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
goto unknown;
}
*visit_mask |= 1UL << info->type;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index c5399e80c59c..4d13c57f370a 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -910,14 +910,6 @@ static int __init parse_memmap_one(char *p)
return -EINVAL;
if (!strncmp(p, "exactmap", 8)) {
-#ifdef CONFIG_CRASH_DUMP
- /*
- * If we are doing a crash dump, we still need to know
- * the real memory size before the original memory map is
- * reset.
- */
- saved_max_pfn = e820__end_of_ram_pfn();
-#endif
e820_table->nr_entries = 0;
userdef = 1;
return 0;
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 9b33904251a9..93fbdff2974f 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -15,12 +15,9 @@
#include <xen/hvc-console.h>
#include <asm/pci-direct.h>
#include <asm/fixmap.h>
-#include <asm/intel-mid.h>
#include <asm/pgtable.h>
#include <linux/usb/ehci_def.h>
#include <linux/usb/xhci-dbgp.h>
-#include <linux/efi.h>
-#include <asm/efi.h>
#include <asm/pci_x86.h>
/* Simple VGA output */
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 12c70840980e..06c818967bb6 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -291,15 +291,13 @@ void fpu__drop(struct fpu *fpu)
}
/*
- * Clear FPU registers by setting them up from
- * the init fpstate:
+ * Clear FPU registers by setting them up from the init fpstate.
+ * Caller must do fpregs_[un]lock() around it.
*/
-static inline void copy_init_fpstate_to_fpregs(void)
+static inline void copy_init_fpstate_to_fpregs(u64 features_mask)
{
- fpregs_lock();
-
if (use_xsave())
- copy_kernel_to_xregs(&init_fpstate.xsave, -1);
+ copy_kernel_to_xregs(&init_fpstate.xsave, features_mask);
else if (static_cpu_has(X86_FEATURE_FXSR))
copy_kernel_to_fxregs(&init_fpstate.fxsave);
else
@@ -307,9 +305,6 @@ static inline void copy_init_fpstate_to_fpregs(void)
if (boot_cpu_has(X86_FEATURE_OSPKE))
copy_init_pkru_to_fpregs();
-
- fpregs_mark_activate();
- fpregs_unlock();
}
/*
@@ -318,18 +313,40 @@ static inline void copy_init_fpstate_to_fpregs(void)
* Called by sys_execve(), by the signal handler code and by various
* error paths.
*/
-void fpu__clear(struct fpu *fpu)
+static void fpu__clear(struct fpu *fpu, bool user_only)
{
- WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
+ WARN_ON_FPU(fpu != &current->thread.fpu);
- fpu__drop(fpu);
+ if (!static_cpu_has(X86_FEATURE_FPU)) {
+ fpu__drop(fpu);
+ fpu__initialize(fpu);
+ return;
+ }
- /*
- * Make sure fpstate is cleared and initialized.
- */
- fpu__initialize(fpu);
- if (static_cpu_has(X86_FEATURE_FPU))
- copy_init_fpstate_to_fpregs();
+ fpregs_lock();
+
+ if (user_only) {
+ if (!fpregs_state_valid(fpu, smp_processor_id()) &&
+ xfeatures_mask_supervisor())
+ copy_kernel_to_xregs(&fpu->state.xsave,
+ xfeatures_mask_supervisor());
+ copy_init_fpstate_to_fpregs(xfeatures_mask_user());
+ } else {
+ copy_init_fpstate_to_fpregs(xfeatures_mask_all);
+ }
+
+ fpregs_mark_activate();
+ fpregs_unlock();
+}
+
+void fpu__clear_user_states(struct fpu *fpu)
+{
+ fpu__clear(fpu, true);
+}
+
+void fpu__clear_all(struct fpu *fpu)
+{
+ fpu__clear(fpu, false);
}
/*
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 6ce7e0a23268..61ddc3a5e5c2 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -224,7 +224,8 @@ static void __init fpu__init_system_xstate_size_legacy(void)
*/
u64 __init fpu__get_supported_xfeatures_mask(void)
{
- return XCNTXT_MASK;
+ return XFEATURE_MASK_USER_SUPPORTED |
+ XFEATURE_MASK_SUPERVISOR_SUPPORTED;
}
/* Legacy code to initialize eager fpu mode. */
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index d652b939ccfb..bd1d0649f8ce 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -139,7 +139,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
} else {
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
if (!ret)
- ret = validate_xstate_header(&xsave->header);
+ ret = validate_user_xstate_header(&xsave->header);
}
/*
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 400a05e1c1c5..9393a445d73c 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -211,9 +211,9 @@ retry:
}
static inline void
-sanitize_restored_xstate(union fpregs_state *state,
- struct user_i387_ia32_struct *ia32_env,
- u64 xfeatures, int fx_only)
+sanitize_restored_user_xstate(union fpregs_state *state,
+ struct user_i387_ia32_struct *ia32_env,
+ u64 user_xfeatures, int fx_only)
{
struct xregs_state *xsave = &state->xsave;
struct xstate_header *header = &xsave->header;
@@ -226,13 +226,22 @@ sanitize_restored_xstate(union fpregs_state *state,
*/
/*
- * Init the state that is not present in the memory
- * layout and not enabled by the OS.
+ * 'user_xfeatures' might have bits clear which are
+ * set in header->xfeatures. This represents features that
+ * were in init state prior to a signal delivery, and need
+ * to be reset back to the init state. Clear any user
+ * feature bits which are set in the kernel buffer to get
+ * them back to the init state.
+ *
+ * Supervisor state is unchanged by input from userspace.
+ * Ensure supervisor state bits stay set and supervisor
+ * state is not modified.
*/
if (fx_only)
header->xfeatures = XFEATURE_MASK_FPSSE;
else
- header->xfeatures &= xfeatures;
+ header->xfeatures &= user_xfeatures |
+ xfeatures_mask_supervisor();
}
if (use_fxsr()) {
@@ -252,16 +261,24 @@ sanitize_restored_xstate(union fpregs_state *state,
*/
static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only)
{
+ u64 init_bv;
+ int r;
+
if (use_xsave()) {
if (fx_only) {
- u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE;
- copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
- return copy_user_to_fxregs(buf);
+ init_bv = xfeatures_mask_user() & ~XFEATURE_MASK_FPSSE;
+
+ r = copy_user_to_fxregs(buf);
+ if (!r)
+ copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
+ return r;
} else {
- u64 init_bv = xfeatures_mask & ~xbv;
- if (unlikely(init_bv))
+ init_bv = xfeatures_mask_user() & ~xbv;
+
+ r = copy_user_to_xregs(buf, xbv);
+ if (!r && unlikely(init_bv))
copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
- return copy_user_to_xregs(buf, xbv);
+ return r;
}
} else if (use_fxsr()) {
return copy_user_to_fxregs(buf);
@@ -277,7 +294,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
struct task_struct *tsk = current;
struct fpu *fpu = &tsk->thread.fpu;
struct user_i387_ia32_struct env;
- u64 xfeatures = 0;
+ u64 user_xfeatures = 0;
int fx_only = 0;
int ret = 0;
@@ -285,7 +302,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
IS_ENABLED(CONFIG_IA32_EMULATION));
if (!buf) {
- fpu__clear(fpu);
+ fpu__clear_user_states(fpu);
return 0;
}
@@ -310,32 +327,14 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
trace_x86_fpu_xstate_check_failed(fpu);
} else {
state_size = fx_sw_user.xstate_size;
- xfeatures = fx_sw_user.xfeatures;
+ user_xfeatures = fx_sw_user.xfeatures;
}
}
- /*
- * The current state of the FPU registers does not matter. By setting
- * TIF_NEED_FPU_LOAD unconditionally it is ensured that the our xstate
- * is not modified on context switch and that the xstate is considered
- * to be loaded again on return to userland (overriding last_cpu avoids
- * the optimisation).
- */
- set_thread_flag(TIF_NEED_FPU_LOAD);
- __fpu_invalidate_fpregs_state(fpu);
-
if ((unsigned long)buf_fx % 64)
fx_only = 1;
- /*
- * For 32-bit frames with fxstate, copy the fxstate so it can be
- * reconstructed later.
- */
- if (ia32_fxstate) {
- ret = __copy_from_user(&env, buf, sizeof(env));
- if (ret)
- goto err_out;
- envp = &env;
- } else {
+
+ if (!ia32_fxstate) {
/*
* Attempt to restore the FPU registers directly from user
* memory. For that to succeed, the user access cannot cause
@@ -345,20 +344,65 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
*/
fpregs_lock();
pagefault_disable();
- ret = copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only);
+ ret = copy_user_to_fpregs_zeroing(buf_fx, user_xfeatures, fx_only);
pagefault_enable();
if (!ret) {
+
+ /*
+ * Restore supervisor states: previous context switch
+ * etc has done XSAVES and saved the supervisor states
+ * in the kernel buffer from which they can be restored
+ * now.
+ *
+ * We cannot do a single XRSTORS here - which would
+ * be nice - because the rest of the FPU registers are
+ * being restored from a user buffer directly. The
+ * single XRSTORS happens below, when the user buffer
+ * has been copied to the kernel one.
+ */
+ if (test_thread_flag(TIF_NEED_FPU_LOAD) &&
+ xfeatures_mask_supervisor())
+ copy_kernel_to_xregs(&fpu->state.xsave,
+ xfeatures_mask_supervisor());
fpregs_mark_activate();
fpregs_unlock();
return 0;
}
- fpregs_deactivate(fpu);
fpregs_unlock();
+ } else {
+ /*
+ * For 32-bit frames with fxstate, copy the fxstate so it can
+ * be reconstructed later.
+ */
+ ret = __copy_from_user(&env, buf, sizeof(env));
+ if (ret)
+ goto err_out;
+ envp = &env;
}
+ /*
+ * By setting TIF_NEED_FPU_LOAD it is ensured that our xstate is
+ * not modified on context switch and that the xstate is considered
+ * to be loaded again on return to userland (overriding last_cpu avoids
+ * the optimisation).
+ */
+ fpregs_lock();
+
+ if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
+
+ /*
+ * Supervisor states are not modified by user space input. Save
+ * current supervisor states first and invalidate the FPU regs.
+ */
+ if (xfeatures_mask_supervisor())
+ copy_supervisor_to_kernel(&fpu->state.xsave);
+ set_thread_flag(TIF_NEED_FPU_LOAD);
+ }
+ __fpu_invalidate_fpregs_state(fpu);
+ fpregs_unlock();
if (use_xsave() && !fx_only) {
- u64 init_bv = xfeatures_mask & ~xfeatures;
+ u64 init_bv = xfeatures_mask_user() & ~user_xfeatures;
if (using_compacted_format()) {
ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
@@ -366,17 +410,24 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
if (!ret && state_size > offsetof(struct xregs_state, header))
- ret = validate_xstate_header(&fpu->state.xsave.header);
+ ret = validate_user_xstate_header(&fpu->state.xsave.header);
}
if (ret)
goto err_out;
- sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only);
+ sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures,
+ fx_only);
fpregs_lock();
if (unlikely(init_bv))
copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
- ret = copy_kernel_to_xregs_err(&fpu->state.xsave, xfeatures);
+
+ /*
+ * Restore previously saved supervisor xstates along with
+ * copied-in user xstates.
+ */
+ ret = copy_kernel_to_xregs_err(&fpu->state.xsave,
+ user_xfeatures | xfeatures_mask_supervisor());
} else if (use_fxsr()) {
ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size);
@@ -385,11 +436,14 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
goto err_out;
}
- sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only);
+ sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures,
+ fx_only);
fpregs_lock();
if (use_xsave()) {
- u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE;
+ u64 init_bv;
+
+ init_bv = xfeatures_mask_user() & ~XFEATURE_MASK_FPSSE;
copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
}
@@ -410,7 +464,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
err_out:
if (ret)
- fpu__clear(fpu);
+ fpu__clear_user_states(fpu);
return ret;
}
@@ -465,7 +519,7 @@ void fpu__init_prepare_fx_sw_frame(void)
fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
fx_sw_reserved.extended_size = size;
- fx_sw_reserved.xfeatures = xfeatures_mask;
+ fx_sw_reserved.xfeatures = xfeatures_mask_user();
fx_sw_reserved.xstate_size = fpu_user_xstate_size;
if (IS_ENABLED(CONFIG_IA32_EMULATION) ||
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 32b153d38748..bda2e5eaca0e 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -54,13 +54,15 @@ static short xsave_cpuid_features[] __initdata = {
};
/*
- * Mask of xstate features supported by the CPU and the kernel:
+ * This represents the full set of bits that should ever be set in a kernel
+ * XSAVE buffer, both supervisor and user xstates.
*/
-u64 xfeatures_mask __read_mostly;
+u64 xfeatures_mask_all __read_mostly;
static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_comp_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
+static unsigned int xstate_supervisor_only_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
/*
* The XSAVE area of kernel can be in standard or compacted format;
@@ -76,7 +78,7 @@ unsigned int fpu_user_xstate_size;
*/
int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
{
- u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask;
+ u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask_all;
if (unlikely(feature_name)) {
long xfeature_idx, max_idx;
@@ -150,7 +152,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
* None of the feature bits are in init state. So nothing else
* to do for us, as the memory layout is up to date.
*/
- if ((xfeatures & xfeatures_mask) == xfeatures_mask)
+ if ((xfeatures & xfeatures_mask_all) == xfeatures_mask_all)
return;
/*
@@ -177,7 +179,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
* in a special way already:
*/
feature_bit = 0x2;
- xfeatures = (xfeatures_mask & ~xfeatures) >> 2;
+ xfeatures = (xfeatures_mask_user() & ~xfeatures) >> 2;
/*
* Update all the remaining memory layouts according to their
@@ -205,30 +207,39 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
*/
void fpu__init_cpu_xstate(void)
{
- if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask)
+ u64 unsup_bits;
+
+ if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask_all)
return;
/*
- * Make it clear that XSAVES supervisor states are not yet
- * implemented should anyone expect it to work by changing
- * bits in XFEATURE_MASK_* macros and XCR0.
+ * Unsupported supervisor xstates should not be found in
+ * the xfeatures mask.
*/
- WARN_ONCE((xfeatures_mask & XFEATURE_MASK_SUPERVISOR),
- "x86/fpu: XSAVES supervisor states are not yet implemented.\n");
+ unsup_bits = xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_UNSUPPORTED;
+ WARN_ONCE(unsup_bits, "x86/fpu: Found unsupported supervisor xstates: 0x%llx\n",
+ unsup_bits);
- xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR;
+ xfeatures_mask_all &= ~XFEATURE_MASK_SUPERVISOR_UNSUPPORTED;
cr4_set_bits(X86_CR4_OSXSAVE);
- xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
+
+ /*
+ * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
+ * managed by XSAVE{C, OPT, S} and XRSTOR{S}. Only XSAVE user
+ * states can be set here.
+ */
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_user());
+
+ /*
+ * MSR_IA32_XSS sets supervisor states managed by XSAVES.
+ */
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
+ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
}
-/*
- * Note that in the future we will likely need a pair of
- * functions here: one for user xstates and the other for
- * system xstates. For now, they are the same.
- */
-static int xfeature_enabled(enum xfeature xfeature)
+static bool xfeature_enabled(enum xfeature xfeature)
{
- return !!(xfeatures_mask & (1UL << xfeature));
+ return xfeatures_mask_all & BIT_ULL(xfeature);
}
/*
@@ -383,6 +394,33 @@ static void __init setup_xstate_comp_offsets(void)
}
/*
+ * Setup offsets of a supervisor-state-only XSAVES buffer:
+ *
+ * The offsets stored in xstate_comp_offsets[] only work for one specific
+ * value of the Requested Feature BitMap (RFBM). In cases where a different
+ * RFBM value is used, a different set of offsets is required. This set of
+ * offsets is for when RFBM=xfeatures_mask_supervisor().
+ */
+static void __init setup_supervisor_only_offsets(void)
+{
+ unsigned int next_offset;
+ int i;
+
+ next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+
+ for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
+ if (!xfeature_enabled(i) || !xfeature_is_supervisor(i))
+ continue;
+
+ if (xfeature_is_aligned(i))
+ next_offset = ALIGN(next_offset, 64);
+
+ xstate_supervisor_only_offsets[i] = next_offset;
+ next_offset += xstate_sizes[i];
+ }
+}
+
+/*
* Print out xstate component offsets and sizes
*/
static void __init print_xstate_offset_size(void)
@@ -415,7 +453,7 @@ static void __init setup_init_fpu_buf(void)
if (boot_cpu_has(X86_FEATURE_XSAVES))
init_fpstate.xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT |
- xfeatures_mask;
+ xfeatures_mask_all;
/*
* Init all the features state with header.xfeatures being 0x0
@@ -438,7 +476,7 @@ static int xfeature_uncompacted_offset(int xfeature_nr)
* format. Checking a supervisor state's uncompacted offset is
* an error.
*/
- if (XFEATURE_MASK_SUPERVISOR & BIT_ULL(xfeature_nr)) {
+ if (XFEATURE_MASK_SUPERVISOR_ALL & BIT_ULL(xfeature_nr)) {
WARN_ONCE(1, "No fixed offset for xstate %d\n", xfeature_nr);
return -1;
}
@@ -472,10 +510,10 @@ int using_compacted_format(void)
}
/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
-int validate_xstate_header(const struct xstate_header *hdr)
+int validate_user_xstate_header(const struct xstate_header *hdr)
{
/* No unknown or supervisor features may be set */
- if (hdr->xfeatures & (~xfeatures_mask | XFEATURE_MASK_SUPERVISOR))
+ if (hdr->xfeatures & ~xfeatures_mask_user())
return -EINVAL;
/* Userspace must use the uncompacted format */
@@ -610,15 +648,12 @@ static void do_extra_xstate_size_checks(void)
/*
- * Get total size of enabled xstates in XCR0/xfeatures_mask.
+ * Get total size of enabled xstates in XCR0 | IA32_XSS.
*
* Note the SDM's wording here. "sub-function 0" only enumerates
* the size of the *user* states. If we use it to size a buffer
* that we use 'XSAVES' on, we could potentially overflow the
* buffer because 'XSAVES' saves system states too.
- *
- * Note that we do not currently set any bits on IA32_XSS so
- * 'XCR0 | IA32_XSS == XCR0' for now.
*/
static unsigned int __init get_xsaves_size(void)
{
@@ -700,7 +735,7 @@ static int __init init_xstate_size(void)
*/
static void fpu__init_disable_system_xstate(void)
{
- xfeatures_mask = 0;
+ xfeatures_mask_all = 0;
cr4_clear_bits(X86_CR4_OSXSAVE);
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
}
@@ -735,16 +770,26 @@ void __init fpu__init_system_xstate(void)
return;
}
+ /*
+ * Find user xstates supported by the processor.
+ */
cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
- xfeatures_mask = eax + ((u64)edx << 32);
+ xfeatures_mask_all = eax + ((u64)edx << 32);
- if ((xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
+ /*
+ * Find supervisor xstates supported by the processor.
+ */
+ cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
+ xfeatures_mask_all |= ecx + ((u64)edx << 32);
+
+ if ((xfeatures_mask_user() & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
/*
* This indicates that something really unexpected happened
* with the enumeration. Disable XSAVE and try to continue
* booting without it. This is too early to BUG().
*/
- pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask);
+ pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
+ xfeatures_mask_all);
goto out_disable;
}
@@ -753,10 +798,10 @@ void __init fpu__init_system_xstate(void)
*/
for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
if (!boot_cpu_has(xsave_cpuid_features[i]))
- xfeatures_mask &= ~BIT(i);
+ xfeatures_mask_all &= ~BIT_ULL(i);
}
- xfeatures_mask &= fpu__get_supported_xfeatures_mask();
+ xfeatures_mask_all &= fpu__get_supported_xfeatures_mask();
/* Enable xstate instructions to be able to continue with initialization: */
fpu__init_cpu_xstate();
@@ -768,15 +813,16 @@ void __init fpu__init_system_xstate(void)
* Update info used for ptrace frames; use standard-format size and no
* supervisor xstates:
*/
- update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR);
+ update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask_user());
fpu__init_prepare_fx_sw_frame();
setup_init_fpu_buf();
setup_xstate_comp_offsets();
+ setup_supervisor_only_offsets();
print_xstate_offset_size();
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
- xfeatures_mask,
+ xfeatures_mask_all,
fpu_kernel_xstate_size,
boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
return;
@@ -795,7 +841,14 @@ void fpu__resume_cpu(void)
* Restore XCR0 on xsave capable CPUs:
*/
if (boot_cpu_has(X86_FEATURE_XSAVE))
- xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_user());
+
+ /*
+ * Restore IA32_XSS. The same CPUID bit enumerates support
+ * of XSAVES and MSR_IA32_XSS.
+ */
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
+ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
}
/*
@@ -840,10 +893,9 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
/*
* We should not ever be requesting features that we
- * have not enabled. Remember that xfeatures_mask is
- * what we write to the XCR0 register.
+ * have not enabled.
*/
- WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr)),
+ WARN_ONCE(!(xfeatures_mask_all & BIT_ULL(xfeature_nr)),
"get of unsupported state");
/*
* This assumes the last 'xsave*' instruction to
@@ -957,18 +1009,31 @@ static inline bool xfeatures_mxcsr_quirk(u64 xfeatures)
return true;
}
-/*
- * This is similar to user_regset_copyout(), but will not add offset to
- * the source data pointer or increment pos, count, kbuf, and ubuf.
- */
-static inline void
-__copy_xstate_to_kernel(void *kbuf, const void *data,
- unsigned int offset, unsigned int size, unsigned int size_total)
+static void fill_gap(unsigned to, void **kbuf, unsigned *pos, unsigned *count)
{
- if (offset < size_total) {
- unsigned int copy = min(size, size_total - offset);
+ if (*pos < to) {
+ unsigned size = to - *pos;
+
+ if (size > *count)
+ size = *count;
+ memcpy(*kbuf, (void *)&init_fpstate.xsave + *pos, size);
+ *kbuf += size;
+ *pos += size;
+ *count -= size;
+ }
+}
- memcpy(kbuf + offset, data, copy);
+static void copy_part(unsigned offset, unsigned size, void *from,
+ void **kbuf, unsigned *pos, unsigned *count)
+{
+ fill_gap(offset, kbuf, pos, count);
+ if (size > *count)
+ size = *count;
+ if (size) {
+ memcpy(*kbuf, from, size);
+ *kbuf += size;
+ *pos += size;
+ *count -= size;
}
}
@@ -981,8 +1046,9 @@ __copy_xstate_to_kernel(void *kbuf, const void *data,
*/
int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
{
- unsigned int offset, size;
struct xstate_header header;
+ const unsigned off_mxcsr = offsetof(struct fxregs_state, mxcsr);
+ unsigned count = size_total;
int i;
/*
@@ -996,48 +1062,44 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
*/
memset(&header, 0, sizeof(header));
header.xfeatures = xsave->header.xfeatures;
- header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;
-
+ header.xfeatures &= xfeatures_mask_user();
+
+ if (header.xfeatures & XFEATURE_MASK_FP)
+ copy_part(0, off_mxcsr,
+ &xsave->i387, &kbuf, &offset_start, &count);
+ if (header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM))
+ copy_part(off_mxcsr, MXCSR_AND_FLAGS_SIZE,
+ &xsave->i387.mxcsr, &kbuf, &offset_start, &count);
+ if (header.xfeatures & XFEATURE_MASK_FP)
+ copy_part(offsetof(struct fxregs_state, st_space), 128,
+ &xsave->i387.st_space, &kbuf, &offset_start, &count);
+ if (header.xfeatures & XFEATURE_MASK_SSE)
+ copy_part(xstate_offsets[XFEATURE_MASK_SSE], 256,
+ &xsave->i387.xmm_space, &kbuf, &offset_start, &count);
+ /*
+ * Fill xsave->i387.sw_reserved value for ptrace frame:
+ */
+ copy_part(offsetof(struct fxregs_state, sw_reserved), 48,
+ xstate_fx_sw_bytes, &kbuf, &offset_start, &count);
/*
* Copy xregs_state->header:
*/
- offset = offsetof(struct xregs_state, header);
- size = sizeof(header);
+ copy_part(offsetof(struct xregs_state, header), sizeof(header),
+ &header, &kbuf, &offset_start, &count);
- __copy_xstate_to_kernel(kbuf, &header, offset, size, size_total);
-
- for (i = 0; i < XFEATURE_MAX; i++) {
+ for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
/*
* Copy only in-use xstates:
*/
if ((header.xfeatures >> i) & 1) {
void *src = __raw_xsave_addr(xsave, i);
- offset = xstate_offsets[i];
- size = xstate_sizes[i];
-
- /* The next component has to fit fully into the output buffer: */
- if (offset + size > size_total)
- break;
-
- __copy_xstate_to_kernel(kbuf, src, offset, size, size_total);
+ copy_part(xstate_offsets[i], xstate_sizes[i],
+ src, &kbuf, &offset_start, &count);
}
}
-
- if (xfeatures_mxcsr_quirk(header.xfeatures)) {
- offset = offsetof(struct fxregs_state, mxcsr);
- size = MXCSR_AND_FLAGS_SIZE;
- __copy_xstate_to_kernel(kbuf, &xsave->i387.mxcsr, offset, size, size_total);
- }
-
- /*
- * Fill xsave->i387.sw_reserved value for ptrace frame:
- */
- offset = offsetof(struct fxregs_state, sw_reserved);
- size = sizeof(xstate_fx_sw_bytes);
-
- __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total);
+ fill_gap(size_total, &kbuf, &offset_start, &count);
return 0;
}
@@ -1080,7 +1142,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
*/
memset(&header, 0, sizeof(header));
header.xfeatures = xsave->header.xfeatures;
- header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;
+ header.xfeatures &= xfeatures_mask_user();
/*
* Copy xregs_state->header:
@@ -1147,7 +1209,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
memcpy(&hdr, kbuf + offset, size);
- if (validate_xstate_header(&hdr))
+ if (validate_user_xstate_header(&hdr))
return -EINVAL;
for (i = 0; i < XFEATURE_MAX; i++) {
@@ -1173,7 +1235,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
* The state that came in from userspace was user-state only.
* Mask all the user states out of 'xfeatures':
*/
- xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
+ xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;
/*
* Add back in the features that came in from userspace:
@@ -1201,7 +1263,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
if (__copy_from_user(&hdr, ubuf + offset, size))
return -EFAULT;
- if (validate_xstate_header(&hdr))
+ if (validate_user_xstate_header(&hdr))
return -EINVAL;
for (i = 0; i < XFEATURE_MAX; i++) {
@@ -1229,7 +1291,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
* The state that came in from userspace was user-state only.
* Mask all the user states out of 'xfeatures':
*/
- xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
+ xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;
/*
* Add back in the features that came in from userspace:
@@ -1239,6 +1301,61 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
return 0;
}
+/*
+ * Save only supervisor states to the kernel buffer. This blows away all
+ * old states, and is intended to be used only in __fpu__restore_sig(), where
+ * user states are restored from the user buffer.
+ */
+void copy_supervisor_to_kernel(struct xregs_state *xstate)
+{
+ struct xstate_header *header;
+ u64 max_bit, min_bit;
+ u32 lmask, hmask;
+ int err, i;
+
+ if (WARN_ON(!boot_cpu_has(X86_FEATURE_XSAVES)))
+ return;
+
+ if (!xfeatures_mask_supervisor())
+ return;
+
+ max_bit = __fls(xfeatures_mask_supervisor());
+ min_bit = __ffs(xfeatures_mask_supervisor());
+
+ lmask = xfeatures_mask_supervisor();
+ hmask = xfeatures_mask_supervisor() >> 32;
+ XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
+
+ /* We should never fault when copying to a kernel buffer: */
+ if (WARN_ON_FPU(err))
+ return;
+
+ /*
+ * At this point, the buffer has only supervisor states and must be
+ * converted back to normal kernel format.
+ */
+ header = &xstate->header;
+ header->xcomp_bv |= xfeatures_mask_all;
+
+ /*
+ * This only moves states up in the buffer. Start with
+ * the last state and move backwards so that states are
+ * not overwritten until after they are moved. Note:
+ * memmove() allows overlapping src/dst buffers.
+ */
+ for (i = max_bit; i >= min_bit; i--) {
+ u8 *xbuf = (u8 *)xstate;
+
+ if (!((header->xfeatures >> i) & 1))
+ continue;
+
+ /* Move xfeature 'i' into its normal location */
+ memmove(xbuf + xstate_comp_offsets[i],
+ xbuf + xstate_supervisor_only_offsets[i],
+ xstate_sizes[i]);
+ }
+}
+
#ifdef CONFIG_PROC_PID_ARCH_STATUS
/*
* Report the amount of time elapsed in millisecond since last AVX512
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 37a0aeaf89e7..c84d28e90a58 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -282,7 +282,8 @@ static inline void tramp_free(void *tramp) { }
/* Defined as markers to the end of the ftrace default trampolines */
extern void ftrace_regs_caller_end(void);
-extern void ftrace_epilogue(void);
+extern void ftrace_regs_caller_ret(void);
+extern void ftrace_caller_end(void);
extern void ftrace_caller_op_ptr(void);
extern void ftrace_regs_caller_op_ptr(void);
@@ -334,7 +335,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
call_offset = (unsigned long)ftrace_regs_call;
} else {
start_offset = (unsigned long)ftrace_caller;
- end_offset = (unsigned long)ftrace_epilogue;
+ end_offset = (unsigned long)ftrace_caller_end;
op_offset = (unsigned long)ftrace_caller_op_ptr;
call_offset = (unsigned long)ftrace_call;
}
@@ -366,6 +367,13 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
if (WARN_ON(ret < 0))
goto fail;
+ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
+ ip = trampoline + (ftrace_regs_caller_ret - ftrace_regs_caller);
+ ret = probe_kernel_read(ip, (void *)retq, RET_SIZE);
+ if (WARN_ON(ret < 0))
+ goto fail;
+ }
+
/*
* The address of the ftrace_ops that is used for this trampoline
* is stored at the end of the trampoline. This will be used to
@@ -407,7 +415,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
set_vm_flush_reset_perms(trampoline);
- set_memory_ro((unsigned long)trampoline, npages);
+ if (likely(system_state != SYSTEM_BOOTING))
+ set_memory_ro((unsigned long)trampoline, npages);
set_memory_x((unsigned long)trampoline, npages);
return (unsigned long)trampoline;
fail:
@@ -415,6 +424,32 @@ fail:
return 0;
}
+void set_ftrace_ops_ro(void)
+{
+ struct ftrace_ops *ops;
+ unsigned long start_offset;
+ unsigned long end_offset;
+ unsigned long npages;
+ unsigned long size;
+
+ do_for_each_ftrace_op(ops, ftrace_ops_list) {
+ if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
+ continue;
+
+ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
+ start_offset = (unsigned long)ftrace_regs_caller;
+ end_offset = (unsigned long)ftrace_regs_caller_end;
+ } else {
+ start_offset = (unsigned long)ftrace_caller;
+ end_offset = (unsigned long)ftrace_caller_end;
+ }
+ size = end_offset - start_offset;
+ size = size + RET_SIZE + sizeof(void *);
+ npages = DIV_ROUND_UP(size, PAGE_SIZE);
+ set_memory_ro((unsigned long)ops->trampoline, npages);
+ } while_for_each_ftrace_op(ops);
+}
+
static unsigned long calc_trampoline_call_offset(bool save_regs)
{
unsigned long start_offset;
diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
index e8a9f8370112..e405fe1a8bf4 100644
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -189,5 +189,5 @@ return_to_handler:
movl %eax, %ecx
popl %edx
popl %eax
- JMP_NOSPEC %ecx
+ JMP_NOSPEC ecx
#endif
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 369e61faacfe..aa5d28aeb31e 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -23,7 +23,7 @@
#endif /* CONFIG_FRAME_POINTER */
/* Size of stack used to save mcount regs in save_mcount_regs */
-#define MCOUNT_REG_SIZE (SS+8 + MCOUNT_FRAME_SIZE)
+#define MCOUNT_REG_SIZE (FRAME_SIZE + MCOUNT_FRAME_SIZE)
/*
* gcc -pg option adds a call to 'mcount' in most functions.
@@ -77,7 +77,7 @@
/*
* We add enough stack to save all regs.
*/
- subq $(MCOUNT_REG_SIZE - MCOUNT_FRAME_SIZE), %rsp
+ subq $(FRAME_SIZE), %rsp
movq %rax, RAX(%rsp)
movq %rcx, RCX(%rsp)
movq %rdx, RDX(%rsp)
@@ -157,8 +157,12 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
* think twice before adding any new code or changing the
* layout here.
*/
-SYM_INNER_LABEL(ftrace_epilogue, SYM_L_GLOBAL)
+SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL)
+ jmp ftrace_epilogue
+SYM_FUNC_END(ftrace_caller);
+
+SYM_FUNC_START(ftrace_epilogue)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
jmp ftrace_stub
@@ -170,14 +174,12 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
*/
SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
retq
-SYM_FUNC_END(ftrace_caller)
+SYM_FUNC_END(ftrace_epilogue)
SYM_FUNC_START(ftrace_regs_caller)
/* Save the current flags before any operations that can change them */
pushfq
- UNWIND_HINT_SAVE
-
/* added 8 bytes to save flags */
save_mcount_regs 8
/* save_mcount_regs fills in first two parameters */
@@ -233,10 +235,13 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
movq ORIG_RAX(%rsp), %rax
movq %rax, MCOUNT_REG_SIZE-8(%rsp)
- /* If ORIG_RAX is anything but zero, make this a call to that */
+ /*
+ * If ORIG_RAX is anything but zero, make this a call to that.
+ * See arch_ftrace_set_direct_caller().
+ */
movq ORIG_RAX(%rsp), %rax
- cmpq $0, %rax
- je 1f
+ testq %rax, %rax
+ jz 1f
/* Swap the flags with orig_rax */
movq MCOUNT_REG_SIZE(%rsp), %rdi
@@ -244,20 +249,14 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
movq %rax, MCOUNT_REG_SIZE(%rsp)
restore_mcount_regs 8
+ /* Restore flags */
+ popfq
- jmp 2f
+SYM_INNER_LABEL(ftrace_regs_caller_ret, SYM_L_GLOBAL);
+ UNWIND_HINT_RET_OFFSET
+ jmp ftrace_epilogue
1: restore_mcount_regs
-
-
-2:
- /*
- * The stack layout is nondetermistic here, depending on which path was
- * taken. This confuses objtool and ORC, rightfully so. For now,
- * pretend the stack always looks like the non-direct case.
- */
- UNWIND_HINT_RESTORE
-
/* Restore flags */
popfq
@@ -268,7 +267,6 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
* to the return.
*/
SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
-
jmp ftrace_epilogue
SYM_FUNC_END(ftrace_regs_caller)
@@ -303,7 +301,7 @@ trace:
* function tracing is enabled.
*/
movq ftrace_trace_function, %r8
- CALL_NOSPEC %r8
+ CALL_NOSPEC r8
restore_mcount_regs
jmp fgraph_trace
@@ -340,6 +338,6 @@ SYM_CODE_START(return_to_handler)
movq 8(%rsp), %rdx
movq (%rsp), %rax
addq $24, %rsp
- JMP_NOSPEC %rdi
+ JMP_NOSPEC rdi
SYM_CODE_END(return_to_handler)
#endif
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index a53e7b4a7419..e2fab3ceb09f 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -33,15 +33,15 @@ void io_bitmap_share(struct task_struct *tsk)
set_tsk_thread_flag(tsk, TIF_IO_BITMAP);
}
-static void task_update_io_bitmap(void)
+static void task_update_io_bitmap(struct task_struct *tsk)
{
- struct thread_struct *t = &current->thread;
+ struct thread_struct *t = &tsk->thread;
if (t->iopl_emul == 3 || t->io_bitmap) {
/* TSS update is handled on exit to user space */
- set_thread_flag(TIF_IO_BITMAP);
+ set_tsk_thread_flag(tsk, TIF_IO_BITMAP);
} else {
- clear_thread_flag(TIF_IO_BITMAP);
+ clear_tsk_thread_flag(tsk, TIF_IO_BITMAP);
/* Invalidate TSS */
preempt_disable();
tss_update_io_bitmap();
@@ -49,12 +49,12 @@ static void task_update_io_bitmap(void)
}
}
-void io_bitmap_exit(void)
+void io_bitmap_exit(struct task_struct *tsk)
{
- struct io_bitmap *iobm = current->thread.io_bitmap;
+ struct io_bitmap *iobm = tsk->thread.io_bitmap;
- current->thread.io_bitmap = NULL;
- task_update_io_bitmap();
+ tsk->thread.io_bitmap = NULL;
+ task_update_io_bitmap(tsk);
if (iobm && refcount_dec_and_test(&iobm->refcnt))
kfree(iobm);
}
@@ -102,7 +102,7 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on)
if (!iobm)
return -ENOMEM;
refcount_set(&iobm->refcnt, 1);
- io_bitmap_exit();
+ io_bitmap_exit(current);
}
/*
@@ -134,7 +134,7 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on)
}
/* All permissions dropped? */
if (max_long == UINT_MAX) {
- io_bitmap_exit();
+ io_bitmap_exit(current);
return 0;
}
@@ -192,7 +192,7 @@ SYSCALL_DEFINE1(iopl, unsigned int, level)
}
t->iopl_emul = level;
- task_update_io_bitmap();
+ task_update_io_bitmap(current);
return 0;
}
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 12df3a4abfdd..6b32ab009c19 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -43,7 +43,7 @@ static int map_irq_stack(unsigned int cpu)
pages[i] = pfn_to_page(pa >> PAGE_SHIFT);
}
- va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
+ va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, VM_MAP, PAGE_KERNEL);
if (!va)
return -ENOMEM;
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 6407ea21fa1b..bdcc5146de96 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -25,10 +25,6 @@
#include <linux/atomic.h>
#include <linux/sched/clock.h>
-#if defined(CONFIG_EDAC)
-#include <linux/edac.h>
-#endif
-
#include <asm/cpu_entry_area.h>
#include <asm/traps.h>
#include <asm/mach_traps.h>
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 9da70b279dad..ce6cd220f722 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -96,7 +96,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
}
/*
- * Free current thread data structures etc..
+ * Free thread data structures etc..
*/
void exit_thread(struct task_struct *tsk)
{
@@ -104,7 +104,7 @@ void exit_thread(struct task_struct *tsk)
struct fpu *fpu = &t->fpu;
if (test_thread_flag(TIF_IO_BITMAP))
- io_bitmap_exit();
+ io_bitmap_exit(tsk);
free_vm86(t);
@@ -191,7 +191,7 @@ void flush_thread(void)
flush_ptrace_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
- fpu__clear(&tsk->thread.fpu);
+ fpu__clear_all(&tsk->thread.fpu);
}
void disable_TSC(void)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 954b013cc585..538d4e8d6589 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -52,7 +52,7 @@
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/vm86.h>
-#include <asm/resctrl_sched.h>
+#include <asm/resctrl.h>
#include <asm/proto.h>
#include "process.h"
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 5ef9d8f25b0e..0c169a5687e1 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -52,7 +52,7 @@
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
#include <asm/vdso.h>
-#include <asm/resctrl_sched.h>
+#include <asm/resctrl.h>
#include <asm/unistd.h>
#include <asm/fsgsbase.h>
#ifdef CONFIG_IA32_EMULATION
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 4b3fa6cd3106..a3767e74c758 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -237,6 +237,9 @@ static u64 __init get_ramdisk_image(void)
ramdisk_image |= (u64)boot_params.ext_ramdisk_image << 32;
+ if (ramdisk_image == 0)
+ ramdisk_image = phys_initrd_start;
+
return ramdisk_image;
}
static u64 __init get_ramdisk_size(void)
@@ -245,6 +248,9 @@ static u64 __init get_ramdisk_size(void)
ramdisk_size |= (u64)boot_params.ext_ramdisk_size << 32;
+ if (ramdisk_size == 0)
+ ramdisk_size = phys_initrd_size;
+
return ramdisk_size;
}
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index e6d7894ad127..fd945ce78554 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -287,9 +287,9 @@ void __init setup_per_cpu_areas(void)
/*
* Sync back kernel address range again. We already did this in
* setup_arch(), but percpu data also needs to be available in
- * the smpboot asm. We can't reliably pick up percpu mappings
- * using vmalloc_fault(), because exception dispatch needs
- * percpu data.
+ * the smpboot asm and arch_sync_kernel_mappings() doesn't sync to
+ * swapper_pg_dir on 32-bit. The per-cpu mappings need to be available
+ * there too.
*
* FIXME: Can the later sync in setup_cpu_entry_areas() replace
* this call?
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 83b74fb38c8f..399f97abee02 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -37,6 +37,7 @@
#include <asm/vm86.h>
#ifdef CONFIG_X86_64
+#include <linux/compat.h>
#include <asm/proto.h>
#include <asm/ia32_unistd.h>
#endif /* CONFIG_X86_64 */
@@ -511,6 +512,31 @@ Efault:
}
#endif /* CONFIG_X86_32 */
+#ifdef CONFIG_X86_X32_ABI
+static int x32_copy_siginfo_to_user(struct compat_siginfo __user *to,
+ const struct kernel_siginfo *from)
+{
+ struct compat_siginfo new;
+
+ copy_siginfo_to_external32(&new, from);
+ if (from->si_signo == SIGCHLD) {
+ new._sifields._sigchld_x32._utime = from->si_utime;
+ new._sifields._sigchld_x32._stime = from->si_stime;
+ }
+ if (copy_to_user(to, &new, sizeof(struct compat_siginfo)))
+ return -EFAULT;
+ return 0;
+}
+
+int copy_siginfo_to_user32(struct compat_siginfo __user *to,
+ const struct kernel_siginfo *from)
+{
+ if (in_x32_syscall())
+ return x32_copy_siginfo_to_user(to, from);
+ return __copy_siginfo_to_user32(to, from);
+}
+#endif /* CONFIG_X86_X32_ABI */
+
static int x32_setup_rt_frame(struct ksignal *ksig,
compat_sigset_t *set,
struct pt_regs *regs)
@@ -543,7 +569,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
user_access_end();
if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
- if (__copy_siginfo_to_user32(&frame->info, &ksig->info, true))
+ if (x32_copy_siginfo_to_user(&frame->info, &ksig->info))
return -EFAULT;
}
@@ -732,7 +758,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
/*
* Ensure the signal handler starts with the new fpu state.
*/
- fpu__clear(fpu);
+ fpu__clear_user_states(fpu);
}
signal_setup_done(failed, ksig, stepping);
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fe3ab9632f3b..2467f3dd35d3 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -147,7 +147,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
*((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
}
-static void init_freq_invariance(void);
+static void init_freq_invariance(bool secondary);
/*
* Report back to the Boot Processor during boot time or to the caller processor
@@ -185,7 +185,7 @@ static void smp_callin(void)
*/
set_cpu_sibling_map(raw_smp_processor_id());
- init_freq_invariance();
+ init_freq_invariance(true);
/*
* Get our bogomips.
@@ -266,6 +266,14 @@ static void notrace start_secondary(void *unused)
wmb();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
+
+ /*
+ * Prevent tail call to cpu_startup_entry() because the stack protector
+ * guard has been changed a couple of function calls up, in
+ * boot_init_stack_canary() and must not be checked before tail calling
+ * another function.
+ */
+ prevent_tail_call_optimization();
}
/**
@@ -1341,7 +1349,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
set_sched_topology(x86_topology);
set_cpu_sibling_map(0);
- init_freq_invariance();
+ init_freq_invariance(false);
smp_sanity_check();
switch (apic_intr_mode) {
@@ -1376,12 +1384,12 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
speculative_store_bypass_ht_init();
}
-void arch_enable_nonboot_cpus_begin(void)
+void arch_thaw_secondary_cpus_begin(void)
{
set_mtrr_aps_delayed_init();
}
-void arch_enable_nonboot_cpus_end(void)
+void arch_thaw_secondary_cpus_end(void)
{
mtrr_aps_init();
}
@@ -1849,24 +1857,25 @@ static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
-#define ICPU(model) \
- {X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF, 0}
+#define X86_MATCH(model) \
+ X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \
+ INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)
static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = {
- ICPU(INTEL_FAM6_XEON_PHI_KNL),
- ICPU(INTEL_FAM6_XEON_PHI_KNM),
+ X86_MATCH(XEON_PHI_KNL),
+ X86_MATCH(XEON_PHI_KNM),
{}
};
static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = {
- ICPU(INTEL_FAM6_SKYLAKE_X),
+ X86_MATCH(SKYLAKE_X),
{}
};
static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = {
- ICPU(INTEL_FAM6_ATOM_GOLDMONT),
- ICPU(INTEL_FAM6_ATOM_GOLDMONT_D),
- ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS),
+ X86_MATCH(ATOM_GOLDMONT),
+ X86_MATCH(ATOM_GOLDMONT_D),
+ X86_MATCH(ATOM_GOLDMONT_PLUS),
{}
};
@@ -1877,9 +1886,6 @@ static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
int err, i;
u64 msr;
- if (!x86_match_cpu(has_knl_turbo_ratio_limits))
- return false;
-
err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
if (err)
return false;
@@ -1945,18 +1951,23 @@ static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
{
+ u64 msr;
int err;
err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
if (err)
return false;
- err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, turbo_freq);
+ err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
if (err)
return false;
- *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
- *turbo_freq = (*turbo_freq >> 24) & 0xFF; /* 4C turbo */
+ *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
+ *turbo_freq = (msr >> 24) & 0xFF; /* 4C turbo */
+
+ /* The CPU may have less than 4 cores */
+ if (!*turbo_freq)
+ *turbo_freq = msr & 0xFF; /* 1C turbo */
return true;
}
@@ -1972,7 +1983,8 @@ static bool intel_set_max_freq_ratio(void)
skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
goto out;
- if (knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
+ if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
+ knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
goto out;
if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
@@ -1985,13 +1997,22 @@ static bool intel_set_max_freq_ratio(void)
return false;
out:
+ /*
+ * Some hypervisors advertise X86_FEATURE_APERFMPERF
+ * but then fill all MSR's with zeroes.
+ */
+ if (!base_freq) {
+ pr_debug("Couldn't determine cpu base frequency, necessary for scale-invariant accounting.\n");
+ return false;
+ }
+
arch_turbo_freq_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE,
base_freq);
arch_set_max_freq_ratio(turbo_disabled());
return true;
}
-static void init_counter_refs(void *arg)
+static void init_counter_refs(void)
{
u64 aperf, mperf;
@@ -2002,18 +2023,25 @@ static void init_counter_refs(void *arg)
this_cpu_write(arch_prev_mperf, mperf);
}
-static void init_freq_invariance(void)
+static void init_freq_invariance(bool secondary)
{
bool ret = false;
- if (smp_processor_id() != 0 || !boot_cpu_has(X86_FEATURE_APERFMPERF))
+ if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
return;
+ if (secondary) {
+ if (static_branch_likely(&arch_scale_freq_key)) {
+ init_counter_refs();
+ }
+ return;
+ }
+
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
ret = intel_set_max_freq_ratio();
if (ret) {
- on_each_cpu(init_counter_refs, NULL, 1);
+ init_counter_refs();
static_branch_enable(&arch_scale_freq_key);
} else {
pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index b89f6ac6a0c0..b2942b2dbfcf 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -35,8 +35,7 @@
#include "../realmode/rm/wakeup.h"
/* Global pointer to shared data; NULL means no measured launch. */
-struct tboot *tboot __read_mostly;
-EXPORT_SYMBOL(tboot);
+static struct tboot *tboot __read_mostly;
/* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */
#define AP_WAIT_TIMEOUT 1
@@ -46,6 +45,11 @@ EXPORT_SYMBOL(tboot);
static u8 tboot_uuid[16] __initdata = TBOOT_UUID;
+bool tboot_enabled(void)
+{
+ return tboot != NULL;
+}
+
void __init tboot_probe(void)
{
/* Look for valid page-aligned address for shared page. */
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 106e7f87f534..371a6b348e44 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -103,6 +103,9 @@ static __init void x86_late_time_init(void)
*/
x86_init.irqs.intr_mode_init();
tsc_init();
+
+ if (static_cpu_has(X86_FEATURE_WAITPKG))
+ use_tpause_delay();
}
/*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d54cffdc7cac..428186d9de46 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -37,10 +37,12 @@
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/io.h>
+#include <linux/hardirq.h>
+#include <linux/atomic.h>
+
#include <asm/stacktrace.h>
#include <asm/processor.h>
#include <asm/debugreg.h>
-#include <linux/atomic.h>
#include <asm/text-patching.h>
#include <asm/ftrace.h>
#include <asm/traps.h>
@@ -82,78 +84,6 @@ static inline void cond_local_irq_disable(struct pt_regs *regs)
local_irq_disable();
}
-/*
- * In IST context, we explicitly disable preemption. This serves two
- * purposes: it makes it much less likely that we would accidentally
- * schedule in IST context and it will force a warning if we somehow
- * manage to schedule by accident.
- */
-void ist_enter(struct pt_regs *regs)
-{
- if (user_mode(regs)) {
- RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
- } else {
- /*
- * We might have interrupted pretty much anything. In
- * fact, if we're a machine check, we can even interrupt
- * NMI processing. We don't want in_nmi() to return true,
- * but we need to notify RCU.
- */
- rcu_nmi_enter();
- }
-
- preempt_disable();
-
- /* This code is a bit fragile. Test it. */
- RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work");
-}
-NOKPROBE_SYMBOL(ist_enter);
-
-void ist_exit(struct pt_regs *regs)
-{
- preempt_enable_no_resched();
-
- if (!user_mode(regs))
- rcu_nmi_exit();
-}
-
-/**
- * ist_begin_non_atomic() - begin a non-atomic section in an IST exception
- * @regs: regs passed to the IST exception handler
- *
- * IST exception handlers normally cannot schedule. As a special
- * exception, if the exception interrupted userspace code (i.e.
- * user_mode(regs) would return true) and the exception was not
- * a double fault, it can be safe to schedule. ist_begin_non_atomic()
- * begins a non-atomic section within an ist_enter()/ist_exit() region.
- * Callers are responsible for enabling interrupts themselves inside
- * the non-atomic section, and callers must call ist_end_non_atomic()
- * before ist_exit().
- */
-void ist_begin_non_atomic(struct pt_regs *regs)
-{
- BUG_ON(!user_mode(regs));
-
- /*
- * Sanity check: we need to be on the normal thread stack. This
- * will catch asm bugs and any attempt to use ist_preempt_enable
- * from double_fault.
- */
- BUG_ON(!on_thread_stack());
-
- preempt_enable_no_resched();
-}
-
-/**
- * ist_end_non_atomic() - begin a non-atomic section in an IST exception
- *
- * Ends a non-atomic section started with ist_begin_non_atomic().
- */
-void ist_end_non_atomic(void)
-{
- preempt_disable();
-}
-
int is_valid_bugaddr(unsigned long addr)
{
unsigned short ud;
@@ -326,7 +256,6 @@ __visible void __noreturn handle_stack_overflow(const char *message,
}
#endif
-#if defined(CONFIG_X86_64) || defined(CONFIG_DOUBLEFAULT)
/*
* Runs on an IST stack for x86_64 and on a special task stack for x86_32.
*
@@ -363,7 +292,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign
* The net result is that our #GP handler will think that we
* entered from usermode with the bad user context.
*
- * No need for ist_enter here because we don't use RCU.
+ * No need for nmi_enter() here because we don't use RCU.
*/
if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY &&
regs->cs == __KERNEL_CS &&
@@ -398,7 +327,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign
}
#endif
- ist_enter(regs);
+ nmi_enter();
notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
tsk->thread.error_code = error_code;
@@ -450,7 +379,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign
die("double fault", regs, error_code);
panic("Machine halted.");
}
-#endif
dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
{
@@ -592,19 +520,13 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
return;
/*
- * Unlike any other non-IST entry, we can be called from a kprobe in
- * non-CONTEXT_KERNEL kernel mode or even during context tracking
- * state changes. Make sure that we wake up RCU even if we're coming
- * from kernel code.
- *
- * This means that we can't schedule even if we came from a
- * preemptible kernel context. That's okay.
+ * Unlike any other non-IST entry, we can be called from pretty much
+ * any location in the kernel through kprobes -- text_poke() will most
+ * likely be handled by poke_int3_handler() above. This means this
+ * handler is effectively NMI-like.
*/
- if (!user_mode(regs)) {
- rcu_nmi_enter();
- preempt_disable();
- }
- RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
+ if (!user_mode(regs))
+ nmi_enter();
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
@@ -626,10 +548,8 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
cond_local_irq_disable(regs);
exit:
- if (!user_mode(regs)) {
- preempt_enable_no_resched();
- rcu_nmi_exit();
- }
+ if (!user_mode(regs))
+ nmi_exit();
}
NOKPROBE_SYMBOL(do_int3);
@@ -733,7 +653,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
unsigned long dr6;
int si_code;
- ist_enter(regs);
+ nmi_enter();
get_debugreg(dr6, 6);
/*
@@ -826,7 +746,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
debug_stack_usage_dec();
exit:
- ist_exit(regs);
+ nmi_exit();
}
NOKPROBE_SYMBOL(do_debug);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index fdd4c1078632..49d925043171 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -41,6 +41,7 @@ EXPORT_SYMBOL(tsc_khz);
* TSC can be unstable due to cpufreq or due to unsynced TSCs
*/
static int __read_mostly tsc_unstable;
+static unsigned int __initdata tsc_early_khz;
static DEFINE_STATIC_KEY_FALSE(__use_tsc);
@@ -59,6 +60,12 @@ struct cyc2ns {
static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
+static int __init tsc_early_khz_setup(char *buf)
+{
+ return kstrtouint(buf, 0, &tsc_early_khz);
+}
+early_param("tsc_early_khz", tsc_early_khz_setup);
+
__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
{
int seq, idx;
@@ -1412,7 +1419,10 @@ static bool __init determine_cpu_tsc_frequencies(bool early)
if (early) {
cpu_khz = x86_platform.calibrate_cpu();
- tsc_khz = x86_platform.calibrate_tsc();
+ if (tsc_early_khz)
+ tsc_khz = tsc_early_khz;
+ else
+ tsc_khz = x86_platform.calibrate_tsc();
} else {
/* We should not be here with non-native cpu calibration */
WARN_ON(x86_platform.calibrate_cpu != native_calibrate_cpu);
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index a224b5ab103f..54226110bc7f 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -344,6 +344,9 @@ bad_address:
if (IS_ENABLED(CONFIG_X86_32))
goto the_end;
+ if (state->task != current)
+ goto the_end;
+
if (state->regs) {
printk_deferred_once(KERN_WARNING
"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index e9cc182aa97e..7f969b2d240f 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -8,19 +8,21 @@
#include <asm/orc_lookup.h>
#define orc_warn(fmt, ...) \
- printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
+ printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__)
+
+#define orc_warn_current(args...) \
+({ \
+ if (state->task == current) \
+ orc_warn(args); \
+})
extern int __start_orc_unwind_ip[];
extern int __stop_orc_unwind_ip[];
extern struct orc_entry __start_orc_unwind[];
extern struct orc_entry __stop_orc_unwind[];
-static DEFINE_MUTEX(sort_mutex);
-int *cur_orc_ip_table = __start_orc_unwind_ip;
-struct orc_entry *cur_orc_table = __start_orc_unwind;
-
-unsigned int lookup_num_blocks;
-bool orc_init;
+static bool orc_init __ro_after_init;
+static unsigned int lookup_num_blocks __ro_after_init;
static inline unsigned long orc_ip(const int *ip)
{
@@ -142,9 +144,6 @@ static struct orc_entry *orc_find(unsigned long ip)
{
static struct orc_entry *orc;
- if (!orc_init)
- return NULL;
-
if (ip == 0)
return &null_orc_entry;
@@ -189,6 +188,10 @@ static struct orc_entry *orc_find(unsigned long ip)
#ifdef CONFIG_MODULES
+static DEFINE_MUTEX(sort_mutex);
+static int *cur_orc_ip_table = __start_orc_unwind_ip;
+static struct orc_entry *cur_orc_table = __start_orc_unwind;
+
static void orc_sort_swap(void *_a, void *_b, int size)
{
struct orc_entry *orc_a, *orc_b;
@@ -317,12 +320,19 @@ EXPORT_SYMBOL_GPL(unwind_get_return_address);
unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
{
+ struct task_struct *task = state->task;
+
if (unwind_done(state))
return NULL;
if (state->regs)
return &state->regs->ip;
+ if (task != current && state->sp == task->thread.sp) {
+ struct inactive_task_frame *frame = (void *)task->thread.sp;
+ return &frame->ret_addr;
+ }
+
if (state->sp)
return (unsigned long *)state->sp - 1;
@@ -381,9 +391,38 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr
return true;
}
+/*
+ * If state->regs is non-NULL, and points to a full pt_regs, just get the reg
+ * value from state->regs.
+ *
+ * Otherwise, if state->regs just points to IRET regs, and the previous frame
+ * had full regs, it's safe to get the value from the previous regs. This can
+ * happen when early/late IRQ entry code gets interrupted by an NMI.
+ */
+static bool get_reg(struct unwind_state *state, unsigned int reg_off,
+ unsigned long *val)
+{
+ unsigned int reg = reg_off/8;
+
+ if (!state->regs)
+ return false;
+
+ if (state->full_regs) {
+ *val = ((unsigned long *)state->regs)[reg];
+ return true;
+ }
+
+ if (state->prev_regs) {
+ *val = ((unsigned long *)state->prev_regs)[reg];
+ return true;
+ }
+
+ return false;
+}
+
bool unwind_next_frame(struct unwind_state *state)
{
- unsigned long ip_p, sp, orig_ip = state->ip, prev_sp = state->sp;
+ unsigned long ip_p, sp, tmp, orig_ip = state->ip, prev_sp = state->sp;
enum stack_type prev_type = state->stack_info.type;
struct orc_entry *orc;
bool indirect = false;
@@ -445,43 +484,39 @@ bool unwind_next_frame(struct unwind_state *state)
break;
case ORC_REG_R10:
- if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg R10 at ip %pB\n",
- (void *)state->ip);
+ if (!get_reg(state, offsetof(struct pt_regs, r10), &sp)) {
+ orc_warn_current("missing R10 value at %pB\n",
+ (void *)state->ip);
goto err;
}
- sp = state->regs->r10;
break;
case ORC_REG_R13:
- if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg R13 at ip %pB\n",
- (void *)state->ip);
+ if (!get_reg(state, offsetof(struct pt_regs, r13), &sp)) {
+ orc_warn_current("missing R13 value at %pB\n",
+ (void *)state->ip);
goto err;
}
- sp = state->regs->r13;
break;
case ORC_REG_DI:
- if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg DI at ip %pB\n",
- (void *)state->ip);
+ if (!get_reg(state, offsetof(struct pt_regs, di), &sp)) {
+ orc_warn_current("missing RDI value at %pB\n",
+ (void *)state->ip);
goto err;
}
- sp = state->regs->di;
break;
case ORC_REG_DX:
- if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg DX at ip %pB\n",
- (void *)state->ip);
+ if (!get_reg(state, offsetof(struct pt_regs, dx), &sp)) {
+ orc_warn_current("missing DX value at %pB\n",
+ (void *)state->ip);
goto err;
}
- sp = state->regs->dx;
break;
default:
- orc_warn("unknown SP base reg %d for ip %pB\n",
+ orc_warn("unknown SP base reg %d at %pB\n",
orc->sp_reg, (void *)state->ip);
goto err;
}
@@ -504,44 +539,48 @@ bool unwind_next_frame(struct unwind_state *state)
state->sp = sp;
state->regs = NULL;
+ state->prev_regs = NULL;
state->signal = false;
break;
case ORC_TYPE_REGS:
if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
- orc_warn("can't dereference registers at %p for ip %pB\n",
- (void *)sp, (void *)orig_ip);
+ orc_warn_current("can't access registers at %pB\n",
+ (void *)orig_ip);
goto err;
}
state->regs = (struct pt_regs *)sp;
+ state->prev_regs = NULL;
state->full_regs = true;
state->signal = true;
break;
case ORC_TYPE_REGS_IRET:
if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
- orc_warn("can't dereference iret registers at %p for ip %pB\n",
- (void *)sp, (void *)orig_ip);
+ orc_warn_current("can't access iret registers at %pB\n",
+ (void *)orig_ip);
goto err;
}
+ if (state->full_regs)
+ state->prev_regs = state->regs;
state->regs = (void *)sp - IRET_FRAME_OFFSET;
state->full_regs = false;
state->signal = true;
break;
default:
- orc_warn("unknown .orc_unwind entry type %d for ip %pB\n",
+ orc_warn("unknown .orc_unwind entry type %d at %pB\n",
orc->type, (void *)orig_ip);
- break;
+ goto err;
}
/* Find BP: */
switch (orc->bp_reg) {
case ORC_REG_UNDEFINED:
- if (state->regs && state->full_regs)
- state->bp = state->regs->bp;
+ if (get_reg(state, offsetof(struct pt_regs, bp), &tmp))
+ state->bp = tmp;
break;
case ORC_REG_PREV_SP:
@@ -564,8 +603,8 @@ bool unwind_next_frame(struct unwind_state *state)
if (state->stack_info.type == prev_type &&
on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) &&
state->sp <= prev_sp) {
- orc_warn("stack going in the wrong direction? ip=%pB\n",
- (void *)orig_ip);
+ orc_warn_current("stack going in the wrong direction? at %pB\n",
+ (void *)orig_ip);
goto err;
}
@@ -588,17 +627,20 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
memset(state, 0, sizeof(*state));
state->task = task;
+ if (!orc_init)
+ goto err;
+
/*
* Refuse to unwind the stack of a task while it's executing on another
* CPU. This check is racy, but that's ok: the unwinder has other
* checks to prevent it from going off the rails.
*/
if (task_on_another_cpu(task))
- goto done;
+ goto err;
if (regs) {
if (user_mode(regs))
- goto done;
+ goto the_end;
state->ip = regs->ip;
state->sp = regs->sp;
@@ -631,6 +673,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
* generate some kind of backtrace if this happens.
*/
void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp);
+ state->error = true;
if (get_stack_info(next_page, state->task, &state->stack_info,
&state->stack_mask))
return;
@@ -651,13 +694,14 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
/* Otherwise, skip ahead to the user-specified starting frame: */
while (!unwind_done(state) &&
(!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
- state->sp <= (unsigned long)first_frame))
+ state->sp < (unsigned long)first_frame))
unwind_next_frame(state);
return;
-done:
+err:
+ state->error = true;
+the_end:
state->stack_info.type = STACK_TYPE_UNKNOWN;
- return;
}
EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index a789759b7261..4a3081e9f4b5 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -3,6 +3,10 @@
ccflags-y += -Iarch/x86/kvm
ccflags-$(CONFIG_KVM_WERROR) += -Werror
+ifeq ($(CONFIG_FRAME_POINTER),y)
+OBJECT_FILES_NON_STANDARD_vmenter.o := y
+endif
+
KVM := ../../../virt/kvm
kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 1f3c6fd3cdaa..6bc6d7613f76 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1427,7 +1427,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
*/
kvm_make_vcpus_request_mask(kvm,
KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
- vcpu_mask, &hv_vcpu->tlb_flush);
+ NULL, vcpu_mask, &hv_vcpu->tlb_flush);
ret_success:
/* We always do full TLB flush, set rep_done = rep_cnt. */
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 750ff0b29404..d057376bd3d3 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -225,12 +225,12 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
}
/*
- * AMD SVM AVIC accelerate EOI write and do not trap,
- * in-kernel IOAPIC will not be able to receive the EOI.
- * In this case, we do lazy update of the pending EOI when
- * trying to set IOAPIC irq.
+ * AMD SVM AVIC accelerate EOI write iff the interrupt is edge
+ * triggered, in which case the in-kernel IOAPIC will not be able
+ * to receive the EOI. In this case, we do a lazy update of the
+ * pending EOI when trying to set IOAPIC irq.
*/
- if (kvm_apicv_activated(ioapic->kvm))
+ if (edge && kvm_apicv_activated(ioapic->kvm))
ioapic_lazy_update_eoi(ioapic, irq);
/*
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 8071952e9cf2..fd59fee84631 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3586,7 +3586,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
/*
* Currently, fast page fault only works for direct mapping
* since the gfn is not stable for indirect shadow page. See
- * Documentation/virt/kvm/locking.txt to get more detail.
+ * Documentation/virt/kvm/locking.rst to get more detail.
*/
fault_handled = fast_pf_fix_direct_spte(vcpu, sp,
iterator.sptep, spte,
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 90a1ca939627..9a2a62e5afeb 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -19,6 +19,7 @@
#include <linux/kernel.h>
#include <asm/msr-index.h>
+#include <asm/debugreg.h>
#include "kvm_emulate.h"
#include "trace.h"
@@ -267,7 +268,7 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
svm->vmcb->save.rsp = nested_vmcb->save.rsp;
svm->vmcb->save.rip = nested_vmcb->save.rip;
svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
- svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
+ svm->vcpu.arch.dr6 = nested_vmcb->save.dr6;
svm->vmcb->save.cpl = nested_vmcb->save.cpl;
svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
@@ -482,7 +483,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
nested_vmcb->save.rsp = vmcb->save.rsp;
nested_vmcb->save.rax = vmcb->save.rax;
nested_vmcb->save.dr7 = vmcb->save.dr7;
- nested_vmcb->save.dr6 = vmcb->save.dr6;
+ nested_vmcb->save.dr6 = svm->vcpu.arch.dr6;
nested_vmcb->save.cpl = vmcb->save.cpl;
nested_vmcb->control.int_ctl = vmcb->control.int_ctl;
@@ -606,26 +607,45 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
/* DB exceptions for our internal use must not cause vmexit */
static int nested_svm_intercept_db(struct vcpu_svm *svm)
{
- unsigned long dr6;
+ unsigned long dr6 = svm->vmcb->save.dr6;
+
+ /* Always catch it and pass it to userspace if debugging. */
+ if (svm->vcpu.guest_debug &
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+ return NESTED_EXIT_HOST;
/* if we're not singlestepping, it's not ours */
if (!svm->nmi_singlestep)
- return NESTED_EXIT_DONE;
+ goto reflected_db;
/* if it's not a singlestep exception, it's not ours */
- if (kvm_get_dr(&svm->vcpu, 6, &dr6))
- return NESTED_EXIT_DONE;
if (!(dr6 & DR6_BS))
- return NESTED_EXIT_DONE;
+ goto reflected_db;
/* if the guest is singlestepping, it should get the vmexit */
if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
disable_nmi_singlestep(svm);
- return NESTED_EXIT_DONE;
+ goto reflected_db;
}
/* it's ours, the nested hypervisor must not see this one */
return NESTED_EXIT_HOST;
+
+reflected_db:
+ /*
+ * Synchronize guest DR6 here just like in kvm_deliver_exception_payload;
+ * it will be moved into the nested VMCB by nested_svm_vmexit. Once
+ * exceptions will be moved to svm_check_nested_events, all this stuff
+ * will just go away and we could just return NESTED_EXIT_HOST
+ * unconditionally. db_interception will queue the exception, which
+ * will be processed by svm_check_nested_events if a nested vmexit is
+ * required, and we will just use kvm_deliver_exception_payload to copy
+ * the payload to DR6 before vmexit.
+ */
+ WARN_ON(svm->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT);
+ svm->vcpu.arch.dr6 &= ~(DR_TRAP_BITS | DR6_RTM);
+ svm->vcpu.arch.dr6 |= dr6 & ~DR6_FIXED_1;
+ return NESTED_EXIT_DONE;
}
static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
@@ -682,6 +702,9 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
if (svm->nested.intercept_exceptions & excp_bits) {
if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
vmexit = nested_svm_intercept_db(svm);
+ else if (exit_code == SVM_EXIT_EXCP_BASE + BP_VECTOR &&
+ svm->vcpu.guest_debug & KVM_GUESTDBG_USE_SW_BP)
+ vmexit = NESTED_EXIT_HOST;
else
vmexit = NESTED_EXIT_DONE;
}
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 0e3fc311d7da..5573a97f1520 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -12,6 +12,7 @@
#include <linux/kernel.h>
#include <linux/highmem.h>
#include <linux/psp-sev.h>
+#include <linux/pagemap.h>
#include <linux/swap.h>
#include "x86.h"
@@ -335,8 +336,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
/* Avoid using vmalloc for smaller buffers. */
size = npages * sizeof(struct page *);
if (size > PAGE_SIZE)
- pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO,
- PAGE_KERNEL);
+ pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
else
pages = kmalloc(size, GFP_KERNEL_ACCOUNT);
@@ -344,7 +344,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
return NULL;
/* Pin the user virtual address. */
- npinned = get_user_pages_fast(uaddr, npages, FOLL_WRITE, pages);
+ npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
if (npinned != npages) {
pr_err("SEV: Failure locking %lu pages.\n", npages);
goto err;
@@ -1117,7 +1117,7 @@ int __init sev_hardware_setup(void)
/* Maximum number of encrypted guests supported simultaneously */
max_sev_asid = cpuid_ecx(0x8000001F);
- if (!max_sev_asid)
+ if (!svm_sev_enabled())
return 1;
/* Minimum ASID value that should be used for SEV guest */
@@ -1156,6 +1156,9 @@ err:
void sev_hardware_teardown(void)
{
+ if (!svm_sev_enabled())
+ return;
+
bitmap_free(sev_asid_bitmap);
bitmap_free(sev_reclaim_asid_bitmap);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 2be5bbae3a40..a862c768fd54 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1672,17 +1672,14 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
mark_dirty(svm->vmcb, VMCB_ASID);
}
-static u64 svm_get_dr6(struct kvm_vcpu *vcpu)
+static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value)
{
- return to_svm(vcpu)->vmcb->save.dr6;
-}
-
-static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb *vmcb = svm->vmcb;
- svm->vmcb->save.dr6 = value;
- mark_dirty(svm->vmcb, VMCB_DR);
+ if (unlikely(value != vmcb->save.dr6)) {
+ vmcb->save.dr6 = value;
+ mark_dirty(vmcb, VMCB_DR);
+ }
}
static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
@@ -1693,9 +1690,12 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
get_debugreg(vcpu->arch.db[1], 1);
get_debugreg(vcpu->arch.db[2], 2);
get_debugreg(vcpu->arch.db[3], 3);
- vcpu->arch.dr6 = svm_get_dr6(vcpu);
+ /*
+ * We cannot reset svm->vmcb->save.dr6 to DR6_FIXED_1|DR6_RTM here,
+ * because db_interception might need it. We can do it before vmentry.
+ */
+ vcpu->arch.dr6 = svm->vmcb->save.dr6;
vcpu->arch.dr7 = svm->vmcb->save.dr7;
-
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
set_dr_intercepts(svm);
}
@@ -1739,7 +1739,8 @@ static int db_interception(struct vcpu_svm *svm)
if (!(svm->vcpu.guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
!svm->nmi_singlestep) {
- kvm_queue_exception(&svm->vcpu, DB_VECTOR);
+ u32 payload = (svm->vmcb->save.dr6 ^ DR6_RTM) & ~DR6_FIXED_1;
+ kvm_queue_exception_p(&svm->vcpu, DB_VECTOR, payload);
return 1;
}
@@ -1752,6 +1753,8 @@ static int db_interception(struct vcpu_svm *svm)
if (svm->vcpu.guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ kvm_run->debug.arch.dr6 = svm->vmcb->save.dr6;
+ kvm_run->debug.arch.dr7 = svm->vmcb->save.dr7;
kvm_run->debug.arch.pc =
svm->vmcb->save.cs.base + svm->vmcb->save.rip;
kvm_run->debug.arch.exception = DB_VECTOR;
@@ -3276,7 +3279,7 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
svm_complete_interrupts(svm);
}
-bool __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
+void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
static void svm_vcpu_run(struct kvm_vcpu *vcpu)
{
@@ -3315,6 +3318,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
svm->vmcb->save.cr2 = vcpu->arch.cr2;
+ /*
+ * Run with all-zero DR6 unless needed, so that we can get the exact cause
+ * of a #DB.
+ */
+ if (unlikely(svm->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
+ svm_set_dr6(svm, vcpu->arch.dr6);
+ else
+ svm_set_dr6(svm, DR6_FIXED_1 | DR6_RTM);
+
clgi();
kvm_load_guest_xsave_state(vcpu);
@@ -3330,13 +3342,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
*/
x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
- local_irq_enable();
-
__svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs);
- /* Eliminate branch target predictions from guest mode */
- vmexit_fill_RSB();
-
#ifdef CONFIG_X86_64
wrmsrl(MSR_GS_BASE, svm->host.gs_base);
#else
@@ -3366,8 +3373,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
reload_tss(vcpu);
- local_irq_disable();
-
x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
vcpu->arch.cr2 = svm->vmcb->save.cr2;
@@ -3411,7 +3416,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
mark_all_clean(svm->vmcb);
}
-STACK_FRAME_NON_STANDARD(svm_vcpu_run);
static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root)
{
@@ -3937,8 +3941,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.set_idt = svm_set_idt,
.get_gdt = svm_get_gdt,
.set_gdt = svm_set_gdt,
- .get_dr6 = svm_get_dr6,
- .set_dr6 = svm_set_dr6,
.set_dr7 = svm_set_dr7,
.sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
.cache_reg = svm_cache_reg,
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index fa1af90067e9..bf944334003a 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -3,6 +3,7 @@
#include <asm/asm.h>
#include <asm/bitsperlong.h>
#include <asm/kvm_vcpu_regs.h>
+#include <asm/nospec-branch.h>
#define WORD_SIZE (BITS_PER_LONG / 8)
@@ -35,7 +36,6 @@
*/
SYM_FUNC_START(__svm_vcpu_run)
push %_ASM_BP
- mov %_ASM_SP, %_ASM_BP
#ifdef CONFIG_X86_64
push %r15
push %r14
@@ -78,6 +78,7 @@ SYM_FUNC_START(__svm_vcpu_run)
pop %_ASM_AX
/* Enter guest mode */
+ sti
1: vmload %_ASM_AX
jmp 3f
2: cmpb $0, kvm_rebooting
@@ -99,6 +100,13 @@ SYM_FUNC_START(__svm_vcpu_run)
ud2
_ASM_EXTABLE(5b, 6b)
7:
+ cli
+
+#ifdef CONFIG_RETPOLINE
+ /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
+ FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+#endif
+
/* "POP" @regs to RAX. */
pop %_ASM_AX
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index cbc9ea2de28f..e44f33c82332 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5165,7 +5165,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
*/
break;
default:
- BUG_ON(1);
+ BUG();
break;
}
@@ -5533,8 +5533,25 @@ static bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu,
return 1 & (b >> (field & 7));
}
+static bool nested_vmx_exit_handled_mtf(struct vmcs12 *vmcs12)
+{
+ u32 entry_intr_info = vmcs12->vm_entry_intr_info_field;
+
+ if (nested_cpu_has_mtf(vmcs12))
+ return true;
+
+ /*
+ * An MTF VM-exit may be injected into the guest by setting the
+ * interruption-type to 7 (other event) and the vector field to 0. Such
+ * is the case regardless of the 'monitor trap flag' VM-execution
+ * control.
+ */
+ return entry_intr_info == (INTR_INFO_VALID_MASK
+ | INTR_TYPE_OTHER_EVENT);
+}
+
/*
- * Return 1 if we should exit from L2 to L1 to handle an exit, or 0 if we
+ * Return true if we should exit from L2 to L1 to handle an exit, or false if we
* should handle it ourselves in L0 (and then continue L2). Only call this
* when in is_guest_mode (L2).
*/
@@ -5633,7 +5650,7 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
case EXIT_REASON_MWAIT_INSTRUCTION:
return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
case EXIT_REASON_MONITOR_TRAP_FLAG:
- return nested_cpu_has_mtf(vmcs12);
+ return nested_vmx_exit_handled_mtf(vmcs12);
case EXIT_REASON_MONITOR_INSTRUCTION:
return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
case EXIT_REASON_PAUSE_INSTRUCTION:
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 87f3f24fef37..51d1a82742fd 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -82,6 +82,9 @@ SYM_FUNC_START(vmx_vmexit)
/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+ /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */
+ or $1, %_ASM_AX
+
pop %_ASM_AX
.Lvmexit_skip_rsb:
#endif
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 83050977490c..89c766fad889 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1372,7 +1372,6 @@ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vmx_vcpu_pi_load(vcpu, cpu);
- vmx->host_pkru = read_pkru();
vmx->host_debugctlmsr = get_debugctlmsr();
}
@@ -4572,7 +4571,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
*/
static void kvm_machine_check(void)
{
-#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64)
+#if defined(CONFIG_X86_MCE)
struct pt_regs regs = {
.cs = 3, /* Fake ring 3 no matter what the guest ran on */
.flags = X86_EFLAGS_IF,
@@ -4677,15 +4676,13 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
dr6 = vmcs_readl(EXIT_QUALIFICATION);
if (!(vcpu->guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
- vcpu->arch.dr6 &= ~DR_TRAP_BITS;
- vcpu->arch.dr6 |= dr6 | DR6_RTM;
if (is_icebp(intr_info))
WARN_ON(!skip_emulated_instruction(vcpu));
- kvm_queue_exception(vcpu, DB_VECTOR);
+ kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
return 1;
}
- kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
+ kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
/* fall through */
case BP_VECTOR:
@@ -4929,16 +4926,14 @@ static int handle_dr(struct kvm_vcpu *vcpu)
* guest debugging itself.
*/
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
- vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
+ vcpu->run->debug.arch.dr6 = DR6_BD | DR6_RTM | DR6_FIXED_1;
vcpu->run->debug.arch.dr7 = dr7;
vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
vcpu->run->debug.arch.exception = DB_VECTOR;
vcpu->run->exit_reason = KVM_EXIT_DEBUG;
return 0;
} else {
- vcpu->arch.dr6 &= ~DR_TRAP_BITS;
- vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
- kvm_queue_exception(vcpu, DB_VECTOR);
+ kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BD);
return 1;
}
}
@@ -4969,15 +4964,6 @@ static int handle_dr(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
-static u64 vmx_get_dr6(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.dr6;
-}
-
-static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
-{
-}
-
static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
{
get_debugreg(vcpu->arch.db[0], 0);
@@ -6577,11 +6563,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
kvm_load_guest_xsave_state(vcpu);
- if (static_cpu_has(X86_FEATURE_PKU) &&
- kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
- vcpu->arch.pkru != vmx->host_pkru)
- __write_pkru(vcpu->arch.pkru);
-
pt_guest_enter(vmx);
if (vcpu_to_pmu(vcpu)->version)
@@ -6671,18 +6652,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
pt_guest_exit(vmx);
- /*
- * eager fpu is enabled if PKEY is supported and CR4 is switched
- * back on host, so it is safe to read guest PKRU from current
- * XSAVE.
- */
- if (static_cpu_has(X86_FEATURE_PKU) &&
- kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) {
- vcpu->arch.pkru = rdpkru();
- if (vcpu->arch.pkru != vmx->host_pkru)
- __write_pkru(vmx->host_pkru);
- }
-
kvm_load_host_xsave_state(vcpu);
vmx->nested.nested_run_pending = 0;
@@ -7740,8 +7709,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.set_idt = vmx_set_idt,
.get_gdt = vmx_get_gdt,
.set_gdt = vmx_set_gdt,
- .get_dr6 = vmx_get_dr6,
- .set_dr6 = vmx_set_dr6,
.set_dr7 = vmx_set_dr7,
.sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
.cache_reg = vmx_cache_reg,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3bf2ecafd027..c17e6eb9ad43 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -572,11 +572,12 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception);
-static void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
- unsigned long payload)
+void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
+ unsigned long payload)
{
kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false);
}
+EXPORT_SYMBOL_GPL(kvm_queue_exception_p);
static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
u32 error_code, unsigned long payload)
@@ -836,11 +837,25 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
vcpu->arch.ia32_xss != host_xss)
wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
}
+
+ if (static_cpu_has(X86_FEATURE_PKU) &&
+ (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
+ (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU)) &&
+ vcpu->arch.pkru != vcpu->arch.host_pkru)
+ __write_pkru(vcpu->arch.pkru);
}
EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
{
+ if (static_cpu_has(X86_FEATURE_PKU) &&
+ (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
+ (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) {
+ vcpu->arch.pkru = rdpkru();
+ if (vcpu->arch.pkru != vcpu->arch.host_pkru)
+ __write_pkru(vcpu->arch.host_pkru);
+ }
+
if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
if (vcpu->arch.xcr0 != host_xcr0)
@@ -926,19 +941,6 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr);
__reserved_bits; \
})
-static u64 kvm_host_cr4_reserved_bits(struct cpuinfo_x86 *c)
-{
- u64 reserved_bits = __cr4_reserved_bits(cpu_has, c);
-
- if (kvm_cpu_cap_has(X86_FEATURE_LA57))
- reserved_bits &= ~X86_CR4_LA57;
-
- if (kvm_cpu_cap_has(X86_FEATURE_UMIP))
- reserved_bits &= ~X86_CR4_UMIP;
-
- return reserved_bits;
-}
-
static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
if (cr4 & cr4_reserved_bits)
@@ -1058,12 +1060,6 @@ static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
}
}
-static void kvm_update_dr6(struct kvm_vcpu *vcpu)
-{
- if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
- kvm_x86_ops.set_dr6(vcpu, vcpu->arch.dr6);
-}
-
static void kvm_update_dr7(struct kvm_vcpu *vcpu)
{
unsigned long dr7;
@@ -1103,7 +1099,6 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
if (val & 0xffffffff00000000ULL)
return -1; /* #GP */
vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
- kvm_update_dr6(vcpu);
break;
case 5:
/* fall through */
@@ -1139,10 +1134,7 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
case 4:
/* fall through */
case 6:
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
- *val = vcpu->arch.dr6;
- else
- *val = kvm_x86_ops.get_dr6(vcpu);
+ *val = vcpu->arch.dr6;
break;
case 5:
/* fall through */
@@ -3060,6 +3052,17 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_PERF_CTL:
case MSR_AMD64_DC_CFG:
case MSR_F15H_EX_CFG:
+ /*
+ * Intel Sandy Bridge CPUs must support the RAPL (running average power
+ * limit) MSRs. Just return 0, as we do not want to expose the host
+ * data here. Do not conditionalize this on CPUID, as KVM does not do
+ * so for existing CPU-specific MSRs.
+ */
+ case MSR_RAPL_POWER_UNIT:
+ case MSR_PP0_ENERGY_STATUS: /* Power plane 0 (core) */
+ case MSR_PP1_ENERGY_STATUS: /* Power plane 1 (graphics uncore) */
+ case MSR_PKG_ENERGY_STATUS: /* Total package */
+ case MSR_DRAM_ENERGY_STATUS: /* DRAM controller */
msr_info->data = 0;
break;
case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
@@ -3374,6 +3377,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_GET_MSR_FEATURES:
case KVM_CAP_MSR_PLATFORM_INFO:
case KVM_CAP_EXCEPTION_PAYLOAD:
+ case KVM_CAP_SET_GUEST_DEBUG:
r = 1;
break;
case KVM_CAP_SYNC_REGS:
@@ -3559,6 +3563,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_x86_ops.vcpu_load(vcpu, cpu);
+ /* Save host pkru register if supported */
+ vcpu->arch.host_pkru = read_pkru();
+
/* Apply any externally detected TSC adjustments (due to suspend) */
if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
@@ -3752,7 +3759,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
unsigned bank_num = mcg_cap & 0xff, bank;
r = -EINVAL;
- if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
+ if (!bank_num || bank_num > KVM_MAX_MCE_BANKS)
goto out;
if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
goto out;
@@ -4010,7 +4017,6 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
kvm_update_dr0123(vcpu);
vcpu->arch.dr6 = dbgregs->dr6;
- kvm_update_dr6(vcpu);
vcpu->arch.dr7 = dbgregs->dr7;
kvm_update_dr7(vcpu);
@@ -5049,10 +5055,13 @@ set_identity_unlock:
r = -EFAULT;
if (copy_from_user(&u.ps, argp, sizeof(u.ps)))
goto out;
+ mutex_lock(&kvm->lock);
r = -ENXIO;
if (!kvm->arch.vpit)
- goto out;
+ goto set_pit_out;
r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
+set_pit_out:
+ mutex_unlock(&kvm->lock);
break;
}
case KVM_GET_PIT2: {
@@ -5072,10 +5081,13 @@ set_identity_unlock:
r = -EFAULT;
if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
goto out;
+ mutex_lock(&kvm->lock);
r = -ENXIO;
if (!kvm->arch.vpit)
- goto out;
+ goto set_pit2_out;
r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
+set_pit2_out:
+ mutex_unlock(&kvm->lock);
break;
}
case KVM_REINJECT_CONTROL: {
@@ -6654,7 +6666,7 @@ static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu)
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
- kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
+ kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu);
kvm_run->debug.arch.exception = DB_VECTOR;
kvm_run->exit_reason = KVM_EXIT_DEBUG;
return 0;
@@ -6714,9 +6726,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
vcpu->arch.db);
if (dr6 != 0) {
- vcpu->arch.dr6 &= ~DR_TRAP_BITS;
- vcpu->arch.dr6 |= dr6 | DR6_RTM;
- kvm_queue_exception(vcpu, DB_VECTOR);
+ kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
*r = 1;
return true;
}
@@ -8037,7 +8047,7 @@ void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
zalloc_cpumask_var(&cpus, GFP_ATOMIC);
kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC,
- vcpu_bitmap, cpus);
+ NULL, vcpu_bitmap, cpus);
free_cpumask_var(cpus);
}
@@ -8067,6 +8077,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
*/
void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
{
+ struct kvm_vcpu *except;
unsigned long old, new, expected;
if (!kvm_x86_ops.check_apicv_inhibit_reasons ||
@@ -8091,7 +8102,17 @@ void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
trace_kvm_apicv_update_request(activate, bit);
if (kvm_x86_ops.pre_update_apicv_exec_ctrl)
kvm_x86_ops.pre_update_apicv_exec_ctrl(kvm, activate);
- kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE);
+
+ /*
+ * Sending request to update APICV for all other vcpus,
+ * while update the calling vcpu immediately instead of
+ * waiting for another #VMEXIT to handle the request.
+ */
+ except = kvm_get_running_vcpu();
+ kvm_make_all_cpus_request_except(kvm, KVM_REQ_APICV_UPDATE,
+ except);
+ if (except)
+ kvm_vcpu_update_apicv(except);
}
EXPORT_SYMBOL_GPL(kvm_request_apicv_update);
@@ -8415,7 +8436,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
kvm_x86_ops.sync_dirty_debug_regs(vcpu);
kvm_update_dr0123(vcpu);
- kvm_update_dr6(vcpu);
kvm_update_dr7(vcpu);
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
}
@@ -9476,7 +9496,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
kvm_update_dr0123(vcpu);
vcpu->arch.dr6 = DR6_INIT;
- kvm_update_dr6(vcpu);
vcpu->arch.dr7 = DR7_FIXED_1;
kvm_update_dr7(vcpu);
@@ -9658,7 +9677,9 @@ int kvm_arch_hardware_setup(void *opaque)
if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
supported_xss = 0;
- cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data);
+#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
+ cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
+#undef __kvm_cpu_cap_has
if (kvm_has_tsc_control) {
/*
@@ -9690,7 +9711,8 @@ int kvm_arch_check_processor_compat(void *opaque)
WARN_ON(!irqs_disabled());
- if (kvm_host_cr4_reserved_bits(c) != cr4_reserved_bits)
+ if (__cr4_reserved_bits(cpu_has, c) !=
+ __cr4_reserved_bits(cpu_has, &boot_cpu_data))
return -EIO;
return ops->check_processor_compatibility();
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 4742e8fa7ee7..d1d768912368 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -153,7 +153,7 @@ SYM_FUNC_START(csum_partial)
negl %ebx
lea 45f(%ebx,%ebx,2), %ebx
testl %esi, %esi
- JMP_NOSPEC %ebx
+ JMP_NOSPEC ebx
# Handle 2-byte-aligned regions
20: addw (%esi), %ax
@@ -436,7 +436,7 @@ SYM_FUNC_START(csum_partial_copy_generic)
andl $-32,%edx
lea 3f(%ebx,%ebx), %ebx
testl %esi, %esi
- JMP_NOSPEC %ebx
+ JMP_NOSPEC ebx
1: addl $64,%esi
addl $64,%edi
SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
index c66c8b00f236..ee63d7576fd2 100644
--- a/arch/x86/lib/csum-wrappers_64.c
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -10,7 +10,7 @@
#include <asm/smap.h>
/**
- * csum_partial_copy_from_user - Copy and checksum from user space.
+ * csum_and_copy_from_user - Copy and checksum from user space.
* @src: source address (user space)
* @dst: destination address
* @len: number of bytes to be copied.
@@ -21,13 +21,13 @@
* src and dst are best aligned to 64bits.
*/
__wsum
-csum_partial_copy_from_user(const void __user *src, void *dst,
+csum_and_copy_from_user(const void __user *src, void *dst,
int len, __wsum isum, int *errp)
{
might_sleep();
*errp = 0;
- if (!likely(access_ok(src, len)))
+ if (!user_access_begin(src, len))
goto out_err;
/*
@@ -42,8 +42,7 @@ csum_partial_copy_from_user(const void __user *src, void *dst,
while (((unsigned long)src & 6) && len >= 2) {
__u16 val16;
- if (__get_user(val16, (const __u16 __user *)src))
- goto out_err;
+ unsafe_get_user(val16, (const __u16 __user *)src, out);
*(__u16 *)dst = val16;
isum = (__force __wsum)add32_with_carry(
@@ -53,25 +52,26 @@ csum_partial_copy_from_user(const void __user *src, void *dst,
len -= 2;
}
}
- stac();
isum = csum_partial_copy_generic((__force const void *)src,
dst, len, isum, errp, NULL);
- clac();
+ user_access_end();
if (unlikely(*errp))
goto out_err;
return isum;
+out:
+ user_access_end();
out_err:
*errp = -EFAULT;
memset(dst, 0, len);
return isum;
}
-EXPORT_SYMBOL(csum_partial_copy_from_user);
+EXPORT_SYMBOL(csum_and_copy_from_user);
/**
- * csum_partial_copy_to_user - Copy and checksum to user space.
+ * csum_and_copy_to_user - Copy and checksum to user space.
* @src: source address
* @dst: destination address (user space)
* @len: number of bytes to be copied.
@@ -82,14 +82,14 @@ EXPORT_SYMBOL(csum_partial_copy_from_user);
* src and dst are best aligned to 64bits.
*/
__wsum
-csum_partial_copy_to_user(const void *src, void __user *dst,
+csum_and_copy_to_user(const void *src, void __user *dst,
int len, __wsum isum, int *errp)
{
__wsum ret;
might_sleep();
- if (unlikely(!access_ok(dst, len))) {
+ if (!user_access_begin(dst, len)) {
*errp = -EFAULT;
return 0;
}
@@ -100,9 +100,7 @@ csum_partial_copy_to_user(const void *src, void __user *dst,
isum = (__force __wsum)add32_with_carry(
(__force unsigned)isum, val16);
- *errp = __put_user(val16, (__u16 __user *)dst);
- if (*errp)
- return isum;
+ unsafe_put_user(val16, (__u16 __user *)dst, out);
src += 2;
dst += 2;
len -= 2;
@@ -110,13 +108,16 @@ csum_partial_copy_to_user(const void *src, void __user *dst,
}
*errp = 0;
- stac();
ret = csum_partial_copy_generic(src, (void __force *)dst,
len, isum, NULL, errp);
- clac();
+ user_access_end();
return ret;
+out:
+ user_access_end();
+ *errp = -EFAULT;
+ return isum;
}
-EXPORT_SYMBOL(csum_partial_copy_to_user);
+EXPORT_SYMBOL(csum_and_copy_to_user);
/**
* csum_partial_copy_nocheck - Copy and checksum.
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index c126571e5e2e..65d15df6212d 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -27,9 +27,20 @@
# include <asm/smp.h>
#endif
+static void delay_loop(u64 __loops);
+
+/*
+ * Calibration and selection of the delay mechanism happens only once
+ * during boot.
+ */
+static void (*delay_fn)(u64) __ro_after_init = delay_loop;
+static void (*delay_halt_fn)(u64 start, u64 cycles) __ro_after_init;
+
/* simple loop based delay: */
-static void delay_loop(unsigned long loops)
+static void delay_loop(u64 __loops)
{
+ unsigned long loops = (unsigned long)__loops;
+
asm volatile(
" test %0,%0 \n"
" jz 3f \n"
@@ -49,9 +60,9 @@ static void delay_loop(unsigned long loops)
}
/* TSC based delay: */
-static void delay_tsc(unsigned long __loops)
+static void delay_tsc(u64 cycles)
{
- u64 bclock, now, loops = __loops;
+ u64 bclock, now;
int cpu;
preempt_disable();
@@ -59,7 +70,7 @@ static void delay_tsc(unsigned long __loops)
bclock = rdtsc_ordered();
for (;;) {
now = rdtsc_ordered();
- if ((now - bclock) >= loops)
+ if ((now - bclock) >= cycles)
break;
/* Allow RT tasks to run */
@@ -77,7 +88,7 @@ static void delay_tsc(unsigned long __loops)
* counter for this CPU.
*/
if (unlikely(cpu != smp_processor_id())) {
- loops -= (now - bclock);
+ cycles -= (now - bclock);
cpu = smp_processor_id();
bclock = rdtsc_ordered();
}
@@ -86,65 +97,96 @@ static void delay_tsc(unsigned long __loops)
}
/*
+ * On Intel the TPAUSE instruction waits until any of:
+ * 1) the TSC counter exceeds the value provided in EDX:EAX
+ * 2) global timeout in IA32_UMWAIT_CONTROL is exceeded
+ * 3) an external interrupt occurs
+ */
+static void delay_halt_tpause(u64 start, u64 cycles)
+{
+ u64 until = start + cycles;
+ u32 eax, edx;
+
+ eax = lower_32_bits(until);
+ edx = upper_32_bits(until);
+
+ /*
+ * Hard code the deeper (C0.2) sleep state because exit latency is
+ * small compared to the "microseconds" that usleep() will delay.
+ */
+ __tpause(TPAUSE_C02_STATE, edx, eax);
+}
+
+/*
* On some AMD platforms, MWAITX has a configurable 32-bit timer, that
- * counts with TSC frequency. The input value is the loop of the
- * counter, it will exit when the timer expires.
+ * counts with TSC frequency. The input value is the number of TSC cycles
+ * to wait. MWAITX will also exit when the timer expires.
*/
-static void delay_mwaitx(unsigned long __loops)
+static void delay_halt_mwaitx(u64 unused, u64 cycles)
{
- u64 start, end, delay, loops = __loops;
+ u64 delay;
+
+ delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);
+ /*
+ * Use cpu_tss_rw as a cacheline-aligned, seldomly accessed per-cpu
+ * variable as the monitor target.
+ */
+ __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
+
+ /*
+ * AMD, like Intel, supports the EAX hint and EAX=0xf means, do not
+ * enter any deep C-state and we use it here in delay() to minimize
+ * wakeup latency.
+ */
+ __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
+}
+
+/*
+ * Call a vendor specific function to delay for a given amount of time. Because
+ * these functions may return earlier than requested, check for actual elapsed
+ * time and call again until done.
+ */
+static void delay_halt(u64 __cycles)
+{
+ u64 start, end, cycles = __cycles;
/*
* Timer value of 0 causes MWAITX to wait indefinitely, unless there
* is a store on the memory monitored by MONITORX.
*/
- if (loops == 0)
+ if (!cycles)
return;
start = rdtsc_ordered();
for (;;) {
- delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
-
- /*
- * Use cpu_tss_rw as a cacheline-aligned, seldomly
- * accessed per-cpu variable as the monitor target.
- */
- __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
-
- /*
- * AMD, like Intel's MWAIT version, supports the EAX hint and
- * EAX=0xf0 means, do not enter any deep C-state and we use it
- * here in delay() to minimize wakeup latency.
- */
- __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
-
+ delay_halt_fn(start, cycles);
end = rdtsc_ordered();
- if (loops <= end - start)
+ if (cycles <= end - start)
break;
- loops -= end - start;
-
+ cycles -= end - start;
start = end;
}
}
-/*
- * Since we calibrate only once at boot, this
- * function should be set once at boot and not changed
- */
-static void (*delay_fn)(unsigned long) = delay_loop;
-
-void use_tsc_delay(void)
+void __init use_tsc_delay(void)
{
if (delay_fn == delay_loop)
delay_fn = delay_tsc;
}
+void __init use_tpause_delay(void)
+{
+ delay_halt_fn = delay_halt_tpause;
+ delay_fn = delay_halt;
+}
+
void use_mwaitx_delay(void)
{
- delay_fn = delay_mwaitx;
+ delay_halt_fn = delay_halt_mwaitx;
+ delay_fn = delay_halt;
}
int read_current_timer(unsigned long *timer_val)
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 363ec132df7e..b4c43a9b1483 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -7,15 +7,31 @@
#include <asm/alternative-asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
+#include <asm/unwind_hints.h>
+#include <asm/frame.h>
.macro THUNK reg
.section .text.__x86.indirect_thunk
+ .align 32
SYM_FUNC_START(__x86_indirect_thunk_\reg)
- CFI_STARTPROC
- JMP_NOSPEC %\reg
- CFI_ENDPROC
+ JMP_NOSPEC \reg
SYM_FUNC_END(__x86_indirect_thunk_\reg)
+
+SYM_FUNC_START_NOALIGN(__x86_retpoline_\reg)
+ ANNOTATE_INTRA_FUNCTION_CALL
+ call .Ldo_rop_\@
+.Lspec_trap_\@:
+ UNWIND_HINT_EMPTY
+ pause
+ lfence
+ jmp .Lspec_trap_\@
+.Ldo_rop_\@:
+ mov %\reg, (%_ASM_SP)
+ UNWIND_HINT_RET_OFFSET
+ ret
+SYM_FUNC_END(__x86_retpoline_\reg)
+
.endm
/*
@@ -24,25 +40,24 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
* only see one instance of "__x86_indirect_thunk_\reg" rather
* than one per register with the correct names. So we do it
* the simple and nasty way...
+ *
+ * Worse, you can only have a single EXPORT_SYMBOL per line,
+ * and CPP can't insert newlines, so we have to repeat everything
+ * at least twice.
*/
-#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
-#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
-#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
-
-GENERATE_THUNK(_ASM_AX)
-GENERATE_THUNK(_ASM_BX)
-GENERATE_THUNK(_ASM_CX)
-GENERATE_THUNK(_ASM_DX)
-GENERATE_THUNK(_ASM_SI)
-GENERATE_THUNK(_ASM_DI)
-GENERATE_THUNK(_ASM_BP)
-#ifdef CONFIG_64BIT
-GENERATE_THUNK(r8)
-GENERATE_THUNK(r9)
-GENERATE_THUNK(r10)
-GENERATE_THUNK(r11)
-GENERATE_THUNK(r12)
-GENERATE_THUNK(r13)
-GENERATE_THUNK(r14)
-GENERATE_THUNK(r15)
-#endif
+
+#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
+#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
+#define EXPORT_RETPOLINE(reg) __EXPORT_THUNK(__x86_retpoline_ ## reg)
+
+#undef GEN
+#define GEN(reg) THUNK reg
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) EXPORT_THUNK(reg)
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) EXPORT_RETPOLINE(reg)
+#include <asm/GEN-for-each-reg.h>
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 56f9189bbadb..5199d8a1daf1 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -17,7 +17,7 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
#endif
-#if defined(CONFIG_X86_32) && defined(CONFIG_DOUBLEFAULT)
+#ifdef CONFIG_X86_32
DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack);
#endif
@@ -114,12 +114,10 @@ static void __init percpu_setup_exception_stacks(unsigned int cpu)
#else
static inline void percpu_setup_exception_stacks(unsigned int cpu)
{
-#ifdef CONFIG_DOUBLEFAULT
struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
cea_map_percpu_pages(&cea->doublefault_stack,
&per_cpu(doublefault_stack, cpu), 1, PAGE_KERNEL);
-#endif
}
#endif
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 69309cd56fdf..ea9010113f69 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -249,10 +249,22 @@ static void note_wx(struct pg_state *st, unsigned long addr)
(void *)st->start_address);
}
-static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2)
+static void effective_prot(struct ptdump_state *pt_st, int level, u64 val)
{
- return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) |
- ((prot1 | prot2) & _PAGE_NX);
+ struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
+ pgprotval_t prot = val & PTE_FLAGS_MASK;
+ pgprotval_t effective;
+
+ if (level > 0) {
+ pgprotval_t higher_prot = st->prot_levels[level - 1];
+
+ effective = (higher_prot & prot & (_PAGE_USER | _PAGE_RW)) |
+ ((higher_prot | prot) & _PAGE_NX);
+ } else {
+ effective = prot;
+ }
+
+ st->prot_levels[level] = effective;
}
/*
@@ -261,7 +273,7 @@ static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2)
* print what we collected so far.
*/
static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
- unsigned long val)
+ u64 val)
{
struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
pgprotval_t new_prot, new_eff;
@@ -270,16 +282,10 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
struct seq_file *m = st->seq;
new_prot = val & PTE_FLAGS_MASK;
-
- if (level > 0) {
- new_eff = effective_prot(st->prot_levels[level - 1],
- new_prot);
- } else {
- new_eff = new_prot;
- }
-
- if (level >= 0)
- st->prot_levels[level] = new_eff;
+ if (!val)
+ new_eff = 0;
+ else
+ new_eff = st->prot_levels[level];
/*
* If we have a "break" in the series, we need to flush the state that
@@ -374,6 +380,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m,
struct pg_state st = {
.ptdump = {
.note_page = note_page,
+ .effective_prot = effective_prot,
.range = ptdump_ranges
},
.level = -1,
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a51df516b87b..dffe8e4d3140 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -190,16 +190,13 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
return pmd_k;
}
-static void vmalloc_sync(void)
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
{
- unsigned long address;
-
- if (SHARED_KERNEL_PMD)
- return;
+ unsigned long addr;
- for (address = VMALLOC_START & PMD_MASK;
- address >= TASK_SIZE_MAX && address < VMALLOC_END;
- address += PMD_SIZE) {
+ for (addr = start & PMD_MASK;
+ addr >= TASK_SIZE_MAX && addr < VMALLOC_END;
+ addr += PMD_SIZE) {
struct page *page;
spin_lock(&pgd_lock);
@@ -210,61 +207,13 @@ static void vmalloc_sync(void)
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
spin_lock(pgt_lock);
- vmalloc_sync_one(page_address(page), address);
+ vmalloc_sync_one(page_address(page), addr);
spin_unlock(pgt_lock);
}
spin_unlock(&pgd_lock);
}
}
-void vmalloc_sync_mappings(void)
-{
- vmalloc_sync();
-}
-
-void vmalloc_sync_unmappings(void)
-{
- vmalloc_sync();
-}
-
-/*
- * 32-bit:
- *
- * Handle a fault on the vmalloc or module mapping area
- */
-static noinline int vmalloc_fault(unsigned long address)
-{
- unsigned long pgd_paddr;
- pmd_t *pmd_k;
- pte_t *pte_k;
-
- /* Make sure we are in vmalloc area: */
- if (!(address >= VMALLOC_START && address < VMALLOC_END))
- return -1;
-
- /*
- * Synchronize this task's top level page-table
- * with the 'reference' page table.
- *
- * Do _not_ use "current" here. We might be inside
- * an interrupt in the middle of a task switch..
- */
- pgd_paddr = read_cr3_pa();
- pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
- if (!pmd_k)
- return -1;
-
- if (pmd_large(*pmd_k))
- return 0;
-
- pte_k = pte_offset_kernel(pmd_k, address);
- if (!pte_present(*pte_k))
- return -1;
-
- return 0;
-}
-NOKPROBE_SYMBOL(vmalloc_fault);
-
/*
* Did it hit the DOS screen memory VA from vm86 mode?
*/
@@ -329,96 +278,6 @@ out:
#else /* CONFIG_X86_64: */
-void vmalloc_sync_mappings(void)
-{
- /*
- * 64-bit mappings might allocate new p4d/pud pages
- * that need to be propagated to all tasks' PGDs.
- */
- sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
-}
-
-void vmalloc_sync_unmappings(void)
-{
- /*
- * Unmappings never allocate or free p4d/pud pages.
- * No work is required here.
- */
-}
-
-/*
- * 64-bit:
- *
- * Handle a fault on the vmalloc area
- */
-static noinline int vmalloc_fault(unsigned long address)
-{
- pgd_t *pgd, *pgd_k;
- p4d_t *p4d, *p4d_k;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- /* Make sure we are in vmalloc area: */
- if (!(address >= VMALLOC_START && address < VMALLOC_END))
- return -1;
-
- /*
- * Copy kernel mappings over when needed. This can also
- * happen within a race in page table update. In the later
- * case just flush:
- */
- pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(address);
- pgd_k = pgd_offset_k(address);
- if (pgd_none(*pgd_k))
- return -1;
-
- if (pgtable_l5_enabled()) {
- if (pgd_none(*pgd)) {
- set_pgd(pgd, *pgd_k);
- arch_flush_lazy_mmu_mode();
- } else {
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_k));
- }
- }
-
- /* With 4-level paging, copying happens on the p4d level. */
- p4d = p4d_offset(pgd, address);
- p4d_k = p4d_offset(pgd_k, address);
- if (p4d_none(*p4d_k))
- return -1;
-
- if (p4d_none(*p4d) && !pgtable_l5_enabled()) {
- set_p4d(p4d, *p4d_k);
- arch_flush_lazy_mmu_mode();
- } else {
- BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_k));
- }
-
- BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS < 4);
-
- pud = pud_offset(p4d, address);
- if (pud_none(*pud))
- return -1;
-
- if (pud_large(*pud))
- return 0;
-
- pmd = pmd_offset(pud, address);
- if (pmd_none(*pmd))
- return -1;
-
- if (pmd_large(*pmd))
- return 0;
-
- pte = pte_offset_kernel(pmd, address);
- if (!pte_present(*pte))
- return -1;
-
- return 0;
-}
-NOKPROBE_SYMBOL(vmalloc_fault);
-
#ifdef CONFIG_CPU_SUP_AMD
static const char errata93_warning[] =
KERN_ERR
@@ -1257,29 +1116,6 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
*/
WARN_ON_ONCE(hw_error_code & X86_PF_PK);
- /*
- * We can fault-in kernel-space virtual memory on-demand. The
- * 'reference' page table is init_mm.pgd.
- *
- * NOTE! We MUST NOT take any locks for this case. We may
- * be in an interrupt or a critical region, and should
- * only copy the information from the master page table,
- * nothing more.
- *
- * Before doing this on-demand faulting, ensure that the
- * fault is not any of the following:
- * 1. A fault on a PTE with a reserved bit set.
- * 2. A fault caused by a user-mode access. (Do not demand-
- * fault kernel memory due to user-mode accesses).
- * 3. A fault caused by a page-level protection violation.
- * (A demand fault would be on a non-present page which
- * would have X86_PF_PROT==0).
- */
- if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
- if (vmalloc_fault(address) >= 0)
- return;
- }
-
/* Was the fault spurious, caused by lazy TLB invalidation? */
if (spurious_kernel_fault(hw_error_code, address))
return;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 1bba16c5742b..a573a3e63f02 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -121,8 +121,6 @@ __ref void *alloc_low_pages(unsigned int num)
} else {
pfn = pgt_buf_end;
pgt_buf_end += num;
- printk(KERN_DEBUG "BRK [%#010lx, %#010lx] PGTABLE\n",
- pfn << PAGE_SHIFT, (pgt_buf_end << PAGE_SHIFT) - 1);
}
for (i = 0; i < num; i++) {
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 3b289c2f75cd..96274a90c5ff 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -54,6 +54,7 @@
#include <asm/init.h>
#include <asm/uv/uv.h>
#include <asm/setup.h>
+#include <asm/ftrace.h>
#include "mm_internal.h"
@@ -217,6 +218,11 @@ void sync_global_pgds(unsigned long start, unsigned long end)
sync_global_pgds_l4(start, end);
}
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
+{
+ sync_global_pgds(start, end);
+}
+
/*
* NOTE: This function is marked __ref because it calls __init function
* (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
@@ -1291,6 +1297,8 @@ void mark_rodata_ro(void)
all_end = roundup((unsigned long)_brk_end, PMD_SIZE);
set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT);
+ set_ftrace_ops_ro();
+
#ifdef CONFIG_CPA_DEBUG
printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end);
set_memory_rw(start, (end-start) >> PAGE_SHIFT);
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index cb91eccc4960..c90c20904a60 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -18,7 +18,9 @@
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/compat.h>
+#include <linux/elf-randomize.h>
#include <asm/elf.h>
+#include <asm/io.h>
#include "physaddr.h"
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 109325d77b3e..43fd19b3f118 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -372,7 +372,7 @@ static void enter_uniprocessor(void)
int cpu;
int err;
- if (downed_cpus == NULL &&
+ if (!cpumask_available(downed_cpus) &&
!alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) {
pr_notice("Failed to allocate mask\n");
goto out;
@@ -402,7 +402,7 @@ static void leave_uniprocessor(void)
int cpu;
int err;
- if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0)
+ if (!cpumask_available(downed_cpus) || cpumask_weight(downed_cpus) == 0)
return;
pr_notice("Re-enabling CPUs...\n");
for_each_cpu(cpu, downed_cpus) {
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index f2bd3d61e16b..104544359d69 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -27,40 +27,6 @@
#include "numa_internal.h"
-#ifdef CONFIG_DISCONTIGMEM
-/*
- * 4) physnode_map - the mapping between a pfn and owning node
- * physnode_map keeps track of the physical memory layout of a generic
- * numa node on a 64Mb break (each element of the array will
- * represent 64Mb of memory and will be marked by the node id. so,
- * if the first gig is on node 0, and the second gig is on node 1
- * physnode_map will contain:
- *
- * physnode_map[0-15] = 0;
- * physnode_map[16-31] = 1;
- * physnode_map[32- ] = -1;
- */
-s8 physnode_map[MAX_SECTIONS] __read_mostly = { [0 ... (MAX_SECTIONS - 1)] = -1};
-EXPORT_SYMBOL(physnode_map);
-
-void memory_present(int nid, unsigned long start, unsigned long end)
-{
- unsigned long pfn;
-
- printk(KERN_INFO "Node: %d, start_pfn: %lx, end_pfn: %lx\n",
- nid, start, end);
- printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid);
- printk(KERN_DEBUG " ");
- start = round_down(start, PAGES_PER_SECTION);
- end = round_up(end, PAGES_PER_SECTION);
- for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
- physnode_map[pfn / PAGES_PER_SECTION] = nid;
- printk(KERN_CONT "%lx ", pfn);
- }
- printk(KERN_CONT "\n");
-}
-#endif
-
extern unsigned long highend_pfn, highstart_pfn;
void __init initmem_init(void)
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 59eca6a94ce7..b8c55a2e402d 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -43,7 +43,8 @@ struct cpa_data {
unsigned long pfn;
unsigned int flags;
unsigned int force_split : 1,
- force_static_prot : 1;
+ force_static_prot : 1,
+ force_flush_all : 1;
struct page **pages;
};
@@ -355,10 +356,10 @@ static void cpa_flush(struct cpa_data *data, int cache)
return;
}
- if (cpa->numpages <= tlb_single_page_flush_ceiling)
- on_each_cpu(__cpa_flush_tlb, cpa, 1);
- else
+ if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling)
flush_tlb_all();
+ else
+ on_each_cpu(__cpa_flush_tlb, cpa, 1);
if (!cache)
return;
@@ -1598,6 +1599,8 @@ static int cpa_process_alias(struct cpa_data *cpa)
alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
alias_cpa.curpage = 0;
+ cpa->force_flush_all = 1;
+
ret = __change_page_attr_set_clr(&alias_cpa, 0);
if (ret)
return ret;
@@ -1618,6 +1621,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
alias_cpa.curpage = 0;
+ cpa->force_flush_all = 1;
/*
* The high mapping range is imprecise, so ignore the
* return value.
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 843aa10a4cb6..da0fb17a1a36 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -448,13 +448,7 @@ static void __init pti_clone_user_shared(void)
* the sp1 and sp2 slots.
*
* This is done for all possible CPUs during boot to ensure
- * that it's propagated to all mms. If we were to add one of
- * these mappings during CPU hotplug, we would need to take
- * some measure to make sure that every mm that subsequently
- * ran on that CPU would have the relevant PGD entry in its
- * pagetables. The usual vmalloc_fault() mechanism would not
- * work for page faults taken in entry_SYSCALL_64 before RSP
- * is set up.
+ * that it's propagated to all mms.
*/
unsigned long va = (unsigned long)&per_cpu(cpu_tss_rw, cpu);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 66f96f21a7b6..f3fe261e5936 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -161,34 +161,6 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
local_irq_restore(flags);
}
-static void sync_current_stack_to_mm(struct mm_struct *mm)
-{
- unsigned long sp = current_stack_pointer;
- pgd_t *pgd = pgd_offset(mm, sp);
-
- if (pgtable_l5_enabled()) {
- if (unlikely(pgd_none(*pgd))) {
- pgd_t *pgd_ref = pgd_offset_k(sp);
-
- set_pgd(pgd, *pgd_ref);
- }
- } else {
- /*
- * "pgd" is faked. The top level entries are "p4d"s, so sync
- * the p4d. This compiles to approximately the same code as
- * the 5-level case.
- */
- p4d_t *p4d = p4d_offset(pgd, sp);
-
- if (unlikely(p4d_none(*p4d))) {
- pgd_t *pgd_ref = pgd_offset_k(sp);
- p4d_t *p4d_ref = p4d_offset(pgd_ref, sp);
-
- set_p4d(p4d, *p4d_ref);
- }
- }
-}
-
static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
{
unsigned long next_tif = task_thread_info(next)->flags;
@@ -377,15 +349,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
*/
cond_ibpb(tsk);
- if (IS_ENABLED(CONFIG_VMAP_STACK)) {
- /*
- * If our current stack is in vmalloc space and isn't
- * mapped in the new pgd, we'll double-fault. Forcibly
- * map it.
- */
- sync_current_stack_to_mm(next);
- }
-
/*
* Stop remote flushes for the previous mm.
* Skip kernel threads; we never send init_mm TLB flushing IPIs,
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5ea7c2cf7ab4..42b6709e6dc7 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -158,6 +158,19 @@ static bool is_ereg(u32 reg)
BIT(BPF_REG_AX));
}
+/*
+ * is_ereg_8l() == true if BPF register 'reg' is mapped to access x86-64
+ * lower 8-bit registers dil,sil,bpl,spl,r8b..r15b, which need extra byte
+ * of encoding. al,cl,dl,bl have simpler encoding.
+ */
+static bool is_ereg_8l(u32 reg)
+{
+ return is_ereg(reg) ||
+ (1 << reg) & (BIT(BPF_REG_1) |
+ BIT(BPF_REG_2) |
+ BIT(BPF_REG_FP));
+}
+
static bool is_axreg(u32 reg)
{
return reg == BPF_REG_0;
@@ -598,9 +611,8 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
switch (size) {
case BPF_B:
/* Emit 'mov byte ptr [rax + off], al' */
- if (is_ereg(dst_reg) || is_ereg(src_reg) ||
- /* We have to add extra byte for x86 SIL, DIL regs */
- src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
+ if (is_ereg(dst_reg) || is_ereg_8l(src_reg))
+ /* Add extra byte for eregs or SIL,DIL,BPL in src_reg */
EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
else
EMIT1(0x88);
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 4d2a7a764602..66cd150b7e54 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -1847,14 +1847,16 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_B:
case BPF_H:
case BPF_W:
- if (!bpf_prog->aux->verifier_zext)
+ if (bpf_prog->aux->verifier_zext)
break;
if (dstk) {
EMIT3(0xC7, add_1reg(0x40, IA32_EBP),
STACK_VAR(dst_hi));
EMIT(0x0, 4);
} else {
- EMIT3(0xC7, add_1reg(0xC0, dst_hi), 0);
+ /* xor dst_hi,dst_hi */
+ EMIT2(0x33,
+ add_2reg(0xC0, dst_hi, dst_hi));
}
break;
case BPF_DW:
@@ -2013,8 +2015,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_JMP | BPF_JSET | BPF_X:
case BPF_JMP32 | BPF_JSET | BPF_X: {
bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
- u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
- u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+ u8 dreg_lo = IA32_EAX;
+ u8 dreg_hi = IA32_EDX;
u8 sreg_lo = sstk ? IA32_ECX : src_lo;
u8 sreg_hi = sstk ? IA32_EBX : src_hi;
@@ -2026,6 +2028,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
add_2reg(0x40, IA32_EBP,
IA32_EDX),
STACK_VAR(dst_hi));
+ } else {
+ /* mov dreg_lo,dst_lo */
+ EMIT2(0x89, add_2reg(0xC0, dreg_lo, dst_lo));
+ if (is_jmp64)
+ /* mov dreg_hi,dst_hi */
+ EMIT2(0x89,
+ add_2reg(0xC0, dreg_hi, dst_hi));
}
if (sstk) {
@@ -2050,8 +2059,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_JMP | BPF_JSET | BPF_K:
case BPF_JMP32 | BPF_JSET | BPF_K: {
bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
- u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
- u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+ u8 dreg_lo = IA32_EAX;
+ u8 dreg_hi = IA32_EDX;
u8 sreg_lo = IA32_ECX;
u8 sreg_hi = IA32_EBX;
u32 hi;
@@ -2064,6 +2073,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
add_2reg(0x40, IA32_EBP,
IA32_EDX),
STACK_VAR(dst_hi));
+ } else {
+ /* mov dreg_lo,dst_lo */
+ EMIT2(0x89, add_2reg(0xC0, dreg_lo, dst_lo));
+ if (is_jmp64)
+ /* mov dreg_hi,dst_hi */
+ EMIT2(0x89,
+ add_2reg(0xC0, dreg_hi, dst_hi));
}
/* mov ecx,imm32 */
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 1aae5302501d..e966115d105c 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -62,12 +62,12 @@ static unsigned long efi_runtime, efi_nr_tables;
unsigned long efi_fw_vendor, efi_config_table;
static const efi_config_table_type_t arch_tables[] __initconst = {
- {EFI_PROPERTIES_TABLE_GUID, "PROP", &prop_phys},
- {UGA_IO_PROTOCOL_GUID, "UGA", &uga_phys},
+ {EFI_PROPERTIES_TABLE_GUID, &prop_phys, "PROP" },
+ {UGA_IO_PROTOCOL_GUID, &uga_phys, "UGA" },
#ifdef CONFIG_X86_UV
- {UV_SYSTEM_TABLE_GUID, "UVsystab", &uv_systab_phys},
+ {UV_SYSTEM_TABLE_GUID, &uv_systab_phys, "UVsystab" },
#endif
- {NULL_GUID, NULL, NULL},
+ {},
};
static const unsigned long * const efi_tables[] = {
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index 15da118f04f0..90380a17ab23 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -21,7 +21,7 @@ SYM_FUNC_START(__efi_call)
mov %r8, %r9
mov %rcx, %r8
mov %rsi, %rcx
- CALL_NOSPEC %rdi
+ CALL_NOSPEC rdi
leave
ret
SYM_FUNC_END(__efi_call)
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index c60255da5a6c..4494589a288a 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -45,7 +45,8 @@ static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
return ret;
}
-s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
+static s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4,
+ u64 a5)
{
s64 ret;
@@ -57,10 +58,9 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
return ret;
}
-EXPORT_SYMBOL_GPL(uv_bios_call);
-s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
- u64 a4, u64 a5)
+static s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+ u64 a4, u64 a5)
{
unsigned long bios_flags;
s64 ret;
@@ -77,18 +77,13 @@ s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
return ret;
}
-
long sn_partition_id;
EXPORT_SYMBOL_GPL(sn_partition_id);
long sn_coherency_id;
-EXPORT_SYMBOL_GPL(sn_coherency_id);
long sn_region_size;
EXPORT_SYMBOL_GPL(sn_region_size);
long system_serial_number;
-EXPORT_SYMBOL_GPL(system_serial_number);
int uv_type;
-EXPORT_SYMBOL_GPL(uv_type);
-
s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
long *region, long *ssn)
@@ -115,7 +110,6 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
*ssn = v1;
return ret;
}
-EXPORT_SYMBOL_GPL(uv_bios_get_sn_info);
int
uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size,
@@ -166,7 +160,6 @@ s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type,
(u64)ticks_per_second, 0, 0, 0);
}
-EXPORT_SYMBOL_GPL(uv_bios_freq_base);
/*
* uv_bios_set_legacy_vga_target - Set Legacy VGA I/O Target
@@ -185,7 +178,6 @@ int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus)
return uv_bios_call(UV_BIOS_SET_LEGACY_VGA_TARGET,
(u64)decode, (u64)domain, (u64)bus, 0, 0);
}
-EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
int uv_bios_init(void)
{
diff --git a/arch/x86/platform/uv/uv_sysfs.c b/arch/x86/platform/uv/uv_sysfs.c
index 62214731fea5..266773e2fb37 100644
--- a/arch/x86/platform/uv/uv_sysfs.c
+++ b/arch/x86/platform/uv/uv_sysfs.c
@@ -21,7 +21,7 @@ static ssize_t partition_id_show(struct kobject *kobj,
static ssize_t coherence_id_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%ld\n", uv_partition_coherence_id());
+ return snprintf(buf, PAGE_SIZE, "%ld\n", sn_coherency_id);
}
static struct kobj_attribute partition_id_attr =
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index aaff9ed7ff45..fc3b757afb2c 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -307,7 +307,7 @@ int hibernate_resume_nonboot_cpu_disable(void)
if (ret)
return ret;
smp_ops.play_dead = resume_play_dead;
- ret = disable_nonboot_cpus();
+ ret = freeze_secondary_cpus(0);
smp_ops.play_dead = play_dead;
return ret;
}
diff --git a/arch/x86/um/asm/checksum.h b/arch/x86/um/asm/checksum.h
index 2a56cac64687..ff6bba2c8ab6 100644
--- a/arch/x86/um/asm/checksum.h
+++ b/arch/x86/um/asm/checksum.h
@@ -36,26 +36,6 @@ __wsum csum_partial_copy_nocheck(const void *src, void *dst,
return csum_partial(dst, len, sum);
}
-/*
- * the same as csum_partial, but copies from src while it
- * checksums, and handles user-space pointer exceptions correctly, when needed.
- *
- * here even more important to align src and dst on a 32-bit (or even
- * better 64-bit) boundary
- */
-
-static __inline__
-__wsum csum_partial_copy_from_user(const void __user *src, void *dst,
- int len, __wsum sum, int *err_ptr)
-{
- if (copy_from_user(dst, src, len)) {
- *err_ptr = -EFAULT;
- return (__force __wsum)-1;
- }
-
- return csum_partial(dst, len, sum);
-}
-
/**
* csum_fold - Fold and invert a 32bit checksum.
* sum: 32bit unfolded sum
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index 1abe455d926a..205a9bc981b0 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -29,7 +29,7 @@ static efi_system_table_t efi_systab_xen __initdata = {
.fw_vendor = EFI_INVALID_TABLE_ADDR, /* Initialized later. */
.fw_revision = 0, /* Initialized later. */
.con_in_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
- .con_in = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
+ .con_in = NULL, /* Not used under Xen. */
.con_out_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
.con_out = NULL, /* Not used under Xen. */
.stderr_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 8fb8a50a28b4..f2adb63b2d7c 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -93,6 +93,7 @@ asmlinkage __visible void cpu_bringup_and_idle(void)
cpu_bringup();
boot_init_stack_canary();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
+ prevent_tail_call_optimization();
}
void xen_smp_intr_free_pv(unsigned int cpu)