aboutsummaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/sh/mm/Kconfig1
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/include/asm/pgtable.h17
-rw-r--r--arch/x86/include/asm/pgtable_types.h20
-rw-r--r--arch/x86/mm/pgtable.c8
5 files changed, 45 insertions, 3 deletions
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index cb8f9920f4d..0f7c852f355 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -111,6 +111,7 @@ config VSYSCALL
config NUMA
bool "Non Uniform Memory Access (NUMA) Support"
depends on MMU && SYS_SUPPORTS_NUMA && EXPERIMENTAL
+ select ARCH_WANT_NUMA_VARIABLE_LOCALITY
default n
help
Some SH systems have many various memories scattered around
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 65a872bf72f..97f8c5ad8c2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -22,6 +22,8 @@ config X86
def_bool y
select HAVE_AOUT if X86_32
select HAVE_UNSTABLE_SCHED_CLOCK
+ select ARCH_SUPPORTS_NUMA_BALANCING
+ select ARCH_WANTS_PROT_NUMA_PROT_NONE
select HAVE_IDE
select HAVE_OPROFILE
select HAVE_PCSPKR_PLATFORM
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a1f780d45f7..5199db2923d 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -404,7 +404,14 @@ static inline int pte_same(pte_t a, pte_t b)
static inline int pte_present(pte_t a)
{
- return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
+ return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE |
+ _PAGE_NUMA);
+}
+
+#define pte_accessible pte_accessible
+static inline int pte_accessible(pte_t a)
+{
+ return pte_flags(a) & _PAGE_PRESENT;
}
static inline int pte_hidden(pte_t pte)
@@ -420,7 +427,8 @@ static inline int pmd_present(pmd_t pmd)
* the _PAGE_PSE flag will remain set at all times while the
* _PAGE_PRESENT bit is clear).
*/
- return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE);
+ return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE |
+ _PAGE_NUMA);
}
static inline int pmd_none(pmd_t pmd)
@@ -479,6 +487,11 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
static inline int pmd_bad(pmd_t pmd)
{
+#ifdef CONFIG_NUMA_BALANCING
+ /* pmd_numa check */
+ if ((pmd_flags(pmd) & (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA)
+ return 0;
+#endif
return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
}
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index ec8a1fc9505..3c32db8c539 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -64,6 +64,26 @@
#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE)
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
+/*
+ * _PAGE_NUMA indicates that this page will trigger a numa hinting
+ * minor page fault to gather numa placement statistics (see
+ * pte_numa()). The bit picked (8) is within the range between
+ * _PAGE_FILE (6) and _PAGE_PROTNONE (8) bits. Therefore, it doesn't
+ * require changes to the swp entry format because that bit is always
+ * zero when the pte is not present.
+ *
+ * The bit picked must be always zero when the pmd is present and not
+ * present, so that we don't lose information when we set it while
+ * atomically clearing the present bit.
+ *
+ * Because we shared the same bit (8) with _PAGE_PROTNONE this can be
+ * interpreted as _PAGE_NUMA only in places that _PAGE_PROTNONE
+ * couldn't reach, like handle_mm_fault() (see access_error in
+ * arch/x86/mm/fault.c, the vma protection must not be PROT_NONE for
+ * handle_mm_fault() to be invoked).
+ */
+#define _PAGE_NUMA _PAGE_PROTNONE
+
#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
_PAGE_ACCESSED | _PAGE_DIRTY)
#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 217eb705fac..e27fbf887f3 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -301,6 +301,13 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_page((unsigned long)pgd);
}
+/*
+ * Used to set accessed or dirty bits in the page table entries
+ * on other architectures. On x86, the accessed and dirty bits
+ * are tracked by hardware. However, do_wp_page calls this function
+ * to also make the pte writeable at the same time the dirty bit is
+ * set. In that case we do actually need to write the PTE.
+ */
int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
pte_t entry, int dirty)
@@ -310,7 +317,6 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
if (changed && dirty) {
*ptep = entry;
pte_update_defer(vma->vm_mm, address, ptep);
- flush_tlb_page(vma, address);
}
return changed;