about summary refs log tree commit diff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r-- mm/Kconfig 2
-rw-r--r-- mm/bounce.c 4
-rw-r--r-- mm/filemap.c 2
-rw-r--r-- mm/highmem.c 6
-rw-r--r-- mm/memcontrol.c 4
-rw-r--r-- mm/memory.c 58
-rw-r--r-- mm/mmu_context.c 2
-rw-r--r-- mm/page_alloc.c 165
-rw-r--r-- mm/page_cgroup.c 11
-rw-r--r-- mm/slab.c 227
-rw-r--r-- mm/slab.h 4
-rw-r--r-- mm/slub.c 126
-rw-r--r-- mm/swap.c 30
-rw-r--r-- mm/vmalloc.c 14
-rw-r--r-- mm/vmstat.c 6
15 files changed, 495 insertions, 166 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index b2d1aed56439..a03dc3c070ca 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -384,7 +384,7 @@ config NOMMU_INITIAL_TRIM_EXCESS
config TRANSPARENT_HUGEPAGE
bool "Transparent Hugepage Support"
- depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT_FULL
select COMPACTION
help
Transparent Hugepages allows the kernel to use huge pages and
diff --git a/mm/bounce.c b/mm/bounce.c
index 5a7d58fb883b..b09bb4e0e3e0 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -51,11 +51,11 @@ static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
unsigned long flags;
unsigned char *vto;
- local_irq_save(flags);
+ local_irq_save_nort(flags);
vto = kmap_atomic(to->bv_page);
memcpy(vto + to->bv_offset, vfrom, to->bv_len);
kunmap_atomic(vto);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
}
#else /* CONFIG_HIGHMEM */
diff --git a/mm/filemap.c b/mm/filemap.c
index 7905fe721aa8..bf2060d4319e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1974,7 +1974,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
char *kaddr;
size_t copied;
- BUG_ON(!in_atomic());
+ BUG_ON(!pagefault_disabled());
kaddr = kmap_atomic(page);
if (likely(i->nr_segs == 1)) {
int left;
diff --git a/mm/highmem.c b/mm/highmem.c
index b32b70cdaed6..b1c7d434b24c 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -29,10 +29,11 @@
#include <linux/kgdb.h>
#include <asm/tlbflush.h>
-
+#ifndef CONFIG_PREEMPT_RT_FULL
#if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
DEFINE_PER_CPU(int, __kmap_atomic_idx);
#endif
+#endif
/*
* Virtual_count is not a pure "count".
@@ -47,8 +48,9 @@ DEFINE_PER_CPU(int, __kmap_atomic_idx);
unsigned long totalhigh_pages __read_mostly;
EXPORT_SYMBOL(totalhigh_pages);
-
+#ifndef CONFIG_PREEMPT_RT_FULL
EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx);
+#endif
unsigned int nr_free_highpages (void)
{
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index eaa3accb01e7..33d8392f33eb 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2484,7 +2484,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg, bool sync)
/* Notify other cpus that system-wide "drain" is running */
get_online_cpus();
- curcpu = get_cpu();
+ curcpu = get_cpu_light();
for_each_online_cpu(cpu) {
struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
struct mem_cgroup *memcg;
@@ -2501,7 +2501,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg, bool sync)
schedule_work_on(cpu, &stock->work);
}
}
- put_cpu();
+ put_cpu_light();
if (!sync)
goto out;
diff --git a/mm/memory.c b/mm/memory.c
index 1df7bd48cdae..59c7fb52c271 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3753,6 +3753,32 @@ unlock:
return 0;
}
+#ifdef CONFIG_PREEMPT_RT_FULL
+void pagefault_disable(void) /* CONFIG_PREEMPT_RT_FULL variant; undone by pagefault_enable() */
+{
+ migrate_disable(); /* paired with migrate_enable() in pagefault_enable() */
+ current->pagefault_disabled++; /* per-task count, dropped again in pagefault_enable() */
+ /*
+ * Make sure the counter store above has been issued before a
+ * pagefault can hit.
+ */
+ barrier();
+}
+EXPORT_SYMBOL(pagefault_disable);
+
+void pagefault_enable(void) /* CONFIG_PREEMPT_RT_FULL variant; undoes pagefault_disable() */
+{
+ /*
+ * Make sure the last loads/stores have been issued before
+ * enabling the pagefault handler again.
+ */
+ barrier();
+ current->pagefault_disabled--; /* drop the count taken in pagefault_disable() */
+ migrate_enable(); /* released last: reverse order of pagefault_disable() */
+}
+EXPORT_SYMBOL(pagefault_enable);
+#endif
+
/*
* By the time we get here, we already hold the mm semaphore
*/
@@ -4330,3 +4356,35 @@ void copy_user_huge_page(struct page *dst, struct page *src,
}
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
+
+#if defined(CONFIG_PREEMPT_RT_FULL) && (USE_SPLIT_PTLOCKS > 0)
+/*
+ * Allocate and initialise page->ptl. Heinous hack, relies on the
+ * caller doing something like:
+ * pte = alloc_pages(PGALLOC_GFP, 0);
+ * if (pte)
+ * pgtable_page_ctor(pte);
+ * return pte;
+ *
+ * This ensures we release the page and return NULL when the
+ * lock allocation fails.
+ */
+struct page *pte_lock_init(struct page *page)
+{
+ page->ptl = kmalloc(sizeof(spinlock_t), GFP_KERNEL); /* separately allocated lock; freed by pte_lock_deinit() */
+ if (page->ptl) {
+ spin_lock_init(__pte_lockptr(page));
+ } else {
+ __free_page(page); /* lock allocation failed: give the page back ... */
+ page = NULL; /* ... and report the failure to the caller */
+ }
+ return page; /* the page itself on success, NULL on failure */
+}
+
+void pte_lock_deinit(struct page *page) /* counterpart of pte_lock_init() */
+{
+ kfree(page->ptl); /* release the lock that pte_lock_init() kmalloc'ed */
+ page->mapping = NULL; /* NOTE(review): presumably ->ptl overlays ->mapping, so this clears the stale pointer - confirm against struct page */
+}
+
+#endif
diff --git a/mm/mmu_context.c b/mm/mmu_context.c
index 8a8cd0265e52..adfce87a001a 100644
--- a/mm/mmu_context.c
+++ b/mm/mmu_context.c
@@ -23,6 +23,7 @@ void use_mm(struct mm_struct *mm)
struct task_struct *tsk = current;
task_lock(tsk);
+ preempt_disable_rt();
active_mm = tsk->active_mm;
if (active_mm != mm) {
atomic_inc(&mm->mm_count);
@@ -30,6 +31,7 @@ void use_mm(struct mm_struct *mm)
}
tsk->mm = mm;
switch_mm(active_mm, mm, tsk);
+ preempt_enable_rt();
task_unlock(tsk);
if (active_mm != mm)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 494a081ec5e4..48441f038680 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -60,6 +60,7 @@
#include <linux/page-debug-flags.h>
#include <linux/hugetlb.h>
#include <linux/sched/rt.h>
+#include <linux/locallock.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -221,6 +222,18 @@ EXPORT_SYMBOL(nr_node_ids);
EXPORT_SYMBOL(nr_online_nodes);
#endif
+static DEFINE_LOCAL_IRQ_LOCK(pa_lock);
+
+#ifdef CONFIG_PREEMPT_RT_BASE /* RT: lock/unlock pa_lock of the given cpu */
+# define cpu_lock_irqsave(cpu, flags) \
+ local_lock_irqsave_on(pa_lock, flags, cpu)
+# define cpu_unlock_irqrestore(cpu, flags) \
+ local_unlock_irqrestore_on(pa_lock, flags, cpu)
+#else /* !CONFIG_PREEMPT_RT_BASE: cpu is ignored, plain local irq save/restore */
+# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags)
+# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags)
+#endif /* CONFIG_PREEMPT_RT_BASE */
+
int page_group_by_mobility_disabled __read_mostly;
void set_pageblock_migratetype(struct page *page, int migratetype)
@@ -623,7 +636,7 @@ static inline int free_pages_check(struct page *page)
}
/*
- * Frees a number of pages from the PCP lists
+ * Frees a number of pages which have been collected from the pcp lists.
* Assumes all pages on list are in same zone, and of same order.
* count is the number of pages to free.
*
@@ -634,16 +647,50 @@ static inline int free_pages_check(struct page *page)
* pinned" detection logic.
*/
static void free_pcppages_bulk(struct zone *zone, int count,
- struct per_cpu_pages *pcp)
+ struct list_head *list)
{
- int migratetype = 0;
- int batch_free = 0;
int to_free = count;
+ unsigned long flags;
- spin_lock(&zone->lock);
+ spin_lock_irqsave(&zone->lock, flags);
zone->all_unreclaimable = 0;
zone->pages_scanned = 0;
+ while (!list_empty(list)) {
+ struct page *page = list_first_entry(list, struct page, lru);
+ int mt; /* migratetype of the to-be-freed page */
+
+ /* must delete as __free_one_page list manipulates */
+ list_del(&page->lru);
+
+ mt = get_freepage_migratetype(page);
+ /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
+ __free_one_page(page, zone, 0, mt);
+ trace_mm_page_pcpu_drain(page, 0, mt);
+ if (likely(!is_migrate_isolate_page(page))) {
+ __mod_zone_page_state(zone, NR_FREE_PAGES, 1);
+ if (is_migrate_cma(mt))
+ __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
+ }
+
+ to_free--;
+ }
+ WARN_ON(to_free != 0);
+ spin_unlock_irqrestore(&zone->lock, flags);
+}
+
+/*
+ * Moves a number of pages from the PCP lists onto a private list, so
+ * that the caller can free them outside of the locked region.
+ *
+ * Assumes all pages on the lists are in the same zone and of the same
+ * order. to_free is the number of pages to move from src to dst.
+ */
+static void isolate_pcp_pages(int to_free, struct per_cpu_pages *src,
+ struct list_head *dst)
+{
+ int migratetype = 0, batch_free = 0;
+
while (to_free) {
struct page *page;
struct list_head *list;
@@ -659,7 +706,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
batch_free++;
if (++migratetype == MIGRATE_PCPTYPES)
migratetype = 0;
- list = &pcp->lists[migratetype];
+ list = &src->lists[migratetype];
} while (list_empty(list));
/* This is the only non-empty list. Free them all. */
@@ -667,36 +714,26 @@ static void free_pcppages_bulk(struct zone *zone, int count,
batch_free = to_free;
do {
- int mt; /* migratetype of the to-be-freed page */
-
- page = list_entry(list->prev, struct page, lru);
- /* must delete as __free_one_page list manipulates */
+ page = list_last_entry(list, struct page, lru);
list_del(&page->lru);
- mt = get_freepage_migratetype(page);
- /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
- __free_one_page(page, zone, 0, mt);
- trace_mm_page_pcpu_drain(page, 0, mt);
- if (likely(!is_migrate_isolate_page(page))) {
- __mod_zone_page_state(zone, NR_FREE_PAGES, 1);
- if (is_migrate_cma(mt))
- __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
- }
+ list_add(&page->lru, dst);
} while (--to_free && --batch_free && !list_empty(list));
}
- spin_unlock(&zone->lock);
}
static void free_one_page(struct zone *zone, struct page *page, int order,
int migratetype)
{
- spin_lock(&zone->lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&zone->lock, flags);
zone->all_unreclaimable = 0;
zone->pages_scanned = 0;
__free_one_page(page, zone, order, migratetype);
if (unlikely(!is_migrate_isolate(migratetype)))
__mod_zone_freepage_state(zone, 1 << order, migratetype);
- spin_unlock(&zone->lock);
+ spin_unlock_irqrestore(&zone->lock, flags);
}
static bool free_pages_prepare(struct page *page, unsigned int order)
@@ -733,12 +770,12 @@ static void __free_pages_ok(struct page *page, unsigned int order)
if (!free_pages_prepare(page, order))
return;
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
__count_vm_events(PGFREE, 1 << order);
migratetype = get_pageblock_migratetype(page);
set_freepage_migratetype(page, migratetype);
free_one_page(page_zone(page), page, order, migratetype);
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
}
/*
@@ -1180,18 +1217,20 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
{
unsigned long flags;
+ LIST_HEAD(dst);
int to_drain;
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
if (pcp->count >= pcp->batch)
to_drain = pcp->batch;
else
to_drain = pcp->count;
if (to_drain > 0) {
- free_pcppages_bulk(zone, to_drain, pcp);
+ isolate_pcp_pages(to_drain, pcp, &dst);
pcp->count -= to_drain;
}
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
+ free_pcppages_bulk(zone, to_drain, &dst);
}
#endif
@@ -1210,16 +1249,21 @@ static void drain_pages(unsigned int cpu)
for_each_populated_zone(zone) {
struct per_cpu_pageset *pset;
struct per_cpu_pages *pcp;
+ LIST_HEAD(dst);
+ int count;
- local_irq_save(flags);
+ cpu_lock_irqsave(cpu, flags);
pset = per_cpu_ptr(zone->pageset, cpu);
pcp = &pset->pcp;
- if (pcp->count) {
- free_pcppages_bulk(zone, pcp->count, pcp);
+ count = pcp->count;
+ if (count) {
+ isolate_pcp_pages(count, pcp, &dst);
pcp->count = 0;
}
- local_irq_restore(flags);
+ cpu_unlock_irqrestore(cpu, flags);
+ if (count)
+ free_pcppages_bulk(zone, count, &dst);
}
}
@@ -1272,7 +1316,12 @@ void drain_all_pages(void)
else
cpumask_clear_cpu(cpu, &cpus_with_pcps);
}
+#ifndef CONFIG_PREEMPT_RT_BASE
on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, NULL, 1);
+#else
+ for_each_cpu(cpu, &cpus_with_pcps)
+ drain_pages(cpu);
+#endif
}
#ifdef CONFIG_HIBERNATION
@@ -1327,7 +1376,7 @@ void free_hot_cold_page(struct page *page, int cold)
migratetype = get_pageblock_migratetype(page);
set_freepage_migratetype(page, migratetype);
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
__count_vm_event(PGFREE);
/*
@@ -1352,12 +1401,19 @@ void free_hot_cold_page(struct page *page, int cold)
list_add(&page->lru, &pcp->lists[migratetype]);
pcp->count++;
if (pcp->count >= pcp->high) {
- free_pcppages_bulk(zone, pcp->batch, pcp);
+ LIST_HEAD(dst);
+ int count;
+
+ isolate_pcp_pages(pcp->batch, pcp, &dst);
pcp->count -= pcp->batch;
+ count = pcp->batch;
+ local_unlock_irqrestore(pa_lock, flags);
+ free_pcppages_bulk(zone, count, &dst);
+ return;
}
out:
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
}
/*
@@ -1487,7 +1543,7 @@ again:
struct per_cpu_pages *pcp;
struct list_head *list;
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
pcp = &this_cpu_ptr(zone->pageset)->pcp;
list = &pcp->lists[migratetype];
if (list_empty(list)) {
@@ -1519,18 +1575,20 @@ again:
*/
WARN_ON_ONCE(order > 1);
}
- spin_lock_irqsave(&zone->lock, flags);
+ local_spin_lock_irqsave(pa_lock, &zone->lock, flags);
page = __rmqueue(zone, order, migratetype);
- spin_unlock(&zone->lock);
- if (!page)
+ if (!page) {
+ spin_unlock(&zone->lock);
goto failed;
+ }
__mod_zone_freepage_state(zone, -(1 << order),
get_pageblock_migratetype(page));
+ spin_unlock(&zone->lock);
}
__count_zone_vm_events(PGALLOC, zone, 1 << order);
zone_statistics(preferred_zone, zone, gfp_flags);
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
VM_BUG_ON(bad_range(zone, page));
if (prep_new_page(page, order, gfp_flags))
@@ -1538,7 +1596,7 @@ again:
return page;
failed:
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
return NULL;
}
@@ -2192,8 +2250,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
struct page *page;
/* Page migration frees to the PCP lists but we want merging */
- drain_pages(get_cpu());
- put_cpu();
+ drain_pages(get_cpu_light());
+ put_cpu_light();
page = get_page_from_freelist(gfp_mask, nodemask,
order, zonelist, high_zoneidx,
@@ -5243,6 +5301,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
void __init page_alloc_init(void)
{
hotcpu_notifier(page_alloc_cpu_notify, 0);
+ local_irq_lock_init(pa_lock);
}
/*
@@ -5481,11 +5540,11 @@ int __meminit init_per_zone_wmark_min(void)
module_init(init_per_zone_wmark_min)
/*
- * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so
+ * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so
* that we can call two helper functions whenever min_free_kbytes
* changes.
*/
-int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
+int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
proc_dointvec(table, write, buffer, length, ppos);
@@ -6061,21 +6120,23 @@ static int __meminit __zone_pcp_update(void *data)
{
struct zone *zone = data;
int cpu;
- unsigned long batch = zone_batchsize(zone), flags;
+ unsigned long flags;
for_each_possible_cpu(cpu) {
struct per_cpu_pageset *pset;
struct per_cpu_pages *pcp;
+ LIST_HEAD(dst);
pset = per_cpu_ptr(zone->pageset, cpu);
pcp = &pset->pcp;
- local_irq_save(flags);
- if (pcp->count > 0)
- free_pcppages_bulk(zone, pcp->count, pcp);
+ cpu_lock_irqsave(cpu, flags);
+ if (pcp->count > 0) {
+ isolate_pcp_pages(pcp->count, pcp, &dst);
+ free_pcppages_bulk(zone, pcp->count, &dst);
+ }
drain_zonestat(zone, pset);
- setup_pageset(pset, batch);
- local_irq_restore(flags);
+ cpu_unlock_irqrestore(cpu, flags);
}
return 0;
}
@@ -6093,7 +6154,7 @@ void zone_pcp_reset(struct zone *zone)
struct per_cpu_pageset *pset;
/* avoid races with drain_pages() */
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
if (zone->pageset != &boot_pageset) {
for_each_online_cpu(cpu) {
pset = per_cpu_ptr(zone->pageset, cpu);
@@ -6102,7 +6163,7 @@ void zone_pcp_reset(struct zone *zone)
free_percpu(zone->pageset);
zone->pageset = &boot_pageset;
}
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index e007236f345a..384518e5f6eb 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -13,6 +13,14 @@
static unsigned long total_usage;
+static void page_cgroup_lock_init(struct page_cgroup *pc, int nr_pages) /* init pcg_lock of nr_pages entries starting at pc */
+{
+#ifdef CONFIG_PREEMPT_RT_BASE
+ for (; nr_pages; nr_pages--, pc++) /* walk the array, one entry per page */
+ spin_lock_init(&pc->pcg_lock);
+#endif /* CONFIG_PREEMPT_RT_BASE; without it this function does nothing */
+}
+
#if !defined(CONFIG_SPARSEMEM)
@@ -60,6 +68,7 @@ static int __init alloc_node_page_cgroup(int nid)
return -ENOMEM;
NODE_DATA(nid)->node_page_cgroup = base;
total_usage += table_size;
+ page_cgroup_lock_init(base, nr_pages);
return 0;
}
@@ -150,6 +159,8 @@ static int __meminit init_section_page_cgroup(unsigned long pfn, int nid)
return -ENOMEM;
}
+ page_cgroup_lock_init(base, PAGES_PER_SECTION);
+
/*
* The passed "pfn" may not be aligned to SECTION. For the calculation
* we need to apply a mask.
diff --git a/mm/slab.c b/mm/slab.c
index bd88411595b9..1e9330f43f46 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -116,6 +116,7 @@
#include <linux/kmemcheck.h>
#include <linux/memory.h>
#include <linux/prefetch.h>
+#include <linux/locallock.h>
#include <net/sock.h>
@@ -633,12 +634,78 @@ static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
#endif
static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
+static DEFINE_PER_CPU(struct list_head, slab_free_list);
+static DEFINE_LOCAL_IRQ_LOCK(slab_lock);
+
+#ifndef CONFIG_PREEMPT_RT_BASE
+# define slab_on_each_cpu(func, cp) on_each_cpu(func, cp, 1)
+#else
+/*
+ * Execute func() for all online CPUs. On PREEMPT_RT we don't actually
+ * have to run on the remote CPUs - we only have to take their CPU-locks.
+ * (This is a rare operation, so cacheline bouncing is not an issue.)
+ */
+static void
+slab_on_each_cpu(void (*func)(void *arg, int this_cpu), void *arg)
+{
+ unsigned int i;
+
+ get_cpu_light();
+ for_each_online_cpu(i)
+ func(arg, i); /* called from this context; i only names the target CPU */
+ put_cpu_light();
+}
+
+static void lock_slab_on(unsigned int cpu) /* RT only: take slab_lock of @cpu */
+{
+ local_lock_irq_on(slab_lock, cpu);
+}
+
+static void unlock_slab_on(unsigned int cpu) /* RT only: release slab_lock of @cpu, see lock_slab_on() */
+{
+ local_unlock_irq_on(slab_lock, cpu);
+}
+#endif
+
+static void free_delayed(struct list_head *h) /* free every page queued on @h, leaving it empty */
+{
+ while(!list_empty(h)) {
+ struct page *page = list_first_entry(h, struct page, lru);
+
+ list_del(&page->lru); /* unlink before handing the page back */
+ __free_pages(page, page->index); /* ->index carries the order, stored by kmem_freepages() */
+ }
+}
+
+static void unlock_l3_and_free_delayed(spinlock_t *list_lock) /* drop @list_lock, then free this CPU's delayed pages */
+{
+ LIST_HEAD(tmp);
+
+ list_splice_init(&__get_cpu_var(slab_free_list), &tmp); /* take over the queue while still locked */
+ local_spin_unlock_irq(slab_lock, list_lock);
+ free_delayed(&tmp); /* the pages are only freed after the unlock */
+}
+
+static void unlock_slab_and_free_delayed(unsigned long flags) /* drop slab_lock (restoring @flags), then free this CPU's delayed pages */
+{
+ LIST_HEAD(tmp);
+
+ list_splice_init(&__get_cpu_var(slab_free_list), &tmp); /* take over the queue while still locked */
+ local_unlock_irqrestore(slab_lock, flags);
+ free_delayed(&tmp); /* the pages are only freed after the unlock */
+}
static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
{
return cachep->array[smp_processor_id()];
}
+static inline struct array_cache *cpu_cache_get_on_cpu(struct kmem_cache *cachep,
+ int cpu)
+{
+ return cachep->array[cpu]; /* like cpu_cache_get(), but for an explicit cpu instead of smp_processor_id() */
+}
+
static size_t slab_mgmt_size(size_t nr_objs, size_t align)
{
return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
@@ -1073,9 +1140,10 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n)
if (n->alien) {
struct array_cache *ac = n->alien[node];
- if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
+ if (ac && ac->avail &&
+ local_spin_trylock_irq(slab_lock, &ac->lock)) {
__drain_alien_cache(cachep, ac, node);
- spin_unlock_irq(&ac->lock);
+ local_spin_unlock_irq(slab_lock, &ac->lock);
}
}
}
@@ -1090,9 +1158,9 @@ static void drain_alien_cache(struct kmem_cache *cachep,
for_each_online_node(i) {
ac = alien[i];
if (ac) {
- spin_lock_irqsave(&ac->lock, flags);
+ local_spin_lock_irqsave(slab_lock, &ac->lock, flags);
__drain_alien_cache(cachep, ac, i);
- spin_unlock_irqrestore(&ac->lock, flags);
+ local_spin_unlock_irqrestore(slab_lock, &ac->lock, flags);
}
}
}
@@ -1171,11 +1239,11 @@ static int init_cache_node_node(int node)
cachep->node[node] = n;
}
- spin_lock_irq(&cachep->node[node]->list_lock);
+ local_spin_lock_irq(slab_lock, &cachep->node[node]->list_lock);
cachep->node[node]->free_limit =
(1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
- spin_unlock_irq(&cachep->node[node]->list_lock);
+ local_spin_unlock_irq(slab_lock, &cachep->node[node]->list_lock);
}
return 0;
}
@@ -1200,7 +1268,7 @@ static void __cpuinit cpuup_canceled(long cpu)
if (!n)
goto free_array_cache;
- spin_lock_irq(&n->list_lock);
+ local_spin_lock_irq(slab_lock, &n->list_lock);
/* Free limit for this kmem_cache_node */
n->free_limit -= cachep->batchcount;
@@ -1208,7 +1276,7 @@ static void __cpuinit cpuup_canceled(long cpu)
free_block(cachep, nc->entry, nc->avail, node);
if (!cpumask_empty(mask)) {
- spin_unlock_irq(&n->list_lock);
+ unlock_l3_and_free_delayed(&n->list_lock);
goto free_array_cache;
}
@@ -1222,7 +1290,7 @@ static void __cpuinit cpuup_canceled(long cpu)
alien = n->alien;
n->alien = NULL;
- spin_unlock_irq(&n->list_lock);
+ unlock_l3_and_free_delayed(&n->list_lock);
kfree(shared);
if (alien) {
@@ -1296,7 +1364,7 @@ static int __cpuinit cpuup_prepare(long cpu)
n = cachep->node[node];
BUG_ON(!n);
- spin_lock_irq(&n->list_lock);
+ local_spin_lock_irq(slab_lock, &n->list_lock);
if (!n->shared) {
/*
* We are serialised from CPU_DEAD or
@@ -1311,7 +1379,7 @@ static int __cpuinit cpuup_prepare(long cpu)
alien = NULL;
}
#endif
- spin_unlock_irq(&n->list_lock);
+ local_spin_unlock_irq(slab_lock, &n->list_lock);
kfree(shared);
free_alien_cache(alien);
if (cachep->flags & SLAB_DEBUG_OBJECTS)
@@ -1512,6 +1580,10 @@ void __init kmem_cache_init(void)
if (num_possible_nodes() == 1)
use_alien_caches = 0;
+ local_irq_lock_init(slab_lock);
+ for_each_possible_cpu(i)
+ INIT_LIST_HEAD(&per_cpu(slab_free_list, i));
+
for (i = 0; i < NUM_INIT_LISTS; i++)
kmem_cache_node_init(&init_kmem_cache_node[i]);
@@ -1789,12 +1861,14 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
/*
* Interface to system's page release.
*/
-static void kmem_freepages(struct kmem_cache *cachep, void *addr)
+static void kmem_freepages(struct kmem_cache *cachep, void *addr, bool delayed)
{
unsigned long i = (1 << cachep->gfporder);
- struct page *page = virt_to_page(addr);
+ struct page *page, *basepage = virt_to_page(addr);
const unsigned long nr_freed = i;
+ page = basepage;
+
kmemcheck_free_shadow(page, cachep->gfporder);
if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
@@ -1813,7 +1887,12 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
memcg_release_pages(cachep, cachep->gfporder);
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += nr_freed;
- free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
+ if (!delayed) {
+ free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
+ } else {
+ basepage->index = cachep->gfporder;
+ list_add(&basepage->lru, &__get_cpu_var(slab_free_list));
+ }
}
static void kmem_rcu_free(struct rcu_head *head)
@@ -1821,7 +1900,7 @@ static void kmem_rcu_free(struct rcu_head *head)
struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
struct kmem_cache *cachep = slab_rcu->cachep;
- kmem_freepages(cachep, slab_rcu->addr);
+ kmem_freepages(cachep, slab_rcu->addr, false);
if (OFF_SLAB(cachep))
kmem_cache_free(cachep->slabp_cache, slab_rcu);
}
@@ -2038,7 +2117,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab
* Before calling the slab must have been unlinked from the cache. The
* cache-lock is not held/needed.
*/
-static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp,
+ bool delayed)
{
void *addr = slabp->s_mem - slabp->colouroff;
@@ -2051,7 +2131,7 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
slab_rcu->addr = addr;
call_rcu(&slab_rcu->head, kmem_rcu_free);
} else {
- kmem_freepages(cachep, addr);
+ kmem_freepages(cachep, addr, delayed);
if (OFF_SLAB(cachep))
kmem_cache_free(cachep->slabp_cache, slabp);
}
@@ -2408,7 +2488,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
#if DEBUG
static void check_irq_off(void)
{
- BUG_ON(!irqs_disabled());
+ BUG_ON_NONRT(!irqs_disabled());
}
static void check_irq_on(void)
@@ -2443,26 +2523,43 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
struct array_cache *ac,
int force, int node);
-static void do_drain(void *arg)
+static void __do_drain(void *arg, unsigned int cpu)
{
struct kmem_cache *cachep = arg;
struct array_cache *ac;
- int node = numa_mem_id();
+ int node = cpu_to_mem(cpu);
- check_irq_off();
- ac = cpu_cache_get(cachep);
+ ac = cpu_cache_get_on_cpu(cachep, cpu);
spin_lock(&cachep->node[node]->list_lock);
free_block(cachep, ac->entry, ac->avail, node);
spin_unlock(&cachep->node[node]->list_lock);
ac->avail = 0;
}
+#ifndef CONFIG_PREEMPT_RT_BASE /* !RT: slab_on_each_cpu() is on_each_cpu(), so this runs on each CPU itself */
+static void do_drain(void *arg)
+{
+ __do_drain(arg, smp_processor_id());
+}
+#else /* RT: called by slab_on_each_cpu() with the target cpu passed in */
+static void do_drain(void *arg, int cpu)
+{
+ LIST_HEAD(tmp);
+
+ lock_slab_on(cpu);
+ __do_drain(arg, cpu);
+ list_splice_init(&per_cpu(slab_free_list, cpu), &tmp); /* collect the pages queued for @cpu while its lock is held */
+ unlock_slab_on(cpu);
+ free_delayed(&tmp); /* free them only after the lock is dropped */
+}
+#endif /* CONFIG_PREEMPT_RT_BASE */
+
static void drain_cpu_caches(struct kmem_cache *cachep)
{
struct kmem_cache_node *n;
int node;
- on_each_cpu(do_drain, cachep, 1);
+ slab_on_each_cpu(do_drain, cachep);
check_irq_on();
for_each_online_node(node) {
n = cachep->node[node];
@@ -2493,10 +2590,10 @@ static int drain_freelist(struct kmem_cache *cache,
nr_freed = 0;
while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
- spin_lock_irq(&n->list_lock);
+ local_spin_lock_irq(slab_lock, &n->list_lock);
p = n->slabs_free.prev;
if (p == &n->slabs_free) {
- spin_unlock_irq(&n->list_lock);
+ local_spin_unlock_irq(slab_lock, &n->list_lock);
goto out;
}
@@ -2510,8 +2607,8 @@ static int drain_freelist(struct kmem_cache *cache,
* to the cache.
*/
n->free_objects -= cache->num;
- spin_unlock_irq(&n->list_lock);
- slab_destroy(cache, slabp);
+ local_spin_unlock_irq(slab_lock, &n->list_lock);
+ slab_destroy(cache, slabp, false);
nr_freed++;
}
out:
@@ -2785,7 +2882,7 @@ static int cache_grow(struct kmem_cache *cachep,
offset *= cachep->colour_off;
if (local_flags & __GFP_WAIT)
- local_irq_enable();
+ local_unlock_irq(slab_lock);
/*
* The test for missing atomic flag is performed here, rather than
@@ -2815,7 +2912,7 @@ static int cache_grow(struct kmem_cache *cachep,
cache_init_objs(cachep, slabp);
if (local_flags & __GFP_WAIT)
- local_irq_disable();
+ local_lock_irq(slab_lock);
check_irq_off();
spin_lock(&n->list_lock);
@@ -2826,10 +2923,10 @@ static int cache_grow(struct kmem_cache *cachep,
spin_unlock(&n->list_lock);
return 1;
opps1:
- kmem_freepages(cachep, objp);
+ kmem_freepages(cachep, objp, false);
failed:
if (local_flags & __GFP_WAIT)
- local_irq_disable();
+ local_lock_irq(slab_lock);
return 0;
}
@@ -3243,11 +3340,11 @@ retry:
* set and go into memory reserves if necessary.
*/
if (local_flags & __GFP_WAIT)
- local_irq_enable();
+ local_unlock_irq(slab_lock);
kmem_flagcheck(cache, flags);
obj = kmem_getpages(cache, local_flags, numa_mem_id());
if (local_flags & __GFP_WAIT)
- local_irq_disable();
+ local_lock_irq(slab_lock);
if (obj) {
/*
* Insert into the appropriate per node queues
@@ -3368,7 +3465,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
cachep = memcg_kmem_get_cache(cachep, flags);
cache_alloc_debugcheck_before(cachep, flags);
- local_irq_save(save_flags);
+ local_lock_irqsave(slab_lock, save_flags);
if (nodeid == NUMA_NO_NODE)
nodeid = slab_node;
@@ -3393,7 +3490,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
/* ___cache_alloc_node can fall back to other nodes */
ptr = ____cache_alloc_node(cachep, flags, nodeid);
out:
- local_irq_restore(save_flags);
+ local_unlock_irqrestore(slab_lock, save_flags);
ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags,
flags);
@@ -3455,9 +3552,9 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
cachep = memcg_kmem_get_cache(cachep, flags);
cache_alloc_debugcheck_before(cachep, flags);
- local_irq_save(save_flags);
+ local_lock_irqsave(slab_lock, save_flags);
objp = __do_cache_alloc(cachep, flags);
- local_irq_restore(save_flags);
+ local_unlock_irqrestore(slab_lock, save_flags);
objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
kmemleak_alloc_recursive(objp, cachep->object_size, 1, cachep->flags,
flags);
@@ -3508,7 +3605,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
* a different cache, refer to comments before
* alloc_slabmgmt.
*/
- slab_destroy(cachep, slabp);
+ slab_destroy(cachep, slabp, true);
} else {
list_add(&slabp->list, &n->slabs_free);
}
@@ -3771,12 +3868,12 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
if (!cachep)
return;
- local_irq_save(flags);
debug_check_no_locks_freed(objp, cachep->object_size);
if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
debug_check_no_obj_freed(objp, cachep->object_size);
+ local_lock_irqsave(slab_lock, flags);
__cache_free(cachep, objp, _RET_IP_);
- local_irq_restore(flags);
+ unlock_slab_and_free_delayed(flags);
trace_kmem_cache_free(_RET_IP_, objp);
}
@@ -3800,14 +3897,14 @@ void kfree(const void *objp)
if (unlikely(ZERO_OR_NULL_PTR(objp)))
return;
- local_irq_save(flags);
kfree_debugcheck(objp);
c = virt_to_cache(objp);
debug_check_no_locks_freed(objp, c->object_size);
debug_check_no_obj_freed(objp, c->object_size);
+ local_lock_irqsave(slab_lock, flags);
__cache_free(c, (void *)objp, _RET_IP_);
- local_irq_restore(flags);
+ unlock_slab_and_free_delayed(flags);
}
EXPORT_SYMBOL(kfree);
@@ -3844,7 +3941,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
if (n) {
struct array_cache *shared = n->shared;
- spin_lock_irq(&n->list_lock);
+ local_spin_lock_irq(slab_lock, &n->list_lock);
if (shared)
free_block(cachep, shared->entry,
@@ -3857,7 +3954,8 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
}
n->free_limit = (1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
- spin_unlock_irq(&n->list_lock);
+ unlock_l3_and_free_delayed(&n->list_lock);
+
kfree(shared);
free_alien_cache(new_alien);
continue;
@@ -3904,18 +4002,29 @@ struct ccupdate_struct {
struct array_cache *new[0];
};
-static void do_ccupdate_local(void *info)
+static void __do_ccupdate_local(void *info, int cpu)
{
struct ccupdate_struct *new = info;
struct array_cache *old;
- check_irq_off();
- old = cpu_cache_get(new->cachep);
+ old = cpu_cache_get_on_cpu(new->cachep, cpu);
- new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
- new->new[smp_processor_id()] = old;
+ new->cachep->array[cpu] = new->new[cpu];
+ new->new[cpu] = old;
}
+#ifndef CONFIG_PREEMPT_RT_BASE /* !RT: slab_on_each_cpu() is on_each_cpu(), so this runs on each CPU itself */
+static void do_ccupdate_local(void *info)
+{
+ __do_ccupdate_local(info, smp_processor_id());
+}
+#else /* RT: called by slab_on_each_cpu() with the target cpu passed in */
+static void do_ccupdate_local(void *info, int cpu)
+{
+ __do_ccupdate_local(info, cpu); /* NOTE(review): unlike do_drain(), no lock_slab_on(cpu) is taken here - confirm this is intended */
+}
+#endif /* CONFIG_PREEMPT_RT_BASE */
+
/* Always called with the slab_mutex held */
static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
int batchcount, int shared, gfp_t gfp)
@@ -3940,7 +4049,7 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
}
new->cachep = cachep;
- on_each_cpu(do_ccupdate_local, (void *)new, 1);
+ slab_on_each_cpu(do_ccupdate_local, (void *)new);
check_irq_on();
cachep->batchcount = batchcount;
@@ -3951,9 +4060,11 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
struct array_cache *ccold = new->new[i];
if (!ccold)
continue;
- spin_lock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
+ local_spin_lock_irq(slab_lock,
+ &cachep->node[cpu_to_mem(i)]->list_lock);
free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
- spin_unlock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
+
+ unlock_l3_and_free_delayed(&cachep->node[cpu_to_mem(i)]->list_lock);
kfree(ccold);
}
kfree(new);
@@ -4068,7 +4179,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
if (ac->touched && !force) {
ac->touched = 0;
} else {
- spin_lock_irq(&n->list_lock);
+ local_spin_lock_irq(slab_lock, &n->list_lock);
if (ac->avail) {
tofree = force ? ac->avail : (ac->limit + 4) / 5;
if (tofree > ac->avail)
@@ -4078,7 +4189,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
memmove(ac->entry, &(ac->entry[tofree]),
sizeof(void *) * ac->avail);
}
- spin_unlock_irq(&n->list_lock);
+ local_spin_unlock_irq(slab_lock, &n->list_lock);
}
}
@@ -4171,7 +4282,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
continue;
check_irq_on();
- spin_lock_irq(&n->list_lock);
+ local_spin_lock_irq(slab_lock, &n->list_lock);
list_for_each_entry(slabp, &n->slabs_full, list) {
if (slabp->inuse != cachep->num && !error)
@@ -4196,7 +4307,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
if (n->shared)
shared_avail += n->shared->avail;
- spin_unlock_irq(&n->list_lock);
+ local_spin_unlock_irq(slab_lock, &n->list_lock);
}
num_slabs += active_slabs;
num_objs = num_slabs * cachep->num;
@@ -4396,13 +4507,13 @@ static int leaks_show(struct seq_file *m, void *p)
continue;
check_irq_on();
- spin_lock_irq(&n->list_lock);
+ local_spin_lock_irq(slab_lock, &n->list_lock);
list_for_each_entry(slabp, &n->slabs_full, list)
handle_slab(x, cachep, slabp);
list_for_each_entry(slabp, &n->slabs_partial, list)
handle_slab(x, cachep, slabp);
- spin_unlock_irq(&n->list_lock);
+ local_spin_unlock_irq(slab_lock, &n->list_lock);
}
name = cachep->name;
if (x[0] == x[1]) {
diff --git a/mm/slab.h b/mm/slab.h
index 4d6d836247dd..fc3c0976e664 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -247,7 +247,11 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
* The slab lists for all objects.
*/
struct kmem_cache_node {
+#ifdef CONFIG_SLAB
spinlock_t list_lock;
+#else
+ raw_spinlock_t list_lock;
+#endif
#ifdef CONFIG_SLAB
struct list_head slabs_partial; /* partial list first, better asm code */
diff --git a/mm/slub.c b/mm/slub.c
index deaed7b47213..8d8a3a641f0b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1071,7 +1071,7 @@ static noinline struct kmem_cache_node *free_debug_processing(
{
struct kmem_cache_node *n = get_node(s, page_to_nid(page));
- spin_lock_irqsave(&n->list_lock, *flags);
+ raw_spin_lock_irqsave(&n->list_lock, *flags);
slab_lock(page);
if (!check_slab(s, page))
@@ -1119,7 +1119,7 @@ out:
fail:
slab_unlock(page);
- spin_unlock_irqrestore(&n->list_lock, *flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, *flags);
slab_fix(s, "Object at 0x%p not freed", object);
return NULL;
}
@@ -1254,6 +1254,12 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x) {}
#endif /* CONFIG_SLUB_DEBUG */
+struct slub_free_list { /* per-CPU queue of slab pages whose release free_slab() deferred because irqs were disabled */
+ raw_spinlock_t lock; /* protects @list */
+ struct list_head list; /* queued pages, linked through page->lru */
+};
+static DEFINE_PER_CPU(struct slub_free_list, slub_free_list); /* lock and list head are initialised in kmem_cache_init() */
+
/*
* Slab allocation and freeing
*/
@@ -1275,10 +1281,15 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
struct page *page;
struct kmem_cache_order_objects oo = s->oo;
gfp_t alloc_gfp;
+ bool enableirqs;
flags &= gfp_allowed_mask;
- if (flags & __GFP_WAIT)
+ enableirqs = (flags & __GFP_WAIT) != 0;
+#ifdef CONFIG_PREEMPT_RT_FULL
+ enableirqs |= system_state == SYSTEM_RUNNING;
+#endif
+ if (enableirqs)
local_irq_enable();
flags |= s->allocflags;
@@ -1318,7 +1329,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
kmemcheck_mark_unallocated_pages(page, pages);
}
- if (flags & __GFP_WAIT)
+ if (enableirqs)
local_irq_disable();
if (!page)
return NULL;
@@ -1336,8 +1347,10 @@ static void setup_object(struct kmem_cache *s, struct page *page,
void *object)
{
setup_object_debug(s, page, object);
+#ifndef CONFIG_PREEMPT_RT_FULL
if (unlikely(s->ctor))
s->ctor(object);
+#endif
}
static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
@@ -1415,6 +1428,16 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
__free_memcg_kmem_pages(page, order);
}
+/*
+ * free_delayed - release every slab page queued on a private list
+ * @h: list of pages linked through page->lru; empty on return
+ *
+ * Each page is unlinked and passed to __free_slab() together with the
+ * cache recorded in page->slab_cache. The callers in this file first
+ * splice a per-CPU slub_free_list onto a local list head under its lock
+ * and only call this once that lock has been dropped.
+ */
+static void free_delayed(struct list_head *h)
+{
+ while (!list_empty(h)) {
+ struct page *page = list_first_entry(h, struct page, lru);
+
+ list_del(&page->lru);
+ __free_slab(page->slab_cache, page);
+ }
+}
+
#define need_reserve_slab_rcu \
(sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
@@ -1449,6 +1472,12 @@ static void free_slab(struct kmem_cache *s, struct page *page)
}
call_rcu(head, rcu_free_slab);
+ } else if (irqs_disabled()) {
+ struct slub_free_list *f = &__get_cpu_var(slub_free_list);
+
+ raw_spin_lock(&f->lock);
+ list_add(&page->lru, &f->list);
+ raw_spin_unlock(&f->lock);
} else
__free_slab(s, page);
}
@@ -1553,7 +1582,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
if (!n || !n->nr_partial)
return NULL;
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
list_for_each_entry_safe(page, page2, &n->partial, lru) {
void *t;
@@ -1577,7 +1606,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
break;
}
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
return object;
}
@@ -1819,7 +1848,7 @@ redo:
* that acquire_slab() will see a slab page that
* is frozen
*/
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
}
} else {
m = M_FULL;
@@ -1830,7 +1859,7 @@ redo:
* slabs from diagnostic functions will not see
* any frozen slabs.
*/
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
}
}
@@ -1865,7 +1894,7 @@ redo:
goto redo;
if (lock)
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
if (m == M_FREE) {
stat(s, DEACTIVATE_EMPTY);
@@ -1896,10 +1925,10 @@ static void unfreeze_partials(struct kmem_cache *s,
n2 = get_node(s, page_to_nid(page));
if (n != n2) {
if (n)
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
n = n2;
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
}
do {
@@ -1928,7 +1957,7 @@ static void unfreeze_partials(struct kmem_cache *s,
}
if (n)
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
while (discard_page) {
page = discard_page;
@@ -1964,14 +1993,21 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
pobjects = oldpage->pobjects;
pages = oldpage->pages;
if (drain && pobjects > s->cpu_partial) {
+ struct slub_free_list *f;
unsigned long flags;
+ LIST_HEAD(tofree);
/*
* partial array is full. Move the existing
* set to the per node partial list.
*/
local_irq_save(flags);
unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
+ f = &__get_cpu_var(slub_free_list);
+ raw_spin_lock(&f->lock);
+ list_splice_init(&f->list, &tofree);
+ raw_spin_unlock(&f->lock);
local_irq_restore(flags);
+ free_delayed(&tofree);
oldpage = NULL;
pobjects = 0;
pages = 0;
@@ -2033,7 +2069,22 @@ static bool has_cpu_slab(int cpu, void *info)
static void flush_all(struct kmem_cache *s)
{
+ LIST_HEAD(tofree);
+ int cpu;
+
on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
+ /*
+ * Release the pages whose freeing free_slab() deferred because it
+ * ran with interrupts disabled. Every online CPU is drained
+ * unconditionally: the flush above is what takes a CPU's slabs
+ * away, so filtering on has_cpu_slab() at this point would skip
+ * the CPUs most likely to have queued pages and leave them
+ * on slub_free_list until that CPU next drains it.
+ * NOTE(review): relies on flush_cpu_slab() leaving has_cpu_slab()
+ * false for the flushed CPU - confirm against their definitions.
+ */
+ for_each_online_cpu(cpu) {
+ struct slub_free_list *f;
+
+ f = &per_cpu(slub_free_list, cpu);
+ raw_spin_lock_irq(&f->lock);
+ list_splice_init(&f->list, &tofree);
+ raw_spin_unlock_irq(&f->lock);
+ /* free outside the raw lock; tofree is empty again afterwards */
+ free_delayed(&tofree);
+ }
}
/*
@@ -2061,10 +2112,10 @@ static unsigned long count_partial(struct kmem_cache_node *n,
unsigned long x = 0;
struct page *page;
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, lru)
x += get_count(page);
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
return x;
}
@@ -2207,9 +2258,11 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
unsigned long addr, struct kmem_cache_cpu *c)
{
+ struct slub_free_list *f;
void *freelist;
struct page *page;
unsigned long flags;
+ LIST_HEAD(tofree);
local_irq_save(flags);
#ifdef CONFIG_PREEMPT
@@ -2272,7 +2325,13 @@ load_freelist:
VM_BUG_ON(!c->page->frozen);
c->freelist = get_freepointer(s, freelist);
c->tid = next_tid(c->tid);
+out:
+ f = &__get_cpu_var(slub_free_list);
+ raw_spin_lock(&f->lock);
+ list_splice_init(&f->list, &tofree);
+ raw_spin_unlock(&f->lock);
local_irq_restore(flags);
+ free_delayed(&tofree);
return freelist;
new_slab:
@@ -2290,9 +2349,7 @@ new_slab:
if (unlikely(!freelist)) {
if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
slab_out_of_memory(s, gfpflags, node);
-
- local_irq_restore(flags);
- return NULL;
+ goto out;
}
page = c->page;
@@ -2306,8 +2363,7 @@ new_slab:
deactivate_slab(s, page, get_freepointer(s, freelist));
c->page = NULL;
c->freelist = NULL;
- local_irq_restore(flags);
- return freelist;
+ goto out;
}
/*
@@ -2390,6 +2446,10 @@ redo:
if (unlikely(gfpflags & __GFP_ZERO) && object)
memset(object, 0, s->object_size);
+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (unlikely(s->ctor) && object)
+ s->ctor(object);
+#endif
slab_post_alloc_hook(s, gfpflags, object);
@@ -2484,7 +2544,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
do {
if (unlikely(n)) {
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
n = NULL;
}
prior = page->freelist;
@@ -2514,7 +2574,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
* Otherwise the list_lock will synchronize with
* other processors updating the list of slabs.
*/
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
}
}
@@ -2555,7 +2615,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
add_partial(n, page, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
return;
slab_empty:
@@ -2569,7 +2629,7 @@ slab_empty:
/* Slab must be on the full list */
remove_full(s, page);
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
stat(s, FREE_SLAB);
discard_slab(s, page);
}
@@ -2771,7 +2831,7 @@ static void
init_kmem_cache_node(struct kmem_cache_node *n)
{
n->nr_partial = 0;
- spin_lock_init(&n->list_lock);
+ raw_spin_lock_init(&n->list_lock);
INIT_LIST_HEAD(&n->partial);
#ifdef CONFIG_SLUB_DEBUG
atomic_long_set(&n->nr_slabs, 0);
@@ -3393,7 +3453,7 @@ int kmem_cache_shrink(struct kmem_cache *s)
for (i = 0; i < objects; i++)
INIT_LIST_HEAD(slabs_by_inuse + i);
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
/*
* Build lists indexed by the items in use in each slab.
@@ -3414,7 +3474,7 @@ int kmem_cache_shrink(struct kmem_cache *s)
for (i = objects - 1; i > 0; i--)
list_splice(slabs_by_inuse + i, n->partial.prev);
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
/* Release empty slabs */
list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
@@ -3590,6 +3650,12 @@ void __init kmem_cache_init(void)
{
static __initdata struct kmem_cache boot_kmem_cache,
boot_kmem_cache_node;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
+ INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
+ }
if (debug_guardpage_minorder())
slub_max_order = 0;
@@ -3894,7 +3960,7 @@ static int validate_slab_node(struct kmem_cache *s,
struct page *page;
unsigned long flags;
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, lru) {
validate_slab_slab(s, page, map);
@@ -3917,7 +3983,7 @@ static int validate_slab_node(struct kmem_cache *s,
atomic_long_read(&n->nr_slabs));
out:
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
return count;
}
@@ -4107,12 +4173,12 @@ static int list_locations(struct kmem_cache *s, char *buf,
if (!atomic_long_read(&n->nr_slabs))
continue;
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, lru)
process_slab(&t, s, page, alloc, map);
list_for_each_entry(page, &n->full, lru)
process_slab(&t, s, page, alloc, map);
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
}
for (i = 0; i < t.count; i++) {
diff --git a/mm/swap.c b/mm/swap.c
index 4e35f3ff0427..351b1cbf3d95 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -32,6 +32,7 @@
#include <linux/gfp.h>
#include <linux/uio.h>
#include <linux/hugetlb.h>
+#include <linux/locallock.h>
#include "internal.h"
@@ -42,6 +43,9 @@ static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs);
static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
+static DEFINE_LOCAL_IRQ_LOCK(rotate_lock); /* taken where local_irq_save() used to guard the lru_rotate_pvecs */
+static DEFINE_LOCAL_IRQ_LOCK(swapvec_lock); /* taken around the activate_page, lru_add and lru_deactivate pagevecs and by lru_add_drain() */
+
/*
* This path almost never happens for VM activity - pages are normally
* freed via pagevecs. But it gets used by networking.
@@ -405,11 +409,11 @@ void rotate_reclaimable_page(struct page *page)
unsigned long flags;
page_cache_get(page);
- local_irq_save(flags);
+ local_lock_irqsave(rotate_lock, flags);
pvec = &__get_cpu_var(lru_rotate_pvecs);
if (!pagevec_add(pvec, page))
pagevec_move_tail(pvec);
- local_irq_restore(flags);
+ local_unlock_irqrestore(rotate_lock, flags);
}
}
@@ -454,12 +458,13 @@ static void activate_page_drain(int cpu)
void activate_page(struct page *page)
{
if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
- struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
+ struct pagevec *pvec = &get_locked_var(swapvec_lock,
+ activate_page_pvecs); /* replaces get_cpu_var(); presumably acquires swapvec_lock first - confirm in linux/locallock.h */
page_cache_get(page);
if (!pagevec_add(pvec, page))
pagevec_lru_move_fn(pvec, __activate_page, NULL);
- put_cpu_var(activate_page_pvecs);
+ put_locked_var(swapvec_lock, activate_page_pvecs); /* pairs with get_locked_var() above */
}
}
@@ -507,13 +512,13 @@ EXPORT_SYMBOL(mark_page_accessed);
*/
void __lru_cache_add(struct page *page, enum lru_list lru)
{
- struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru];
+ struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvecs)[lru]; /* replaces get_cpu_var(); presumably acquires swapvec_lock first - confirm in linux/locallock.h */
page_cache_get(page);
if (!pagevec_space(pvec))
__pagevec_lru_add(pvec, lru);
pagevec_add(pvec, page);
- put_cpu_var(lru_add_pvecs);
+ put_locked_var(swapvec_lock, lru_add_pvecs); /* pairs with get_locked_var() above */
}
EXPORT_SYMBOL(__lru_cache_add);
@@ -648,9 +653,9 @@ void lru_add_drain_cpu(int cpu)
unsigned long flags;
/* No harm done if a racing interrupt already did this */
- local_irq_save(flags);
+ local_lock_irqsave(rotate_lock, flags);
pagevec_move_tail(pvec);
- local_irq_restore(flags);
+ local_unlock_irqrestore(rotate_lock, flags);
}
pvec = &per_cpu(lru_deactivate_pvecs, cpu);
@@ -678,18 +683,19 @@ void deactivate_page(struct page *page)
return;
if (likely(get_page_unless_zero(page))) {
- struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
+ struct pagevec *pvec = &get_locked_var(swapvec_lock,
+ lru_deactivate_pvecs);
if (!pagevec_add(pvec, page))
pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
- put_cpu_var(lru_deactivate_pvecs);
+ put_locked_var(swapvec_lock, lru_deactivate_pvecs);
}
}
void lru_add_drain(void)
{
- lru_add_drain_cpu(get_cpu());
- put_cpu();
+ lru_add_drain_cpu(local_lock_cpu(swapvec_lock)); /* drain the CPU number that local_lock_cpu() returns */
+ local_unlock_cpu(swapvec_lock); /* pairs with local_lock_cpu() above */
}
static void lru_add_drain_per_cpu(struct work_struct *dummy)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d4565606cc96..f9a9a24facd9 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -792,7 +792,7 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
struct vmap_block *vb;
struct vmap_area *va;
unsigned long vb_idx;
- int node, err;
+ int node, err, cpu;
node = numa_node_id();
@@ -831,12 +831,13 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
BUG_ON(err);
radix_tree_preload_end();
- vbq = &get_cpu_var(vmap_block_queue);
+ cpu = get_cpu_light();
+ vbq = &__get_cpu_var(vmap_block_queue);
vb->vbq = vbq;
spin_lock(&vbq->lock);
list_add_rcu(&vb->free_list, &vbq->free);
spin_unlock(&vbq->lock);
- put_cpu_var(vmap_block_queue);
+ put_cpu_light();
return vb;
}
@@ -910,7 +911,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
struct vmap_block *vb;
unsigned long addr = 0;
unsigned int order;
- int purge = 0;
+ int purge = 0, cpu;
BUG_ON(size & ~PAGE_MASK);
BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
@@ -926,7 +927,8 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
again:
rcu_read_lock();
- vbq = &get_cpu_var(vmap_block_queue);
+ cpu = get_cpu_light();
+ vbq = &__get_cpu_var(vmap_block_queue);
list_for_each_entry_rcu(vb, &vbq->free, free_list) {
int i;
@@ -963,7 +965,7 @@ next:
if (purge)
purge_fragmented_blocks_thiscpu();
- put_cpu_var(vmap_block_queue);
+ put_cpu_light();
rcu_read_unlock();
if (!addr) {
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6d9bace4e589..5242d6da7181 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -215,6 +215,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
long x;
long t;
+ preempt_disable_rt();
x = delta + __this_cpu_read(*p);
t = __this_cpu_read(pcp->stat_threshold);
@@ -224,6 +225,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
x = 0;
}
__this_cpu_write(*p, x);
+ preempt_enable_rt();
}
EXPORT_SYMBOL(__mod_zone_page_state);
@@ -256,6 +258,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
s8 __percpu *p = pcp->vm_stat_diff + item;
s8 v, t;
+ preempt_disable_rt();
v = __this_cpu_inc_return(*p);
t = __this_cpu_read(pcp->stat_threshold);
if (unlikely(v > t)) {
@@ -264,6 +267,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
zone_page_state_add(v + overstep, zone, item);
__this_cpu_write(*p, -overstep);
}
+ preempt_enable_rt();
}
void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
@@ -278,6 +282,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
s8 __percpu *p = pcp->vm_stat_diff + item;
s8 v, t;
+ preempt_disable_rt();
v = __this_cpu_dec_return(*p);
t = __this_cpu_read(pcp->stat_threshold);
if (unlikely(v < - t)) {
@@ -286,6 +291,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
zone_page_state_add(v - overstep, zone, item);
__this_cpu_write(*p, overstep);
}
+ preempt_enable_rt();
}
void __dec_zone_page_state(struct page *page, enum zone_stat_item item)