aboutsummaryrefslogtreecommitdiff
path: root/mm/madvise.c
diff options
context:
space:
mode:
authorMinchan Kim <minchan@kernel.org>2015-06-10 10:10:23 +1000
committerStephen Rothwell <sfr@canb.auug.org.au>2015-06-10 10:10:23 +1000
commit5c56bfd370dffaa7fac9f5601e7d6e48c2efe72f (patch)
tree165d03024f235b017a01f3d5bd8d6244a0278700 /mm/madvise.c
parentaf289d3fb8a005fb12de6f484868ca409254ce32 (diff)
mm: free swp_entry in madvise_free
When I test the piece of code below with 12 processes (i.e., 512M * 12 = 6G consumed) on my machine (3G RAM + 12 CPUs + 8G swap), madvise_free is significantly slower (i.e., 2x) than madvise_dontneed. loop = 5; mmap(512M); while (loop--) { memset(512M); madvise(MADV_FREE or MADV_DONTNEED); } The reason is lots of swapin. 1) dontneed: 1,612 swapin 2) madvfree: 879,585 swapin If we find that the hinted pages were already swapped out when the syscall is called, it's pointless to keep the swapped-out pages in the pte. Instead, let's free the cold pages, because swapin is more expensive than (alloc page + zeroing). This patch reduces swapin from 879,585 to 1,878, and the elapsed times are: 1) dontneed: 6.10user 233.50system 0:50.44elapsed 2) madvfree: 6.03user 401.17system 1:30.67elapsed 3) madvfree + below patch: 6.70user 339.14system 1:04.45elapsed Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm/madvise.c')
-rw-r--r--mm/madvise.c26
1 files changed, 25 insertions, 1 deletions
diff --git a/mm/madvise.c b/mm/madvise.c
index 181550a342bc..f723f03c9779 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -275,7 +275,9 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
spinlock_t *ptl;
pte_t *pte, ptent;
struct page *page;
+ swp_entry_t entry;
unsigned long next;
+ int nr_swap = 0;
next = pmd_addr_end(addr, end);
if (pmd_trans_huge(*pmd)) {
@@ -294,8 +296,22 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
for (; addr != end; pte++, addr += PAGE_SIZE) {
ptent = *pte;
- if (!pte_present(ptent))
+ if (pte_none(ptent))
continue;
+ /*
+ * If the pte has swp_entry, just clear page table to
+ * prevent swap-in which is more expensive rather than
+ * (page allocation + zeroing).
+ */
+ if (!pte_present(ptent)) {
+ entry = pte_to_swp_entry(ptent);
+ if (non_swap_entry(entry))
+ continue;
+ nr_swap--;
+ free_swap_and_cache(entry);
+ pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
+ continue;
+ }
page = vm_normal_page(vma, addr, ptent);
if (!page)
@@ -327,6 +343,14 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
set_pte_at(mm, addr, pte, ptent);
tlb_remove_tlb_entry(tlb, pte, addr);
}
+
+ if (nr_swap) {
+ if (current->mm == mm)
+ sync_mm_rss(mm);
+
+ add_mm_counter(mm, MM_SWAPENTS, nr_swap);
+ }
+
arch_leave_lazy_mmu_mode();
pte_unmap_unlock(pte - 1, ptl);
next: