author    Linus Torvalds <torvalds@linux-foundation.org>  2021-02-24 16:20:38 -0800
committer Linus Torvalds <torvalds@linux-foundation.org>  2021-02-24 16:20:38 -0800
commit    4c48faba5b7f18fb53e4aeeb768932f17c9da1ed (patch)
tree      9d9d2ca881a670b5df55a663ee506f212c9514c3 /include
parent    062c84fccc4444805738d76a2699c4d3c95184ec (diff)
parent    a553e3cd2053501b658feec2be9a3b662eb1b22b (diff)
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton:
 "A few small subsystems and some of MM.

  172 patches.

  Subsystems affected by this patch series: hexagon, scripts, ntfs,
  ocfs2, vfs, and mm (slab-generic, slab, slub, debug, pagecache, swap,
  memcg, pagemap, mprotect, mremap, page-reporting, vmalloc, kasan,
  pagealloc, memory-failure, hugetlb, vmscan, z3fold, compaction,
  mempolicy, oom-kill, hugetlbfs, and migration)"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (172 commits)
  mm/migrate: remove unneeded semicolons
  hugetlbfs: remove unneeded return value of hugetlb_vmtruncate()
  hugetlbfs: fix some comment typos
  hugetlbfs: correct some obsolete comments about inode i_mutex
  hugetlbfs: make hugepage size conversion more readable
  hugetlbfs: remove meaningless variable avoid_reserve
  hugetlbfs: correct obsolete function name in hugetlbfs_read_iter()
  hugetlbfs: use helper macro default_hstate in init_hugetlbfs_fs
  hugetlbfs: remove useless BUG_ON(!inode) in hugetlbfs_setattr()
  hugetlbfs: remove special hugetlbfs_set_page_dirty()
  mm/hugetlb: change hugetlb_reserve_pages() to type bool
  mm, oom: fix a comment in dump_task()
  mm/mempolicy: use helper range_in_vma() in queue_pages_test_walk()
  numa balancing: migrate on fault among multiple bound nodes
  mm, compaction: make fast_isolate_freepages() stay within zone
  mm/compaction: fix misbehaviors of fast_find_migrateblock()
  mm/compaction: correct deferral logic for proactive compaction
  mm/compaction: remove duplicated VM_BUG_ON_PAGE !PageLocked
  mm/compaction: remove rcu_read_lock during page compaction
  z3fold: simplify the zhdr initialization code in init_z3fold_page()
  ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/fs.h4
-rw-r--r--include/linux/gfp.h14
-rw-r--r--include/linux/highmem-internal.h5
-rw-r--r--include/linux/huge_mm.h15
-rw-r--r--include/linux/hugetlb.h98
-rw-r--r--include/linux/kasan-checks.h6
-rw-r--r--include/linux/kasan.h37
-rw-r--r--include/linux/memcontrol.h43
-rw-r--r--include/linux/migrate.h2
-rw-r--r--include/linux/mm.h22
-rw-r--r--include/linux/mm_inline.h113
-rw-r--r--include/linux/mmzone.h22
-rw-r--r--include/linux/page-flags.h6
-rw-r--r--include/linux/page_counter.h9
-rw-r--r--include/linux/pagemap.h5
-rw-r--r--include/linux/swap.h8
-rw-r--r--include/trace/events/kmem.h24
-rw-r--r--include/trace/events/pagemap.h11
-rw-r--r--include/uapi/linux/mempolicy.h4
19 files changed, 276 insertions, 172 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 418b772d6eca..ec8f3ddf4a6a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3080,8 +3080,8 @@ extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
extern int generic_write_check_limits(struct file *file, loff_t pos,
loff_t *count);
extern int generic_file_rw_checks(struct file *file_in, struct file *file_out);
-extern ssize_t generic_file_buffered_read(struct kiocb *iocb,
- struct iov_iter *to, ssize_t already_read);
+ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *to,
+ ssize_t already_read);
extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
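[Editorial aside: a hedged usage sketch, not part of this diff. With the
rename from generic_file_buffered_read() to filemap_read(), a filesystem's
->read_iter method would now fall through to the new name; example_read_iter
is a hypothetical caller.]

static ssize_t example_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	/* 0 = no bytes already consumed before entering the buffered path */
	return filemap_read(iocb, to, 0);
}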
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 80544d5c08e7..220cd553a9e7 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -8,6 +8,20 @@
#include <linux/linkage.h>
#include <linux/topology.h>
+/* The typedef is in types.h but we want the documentation here */
+#if 0
+/**
+ * typedef gfp_t - Memory allocation flags.
+ *
+ * GFP flags are commonly used throughout Linux to indicate how memory
+ * should be allocated. The GFP acronym stands for get_free_pages(),
+ * the underlying memory allocation function. Not every GFP flag is
+ * supported by every function which may allocate memory. Most users
+ * will want to use a plain ``GFP_KERNEL``.
+ */
+typedef unsigned int __bitwise gfp_t;
+#endif
+
struct vm_area_struct;
/*
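[Editorial aside: as the new kernel-doc above says, most allocation sites
simply pass GFP_KERNEL. A minimal sketch, not from this diff;
example_alloc is hypothetical.]

#include <linux/slab.h>

static int example_alloc(void)
{
	void *buf = kmalloc(64, GFP_KERNEL);	/* gfp_t selects allocation behavior */

	if (!buf)
		return -ENOMEM;
	kfree(buf);
	return 0;
}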
diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
index 1bbe96dc8be6..7902c7d8b55f 100644
--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -127,11 +127,6 @@ static inline unsigned long totalhigh_pages(void)
return (unsigned long)atomic_long_read(&_totalhigh_pages);
}
-static inline void totalhigh_pages_inc(void)
-{
- atomic_long_inc(&_totalhigh_pages);
-}
-
static inline void totalhigh_pages_add(long count)
{
atomic_long_add(count, &_totalhigh_pages);
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 6a19f35f836b..ba973efcd369 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -78,6 +78,7 @@ static inline vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn,
}
enum transparent_hugepage_flag {
+ TRANSPARENT_HUGEPAGE_NEVER_DAX,
TRANSPARENT_HUGEPAGE_FLAG,
TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
@@ -123,6 +124,13 @@ extern unsigned long transparent_hugepage_flags;
*/
static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
{
+
+ /*
+ * Bail out if the hardware/firmware has marked hugepage support disabled.
+ */
+ if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_NEVER_DAX))
+ return false;
+
if (vma->vm_flags & VM_NOHUGEPAGE)
return false;
@@ -134,12 +142,7 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
return true;
- /*
- * For dax vmas, try to always use hugepage mappings. If the kernel does
- * not support hugepages, fsdax mappings will fallback to PAGE_SIZE
- * mappings, and device-dax namespaces, that try to guarantee a given
- * mapping size, will fail to enable
- */
+
if (vma_is_dax(vma))
return true;
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index b5807f23caf8..cccd1aab69dd 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -37,7 +37,7 @@ struct hugepage_subpool {
struct hstate *hstate;
long min_hpages; /* Minimum huge pages or -1 if no minimum. */
long rsv_hpages; /* Pages reserved against global pool to */
- /* sasitfy minimum size. */
+ /* satisfy minimum size. */
};
struct resv_map {
@@ -139,7 +139,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte,
unsigned long dst_addr,
unsigned long src_addr,
struct page **pagep);
-int hugetlb_reserve_pages(struct inode *inode, long from, long to,
+bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
struct vm_area_struct *vma,
vm_flags_t vm_flags);
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
@@ -472,6 +472,84 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long flags);
#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */
+/*
+ * hugetlb page specific state flags. These flags are located in page.private
+ * of the hugetlb head page. Functions created via the below macros should be
+ * used to manipulate these flags.
+ *
+ * HPG_restore_reserve - Set when a hugetlb page consumes a reservation at
+ * allocation time. Cleared when page is fully instantiated. Free
+ * routine checks flag to restore a reservation on error paths.
+ * Synchronization: Examined or modified by code that knows it has
+ * the only reference to page. i.e. After allocation but before use
+ * or when the page is being freed.
+ * HPG_migratable - Set after a newly allocated page is added to the page
+ * cache and/or page tables. Indicates the page is a candidate for
+ * migration.
+ * Synchronization: Initially set after new page allocation with no
+ * locking. When examined and modified during migration processing
+ * (isolate, migrate, putback) the hugetlb_lock is held.
+ * HPG_temporary - Set on a page that is temporarily allocated from the buddy
+ * allocator. Typically used for migration target pages when no pages
+ * are available in the pool. The hugetlb free page path will
+ * immediately free pages with this flag set to the buddy allocator.
+ * Synchronization: Can be set after huge page allocation from buddy when
+ * code knows it has the only reference. All other examinations and
+ * modifications require hugetlb_lock.
+ * HPG_freed - Set when page is on the free lists.
+ * Synchronization: hugetlb_lock held for examination and modification.
+ */
+enum hugetlb_page_flags {
+ HPG_restore_reserve = 0,
+ HPG_migratable,
+ HPG_temporary,
+ HPG_freed,
+ __NR_HPAGEFLAGS,
+};
+
+/*
+ * Macros to create test, set and clear function definitions for
+ * hugetlb specific page flags.
+ */
+#ifdef CONFIG_HUGETLB_PAGE
+#define TESTHPAGEFLAG(uname, flname) \
+static inline int HPage##uname(struct page *page) \
+ { return test_bit(HPG_##flname, &(page->private)); }
+
+#define SETHPAGEFLAG(uname, flname) \
+static inline void SetHPage##uname(struct page *page) \
+ { set_bit(HPG_##flname, &(page->private)); }
+
+#define CLEARHPAGEFLAG(uname, flname) \
+static inline void ClearHPage##uname(struct page *page) \
+ { clear_bit(HPG_##flname, &(page->private)); }
+#else
+#define TESTHPAGEFLAG(uname, flname) \
+static inline int HPage##uname(struct page *page) \
+ { return 0; }
+
+#define SETHPAGEFLAG(uname, flname) \
+static inline void SetHPage##uname(struct page *page) \
+ { }
+
+#define CLEARHPAGEFLAG(uname, flname) \
+static inline void ClearHPage##uname(struct page *page) \
+ { }
+#endif
+
+#define HPAGEFLAG(uname, flname) \
+ TESTHPAGEFLAG(uname, flname) \
+ SETHPAGEFLAG(uname, flname) \
+ CLEARHPAGEFLAG(uname, flname) \
+
+/*
+ * Create functions associated with hugetlb page flags
+ */
+HPAGEFLAG(RestoreReserve, restore_reserve)
+HPAGEFLAG(Migratable, migratable)
+HPAGEFLAG(Temporary, temporary)
+HPAGEFLAG(Freed, freed)
+
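[Editorial aside: each HPAGEFLAG() invocation above expands to
HPage##uname, SetHPage##uname and ClearHPage##uname accessors. A hedged
sketch of the generated API; example_mark_migratable is hypothetical.]

static void example_mark_migratable(struct page *hpage)
{
	/* these helpers operate on bits in the head page's page->private */
	if (!HPageMigratable(hpage))
		SetHPageMigratable(hpage);
}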
#ifdef CONFIG_HUGETLB_PAGE
#define HSTATE_NAME_LEN 32
@@ -531,6 +609,20 @@ extern unsigned int default_hstate_idx;
#define default_hstate (hstates[default_hstate_idx])
+/*
+ * hugetlb page subpool pointer located in hpage[1].private
+ */
+static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
+{
+ return (struct hugepage_subpool *)(hpage+1)->private;
+}
+
+static inline void hugetlb_set_page_subpool(struct page *hpage,
+ struct hugepage_subpool *subpool)
+{
+ set_page_private(hpage+1, (unsigned long)subpool);
+}
+
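[Editorial aside: a usage sketch for the new subpool accessors above;
example_attach_subpool is hypothetical, not from the diff.]

static void example_attach_subpool(struct page *hpage,
				   struct hugepage_subpool *spool)
{
	hugetlb_set_page_subpool(hpage, spool);		/* stored in hpage[1].private */
	WARN_ON(hugetlb_page_subpool(hpage) != spool);
}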
static inline struct hstate *hstate_file(struct file *f)
{
return hstate_inode(file_inode(f));
@@ -770,8 +862,6 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
}
#endif
-void set_page_huge_active(struct page *page);
-
#else /* CONFIG_HUGETLB_PAGE */
struct hstate {};
diff --git a/include/linux/kasan-checks.h b/include/linux/kasan-checks.h
index ca5e89fb10d3..3d6d22a25bdc 100644
--- a/include/linux/kasan-checks.h
+++ b/include/linux/kasan-checks.h
@@ -5,6 +5,12 @@
#include <linux/types.h>
/*
+ * The annotations present in this file are only relevant for the software
+ * KASAN modes that rely on compiler instrumentation, and will be optimized
+ * away for the hardware tag-based KASAN mode. Use kasan_check_byte() instead.
+ */
+
+/*
* __kasan_check_*: Always available when KASAN is enabled. This may be used
* even in compilation units that selectively disable KASAN, but must use KASAN
* to validate access to an address. Never use these in header files!
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 0aea9e2a2a01..7eaf2d9effb4 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -185,19 +185,18 @@ static __always_inline void * __must_check kasan_init_slab_obj(
}
bool __kasan_slab_free(struct kmem_cache *s, void *object, unsigned long ip);
-static __always_inline bool kasan_slab_free(struct kmem_cache *s, void *object,
- unsigned long ip)
+static __always_inline bool kasan_slab_free(struct kmem_cache *s, void *object)
{
if (kasan_enabled())
- return __kasan_slab_free(s, object, ip);
+ return __kasan_slab_free(s, object, _RET_IP_);
return false;
}
void __kasan_slab_free_mempool(void *ptr, unsigned long ip);
-static __always_inline void kasan_slab_free_mempool(void *ptr, unsigned long ip)
+static __always_inline void kasan_slab_free_mempool(void *ptr)
{
if (kasan_enabled())
- __kasan_slab_free_mempool(ptr, ip);
+ __kasan_slab_free_mempool(ptr, _RET_IP_);
}
void * __must_check __kasan_slab_alloc(struct kmem_cache *s,
@@ -241,12 +240,25 @@ static __always_inline void * __must_check kasan_krealloc(const void *object,
}
void __kasan_kfree_large(void *ptr, unsigned long ip);
-static __always_inline void kasan_kfree_large(void *ptr, unsigned long ip)
+static __always_inline void kasan_kfree_large(void *ptr)
{
if (kasan_enabled())
- __kasan_kfree_large(ptr, ip);
+ __kasan_kfree_large(ptr, _RET_IP_);
}
+/*
+ * Unlike kasan_check_read/write(), kasan_check_byte() is performed even for
+ * the hardware tag-based mode that doesn't rely on compiler instrumentation.
+ */
+bool __kasan_check_byte(const void *addr, unsigned long ip);
+static __always_inline bool kasan_check_byte(const void *addr)
+{
+ if (kasan_enabled())
+ return __kasan_check_byte(addr, _RET_IP_);
+ return true;
+}
+
+
bool kasan_save_enable_multi_shot(void);
void kasan_restore_multi_shot(bool enabled);
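[Editorial aside: a hedged sketch of the new check; example_probe_byte is
hypothetical. Unlike the instrumentation-only annotations in
kasan-checks.h, kasan_check_byte() also fires under the hardware
tag-based mode.]

static bool example_probe_byte(const void *addr)
{
	/* true if the byte is accessible; false once KASAN reports a bug */
	return kasan_check_byte(addr);
}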
@@ -277,12 +289,11 @@ static inline void *kasan_init_slab_obj(struct kmem_cache *cache,
{
return (void *)object;
}
-static inline bool kasan_slab_free(struct kmem_cache *s, void *object,
- unsigned long ip)
+static inline bool kasan_slab_free(struct kmem_cache *s, void *object)
{
return false;
}
-static inline void kasan_slab_free_mempool(void *ptr, unsigned long ip) {}
+static inline void kasan_slab_free_mempool(void *ptr) {}
static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object,
gfp_t flags)
{
@@ -302,7 +313,11 @@ static inline void *kasan_krealloc(const void *object, size_t new_size,
{
return (void *)object;
}
-static inline void kasan_kfree_large(void *ptr, unsigned long ip) {}
+static inline void kasan_kfree_large(void *ptr) {}
+static inline bool kasan_check_byte(const void *address)
+{
+ return true;
+}
#endif /* CONFIG_KASAN */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index eeb0b52203e9..e6dc793d587d 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -92,6 +92,10 @@ struct lruvec_stat {
long count[NR_VM_NODE_STAT_ITEMS];
};
+struct batched_lruvec_stat {
+ s32 count[NR_VM_NODE_STAT_ITEMS];
+};
+
/*
* Bitmap of shrinker::id corresponding to memcg-aware shrinkers,
* which have elements charged to this memcg.
@@ -107,11 +111,17 @@ struct memcg_shrinker_map {
struct mem_cgroup_per_node {
struct lruvec lruvec;
- /* Legacy local VM stats */
+ /*
+ * Legacy local VM stats. This must stay struct lruvec_stat and
+ * cannot be optimized to struct batched_lruvec_stat: the
+ * per-cpu deltas in lruvec_stat_cpu are bounded by
+ * MEMCG_CHARGE_BATCH * PAGE_SIZE and so fit into an s32, but
+ * this field has no upper limit.
+ */
struct lruvec_stat __percpu *lruvec_stat_local;
/* Subtree VM stats (batched updates) */
- struct lruvec_stat __percpu *lruvec_stat_cpu;
+ struct batched_lruvec_stat __percpu *lruvec_stat_cpu;
atomic_long_t lruvec_stat[NR_VM_NODE_STAT_ITEMS];
unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
@@ -475,19 +485,6 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page)
return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
-/*
- * set_page_objcgs - associate a page with a object cgroups vector
- * @page: a pointer to the page struct
- * @objcgs: a pointer to the object cgroups vector
- *
- * Atomically associates a page with a vector of object cgroups.
- */
-static inline bool set_page_objcgs(struct page *page,
- struct obj_cgroup **objcgs)
-{
- return !cmpxchg(&page->memcg_data, 0, (unsigned long)objcgs |
- MEMCG_DATA_OBJCGS);
-}
#else
static inline struct obj_cgroup **page_objcgs(struct page *page)
{
@@ -498,12 +495,6 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page)
{
return NULL;
}
-
-static inline bool set_page_objcgs(struct page *page,
- struct obj_cgroup **objcgs)
-{
- return true;
-}
#endif
static __always_inline bool memcg_stat_item_in_bytes(int idx)
@@ -689,8 +680,6 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
-struct mem_cgroup *get_mem_cgroup_from_page(struct page *page);
-
struct lruvec *lock_page_lruvec(struct page *page);
struct lruvec *lock_page_lruvec_irq(struct page *page);
struct lruvec *lock_page_lruvec_irqsave(struct page *page,
@@ -1200,11 +1189,6 @@ static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
return NULL;
}
-static inline struct mem_cgroup *get_mem_cgroup_from_page(struct page *page)
-{
- return NULL;
-}
-
static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{
}
@@ -1601,9 +1585,6 @@ static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
#endif
#ifdef CONFIG_MEMCG_KMEM
-int __memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp,
- unsigned int nr_pages);
-void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages);
int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order);
void __memcg_kmem_uncharge_page(struct page *page, int order);
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 4594838a0f7c..3a389633b68f 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -89,7 +89,7 @@ extern int PageMovable(struct page *page);
extern void __SetPageMovable(struct page *page, struct address_space *mapping);
extern void __ClearPageMovable(struct page *page);
#else
-static inline int PageMovable(struct page *page) { return 0; };
+static inline int PageMovable(struct page *page) { return 0; }
static inline void __SetPageMovable(struct page *page,
struct address_space *mapping)
{
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7deb7168104d..77e64e3eac80 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1187,6 +1187,9 @@ static inline void get_page(struct page *page)
}
bool __must_check try_grab_page(struct page *page, unsigned int flags);
+__maybe_unused struct page *try_grab_compound_head(struct page *page, int refs,
+ unsigned int flags);
+
static inline __must_check bool try_get_page(struct page *page)
{
@@ -2310,32 +2313,20 @@ extern void free_initmem(void);
extern unsigned long free_reserved_area(void *start, void *end,
int poison, const char *s);
-#ifdef CONFIG_HIGHMEM
-/*
- * Free a highmem page into the buddy system, adjusting totalhigh_pages
- * and totalram_pages.
- */
-extern void free_highmem_page(struct page *page);
-#endif
-
extern void adjust_managed_page_count(struct page *page, long count);
extern void mem_init_print_info(const char *str);
extern void reserve_bootmem_region(phys_addr_t start, phys_addr_t end);
/* Free the reserved page into the buddy system, so it gets managed. */
-static inline void __free_reserved_page(struct page *page)
+static inline void free_reserved_page(struct page *page)
{
ClearPageReserved(page);
init_page_count(page);
__free_page(page);
-}
-
-static inline void free_reserved_page(struct page *page)
-{
- __free_reserved_page(page);
adjust_managed_page_count(page, 1);
}
+#define free_highmem_page(page) free_reserved_page(page)
static inline void mark_page_reserved(struct page *page)
{
@@ -2405,9 +2396,10 @@ extern int __meminit early_pfn_to_nid(unsigned long pfn);
#endif
extern void set_dma_reserve(unsigned long new_dma_reserve);
-extern void memmap_init_zone(unsigned long, int, unsigned long,
+extern void memmap_init_range(unsigned long, int, unsigned long,
unsigned long, unsigned long, enum meminit_context,
struct vmem_altmap *, int migratetype);
+extern void memmap_init_zone(struct zone *zone);
extern void setup_per_zone_wmarks(void);
extern int __meminit init_per_zone_wmark_min(void);
extern void mem_init(void);
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 8fc71e9d7bb0..355ea1ee32bd 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -24,7 +24,7 @@ static inline int page_is_file_lru(struct page *page)
return !PageSwapBacked(page);
}
-static __always_inline void __update_lru_size(struct lruvec *lruvec,
+static __always_inline void update_lru_size(struct lruvec *lruvec,
enum lru_list lru, enum zone_type zid,
int nr_pages)
{
@@ -33,76 +33,27 @@ static __always_inline void __update_lru_size(struct lruvec *lruvec,
__mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages);
__mod_zone_page_state(&pgdat->node_zones[zid],
NR_ZONE_LRU_BASE + lru, nr_pages);
-}
-
-static __always_inline void update_lru_size(struct lruvec *lruvec,
- enum lru_list lru, enum zone_type zid,
- int nr_pages)
-{
- __update_lru_size(lruvec, lru, zid, nr_pages);
#ifdef CONFIG_MEMCG
mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages);
#endif
}
-static __always_inline void add_page_to_lru_list(struct page *page,
- struct lruvec *lruvec, enum lru_list lru)
-{
- update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
- list_add(&page->lru, &lruvec->lists[lru]);
-}
-
-static __always_inline void add_page_to_lru_list_tail(struct page *page,
- struct lruvec *lruvec, enum lru_list lru)
-{
- update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
- list_add_tail(&page->lru, &lruvec->lists[lru]);
-}
-
-static __always_inline void del_page_from_lru_list(struct page *page,
- struct lruvec *lruvec, enum lru_list lru)
-{
- list_del(&page->lru);
- update_lru_size(lruvec, lru, page_zonenum(page), -thp_nr_pages(page));
-}
-
/**
- * page_lru_base_type - which LRU list type should a page be on?
- * @page: the page to test
- *
- * Used for LRU list index arithmetic.
- *
- * Returns the base LRU type - file or anon - @page should be on.
+ * __clear_page_lru_flags - clear page lru flags before releasing a page
+ * @page: the page that was on lru and now has a zero reference
*/
-static inline enum lru_list page_lru_base_type(struct page *page)
+static __always_inline void __clear_page_lru_flags(struct page *page)
{
- if (page_is_file_lru(page))
- return LRU_INACTIVE_FILE;
- return LRU_INACTIVE_ANON;
-}
+ VM_BUG_ON_PAGE(!PageLRU(page), page);
-/**
- * page_off_lru - which LRU list was page on? clearing its lru flags.
- * @page: the page to test
- *
- * Returns the LRU list a page was on, as an index into the array of LRU
- * lists; and clears its Unevictable or Active flags, ready for freeing.
- */
-static __always_inline enum lru_list page_off_lru(struct page *page)
-{
- enum lru_list lru;
+ __ClearPageLRU(page);
- if (PageUnevictable(page)) {
- __ClearPageUnevictable(page);
- lru = LRU_UNEVICTABLE;
- } else {
- lru = page_lru_base_type(page);
- if (PageActive(page)) {
- __ClearPageActive(page);
- lru += LRU_ACTIVE;
- }
- }
- return lru;
+ /* this shouldn't happen, so leave the flags to bad_page() */
+ if (PageActive(page) && PageUnevictable(page))
+ return;
+
+ __ClearPageActive(page);
+ __ClearPageUnevictable(page);
}
/**
@@ -116,13 +67,41 @@ static __always_inline enum lru_list page_lru(struct page *page)
{
enum lru_list lru;
+ VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page);
+
if (PageUnevictable(page))
- lru = LRU_UNEVICTABLE;
- else {
- lru = page_lru_base_type(page);
- if (PageActive(page))
- lru += LRU_ACTIVE;
- }
+ return LRU_UNEVICTABLE;
+
+ lru = page_is_file_lru(page) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
+ if (PageActive(page))
+ lru += LRU_ACTIVE;
+
return lru;
}
+
+static __always_inline void add_page_to_lru_list(struct page *page,
+ struct lruvec *lruvec)
+{
+ enum lru_list lru = page_lru(page);
+
+ update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
+ list_add(&page->lru, &lruvec->lists[lru]);
+}
+
+static __always_inline void add_page_to_lru_list_tail(struct page *page,
+ struct lruvec *lruvec)
+{
+ enum lru_list lru = page_lru(page);
+
+ update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
+ list_add_tail(&page->lru, &lruvec->lists[lru]);
+}
+
+static __always_inline void del_page_from_lru_list(struct page *page,
+ struct lruvec *lruvec)
+{
+ list_del(&page->lru);
+ update_lru_size(lruvec, page_lru(page), page_zonenum(page),
+ -thp_nr_pages(page));
+}
#endif
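[Editorial aside: after this rework the LRU helpers no longer take an
enum lru_list argument; they derive it from the page's flags via
page_lru(). An illustrative sketch, not from the diff; example_rotate is
hypothetical.]

static void example_rotate(struct page *page, struct lruvec *lruvec)
{
	del_page_from_lru_list(page, lruvec);	/* lru list computed internally */
	add_page_to_lru_list_tail(page, lruvec);
}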
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index b593316bff3d..9198b7ade85f 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -206,10 +206,30 @@ enum node_stat_item {
NR_KERNEL_SCS_KB, /* measured in KiB */
#endif
NR_PAGETABLE, /* used for pagetables */
+#ifdef CONFIG_SWAP
+ NR_SWAPCACHE,
+#endif
NR_VM_NODE_STAT_ITEMS
};
/*
+ * Returns true if the item should be printed in THPs (/proc/vmstat
+ * currently prints the number of anon, file and shmem THPs, but the
+ * item is charged in pages).
+ */
+static __always_inline bool vmstat_item_print_in_thp(enum node_stat_item item)
+{
+ if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+ return false;
+
+ return item == NR_ANON_THPS ||
+ item == NR_FILE_THPS ||
+ item == NR_SHMEM_THPS ||
+ item == NR_SHMEM_PMDMAPPED ||
+ item == NR_FILE_PMDMAPPED;
+}
+
+/*
* Returns true if the value is measured in bytes (most vmstat values are
* measured in pages). This defines the API part, the internal representation
* might be different.
@@ -872,8 +892,6 @@ static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec)
#endif
}
-extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx);
-
#ifdef CONFIG_HAVE_MEMORYLESS_NODES
int local_memory_node(int node_id);
#else
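[Editorial aside: a sketch of how a /proc/vmstat-style reader might use
the new vmstat_item_print_in_thp() helper above; example_vmstat_value is
hypothetical and assumes HPAGE_PMD_NR from huge_mm.h.]

static unsigned long example_vmstat_value(enum node_stat_item item)
{
	unsigned long pages = global_node_page_state(item);

	if (vmstat_item_print_in_thp(item))
		pages /= HPAGE_PMD_NR;	/* charged in pages, printed in THPs */
	return pages;
}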
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index ec5d0290e0ee..db914477057b 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -592,15 +592,9 @@ static inline void ClearPageCompound(struct page *page)
#ifdef CONFIG_HUGETLB_PAGE
int PageHuge(struct page *page);
int PageHeadHuge(struct page *page);
-bool page_huge_active(struct page *page);
#else
TESTPAGEFLAG_FALSE(Huge)
TESTPAGEFLAG_FALSE(HeadHuge)
-
-static inline bool page_huge_active(struct page *page)
-{
- return 0;
-}
#endif
diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
index 85bd413e784e..679591301994 100644
--- a/include/linux/page_counter.h
+++ b/include/linux/page_counter.h
@@ -12,7 +12,6 @@ struct page_counter {
unsigned long low;
unsigned long high;
unsigned long max;
- struct page_counter *parent;
/* effective memory.min and memory.min usage tracking */
unsigned long emin;
@@ -27,6 +26,14 @@ struct page_counter {
/* legacy */
unsigned long watermark;
unsigned long failcnt;
+
+ /*
+ * 'parent' is placed here to be far from 'usage' to reduce
+ * cache false sharing, as 'usage' is written mostly while
+ * parent is frequently read for cgroup's hierarchical
+ * counting nature.
+ */
+ struct page_counter *parent;
};
#if BITS_PER_LONG == 32
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index d5570deff400..bd629d676a27 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -681,8 +681,7 @@ static inline int wait_on_page_locked_killable(struct page *page)
return wait_on_page_bit_killable(compound_head(page), PG_locked);
}
-extern void put_and_wait_on_page_locked(struct page *page);
-
+int put_and_wait_on_page_locked(struct page *page, int state);
void wait_on_page_writeback(struct page *page);
extern void end_page_writeback(struct page *page);
void wait_for_stable_page(struct page *page);
@@ -757,7 +756,7 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
pgoff_t index, gfp_t gfp_mask);
extern void delete_from_page_cache(struct page *page);
extern void __delete_from_page_cache(struct page *page, void *shadow);
-int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);
+void replace_page_cache_page(struct page *old, struct page *new);
void delete_from_page_cache_batch(struct address_space *mapping,
struct pagevec *pvec);
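[Editorial aside: put_and_wait_on_page_locked() now takes a task state and
returns an int, so killable waits become possible. A hedged sketch; the
return-value semantics are assumed, not shown in this diff.]

static int example_wait_unlock(struct page *page)
{
	/* drops our reference, then sleeps in TASK_KILLABLE until unlocked */
	return put_and_wait_on_page_locked(page, TASK_KILLABLE);
}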
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 3f1f7ae0fbe9..32f665b1ee85 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -356,7 +356,7 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page,
extern unsigned long zone_reclaimable_pages(struct zone *zone);
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
gfp_t gfp_mask, nodemask_t *mask);
-extern int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode);
+extern bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode);
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
@@ -408,7 +408,11 @@ extern struct address_space *swapper_spaces[];
#define swap_address_space(entry) \
(&swapper_spaces[swp_type(entry)][swp_offset(entry) \
>> SWAP_ADDRESS_SPACE_SHIFT])
-extern unsigned long total_swapcache_pages(void);
+static inline unsigned long total_swapcache_pages(void)
+{
+ return global_node_page_state(NR_SWAPCACHE);
+}
+
extern void show_swap_cache_info(void);
extern int add_to_swap(struct page *page);
extern void *get_shadow_from_swap_cache(swp_entry_t entry);
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index f65b1f6db22d..40845b0d5dad 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -115,7 +115,7 @@ DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,
TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
);
-DECLARE_EVENT_CLASS(kmem_free,
+TRACE_EVENT(kfree,
TP_PROTO(unsigned long call_site, const void *ptr),
@@ -135,18 +135,26 @@ DECLARE_EVENT_CLASS(kmem_free,
(void *)__entry->call_site, __entry->ptr)
);
-DEFINE_EVENT(kmem_free, kfree,
+TRACE_EVENT(kmem_cache_free,
- TP_PROTO(unsigned long call_site, const void *ptr),
+ TP_PROTO(unsigned long call_site, const void *ptr, const char *name),
- TP_ARGS(call_site, ptr)
-);
+ TP_ARGS(call_site, ptr, name),
-DEFINE_EVENT(kmem_free, kmem_cache_free,
+ TP_STRUCT__entry(
+ __field( unsigned long, call_site )
+ __field( const void *, ptr )
+ __field( const char *, name )
+ ),
- TP_PROTO(unsigned long call_site, const void *ptr),
+ TP_fast_assign(
+ __entry->call_site = call_site;
+ __entry->ptr = ptr;
+ __entry->name = name;
+ ),
- TP_ARGS(call_site, ptr)
+ TP_printk("call_site=%pS ptr=%p name=%s",
+ (void *)__entry->call_site, __entry->ptr, __entry->name)
);
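[Editorial aside: the standalone kmem_cache_free event now records the
cache name. A hedged caller sketch; example_cache_free is hypothetical.]

static void example_cache_free(struct kmem_cache *s, void *x)
{
	trace_kmem_cache_free(_RET_IP_, x, s->name);	/* name=%s in the output */
	/* ... actual object freeing would follow ... */
}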
TRACE_EVENT(mm_page_free,
diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h
index 8fd1babae761..e1735fe7c76a 100644
--- a/include/trace/events/pagemap.h
+++ b/include/trace/events/pagemap.h
@@ -27,24 +27,21 @@
TRACE_EVENT(mm_lru_insertion,
- TP_PROTO(
- struct page *page,
- int lru
- ),
+ TP_PROTO(struct page *page),
- TP_ARGS(page, lru),
+ TP_ARGS(page),
TP_STRUCT__entry(
__field(struct page *, page )
__field(unsigned long, pfn )
- __field(int, lru )
+ __field(enum lru_list, lru )
__field(unsigned long, flags )
),
TP_fast_assign(
__entry->page = page;
__entry->pfn = page_to_pfn(page);
- __entry->lru = lru;
+ __entry->lru = page_lru(page);
__entry->flags = trace_pagemap_flags(page);
),
diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h
index 3354774af61e..8948467b3992 100644
--- a/include/uapi/linux/mempolicy.h
+++ b/include/uapi/linux/mempolicy.h
@@ -28,12 +28,14 @@ enum {
/* Flags for set_mempolicy */
#define MPOL_F_STATIC_NODES (1 << 15)
#define MPOL_F_RELATIVE_NODES (1 << 14)
+#define MPOL_F_NUMA_BALANCING (1 << 13) /* Optimize with NUMA balancing if possible */
/*
* MPOL_MODE_FLAGS is the union of all possible optional mode flags passed to
* either set_mempolicy() or mbind().
*/
-#define MPOL_MODE_FLAGS (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES)
+#define MPOL_MODE_FLAGS \
+ (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES | MPOL_F_NUMA_BALANCING)
/* Flags for get_mempolicy */
#define MPOL_F_NODE (1<<0) /* return next IL mode instead of node mask */
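[Editorial aside: a userspace sketch of the new MPOL_F_NUMA_BALANCING
flag, matching the "numa balancing: migrate on fault among multiple bound
nodes" patch in this series. Hypothetical example; node mask and error
handling kept minimal.]

#include <stdio.h>
#include <numaif.h>

int main(void)
{
	unsigned long nodemask = (1UL << 0) | (1UL << 1);	/* bind to nodes 0-1 */

	/* keep NUMA-balancing page migration active among the bound nodes */
	if (set_mempolicy(MPOL_BIND | MPOL_F_NUMA_BALANCING,
			  &nodemask, 8 * sizeof(nodemask)))
		perror("set_mempolicy");
	return 0;
}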