From 55f841ce9395a72c6285fbcc4c403c0c786e1c74 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 28 Aug 2013 10:17:53 +1000 Subject: super: fix calculation of shrinkable objects for small numbers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysctl knob sysctl_vfs_cache_pressure is used to determine which percentage of the shrinkable objects in our cache we should actively try to shrink. It works great in situations in which we have many objects (at least more than 100), because the aproximation errors will be negligible. But if this is not the case, specially when total_objects < 100, we may end up concluding that we have no objects at all (total / 100 = 0, if total < 100). This is certainly not the biggest killer in the world, but may matter in very low kernel memory situations. Signed-off-by: Glauber Costa Reviewed-by: Carlos Maiolino Acked-by: KAMEZAWA Hiroyuki Acked-by: Mel Gorman Cc: Dave Chinner Cc: Al Viro Cc: "Theodore Ts'o" Cc: Adrian Hunter Cc: Al Viro Cc: Artem Bityutskiy Cc: Arve Hjønnevåg Cc: Carlos Maiolino Cc: Christoph Hellwig Cc: Chuck Lever Cc: Daniel Vetter Cc: David Rientjes Cc: Gleb Natapov Cc: Greg Thelen Cc: J. Bruce Fields Cc: Jan Kara Cc: Jerome Glisse Cc: John Stultz Cc: KAMEZAWA Hiroyuki Cc: Kent Overstreet Cc: Kirill A. Shutemov Cc: Marcelo Tosatti Cc: Mel Gorman Cc: Steven Whitehouse Cc: Thomas Hellstrom Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/xfs/xfs_qm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 6218a0aeeee..956da2e1c7a 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -1722,7 +1722,7 @@ xfs_qm_shake( } out: - return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure; + return vfs_pressure_ratio(qi->qi_lru_count); } /* -- cgit v1.2.3 From 0a234c6dcb79a270803f5c9773ed650b78730962 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 28 Aug 2013 10:17:57 +1000 Subject: shrinker: convert superblock shrinkers to new API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert superblock shrinker to use the new count/scan API, and propagate the API changes through to the filesystem callouts. The filesystem callouts already use a count/scan API, so it's just changing counters to longs to match the VM API. This requires the dentry and inode shrinker callouts to be converted to the count/scan API. This is mainly a mechanical change. [glommer@openvz.org: use mult_frac for fractional proportions, build fixes] Signed-off-by: Dave Chinner Signed-off-by: Glauber Costa Acked-by: Mel Gorman Cc: "Theodore Ts'o" Cc: Adrian Hunter Cc: Al Viro Cc: Artem Bityutskiy Cc: Arve Hjønnevåg Cc: Carlos Maiolino Cc: Christoph Hellwig Cc: Chuck Lever Cc: Daniel Vetter Cc: David Rientjes Cc: Gleb Natapov Cc: Greg Thelen Cc: J. Bruce Fields Cc: Jan Kara Cc: Jerome Glisse Cc: John Stultz Cc: KAMEZAWA Hiroyuki Cc: Kent Overstreet Cc: Kirill A. Shutemov Cc: Marcelo Tosatti Cc: Mel Gorman Cc: Steven Whitehouse Cc: Thomas Hellstrom Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/xfs/xfs_icache.c | 4 ++-- fs/xfs/xfs_icache.h | 2 +- fs/xfs/xfs_super.c | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 16219b9c679..73b62a24cea 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1167,7 +1167,7 @@ xfs_reclaim_inodes( * them to be cleaned, which we hope will not be very long due to the * background walker having already kicked the IO off on those dirty inodes. */ -void +long xfs_reclaim_inodes_nr( struct xfs_mount *mp, int nr_to_scan) @@ -1176,7 +1176,7 @@ xfs_reclaim_inodes_nr( xfs_reclaim_work_queue(mp); xfs_ail_push_all(mp->m_ail); - xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan); + return xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan); } /* diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 8a89f7d791b..456f0144e1b 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -46,7 +46,7 @@ void xfs_reclaim_worker(struct work_struct *work); int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); int xfs_reclaim_inodes_count(struct xfs_mount *mp); -void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); +long xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 979a77d4b87..71d7aaebb91 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1535,19 +1535,19 @@ xfs_fs_mount( return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); } -static int +static long xfs_fs_nr_cached_objects( struct super_block *sb) { return xfs_reclaim_inodes_count(XFS_M(sb)); } -static void +static long xfs_fs_free_cached_objects( struct super_block *sb, - int nr_to_scan) + long nr_to_scan) { - xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan); + return xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan); } static const struct super_operations xfs_super_operations = { -- cgit v1.2.3 From 9b17c62382dd2e7507984b9890bf44e070cdd8bb Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 28 Aug 2013 10:18:05 +1000 Subject: fs: convert inode and dentry shrinking to be node aware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the shrinker is passing a node in the scan control structure, we can pass this to the the generic LRU list code to isolate reclaim to the lists on matching nodes. Signed-off-by: Dave Chinner Signed-off-by: Glauber Costa Acked-by: Mel Gorman Cc: "Theodore Ts'o" Cc: Adrian Hunter Cc: Al Viro Cc: Artem Bityutskiy Cc: Arve Hjønnevåg Cc: Carlos Maiolino Cc: Christoph Hellwig Cc: Chuck Lever Cc: Daniel Vetter Cc: David Rientjes Cc: Gleb Natapov Cc: Greg Thelen Cc: J. Bruce Fields Cc: Jan Kara Cc: Jerome Glisse Cc: John Stultz Cc: KAMEZAWA Hiroyuki Cc: Kent Overstreet Cc: Kirill A. Shutemov Cc: Marcelo Tosatti Cc: Mel Gorman Cc: Steven Whitehouse Cc: Thomas Hellstrom Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/xfs/xfs_super.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 71d7aaebb91..15188cc9944 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1537,7 +1537,8 @@ xfs_fs_mount( static long xfs_fs_nr_cached_objects( - struct super_block *sb) + struct super_block *sb, + int nid) { return xfs_reclaim_inodes_count(XFS_M(sb)); } @@ -1545,7 +1546,8 @@ xfs_fs_nr_cached_objects( static long xfs_fs_free_cached_objects( struct super_block *sb, - long nr_to_scan) + long nr_to_scan, + int nid) { return xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan); } -- cgit v1.2.3 From e80dfa19976b884db1ac2bc5d7d6ca0a4027bd1c Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 28 Aug 2013 10:18:05 +1000 Subject: xfs: convert buftarg LRU to generic code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert the buftarg LRU to use the new generic LRU list and take advantage of the functionality it supplies to make the buffer cache shrinker node aware. Signed-off-by: Glauber Costa Signed-off-by: Dave Chinner Cc: "Theodore Ts'o" Cc: Adrian Hunter Cc: Al Viro Cc: Artem Bityutskiy Cc: Arve Hjønnevåg Cc: Carlos Maiolino Cc: Christoph Hellwig Cc: Chuck Lever Cc: Daniel Vetter Cc: David Rientjes Cc: Gleb Natapov Cc: Greg Thelen Cc: J. Bruce Fields Cc: Jan Kara Cc: Jerome Glisse Cc: John Stultz Cc: KAMEZAWA Hiroyuki Cc: Kent Overstreet Cc: Kirill A. Shutemov Cc: Marcelo Tosatti Cc: Mel Gorman Cc: Steven Whitehouse Cc: Thomas Hellstrom Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/xfs/xfs_buf.c | 170 ++++++++++++++++++++++++++----------------------------- fs/xfs/xfs_buf.h | 5 +- 2 files changed, 82 insertions(+), 93 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index c06823fe10d..665ff792d66 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -86,20 +86,14 @@ xfs_buf_vmap_len( * The LRU takes a new reference to the buffer so that it will only be freed * once the shrinker takes the buffer off the LRU. */ -STATIC void +static void xfs_buf_lru_add( struct xfs_buf *bp) { - struct xfs_buftarg *btp = bp->b_target; - - spin_lock(&btp->bt_lru_lock); - if (list_empty(&bp->b_lru)) { - atomic_inc(&bp->b_hold); - list_add_tail(&bp->b_lru, &btp->bt_lru); - btp->bt_lru_nr++; + if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) { bp->b_lru_flags &= ~_XBF_LRU_DISPOSE; + atomic_inc(&bp->b_hold); } - spin_unlock(&btp->bt_lru_lock); } /* @@ -108,24 +102,13 @@ xfs_buf_lru_add( * The unlocked check is safe here because it only occurs when there are not * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there * to optimise the shrinker removing the buffer from the LRU and calling - * xfs_buf_free(). i.e. it removes an unnecessary round trip on the - * bt_lru_lock. + * xfs_buf_free(). */ -STATIC void +static void xfs_buf_lru_del( struct xfs_buf *bp) { - struct xfs_buftarg *btp = bp->b_target; - - if (list_empty(&bp->b_lru)) - return; - - spin_lock(&btp->bt_lru_lock); - if (!list_empty(&bp->b_lru)) { - list_del_init(&bp->b_lru); - btp->bt_lru_nr--; - } - spin_unlock(&btp->bt_lru_lock); + list_lru_del(&bp->b_target->bt_lru, &bp->b_lru); } /* @@ -152,18 +135,10 @@ xfs_buf_stale( bp->b_flags &= ~_XBF_DELWRI_Q; atomic_set(&(bp)->b_lru_ref, 0); - if (!list_empty(&bp->b_lru)) { - struct xfs_buftarg *btp = bp->b_target; - - spin_lock(&btp->bt_lru_lock); - if (!list_empty(&bp->b_lru) && - !(bp->b_lru_flags & _XBF_LRU_DISPOSE)) { - list_del_init(&bp->b_lru); - btp->bt_lru_nr--; - atomic_dec(&bp->b_hold); - } - spin_unlock(&btp->bt_lru_lock); - } + if (!(bp->b_lru_flags & _XBF_LRU_DISPOSE) && + (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru))) + atomic_dec(&bp->b_hold); + ASSERT(atomic_read(&bp->b_hold) >= 1); } @@ -1502,83 +1477,97 @@ xfs_buf_iomove( * returned. These buffers will have an elevated hold count, so wait on those * while freeing all the buffers only held by the LRU. */ -void -xfs_wait_buftarg( - struct xfs_buftarg *btp) +static enum lru_status +xfs_buftarg_wait_rele( + struct list_head *item, + spinlock_t *lru_lock, + void *arg) + { - struct xfs_buf *bp; + struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru); -restart: - spin_lock(&btp->bt_lru_lock); - while (!list_empty(&btp->bt_lru)) { - bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); - if (atomic_read(&bp->b_hold) > 1) { - trace_xfs_buf_wait_buftarg(bp, _RET_IP_); - list_move_tail(&bp->b_lru, &btp->bt_lru); - spin_unlock(&btp->bt_lru_lock); - delay(100); - goto restart; - } + if (atomic_read(&bp->b_hold) > 1) { + /* need to wait */ + trace_xfs_buf_wait_buftarg(bp, _RET_IP_); + spin_unlock(lru_lock); + delay(100); + } else { /* * clear the LRU reference count so the buffer doesn't get * ignored in xfs_buf_rele(). */ atomic_set(&bp->b_lru_ref, 0); - spin_unlock(&btp->bt_lru_lock); + spin_unlock(lru_lock); xfs_buf_rele(bp); - spin_lock(&btp->bt_lru_lock); } - spin_unlock(&btp->bt_lru_lock); + + spin_lock(lru_lock); + return LRU_RETRY; } -int -xfs_buftarg_shrink( +void +xfs_wait_buftarg( + struct xfs_buftarg *btp) +{ + while (list_lru_count(&btp->bt_lru)) + list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele, + NULL, LONG_MAX); +} + +static enum lru_status +xfs_buftarg_isolate( + struct list_head *item, + spinlock_t *lru_lock, + void *arg) +{ + struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru); + struct list_head *dispose = arg; + + /* + * Decrement the b_lru_ref count unless the value is already + * zero. If the value is already zero, we need to reclaim the + * buffer, otherwise it gets another trip through the LRU. + */ + if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) + return LRU_ROTATE; + + bp->b_lru_flags |= _XBF_LRU_DISPOSE; + list_move(item, dispose); + return LRU_REMOVED; +} + +static long +xfs_buftarg_shrink_scan( struct shrinker *shrink, struct shrink_control *sc) { struct xfs_buftarg *btp = container_of(shrink, struct xfs_buftarg, bt_shrinker); - struct xfs_buf *bp; - int nr_to_scan = sc->nr_to_scan; LIST_HEAD(dispose); + long freed; + unsigned long nr_to_scan = sc->nr_to_scan; - if (!nr_to_scan) - return btp->bt_lru_nr; - - spin_lock(&btp->bt_lru_lock); - while (!list_empty(&btp->bt_lru)) { - if (nr_to_scan-- <= 0) - break; - - bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); - - /* - * Decrement the b_lru_ref count unless the value is already - * zero. If the value is already zero, we need to reclaim the - * buffer, otherwise it gets another trip through the LRU. - */ - if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { - list_move_tail(&bp->b_lru, &btp->bt_lru); - continue; - } - - /* - * remove the buffer from the LRU now to avoid needing another - * lock round trip inside xfs_buf_rele(). - */ - list_move(&bp->b_lru, &dispose); - btp->bt_lru_nr--; - bp->b_lru_flags |= _XBF_LRU_DISPOSE; - } - spin_unlock(&btp->bt_lru_lock); + freed = list_lru_walk_node(&btp->bt_lru, sc->nid, xfs_buftarg_isolate, + &dispose, &nr_to_scan); while (!list_empty(&dispose)) { + struct xfs_buf *bp; bp = list_first_entry(&dispose, struct xfs_buf, b_lru); list_del_init(&bp->b_lru); xfs_buf_rele(bp); } - return btp->bt_lru_nr; + return freed; +} + +static long +xfs_buftarg_shrink_count( + struct shrinker *shrink, + struct shrink_control *sc) +{ + struct xfs_buftarg *btp = container_of(shrink, + struct xfs_buftarg, bt_shrinker); + return list_lru_count_node(&btp->bt_lru, sc->nid); } void @@ -1660,12 +1649,13 @@ xfs_alloc_buftarg( if (!btp->bt_bdi) goto error; - INIT_LIST_HEAD(&btp->bt_lru); - spin_lock_init(&btp->bt_lru_lock); + list_lru_init(&btp->bt_lru); if (xfs_setsize_buftarg_early(btp, bdev)) goto error; - btp->bt_shrinker.shrink = xfs_buftarg_shrink; + btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count; + btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan; btp->bt_shrinker.seeks = DEFAULT_SEEKS; + btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE; register_shrinker(&btp->bt_shrinker); return btp; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 433a12ed7b1..5ec7d35a77e 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -25,6 +25,7 @@ #include #include #include +#include /* * Base types @@ -92,9 +93,7 @@ typedef struct xfs_buftarg { /* LRU control structures */ struct shrinker bt_shrinker; - struct list_head bt_lru; - spinlock_t bt_lru_lock; - unsigned int bt_lru_nr; + struct list_lru bt_lru; } xfs_buftarg_t; struct xfs_buf; -- cgit v1.2.3 From addbda40bed47d8942658fca93e14b5f1cbf009a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 28 Aug 2013 10:18:06 +1000 Subject: xfs-convert-buftarg-lru-to-generic-code-fix fix warnings Cc: Dave Chinner Cc: Glauber Costa Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/xfs/xfs_buf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 665ff792d66..76c595a9ad4 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1536,7 +1536,7 @@ xfs_buftarg_isolate( return LRU_REMOVED; } -static long +static unsigned long xfs_buftarg_shrink_scan( struct shrinker *shrink, struct shrink_control *sc) @@ -1544,7 +1544,7 @@ xfs_buftarg_shrink_scan( struct xfs_buftarg *btp = container_of(shrink, struct xfs_buftarg, bt_shrinker); LIST_HEAD(dispose); - long freed; + unsigned long freed; unsigned long nr_to_scan = sc->nr_to_scan; freed = list_lru_walk_node(&btp->bt_lru, sc->nid, xfs_buftarg_isolate, @@ -1560,7 +1560,7 @@ xfs_buftarg_shrink_scan( return freed; } -static long +static unsigned long xfs_buftarg_shrink_count( struct shrinker *shrink, struct shrink_control *sc) -- cgit v1.2.3 From a408235726aa82c0358c9ec68124b6f4bc0a79df Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 28 Aug 2013 10:18:06 +1000 Subject: xfs: rework buffer dispose list tracking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In converting the buffer lru lists to use the generic code, the locking for marking the buffers as on the dispose list was lost. This results in confusion in LRU buffer tracking and acocunting, resulting in reference counts being mucked up and filesystem beig unmountable. To fix this, introduce an internal buffer spinlock to protect the state field that holds the dispose list information. Because there is now locking needed around xfs_buf_lru_add/del, and they are used in exactly one place each two lines apart, get rid of the wrappers and code the logic directly in place. Further, the LRU emptying code used on unmount is less than optimal. Convert it to use a dispose list as per a normal shrinker walk, and repeat the walk that fills the dispose list until the LRU is empty. Thi avoids needing to drop and regain the LRU lock for every item being freed, and allows the same logic as the shrinker isolate call to be used. Simpler, easier to understand. Signed-off-by: Dave Chinner Signed-off-by: Glauber Costa Cc: "Theodore Ts'o" Cc: Adrian Hunter Cc: Al Viro Cc: Artem Bityutskiy Cc: Arve Hjønnevåg Cc: Carlos Maiolino Cc: Christoph Hellwig Cc: Chuck Lever Cc: Daniel Vetter Cc: David Rientjes Cc: Gleb Natapov Cc: Greg Thelen Cc: J. Bruce Fields Cc: Jan Kara Cc: Jerome Glisse Cc: John Stultz Cc: KAMEZAWA Hiroyuki Cc: Kent Overstreet Cc: Kirill A. Shutemov Cc: Marcelo Tosatti Cc: Mel Gorman Cc: Steven Whitehouse Cc: Thomas Hellstrom Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/xfs/xfs_buf.c | 125 +++++++++++++++++++++++++++++++------------------------ fs/xfs/xfs_buf.h | 12 ++++-- 2 files changed, 79 insertions(+), 58 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 76c595a9ad4..d46f6a3dc1d 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -80,37 +80,6 @@ xfs_buf_vmap_len( return (bp->b_page_count * PAGE_SIZE) - bp->b_offset; } -/* - * xfs_buf_lru_add - add a buffer to the LRU. - * - * The LRU takes a new reference to the buffer so that it will only be freed - * once the shrinker takes the buffer off the LRU. - */ -static void -xfs_buf_lru_add( - struct xfs_buf *bp) -{ - if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) { - bp->b_lru_flags &= ~_XBF_LRU_DISPOSE; - atomic_inc(&bp->b_hold); - } -} - -/* - * xfs_buf_lru_del - remove a buffer from the LRU - * - * The unlocked check is safe here because it only occurs when there are not - * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there - * to optimise the shrinker removing the buffer from the LRU and calling - * xfs_buf_free(). - */ -static void -xfs_buf_lru_del( - struct xfs_buf *bp) -{ - list_lru_del(&bp->b_target->bt_lru, &bp->b_lru); -} - /* * When we mark a buffer stale, we remove the buffer from the LRU and clear the * b_lru_ref count so that the buffer is freed immediately when the buffer @@ -134,12 +103,14 @@ xfs_buf_stale( */ bp->b_flags &= ~_XBF_DELWRI_Q; - atomic_set(&(bp)->b_lru_ref, 0); - if (!(bp->b_lru_flags & _XBF_LRU_DISPOSE) && + spin_lock(&bp->b_lock); + atomic_set(&bp->b_lru_ref, 0); + if (!(bp->b_state & XFS_BSTATE_DISPOSE) && (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru))) atomic_dec(&bp->b_hold); ASSERT(atomic_read(&bp->b_hold) >= 1); + spin_unlock(&bp->b_lock); } static int @@ -203,6 +174,7 @@ _xfs_buf_alloc( INIT_LIST_HEAD(&bp->b_list); RB_CLEAR_NODE(&bp->b_rbnode); sema_init(&bp->b_sema, 0); /* held, no waiters */ + spin_lock_init(&bp->b_lock); XB_SET_OWNER(bp); bp->b_target = target; bp->b_flags = flags; @@ -892,12 +864,33 @@ xfs_buf_rele( ASSERT(atomic_read(&bp->b_hold) > 0); if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { - if (!(bp->b_flags & XBF_STALE) && - atomic_read(&bp->b_lru_ref)) { - xfs_buf_lru_add(bp); + spin_lock(&bp->b_lock); + if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { + /* + * If the buffer is added to the LRU take a new + * reference to the buffer for the LRU and clear the + * (now stale) dispose list state flag + */ + if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) { + bp->b_state &= ~XFS_BSTATE_DISPOSE; + atomic_inc(&bp->b_hold); + } + spin_unlock(&bp->b_lock); spin_unlock(&pag->pag_buf_lock); } else { - xfs_buf_lru_del(bp); + /* + * most of the time buffers will already be removed from + * the LRU, so optimise that case by checking for the + * XFS_BSTATE_DISPOSE flag indicating the last list the + * buffer was on was the disposal list + */ + if (!(bp->b_state & XFS_BSTATE_DISPOSE)) { + list_lru_del(&bp->b_target->bt_lru, &bp->b_lru); + } else { + ASSERT(list_empty(&bp->b_lru)); + } + spin_unlock(&bp->b_lock); + ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); spin_unlock(&pag->pag_buf_lock); @@ -1485,33 +1478,48 @@ xfs_buftarg_wait_rele( { struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru); + struct list_head *dispose = arg; if (atomic_read(&bp->b_hold) > 1) { - /* need to wait */ + /* need to wait, so skip it this pass */ trace_xfs_buf_wait_buftarg(bp, _RET_IP_); - spin_unlock(lru_lock); - delay(100); - } else { - /* - * clear the LRU reference count so the buffer doesn't get - * ignored in xfs_buf_rele(). - */ - atomic_set(&bp->b_lru_ref, 0); - spin_unlock(lru_lock); - xfs_buf_rele(bp); + return LRU_SKIP; } + if (!spin_trylock(&bp->b_lock)) + return LRU_SKIP; - spin_lock(lru_lock); - return LRU_RETRY; + /* + * clear the LRU reference count so the buffer doesn't get + * ignored in xfs_buf_rele(). + */ + atomic_set(&bp->b_lru_ref, 0); + bp->b_state |= XFS_BSTATE_DISPOSE; + list_move(item, dispose); + spin_unlock(&bp->b_lock); + return LRU_REMOVED; } void xfs_wait_buftarg( struct xfs_buftarg *btp) { - while (list_lru_count(&btp->bt_lru)) + LIST_HEAD(dispose); + int loop = 0; + + /* loop until there is nothing left on the lru list. */ + while (list_lru_count(&btp->bt_lru)) { list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele, - NULL, LONG_MAX); + &dispose, LONG_MAX); + + while (!list_empty(&dispose)) { + struct xfs_buf *bp; + bp = list_first_entry(&dispose, struct xfs_buf, b_lru); + list_del_init(&bp->b_lru); + xfs_buf_rele(bp); + } + if (loop++ != 0) + delay(100); + } } static enum lru_status @@ -1523,16 +1531,25 @@ xfs_buftarg_isolate( struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru); struct list_head *dispose = arg; + /* + * we are inverting the lru lock/bp->b_lock here, so use a trylock. + * If we fail to get the lock, just skip it. + */ + if (!spin_trylock(&bp->b_lock)) + return LRU_SKIP; /* * Decrement the b_lru_ref count unless the value is already * zero. If the value is already zero, we need to reclaim the * buffer, otherwise it gets another trip through the LRU. */ - if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) + if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { + spin_unlock(&bp->b_lock); return LRU_ROTATE; + } - bp->b_lru_flags |= _XBF_LRU_DISPOSE; + bp->b_state |= XFS_BSTATE_DISPOSE; list_move(item, dispose); + spin_unlock(&bp->b_lock); return LRU_REMOVED; } diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 5ec7d35a77e..e6568336101 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -60,7 +60,6 @@ typedef enum { #define _XBF_KMEM (1 << 21)/* backed by heap memory */ #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ #define _XBF_COMPOUND (1 << 23)/* compound buffer */ -#define _XBF_LRU_DISPOSE (1 << 24)/* buffer being discarded */ typedef unsigned int xfs_buf_flags_t; @@ -79,8 +78,12 @@ typedef unsigned int xfs_buf_flags_t; { _XBF_PAGES, "PAGES" }, \ { _XBF_KMEM, "KMEM" }, \ { _XBF_DELWRI_Q, "DELWRI_Q" }, \ - { _XBF_COMPOUND, "COMPOUND" }, \ - { _XBF_LRU_DISPOSE, "LRU_DISPOSE" } + { _XBF_COMPOUND, "COMPOUND" } + +/* + * Internal state flags. + */ +#define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */ typedef struct xfs_buftarg { dev_t bt_dev; @@ -136,7 +139,8 @@ typedef struct xfs_buf { * bt_lru_lock and not by b_sema */ struct list_head b_lru; /* lru list */ - xfs_buf_flags_t b_lru_flags; /* internal lru status flags */ + spinlock_t b_lock; /* internal state lock */ + unsigned int b_state; /* internal state flags */ wait_queue_head_t b_waiters; /* unpin waiters */ struct list_head b_list; struct xfs_perag *b_pag; /* contains rbtree root */ -- cgit v1.2.3 From cd56a39a59868911bbf8832725630c1cf43a7b09 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 28 Aug 2013 10:18:07 +1000 Subject: xfs: convert dquot cache lru to list_lru MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert the XFS dquot lru to use the list_lru construct and convert the shrinker to being node aware. [glommer@openvz.org: edited for conflicts + warning fixes] Signed-off-by: Dave Chinner Signed-off-by: Glauber Costa Cc: "Theodore Ts'o" Cc: Adrian Hunter Cc: Al Viro Cc: Artem Bityutskiy Cc: Arve Hjønnevåg Cc: Carlos Maiolino Cc: Christoph Hellwig Cc: Chuck Lever Cc: Daniel Vetter Cc: David Rientjes Cc: Gleb Natapov Cc: Greg Thelen Cc: J. Bruce Fields Cc: Jan Kara Cc: Jerome Glisse Cc: John Stultz Cc: KAMEZAWA Hiroyuki Cc: Kent Overstreet Cc: Kirill A. Shutemov Cc: Marcelo Tosatti Cc: Mel Gorman Cc: Steven Whitehouse Cc: Thomas Hellstrom Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/xfs/xfs_dquot.c | 7 +- fs/xfs/xfs_qm.c | 277 +++++++++++++++++++++++++++-------------------------- fs/xfs/xfs_qm.h | 4 +- 3 files changed, 144 insertions(+), 144 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 251c66632e5..71520e6e5d6 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -940,13 +940,8 @@ xfs_qm_dqput_final( trace_xfs_dqput_free(dqp); - mutex_lock(&qi->qi_lru_lock); - if (list_empty(&dqp->q_lru)) { - list_add_tail(&dqp->q_lru, &qi->qi_lru_list); - qi->qi_lru_count++; + if (list_lru_add(&qi->qi_lru, &dqp->q_lru)) XFS_STATS_INC(xs_qm_dquot_unused); - } - mutex_unlock(&qi->qi_lru_lock); /* * If we just added a udquot to the freelist, then we want to release diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 956da2e1c7a..0fa98753bf6 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -51,8 +51,9 @@ */ STATIC int xfs_qm_init_quotainos(xfs_mount_t *); STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); -STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); + +STATIC void xfs_qm_dqfree_one(struct xfs_dquot *dqp); /* * We use the batch lookup interface to iterate over the dquots as it * currently is the only interface into the radix tree code that allows @@ -203,12 +204,9 @@ xfs_qm_dqpurge( * We move dquots to the freelist as soon as their reference count * hits zero, so it really should be on the freelist here. */ - mutex_lock(&qi->qi_lru_lock); ASSERT(!list_empty(&dqp->q_lru)); - list_del_init(&dqp->q_lru); - qi->qi_lru_count--; + list_lru_del(&qi->qi_lru, &dqp->q_lru); XFS_STATS_DEC(xs_qm_dquot_unused); - mutex_unlock(&qi->qi_lru_lock); xfs_qm_dqdestroy(dqp); @@ -680,6 +678,141 @@ xfs_qm_calc_dquots_per_chunk( return ndquots; } +struct xfs_qm_isolate { + struct list_head buffers; + struct list_head dispose; +}; + +static enum lru_status +xfs_qm_dquot_isolate( + struct list_head *item, + spinlock_t *lru_lock, + void *arg) +{ + struct xfs_dquot *dqp = container_of(item, + struct xfs_dquot, q_lru); + struct xfs_qm_isolate *isol = arg; + + if (!xfs_dqlock_nowait(dqp)) + goto out_miss_busy; + + /* + * This dquot has acquired a reference in the meantime remove it from + * the freelist and try again. + */ + if (dqp->q_nrefs) { + xfs_dqunlock(dqp); + XFS_STATS_INC(xs_qm_dqwants); + + trace_xfs_dqreclaim_want(dqp); + list_del_init(&dqp->q_lru); + XFS_STATS_DEC(xs_qm_dquot_unused); + return 0; + } + + /* + * If the dquot is dirty, flush it. If it's already being flushed, just + * skip it so there is time for the IO to complete before we try to + * reclaim it again on the next LRU pass. + */ + if (!xfs_dqflock_nowait(dqp)) { + xfs_dqunlock(dqp); + goto out_miss_busy; + } + + if (XFS_DQ_IS_DIRTY(dqp)) { + struct xfs_buf *bp = NULL; + int error; + + trace_xfs_dqreclaim_dirty(dqp); + + /* we have to drop the LRU lock to flush the dquot */ + spin_unlock(lru_lock); + + error = xfs_qm_dqflush(dqp, &bp); + if (error) { + xfs_warn(dqp->q_mount, "%s: dquot %p flush failed", + __func__, dqp); + goto out_unlock_dirty; + } + + xfs_buf_delwri_queue(bp, &isol->buffers); + xfs_buf_relse(bp); + goto out_unlock_dirty; + } + xfs_dqfunlock(dqp); + + /* + * Prevent lookups now that we are past the point of no return. + */ + dqp->dq_flags |= XFS_DQ_FREEING; + xfs_dqunlock(dqp); + + ASSERT(dqp->q_nrefs == 0); + list_move_tail(&dqp->q_lru, &isol->dispose); + XFS_STATS_DEC(xs_qm_dquot_unused); + trace_xfs_dqreclaim_done(dqp); + XFS_STATS_INC(xs_qm_dqreclaims); + return 0; + +out_miss_busy: + trace_xfs_dqreclaim_busy(dqp); + XFS_STATS_INC(xs_qm_dqreclaim_misses); + return 2; + +out_unlock_dirty: + trace_xfs_dqreclaim_busy(dqp); + XFS_STATS_INC(xs_qm_dqreclaim_misses); + return 3; +} + +static long +xfs_qm_shrink_scan( + struct shrinker *shrink, + struct shrink_control *sc) +{ + struct xfs_quotainfo *qi = container_of(shrink, + struct xfs_quotainfo, qi_shrinker); + struct xfs_qm_isolate isol; + long freed; + int error; + unsigned long nr_to_scan = sc->nr_to_scan; + + if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) + return 0; + + INIT_LIST_HEAD(&isol.buffers); + INIT_LIST_HEAD(&isol.dispose); + + freed = list_lru_walk_node(&qi->qi_lru, sc->nid, xfs_qm_dquot_isolate, &isol, + &nr_to_scan); + + error = xfs_buf_delwri_submit(&isol.buffers); + if (error) + xfs_warn(NULL, "%s: dquot reclaim failed", __func__); + + while (!list_empty(&isol.dispose)) { + struct xfs_dquot *dqp; + + dqp = list_first_entry(&isol.dispose, struct xfs_dquot, q_lru); + list_del_init(&dqp->q_lru); + xfs_qm_dqfree_one(dqp); + } + + return freed; +} + +static long +xfs_qm_shrink_count( + struct shrinker *shrink, + struct shrink_control *sc) +{ + struct xfs_quotainfo *qi = container_of(shrink, + struct xfs_quotainfo, qi_shrinker); + + return list_lru_count_node(&qi->qi_lru, sc->nid); +} + /* * This initializes all the quota information that's kept in the * mount structure @@ -711,9 +844,7 @@ xfs_qm_init_quotainfo( INIT_RADIX_TREE(&qinf->qi_pquota_tree, GFP_NOFS); mutex_init(&qinf->qi_tree_lock); - INIT_LIST_HEAD(&qinf->qi_lru_list); - qinf->qi_lru_count = 0; - mutex_init(&qinf->qi_lru_lock); + list_lru_init(&qinf->qi_lru); /* mutex used to serialize quotaoffs */ mutex_init(&qinf->qi_quotaofflock); @@ -779,8 +910,10 @@ xfs_qm_init_quotainfo( qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; } - qinf->qi_shrinker.shrink = xfs_qm_shake; + qinf->qi_shrinker.count_objects = xfs_qm_shrink_count; + qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan; qinf->qi_shrinker.seeks = DEFAULT_SEEKS; + qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE; register_shrinker(&qinf->qi_shrinker); return 0; } @@ -1599,132 +1732,6 @@ xfs_qm_dqfree_one( xfs_qm_dqdestroy(dqp); } -STATIC void -xfs_qm_dqreclaim_one( - struct xfs_dquot *dqp, - struct list_head *buffer_list, - struct list_head *dispose_list) -{ - struct xfs_mount *mp = dqp->q_mount; - struct xfs_quotainfo *qi = mp->m_quotainfo; - int error; - - if (!xfs_dqlock_nowait(dqp)) - goto out_move_tail; - - /* - * This dquot has acquired a reference in the meantime remove it from - * the freelist and try again. - */ - if (dqp->q_nrefs) { - xfs_dqunlock(dqp); - - trace_xfs_dqreclaim_want(dqp); - XFS_STATS_INC(xs_qm_dqwants); - - list_del_init(&dqp->q_lru); - qi->qi_lru_count--; - XFS_STATS_DEC(xs_qm_dquot_unused); - return; - } - - /* - * Try to grab the flush lock. If this dquot is in the process of - * getting flushed to disk, we don't want to reclaim it. - */ - if (!xfs_dqflock_nowait(dqp)) - goto out_unlock_move_tail; - - if (XFS_DQ_IS_DIRTY(dqp)) { - struct xfs_buf *bp = NULL; - - trace_xfs_dqreclaim_dirty(dqp); - - error = xfs_qm_dqflush(dqp, &bp); - if (error) { - xfs_warn(mp, "%s: dquot %p flush failed", - __func__, dqp); - goto out_unlock_move_tail; - } - - xfs_buf_delwri_queue(bp, buffer_list); - xfs_buf_relse(bp); - /* - * Give the dquot another try on the freelist, as the - * flushing will take some time. - */ - goto out_unlock_move_tail; - } - xfs_dqfunlock(dqp); - - /* - * Prevent lookups now that we are past the point of no return. - */ - dqp->dq_flags |= XFS_DQ_FREEING; - xfs_dqunlock(dqp); - - ASSERT(dqp->q_nrefs == 0); - list_move_tail(&dqp->q_lru, dispose_list); - qi->qi_lru_count--; - XFS_STATS_DEC(xs_qm_dquot_unused); - - trace_xfs_dqreclaim_done(dqp); - XFS_STATS_INC(xs_qm_dqreclaims); - return; - - /* - * Move the dquot to the tail of the list so that we don't spin on it. - */ -out_unlock_move_tail: - xfs_dqunlock(dqp); -out_move_tail: - list_move_tail(&dqp->q_lru, &qi->qi_lru_list); - trace_xfs_dqreclaim_busy(dqp); - XFS_STATS_INC(xs_qm_dqreclaim_misses); -} - -STATIC int -xfs_qm_shake( - struct shrinker *shrink, - struct shrink_control *sc) -{ - struct xfs_quotainfo *qi = - container_of(shrink, struct xfs_quotainfo, qi_shrinker); - int nr_to_scan = sc->nr_to_scan; - LIST_HEAD (buffer_list); - LIST_HEAD (dispose_list); - struct xfs_dquot *dqp; - int error; - - if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) - return 0; - if (!nr_to_scan) - goto out; - - mutex_lock(&qi->qi_lru_lock); - while (!list_empty(&qi->qi_lru_list)) { - if (nr_to_scan-- <= 0) - break; - dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot, - q_lru); - xfs_qm_dqreclaim_one(dqp, &buffer_list, &dispose_list); - } - mutex_unlock(&qi->qi_lru_lock); - - error = xfs_buf_delwri_submit(&buffer_list); - if (error) - xfs_warn(NULL, "%s: dquot reclaim failed", __func__); - - while (!list_empty(&dispose_list)) { - dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru); - list_del_init(&dqp->q_lru); - xfs_qm_dqfree_one(dqp); - } - -out: - return vfs_pressure_ratio(qi->qi_lru_count); -} - /* * Start a transaction and write the incore superblock changes to * disk. flags parameter indicates which fields have changed. diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 670cd446407..2b602df9c24 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -49,9 +49,7 @@ typedef struct xfs_quotainfo { struct xfs_inode *qi_uquotaip; /* user quota inode */ struct xfs_inode *qi_gquotaip; /* group quota inode */ struct xfs_inode *qi_pquotaip; /* project quota inode */ - struct list_head qi_lru_list; - struct mutex qi_lru_lock; - int qi_lru_count; + struct list_lru qi_lru; int qi_dquots; time_t qi_btimelimit; /* limit for blks timer */ time_t qi_itimelimit; /* limit for inodes timer */ -- cgit v1.2.3 From 2f5b56f85674d75f35a10e2e9a4310e7539280da Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 28 Aug 2013 10:18:08 +1000 Subject: xfs-convert-dquot-cache-lru-to-list_lru-fix fix warnings Cc: Dave Chinner Cc: Glauber Costa Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/xfs/xfs_qm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 0fa98753bf6..1a4217e52c9 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -766,7 +766,7 @@ out_unlock_dirty: return 3; } -static long +static unsigned long xfs_qm_shrink_scan( struct shrinker *shrink, struct shrink_control *sc) @@ -774,7 +774,7 @@ xfs_qm_shrink_scan( struct xfs_quotainfo *qi = container_of(shrink, struct xfs_quotainfo, qi_shrinker); struct xfs_qm_isolate isol; - long freed; + unsigned long freed; int error; unsigned long nr_to_scan = sc->nr_to_scan; @@ -802,7 +802,7 @@ xfs_qm_shrink_scan( return freed; } -static long +static unsigned long xfs_qm_shrink_count( struct shrinker *shrink, struct shrink_control *sc) -- cgit v1.2.3 From 35163417fb7a55a24b6b0ebb102e9991adf309aa Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 28 Aug 2013 10:18:08 +1000 Subject: xfs: fix dquot isolation hang The new LRU list isolation code in xfs_qm_dquot_isolate() isn't completely up to date. Firstly, it needs conversion to return enum lru_status values, not raw numbers. Secondly - most importantly - it fails to unlock the dquot and relock the LRU in the LRU_RETRY path. This leads to deadlocks in xfstests generic/232. Fix them. Signed-off-by: Dave Chinner Cc: Glauber Costa Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/xfs/xfs_qm.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 1a4217e52c9..a29169b062e 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -707,7 +707,7 @@ xfs_qm_dquot_isolate( trace_xfs_dqreclaim_want(dqp); list_del_init(&dqp->q_lru); XFS_STATS_DEC(xs_qm_dquot_unused); - return 0; + return LRU_REMOVED; } /* @@ -753,17 +753,19 @@ xfs_qm_dquot_isolate( XFS_STATS_DEC(xs_qm_dquot_unused); trace_xfs_dqreclaim_done(dqp); XFS_STATS_INC(xs_qm_dqreclaims); - return 0; + return LRU_REMOVED; out_miss_busy: trace_xfs_dqreclaim_busy(dqp); XFS_STATS_INC(xs_qm_dqreclaim_misses); - return 2; + return LRU_SKIP; out_unlock_dirty: trace_xfs_dqreclaim_busy(dqp); XFS_STATS_INC(xs_qm_dqreclaim_misses); - return 3; + xfs_dqunlock(dqp); + spin_lock(lru_lock); + return LRU_RETRY; } static unsigned long -- cgit v1.2.3 From 5ca302c8e502ca53b7d75f12127ec0289904003a Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 28 Aug 2013 10:18:18 +1000 Subject: list_lru: dynamically adjust node arrays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We currently use a compile-time constant to size the node array for the list_lru structure. Due to this, we don't need to allocate any memory at initialization time. But as a consequence, the structures that contain embedded list_lru lists can become way too big (the superblock for instance contains two of them). This patch aims at ameliorating this situation by dynamically allocating the node arrays with the firmware provided nr_node_ids. Signed-off-by: Glauber Costa Cc: Dave Chinner Cc: Mel Gorman Cc: "Theodore Ts'o" Cc: Adrian Hunter Cc: Al Viro Cc: Artem Bityutskiy Cc: Arve Hjønnevåg Cc: Carlos Maiolino Cc: Christoph Hellwig Cc: Chuck Lever Cc: Daniel Vetter Cc: David Rientjes Cc: Gleb Natapov Cc: Greg Thelen Cc: J. Bruce Fields Cc: Jan Kara Cc: Jerome Glisse Cc: John Stultz Cc: KAMEZAWA Hiroyuki Cc: Kent Overstreet Cc: Kirill A. Shutemov Cc: Marcelo Tosatti Cc: Mel Gorman Cc: Steven Whitehouse Cc: Thomas Hellstrom Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/xfs/xfs_buf.c | 6 +++++- fs/xfs/xfs_qm.c | 10 ++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index d46f6a3dc1d..49fdb7bed48 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1592,6 +1592,7 @@ xfs_free_buftarg( struct xfs_mount *mp, struct xfs_buftarg *btp) { + list_lru_destroy(&btp->bt_lru); unregister_shrinker(&btp->bt_shrinker); if (mp->m_flags & XFS_MOUNT_BARRIER) @@ -1666,9 +1667,12 @@ xfs_alloc_buftarg( if (!btp->bt_bdi) goto error; - list_lru_init(&btp->bt_lru); if (xfs_setsize_buftarg_early(btp, bdev)) goto error; + + if (list_lru_init(&btp->bt_lru)) + goto error; + btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count; btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan; btp->bt_shrinker.seeks = DEFAULT_SEEKS; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index a29169b062e..7f4138629a8 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -831,11 +831,18 @@ xfs_qm_init_quotainfo( qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); + if ((error = list_lru_init(&qinf->qi_lru))) { + kmem_free(qinf); + mp->m_quotainfo = NULL; + return error; + } + /* * See if quotainodes are setup, and if not, allocate them, * and change the superblock accordingly. */ if ((error = xfs_qm_init_quotainos(mp))) { + list_lru_destroy(&qinf->qi_lru); kmem_free(qinf); mp->m_quotainfo = NULL; return error; @@ -846,8 +853,6 @@ xfs_qm_init_quotainfo( INIT_RADIX_TREE(&qinf->qi_pquota_tree, GFP_NOFS); mutex_init(&qinf->qi_tree_lock); - list_lru_init(&qinf->qi_lru); - /* mutex used to serialize quotaoffs */ mutex_init(&qinf->qi_quotaofflock); @@ -935,6 +940,7 @@ xfs_qm_destroy_quotainfo( qi = mp->m_quotainfo; ASSERT(qi != NULL); + list_lru_destroy(&qi->qi_lru); unregister_shrinker(&qi->qi_shrinker); if (qi->qi_uquotaip) { -- cgit v1.2.3 From f5e1dd34561e0fb06400b378d595198918833021 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 28 Aug 2013 10:18:18 +1000 Subject: super: fix for destroy lrus This patch adds the missing call to list_lru_destroy (spotted by Li Zhong) and moves the deletion to after the shrinker is unregistered, as correctly spotted by Dave Signed-off-by: Glauber Costa Cc: Michal Hocko Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/xfs/xfs_buf.c | 2 +- fs/xfs/xfs_qm.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 49fdb7bed48..263470075ea 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1592,8 +1592,8 @@ xfs_free_buftarg( struct xfs_mount *mp, struct xfs_buftarg *btp) { - list_lru_destroy(&btp->bt_lru); unregister_shrinker(&btp->bt_shrinker); + list_lru_destroy(&btp->bt_lru); if (mp->m_flags & XFS_MOUNT_BARRIER) xfs_blkdev_issue_flush(btp); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 7f4138629a8..3e6c2e6c9cd 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -940,8 +940,8 @@ xfs_qm_destroy_quotainfo( qi = mp->m_quotainfo; ASSERT(qi != NULL); - list_lru_destroy(&qi->qi_lru); unregister_shrinker(&qi->qi_shrinker); + list_lru_destroy(&qi->qi_lru); if (qi->qi_uquotaip) { IRELE(qi->qi_uquotaip); -- cgit v1.2.3