aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c69
-rw-r--r--fs/xfs/xfs_mount.h1
2 files changed, 67 insertions, 3 deletions
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index af3275965c7..debe2822c93 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -461,7 +461,6 @@ xfs_sync_worker(
error = xfs_fs_log_dummy(mp);
else
xfs_log_force(mp, 0);
- xfs_reclaim_inodes(mp, 0);
error = xfs_qm_sync(mp, SYNC_TRYLOCK);
}
@@ -470,6 +469,52 @@ xfs_sync_worker(
}
/*
+ * Queue a new inode reclaim pass if there are reclaimable inodes and there
+ * isn't a reclaim pass already in progress. By default it runs every 5s based
+ * on the xfs syncd work default of 30s. Perhaps this should have it's own
+ * tunable, but that can be done if this method proves to be ineffective or too
+ * aggressive.
+ */
+static void
+xfs_syncd_queue_reclaim(
+ struct xfs_mount *mp)
+{
+
+ /*
+ * We can have inodes enter reclaim after we've shut down the syncd
+ * workqueue during unmount, so don't allow reclaim work to be queued
+ * during unmount.
+ */
+ if (!(mp->m_super->s_flags & MS_ACTIVE))
+ return;
+
+ rcu_read_lock();
+ if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
+ queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
+ msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
+ }
+ rcu_read_unlock();
+}
+
+/*
+ * This is a fast pass over the inode cache to try to get reclaim moving on as
+ * many inodes as possible in a short period of time. It kicks itself every few
+ * seconds, as well as being kicked by the inode cache shrinker when memory
+ * goes low. It scans as quickly as possible avoiding locked inodes or those
+ * already being flushed, and once done schedules a future pass.
+ */
+STATIC void
+xfs_reclaim_worker(
+ struct work_struct *work)
+{
+ struct xfs_mount *mp = container_of(to_delayed_work(work),
+ struct xfs_mount, m_reclaim_work);
+
+ xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
+ xfs_syncd_queue_reclaim(mp);
+}
+
+/*
* Flush delayed allocate data, attempting to free up reserved space
* from existing allocations. At this point a new allocation attempt
* has failed with ENOSPC and we are in the process of scratching our
@@ -508,7 +553,10 @@ xfs_syncd_init(
{
INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
+ INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
+
xfs_syncd_queue_sync(mp);
+ xfs_syncd_queue_reclaim(mp);
return 0;
}
@@ -518,6 +566,7 @@ xfs_syncd_stop(
struct xfs_mount *mp)
{
cancel_delayed_work_sync(&mp->m_sync_work);
+ cancel_delayed_work_sync(&mp->m_reclaim_work);
cancel_work_sync(&mp->m_flush_work);
}
@@ -537,6 +586,10 @@ __xfs_inode_set_reclaim_tag(
XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
XFS_ICI_RECLAIM_TAG);
spin_unlock(&ip->i_mount->m_perag_lock);
+
+ /* schedule periodic background inode reclaim */
+ xfs_syncd_queue_reclaim(ip->i_mount);
+
trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
-1, _RET_IP_);
}
@@ -953,7 +1006,13 @@ xfs_reclaim_inodes(
}
/*
- * Shrinker infrastructure.
+ * Inode cache shrinker.
+ *
+ * When called we make sure that there is a background (fast) inode reclaim in
+ * progress, while we will throttle the speed of reclaim via doiing synchronous
+ * reclaim of inodes. That means if we come across dirty inodes, we wait for
+ * them to be cleaned, which we hope will not be very long due to the
+ * background walker having already kicked the IO off on those dirty inodes.
*/
static int
xfs_reclaim_inode_shrink(
@@ -968,10 +1027,14 @@ xfs_reclaim_inode_shrink(
mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
if (nr_to_scan) {
+ /* kick background reclaimer */
+ xfs_syncd_queue_reclaim(mp);
+
if (!(gfp_mask & __GFP_FS))
return -1;
- xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan);
+ xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT,
+ &nr_to_scan);
/* terminate if we don't exhaust the scan */
if (nr_to_scan > 0)
return -1;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a0ad90e9529..19af0ab0d0c 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -204,6 +204,7 @@ typedef struct xfs_mount {
#endif
struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
struct delayed_work m_sync_work; /* background sync work */
+ struct delayed_work m_reclaim_work; /* background inode reclaim */
struct work_struct m_flush_work; /* background inode flush */
__int64_t m_update_flags; /* sb flags we need to update
on the next remount,rw */