summaryrefslogtreecommitdiff
path: root/fs/xfs
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2024-07-13 12:07:10 +0200
committerThomas Gleixner <tglx@linutronix.de>2024-07-13 12:07:10 +0200
commitb7625d67eb1a63d33b0a2a4518ce4897d27f7465 (patch)
tree372459935f602bac331ad0185f2dc5cc23d76924 /fs/xfs
parentf7d43dd206e7e18c182f200e67a8db8c209907fa (diff)
parent4bdc3eaa102b6bedb0800f76f53eca516d5cf20c (diff)
Merge tag 'timers-v6.11-rc1' of https://git.linaro.org/people/daniel.lezcano/linux into timers/coretimers/drivers/next
Pull clocksource/event driver updates from Daniel Lezcano: - Remove unnecessary local variables initialization as they will be initialized in the code path anyway right after on the ARM arch timer and the ARM global timer (Li kunyu) - Fix a race condition in the interrupt leading to a deadlock on the SH CMT driver. Note that this fix was not tested on the platform using this timer but the fix seems reasonable enough to be picked confidently (Niklas Söderlund) - Increase the rating of the gic-timer and use the configured width clocksource register on the MIPS architecture (Jiaxun Yang) - Add the DT bindings for the TMU on the Renesas platforms (Geert Uytterhoeven) - Add the DT bindings for the SOPHGO SG2002 clint on RiscV (Thomas Bonnefille) - Add the rtl-otto timer driver along with the DT bindings for the Realtek platform (Chris Packham) Link: https://lore.kernel.org/all/91cd05de-4c5d-4242-a381-3b8a4fe6a2a2@linaro.org
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c31
-rw-r--r--fs/xfs/libxfs/xfs_fs.h2
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c23
-rw-r--r--fs/xfs/libxfs/xfs_sb.c7
-rw-r--r--fs/xfs/xfs_bmap_util.c30
-rw-r--r--fs/xfs/xfs_bmap_util.h2
-rw-r--r--fs/xfs/xfs_icache.c2
-rw-r--r--fs/xfs/xfs_inode.c47
-rw-r--r--fs/xfs/xfs_iomap.c34
9 files changed, 118 insertions, 60 deletions
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index c101cf266bc4..6af6f744fdd6 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4058,20 +4058,32 @@ xfs_bmapi_reserve_delalloc(
xfs_extlen_t indlen;
uint64_t fdblocks;
int error;
- xfs_fileoff_t aoff = off;
+ xfs_fileoff_t aoff;
+ bool use_cowextszhint =
+ whichfork == XFS_COW_FORK && !prealloc;
+retry:
/*
* Cap the alloc length. Keep track of prealloc so we know whether to
* tag the inode before we return.
*/
+ aoff = off;
alen = XFS_FILBLKS_MIN(len + prealloc, XFS_MAX_BMBT_EXTLEN);
if (!eof)
alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
if (prealloc && alen >= len)
prealloc = alen - len;
- /* Figure out the extent size, adjust alen */
- if (whichfork == XFS_COW_FORK) {
+ /*
+ * If we're targetting the COW fork but aren't creating a speculative
+ * posteof preallocation, try to expand the reservation to align with
+ * the COW extent size hint if there's sufficient free space.
+ *
+ * Unlike the data fork, the CoW cancellation functions will free all
+ * the reservations at inactivation, so we don't require that every
+ * delalloc reservation have a dirty pagecache.
+ */
+ if (use_cowextszhint) {
struct xfs_bmbt_irec prev;
xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip);
@@ -4090,7 +4102,7 @@ xfs_bmapi_reserve_delalloc(
*/
error = xfs_quota_reserve_blkres(ip, alen);
if (error)
- return error;
+ goto out;
/*
* Split changing sb for alen and indlen since they could be coming
@@ -4140,6 +4152,17 @@ out_unreserve_frextents:
out_unreserve_quota:
if (XFS_IS_QUOTA_ON(mp))
xfs_quota_unreserve_blkres(ip, alen);
+out:
+ if (error == -ENOSPC || error == -EDQUOT) {
+ trace_xfs_delalloc_enospc(ip, off, len);
+
+ if (prealloc || use_cowextszhint) {
+ /* retry without any preallocation */
+ use_cowextszhint = false;
+ prealloc = 0;
+ goto retry;
+ }
+ }
return error;
}
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 97996cb79aaa..454b63ef7201 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -996,7 +996,7 @@ struct xfs_getparents_by_handle {
#define XFS_IOC_FSGEOMETRY _IOR ('X', 126, struct xfs_fsop_geom)
#define XFS_IOC_BULKSTAT _IOR ('X', 127, struct xfs_bulkstat_req)
#define XFS_IOC_INUMBERS _IOR ('X', 128, struct xfs_inumbers_req)
-#define XFS_IOC_EXCHANGE_RANGE _IOWR('X', 129, struct xfs_exchange_range)
+#define XFS_IOC_EXCHANGE_RANGE _IOW ('X', 129, struct xfs_exchange_range)
/* XFS_IOC_GETFSUUID ---------- deprecated 140 */
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index e7a7bfbe75b4..513b50da6215 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -379,10 +379,13 @@ xfs_dinode_verify_fork(
/*
* A directory small enough to fit in the inode must be stored
* in local format. The directory sf <-> extents conversion
- * code updates the directory size accordingly.
+ * code updates the directory size accordingly. Directories
+ * being truncated have zero size and are not subject to this
+ * check.
*/
if (S_ISDIR(mode)) {
- if (be64_to_cpu(dip->di_size) <= fork_size &&
+ if (dip->di_size &&
+ be64_to_cpu(dip->di_size) <= fork_size &&
fork_format != XFS_DINODE_FMT_LOCAL)
return __this_address;
}
@@ -528,9 +531,19 @@ xfs_dinode_verify(
if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
return __this_address;
- /* No zero-length symlinks/dirs. */
- if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
- return __this_address;
+ /*
+ * No zero-length symlinks/dirs unless they're unlinked and hence being
+ * inactivated.
+ */
+ if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) {
+ if (dip->di_version > 1) {
+ if (dip->di_nlink)
+ return __this_address;
+ } else {
+ if (dip->di_onlink)
+ return __this_address;
+ }
+ }
fa = xfs_dinode_verify_nrext64(mp, dip);
if (fa)
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 09e4bf949bf8..6b56f0f6d4c1 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -1038,11 +1038,12 @@ xfs_log_sb(
* and hence we don't need have to update it here.
*/
if (xfs_has_lazysbcount(mp)) {
- mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
+ mp->m_sb.sb_icount = percpu_counter_sum_positive(&mp->m_icount);
mp->m_sb.sb_ifree = min_t(uint64_t,
- percpu_counter_sum(&mp->m_ifree),
+ percpu_counter_sum_positive(&mp->m_ifree),
mp->m_sb.sb_icount);
- mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
+ mp->m_sb.sb_fdblocks =
+ percpu_counter_sum_positive(&mp->m_fdblocks);
}
xfs_sb_to_disk(bp->b_addr, &mp->m_sb);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index ac2e77ebb54c..a4d9fbc21b83 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -486,13 +486,11 @@ out_unlock:
/*
* Test whether it is appropriate to check an inode for and free post EOF
- * blocks. The 'force' parameter determines whether we should also consider
- * regular files that are marked preallocated or append-only.
+ * blocks.
*/
bool
xfs_can_free_eofblocks(
- struct xfs_inode *ip,
- bool force)
+ struct xfs_inode *ip)
{
struct xfs_bmbt_irec imap;
struct xfs_mount *mp = ip->i_mount;
@@ -526,11 +524,11 @@ xfs_can_free_eofblocks(
return false;
/*
- * Do not free real preallocated or append-only files unless the file
- * has delalloc blocks and we are forced to remove them.
+ * Only free real extents for inodes with persistent preallocations or
+ * the append-only flag.
*/
if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
- if (!force || ip->i_delayed_blks == 0)
+ if (ip->i_delayed_blks == 0)
return false;
/*
@@ -584,6 +582,22 @@ xfs_free_eofblocks(
/* Wait on dio to ensure i_size has settled. */
inode_dio_wait(VFS_I(ip));
+ /*
+ * For preallocated files only free delayed allocations.
+ *
+ * Note that this means we also leave speculative preallocations in
+ * place for preallocated files.
+ */
+ if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) {
+ if (ip->i_delayed_blks) {
+ xfs_bmap_punch_delalloc_range(ip,
+ round_up(XFS_ISIZE(ip), mp->m_sb.sb_blocksize),
+ LLONG_MAX);
+ }
+ xfs_inode_clear_eofblocks_tag(ip);
+ return 0;
+ }
+
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
if (error) {
ASSERT(xfs_is_shutdown(mp));
@@ -891,7 +905,7 @@ xfs_prepare_shift(
* Trim eofblocks to avoid shifting uninitialized post-eof preallocation
* into the accessible region of the file.
*/
- if (xfs_can_free_eofblocks(ip, true)) {
+ if (xfs_can_free_eofblocks(ip)) {
error = xfs_free_eofblocks(ip);
if (error)
return error;
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 51f84d8ff372..eb0895bfb9da 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -63,7 +63,7 @@ int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
xfs_off_t len);
/* EOF block manipulation functions */
-bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
+bool xfs_can_free_eofblocks(struct xfs_inode *ip);
int xfs_free_eofblocks(struct xfs_inode *ip);
int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 0953163a2d84..9967334ea99f 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1155,7 +1155,7 @@ xfs_inode_free_eofblocks(
}
*lockflags |= XFS_IOLOCK_EXCL;
- if (xfs_can_free_eofblocks(ip, false))
+ if (xfs_can_free_eofblocks(ip))
return xfs_free_eofblocks(ip);
/* inode could be preallocated or append-only */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 58fb7a5062e1..a4e3cd8971fc 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -42,6 +42,7 @@
#include "xfs_pnfs.h"
#include "xfs_parent.h"
#include "xfs_xattr.h"
+#include "xfs_sb.h"
struct kmem_cache *xfs_inode_cache;
@@ -870,9 +871,16 @@ xfs_init_new_inode(
* this saves us from needing to run a separate transaction to set the
* fork offset in the immediate future.
*/
- if (init_xattrs && xfs_has_attr(mp)) {
+ if (init_xattrs) {
ip->i_forkoff = xfs_default_attroffset(ip) >> 3;
xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0);
+
+ if (!xfs_has_attr(mp)) {
+ spin_lock(&mp->m_sb_lock);
+ xfs_add_attr(mp);
+ spin_unlock(&mp->m_sb_lock);
+ xfs_log_sb(tp);
+ }
}
/*
@@ -1595,7 +1603,7 @@ xfs_release(
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
return 0;
- if (xfs_can_free_eofblocks(ip, false)) {
+ if (xfs_can_free_eofblocks(ip)) {
/*
* Check if the inode is being opened, written and closed
* frequently and we have delayed allocation blocks outstanding
@@ -1856,15 +1864,13 @@ xfs_inode_needs_inactive(
/*
* This file isn't being freed, so check if there are post-eof blocks
- * to free. @force is true because we are evicting an inode from the
- * cache. Post-eof blocks must be freed, lest we end up with broken
- * free space accounting.
+ * to free.
*
* Note: don't bother with iolock here since lockdep complains about
* acquiring it in reclaim context. We have the only reference to the
* inode at this point anyways.
*/
- return xfs_can_free_eofblocks(ip, true);
+ return xfs_can_free_eofblocks(ip);
}
/*
@@ -1947,15 +1953,11 @@ xfs_inactive(
if (VFS_I(ip)->i_nlink != 0) {
/*
- * force is true because we are evicting an inode from the
- * cache. Post-eof blocks must be freed, lest we end up with
- * broken free space accounting.
- *
* Note: don't bother with iolock here since lockdep complains
* about acquiring it in reclaim context. We have the only
* reference to the inode at this point anyways.
*/
- if (xfs_can_free_eofblocks(ip, true))
+ if (xfs_can_free_eofblocks(ip))
error = xfs_free_eofblocks(ip);
goto out;
@@ -2548,11 +2550,26 @@ xfs_ifree_cluster(
* This buffer may not have been correctly initialised as we
* didn't read it from disk. That's not important because we are
* only using to mark the buffer as stale in the log, and to
- * attach stale cached inodes on it. That means it will never be
- * dispatched for IO. If it is, we want to know about it, and we
- * want it to fail. We can acheive this by adding a write
- * verifier to the buffer.
+ * attach stale cached inodes on it.
+ *
+ * For the inode that triggered the cluster freeing, this
+ * attachment may occur in xfs_inode_item_precommit() after we
+ * have marked this buffer stale. If this buffer was not in
+ * memory before xfs_ifree_cluster() started, it will not be
+ * marked XBF_DONE and this will cause problems later in
+ * xfs_inode_item_precommit() when we trip over a (stale, !done)
+ * buffer to attached to the transaction.
+ *
+ * Hence we have to mark the buffer as XFS_DONE here. This is
+ * safe because we are also marking the buffer as XBF_STALE and
+ * XFS_BLI_STALE. That means it will never be dispatched for
+ * IO and it won't be unlocked until the cluster freeing has
+ * been committed to the journal and the buffer unpinned. If it
+ * is written, we want to know about it, and we want it to
+ * fail. We can acheive this by adding a write verifier to the
+ * buffer.
*/
+ bp->b_flags |= XBF_DONE;
bp->b_ops = &xfs_inode_buf_ops;
/*
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 378342673925..414903885ab9 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1148,33 +1148,23 @@ xfs_buffered_write_iomap_begin(
}
}
-retry:
- error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
- end_fsb - offset_fsb, prealloc_blocks,
- allocfork == XFS_DATA_FORK ? &imap : &cmap,
- allocfork == XFS_DATA_FORK ? &icur : &ccur,
- allocfork == XFS_DATA_FORK ? eof : cow_eof);
- switch (error) {
- case 0:
- break;
- case -ENOSPC:
- case -EDQUOT:
- /* retry without any preallocation */
- trace_xfs_delalloc_enospc(ip, offset, count);
- if (prealloc_blocks) {
- prealloc_blocks = 0;
- goto retry;
- }
- fallthrough;
- default:
- goto out_unlock;
- }
-
if (allocfork == XFS_COW_FORK) {
+ error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
+ end_fsb - offset_fsb, prealloc_blocks, &cmap,
+ &ccur, cow_eof);
+ if (error)
+ goto out_unlock;
+
trace_xfs_iomap_alloc(ip, offset, count, allocfork, &cmap);
goto found_cow;
}
+ error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
+ end_fsb - offset_fsb, prealloc_blocks, &imap, &icur,
+ eof);
+ if (error)
+ goto out_unlock;
+
/*
* Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
* them out if the write happens to fail.