aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorJohn Rigby <john.rigby@linaro.org>2010-10-07 18:35:06 -0600
committerJohn Rigby <john.rigby@linaro.org>2010-10-07 18:35:06 -0600
commitbcf228ed442db2241c4e92bff9f485a79cad3e05 (patch)
tree6e37946d52a04d63ba9cefbe94dabb508af9bce9 /fs
parentbe48733547887ae084b1338eb45ddb597219e5ca (diff)
parentb76b438b667d9c648665c8abeaf215cfd044ba0d (diff)
Merge remote branch 'remotes/linux-linaro-2.6.35/master'
Conflicts: drivers/char/agp/intel-gtt.c sound/pci/hda/patch_realtek.c
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c13
-rw-r--r--fs/binfmt_misc.c2
-rw-r--r--fs/char_dev.c4
-rw-r--r--fs/cifs/connect.c6
-rw-r--r--fs/compat.c2
-rw-r--r--fs/direct-io.c30
-rw-r--r--fs/ext4/ext4.h4
-rw-r--r--fs/ext4/inode.c17
-rw-r--r--fs/fuse/dev.c16
-rw-r--r--fs/gfs2/log.c2
-rw-r--r--fs/minix/namei.c2
-rw-r--r--fs/nfs/client.c2
-rw-r--r--fs/nfs/super.c8
-rw-r--r--fs/notify/inotify/inotify_user.c7
-rw-r--r--fs/ocfs2/aops.c7
-rw-r--r--fs/ocfs2/inode.c6
-rw-r--r--fs/proc/vmcore.c2
-rw-r--r--fs/sysfs/file.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c24
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c2
-rw-r--r--fs/xfs/xfs_ialloc.c16
-rw-r--r--fs/xfs/xfs_inode.c49
23 files changed, 155 insertions, 70 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 1ccf25cef1f..5fb0fd7c80a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -712,8 +712,16 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
*/
ret = retry(iocb);
- if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED)
+ if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) {
+ /*
+ * There's no easy way to restart the syscall since other AIO's
+ * may be already running. Just fail this IO with EINTR.
+ */
+ if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
+ ret == -ERESTARTNOHAND || ret == -ERESTART_RESTARTBLOCK))
+ ret = -EINTR;
aio_complete(iocb, ret, 0);
+ }
out:
spin_lock_irq(&ctx->ctx_lock);
@@ -1659,6 +1667,9 @@ long do_io_submit(aio_context_t ctx_id, long nr,
if (unlikely(nr < 0))
return -EINVAL;
+ if (unlikely(nr > LONG_MAX/sizeof(*iocbpp)))
+ nr = LONG_MAX/sizeof(*iocbpp);
+
if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp)))))
return -EFAULT;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index c4e83537ead..42b60b04ea0 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -723,7 +723,7 @@ static int __init init_misc_binfmt(void)
{
int err = register_filesystem(&bm_fs_type);
if (!err) {
- err = register_binfmt(&misc_format);
+ err = insert_binfmt(&misc_format);
if (err)
unregister_filesystem(&bm_fs_type);
}
diff --git a/fs/char_dev.c b/fs/char_dev.c
index f80a4f25123..143d393881c 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -40,7 +40,9 @@ struct backing_dev_info directly_mappable_cdev_bdi = {
#endif
/* permit direct mmap, for read, write or exec */
BDI_CAP_MAP_DIRECT |
- BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP),
+ BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP |
+ /* no writeback happens */
+ BDI_CAP_NO_ACCT_AND_WRITEBACK),
};
static struct kobj_map *cdev_map;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 2208f06e4c4..fa1039a7ac4 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1647,9 +1647,6 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
if (ses) {
cFYI(1, "Existing smb sess found (status=%d)", ses->status);
- /* existing SMB ses has a server reference already */
- cifs_put_tcp_session(server);
-
mutex_lock(&ses->session_mutex);
rc = cifs_negotiate_protocol(xid, ses);
if (rc) {
@@ -1672,6 +1669,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
}
}
mutex_unlock(&ses->session_mutex);
+
+ /* existing SMB ses has a server reference already */
+ cifs_put_tcp_session(server);
FreeXid(xid);
return ses;
}
diff --git a/fs/compat.c b/fs/compat.c
index 6490d2134ff..af7c2301a2e 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1150,7 +1150,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
{
compat_ssize_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
- struct iovec *iov;
+ struct iovec *iov = iovstack;
ssize_t ret;
io_fn_t fn;
iov_fn_t fnv;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 7600aacf531..458fdd360dd 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -218,7 +218,7 @@ static struct page *dio_get_page(struct dio *dio)
* filesystems can use it to hold additional state between get_block calls and
* dio_complete.
*/
-static int dio_complete(struct dio *dio, loff_t offset, int ret)
+static int dio_complete(struct dio *dio, loff_t offset, int ret, bool is_async)
{
ssize_t transferred = 0;
@@ -239,14 +239,6 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
transferred = dio->i_size - offset;
}
- if (dio->end_io && dio->result)
- dio->end_io(dio->iocb, offset, transferred,
- dio->map_bh.b_private);
-
- if (dio->flags & DIO_LOCKING)
- /* lockdep: non-owner release */
- up_read_non_owner(&dio->inode->i_alloc_sem);
-
if (ret == 0)
ret = dio->page_errors;
if (ret == 0)
@@ -254,6 +246,17 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
if (ret == 0)
ret = transferred;
+ if (dio->end_io && dio->result) {
+ dio->end_io(dio->iocb, offset, transferred,
+ dio->map_bh.b_private, ret, is_async);
+ } else if (is_async) {
+ aio_complete(dio->iocb, ret, 0);
+ }
+
+ if (dio->flags & DIO_LOCKING)
+ /* lockdep: non-owner release */
+ up_read_non_owner(&dio->inode->i_alloc_sem);
+
return ret;
}
@@ -277,8 +280,7 @@ static void dio_bio_end_aio(struct bio *bio, int error)
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (remaining == 0) {
- int ret = dio_complete(dio, dio->iocb->ki_pos, 0);
- aio_complete(dio->iocb, ret, 0);
+ dio_complete(dio, dio->iocb->ki_pos, 0, true);
kfree(dio);
}
}
@@ -632,7 +634,7 @@ static int dio_send_cur_page(struct dio *dio)
int ret = 0;
if (dio->bio) {
- loff_t cur_offset = dio->block_in_file << dio->blkbits;
+ loff_t cur_offset = dio->cur_page_fs_offset;
loff_t bio_next_offset = dio->logical_offset_in_bio +
dio->bio->bi_size;
@@ -657,7 +659,7 @@ static int dio_send_cur_page(struct dio *dio)
* Submit now if the underlying fs is about to perform a
* metadata read
*/
- if (dio->boundary)
+ else if (dio->boundary)
dio_bio_submit(dio);
}
@@ -1126,7 +1128,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (ret2 == 0) {
- ret = dio_complete(dio, offset, ret);
+ ret = dio_complete(dio, offset, ret, false);
kfree(dio);
} else
BUG_ON(ret != -EIOCBQUEUED);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 19a4de57128..4c9f05dc704 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -167,13 +167,15 @@ struct mpage_da_data {
};
#define EXT4_IO_UNWRITTEN 0x1
typedef struct ext4_io_end {
- struct list_head list; /* per-file finished AIO list */
+ struct list_head list; /* per-file finished IO list */
struct inode *inode; /* file being written to */
unsigned int flag; /* unwritten or not */
struct page *page; /* page struct for buffer write */
loff_t offset; /* offset in the file */
ssize_t size; /* size of the extent */
struct work_struct work; /* data work queue */
+ struct kiocb *iocb; /* iocb struct for AIO */
+ int result; /* error value for AIO */
} ext4_io_end_t;
/*
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 42272d67955..3fb64b9e47c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3668,6 +3668,8 @@ static int ext4_end_io_nolock(ext4_io_end_t *io)
return ret;
}
+ if (io->iocb)
+ aio_complete(io->iocb, io->result, 0);
/* clear the DIO AIO unwritten flag */
io->flag = 0;
return ret;
@@ -3767,6 +3769,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
io->offset = 0;
io->size = 0;
io->page = NULL;
+ io->iocb = NULL;
+ io->result = 0;
INIT_WORK(&io->work, ext4_end_io_work);
INIT_LIST_HEAD(&io->list);
}
@@ -3775,7 +3779,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
}
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
- ssize_t size, void *private)
+ ssize_t size, void *private, int ret,
+ bool is_async)
{
ext4_io_end_t *io_end = iocb->private;
struct workqueue_struct *wq;
@@ -3784,7 +3789,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
/* if not async direct IO or dio with 0 bytes write, just return */
if (!io_end || !size)
- return;
+ goto out;
ext_debug("ext4_end_io_dio(): io_end 0x%p"
"for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
@@ -3795,12 +3800,18 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
if (io_end->flag != EXT4_IO_UNWRITTEN){
ext4_free_io_end(io_end);
iocb->private = NULL;
+out:
+ if (is_async)
+ aio_complete(iocb, ret, 0);
return;
}
io_end->offset = offset;
io_end->size = size;
- io_end->flag = EXT4_IO_UNWRITTEN;
+ if (is_async) {
+ io_end->iocb = iocb;
+ io_end->result = ret;
+ }
wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
/* queue the work to convert unwritten extents to written */
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 9424796d663..e5cdabf4945 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1552,6 +1552,14 @@ __acquires(&fc->lock)
}
}
+static void end_queued_requests(struct fuse_conn *fc)
+{
+ fc->max_background = UINT_MAX;
+ flush_bg_queue(fc);
+ end_requests(fc, &fc->pending);
+ end_requests(fc, &fc->processing);
+}
+
/*
* Abort all requests.
*
@@ -1578,8 +1586,7 @@ void fuse_abort_conn(struct fuse_conn *fc)
fc->connected = 0;
fc->blocked = 0;
end_io_requests(fc);
- end_requests(fc, &fc->pending);
- end_requests(fc, &fc->processing);
+ end_queued_requests(fc);
wake_up_all(&fc->waitq);
wake_up_all(&fc->blocked_waitq);
kill_fasync(&fc->fasync, SIGIO, POLL_IN);
@@ -1594,8 +1601,9 @@ int fuse_dev_release(struct inode *inode, struct file *file)
if (fc) {
spin_lock(&fc->lock);
fc->connected = 0;
- end_requests(fc, &fc->pending);
- end_requests(fc, &fc->processing);
+ fc->blocked = 0;
+ end_queued_requests(fc);
+ wake_up_all(&fc->blocked_waitq);
spin_unlock(&fc->lock);
fuse_conn_put(fc);
}
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 6a857e24f94..83917b50a19 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -932,7 +932,7 @@ int gfs2_logd(void *data)
do {
prepare_to_wait(&sdp->sd_logd_waitq, &wait,
- TASK_UNINTERRUPTIBLE);
+ TASK_INTERRUPTIBLE);
if (!gfs2_ail_flush_reqd(sdp) &&
!gfs2_jrnl_flush_reqd(sdp) &&
!kthread_should_stop())
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index e20ee85955d..f3f3578393a 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -115,7 +115,7 @@ static int minix_mkdir(struct inode * dir, struct dentry *dentry, int mode)
inode_inc_link_count(dir);
- inode = minix_new_inode(dir, mode, &err);
+ inode = minix_new_inode(dir, S_IFDIR | mode, &err);
if (!inode)
goto out_dir;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index d25b5257b7a..e0067708a55 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -274,7 +274,7 @@ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
sin1->sin6_scope_id != sin2->sin6_scope_id)
return 0;
- return ipv6_addr_equal(&sin1->sin6_addr, &sin1->sin6_addr);
+ return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr);
}
#else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */
static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 6bf11d745a3..381d9291ea1 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -431,7 +431,15 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
goto out_err;
error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
+ if (unlikely(error == -ESTALE)) {
+ struct dentry *pd_dentry;
+ pd_dentry = dget_parent(dentry);
+ if (pd_dentry != NULL) {
+ nfs_zap_caches(pd_dentry->d_inode);
+ dput(pd_dentry);
+ }
+ }
nfs_free_fattr(res.fattr);
if (error < 0)
goto out_err;
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index e46ca685b9b..0c6bbc031f1 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -96,8 +96,11 @@ static inline __u32 inotify_arg_to_mask(u32 arg)
{
__u32 mask;
- /* everything should accept their own ignored and cares about children */
- mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD);
+ /*
+ * everything should accept their own ignored, cares about children,
+ * and should receive events when the inode is unmounted
+ */
+ mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD | FS_UNMOUNT);
/* mask off the flags used to open the fd */
mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT));
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 356e976772b..96337a4fbbd 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -578,7 +578,9 @@ bail:
static void ocfs2_dio_end_io(struct kiocb *iocb,
loff_t offset,
ssize_t bytes,
- void *private)
+ void *private,
+ int ret,
+ bool is_async)
{
struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
int level;
@@ -592,6 +594,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
if (!level)
up_read(&inode->i_alloc_sem);
ocfs2_rw_unlock(inode, level);
+
+ if (is_async)
+ aio_complete(iocb, ret, 0);
}
/*
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index abb0a95cc71..201e7bcec75 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -488,7 +488,11 @@ static int ocfs2_read_locked_inode(struct inode *inode,
OCFS2_BH_IGNORE_CACHE);
} else {
status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
- if (!status)
+ /*
+ * If buffer is in jbd, then its checksum may not have been
+ * computed as yet.
+ */
+ if (!status && !buffer_jbd(bh))
status = ocfs2_validate_inode_block(osb->sb, bh);
}
if (status < 0) {
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 91c817ff02c..2367fb3f70b 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -163,7 +163,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
static const struct file_operations proc_vmcore_operations = {
.read = read_vmcore,
- .llseek = generic_file_llseek,
+ .llseek = default_llseek,
};
static struct vmcore* __init get_new_element(void)
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 1beaa739d0a..cd796847c81 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -340,7 +340,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
char *p;
p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file));
- if (p)
+ if (!IS_ERR(p))
memmove(last_sysfs_file, p, strlen(p) + 1);
/* need attr_sd for attr and ops, its parent for kobj */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 34640d6dbdc..f9f5567bf89 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -275,8 +275,11 @@ xfs_end_io(
xfs_finish_ioend(ioend, 0);
/* ensure we don't spin on blocked ioends */
delay(1);
- } else
+ } else {
+ if (ioend->io_iocb)
+ aio_complete(ioend->io_iocb, ioend->io_result, 0);
xfs_destroy_ioend(ioend);
+ }
}
/*
@@ -309,6 +312,8 @@ xfs_alloc_ioend(
atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
ioend->io_offset = 0;
ioend->io_size = 0;
+ ioend->io_iocb = NULL;
+ ioend->io_result = 0;
INIT_WORK(&ioend->io_work, xfs_end_io);
return ioend;
@@ -1599,9 +1604,12 @@ xfs_end_io_direct(
struct kiocb *iocb,
loff_t offset,
ssize_t size,
- void *private)
+ void *private,
+ int ret,
+ bool is_async)
{
xfs_ioend_t *ioend = iocb->private;
+ bool complete_aio = is_async;
/*
* Non-NULL private data means we need to issue a transaction to
@@ -1627,7 +1635,14 @@ xfs_end_io_direct(
if (ioend->io_type == IO_READ) {
xfs_finish_ioend(ioend, 0);
} else if (private && size > 0) {
- xfs_finish_ioend(ioend, is_sync_kiocb(iocb));
+ if (is_async) {
+ ioend->io_iocb = iocb;
+ ioend->io_result = ret;
+ complete_aio = false;
+ xfs_finish_ioend(ioend, 0);
+ } else {
+ xfs_finish_ioend(ioend, 1);
+ }
} else {
/*
* A direct I/O write ioend starts it's life in unwritten
@@ -1645,6 +1660,9 @@ xfs_end_io_direct(
* against double-freeing.
*/
iocb->private = NULL;
+
+ if (complete_aio)
+ aio_complete(iocb, ret, 0);
}
STATIC ssize_t
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 4cfc6ea87df..9f566d92ae3 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -37,6 +37,8 @@ typedef struct xfs_ioend {
size_t io_size; /* size of the extent */
xfs_off_t io_offset; /* offset in the file */
struct work_struct io_work; /* xfsdatad work queue */
+ struct kiocb *io_iocb;
+ int io_result;
} xfs_ioend_t;
extern const struct address_space_operations xfs_address_space_operations;
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index e59a8106283..82a74f6142f 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -794,6 +794,8 @@ xfs_ioc_fsgetxattr(
{
struct fsxattr fa;
+ memset(&fa, 0, sizeof(struct fsxattr));
+
xfs_ilock(ip, XFS_ILOCK_SHARED);
fa.fsx_xflags = xfs_ip2xflags(ip);
fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index c7142a064c4..eb779affaaa 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -1217,7 +1217,6 @@ xfs_imap_lookup(
struct xfs_inobt_rec_incore rec;
struct xfs_btree_cur *cur;
struct xfs_buf *agbp;
- xfs_agino_t startino;
int error;
int i;
@@ -1231,13 +1230,13 @@ xfs_imap_lookup(
}
/*
- * derive and lookup the exact inode record for the given agino. If the
- * record cannot be found, then it's an invalid inode number and we
- * should abort.
+ * Lookup the inode record for the given agino. If the record cannot be
+ * found, then it's an invalid inode number and we should abort. Once
+ * we have a record, we need to ensure it contains the inode number
+ * we are looking up.
*/
cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
- startino = agino & ~(XFS_IALLOC_INODES(mp) - 1);
- error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i);
+ error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
if (!error) {
if (i)
error = xfs_inobt_get_rec(cur, &rec, &i);
@@ -1250,6 +1249,11 @@ xfs_imap_lookup(
if (error)
return error;
+ /* check that the returned record contains the required inode */
+ if (rec.ir_startino > agino ||
+ rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino)
+ return EINVAL;
+
/* for untrusted inodes check it is allocated first */
if ((flags & XFS_IGET_UNTRUSTED) &&
(rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b76a829d7e2..f70221820a1 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1927,6 +1927,11 @@ xfs_iunlink_remove(
return 0;
}
+/*
+ * A big issue when freeing the inode cluster is is that we _cannot_ skip any
+ * inodes that are in memory - they all must be marked stale and attached to
+ * the cluster buffer.
+ */
STATIC void
xfs_ifree_cluster(
xfs_inode_t *free_ip,
@@ -1958,8 +1963,6 @@ xfs_ifree_cluster(
}
for (j = 0; j < nbufs; j++, inum += ninodes) {
- int found = 0;
-
blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
XFS_INO_TO_AGBNO(mp, inum));
@@ -1978,7 +1981,9 @@ xfs_ifree_cluster(
/*
* Walk the inodes already attached to the buffer and mark them
* stale. These will all have the flush locks held, so an
- * in-memory inode walk can't lock them.
+ * in-memory inode walk can't lock them. By marking them all
+ * stale first, we will not attempt to lock them in the loop
+ * below as the XFS_ISTALE flag will be set.
*/
lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
while (lip) {
@@ -1990,11 +1995,11 @@ xfs_ifree_cluster(
&iip->ili_flush_lsn,
&iip->ili_item.li_lsn);
xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
- found++;
}
lip = lip->li_bio_list;
}
+
/*
* For each inode in memory attempt to add it to the inode
* buffer and set it up for being staled on buffer IO
@@ -2006,6 +2011,7 @@ xfs_ifree_cluster(
* even trying to lock them.
*/
for (i = 0; i < ninodes; i++) {
+retry:
read_lock(&pag->pag_ici_lock);
ip = radix_tree_lookup(&pag->pag_ici_root,
XFS_INO_TO_AGINO(mp, (inum + i)));
@@ -2016,38 +2022,36 @@ xfs_ifree_cluster(
continue;
}
- /* don't try to lock/unlock the current inode */
+ /*
+ * Don't try to lock/unlock the current inode, but we
+ * _cannot_ skip the other inodes that we did not find
+ * in the list attached to the buffer and are not
+ * already marked stale. If we can't lock it, back off
+ * and retry.
+ */
if (ip != free_ip &&
!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
read_unlock(&pag->pag_ici_lock);
- continue;
+ delay(1);
+ goto retry;
}
read_unlock(&pag->pag_ici_lock);
- if (!xfs_iflock_nowait(ip)) {
- if (ip != free_ip)
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- continue;
- }
-
+ xfs_iflock(ip);
xfs_iflags_set(ip, XFS_ISTALE);
- if (xfs_inode_clean(ip)) {
- ASSERT(ip != free_ip);
- xfs_ifunlock(ip);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- continue;
- }
+ /*
+ * we don't need to attach clean inodes or those only
+ * with unlogged changes (which we throw away, anyway).
+ */
iip = ip->i_itemp;
- if (!iip) {
- /* inode with unlogged changes only */
+ if (!iip || xfs_inode_clean(ip)) {
ASSERT(ip != free_ip);
ip->i_update_core = 0;
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
continue;
}
- found++;
iip->ili_last_fields = iip->ili_format.ilf_fields;
iip->ili_format.ilf_fields = 0;
@@ -2063,8 +2067,7 @@ xfs_ifree_cluster(
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
- if (found)
- xfs_trans_stale_inode_buf(tp, bp);
+ xfs_trans_stale_inode_buf(tp, bp);
xfs_trans_binval(tp, bp);
}