From 21fb89793df408cc116aa52399a5e6f7af459596 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 25 Apr 2011 18:17:09 +0300 Subject: UBIFS: do not free write-buffers when in R/O mode commit b50b9f408502a2ea90459ae36ba8cdc9cc005cfe upstream. Currently UBIFS has a small optimization - it frees write-buffers when it is re-mounted from R/W mode to R/O mode. Of course, when it is mounted R/O, it does not allocate write-buffers as well. This optimization is nice but it leads to subtle problems and complications in recovery, which I can reproduce using the integck test. The symptoms are that after a power cut the file-system cannot be mounted if we first mount it R/O, and then re-mount R/W - 'ubifs_rcvry_gc_commit()' prints: UBIFS error (pid 34456): could not find an empty LEB Analysis of the problem. When mounting R/W, the reply process sets journal heads to buds [1], but when mounting R/O - it does not do this, because the write-buffers are not allocated. So 'ubifs_rcvry_gc_commit()' works completely differently for the same file-system but for the following 2 cases: 1. mounting R/W after a power cut and recover 2. mounting R/O after a power cut, re-mounting R/W and run deferred recovery In the former case, we have journal heads seeked to the a bud, in the latter case, they are non-seeked (wbuf->lnum == -1). So in the latter case we do not try to recover the GC LEB by garbage-collecting to the GC head, but we just try to find an empty LEB, and there may be no empty LEBs, so we just fail. On the other hand, in the former case (mount R/W), we are able to make a GC LEB (@c->gc_lnum) by garbage-collecting. Thus, let's remove this small nice optimization and always allocate write-buffers. This should not make too big difference - we have only 3 of them, each of max. write unit size, which is usually 2KiB. So this is about 6KiB of RAM for the typical case, and only when mounted R/O. [1]: Note, currently the replay process is setting (seeking) the journal heads to _some_ buds, not necessarily to the buds which had been the journal heads before the power cut happened. This will be fixed separately. Signed-off-by: Artem Bityutskiy Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/log.c | 20 -------------------- fs/ubifs/super.c | 15 ++++----------- 2 files changed, 4 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index 4d0cb124146..40fa780ebea 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c @@ -174,26 +174,6 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud) spin_unlock(&c->buds_lock); } -/** - * ubifs_create_buds_lists - create journal head buds lists for remount rw. - * @c: UBIFS file-system description object - */ -void ubifs_create_buds_lists(struct ubifs_info *c) -{ - struct rb_node *p; - - spin_lock(&c->buds_lock); - p = rb_first(&c->buds); - while (p) { - struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb); - struct ubifs_jhead *jhead = &c->jheads[bud->jhead]; - - list_add_tail(&bud->list, &jhead->buds_list); - p = rb_next(p); - } - spin_unlock(&c->buds_lock); -} - /** * ubifs_add_bud_to_log - add a new bud to the log. * @c: UBIFS file-system description object diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index e94d9628a49..e20cb5a9aee 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1235,12 +1235,12 @@ static int mount_ubifs(struct ubifs_info *c) goto out_free; } + err = alloc_wbufs(c); + if (err) + goto out_cbuf; + sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); if (!c->ro_mount) { - err = alloc_wbufs(c); - if (err) - goto out_cbuf; - /* Create background thread */ c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); if (IS_ERR(c->bgt)) { @@ -1603,12 +1603,6 @@ static int ubifs_remount_rw(struct ubifs_info *c) if (err) goto out; - err = alloc_wbufs(c); - if (err) - goto out; - - ubifs_create_buds_lists(c); - /* Create background thread */ c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); if (IS_ERR(c->bgt)) { @@ -1717,7 +1711,6 @@ static void ubifs_remount_ro(struct ubifs_info *c) if (err) ubifs_ro_mode(c, err); - free_wbufs(c); vfree(c->orph_buf); c->orph_buf = NULL; vfree(c->ileb_buf); -- cgit v1.2.3 From f038f4d893e9ef35dd38f1ca01001dfe21c5f8f5 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 25 Apr 2011 18:46:31 +0300 Subject: UBIFS: seek journal heads to the latest bud in replay commit 52c6e6f990669deac3f370f1603815adb55a1dbd upstream. This is the second fix of the following symptom: UBIFS error (pid 34456): could not find an empty LEB which sometimes happens after power cuts when we mount the file-system - UBIFS refuses it with the above error message which comes from the 'ubifs_rcvry_gc_commit()' function. I can reproduce this using the integck test with the UBIFS power cut emulation enabled. Analysis of the problem. Currently UBIFS replay seeks the journal heads to the last _replayed_ bud. But the buds are replayed out-of-order, so the replay basically seeks journal heads to the "random" bud belonging to this head, and not to the _last_ one. The result of this is that the GC head may be seeked to a full LEB with no free space, or very little free space. And 'ubifs_rcvry_gc_commit()' tries to find a fully or mostly dirty LEB to match the current GC head (because we need to garbage-collect that dirty LEB at one go, because we do not have @c->gc_lnum). So 'ubifs_find_dirty_leb()' fails and we fall back to finding an empty LEB and also fail. As a result - recovery fails and mounting fails. This patch teaches the replay to initialize the GC heads exactly to the latest buds, i.e. the buds which have the largest sequence number in corresponding log reference nodes. Signed-off-by: Artem Bityutskiy Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/replay.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index eed0fcff8d7..d3d6d365bfc 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -59,6 +59,7 @@ enum { * @new_size: truncation new size * @free: amount of free space in a bud * @dirty: amount of dirty space in a bud from padding and deletion nodes + * @jhead: journal head number of the bud * * UBIFS journal replay must compare node sequence numbers, which means it must * build a tree of node information to insert into the TNC. @@ -80,6 +81,7 @@ struct replay_entry { struct { int free; int dirty; + int jhead; }; }; }; @@ -159,6 +161,11 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) err = PTR_ERR(lp); goto out; } + + /* Make sure the journal head points to the latest bud */ + err = ubifs_wbuf_seek_nolock(&c->jheads[r->jhead].wbuf, r->lnum, + c->leb_size - r->free, UBI_SHORTTERM); + out: ubifs_release_lprops(c); return err; @@ -627,10 +634,6 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, ubifs_assert(sleb->endpt - offs >= used); ubifs_assert(sleb->endpt % c->min_io_size == 0); - if (sleb->endpt + c->min_io_size <= c->leb_size && !c->ro_mount) - err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum, - sleb->endpt, UBI_SHORTTERM); - *dirty = sleb->endpt - offs - used; *free = c->leb_size - sleb->endpt; @@ -653,12 +656,14 @@ out_dump: * @sqnum: sequence number * @free: amount of free space in bud * @dirty: amount of dirty space from padding and deletion nodes + * @jhead: journal head number for the bud * * This function inserts a reference node to the replay tree and returns zero * in case of success or a negative error code in case of failure. */ static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, - unsigned long long sqnum, int free, int dirty) + unsigned long long sqnum, int free, int dirty, + int jhead) { struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; struct replay_entry *r; @@ -688,6 +693,7 @@ static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, r->flags = REPLAY_REF; r->free = free; r->dirty = dirty; + r->jhead = jhead; rb_link_node(&r->rb, parent, p); rb_insert_color(&r->rb, &c->replay_tree); @@ -712,7 +718,7 @@ static int replay_buds(struct ubifs_info *c) if (err) return err; err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, - free, dirty); + free, dirty, b->bud->jhead); if (err) return err; } -- cgit v1.2.3 From 6f0a6240e8ce1659574e944eac04e871a9dc1fe7 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Wed, 20 Apr 2011 13:09:35 +0100 Subject: Open with O_CREAT flag set fails to open existing files on non writable directories commit 1574dff8996ab1ed92c09012f8038b5566fce313 upstream. An open on a NFS4 share using the O_CREAT flag on an existing file for which we have permissions to open but contained in a directory with no write permissions will fail with EACCES. A tcpdump shows that the client had set the open mode to UNCHECKED which indicates that the file should be created if it doesn't exist and encountering an existing flag is not an error. Since in this case the file exists and can be opened by the user, the NFS server is wrong in attempting to check create permissions on the parent directory. The patch adds a conditional statement to check for create permissions only if the file doesn't exist. Signed-off-by: Sachin S. Prabhu Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/vfs.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index da1d9701f8e..435f407377b 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1363,7 +1363,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out; if (!(iap->ia_valid & ATTR_MODE)) iap->ia_mode = 0; - err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); + err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); if (err) goto out; @@ -1385,6 +1385,13 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, if (IS_ERR(dchild)) goto out_nfserr; + /* If file doesn't exist, check for permissions to create one */ + if (!dchild->d_inode) { + err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); + if (err) + goto out; + } + err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); if (err) goto out; -- cgit v1.2.3 From c99afaf0385d61a8e1747511904c66838270b02b Mon Sep 17 00:00:00 2001 From: Timo Warns Date: Thu, 14 Apr 2011 15:21:56 -0700 Subject: fs/partitions/ldm.c: fix oops caused by corrupted partition table commit c340b1d640001c8c9ecff74f68fd90422ae2448a upstream. The kernel automatically evaluates partition tables of storage devices. The code for evaluating LDM partitions (in fs/partitions/ldm.c) contains a bug that causes a kernel oops on certain corrupted LDM partitions. A kernel subsystem seems to crash, because, after the oops, the kernel no longer recognizes newly connected storage devices. The patch validates the value of vblk_size. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Timo Warns Cc: Eugene Teo Cc: Harvey Harrison Cc: Richard Russon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/partitions/ldm.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index b10e3540d5b..ce4f6244042 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c @@ -1299,6 +1299,11 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags) BUG_ON (!data || !frags); + if (size < 2 * VBLK_SIZE_HEAD) { + ldm_error("Value of size is to small."); + return false; + } + group = get_unaligned_be32(data + 0x08); rec = get_unaligned_be16(data + 0x0C); num = get_unaligned_be16(data + 0x0E); @@ -1306,6 +1311,10 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags) ldm_error ("A VBLK claims to have %d parts.", num); return false; } + if (rec >= num) { + ldm_error("REC value (%d) exceeds NUM value (%d)", rec, num); + return false; + } list_for_each (item, frags) { f = list_entry (item, struct frag, list); @@ -1334,10 +1343,9 @@ found: f->map |= (1 << rec); - if (num > 0) { - data += VBLK_SIZE_HEAD; - size -= VBLK_SIZE_HEAD; - } + data += VBLK_SIZE_HEAD; + size -= VBLK_SIZE_HEAD; + memcpy (f->data+rec*(size-VBLK_SIZE_HEAD)+VBLK_SIZE_HEAD, data, size); return true; -- cgit v1.2.3