From 10eb3a0d517fcc83eeea4242c149461205675eb4 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 14 Jun 2022 11:10:28 -0500 Subject: dm: fix race in dm_start_io_acct After commit 82f6cdcc3676c ("dm: switch dm_io booleans over to proper flags") dm_start_io_acct stopped atomically checking and setting was_accounted, which turned into the DM_IO_ACCOUNTED flag. This opened the possibility for a race where IO accounting is started twice for duplicate bios. To remove the race, check the flag while holding the io->lock. Fixes: 82f6cdcc3676c ("dm: switch dm_io booleans over to proper flags") Cc: stable@vger.kernel.org Signed-off-by: Benjamin Marzinski Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d8f16183bf27..d5e6d33700e5 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -555,6 +555,10 @@ static void dm_start_io_acct(struct dm_io *io, struct bio *clone) unsigned long flags; /* Can afford locking given DM_TIO_IS_DUPLICATE_BIO */ spin_lock_irqsave(&io->lock, flags); + if (dm_io_flagged(io, DM_IO_ACCOUNTED)) { + spin_unlock_irqrestore(&io->lock, flags); + return; + } dm_io_set_flag(io, DM_IO_ACCOUNTED); spin_unlock_irqrestore(&io->lock, flags); } -- cgit v1.2.3 From 5d7362d0d56da3b85b19b5e5ce657026c2eef479 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 16 Jun 2022 13:21:27 -0400 Subject: dm: fix use-after-free in dm_put_live_table_bio dm_put_live_table_bio is called from the end of dm_submit_bio. However, at this point, the bio may be already finished and the caller may have freed the bio. Consequently, dm_put_live_table_bio accesses the stale "bio" pointer. Fix this bug by loading the bi_opf value and passing it to dm_get_live_table_bio and dm_put_live_table_bio instead of the bio. This bug was found by running the lvm2 testsuite with kasan. Fixes: 563a225c9fd2 ("dm: introduce dm_{get,put}_live_table_bio called from dm_submit_bio") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d5e6d33700e5..6ea14ab94aa6 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -715,18 +715,18 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU) } static inline struct dm_table *dm_get_live_table_bio(struct mapped_device *md, - int *srcu_idx, struct bio *bio) + int *srcu_idx, unsigned bio_opf) { - if (bio->bi_opf & REQ_NOWAIT) + if (bio_opf & REQ_NOWAIT) return dm_get_live_table_fast(md); else return dm_get_live_table(md, srcu_idx); } static inline void dm_put_live_table_bio(struct mapped_device *md, int srcu_idx, - struct bio *bio) + unsigned bio_opf) { - if (bio->bi_opf & REQ_NOWAIT) + if (bio_opf & REQ_NOWAIT) dm_put_live_table_fast(md); else dm_put_live_table(md, srcu_idx); @@ -1715,8 +1715,9 @@ static void dm_submit_bio(struct bio *bio) struct mapped_device *md = bio->bi_bdev->bd_disk->private_data; int srcu_idx; struct dm_table *map; + unsigned bio_opf = bio->bi_opf; - map = dm_get_live_table_bio(md, &srcu_idx, bio); + map = dm_get_live_table_bio(md, &srcu_idx, bio_opf); /* If suspended, or map not yet available, queue this IO for later */ if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) || @@ -1732,7 +1733,7 @@ static void dm_submit_bio(struct bio *bio) dm_split_and_process_bio(md, map, bio); out: - dm_put_live_table_bio(md, srcu_idx, bio); + dm_put_live_table_bio(md, srcu_idx, bio_opf); } static bool dm_poll_dm_io(struct dm_io *io, struct io_comp_batch *iob, -- cgit v1.2.3 From 1ee88de395c3ad6791c4baeba40e83b6ec97657a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 16 Jun 2022 14:14:39 -0400 Subject: dm: fix narrow race for REQ_NOWAIT bios being issued despite no support Starting with the commit 63a225c9fd20, device mapper has an optimization that it will take cheaper table lock (dm_get_live_table_fast instead of dm_get_live_table) if the bio has REQ_NOWAIT. The bios with REQ_NOWAIT must not block in the target request routine, if they did, we would be blocking while holding rcu_read_lock, which is prohibited. The targets that are suitable for REQ_NOWAIT optimization (and that don't block in the map routine) have the flag DM_TARGET_NOWAIT set. Device mapper will test if all the targets and all the devices in a table support nowait (see the function dm_table_supports_nowait) and it will set or clear the QUEUE_FLAG_NOWAIT flag on its request queue according to this check. There's a test in submit_bio_noacct: "if ((bio->bi_opf & REQ_NOWAIT) && !blk_queue_nowait(q)) goto not_supported" - this will make sure that REQ_NOWAIT bios can't enter a request queue that doesn't support them. This mechanism works to prevent REQ_NOWAIT bios from reaching dm targets that don't support the REQ_NOWAIT flag (and that may block in the map routine) - except that there is a small race condition: submit_bio_noacct checks if the queue has the QUEUE_FLAG_NOWAIT without holding any locks. Immediatelly after this check, the device mapper table may be reloaded with a table that doesn't support REQ_NOWAIT (for example, if we start moving the logical volume or if we activate a snapshot). However the REQ_NOWAIT bio that already passed the check in submit_bio_noacct would be sent to device mapper, where it could be redirected to a dm target that doesn't support REQ_NOWAIT - the result is sleeping while we hold rcu_read_lock. In order to fix this race, we double-check if the target supports REQ_NOWAIT while we hold the table lock (so that the table can't change under us). Fixes: 563a225c9fd2 ("dm: introduce dm_{get,put}_live_table_bio called from dm_submit_bio") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 6ea14ab94aa6..b6b25d319ef7 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1613,7 +1613,12 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci) ti = dm_table_find_target(ci->map, ci->sector); if (unlikely(!ti)) return BLK_STS_IOERR; - else if (unlikely(ci->is_abnormal_io)) + + if (unlikely((ci->bio->bi_opf & REQ_NOWAIT) != 0) && + unlikely(!dm_target_supports_nowait(ti->type))) + return BLK_STS_NOTSUPP; + + if (unlikely(ci->is_abnormal_io)) return __process_abnormal_io(ci, ti); /* -- cgit v1.2.3 From 85e123c27d5cbc22cfdc01de1e2ca1d9003a02d0 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 16 Jun 2022 13:28:57 -0400 Subject: dm mirror log: round up region bitmap size to BITS_PER_LONG The code in dm-log rounds up bitset_size to 32 bits. It then uses find_next_zero_bit_le on the allocated region. find_next_zero_bit_le accesses the bitmap using unsigned long pointers. So, on 64-bit architectures, it may access 4 bytes beyond the allocated size. Fix this bug by rounding up bitset_size to BITS_PER_LONG. This bug was found by running the lvm2 testsuite with kasan. Fixes: 29121bd0b00e ("[PATCH] dm mirror log: bitset_size fix") Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-log.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 06f328928a7f..2dda05aada23 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -415,8 +415,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, /* * Work out how many "unsigned long"s we need to hold the bitset. */ - bitset_size = dm_round_up(region_count, - sizeof(*lc->clean_bits) << BYTE_SHIFT); + bitset_size = dm_round_up(region_count, BITS_PER_LONG); bitset_size >>= BYTE_SHIFT; lc->bitset_uint32_count = bitset_size / sizeof(*lc->clean_bits); -- cgit v1.2.3