From 8e8441043029e384a11ee387cb35ffa1e548860a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 13 Feb 2015 08:23:09 -0500 Subject: dm crypt: use unbound workqueue for request processing Use unbound workqueue by default so that work is automatically balanced between available CPUs. The original behavior of encrypting using the same cpu that IO was submitted on can still be enabled by setting the optional 'same_cpu_crypt' table argument. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer (cherry picked from commit f3396c58fd8442850e759843457d78b6ec3a9589) Signed-off-by: Alex Shi --- drivers/md/dm-crypt.c | 49 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 08981be7baa1..5063c901c0f5 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -108,7 +108,7 @@ struct iv_tcw_private { * Crypt: maps a linear range of a block device * and encrypts / decrypts at the same time. */ -enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID }; +enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID, DM_CRYPT_SAME_CPU }; /* * The fields in here must be read only after initialization. @@ -1688,7 +1688,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) char dummy; static struct dm_arg _args[] = { - {0, 1, "Invalid number of feature args"}, + {0, 2, "Invalid number of feature args"}, }; if (argc < 5) { @@ -1788,15 +1788,23 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (ret) goto bad; - opt_string = dm_shift_arg(&as); + while (opt_params--) { + opt_string = dm_shift_arg(&as); + if (!opt_string) { + ti->error = "Not enough feature arguments"; + goto bad; + } - if (opt_params == 1 && opt_string && - !strcasecmp(opt_string, "allow_discards")) - ti->num_discard_bios = 1; - else if (opt_params) { - ret = -EINVAL; - ti->error = "Invalid feature arguments"; - goto bad; + if (!strcasecmp(opt_string, "allow_discards")) + ti->num_discard_bios = 1; + + else if (!strcasecmp(opt_string, "same_cpu_crypt")) + set_bit(DM_CRYPT_SAME_CPU, &cc->flags); + + else { + ti->error = "Invalid feature arguments"; + goto bad; + } } } @@ -1807,8 +1815,11 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - cc->crypt_queue = alloc_workqueue("kcryptd", - WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1); + if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) + cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1); + else + cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, + num_online_cpus()); if (!cc->crypt_queue) { ti->error = "Couldn't create kcryptd queue"; goto bad; @@ -1860,6 +1871,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type, { struct crypt_config *cc = ti->private; unsigned i, sz = 0; + int num_feature_args = 0; switch (type) { case STATUSTYPE_INFO: @@ -1878,8 +1890,15 @@ static void crypt_status(struct dm_target *ti, status_type_t type, DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset, cc->dev->name, (unsigned long long)cc->start); - if (ti->num_discard_bios) - DMEMIT(" 1 allow_discards"); + num_feature_args += !!ti->num_discard_bios; + num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags); + if (num_feature_args) { + DMEMIT(" %d", num_feature_args); + if (ti->num_discard_bios) + DMEMIT(" allow_discards"); + if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) + DMEMIT(" same_cpu_crypt"); + } break; } @@ -1976,7 +1995,7 @@ static int crypt_iterate_devices(struct dm_target *ti, static struct target_type crypt_target = { .name = "crypt", - .version = {1, 13, 0}, + .version = {1, 14, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, -- cgit v1.2.3 From 17ae725dad3b07abfc30b78c2f47884d4a75261e Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 13 Feb 2015 08:23:52 -0500 Subject: dm crypt: don't allocate pages for a partial request Change crypt_alloc_buffer so that it only ever allocates pages for a full request. This is a prerequisite for the commit "dm crypt: offload writes to thread". This change simplifies the dm-crypt code at the expense of reduced throughput in low memory conditions (where allocation for a partial request is most useful). Note: the next commit ("dm crypt: avoid deadlock in mempools") is needed to fix a theoretical deadlock. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer (cherry picked from commit cf2f1abfbd0dba701f7f16ef619e4d2485de3366) Signed-off-by: Alex Shi --- drivers/md/dm-crypt.c | 139 +++++++++++--------------------------------------- 1 file changed, 30 insertions(+), 109 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 5063c901c0f5..6199245ea6a6 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -58,7 +58,6 @@ struct dm_crypt_io { atomic_t io_pending; int error; sector_t sector; - struct dm_crypt_io *base_io; } CRYPTO_MINALIGN_ATTR; struct dm_crypt_request { @@ -172,7 +171,6 @@ struct crypt_config { }; #define MIN_IOS 16 -#define MIN_POOL_PAGES 32 static struct kmem_cache *_crypt_io_pool; @@ -946,14 +944,13 @@ static int crypt_convert(struct crypt_config *cc, return 0; } +static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone); + /* * Generate a new unfragmented bio with the given size * This should never violate the device limitations - * May return a smaller bio when running out of pages, indicated by - * *out_of_pages set to 1. */ -static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size, - unsigned *out_of_pages) +static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size) { struct crypt_config *cc = io->cc; struct bio *clone; @@ -961,41 +958,27 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size, gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM; unsigned i, len; struct page *page; + struct bio_vec *bvec; clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs); if (!clone) return NULL; clone_init(io, clone); - *out_of_pages = 0; for (i = 0; i < nr_iovecs; i++) { page = mempool_alloc(cc->page_pool, gfp_mask); - if (!page) { - *out_of_pages = 1; - break; - } - - /* - * If additional pages cannot be allocated without waiting, - * return a partially-allocated bio. The caller will then try - * to allocate more bios while submitting this partial bio. - */ - gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT; len = (size > PAGE_SIZE) ? PAGE_SIZE : size; - if (!bio_add_page(clone, page, len, 0)) { - mempool_free(page, cc->page_pool); - break; - } + bvec = &clone->bi_io_vec[clone->bi_vcnt++]; + bvec->bv_page = page; + bvec->bv_len = len; + bvec->bv_offset = 0; - size -= len; - } + clone->bi_iter.bi_size += len; - if (!clone->bi_iter.bi_size) { - bio_put(clone); - return NULL; + size -= len; } return clone; @@ -1020,7 +1003,6 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc, io->base_bio = bio; io->sector = sector; io->error = 0; - io->base_io = NULL; io->ctx.req = NULL; atomic_set(&io->io_pending, 0); } @@ -1033,13 +1015,11 @@ static void crypt_inc_pending(struct dm_crypt_io *io) /* * One of the bios was finished. Check for completion of * the whole request and correctly clean up the buffer. - * If base_io is set, wait for the last fragment to complete. */ static void crypt_dec_pending(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; struct bio *base_bio = io->base_bio; - struct dm_crypt_io *base_io = io->base_io; int error = io->error; if (!atomic_dec_and_test(&io->io_pending)) @@ -1050,13 +1030,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io) if (io != dm_per_bio_data(base_bio, cc->per_bio_data_size)) mempool_free(io, cc->io_pool); - if (likely(!base_io)) - bio_endio(base_bio, error); - else { - if (error && !base_io->error) - base_io->error = error; - crypt_dec_pending(base_io); - } + bio_endio(base_bio, error); } /* @@ -1192,10 +1166,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; struct bio *clone; - struct dm_crypt_io *new_io; int crypt_finished; - unsigned out_of_pages = 0; - unsigned remaining = io->base_bio->bi_iter.bi_size; sector_t sector = io->sector; int r; @@ -1205,80 +1176,30 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) crypt_inc_pending(io); crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, sector); - /* - * The allocated buffers can be smaller than the whole bio, - * so repeat the whole process until all the data can be handled. - */ - while (remaining) { - clone = crypt_alloc_buffer(io, remaining, &out_of_pages); - if (unlikely(!clone)) { - io->error = -ENOMEM; - break; - } - - io->ctx.bio_out = clone; - io->ctx.iter_out = clone->bi_iter; - - remaining -= clone->bi_iter.bi_size; - sector += bio_sectors(clone); - - crypt_inc_pending(io); - - r = crypt_convert(cc, &io->ctx); - if (r < 0) - io->error = -EIO; - - crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending); - - /* Encryption was already finished, submit io now */ - if (crypt_finished) { - kcryptd_crypt_write_io_submit(io, 0); - - /* - * If there was an error, do not try next fragments. - * For async, error is processed in async handler. - */ - if (unlikely(r < 0)) - break; + clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size); + if (unlikely(!clone)) { + io->error = -EIO; + goto dec; + } - io->sector = sector; - } + io->ctx.bio_out = clone; + io->ctx.iter_out = clone->bi_iter; - /* - * Out of memory -> run queues - * But don't wait if split was due to the io size restriction - */ - if (unlikely(out_of_pages)) - congestion_wait(BLK_RW_ASYNC, HZ/100); + sector += bio_sectors(clone); - /* - * With async crypto it is unsafe to share the crypto context - * between fragments, so switch to a new dm_crypt_io structure. - */ - if (unlikely(!crypt_finished && remaining)) { - new_io = mempool_alloc(cc->io_pool, GFP_NOIO); - crypt_io_init(new_io, io->cc, io->base_bio, sector); - crypt_inc_pending(new_io); - crypt_convert_init(cc, &new_io->ctx, NULL, - io->base_bio, sector); - new_io->ctx.iter_in = io->ctx.iter_in; - - /* - * Fragments after the first use the base_io - * pending count. - */ - if (!io->base_io) - new_io->base_io = io; - else { - new_io->base_io = io->base_io; - crypt_inc_pending(io->base_io); - crypt_dec_pending(io); - } + crypt_inc_pending(io); + r = crypt_convert(cc, &io->ctx); + if (r) + io->error = -EIO; + crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending); - io = new_io; - } + /* Encryption was already finished, submit io now */ + if (crypt_finished) { + kcryptd_crypt_write_io_submit(io, 0); + io->sector = sector; } +dec: crypt_dec_pending(io); } @@ -1746,7 +1667,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size, ARCH_KMALLOC_MINALIGN); - cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0); + cc->page_pool = mempool_create_page_pool(BIO_MAX_PAGES, 0); if (!cc->page_pool) { ti->error = "Cannot allocate page mempool"; goto bad; -- cgit v1.2.3 From 1ecdacb3145e01d3cd19cd779546a0c202f476b8 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 13 Feb 2015 08:24:41 -0500 Subject: dm crypt: avoid deadlock in mempools Fix a theoretical deadlock introduced in the previous commit ("dm crypt: don't allocate pages for a partial request"). The function crypt_alloc_buffer may be called concurrently. If we allocate from the mempool concurrently, there is a possibility of deadlock. For example, if we have mempool of 256 pages, two processes, each wanting 256, pages allocate from the mempool concurrently, it may deadlock in a situation where both processes have allocated 128 pages and the mempool is exhausted. To avoid such a scenario we allocate the pages under a mutex. In order to not degrade performance with excessive locking, we try non-blocking allocations without a mutex first and if that fails, we fallback to a blocking allocations with a mutex. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer (cherry picked from commit 7145c241a1bf2841952c3e297c4080b357b3e52d) Signed-off-by: Alex Shi --- drivers/md/dm-crypt.c | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 6199245ea6a6..fa1dba1d06f7 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -124,6 +124,7 @@ struct crypt_config { mempool_t *req_pool; mempool_t *page_pool; struct bio_set *bs; + struct mutex bio_alloc_lock; struct workqueue_struct *io_queue; struct workqueue_struct *crypt_queue; @@ -949,27 +950,51 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone); /* * Generate a new unfragmented bio with the given size * This should never violate the device limitations + * + * This function may be called concurrently. If we allocate from the mempool + * concurrently, there is a possibility of deadlock. For example, if we have + * mempool of 256 pages, two processes, each wanting 256, pages allocate from + * the mempool concurrently, it may deadlock in a situation where both processes + * have allocated 128 pages and the mempool is exhausted. + * + * In order to avoid this scenario we allocate the pages under a mutex. + * + * In order to not degrade performance with excessive locking, we try + * non-blocking allocations without a mutex first but on failure we fallback + * to blocking allocations with a mutex. */ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size) { struct crypt_config *cc = io->cc; struct bio *clone; unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; - gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM; - unsigned i, len; + gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM; + unsigned i, len, remaining_size; struct page *page; struct bio_vec *bvec; +retry: + if (unlikely(gfp_mask & __GFP_WAIT)) + mutex_lock(&cc->bio_alloc_lock); + clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs); if (!clone) - return NULL; + goto return_clone; clone_init(io, clone); + remaining_size = size; + for (i = 0; i < nr_iovecs; i++) { page = mempool_alloc(cc->page_pool, gfp_mask); + if (!page) { + crypt_free_buffer_pages(cc, clone); + bio_put(clone); + gfp_mask |= __GFP_WAIT; + goto retry; + } - len = (size > PAGE_SIZE) ? PAGE_SIZE : size; + len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size; bvec = &clone->bi_io_vec[clone->bi_vcnt++]; bvec->bv_page = page; @@ -978,9 +1003,13 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size) clone->bi_iter.bi_size += len; - size -= len; + remaining_size -= len; } +return_clone: + if (unlikely(gfp_mask & __GFP_WAIT)) + mutex_unlock(&cc->bio_alloc_lock); + return clone; } @@ -1679,6 +1708,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } + mutex_init(&cc->bio_alloc_lock); + ret = -EINVAL; if (sscanf(argv[2], "%llu%c", &tmpll, &dummy) != 1) { ti->error = "Invalid iv_offset sector"; -- cgit v1.2.3 From d0f3cd10bc922f0537b30c6f2b181ca53d56abeb Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 13 Feb 2015 08:25:26 -0500 Subject: dm crypt: remove unused io_pool and _crypt_io_pool The previous commit ("dm crypt: don't allocate pages for a partial request") stopped using the io_pool slab mempool and backing _crypt_io_pool kmem cache. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer (cherry picked from commit 94f5e0243c48aa01441c987743dc468e2d6eaca2) Signed-off-by: Alex Shi --- drivers/md/dm-crypt.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index fa1dba1d06f7..c29daf417aaf 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -120,7 +120,6 @@ struct crypt_config { * pool for per bio private data, crypto requests and * encryption requeusts/buffer pages */ - mempool_t *io_pool; mempool_t *req_pool; mempool_t *page_pool; struct bio_set *bs; @@ -173,8 +172,6 @@ struct crypt_config { #define MIN_IOS 16 -static struct kmem_cache *_crypt_io_pool; - static void clone_init(struct dm_crypt_io *, struct bio *); static void kcryptd_queue_crypt(struct dm_crypt_io *io); static u8 *iv_of_dmreq(struct crypt_config *cc, struct dm_crypt_request *dmreq); @@ -1056,8 +1053,6 @@ static void crypt_dec_pending(struct dm_crypt_io *io) if (io->ctx.req) crypt_free_req(cc, io->ctx.req, base_bio); - if (io != dm_per_bio_data(base_bio, cc->per_bio_data_size)) - mempool_free(io, cc->io_pool); bio_endio(base_bio, error); } @@ -1445,8 +1440,6 @@ static void crypt_dtr(struct dm_target *ti) mempool_destroy(cc->page_pool); if (cc->req_pool) mempool_destroy(cc->req_pool); - if (cc->io_pool) - mempool_destroy(cc->io_pool); if (cc->iv_gen_ops && cc->iv_gen_ops->dtr) cc->iv_gen_ops->dtr(cc); @@ -1660,13 +1653,6 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (ret < 0) goto bad; - ret = -ENOMEM; - cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool); - if (!cc->io_pool) { - ti->error = "Cannot allocate crypt io mempool"; - goto bad; - } - cc->dmreq_start = sizeof(struct ablkcipher_request); cc->dmreq_start += crypto_ablkcipher_reqsize(any_tfm(cc)); cc->dmreq_start = ALIGN(cc->dmreq_start, __alignof__(struct dm_crypt_request)); @@ -1684,6 +1670,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) iv_size_padding = crypto_ablkcipher_alignmask(any_tfm(cc)); } + ret = -ENOMEM; cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start + sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size); if (!cc->req_pool) { @@ -1965,15 +1952,9 @@ static int __init dm_crypt_init(void) { int r; - _crypt_io_pool = KMEM_CACHE(dm_crypt_io, 0); - if (!_crypt_io_pool) - return -ENOMEM; - r = dm_register_target(&crypt_target); - if (r < 0) { + if (r < 0) DMERR("register failed %d", r); - kmem_cache_destroy(_crypt_io_pool); - } return r; } @@ -1981,7 +1962,6 @@ static int __init dm_crypt_init(void) static void __exit dm_crypt_exit(void) { dm_unregister_target(&crypt_target); - kmem_cache_destroy(_crypt_io_pool); } module_init(dm_crypt_init); -- cgit v1.2.3 From ca6252dd0992541073a4ecd87f127b38d79b6820 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 13 Feb 2015 08:25:59 -0500 Subject: dm crypt: offload writes to thread Submitting write bios directly in the encryption thread caused serious performance degradation. On a multiprocessor machine, encryption requests finish in a different order than they were submitted. Consequently, write requests would be submitted in a different order and it could cause severe performance degradation. Move the submission of write requests to a separate thread so that the requests can be sorted before submitting. But this commit improves dm-crypt performance even without having dm-crypt perform request sorting (in particular it enables IO schedulers like CFQ to sort more effectively). Note: it is required that a previous commit ("dm crypt: don't allocate pages for a partial request") be applied before applying this patch. Otherwise, this commit could introduce a crash. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer (cherry picked from commit dc2676210c425ee8e5cb1bec5bc84d004ddf4179) Signed-off-by: Alex Shi --- drivers/md/dm-crypt.c | 114 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 94 insertions(+), 20 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index c29daf417aaf..8c0e36b1d0ed 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -58,6 +59,8 @@ struct dm_crypt_io { atomic_t io_pending; int error; sector_t sector; + + struct list_head list; } CRYPTO_MINALIGN_ATTR; struct dm_crypt_request { @@ -128,6 +131,10 @@ struct crypt_config { struct workqueue_struct *io_queue; struct workqueue_struct *crypt_queue; + struct task_struct *write_thread; + wait_queue_head_t write_thread_wait; + struct list_head write_thread_list; + char *cipher; char *cipher_string; @@ -1136,37 +1143,89 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) return 0; } +static void kcryptd_io_read_work(struct work_struct *work) +{ + struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); + + crypt_inc_pending(io); + if (kcryptd_io_read(io, GFP_NOIO)) + io->error = -ENOMEM; + crypt_dec_pending(io); +} + +static void kcryptd_queue_read(struct dm_crypt_io *io) +{ + struct crypt_config *cc = io->cc; + + INIT_WORK(&io->work, kcryptd_io_read_work); + queue_work(cc->io_queue, &io->work); +} + static void kcryptd_io_write(struct dm_crypt_io *io) { struct bio *clone = io->ctx.bio_out; + generic_make_request(clone); } -static void kcryptd_io(struct work_struct *work) +static int dmcrypt_write(void *data) { - struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); + struct crypt_config *cc = data; + while (1) { + struct list_head local_list; + struct blk_plug plug; - if (bio_data_dir(io->base_bio) == READ) { - crypt_inc_pending(io); - if (kcryptd_io_read(io, GFP_NOIO)) - io->error = -ENOMEM; - crypt_dec_pending(io); - } else - kcryptd_io_write(io); -} + DECLARE_WAITQUEUE(wait, current); -static void kcryptd_queue_io(struct dm_crypt_io *io) -{ - struct crypt_config *cc = io->cc; + spin_lock_irq(&cc->write_thread_wait.lock); +continue_locked: - INIT_WORK(&io->work, kcryptd_io); - queue_work(cc->io_queue, &io->work); + if (!list_empty(&cc->write_thread_list)) + goto pop_from_list; + + __set_current_state(TASK_INTERRUPTIBLE); + __add_wait_queue(&cc->write_thread_wait, &wait); + + spin_unlock_irq(&cc->write_thread_wait.lock); + + if (unlikely(kthread_should_stop())) { + set_task_state(current, TASK_RUNNING); + remove_wait_queue(&cc->write_thread_wait, &wait); + break; + } + + schedule(); + + set_task_state(current, TASK_RUNNING); + spin_lock_irq(&cc->write_thread_wait.lock); + __remove_wait_queue(&cc->write_thread_wait, &wait); + goto continue_locked; + +pop_from_list: + local_list = cc->write_thread_list; + local_list.next->prev = &local_list; + local_list.prev->next = &local_list; + INIT_LIST_HEAD(&cc->write_thread_list); + + spin_unlock_irq(&cc->write_thread_wait.lock); + + blk_start_plug(&plug); + do { + struct dm_crypt_io *io = container_of(local_list.next, + struct dm_crypt_io, list); + list_del(&io->list); + kcryptd_io_write(io); + } while (!list_empty(&local_list)); + blk_finish_plug(&plug); + } + return 0; } static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) { struct bio *clone = io->ctx.bio_out; struct crypt_config *cc = io->cc; + unsigned long flags; if (unlikely(io->error < 0)) { crypt_free_buffer_pages(cc, clone); @@ -1180,10 +1239,10 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) clone->bi_iter.bi_sector = cc->start + io->sector; - if (async) - kcryptd_queue_io(io); - else - generic_make_request(clone); + spin_lock_irqsave(&cc->write_thread_wait.lock, flags); + list_add_tail(&io->list, &cc->write_thread_list); + wake_up_locked(&cc->write_thread_wait); + spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags); } static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) @@ -1426,6 +1485,9 @@ static void crypt_dtr(struct dm_target *ti) if (!cc) return; + if (cc->write_thread) + kthread_stop(cc->write_thread); + if (cc->io_queue) destroy_workqueue(cc->io_queue); if (cc->crypt_queue) @@ -1764,6 +1826,18 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } + init_waitqueue_head(&cc->write_thread_wait); + INIT_LIST_HEAD(&cc->write_thread_list); + + cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write"); + if (IS_ERR(cc->write_thread)) { + ret = PTR_ERR(cc->write_thread); + cc->write_thread = NULL; + ti->error = "Couldn't spawn write thread"; + goto bad; + } + wake_up_process(cc->write_thread); + ti->num_flush_bios = 1; ti->discard_zeroes_data_unsupported = true; @@ -1798,7 +1872,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) if (bio_data_dir(io->base_bio) == READ) { if (kcryptd_io_read(io, GFP_NOWAIT)) - kcryptd_queue_io(io); + kcryptd_queue_read(io); } else kcryptd_queue_crypt(io); -- cgit v1.2.3 From f97bf3c95befc1940b03ae0df30c03e168086a7d Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 13 Feb 2015 08:27:08 -0500 Subject: dm crypt: add 'submit_from_crypt_cpus' option Make it possible to disable offloading writes by setting the optional 'submit_from_crypt_cpus' table argument. There are some situations where offloading write bios from the encryption threads to a single thread degrades performance significantly. The default is to offload write bios to the same thread because it benefits CFQ to have writes submitted using the same IO context. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer (cherry picked from commit 0f5d8e6ee758f7023e4353cca75d785b2d4f6abe) Signed-off-by: Alex Shi --- drivers/md/dm-crypt.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 8c0e36b1d0ed..4519a7c0098c 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -110,7 +110,8 @@ struct iv_tcw_private { * Crypt: maps a linear range of a block device * and encrypts / decrypts at the same time. */ -enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID, DM_CRYPT_SAME_CPU }; +enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID, + DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD }; /* * The fields in here must be read only after initialization. @@ -1239,6 +1240,11 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) clone->bi_iter.bi_sector = cc->start + io->sector; + if (likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) { + generic_make_request(clone); + return; + } + spin_lock_irqsave(&cc->write_thread_wait.lock, flags); list_add_tail(&io->list, &cc->write_thread_list); wake_up_locked(&cc->write_thread_wait); @@ -1693,7 +1699,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) char dummy; static struct dm_arg _args[] = { - {0, 2, "Invalid number of feature args"}, + {0, 3, "Invalid number of feature args"}, }; if (argc < 5) { @@ -1802,6 +1808,9 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) else if (!strcasecmp(opt_string, "same_cpu_crypt")) set_bit(DM_CRYPT_SAME_CPU, &cc->flags); + else if (!strcasecmp(opt_string, "submit_from_crypt_cpus")) + set_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags); + else { ti->error = "Invalid feature arguments"; goto bad; @@ -1905,12 +1914,15 @@ static void crypt_status(struct dm_target *ti, status_type_t type, num_feature_args += !!ti->num_discard_bios; num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags); + num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags); if (num_feature_args) { DMEMIT(" %d", num_feature_args); if (ti->num_discard_bios) DMEMIT(" allow_discards"); if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) DMEMIT(" same_cpu_crypt"); + if (test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) + DMEMIT(" submit_from_crypt_cpus"); } break; -- cgit v1.2.3 From b73733c2694f4358c95cd57c116d3b9a4e44915b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 13 Feb 2015 08:27:41 -0500 Subject: dm crypt: sort writes Write requests are sorted in a red-black tree structure and are submitted in the sorted order. In theory the sorting should be performed by the underlying disk scheduler, however, in practice the disk scheduler only accepts and sorts a finite number of requests. To allow the sorting of all requests, dm-crypt needs to implement its own sorting. The overhead associated with rbtree-based sorting is considered negligible so it is not used conditionally. Even on SSD sorting can be beneficial since in-order request dispatch promotes lower latency IO completion to the upper layers. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer (cherry picked from commit b3c5fd3052492f1b8d060799d4f18be5a5438add) Signed-off-by: Alex Shi --- drivers/md/dm-crypt.c | 51 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 4519a7c0098c..713a96237a80 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -60,7 +61,7 @@ struct dm_crypt_io { int error; sector_t sector; - struct list_head list; + struct rb_node rb_node; } CRYPTO_MINALIGN_ATTR; struct dm_crypt_request { @@ -134,7 +135,7 @@ struct crypt_config { struct task_struct *write_thread; wait_queue_head_t write_thread_wait; - struct list_head write_thread_list; + struct rb_root write_tree; char *cipher; char *cipher_string; @@ -1169,11 +1170,15 @@ static void kcryptd_io_write(struct dm_crypt_io *io) generic_make_request(clone); } +#define crypt_io_from_node(node) rb_entry((node), struct dm_crypt_io, rb_node) + static int dmcrypt_write(void *data) { struct crypt_config *cc = data; + struct dm_crypt_io *io; + while (1) { - struct list_head local_list; + struct rb_root write_tree; struct blk_plug plug; DECLARE_WAITQUEUE(wait, current); @@ -1181,7 +1186,7 @@ static int dmcrypt_write(void *data) spin_lock_irq(&cc->write_thread_wait.lock); continue_locked: - if (!list_empty(&cc->write_thread_list)) + if (!RB_EMPTY_ROOT(&cc->write_tree)) goto pop_from_list; __set_current_state(TASK_INTERRUPTIBLE); @@ -1203,20 +1208,22 @@ continue_locked: goto continue_locked; pop_from_list: - local_list = cc->write_thread_list; - local_list.next->prev = &local_list; - local_list.prev->next = &local_list; - INIT_LIST_HEAD(&cc->write_thread_list); - + write_tree = cc->write_tree; + cc->write_tree = RB_ROOT; spin_unlock_irq(&cc->write_thread_wait.lock); + BUG_ON(rb_parent(write_tree.rb_node)); + + /* + * Note: we cannot walk the tree here with rb_next because + * the structures may be freed when kcryptd_io_write is called. + */ blk_start_plug(&plug); do { - struct dm_crypt_io *io = container_of(local_list.next, - struct dm_crypt_io, list); - list_del(&io->list); + io = crypt_io_from_node(rb_first(&write_tree)); + rb_erase(&io->rb_node, &write_tree); kcryptd_io_write(io); - } while (!list_empty(&local_list)); + } while (!RB_EMPTY_ROOT(&write_tree)); blk_finish_plug(&plug); } return 0; @@ -1227,6 +1234,8 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) struct bio *clone = io->ctx.bio_out; struct crypt_config *cc = io->cc; unsigned long flags; + sector_t sector; + struct rb_node **rbp, *parent; if (unlikely(io->error < 0)) { crypt_free_buffer_pages(cc, clone); @@ -1246,7 +1255,19 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) } spin_lock_irqsave(&cc->write_thread_wait.lock, flags); - list_add_tail(&io->list, &cc->write_thread_list); + rbp = &cc->write_tree.rb_node; + parent = NULL; + sector = io->sector; + while (*rbp) { + parent = *rbp; + if (sector < crypt_io_from_node(parent)->sector) + rbp = &(*rbp)->rb_left; + else + rbp = &(*rbp)->rb_right; + } + rb_link_node(&io->rb_node, parent, rbp); + rb_insert_color(&io->rb_node, &cc->write_tree); + wake_up_locked(&cc->write_thread_wait); spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags); } @@ -1836,7 +1857,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } init_waitqueue_head(&cc->write_thread_wait); - INIT_LIST_HEAD(&cc->write_thread_list); + cc->write_tree = RB_ROOT; cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write"); if (IS_ERR(cc->write_thread)) { -- cgit v1.2.3 From 9ca6d6e13e02dd9f3959fa06e11e24e6ea37050a Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Sun, 5 Apr 2015 18:03:10 +0200 Subject: dm crypt: update URLs to new cryptsetup project page Cryptsetup home page moved to GitLab. Also remove link to abandonded Truecrypt page. Signed-off-by: Milan Broz Signed-off-by: Mike Snitzer (cherry picked from commit e44f23b32dc7916b2bc12817e2f723fefa21ba41) Signed-off-by: Alex Shi --- drivers/md/dm-crypt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 713a96237a80..ea09d5464a9f 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -228,7 +228,7 @@ static struct crypto_ablkcipher *any_tfm(struct crypt_config *cc) * * tcw: Compatible implementation of the block chaining mode used * by the TrueCrypt device encryption system (prior to version 4.1). - * For more info see: http://www.truecrypt.org + * For more info see: https://gitlab.com/cryptsetup/cryptsetup/wikis/TrueCryptOnDiskFormat * It operates on full 512 byte sectors and uses CBC * with an IV derived from initial key and the sector number. * In addition, whitening value is applied on every sector, whitening -- cgit v1.2.3 From a2c92743350cfbe88562bb9d153d8f62d994302e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 9 Apr 2015 16:53:24 -0400 Subject: dm crypt: leverage immutable biovecs when decrypting on read Commit 003b5c571 ("block: Convert drivers to immutable biovecs") stopped short of changing dm-crypt to leverage the fact that the biovec array of a bio will no longer be modified. Switch to using bio_clone_fast() when cloning bios for decryption after read. Signed-off-by: Mike Snitzer (cherry picked from commit 5977907937afa2b5584a874d44ba6c0f56aeaa9c) Signed-off-by: Alex Shi --- drivers/md/dm-crypt.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index ea09d5464a9f..aa1238facbeb 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1124,15 +1124,15 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone) static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) { struct crypt_config *cc = io->cc; - struct bio *base_bio = io->base_bio; struct bio *clone; /* - * The block layer might modify the bvec array, so always - * copy the required bvecs because we need the original - * one in order to decrypt the whole bio data *afterwards*. + * We need the original biovec array in order to decrypt + * the whole bio data *afterwards* -- thanks to immutable + * biovecs we don't need to worry about the block layer + * modifying the biovec array; so leverage bio_clone_fast(). */ - clone = bio_clone_bioset(base_bio, gfp, cc->bs); + clone = bio_clone_fast(io->base_bio, gfp, cc->bs); if (!clone) return 1; -- cgit v1.2.3 From 5bc4b6f3cb518bb99b481592db234da9d34fe9ac Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 16 Apr 2015 22:00:50 -0400 Subject: dm crypt: fix missing error code return from crypt_ctr error path Fix to return a negative error code from crypt_ctr()'s optional parameter processing error path. Signed-off-by: Wei Yongjun Signed-off-by: Mike Snitzer (cherry picked from commit 44c144f9c8e8fbd73ede2848da8253b3aae42ec2) Signed-off-by: Alex Shi --- drivers/md/dm-crypt.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index aa1238facbeb..5503e43e5f28 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1816,6 +1816,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (ret) goto bad; + ret = -EINVAL; while (opt_params--) { opt_string = dm_shift_arg(&as); if (!opt_string) { -- cgit v1.2.3