From 622b8b6893ff3096e130250c1298adf57a0cab03 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 24 Jul 2019 11:48:42 +0800 Subject: nvme: wait until all completed request's complete fn is called When aborting in-flight request for recovering controller, we have to make sure that queue's complete function is called on completed request before moving on. Otherwise, for example, the warning of WARN_ON_ONCE(qp->mrs_used > 0) in ib_destroy_qp_user() may be triggered on nvme-rdma. Fix this issue by using blk_mq_tagset_wait_completed_request. Cc: Max Gurtovoy Cc: Sagi Grimberg Cc: Keith Busch Cc: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/nvme/target/loop.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index b16dc3981c69..95c8f1732215 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -407,6 +407,7 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) nvme_stop_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, &ctrl->ctrl); + blk_mq_tagset_wait_completed_request(&ctrl->tag_set); nvme_loop_destroy_io_queues(ctrl); } @@ -416,6 +417,7 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) blk_mq_quiesce_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request, &ctrl->ctrl); + blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set); blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); nvme_loop_destroy_admin_queue(ctrl); } -- cgit v1.2.3 From c0f2f45be2976abe973c8cd544f38e2d928771b0 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 22 Jul 2019 17:06:53 -0700 Subject: nvme: move sqsize setting to the core nvme_enable_ctrl reads the cap register right after, so no need to do that locally in the transport driver. Have sqsize setting in nvme_init_identify. Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/target/loop.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 95c8f1732215..ec0bc57d26fc 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -369,17 +369,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); - error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap); - if (error) { - dev_err(ctrl->ctrl.device, - "prop_get NVME_REG_CAP failed\n"); - goto out_cleanup_queue; - } - - ctrl->ctrl.sqsize = - min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize); - - error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); + error = nvme_enable_ctrl(&ctrl->ctrl); if (error) goto out_cleanup_queue; -- cgit v1.2.3 From 3bec2e3754becebd4c452999adb49bc62c575ea4 Mon Sep 17 00:00:00 2001 From: Tom Wu Date: Thu, 8 Aug 2019 02:22:36 +0000 Subject: nvmet: fix data units read and written counters in SMART log In nvme spec 1.3 there is a definition for data write/read counters from SMART log, (See section 5.14.1.2): This value is reported in thousands (i.e., a value of 1 corresponds to 1000 units of 512 bytes read) and is rounded up. However, in nvme target where value is reported with actual units, but not thousands of units as the spec requires. Signed-off-by: Tom Wu Reviewed-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/target/admin-cmd.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 4dc12ea52f23..51800a9ce9a9 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -81,9 +81,11 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, goto out; host_reads = part_stat_read(ns->bdev->bd_part, ios[READ]); - data_units_read = part_stat_read(ns->bdev->bd_part, sectors[READ]); + data_units_read = DIV_ROUND_UP(part_stat_read(ns->bdev->bd_part, + sectors[READ]), 1000); host_writes = part_stat_read(ns->bdev->bd_part, ios[WRITE]); - data_units_written = part_stat_read(ns->bdev->bd_part, sectors[WRITE]); + data_units_written = DIV_ROUND_UP(part_stat_read(ns->bdev->bd_part, + sectors[WRITE]), 1000); put_unaligned_le64(host_reads, &slog->host_reads[0]); put_unaligned_le64(data_units_read, &slog->data_units_read[0]); @@ -111,11 +113,11 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req, if (!ns->bdev) continue; host_reads += part_stat_read(ns->bdev->bd_part, ios[READ]); - data_units_read += - part_stat_read(ns->bdev->bd_part, sectors[READ]); + data_units_read += DIV_ROUND_UP( + part_stat_read(ns->bdev->bd_part, sectors[READ]), 1000); host_writes += part_stat_read(ns->bdev->bd_part, ios[WRITE]); - data_units_written += - part_stat_read(ns->bdev->bd_part, sectors[WRITE]); + data_units_written += DIV_ROUND_UP( + part_stat_read(ns->bdev->bd_part, sectors[WRITE]), 1000); } rcu_read_unlock(); -- cgit v1.2.3 From 42df26d4df7b4437db7d3847c36abc3e5aa237f1 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sun, 4 Aug 2019 16:50:50 +0900 Subject: nvmet: trace: parse Get LBA Status command in detail Four different fields are in CDWs of Get LBA Status command which means it would be great if we can see in detail when tracing in target side also. Signed-off-by: Minwoo Im Signed-off-by: Sagi Grimberg --- drivers/nvme/target/trace.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/trace.c b/drivers/nvme/target/trace.c index 6af11d493271..1373a3c67962 100644 --- a/drivers/nvme/target/trace.c +++ b/drivers/nvme/target/trace.c @@ -33,6 +33,22 @@ static const char *nvmet_trace_admin_get_features(struct trace_seq *p, return ret; } +static const char *nvmet_trace_get_lba_status(struct trace_seq *p, + u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + u64 slba = get_unaligned_le64(cdw10); + u32 mndw = get_unaligned_le32(cdw10 + 8); + u16 rl = get_unaligned_le16(cdw10 + 12); + u8 atype = cdw10[15]; + + trace_seq_printf(p, "slba=0x%llx, mndw=0x%x, rl=0x%x, atype=%u", + slba, mndw, rl, atype); + trace_seq_putc(p, 0); + + return ret; +} + static const char *nvmet_trace_read_write(struct trace_seq *p, u8 *cdw10) { const char *ret = trace_seq_buffer_ptr(p); @@ -80,6 +96,8 @@ const char *nvmet_trace_parse_admin_cmd(struct trace_seq *p, return nvmet_trace_admin_identify(p, cdw10); case nvme_admin_get_features: return nvmet_trace_admin_get_features(p, cdw10); + case nvme_admin_get_lba_status: + return nvmet_trace_get_lba_status(p, cdw10); default: return nvmet_trace_common(p, cdw10); } -- cgit v1.2.3 From b627200762c7e8153fe1620fdd52a68f4ca2f8a5 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 2 Aug 2019 20:23:38 -0700 Subject: nvmet-tcp: fix possible NULL deref We must only call sgl_free for sgl that we actually allocated. Signed-off-by: Sagi Grimberg --- drivers/nvme/target/tcp.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 69b83fa0c76c..0d63f3da0117 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -348,7 +348,8 @@ static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd) return 0; err: - sgl_free(cmd->req.sg); + if (cmd->req.sg_cnt) + sgl_free(cmd->req.sg); return NVME_SC_INTERNAL; } @@ -553,7 +554,8 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd) if (queue->nvme_sq.sqhd_disabled) { kfree(cmd->iov); - sgl_free(cmd->req.sg); + if (cmd->req.sg_cnt) + sgl_free(cmd->req.sg); } return 1; @@ -584,7 +586,8 @@ static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd, return -EAGAIN; kfree(cmd->iov); - sgl_free(cmd->req.sg); + if (cmd->req.sg_cnt) + sgl_free(cmd->req.sg); cmd->queue->snd_cmd = NULL; nvmet_tcp_put_cmd(cmd); return 1; @@ -1306,7 +1309,8 @@ static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd) { nvmet_req_uninit(&cmd->req); nvmet_tcp_unmap_pdu_iovec(cmd); - sgl_free(cmd->req.sg); + if (cmd->req.sg_cnt) + sgl_free(cmd->req.sg); } static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) -- cgit v1.2.3 From 35d1a938dcdaeb8e1d860f061a0cd11f67f42774 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 2 Aug 2019 20:29:11 -0700 Subject: nvmet-tcp: fix possible memory leak when we uninit a command in error flow we also need to free an iovec if it was allocated. Reviewed-by: Max Gurtovoy Signed-off-by: Sagi Grimberg --- drivers/nvme/target/tcp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 0d63f3da0117..76e43750b9e5 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1309,6 +1309,7 @@ static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd) { nvmet_req_uninit(&cmd->req); nvmet_tcp_unmap_pdu_iovec(cmd); + kfree(cmd->iov); if (cmd->req.sg_cnt) sgl_free(cmd->req.sg); } -- cgit v1.2.3 From 89275a9659fe57a3c7eef6778ec64f9e435c75eb Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Sun, 18 Aug 2019 12:08:55 +0300 Subject: nvmet-tcp: Add TOS for tcp transport Set the outgoing packets type of service (TOS) according to the receiving TOS. Signed-off-by: Israel Rukshin Suggested-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Sagi Grimberg --- drivers/nvme/target/tcp.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 76e43750b9e5..bf4f03474e89 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1415,6 +1415,7 @@ done: static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue) { struct socket *sock = queue->sock; + struct inet_sock *inet = inet_sk(sock->sk); struct linger sol = { .l_onoff = 1, .l_linger = 0 }; int ret; @@ -1438,6 +1439,16 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue) if (ret) return ret; + /* Set socket type of service */ + if (inet->rcv_tos > 0) { + int tos = inet->rcv_tos; + + ret = kernel_setsockopt(sock, SOL_IP, IP_TOS, + (char *)&tos, sizeof(tos)); + if (ret) + return ret; + } + write_lock_bh(&sock->sk->sk_callback_lock); sock->sk->sk_user_data = queue; queue->data_ready = sock->sk->sk_data_ready; -- cgit v1.2.3 From e7832cb48a654cd12b2bc9181b2f0ad49d526ac6 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 2 Aug 2019 19:33:59 -0700 Subject: nvme: make fabrics command run on a separate request queue We have a fundamental issue that fabric commands use the admin_q. The reason is, that admin-connect, register reads and writes and admin commands cannot be guaranteed ordering while we are running controller resets. For example, when we reset a controller we perform: 1. disable the controller 2. teardown the admin queue 3. re-establish the admin queue 4. enable the controller In order to perform (3), we need to unquiesce the admin queue, however we may have some admin commands that are already pending on the quiesced admin_q and will immediate execute when we unquiesce it before we execute (4). The host must not send admin commands to the controller before enabling the controller. To fix this, we have the fabric commands (admin connect and property get/set, but not I/O queue connect) use a separate fabrics_q and make sure to quiesce the admin_q before we disable the controller, and unquiesce it only after we enable the controller. This fixes the error prints from nvmet in a controller reset storm test: kernel: nvmet: got cmd 6 while CC.EN == 0 on qid = 0 Which indicate that the host is sending an admin command when the controller is not enabled. Reviewed-by: James Smart Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/target/loop.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index ec0bc57d26fc..9ee093b9fc74 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -253,6 +253,7 @@ static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); blk_cleanup_queue(ctrl->ctrl.admin_q); + blk_cleanup_queue(ctrl->ctrl.fabrics_q); blk_mq_free_tag_set(&ctrl->admin_tag_set); } @@ -357,10 +358,16 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) goto out_free_sq; ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set; + ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set); + if (IS_ERR(ctrl->ctrl.fabrics_q)) { + error = PTR_ERR(ctrl->ctrl.fabrics_q); + goto out_free_tagset; + } + ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); if (IS_ERR(ctrl->ctrl.admin_q)) { error = PTR_ERR(ctrl->ctrl.admin_q); - goto out_free_tagset; + goto out_cleanup_fabrics_q; } error = nvmf_connect_admin_queue(&ctrl->ctrl); @@ -376,6 +383,8 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) ctrl->ctrl.max_hw_sectors = (NVME_LOOP_MAX_SEGMENTS - 1) << (PAGE_SHIFT - 9); + blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); + error = nvme_init_identify(&ctrl->ctrl); if (error) goto out_cleanup_queue; @@ -384,6 +393,8 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) out_cleanup_queue: blk_cleanup_queue(ctrl->ctrl.admin_q); +out_cleanup_fabrics_q: + blk_cleanup_queue(ctrl->ctrl.fabrics_q); out_free_tagset: blk_mq_free_tag_set(&ctrl->admin_tag_set); out_free_sq: @@ -401,14 +412,13 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) nvme_loop_destroy_io_queues(ctrl); } + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); if (ctrl->ctrl.state == NVME_CTRL_LIVE) nvme_shutdown_ctrl(&ctrl->ctrl); - blk_mq_quiesce_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request, &ctrl->ctrl); blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set); - blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); nvme_loop_destroy_admin_queue(ctrl); } -- cgit v1.2.3 From 1179d337be70e67baa2d8121677c310fea4f72c3 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 6 Sep 2019 19:50:19 +0200 Subject: nvmet: Use PTR_ERR_OR_ZERO() in nvmet_init_discovery() Simplify this function implementation by using a known function. Generated by: scripts/coccinelle/api/ptr_ret.cocci Signed-off-by: Markus Elfring Reviewed-by: Sagi Grimberg Signed-off-by: Sagi Grimberg --- drivers/nvme/target/discovery.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 8efca26b4776..3764a8900850 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -381,9 +381,7 @@ int __init nvmet_init_discovery(void) { nvmet_disc_subsys = nvmet_subsys_alloc(NVME_DISC_SUBSYS_NAME, NVME_NQN_DISC); - if (IS_ERR(nvmet_disc_subsys)) - return PTR_ERR(nvmet_disc_subsys); - return 0; + return PTR_ERR_OR_ZERO(nvmet_disc_subsys); } void nvmet_exit_discovery(void) -- cgit v1.2.3 From 5f8badbcbeac298a77ee634a10a375f3e66923f9 Mon Sep 17 00:00:00 2001 From: Amit Date: Thu, 12 Sep 2019 08:29:39 +0300 Subject: nvmet: fix a wrong error status returned in error log page When the command data_len cannot hold all the controller errors, we should simply return as much errors as we can fit instead of failing the command. Signed-off-by: Amit Engel Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/target/admin-cmd.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 51800a9ce9a9..831a062d27cb 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -37,7 +37,6 @@ static void nvmet_execute_get_log_page_noop(struct nvmet_req *req) static void nvmet_execute_get_log_page_error(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; - u16 status = NVME_SC_SUCCESS; unsigned long flags; off_t offset = 0; u64 slot; @@ -47,9 +46,8 @@ static void nvmet_execute_get_log_page_error(struct nvmet_req *req) slot = ctrl->err_counter % NVMET_ERROR_LOG_SLOTS; for (i = 0; i < NVMET_ERROR_LOG_SLOTS; i++) { - status = nvmet_copy_to_sgl(req, offset, &ctrl->slots[slot], - sizeof(struct nvme_error_slot)); - if (status) + if (nvmet_copy_to_sgl(req, offset, &ctrl->slots[slot], + sizeof(struct nvme_error_slot))) break; if (slot == 0) @@ -59,7 +57,7 @@ static void nvmet_execute_get_log_page_error(struct nvmet_req *req) offset += sizeof(struct nvme_error_slot); } spin_unlock_irqrestore(&ctrl->error_lock, flags); - nvmet_req_complete(req, status); + nvmet_req_complete(req, 0); } static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, -- cgit v1.2.3