From 3c47c2ccd5a29c78780ccfd0227a805f3873ab1c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 14 Jan 2025 07:35:08 -0800 Subject: [PATCH 01/12] nvmet: fix rw control endian access Fixes: 3ec5c62cfcf060e ("nvmet: handle rw's limited retry flag") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501142128.WexgyMTv-lkp@intel.com/ Cc: Guixin Liu Signed-off-by: Keith Busch --- drivers/nvme/target/io-cmd-bdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 6380b60fd490..2b09b2c69857 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -272,7 +272,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) iter_flags = SG_MITER_FROM_SG; } - if (req->cmd->rw.control & NVME_RW_LR) + if (req->cmd->rw.control & cpu_to_le16(NVME_RW_LR)) opf |= REQ_FAILFAST_DEV; if (is_pci_p2pdma_page(sg_page(req->sg))) From d68fc95a771e0a7edd876ede7913d61276be77fd Mon Sep 17 00:00:00 2001 From: Francis Pravin Date: Fri, 17 Jan 2025 05:12:09 +0530 Subject: [PATCH 02/12] nvme-pci: remove redundant dma frees in hmb The value of size is 0 when there is no dma buffer allocated. The value of i also remains 0. So, no need to free the dma buffer in out_free_bufs. Hence, remove the redundant dma frees. Signed-off-by: Francis Pravin Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index fe0795e16e25..a14f3c74b717 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2153,14 +2153,6 @@ static int nvme_alloc_host_mem_multi(struct nvme_dev *dev, u64 preferred, return 0; out_free_bufs: - while (--i >= 0) { - size_t size = le32_to_cpu(descs[i].size) * NVME_CTRL_PAGE_SIZE; - - dma_free_attrs(dev->dev, size, bufs[i], - le64_to_cpu(descs[i].addr), - DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN); - } - kfree(bufs); out_free_descs: dma_free_coherent(dev->dev, descs_size, descs, descs_dma); From dbf2bb1a1319b7c7d8828905378a6696cca6b0f2 Mon Sep 17 00:00:00 2001 From: Georg Gottleuber Date: Mon, 16 Dec 2024 23:28:03 +0100 Subject: [PATCH 03/12] nvme-pci: Add TUXEDO InfinityFlex to Samsung sleep quirk On the TUXEDO InfinityFlex, a Samsung 990 Evo NVMe leads to a high power consumption in s2idle sleep (4 watts). This patch applies 'Force No Simple Suspend' quirk to achieve a sleep with a lower power consumption, typically around 1.4 watts. Signed-off-by: Georg Gottleuber Cc: stable@vger.kernel.org Signed-off-by: Werner Sembach Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a14f3c74b717..60afffc917b7 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3134,7 +3134,8 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) * because of high power consumption (> 2 Watt) in s2idle * sleep. Only some boards with Intel CPU are affected. */ - if (dmi_match(DMI_BOARD_NAME, "GMxPXxx") || + if (dmi_match(DMI_BOARD_NAME, "DN50Z-140HC-YD") || + dmi_match(DMI_BOARD_NAME, "GMxPXxx") || dmi_match(DMI_BOARD_NAME, "PH4PG31") || dmi_match(DMI_BOARD_NAME, "PH4PRX1_PH6PRX1") || dmi_match(DMI_BOARD_NAME, "PH6PG01_PH6PG71")) From 11cb3529d18514f7d28ad2190533192aedefd761 Mon Sep 17 00:00:00 2001 From: Georg Gottleuber Date: Mon, 16 Dec 2024 23:28:04 +0100 Subject: [PATCH 04/12] nvme-pci: Add TUXEDO IBP Gen9 to Samsung sleep quirk On the TUXEDO InfinityBook Pro Gen9 Intel, a Samsung 990 Evo NVMe leads to a high power consumption in s2idle sleep (4 watts). This patch applies 'Force No Simple Suspend' quirk to achieve a sleep with a lower power consumption, typically around 1.2 watts. Signed-off-by: Georg Gottleuber Cc: stable@vger.kernel.org Signed-off-by: Werner Sembach Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 60afffc917b7..ac708169efed 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3136,6 +3136,7 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) */ if (dmi_match(DMI_BOARD_NAME, "DN50Z-140HC-YD") || dmi_match(DMI_BOARD_NAME, "GMxPXxx") || + dmi_match(DMI_BOARD_NAME, "GXxMRXx") || dmi_match(DMI_BOARD_NAME, "PH4PG31") || dmi_match(DMI_BOARD_NAME, "PH4PRX1_PH6PRX1") || dmi_match(DMI_BOARD_NAME, "PH6PG01_PH6PG71")) From d3d380eded7ee5fc2fc53b3b0e72365ded025c4a Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 9 Jan 2025 14:30:47 +0100 Subject: [PATCH 05/12] nvme-fc: go straight to connecting state when initializing The initial controller initialization mimiks the reconnect loop behavior by switching from NEW to RESETTING and then to CONNECTING. The transition from NEW to CONNECTING is a valid transition, so there is no point entering the RESETTING state. TCP and RDMA also transition directly to CONNECTING state. Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 094be164ffdc..7409da42b9ee 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3578,8 +3578,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); spin_unlock_irqrestore(&rport->lock, flags); - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING) || - !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { dev_err(ctrl->ctrl.device, "NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum); goto fail_ctrl; From 294b2b7516fd06a8dd82e4a6118f318ec521e706 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 9 Jan 2025 14:30:48 +0100 Subject: [PATCH 06/12] nvme: handle connectivity loss in nvme_set_queue_count When the set feature attempts fails with any NVME status code set in nvme_set_queue_count, the function still report success. Though the numbers of queues set to 0. This is done to support controllers in degraded state (the admin queue is still up and running but no IO queues). Though there is an exception. When nvme_set_features reports an host path error, nvme_set_queue_count should propagate this error as the connectivity is lost, which means also the admin queue is not working anymore. Fixes: 9a0be7abb62f ("nvme: refactor set_queue_count") Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0d21258e2283..2bcd9f710cb6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1695,7 +1695,13 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count) status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, NULL, 0, &result); - if (status < 0) + + /* + * It's either a kernel error or the host observed a connection + * lost. In either case it's not possible communicate with the + * controller and thus enter the error code path. + */ + if (status < 0 || status == NVME_SC_HOST_PATH_ERROR) return status; /* From ee59e3820ca92a9f4307ae23dfc7229dc8b8d400 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 9 Jan 2025 14:30:49 +0100 Subject: [PATCH 07/12] nvme-fc: do not ignore connectivity loss during connecting When a connectivity loss occurs while nvme_fc_create_assocation is being executed, it's possible that the ctrl ends up stuck in the LIVE state: 1) nvme nvme10: NVME-FC{10}: create association : ... 2) nvme nvme10: NVME-FC{10}: controller connectivity lost. Awaiting Reconnect nvme nvme10: queue_size 128 > ctrl maxcmd 32, reducing to maxcmd 3) nvme nvme10: Could not set queue count (880) nvme nvme10: Failed to configure AEN (cfg 900) 4) nvme nvme10: NVME-FC{10}: controller connect complete 5) nvme nvme10: failed nvme_keep_alive_end_io error=4 A connection attempt starts 1) and the ctrl is in state CONNECTING. Shortly after the LLDD driver detects a connection lost event and calls nvme_fc_ctrl_connectivity_loss 2). Because we are still in CONNECTING state, this event is ignored. nvme_fc_create_association continues to run in parallel and tries to communicate with the controller and these commands will fail. Though these errors are filtered out, e.g in 3) setting the I/O queues numbers fails which leads to an early exit in nvme_fc_create_io_queues. Because the number of IO queues is 0 at this point, there is nothing left in nvme_fc_create_association which could detected the connection drop. Thus the ctrl enters LIVE state 4). Eventually the keep alive handler times out 5) but because nothing is being done, the ctrl stays in LIVE state. There is already the ASSOC_FAILED flag to track connectivity loss event but this bit is set too late in the recovery code path. Move this into the connectivity loss event handler and synchronize it with the state change. This ensures that the ASSOC_FAILED flag is seen by nvme_fc_create_io_queues and it does not enter the LIVE state after a connectivity loss event. If the connectivity loss event happens after we entered the LIVE state the normal error recovery path is executed. Signed-off-by: Daniel Wagner Reviewed-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 7409da42b9ee..55884d3df6f2 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -781,11 +781,19 @@ restart: static void nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl) { + enum nvme_ctrl_state state; + unsigned long flags; + dev_info(ctrl->ctrl.device, "NVME-FC{%d}: controller connectivity lost. Awaiting " "Reconnect", ctrl->cnum); - switch (nvme_ctrl_state(&ctrl->ctrl)) { + spin_lock_irqsave(&ctrl->lock, flags); + set_bit(ASSOC_FAILED, &ctrl->flags); + state = nvme_ctrl_state(&ctrl->ctrl); + spin_unlock_irqrestore(&ctrl->lock, flags); + + switch (state) { case NVME_CTRL_NEW: case NVME_CTRL_LIVE: /* @@ -2542,7 +2550,6 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) */ if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { __nvme_fc_abort_outstanding_ios(ctrl, true); - set_bit(ASSOC_FAILED, &ctrl->flags); dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: transport error during (re)connect\n", ctrl->cnum); @@ -3167,12 +3174,18 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) else ret = nvme_fc_recreate_io_queues(ctrl); } - if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) - ret = -EIO; if (ret) goto out_term_aen_ops; - changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); + spin_lock_irqsave(&ctrl->lock, flags); + if (!test_bit(ASSOC_FAILED, &ctrl->flags)) + changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); + else + ret = -EIO; + spin_unlock_irqrestore(&ctrl->lock, flags); + + if (ret) + goto out_term_aen_ops; ctrl->ctrl.nr_reconnects = 0; From 58f5c8d5ca07a2f9fa93fb073f5b1646ec482ff2 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 24 Jan 2025 13:00:33 +0200 Subject: [PATCH 08/12] nvmet: fix a memory leak in controller identify Simply free an allocated buffer once we copied its content to the request sgl. kmemleak complaint: unreferenced object 0xffff8cd40c388000 (size 4096): comm "kworker/2:2H", pid 14739, jiffies 4401313113 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 0): [] kmemleak_alloc+0x4a/0x90 [] __kmalloc_cache_noprof+0x35a/0x420 [] nvmet_execute_identify+0x912/0x9f0 [nvmet] [] nvmet_tcp_try_recv_pdu+0x84c/0xc90 [nvmet_tcp] [] nvmet_tcp_io_work+0x82/0x8b0 [nvmet_tcp] [] process_one_work+0x178/0x3e0 [] worker_thread+0x2ec/0x420 [] kthread+0xf0/0x120 [] ret_from_fork+0x44/0x70 [] ret_from_fork_asm+0x1a/0x30 Fixes: 84909f7decbd ("nvmet: use kzalloc instead of ZERO_PAGE in nvme_execute_identify_ns_nvm()") Signed-off-by: Sagi Grimberg Reviewed-by: Nilay Shroff Signed-off-by: Keith Busch --- drivers/nvme/target/admin-cmd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 3ddd8e44e148..ec7f70be6daa 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -1067,6 +1067,7 @@ static void nvme_execute_identify_ns_nvm(struct nvmet_req *req) goto out; } status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); + kfree(id); out: nvmet_req_complete(req, status); } From 7bf6b497a747b0e28a411beacdd62f1488d0781c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jan 2025 08:55:33 +0100 Subject: [PATCH 09/12] nvmet: the result field in nvmet_alloc_ctrl_args is little endian So use the __le32 type for it. Fixes: 6202783184bf ("nvmet: Improve nvmet_alloc_ctrl() interface and implementation") Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/nvmet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index f4df458df9db..6a9af4e4d732 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -582,7 +582,7 @@ struct nvmet_alloc_ctrl_args { const struct nvmet_fabrics_ops *ops; struct device *p2p_client; u32 kato; - u32 result; + __le32 result; u16 error_loc; u16 status; }; From cc3d4671a0db9499b201c43faba6c46e1a21274c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jan 2025 08:55:34 +0100 Subject: [PATCH 10/12] nvmet: add a missing endianess conversion in nvmet_execute_admin_connect The kato field is little endian on the wire, but native endian in the in-core structure, add the missing byte swap. Fixes: 6202783184bf ("nvmet: Improve nvmet_alloc_ctrl() interface and implementation") Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/fabrics-cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index a7ff05b3be29..eb406c90c167 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -287,7 +287,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) args.subsysnqn = d->subsysnqn; args.hostnqn = d->hostnqn; args.hostid = &d->hostid; - args.kato = c->kato; + args.kato = le32_to_cpu(c->kato); ctrl = nvmet_alloc_ctrl(&args); if (!ctrl) From 2d1a2dab95cdc6f2e0c6af3c0514b0bea94af482 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 28 Jan 2025 07:22:31 -0800 Subject: [PATCH 11/12] nvme: make nvme_tls_attrs_group static To suppress the compiler "warning: symbol 'nvme_tls_attrs_group' was not declared. Should it be static?" Fixes: 1e48b34c9bc79a ("nvme: split off TLS sysfs attributes into a separate group") Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index b68a9e5f1ea3..3a41b9ab0f13 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -792,7 +792,7 @@ static umode_t nvme_tls_attrs_are_visible(struct kobject *kobj, return a->mode; } -const struct attribute_group nvme_tls_attrs_group = { +static const struct attribute_group nvme_tls_attrs_group = { .attrs = nvme_tls_attrs, .is_visible = nvme_tls_attrs_are_visible, }; From c8ed6cb5d37bc09c7e25e49a670e9fd1a3bd1dfa Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Tue, 28 Jan 2025 17:34:47 +0100 Subject: [PATCH 12/12] nvme-fc: use ctrl state getter Do not access the state variable directly, instead use proper synchronization so not stale data is read. Fixes: e6e7f7ac03e4 ("nvme: ensure reset state check ordering") Signed-off-by: Daniel Wagner Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 55884d3df6f2..f4f1866fbd5b 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2087,7 +2087,8 @@ done: nvme_fc_complete_rq(rq); check_error: - if (terminate_assoc && ctrl->ctrl.state != NVME_CTRL_RESETTING) + if (terminate_assoc && + nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_RESETTING) queue_work(nvme_reset_wq, &ctrl->ioerr_work); } @@ -2541,6 +2542,8 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) { + enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl); + /* * if an error (io timeout, etc) while (re)connecting, the remote * port requested terminating of the association (disconnect_ls) @@ -2548,7 +2551,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) * the controller. Abort any ios on the association and let the * create_association error path resolve things. */ - if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { + if (state == NVME_CTRL_CONNECTING) { __nvme_fc_abort_outstanding_ios(ctrl, true); dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: transport error during (re)connect\n", @@ -2557,7 +2560,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) } /* Otherwise, only proceed if in LIVE state - e.g. on first error */ - if (ctrl->ctrl.state != NVME_CTRL_LIVE) + if (state != NVME_CTRL_LIVE) return; dev_warn(ctrl->ctrl.device,