From a115b5716fc9a64652aa9cb332070087178ffafa Mon Sep 17 00:00:00 2001 From: Maxime Coquelin Date: Tue, 9 Jan 2024 12:10:23 +0100 Subject: [PATCH 01/48] vduse: validate block features only with block devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch is preliminary work to enable network device type support to VDUSE. As VIRTIO_BLK_F_CONFIG_WCE shares the same value as VIRTIO_NET_F_HOST_TSO4, we need to restrict its check to Virtio-blk device type. Acked-by: Jason Wang Reviewed-by: Xie Yongji Reviewed-by: Eugenio Pérez Signed-off-by: Maxime Coquelin Message-Id: <20240109111025.1320976-2-maxime.coquelin@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_user/vduse_dev.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 73c89701fc9d..7c3d117b22de 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -1705,13 +1705,14 @@ static bool device_is_allowed(u32 device_id) return false; } -static bool features_is_valid(u64 features) +static bool features_is_valid(struct vduse_dev_config *config) { - if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) + if (!(config->features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) return false; /* Now we only support read-only configuration space */ - if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE)) + if ((config->device_id == VIRTIO_ID_BLOCK) && + (config->features & BIT_ULL(VIRTIO_BLK_F_CONFIG_WCE))) return false; return true; @@ -1738,7 +1739,7 @@ static bool vduse_validate_config(struct vduse_dev_config *config) if (!device_is_allowed(config->device_id)) return false; - if (!features_is_valid(config->features)) + if (!features_is_valid(config)) return false; return true; From 56e71885b0349241c07631a7b979b61e81afab6a Mon Sep 17 00:00:00 2001 From: Maxime Coquelin Date: Tue, 9 Jan 2024 12:10:24 +0100 Subject: [PATCH 02/48] vduse: Temporarily fail if control queue feature requested MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Virtio-net driver control queue implementation is not safe when used with VDUSE. If the VDUSE application does not reply to control queue messages, it currently ends up hanging the kernel thread sending this command. Some work is on-going to make the control queue implementation robust with VDUSE. Until it is completed, let's fail features check if control-queue feature is requested. Signed-off-by: Maxime Coquelin Message-Id: <20240109111025.1320976-3-maxime.coquelin@redhat.com> Signed-off-by: Michael S. Tsirkin Acked-by: Eugenio Pérez Reviewed-by: Xie Yongji Acked-by: Jason Wang --- drivers/vdpa/vdpa_user/vduse_dev.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 7c3d117b22de..ac8b5b52e3dc 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -8,6 +8,7 @@ * */ +#include "linux/virtio_net.h" #include #include #include @@ -28,6 +29,7 @@ #include #include #include +#include #include #include "iova_domain.h" @@ -1714,6 +1716,9 @@ static bool features_is_valid(struct vduse_dev_config *config) if ((config->device_id == VIRTIO_ID_BLOCK) && (config->features & BIT_ULL(VIRTIO_BLK_F_CONFIG_WCE))) return false; + else if ((config->device_id == VIRTIO_ID_NET) && + (config->features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) + return false; return true; } From 894452180d732413fd29fa95a4820560fa44ca4a Mon Sep 17 00:00:00 2001 From: Maxime Coquelin Date: Tue, 9 Jan 2024 12:10:25 +0100 Subject: [PATCH 03/48] vduse: enable Virtio-net device type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds Virtio-net device type to the supported devices types. Initialization fails if the device does not support VIRTIO_F_VERSION_1 feature, in order to guarantee the configuration space is read-only. It also fails with -EPERM if the CAP_NET_ADMIN is missing. Acked-by: Jason Wang Reviewed-by: Eugenio Pérez Signed-off-by: Maxime Coquelin Message-Id: <20240109111025.1320976-4-maxime.coquelin@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Xie Yongji --- drivers/vdpa/vdpa_user/vduse_dev.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index ac8b5b52e3dc..7ae99691efdf 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -143,6 +143,7 @@ static struct workqueue_struct *vduse_irq_bound_wq; static u32 allowed_device_id[] = { VIRTIO_ID_BLOCK, + VIRTIO_ID_NET, }; static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa) @@ -1720,6 +1721,10 @@ static bool features_is_valid(struct vduse_dev_config *config) (config->features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) return false; + if ((config->device_id == VIRTIO_ID_NET) && + !(config->features & BIT_ULL(VIRTIO_F_VERSION_1))) + return false; + return true; } @@ -1827,6 +1832,10 @@ static int vduse_create_dev(struct vduse_dev_config *config, int ret; struct vduse_dev *dev; + ret = -EPERM; + if ((config->device_id == VIRTIO_ID_NET) && !capable(CAP_NET_ADMIN)) + goto err; + ret = -EEXIST; if (vduse_find_dev(config->name)) goto err; @@ -2070,6 +2079,7 @@ static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = { static struct virtio_device_id id_table[] = { { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID }, + { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, { 0 }, }; From 4d5569314dcb0a1690ed06835167af61097d6cf6 Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Tue, 27 Feb 2024 22:45:18 +0800 Subject: [PATCH 04/48] MAINTAINERS: apply maintainer role of Intel vDPA driver This commit applies maintainer role of Intel vDPA driver for myself. I am the author of this driver and have been contributing to it for long time, I would like to help this solution evolve in future. This driver is still under virtio maintenance. Signed-off-by: Zhu Lingshan Message-Id: <20240227144519.555554-1-lingshan.zhu@intel.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- MAINTAINERS | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index f6dc90559341..2342b34656fb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10464,8 +10464,10 @@ F: include/net/nl802154.h F: net/ieee802154/ F: net/mac802154/ -IFCVF VIRTIO DATA PATH ACCELERATOR -R: Zhu Lingshan +Intel VIRTIO DATA PATH ACCELERATOR +M: Zhu Lingshan +L: virtualization@lists.linux.dev +S: Supported F: drivers/vdpa/ifcvf/ IFE PROTOCOL From e117d9b6e79a2466faf382cb7e8e8823e82d6bd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 8 Mar 2024 09:51:20 +0100 Subject: [PATCH 05/48] virtio-mmio: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Message-Id: Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mmio.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 59892a31cf76..173596589c71 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -696,12 +696,10 @@ free_vm_dev: return rc; } -static int virtio_mmio_remove(struct platform_device *pdev) +static void virtio_mmio_remove(struct platform_device *pdev) { struct virtio_mmio_device *vm_dev = platform_get_drvdata(pdev); unregister_virtio_device(&vm_dev->vdev); - - return 0; } @@ -847,7 +845,7 @@ MODULE_DEVICE_TABLE(acpi, virtio_mmio_acpi_match); static struct platform_driver virtio_mmio_driver = { .probe = virtio_mmio_probe, - .remove = virtio_mmio_remove, + .remove_new = virtio_mmio_remove, .driver = { .name = "virtio-mmio", .of_match_table = virtio_mmio_match, From f181a3736b7f76410040a3f16cbbfbdec2580bf9 Mon Sep 17 00:00:00 2001 From: Yuxue Liu Date: Mon, 25 Mar 2024 18:54:47 +0800 Subject: [PATCH 06/48] vp_vdpa: Fix return value check vp_vdpa_request_irq In the vp_vdpa_set_status function, when setting the device status to VIRTIO_CONFIG_S_DRIVER_OK, the vp_vdpa_request_irq function may fail. In such cases, the device status should not be set to DRIVER_OK. Add exception printing to remind the user. Signed-off-by: Yuxue Liu Message-Id: <20240325105448.235-1-gavin.liu@jaguarmicro.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/virtio_pci/vp_vdpa.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c index df5f4a3bccb5..4caca0517cad 100644 --- a/drivers/vdpa/virtio_pci/vp_vdpa.c +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c @@ -216,7 +216,10 @@ static void vp_vdpa_set_status(struct vdpa_device *vdpa, u8 status) if (status & VIRTIO_CONFIG_S_DRIVER_OK && !(s & VIRTIO_CONFIG_S_DRIVER_OK)) { - vp_vdpa_request_irq(vp_vdpa); + if (vp_vdpa_request_irq(vp_vdpa)) { + WARN_ON(1); + return; + } } vp_modern_set_status(mdev, status); From 7b1b5c7fff05bdf2dc21ee124907df1f8cc819ad Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Thu, 14 Mar 2024 17:58:53 +0800 Subject: [PATCH 07/48] vdpa: Convert sprintf/snprintf to sysfs_emit Per filesystems/sysfs.rst, show() should only use sysfs_emit() or sysfs_emit_at() when formatting the value to be returned to user space. coccinelle complains that there are still a couple of functions that use snprintf(). Convert them to sysfs_emit(). sprintf() will be converted as weel if they have. Generally, this patch is generated by make coccicheck M= MODE=patch \ COCCI=scripts/coccinelle/api/device_attr_show.cocci No functional change intended CC: "Michael S. Tsirkin" CC: Jason Wang CC: Xuan Zhuo CC: virtualization@lists.linux.dev Signed-off-by: Li Zhijian Message-Id: <20240314095853.1326111-1-lizhijian@fujitsu.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 6cb96a1e8b7d..8d391947eb8d 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -98,7 +98,7 @@ static ssize_t driver_override_show(struct device *dev, ssize_t len; device_lock(dev); - len = snprintf(buf, PAGE_SIZE, "%s\n", vdev->driver_override); + len = sysfs_emit(buf, "%s\n", vdev->driver_override); device_unlock(dev); return len; From b1b2ce58ed23c5d56e0ab299a5271ac01f95b75c Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 15 Mar 2024 19:46:59 -0500 Subject: [PATCH 08/48] vhost-scsi: Handle vhost_vq_work_queue failures for events Currently, we can try to queue an event's work before the vhost_task is created. When this happens we just drop it in vhost_scsi_do_plug before even calling vhost_vq_work_queue. During a device shutdown we do the same thing after vhost_scsi_clear_endpoint has cleared the backends. In the next patches we will be able to kill the vhost_task before we have cleared the endpoint. In that case, vhost_vq_work_queue can fail and we will leak the event's memory. This has handle the failure by just freeing the event. This is safe to do, because vhost_vq_work_queue will only return failure for us when the vhost_task is killed and so userspace will not be able to handle events if we sent them. Signed-off-by: Mike Christie Message-Id: <20240316004707.45557-2-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 282aac45c690..f34f9895b898 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -497,10 +497,8 @@ again: vq_err(vq, "Faulted on vhost_scsi_send_event\n"); } -static void vhost_scsi_evt_work(struct vhost_work *work) +static void vhost_scsi_complete_events(struct vhost_scsi *vs, bool drop) { - struct vhost_scsi *vs = container_of(work, struct vhost_scsi, - vs_event_work); struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq; struct vhost_scsi_evt *evt, *t; struct llist_node *llnode; @@ -508,12 +506,20 @@ static void vhost_scsi_evt_work(struct vhost_work *work) mutex_lock(&vq->mutex); llnode = llist_del_all(&vs->vs_event_list); llist_for_each_entry_safe(evt, t, llnode, list) { - vhost_scsi_do_evt_work(vs, evt); + if (!drop) + vhost_scsi_do_evt_work(vs, evt); vhost_scsi_free_evt(vs, evt); } mutex_unlock(&vq->mutex); } +static void vhost_scsi_evt_work(struct vhost_work *work) +{ + struct vhost_scsi *vs = container_of(work, struct vhost_scsi, + vs_event_work); + vhost_scsi_complete_events(vs, false); +} + static int vhost_scsi_copy_sgl_to_iov(struct vhost_scsi_cmd *cmd) { struct iov_iter *iter = &cmd->saved_iter; @@ -1509,7 +1515,8 @@ vhost_scsi_send_evt(struct vhost_scsi *vs, struct vhost_virtqueue *vq, } llist_add(&evt->list, &vs->vs_event_list); - vhost_vq_work_queue(vq, &vs->vs_event_work); + if (!vhost_vq_work_queue(vq, &vs->vs_event_work)) + vhost_scsi_complete_events(vs, true); } static void vhost_scsi_evt_handle_kick(struct vhost_work *work) From 1eceddeeb6a9beb7ef2a22c176c03c5ff18c6386 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 15 Mar 2024 19:47:00 -0500 Subject: [PATCH 09/48] vhost-scsi: Handle vhost_vq_work_queue failures for cmds In the next patches we will support the vhost_task being killed while in use. The problem for vhost-scsi is that we can't free some structs until we get responses for commands we have submitted to the target layer and we currently process the responses from the vhost_task. This has just drop the responses and free the command's resources. When all commands have completed then operations like flush will be woken up and we can complete device release and endpoint cleanup. Signed-off-by: Mike Christie Message-Id: <20240316004707.45557-3-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index f34f9895b898..6ec1abe7364f 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -358,6 +358,16 @@ static void vhost_scsi_release_tmf_res(struct vhost_scsi_tmf *tmf) vhost_scsi_put_inflight(inflight); } +static void vhost_scsi_drop_cmds(struct vhost_scsi_virtqueue *svq) +{ + struct vhost_scsi_cmd *cmd, *t; + struct llist_node *llnode; + + llnode = llist_del_all(&svq->completion_list); + llist_for_each_entry_safe(cmd, t, llnode, tvc_completion_list) + vhost_scsi_release_cmd_res(&cmd->tvc_se_cmd); +} + static void vhost_scsi_release_cmd(struct se_cmd *se_cmd) { if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) { @@ -373,7 +383,8 @@ static void vhost_scsi_release_cmd(struct se_cmd *se_cmd) struct vhost_scsi_virtqueue, vq); llist_add(&cmd->tvc_completion_list, &svq->completion_list); - vhost_vq_work_queue(&svq->vq, &svq->completion_work); + if (!vhost_vq_work_queue(&svq->vq, &svq->completion_work)) + vhost_scsi_drop_cmds(svq); } } From 59b701b99e2d01245b951325a8c3c12faa98d3ea Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 15 Mar 2024 19:47:01 -0500 Subject: [PATCH 10/48] vhost-scsi: Use system wq to flush dev for TMFs We flush all the workers that are not also used by the ctl vq to make sure that responses queued by LIO before the TMF response are sent before the TMF response. This requires a special vhost_vq_flush function which, in the next patches where we handle SIGKILL killing workers while in use, will require extra locking/complexity. To avoid that, this patch has us flush the entire device from the system work queue, then queue up sending the response from there. This is a little less optimal since we now flush all workers but this will be ok since commands have already timed out and perf is not a concern. Signed-off-by: Mike Christie Message-Id: <20240316004707.45557-4-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 6ec1abe7364f..04e0d3f1bd77 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -210,6 +210,7 @@ struct vhost_scsi { struct vhost_scsi_tmf { struct vhost_work vwork; + struct work_struct flush_work; struct vhost_scsi *vhost; struct vhost_scsi_virtqueue *svq; @@ -373,9 +374,8 @@ static void vhost_scsi_release_cmd(struct se_cmd *se_cmd) if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) { struct vhost_scsi_tmf *tmf = container_of(se_cmd, struct vhost_scsi_tmf, se_cmd); - struct vhost_virtqueue *vq = &tmf->svq->vq; - vhost_vq_work_queue(vq, &tmf->vwork); + schedule_work(&tmf->flush_work); } else { struct vhost_scsi_cmd *cmd = container_of(se_cmd, struct vhost_scsi_cmd, tvc_se_cmd); @@ -1287,33 +1287,31 @@ static void vhost_scsi_tmf_resp_work(struct vhost_work *work) { struct vhost_scsi_tmf *tmf = container_of(work, struct vhost_scsi_tmf, vwork); - struct vhost_virtqueue *ctl_vq, *vq; - int resp_code, i; - - if (tmf->scsi_resp == TMR_FUNCTION_COMPLETE) { - /* - * Flush IO vqs that don't share a worker with the ctl to make - * sure they have sent their responses before us. - */ - ctl_vq = &tmf->vhost->vqs[VHOST_SCSI_VQ_CTL].vq; - for (i = VHOST_SCSI_VQ_IO; i < tmf->vhost->dev.nvqs; i++) { - vq = &tmf->vhost->vqs[i].vq; - - if (vhost_vq_is_setup(vq) && - vq->worker != ctl_vq->worker) - vhost_vq_flush(vq); - } + int resp_code; + if (tmf->scsi_resp == TMR_FUNCTION_COMPLETE) resp_code = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED; - } else { + else resp_code = VIRTIO_SCSI_S_FUNCTION_REJECTED; - } vhost_scsi_send_tmf_resp(tmf->vhost, &tmf->svq->vq, tmf->in_iovs, tmf->vq_desc, &tmf->resp_iov, resp_code); vhost_scsi_release_tmf_res(tmf); } +static void vhost_scsi_tmf_flush_work(struct work_struct *work) +{ + struct vhost_scsi_tmf *tmf = container_of(work, struct vhost_scsi_tmf, + flush_work); + struct vhost_virtqueue *vq = &tmf->svq->vq; + /* + * Make sure we have sent responses for other commands before we + * send our response. + */ + vhost_dev_flush(vq->dev); + vhost_vq_work_queue(vq, &tmf->vwork); +} + static void vhost_scsi_handle_tmf(struct vhost_scsi *vs, struct vhost_scsi_tpg *tpg, struct vhost_virtqueue *vq, @@ -1337,6 +1335,7 @@ vhost_scsi_handle_tmf(struct vhost_scsi *vs, struct vhost_scsi_tpg *tpg, if (!tmf) goto send_reject; + INIT_WORK(&tmf->flush_work, vhost_scsi_tmf_flush_work); vhost_work_init(&tmf->vwork, vhost_scsi_tmf_resp_work); tmf->vhost = vs; tmf->svq = svq; From d9e59eec4aa7b0c46cec56f4f9cf595ea209da7e Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 15 Mar 2024 19:47:02 -0500 Subject: [PATCH 11/48] vhost: Remove vhost_vq_flush vhost_vq_flush is no longer used so remove it. Signed-off-by: Mike Christie Message-Id: <20240316004707.45557-5-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 12 ------------ drivers/vhost/vhost.h | 1 - 2 files changed, 13 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 8995730ce0bf..45aa83880b2d 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -263,18 +263,6 @@ bool vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work) } EXPORT_SYMBOL_GPL(vhost_vq_work_queue); -void vhost_vq_flush(struct vhost_virtqueue *vq) -{ - struct vhost_flush_struct flush; - - init_completion(&flush.wait_event); - vhost_work_init(&flush.work, vhost_flush_work); - - if (vhost_vq_work_queue(vq, &flush.work)) - wait_for_completion(&flush.wait_event); -} -EXPORT_SYMBOL_GPL(vhost_vq_flush); - /** * vhost_worker_flush - flush a worker * @worker: worker to flush diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 9e942fcda5c3..91ade037f08e 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -205,7 +205,6 @@ int vhost_get_vq_desc(struct vhost_virtqueue *, struct vhost_log *log, unsigned int *log_num); void vhost_discard_vq_desc(struct vhost_virtqueue *, int n); -void vhost_vq_flush(struct vhost_virtqueue *vq); bool vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work); bool vhost_vq_has_work(struct vhost_virtqueue *vq); bool vhost_vq_is_setup(struct vhost_virtqueue *vq); From 0352c961cb3542b1c03415259d7b85d99457acff Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 15 Mar 2024 19:47:03 -0500 Subject: [PATCH 12/48] vhost_scsi: Handle vhost_vq_work_queue failures for TMFs vhost_vq_work_queue will never fail when queueing the TMF's response handling because a guest can only send us TMFs when the device is fully setup so there is always a worker at that time. In the next patches we will modify the worker code so it handles SIGKILL by exiting before outstanding commands/TMFs have sent their responses. In that case vhost_vq_work_queue can fail when we try to send a response. This has us just free the TMF's resources since at this time the guest won't be able to get a response even if we could send it. Signed-off-by: Mike Christie Message-Id: <20240316004707.45557-6-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 04e0d3f1bd77..006ffacf1c56 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1309,7 +1309,8 @@ static void vhost_scsi_tmf_flush_work(struct work_struct *work) * send our response. */ vhost_dev_flush(vq->dev); - vhost_vq_work_queue(vq, &tmf->vwork); + if (!vhost_vq_work_queue(vq, &tmf->vwork)) + vhost_scsi_release_tmf_res(tmf); } static void From 34cf9ba5f00a222dddd9fc71de7c68fdaac7fb97 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 15 Mar 2024 19:47:04 -0500 Subject: [PATCH 13/48] vhost: Use virtqueue mutex for swapping worker __vhost_vq_attach_worker uses the vhost_dev mutex to serialize the swapping of a virtqueue's worker. This was done for simplicity because we are already holding that mutex. In the next patches where the worker can be killed while in use, we need finer grained locking because some drivers will hold the vhost_dev mutex while flushing. However in the SIGKILL handler in the next patches, we will need to be able to swap workers (set current one to NULL), kill queued works and stop new flushes while flushes are in progress. To prepare us, this has us use the virtqueue mutex for swapping workers instead of the vhost_dev one. Signed-off-by: Mike Christie Message-Id: <20240316004707.45557-7-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 45aa83880b2d..245133171a22 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -652,16 +652,22 @@ static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq, { struct vhost_worker *old_worker; - old_worker = rcu_dereference_check(vq->worker, - lockdep_is_held(&vq->dev->mutex)); - mutex_lock(&worker->mutex); - worker->attachment_cnt++; - mutex_unlock(&worker->mutex); - rcu_assign_pointer(vq->worker, worker); + mutex_lock(&vq->mutex); - if (!old_worker) + old_worker = rcu_dereference_check(vq->worker, + lockdep_is_held(&vq->mutex)); + rcu_assign_pointer(vq->worker, worker); + worker->attachment_cnt++; + + if (!old_worker) { + mutex_unlock(&vq->mutex); + mutex_unlock(&worker->mutex); return; + } + mutex_unlock(&vq->mutex); + mutex_unlock(&worker->mutex); + /* * Take the worker mutex to make sure we see the work queued from * device wide flushes which doesn't use RCU for execution. From ba704ff4e142fd3cfaf3379dd3b3b946754e06e3 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 15 Mar 2024 19:47:05 -0500 Subject: [PATCH 14/48] vhost: Release worker mutex during flushes In the next patches where the worker can be killed while in use, we need to be able to take the worker mutex and kill queued works for new IO and flushes, and set some new flags to prevent new __vhost_vq_attach_worker calls from swapping in/out killed workers. If we are holding the worker mutex during a flush and the flush's work is still in the queue, the worker code that will handle the SIGKILL cleanup won't be able to take the mutex and perform it's cleanup. So this patch has us drop the worker mutex while waiting for the flush to complete. Signed-off-by: Mike Christie Message-Id: <20240316004707.45557-8-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 44 +++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 245133171a22..c6448ff37768 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -264,21 +264,36 @@ bool vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work) EXPORT_SYMBOL_GPL(vhost_vq_work_queue); /** - * vhost_worker_flush - flush a worker + * __vhost_worker_flush - flush a worker * @worker: worker to flush * - * This does not use RCU to protect the worker, so the device or worker - * mutex must be held. + * The worker's flush_mutex must be held. */ -static void vhost_worker_flush(struct vhost_worker *worker) +static void __vhost_worker_flush(struct vhost_worker *worker) { struct vhost_flush_struct flush; + if (!worker->attachment_cnt) + return; + init_completion(&flush.wait_event); vhost_work_init(&flush.work, vhost_flush_work); vhost_worker_queue(worker, &flush.work); + /* + * Drop mutex in case our worker is killed and it needs to take the + * mutex to force cleanup. + */ + mutex_unlock(&worker->mutex); wait_for_completion(&flush.wait_event); + mutex_lock(&worker->mutex); +} + +static void vhost_worker_flush(struct vhost_worker *worker) +{ + mutex_lock(&worker->mutex); + __vhost_worker_flush(worker); + mutex_unlock(&worker->mutex); } void vhost_dev_flush(struct vhost_dev *dev) @@ -286,15 +301,8 @@ void vhost_dev_flush(struct vhost_dev *dev) struct vhost_worker *worker; unsigned long i; - xa_for_each(&dev->worker_xa, i, worker) { - mutex_lock(&worker->mutex); - if (!worker->attachment_cnt) { - mutex_unlock(&worker->mutex); - continue; - } + xa_for_each(&dev->worker_xa, i, worker) vhost_worker_flush(worker); - mutex_unlock(&worker->mutex); - } } EXPORT_SYMBOL_GPL(vhost_dev_flush); @@ -673,7 +681,6 @@ static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq, * device wide flushes which doesn't use RCU for execution. */ mutex_lock(&old_worker->mutex); - old_worker->attachment_cnt--; /* * We don't want to call synchronize_rcu for every vq during setup * because it will slow down VM startup. If we haven't done @@ -684,6 +691,8 @@ static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq, mutex_lock(&vq->mutex); if (!vhost_vq_get_backend(vq) && !vq->kick) { mutex_unlock(&vq->mutex); + + old_worker->attachment_cnt--; mutex_unlock(&old_worker->mutex); /* * vsock can queue anytime after VHOST_VSOCK_SET_GUEST_CID. @@ -699,7 +708,8 @@ static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq, /* Make sure new vq queue/flush/poll calls see the new worker */ synchronize_rcu(); /* Make sure whatever was queued gets run */ - vhost_worker_flush(old_worker); + __vhost_worker_flush(old_worker); + old_worker->attachment_cnt--; mutex_unlock(&old_worker->mutex); } @@ -752,6 +762,12 @@ static int vhost_free_worker(struct vhost_dev *dev, mutex_unlock(&worker->mutex); return -EBUSY; } + /* + * A flush might have raced and snuck in before attachment_cnt was set + * to zero. Make sure flushes are flushed from the queue before + * freeing. + */ + __vhost_worker_flush(worker); mutex_unlock(&worker->mutex); vhost_worker_destroy(dev, worker); From db5247d9bf5c6ade9fd70b4e4897441e0269b233 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 15 Mar 2024 19:47:06 -0500 Subject: [PATCH 15/48] vhost_task: Handle SIGKILL by flushing work and exiting Instead of lingering until the device is closed, this has us handle SIGKILL by: 1. marking the worker as killed so we no longer try to use it with new virtqueues and new flush operations. 2. setting the virtqueue to worker mapping so no new works are queued. 3. running all the exiting works. Suggested-by: Edward Adam Davis Reported-and-tested-by: syzbot+98edc2df894917b3431f@syzkaller.appspotmail.com Message-Id: Signed-off-by: Mike Christie Message-Id: <20240316004707.45557-9-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 54 +++++++++++++++++++++++++++++--- drivers/vhost/vhost.h | 2 ++ include/linux/sched/vhost_task.h | 3 +- kernel/vhost_task.c | 53 ++++++++++++++++++++----------- 4 files changed, 88 insertions(+), 24 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index c6448ff37768..b60955682474 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -273,7 +273,7 @@ static void __vhost_worker_flush(struct vhost_worker *worker) { struct vhost_flush_struct flush; - if (!worker->attachment_cnt) + if (!worker->attachment_cnt || worker->killed) return; init_completion(&flush.wait_event); @@ -388,7 +388,7 @@ static void vhost_vq_reset(struct vhost_dev *dev, __vhost_vq_meta_reset(vq); } -static bool vhost_worker(void *data) +static bool vhost_run_work_list(void *data) { struct vhost_worker *worker = data; struct vhost_work *work, *work_next; @@ -413,6 +413,40 @@ static bool vhost_worker(void *data) return !!node; } +static void vhost_worker_killed(void *data) +{ + struct vhost_worker *worker = data; + struct vhost_dev *dev = worker->dev; + struct vhost_virtqueue *vq; + int i, attach_cnt = 0; + + mutex_lock(&worker->mutex); + worker->killed = true; + + for (i = 0; i < dev->nvqs; i++) { + vq = dev->vqs[i]; + + mutex_lock(&vq->mutex); + if (worker == + rcu_dereference_check(vq->worker, + lockdep_is_held(&vq->mutex))) { + rcu_assign_pointer(vq->worker, NULL); + attach_cnt++; + } + mutex_unlock(&vq->mutex); + } + + worker->attachment_cnt -= attach_cnt; + if (attach_cnt) + synchronize_rcu(); + /* + * Finish vhost_worker_flush calls and any other works that snuck in + * before the synchronize_rcu. + */ + vhost_run_work_list(worker); + mutex_unlock(&worker->mutex); +} + static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) { kfree(vq->indirect); @@ -627,9 +661,11 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev) if (!worker) return NULL; + worker->dev = dev; snprintf(name, sizeof(name), "vhost-%d", current->pid); - vtsk = vhost_task_create(vhost_worker, worker, name); + vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed, + worker, name); if (!vtsk) goto free_worker; @@ -661,6 +697,11 @@ static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq, struct vhost_worker *old_worker; mutex_lock(&worker->mutex); + if (worker->killed) { + mutex_unlock(&worker->mutex); + return; + } + mutex_lock(&vq->mutex); old_worker = rcu_dereference_check(vq->worker, @@ -681,6 +722,11 @@ static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq, * device wide flushes which doesn't use RCU for execution. */ mutex_lock(&old_worker->mutex); + if (old_worker->killed) { + mutex_unlock(&old_worker->mutex); + return; + } + /* * We don't want to call synchronize_rcu for every vq during setup * because it will slow down VM startup. If we haven't done @@ -758,7 +804,7 @@ static int vhost_free_worker(struct vhost_dev *dev, return -ENODEV; mutex_lock(&worker->mutex); - if (worker->attachment_cnt) { + if (worker->attachment_cnt || worker->killed) { mutex_unlock(&worker->mutex); return -EBUSY; } diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 91ade037f08e..bb75a292d50c 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -28,12 +28,14 @@ struct vhost_work { struct vhost_worker { struct vhost_task *vtsk; + struct vhost_dev *dev; /* Used to serialize device wide flushing with worker swapping. */ struct mutex mutex; struct llist_head work_list; u64 kcov_handle; u32 id; int attachment_cnt; + bool killed; }; /* Poll a file (eventfd or socket) */ diff --git a/include/linux/sched/vhost_task.h b/include/linux/sched/vhost_task.h index bc60243d43b3..25446c5d3508 100644 --- a/include/linux/sched/vhost_task.h +++ b/include/linux/sched/vhost_task.h @@ -4,7 +4,8 @@ struct vhost_task; -struct vhost_task *vhost_task_create(bool (*fn)(void *), void *arg, +struct vhost_task *vhost_task_create(bool (*fn)(void *), + void (*handle_kill)(void *), void *arg, const char *name); void vhost_task_start(struct vhost_task *vtsk); void vhost_task_stop(struct vhost_task *vtsk); diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c index da35e5b7f047..8800f5acc007 100644 --- a/kernel/vhost_task.c +++ b/kernel/vhost_task.c @@ -10,38 +10,32 @@ enum vhost_task_flags { VHOST_TASK_FLAGS_STOP, + VHOST_TASK_FLAGS_KILLED, }; struct vhost_task { bool (*fn)(void *data); + void (*handle_sigkill)(void *data); void *data; struct completion exited; unsigned long flags; struct task_struct *task; + /* serialize SIGKILL and vhost_task_stop calls */ + struct mutex exit_mutex; }; static int vhost_task_fn(void *data) { struct vhost_task *vtsk = data; - bool dead = false; for (;;) { bool did_work; - if (!dead && signal_pending(current)) { + if (signal_pending(current)) { struct ksignal ksig; - /* - * Calling get_signal will block in SIGSTOP, - * or clear fatal_signal_pending, but remember - * what was set. - * - * This thread won't actually exit until all - * of the file descriptors are closed, and - * the release function is called. - */ - dead = get_signal(&ksig); - if (dead) - clear_thread_flag(TIF_SIGPENDING); + + if (get_signal(&ksig)) + break; } /* mb paired w/ vhost_task_stop */ @@ -57,7 +51,19 @@ static int vhost_task_fn(void *data) schedule(); } + mutex_lock(&vtsk->exit_mutex); + /* + * If a vhost_task_stop and SIGKILL race, we can ignore the SIGKILL. + * When the vhost layer has called vhost_task_stop it's already stopped + * new work and flushed. + */ + if (!test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags)) { + set_bit(VHOST_TASK_FLAGS_KILLED, &vtsk->flags); + vtsk->handle_sigkill(vtsk->data); + } + mutex_unlock(&vtsk->exit_mutex); complete(&vtsk->exited); + do_exit(0); } @@ -78,12 +84,17 @@ EXPORT_SYMBOL_GPL(vhost_task_wake); * @vtsk: vhost_task to stop * * vhost_task_fn ensures the worker thread exits after - * VHOST_TASK_FLAGS_SOP becomes true. + * VHOST_TASK_FLAGS_STOP becomes true. */ void vhost_task_stop(struct vhost_task *vtsk) { - set_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags); - vhost_task_wake(vtsk); + mutex_lock(&vtsk->exit_mutex); + if (!test_bit(VHOST_TASK_FLAGS_KILLED, &vtsk->flags)) { + set_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags); + vhost_task_wake(vtsk); + } + mutex_unlock(&vtsk->exit_mutex); + /* * Make sure vhost_task_fn is no longer accessing the vhost_task before * freeing it below. @@ -96,14 +107,16 @@ EXPORT_SYMBOL_GPL(vhost_task_stop); /** * vhost_task_create - create a copy of a task to be used by the kernel * @fn: vhost worker function - * @arg: data to be passed to fn + * @handle_sigkill: vhost function to handle when we are killed + * @arg: data to be passed to fn and handled_kill * @name: the thread's name * * This returns a specialized task for use by the vhost layer or NULL on * failure. The returned task is inactive, and the caller must fire it up * through vhost_task_start(). */ -struct vhost_task *vhost_task_create(bool (*fn)(void *), void *arg, +struct vhost_task *vhost_task_create(bool (*fn)(void *), + void (*handle_sigkill)(void *), void *arg, const char *name) { struct kernel_clone_args args = { @@ -122,8 +135,10 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *), void *arg, if (!vtsk) return NULL; init_completion(&vtsk->exited); + mutex_init(&vtsk->exit_mutex); vtsk->data = arg; vtsk->fn = fn; + vtsk->handle_sigkill = handle_sigkill; args.fn_arg = vtsk; From 240a1853b4d2bce51e5cac9ba65cd646152ab6d6 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 15 Mar 2024 19:47:07 -0500 Subject: [PATCH 16/48] kernel: Remove signal hacks for vhost_tasks This removes the signal/coredump hacks added for vhost_tasks in: Commit f9010dbdce91 ("fork, vhost: Use CLONE_THREAD to fix freezer/ps regression") When that patch was added vhost_tasks did not handle SIGKILL and would try to ignore/clear the signal and continue on until the device's close function was called. In the previous patches vhost_tasks and the vhost drivers were converted to support SIGKILL by cleaning themselves up and exiting. The hacks are no longer needed so this removes them. Signed-off-by: Mike Christie Message-Id: <20240316004707.45557-10-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- fs/coredump.c | 4 +--- kernel/exit.c | 5 +---- kernel/signal.c | 4 +--- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index be6403b4b14b..8eae24afb3cb 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -371,9 +371,7 @@ static int zap_process(struct task_struct *start, int exit_code) if (t != current && !(t->flags & PF_POSTCOREDUMP)) { sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); - /* The vhost_worker does not particpate in coredumps */ - if ((t->flags & (PF_USER_WORKER | PF_IO_WORKER)) != PF_USER_WORKER) - nr++; + nr++; } } diff --git a/kernel/exit.c b/kernel/exit.c index 41a12630cbbc..fca3a3234954 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -414,10 +414,7 @@ static void coredump_task_exit(struct task_struct *tsk) tsk->flags |= PF_POSTCOREDUMP; core_state = tsk->signal->core_state; spin_unlock_irq(&tsk->sighand->siglock); - - /* The vhost_worker does not particpate in coredumps */ - if (core_state && - ((tsk->flags & (PF_IO_WORKER | PF_USER_WORKER)) != PF_USER_WORKER)) { + if (core_state) { struct core_thread self; self.task = current; diff --git a/kernel/signal.c b/kernel/signal.c index 7bdbcf1b78d0..41d5cbccab2a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1375,9 +1375,7 @@ int zap_other_threads(struct task_struct *p) for_other_threads(p, t) { task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); - /* Don't require de_thread to wait for the vhost_worker */ - if ((t->flags & (PF_IO_WORKER | PF_USER_WORKER)) != PF_USER_WORKER) - count++; + count++; /* Don't bother with already dead threads */ if (t->exit_state) From e4544c550eb1857845bb7114bac53edd04dc078f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 18 Mar 2024 13:06:45 +0100 Subject: [PATCH 17/48] virtio-mem: support suspend+resume With virtio-mem, primarily hibernation is problematic: as the machine shuts down, the virtio-mem device loses its state. Powering the machine back up is like losing a bunch of DIMMs. While there would be ways to add limited support, suspend+resume is more commonly used for VMs and "easier" to support cleanly. s2idle can be supported without any device dependencies. Similarly, one would expect suspend-to-ram (i.e., S3) to work out of the box. However, QEMU currently unplugs all device memory when resuming the VM, using a cold reset on the "wakeup" path. In order to support S3, we need a feature flag for the device to tell us if memory remains plugged when waking up. In the future, QEMU will implement this feature. So let's always support s2idle and support S3 with plugged memory only if the device indicates support. Block hibernation early using the PM notifier. Trying to hibernate now fails early: # echo disk > /sys/power/state [ 26.455369] PM: hibernation: hibernation entry [ 26.458271] virtio_mem virtio0: hibernation is not supported. [ 26.462498] PM: hibernation: hibernation exit -bash: echo: write error: Operation not permitted s2idle works even without the new feature bit: # echo s2idle > /sys/power/mem_sleep # echo mem > /sys/power/state [ 52.083725] PM: suspend entry (s2idle) [ 52.095950] Filesystems sync: 0.010 seconds [ 52.101493] Freezing user space processes [ 52.104213] Freezing user space processes completed (elapsed 0.001 seconds) [ 52.106520] OOM killer disabled. [ 52.107655] Freezing remaining freezable tasks [ 52.110880] Freezing remaining freezable tasks completed (elapsed 0.001 seconds) [ 52.113296] printk: Suspending console(s) (use no_console_suspend to debug) S3 does not work without the feature bit when memory is plugged: # echo deep > /sys/power/mem_sleep # echo mem > /sys/power/state [ 32.788281] PM: suspend entry (deep) [ 32.816630] Filesystems sync: 0.027 seconds [ 32.820029] Freezing user space processes [ 32.823870] Freezing user space processes completed (elapsed 0.001 seconds) [ 32.827756] OOM killer disabled. [ 32.829608] Freezing remaining freezable tasks [ 32.833842] Freezing remaining freezable tasks completed (elapsed 0.001 seconds) [ 32.837953] printk: Suspending console(s) (use no_console_suspend to debug) [ 32.916172] virtio_mem virtio0: suspend+resume with plugged memory is not supported [ 32.916181] virtio-pci 0000:00:02.0: PM: pci_pm_suspend(): virtio_pci_freeze+0x0/0x50 returns -1 [ 32.916197] virtio-pci 0000:00:02.0: PM: dpm_run_callback(): pci_pm_suspend+0x0/0x170 returns -1 [ 32.916210] virtio-pci 0000:00:02.0: PM: failed to suspend async: error -1 But S3 works with the new feature bit when memory is plugged (patched QEMU): # echo deep > /sys/power/mem_sleep # echo mem > /sys/power/state [ 33.983694] PM: suspend entry (deep) [ 34.009828] Filesystems sync: 0.024 seconds [ 34.013589] Freezing user space processes [ 34.016722] Freezing user space processes completed (elapsed 0.001 seconds) [ 34.019092] OOM killer disabled. [ 34.020291] Freezing remaining freezable tasks [ 34.023549] Freezing remaining freezable tasks completed (elapsed 0.001 seconds) [ 34.026090] printk: Suspending console(s) (use no_console_suspend to debug) Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Xuan Zhuo Signed-off-by: David Hildenbrand Message-Id: <20240318120645.105664-1-david@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 68 ++++++++++++++++++++++++++++++--- include/uapi/linux/virtio_mem.h | 2 + 2 files changed, 64 insertions(+), 6 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 8e3223294442..51088d02de32 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -252,6 +253,9 @@ struct virtio_mem { /* Memory notifier (online/offline events). */ struct notifier_block memory_notifier; + /* Notifier to block hibernation image storing/reloading. */ + struct notifier_block pm_notifier; + #ifdef CONFIG_PROC_VMCORE /* vmcore callback for /proc/vmcore handling in kdump mode */ struct vmcore_cb vmcore_cb; @@ -1111,6 +1115,25 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, return rc; } +static int virtio_mem_pm_notifier_cb(struct notifier_block *nb, + unsigned long action, void *arg) +{ + struct virtio_mem *vm = container_of(nb, struct virtio_mem, + pm_notifier); + switch (action) { + case PM_HIBERNATION_PREPARE: + case PM_RESTORE_PREPARE: + /* + * When restarting the VM, all memory is unplugged. Don't + * allow to hibernate and restore from an image. + */ + dev_err(&vm->vdev->dev, "hibernation is not supported.\n"); + return NOTIFY_BAD; + default: + return NOTIFY_OK; + } +} + /* * Set a range of pages PG_offline. Remember pages that were never onlined * (via generic_online_page()) using PageDirty(). @@ -2615,11 +2638,19 @@ static int virtio_mem_init_hotplug(struct virtio_mem *vm) rc = register_memory_notifier(&vm->memory_notifier); if (rc) goto out_unreg_group; - rc = register_virtio_mem_device(vm); + /* Block hibernation as early as possible. */ + vm->pm_notifier.priority = INT_MAX; + vm->pm_notifier.notifier_call = virtio_mem_pm_notifier_cb; + rc = register_pm_notifier(&vm->pm_notifier); if (rc) goto out_unreg_mem; + rc = register_virtio_mem_device(vm); + if (rc) + goto out_unreg_pm; return 0; +out_unreg_pm: + unregister_pm_notifier(&vm->pm_notifier); out_unreg_mem: unregister_memory_notifier(&vm->memory_notifier); out_unreg_group: @@ -2897,6 +2928,7 @@ static void virtio_mem_deinit_hotplug(struct virtio_mem *vm) /* unregister callbacks */ unregister_virtio_mem_device(vm); + unregister_pm_notifier(&vm->pm_notifier); unregister_memory_notifier(&vm->memory_notifier); /* @@ -2960,17 +2992,40 @@ static void virtio_mem_config_changed(struct virtio_device *vdev) #ifdef CONFIG_PM_SLEEP static int virtio_mem_freeze(struct virtio_device *vdev) { + struct virtio_mem *vm = vdev->priv; + /* - * When restarting the VM, all memory is usually unplugged. Don't - * allow to suspend/hibernate. + * We block hibernation using the PM notifier completely. The workqueue + * is already frozen by the PM core at this point, so we simply + * reset the device and cleanup the queues. */ - dev_err(&vdev->dev, "save/restore not supported.\n"); - return -EPERM; + if (pm_suspend_target_state != PM_SUSPEND_TO_IDLE && + vm->plugged_size && + !virtio_has_feature(vm->vdev, VIRTIO_MEM_F_PERSISTENT_SUSPEND)) { + dev_err(&vm->vdev->dev, + "suspending with plugged memory is not supported\n"); + return -EPERM; + } + + virtio_reset_device(vdev); + vdev->config->del_vqs(vdev); + vm->vq = NULL; + return 0; } static int virtio_mem_restore(struct virtio_device *vdev) { - return -EPERM; + struct virtio_mem *vm = vdev->priv; + int ret; + + ret = virtio_mem_init_vq(vm); + if (ret) + return ret; + virtio_device_ready(vdev); + + /* Let's check if anything changed. */ + virtio_mem_config_changed(vdev); + return 0; } #endif @@ -2979,6 +3034,7 @@ static unsigned int virtio_mem_features[] = { VIRTIO_MEM_F_ACPI_PXM, #endif VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, + VIRTIO_MEM_F_PERSISTENT_SUSPEND, }; static const struct virtio_device_id virtio_mem_id_table[] = { diff --git a/include/uapi/linux/virtio_mem.h b/include/uapi/linux/virtio_mem.h index e9122f1d0e0c..6e4b2cf6b7f1 100644 --- a/include/uapi/linux/virtio_mem.h +++ b/include/uapi/linux/virtio_mem.h @@ -90,6 +90,8 @@ #define VIRTIO_MEM_F_ACPI_PXM 0 /* unplugged memory must not be accessed */ #define VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE 1 +/* plugged memory will remain plugged when suspending+resuming */ +#define VIRTIO_MEM_F_PERSISTENT_SUSPEND 2 /* --- virtio-mem: guest -> host requests --- */ From 810d831bbbf3cbd86e5aa91c8485b4d35186144d Mon Sep 17 00:00:00 2001 From: David Stevens Date: Thu, 21 Mar 2024 10:24:44 +0900 Subject: [PATCH 18/48] virtio_balloon: Give the balloon its own wakeup source Wakeup sources don't support nesting multiple events, so sharing a single object between multiple drivers can result in one driver overriding the wakeup event processing period specified by another driver. Have the virtio balloon driver use the wakeup source of the device it is bound to rather than the wakeup source of the parent device, to avoid conflicts with the transport layer. Note that although the virtio balloon's virtio_device itself isn't what actually wakes up the device, it is responsible for processing wakeup events. In the same way that EPOLLWAKEUP uses a dedicated wakeup_source to prevent suspend when userspace is processing wakeup events, a dedicated wakeup_source is necessary when processing wakeup events in a higher layer in the kernel. Fixes: b12fbc3f787e ("virtio_balloon: stay awake while adjusting balloon") Signed-off-by: David Stevens Acked-by: David Hildenbrand Message-Id: <20240321012445.1593685-2-stevensd@google.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 1f5b3dd31fcf..89bc8da80519 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -450,7 +450,7 @@ static void start_update_balloon_size(struct virtio_balloon *vb) vb->adjustment_signal_pending = true; if (!vb->adjustment_in_progress) { vb->adjustment_in_progress = true; - pm_stay_awake(vb->vdev->dev.parent); + pm_stay_awake(&vb->vdev->dev); } spin_unlock_irqrestore(&vb->adjustment_lock, flags); @@ -462,7 +462,7 @@ static void end_update_balloon_size(struct virtio_balloon *vb) spin_lock_irq(&vb->adjustment_lock); if (!vb->adjustment_signal_pending && vb->adjustment_in_progress) { vb->adjustment_in_progress = false; - pm_relax(vb->vdev->dev.parent); + pm_relax(&vb->vdev->dev); } spin_unlock_irq(&vb->adjustment_lock); } @@ -1029,6 +1029,15 @@ static int virtballoon_probe(struct virtio_device *vdev) spin_lock_init(&vb->adjustment_lock); + /* + * The virtio balloon itself can't wake up the device, but it is + * responsible for processing wakeup events passed up from the transport + * layer. Wakeup sources don't support nesting/chaining calls, so we use + * our own wakeup source to ensure wakeup events are properly handled + * without trampling on the transport layer's wakeup source. + */ + device_set_wakeup_capable(&vb->vdev->dev, true); + virtio_device_ready(vdev); if (towards_target(vb)) From c578123e63da31d9d3876c33a8e3415597288e7d Mon Sep 17 00:00:00 2001 From: David Stevens Date: Thu, 21 Mar 2024 10:24:45 +0900 Subject: [PATCH 19/48] virtio_balloon: Treat stats requests as wakeup events Treat stats requests as wakeup events to ensure that the driver responds to device requests in a timely manner. Signed-off-by: David Stevens Acked-by: David Hildenbrand Message-Id: <20240321012445.1593685-3-stevensd@google.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 75 ++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 29 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 89bc8da80519..b09e8e3c62e5 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -121,11 +121,14 @@ struct virtio_balloon { struct page_reporting_dev_info pr_dev_info; /* State for keeping the wakeup_source active while adjusting the balloon */ - spinlock_t adjustment_lock; - bool adjustment_signal_pending; - bool adjustment_in_progress; + spinlock_t wakeup_lock; + bool processing_wakeup_event; + u32 wakeup_signal_mask; }; +#define VIRTIO_BALLOON_WAKEUP_SIGNAL_ADJUST (1 << 0) +#define VIRTIO_BALLOON_WAKEUP_SIGNAL_STATS (1 << 1) + static const struct virtio_device_id id_table[] = { { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID }, { 0 }, @@ -140,6 +143,36 @@ static u32 page_to_balloon_pfn(struct page *page) return pfn * VIRTIO_BALLOON_PAGES_PER_PAGE; } +static void start_wakeup_event(struct virtio_balloon *vb, u32 mask) +{ + unsigned long flags; + + spin_lock_irqsave(&vb->wakeup_lock, flags); + vb->wakeup_signal_mask |= mask; + if (!vb->processing_wakeup_event) { + vb->processing_wakeup_event = true; + pm_stay_awake(&vb->vdev->dev); + } + spin_unlock_irqrestore(&vb->wakeup_lock, flags); +} + +static void process_wakeup_event(struct virtio_balloon *vb, u32 mask) +{ + spin_lock_irq(&vb->wakeup_lock); + vb->wakeup_signal_mask &= ~mask; + spin_unlock_irq(&vb->wakeup_lock); +} + +static void finish_wakeup_event(struct virtio_balloon *vb) +{ + spin_lock_irq(&vb->wakeup_lock); + if (!vb->wakeup_signal_mask && vb->processing_wakeup_event) { + vb->processing_wakeup_event = false; + pm_relax(&vb->vdev->dev); + } + spin_unlock_irq(&vb->wakeup_lock); +} + static void balloon_ack(struct virtqueue *vq) { struct virtio_balloon *vb = vq->vdev->priv; @@ -370,8 +403,10 @@ static void stats_request(struct virtqueue *vq) struct virtio_balloon *vb = vq->vdev->priv; spin_lock(&vb->stop_update_lock); - if (!vb->stop_update) + if (!vb->stop_update) { + start_wakeup_event(vb, VIRTIO_BALLOON_WAKEUP_SIGNAL_STATS); queue_work(system_freezable_wq, &vb->update_balloon_stats_work); + } spin_unlock(&vb->stop_update_lock); } @@ -444,29 +479,10 @@ static void virtio_balloon_queue_free_page_work(struct virtio_balloon *vb) static void start_update_balloon_size(struct virtio_balloon *vb) { - unsigned long flags; - - spin_lock_irqsave(&vb->adjustment_lock, flags); - vb->adjustment_signal_pending = true; - if (!vb->adjustment_in_progress) { - vb->adjustment_in_progress = true; - pm_stay_awake(&vb->vdev->dev); - } - spin_unlock_irqrestore(&vb->adjustment_lock, flags); - + start_wakeup_event(vb, VIRTIO_BALLOON_WAKEUP_SIGNAL_ADJUST); queue_work(system_freezable_wq, &vb->update_balloon_size_work); } -static void end_update_balloon_size(struct virtio_balloon *vb) -{ - spin_lock_irq(&vb->adjustment_lock); - if (!vb->adjustment_signal_pending && vb->adjustment_in_progress) { - vb->adjustment_in_progress = false; - pm_relax(&vb->vdev->dev); - } - spin_unlock_irq(&vb->adjustment_lock); -} - static void virtballoon_changed(struct virtio_device *vdev) { struct virtio_balloon *vb = vdev->priv; @@ -495,7 +511,10 @@ static void update_balloon_stats_func(struct work_struct *work) vb = container_of(work, struct virtio_balloon, update_balloon_stats_work); + + process_wakeup_event(vb, VIRTIO_BALLOON_WAKEUP_SIGNAL_STATS); stats_handle_request(vb); + finish_wakeup_event(vb); } static void update_balloon_size_func(struct work_struct *work) @@ -506,9 +525,7 @@ static void update_balloon_size_func(struct work_struct *work) vb = container_of(work, struct virtio_balloon, update_balloon_size_work); - spin_lock_irq(&vb->adjustment_lock); - vb->adjustment_signal_pending = false; - spin_unlock_irq(&vb->adjustment_lock); + process_wakeup_event(vb, VIRTIO_BALLOON_WAKEUP_SIGNAL_ADJUST); diff = towards_target(vb); @@ -523,7 +540,7 @@ static void update_balloon_size_func(struct work_struct *work) if (diff) queue_work(system_freezable_wq, work); else - end_update_balloon_size(vb); + finish_wakeup_event(vb); } static int init_vqs(struct virtio_balloon *vb) @@ -1027,7 +1044,7 @@ static int virtballoon_probe(struct virtio_device *vdev) goto out_unregister_oom; } - spin_lock_init(&vb->adjustment_lock); + spin_lock_init(&vb->wakeup_lock); /* * The virtio balloon itself can't wake up the device, but it is From 7cc5d6caaf403c730ea1aac0b708dea47758877d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:43:49 +0200 Subject: [PATCH 20/48] virtio: balloon: drop owner assignment virtio core already sets the .owner, so driver does not need to. Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-2-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index b09e8e3c62e5..c0a63638f95e 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -1181,7 +1181,6 @@ static struct virtio_driver virtio_balloon_driver = { .feature_table = features, .feature_table_size = ARRAY_SIZE(features), .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .validate = virtballoon_validate, .probe = virtballoon_probe, From ebfb92a96715df5bf95195ed60e6d91f51d36127 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:43:50 +0200 Subject: [PATCH 21/48] virtio: input: drop owner assignment virtio core already sets the .owner, so driver does not need to. Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-3-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_input.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c index 3aa46703872d..1a730d6c0b55 100644 --- a/drivers/virtio/virtio_input.c +++ b/drivers/virtio/virtio_input.c @@ -394,7 +394,6 @@ static const struct virtio_device_id id_table[] = { static struct virtio_driver virtio_input_driver = { .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .feature_table = features, .feature_table_size = ARRAY_SIZE(features), .id_table = id_table, From b0e55a950ee6145e0debfab941b057046e6d87c2 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:43:51 +0200 Subject: [PATCH 22/48] virtio: mem: drop owner assignment virtio core already sets the .owner, so driver does not need to. Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-4-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 51088d02de32..6d4dfbc53a66 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -3046,7 +3046,6 @@ static struct virtio_driver virtio_mem_driver = { .feature_table = virtio_mem_features, .feature_table_size = ARRAY_SIZE(virtio_mem_features), .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = virtio_mem_id_table, .probe = virtio_mem_probe, .remove = virtio_mem_remove, From 9731be4bc8d5a5df302b27eb9483de0efc7da792 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:43:52 +0200 Subject: [PATCH 23/48] um: virt-pci: drop owner assignment virtio core already sets the .owner, so driver does not need to. Acked-by: Johannes Berg Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-5-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin --- arch/um/drivers/virt-pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c index 97a37c062997..7cb503469bbd 100644 --- a/arch/um/drivers/virt-pci.c +++ b/arch/um/drivers/virt-pci.c @@ -752,7 +752,6 @@ MODULE_DEVICE_TABLE(virtio, id_table); static struct virtio_driver um_pci_virtio_driver = { .driver.name = "virtio-pci", - .driver.owner = THIS_MODULE, .id_table = id_table, .probe = um_pci_virtio_probe, .remove = um_pci_virtio_remove, From bdb8e2f8e8b8d22714aee2be004d1bb78c024261 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:43:53 +0200 Subject: [PATCH 24/48] virtio_blk: drop owner assignment virtio core already sets the .owner, so driver does not need to. Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-6-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi --- drivers/block/virtio_blk.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 42dea7601d87..46bdbad1ab48 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1658,7 +1658,6 @@ static struct virtio_driver virtio_blk = { .feature_table_legacy = features_legacy, .feature_table_size_legacy = ARRAY_SIZE(features_legacy), .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .probe = virtblk_probe, .remove = virtblk_remove, From 1e0d03b7d0215bfebc3b77c704b6b818389c93f5 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:43:54 +0200 Subject: [PATCH 25/48] bluetooth: virtio: drop owner assignment virtio core already sets the .owner, so driver does not need to. Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-7-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin --- drivers/bluetooth/virtio_bt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/bluetooth/virtio_bt.c b/drivers/bluetooth/virtio_bt.c index 2ac70b560c46..463b49ca2492 100644 --- a/drivers/bluetooth/virtio_bt.c +++ b/drivers/bluetooth/virtio_bt.c @@ -417,7 +417,6 @@ static const unsigned int virtbt_features[] = { static struct virtio_driver virtbt_driver = { .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .feature_table = virtbt_features, .feature_table_size = ARRAY_SIZE(virtbt_features), .id_table = virtbt_table, From 6098bb388499e9c0cdca27b657c23a3227816c4d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:43:55 +0200 Subject: [PATCH 26/48] hwrng: virtio: drop owner assignment virtio core already sets the .owner, so driver does not need to. Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-8-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin --- drivers/char/hw_random/virtio-rng.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 7a4b45393acb..dd998f4fe4f2 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -245,7 +245,6 @@ static const struct virtio_device_id id_table[] = { static struct virtio_driver virtio_rng_driver = { .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .probe = virtrng_probe, .remove = virtrng_remove, From a610e31aa180a54a36130182e1bc74dd2aa6ddb7 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:43:56 +0200 Subject: [PATCH 27/48] virtio_console: drop owner assignment virtio core already sets the .owner, so driver does not need to. Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-9-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin --- drivers/char/virtio_console.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 035f89f1a251..d9ee2dbc7eab 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -2173,7 +2173,6 @@ static struct virtio_driver virtio_console = { .feature_table = features, .feature_table_size = ARRAY_SIZE(features), .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .probe = virtcons_probe, .remove = virtcons_remove, @@ -2188,7 +2187,6 @@ static struct virtio_driver virtio_rproc_serial = { .feature_table = rproc_serial_features, .feature_table_size = ARRAY_SIZE(rproc_serial_features), .driver.name = "virtio_rproc_serial", - .driver.owner = THIS_MODULE, .id_table = rproc_serial_id_table, .probe = virtcons_probe, .remove = virtcons_remove, From 9e00c140f38d2394426d7e9b2a23b52958c67deb Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:43:57 +0200 Subject: [PATCH 28/48] crypto: virtio - drop owner assignment virtio core already sets the .owner, so driver does not need to. Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-10-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin Acked-by: Herbert Xu --- drivers/crypto/virtio/virtio_crypto_core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index 6a67d70e7f1c..30cd040aa03b 100644 --- a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -581,7 +581,6 @@ static const struct virtio_device_id id_table[] = { static struct virtio_driver virtio_crypto_driver = { .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .feature_table = features, .feature_table_size = ARRAY_SIZE(features), .id_table = id_table, From cfffb29c48e40f849090a2b4daa4ee7f8e33c831 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:43:58 +0200 Subject: [PATCH 29/48] firmware: arm_scmi: virtio: drop owner assignment virtio core already sets the .owner, so driver does not need to. Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-11-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin Acked-by: Sudeep Holla --- drivers/firmware/arm_scmi/virtio.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/firmware/arm_scmi/virtio.c b/drivers/firmware/arm_scmi/virtio.c index d68c01cb7aa0..4892058445ce 100644 --- a/drivers/firmware/arm_scmi/virtio.c +++ b/drivers/firmware/arm_scmi/virtio.c @@ -908,7 +908,6 @@ static const struct virtio_device_id id_table[] = { static struct virtio_driver virtio_scmi_driver = { .driver.name = "scmi-virtio", - .driver.owner = THIS_MODULE, .feature_table = features, .feature_table_size = ARRAY_SIZE(features), .id_table = id_table, From 2ce0b26c116f5070f43dae8347141ccd54d1d61c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:43:59 +0200 Subject: [PATCH 30/48] gpio: virtio: drop owner assignment virtio core already sets the .owner, so driver does not need to. Acked-by: Bartosz Golaszewski Acked-by: Viresh Kumar Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-12-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin Acked-by: Linus Walleij --- drivers/gpio/gpio-virtio.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpio/gpio-virtio.c b/drivers/gpio/gpio-virtio.c index fcc5e8c08973..9fae8e396c58 100644 --- a/drivers/gpio/gpio-virtio.c +++ b/drivers/gpio/gpio-virtio.c @@ -653,7 +653,6 @@ static struct virtio_driver virtio_gpio_driver = { .remove = virtio_gpio_remove, .driver = { .name = KBUILD_MODNAME, - .owner = THIS_MODULE, }, }; module_virtio_driver(virtio_gpio_driver); From 41dca8275a692b14184ec9105ebb45d988299a5a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:44:00 +0200 Subject: [PATCH 31/48] drm/virtio: drop owner assignment virtio core already sets the .owner, so driver does not need to. Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-13-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin --- drivers/gpu/drm/virtio/virtgpu_drv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index 9539aa28937f..188e126383c2 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -154,7 +154,6 @@ static struct virtio_driver virtio_gpu_driver = { .feature_table = features, .feature_table_size = ARRAY_SIZE(features), .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .probe = virtio_gpu_probe, .remove = virtio_gpu_remove, From ac5990d3ecc7b4517bddf68a5a99efca78211444 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:44:01 +0200 Subject: [PATCH 32/48] iommu: virtio: drop owner assignment virtio core already sets the .owner, so driver does not need to. Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-14-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin --- drivers/iommu/virtio-iommu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 04048f64a2c0..9ed8958a42bf 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -1261,7 +1261,6 @@ MODULE_DEVICE_TABLE(virtio, id_table); static struct virtio_driver virtio_iommu_drv = { .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .feature_table = features, .feature_table_size = ARRAY_SIZE(features), From 7249ad5a71bf36cc324b5c6af7dc932e31c3adfd Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:44:02 +0200 Subject: [PATCH 33/48] misc: nsm: drop owner assignment virtio core already sets the .owner, so driver does not need to. Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-15-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin Reviewed-by: Alexander Graf --- drivers/misc/nsm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/misc/nsm.c b/drivers/misc/nsm.c index 0eaa3b4484bd..ef7b32742340 100644 --- a/drivers/misc/nsm.c +++ b/drivers/misc/nsm.c @@ -494,7 +494,6 @@ static struct virtio_driver virtio_nsm_driver = { .feature_table_legacy = 0, .feature_table_size_legacy = 0, .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .probe = nsm_device_probe, .remove = nsm_device_remove, From 606f1f9f3d94fe2e42a2e18638eed818d89e1da3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:44:03 +0200 Subject: [PATCH 34/48] net: caif: virtio: drop owner assignment virtio core already sets the .owner, so driver does not need to. Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-16-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin --- drivers/net/caif/caif_virtio.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c index 0b0f234b0b50..99d984851fef 100644 --- a/drivers/net/caif/caif_virtio.c +++ b/drivers/net/caif/caif_virtio.c @@ -782,7 +782,6 @@ static struct virtio_driver caif_virtio_driver = { .feature_table = features, .feature_table_size = ARRAY_SIZE(features), .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .probe = cfv_probe, .remove = cfv_remove, From 9a03bfa1086545bf18326110329b2d108050936b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:44:04 +0200 Subject: [PATCH 35/48] net: virtio: drop owner assignment virtio core already sets the .owner, so driver does not need to. Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-17-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin --- drivers/net/virtio_net.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 115c3c5414f2..283b34d50296 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -5039,7 +5039,6 @@ static struct virtio_driver virtio_net_driver = { .feature_table_legacy = features_legacy, .feature_table_size_legacy = ARRAY_SIZE(features_legacy), .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .validate = virtnet_validate, .probe = virtnet_probe, From d26dd255ce44e325e163ff3ce53423d26911ad11 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:44:05 +0200 Subject: [PATCH 36/48] net: 9p: virtio: drop owner assignment virtio core already sets the .owner, so driver does not need to. Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-18-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin --- net/9p/trans_virtio.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index e305071eb7b8..0b8086f58ad5 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -781,7 +781,6 @@ static struct virtio_driver p9_virtio_drv = { .feature_table = features, .feature_table_size = ARRAY_SIZE(features), .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .probe = p9_virtio_probe, .remove = p9_virtio_remove, From b1c16d4a33a3625f2049400d97453f8a57f80639 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:44:06 +0200 Subject: [PATCH 37/48] vsock/virtio: drop owner assignment virtio core already sets the .owner, so driver does not need to. Acked-by: Stefano Garzarella Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-19-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi --- net/vmw_vsock/virtio_transport.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index ee5d306a96d0..43d405298857 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -859,7 +859,6 @@ static struct virtio_driver virtio_vsock_driver = { .feature_table = features, .feature_table_size = ARRAY_SIZE(features), .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .probe = virtio_vsock_probe, .remove = virtio_vsock_remove, From 19680c70faa1f1070365bb1ad0e573b6ff353c31 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:44:07 +0200 Subject: [PATCH 38/48] wifi: mac80211_hwsim: drop owner assignment virtio core already sets the .owner, so driver does not need to. Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-20-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin --- drivers/net/wireless/virtual/mac80211_hwsim.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index 59e1fc0018df..20fa21bb4d1c 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -6662,7 +6662,6 @@ MODULE_DEVICE_TABLE(virtio, id_table); static struct virtio_driver virtio_hwsim = { .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .probe = hwsim_virtio_probe, .remove = hwsim_virtio_remove, From 9dceed1b0a35d8667e7a8512f7a30a2c9b189f66 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:44:08 +0200 Subject: [PATCH 39/48] nvdimm: virtio_pmem: drop owner assignment virtio core already sets the .owner, so driver does not need to. Acked-by: Dave Jiang Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-21-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin Reviewed-by: Pankaj Gupta --- drivers/nvdimm/virtio_pmem.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c index 4ceced5cefcf..c9b97aeabf85 100644 --- a/drivers/nvdimm/virtio_pmem.c +++ b/drivers/nvdimm/virtio_pmem.c @@ -151,7 +151,6 @@ static struct virtio_driver virtio_pmem_driver = { .feature_table = features, .feature_table_size = ARRAY_SIZE(features), .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .validate = virtio_pmem_validate, .probe = virtio_pmem_probe, From f42c7405fdc90b73bf62d32a41b77c5542a09c2b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:44:09 +0200 Subject: [PATCH 40/48] rpmsg: virtio: drop owner assignment virtio core already sets the .owner, so driver does not need to. Reviewed-by: Mathieu Poirier Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-22-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin --- drivers/rpmsg/virtio_rpmsg_bus.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 1062939c3264..e9e8c1f7829f 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -1053,7 +1053,6 @@ static struct virtio_driver virtio_ipc_driver = { .feature_table = features, .feature_table_size = ARRAY_SIZE(features), .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .probe = rpmsg_probe, .remove = rpmsg_remove, From e1e4d376836a134465f8f265281e4db5d3bcd9db Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:44:10 +0200 Subject: [PATCH 41/48] scsi: virtio: drop owner assignment virtio core already sets the .owner, so driver does not need to. Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-23-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi Acked-by: Martin K. Petersen --- drivers/scsi/virtio_scsi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 617eb892f4ad..89ca26945721 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -1052,7 +1052,6 @@ static struct virtio_driver virtio_scsi_driver = { .feature_table = features, .feature_table_size = ARRAY_SIZE(features), .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .probe = virtscsi_probe, #ifdef CONFIG_PM_SLEEP From bc21020f61f5f93f39eeedc7ae56797e59b98816 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:44:11 +0200 Subject: [PATCH 42/48] fuse: virtio: drop owner assignment virtio core already sets the .owner, so driver does not need to. Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-24-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi --- fs/fuse/virtio_fs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 322af827a232..ca7b64f9c3c7 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -1023,7 +1023,6 @@ static const unsigned int feature_table[] = {}; static struct virtio_driver virtio_fs_driver = { .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .feature_table = feature_table, .feature_table_size = ARRAY_SIZE(feature_table), From 1fa74f2449001de80bf2548cb42c04dd5b848d43 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 31 Mar 2024 10:44:12 +0200 Subject: [PATCH 43/48] sound: virtio: drop owner assignment virtio core already sets the .owner, so driver does not need to. Signed-off-by: Krzysztof Kozlowski Message-Id: <20240331-module-owner-virtio-v2-25-98f04bfaf46a@linaro.org> Signed-off-by: Michael S. Tsirkin Acked-by: Anton Yakovlev --- sound/virtio/virtio_card.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/virtio/virtio_card.c b/sound/virtio/virtio_card.c index 2da20c625247..7805daea0102 100644 --- a/sound/virtio/virtio_card.c +++ b/sound/virtio/virtio_card.c @@ -438,7 +438,6 @@ static unsigned int features[] = { static struct virtio_driver virtsnd_driver = { .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .feature_table = features, .feature_table_size = ARRAY_SIZE(features), From 4d685629b727cf17be9446865076d52dd3fa0933 Mon Sep 17 00:00:00 2001 From: Yuxue Liu Date: Wed, 10 Apr 2024 11:30:20 +0800 Subject: [PATCH 44/48] vp_vdpa: don't allocate unused msix vectors When there is a ctlq and it doesn't require interrupt callbacks,the original method of calculating vectors wastes hardware msi or msix resources as well as system IRQ resources. When conducting performance testing using testpmd in the guest os, it was found that the performance was lower compared to directly using vfio-pci to passthrough the device In scenarios where the virtio device in the guest os does not utilize interrupts, the vdpa driver still configures the hardware's msix vector. Therefore, the hardware still sends interrupts to the host os. Because of this unnecessary action by the hardware, hardware performance decreases, and it also affects the performance of the host os. Before modification:(interrupt mode) 32: 0 0 0 0 PCI-MSI 32768-edge vp-vdpa[0000:00:02.0]-0 33: 0 0 0 0 PCI-MSI 32769-edge vp-vdpa[0000:00:02.0]-1 34: 0 0 0 0 PCI-MSI 32770-edge vp-vdpa[0000:00:02.0]-2 35: 0 0 0 0 PCI-MSI 32771-edge vp-vdpa[0000:00:02.0]-config After modification:(interrupt mode) 32: 0 0 1 7 PCI-MSI 32768-edge vp-vdpa[0000:00:02.0]-0 33: 36 0 3 0 PCI-MSI 32769-edge vp-vdpa[0000:00:02.0]-1 34: 0 0 0 0 PCI-MSI 32770-edge vp-vdpa[0000:00:02.0]-config Before modification:(virtio pmd mode for guest os) 32: 0 0 0 0 PCI-MSI 32768-edge vp-vdpa[0000:00:02.0]-0 33: 0 0 0 0 PCI-MSI 32769-edge vp-vdpa[0000:00:02.0]-1 34: 0 0 0 0 PCI-MSI 32770-edge vp-vdpa[0000:00:02.0]-2 35: 0 0 0 0 PCI-MSI 32771-edge vp-vdpa[0000:00:02.0]-config After modification:(virtio pmd mode for guest os) 32: 0 0 0 0 PCI-MSI 32768-edge vp-vdpa[0000:00:02.0]-config To verify the use of the virtio PMD mode in the guest operating system, the following patch needs to be applied to QEMU: https://lore.kernel.org/all/20240408073311.2049-1-yuxue.liu@jaguarmicro.com Signed-off-by: Yuxue Liu Acked-by: Jason Wang Reviewed-by: Heng Qi Message-Id: <20240410033020.1310-1-yuxue.liu@jaguarmicro.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/virtio_pci/vp_vdpa.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c index 4caca0517cad..ac4ab22f7d8b 100644 --- a/drivers/vdpa/virtio_pci/vp_vdpa.c +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c @@ -160,7 +160,13 @@ static int vp_vdpa_request_irq(struct vp_vdpa *vp_vdpa) struct pci_dev *pdev = mdev->pci_dev; int i, ret, irq; int queues = vp_vdpa->queues; - int vectors = queues + 1; + int vectors = 1; + int msix_vec = 0; + + for (i = 0; i < queues; i++) { + if (vp_vdpa->vring[i].cb.callback) + vectors++; + } ret = pci_alloc_irq_vectors(pdev, vectors, vectors, PCI_IRQ_MSIX); if (ret != vectors) { @@ -173,9 +179,12 @@ static int vp_vdpa_request_irq(struct vp_vdpa *vp_vdpa) vp_vdpa->vectors = vectors; for (i = 0; i < queues; i++) { + if (!vp_vdpa->vring[i].cb.callback) + continue; + snprintf(vp_vdpa->vring[i].msix_name, VP_VDPA_NAME_SIZE, "vp-vdpa[%s]-%d\n", pci_name(pdev), i); - irq = pci_irq_vector(pdev, i); + irq = pci_irq_vector(pdev, msix_vec); ret = devm_request_irq(&pdev->dev, irq, vp_vdpa_vq_handler, 0, vp_vdpa->vring[i].msix_name, @@ -185,21 +194,22 @@ static int vp_vdpa_request_irq(struct vp_vdpa *vp_vdpa) "vp_vdpa: fail to request irq for vq %d\n", i); goto err; } - vp_modern_queue_vector(mdev, i, i); + vp_modern_queue_vector(mdev, i, msix_vec); vp_vdpa->vring[i].irq = irq; + msix_vec++; } snprintf(vp_vdpa->msix_name, VP_VDPA_NAME_SIZE, "vp-vdpa[%s]-config\n", pci_name(pdev)); - irq = pci_irq_vector(pdev, queues); + irq = pci_irq_vector(pdev, msix_vec); ret = devm_request_irq(&pdev->dev, irq, vp_vdpa_config_handler, 0, vp_vdpa->msix_name, vp_vdpa); if (ret) { dev_err(&pdev->dev, - "vp_vdpa: fail to request irq for vq %d\n", i); + "vp_vdpa: fail to request irq for config: %d\n", ret); goto err; } - vp_modern_config_vector(mdev, queues); + vp_modern_config_vector(mdev, msix_vec); vp_vdpa->config_irq = irq; return 0; From f452001dca301d05e509f3e39998c442900e9937 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 14 Apr 2024 12:04:26 +0200 Subject: [PATCH 45/48] vhost-vdpa: Remove usage of the deprecated ida_simple_xx() API ida_alloc() and ida_free() should be preferred to the deprecated ida_simple_get() and ida_simple_remove(). Note that the upper limit of ida_simple_get() is exclusive, but the one of ida_alloc_max() is inclusive. So a -1 has been added when needed. Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Message-Id: <67c2edf49788c27d5f7a49fc701520b9fcf739b5.1713088999.git.christophe.jaillet@wanadoo.fr> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/vdpa.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index ba52d128aeb7..63a53680a85c 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -1548,7 +1548,7 @@ static void vhost_vdpa_release_dev(struct device *device) struct vhost_vdpa *v = container_of(device, struct vhost_vdpa, dev); - ida_simple_remove(&vhost_vdpa_ida, v->minor); + ida_free(&vhost_vdpa_ida, v->minor); kfree(v->vqs); kfree(v); } @@ -1571,8 +1571,8 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) if (!v) return -ENOMEM; - minor = ida_simple_get(&vhost_vdpa_ida, 0, - VHOST_VDPA_DEV_MAX, GFP_KERNEL); + minor = ida_alloc_max(&vhost_vdpa_ida, VHOST_VDPA_DEV_MAX - 1, + GFP_KERNEL); if (minor < 0) { kfree(v); return minor; From 10e49da815e3e3dda7a86af2124edd687ea29a54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Tue, 13 Feb 2024 19:24:50 +0100 Subject: [PATCH 46/48] =?UTF-8?q?MAINTAINERS:=20add=20Eugenio=20P=C3=A9rez?= =?UTF-8?q?=20as=20reviewer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add myself as a reviewer of some VirtIO areas I'm interested. Until this point I've been scanning manually the list looking for series that touches this area. Adding myself to make this task easier. Signed-off-by: Eugenio Pérez Message-Id: <20240213182450.106796-1-eperezma@redhat.com> Signed-off-by: Michael S. Tsirkin --- MAINTAINERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 2342b34656fb..f2f6cfbaa11a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -23395,6 +23395,7 @@ M: "Michael S. Tsirkin" M: Jason Wang R: Paolo Bonzini R: Stefan Hajnoczi +R: Eugenio Pérez L: virtualization@lists.linux.dev S: Maintained F: drivers/block/virtio_blk.c @@ -23413,6 +23414,7 @@ VIRTIO CORE AND NET DRIVERS M: "Michael S. Tsirkin" M: Jason Wang R: Xuan Zhuo +R: Eugenio Pérez L: virtualization@lists.linux.dev S: Maintained F: Documentation/ABI/testing/sysfs-bus-vdpa @@ -23453,6 +23455,7 @@ VIRTIO FILE SYSTEM M: Vivek Goyal M: Stefan Hajnoczi M: Miklos Szeredi +R: Eugenio Pérez L: virtualization@lists.linux.dev L: linux-fsdevel@vger.kernel.org S: Supported @@ -23486,6 +23489,7 @@ F: include/uapi/linux/virtio_gpu.h VIRTIO HOST (VHOST) M: "Michael S. Tsirkin" M: Jason Wang +R: Eugenio Pérez L: kvm@vger.kernel.org L: virtualization@lists.linux.dev L: netdev@vger.kernel.org From 89875151fccdd024d571aa884ea97a0128b968b6 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 26 Apr 2024 17:08:45 +0200 Subject: [PATCH 47/48] virtio: delete vq in vp_find_vqs_msix() when request_irq() fails When request_irq() fails, error path calls vp_del_vqs(). There, as vq is present in the list, free_irq() is called for the same vector. That causes following splat: [ 0.414355] Trying to free already-free IRQ 27 [ 0.414403] WARNING: CPU: 1 PID: 1 at kernel/irq/manage.c:1899 free_irq+0x1a1/0x2d0 [ 0.414510] Modules linked in: [ 0.414540] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 6.9.0-rc4+ #27 [ 0.414540] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-1.fc39 04/01/2014 [ 0.414540] RIP: 0010:free_irq+0x1a1/0x2d0 [ 0.414540] Code: 1e 00 48 83 c4 08 48 89 e8 5b 5d 41 5c 41 5d 41 5e 41 5f c3 cc cc cc cc 90 8b 74 24 04 48 c7 c7 98 80 6c b1 e8 00 c9 f7 ff 90 <0f> 0b 90 90 48 89 ee 4c 89 ef e8 e0 20 b8 00 49 8b 47 40 48 8b 40 [ 0.414540] RSP: 0000:ffffb71480013ae0 EFLAGS: 00010086 [ 0.414540] RAX: 0000000000000000 RBX: ffffa099c2722000 RCX: 0000000000000000 [ 0.414540] RDX: 0000000000000000 RSI: ffffb71480013998 RDI: 0000000000000001 [ 0.414540] RBP: 0000000000000246 R08: 00000000ffffdfff R09: 0000000000000001 [ 0.414540] R10: 00000000ffffdfff R11: ffffffffb18729c0 R12: ffffa099c1c91760 [ 0.414540] R13: ffffa099c1c916a4 R14: ffffa099c1d2f200 R15: ffffa099c1c91600 [ 0.414540] FS: 0000000000000000(0000) GS:ffffa099fec40000(0000) knlGS:0000000000000000 [ 0.414540] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 0.414540] CR2: 0000000000000000 CR3: 0000000008e3e001 CR4: 0000000000370ef0 [ 0.414540] Call Trace: [ 0.414540] [ 0.414540] ? __warn+0x80/0x120 [ 0.414540] ? free_irq+0x1a1/0x2d0 [ 0.414540] ? report_bug+0x164/0x190 [ 0.414540] ? handle_bug+0x3b/0x70 [ 0.414540] ? exc_invalid_op+0x17/0x70 [ 0.414540] ? asm_exc_invalid_op+0x1a/0x20 [ 0.414540] ? free_irq+0x1a1/0x2d0 [ 0.414540] vp_del_vqs+0xc1/0x220 [ 0.414540] vp_find_vqs_msix+0x305/0x470 [ 0.414540] vp_find_vqs+0x3e/0x1a0 [ 0.414540] vp_modern_find_vqs+0x1b/0x70 [ 0.414540] init_vqs+0x387/0x600 [ 0.414540] virtnet_probe+0x50a/0xc80 [ 0.414540] virtio_dev_probe+0x1e0/0x2b0 [ 0.414540] really_probe+0xc0/0x2c0 [ 0.414540] ? __pfx___driver_attach+0x10/0x10 [ 0.414540] __driver_probe_device+0x73/0x120 [ 0.414540] driver_probe_device+0x1f/0xe0 [ 0.414540] __driver_attach+0x88/0x180 [ 0.414540] bus_for_each_dev+0x85/0xd0 [ 0.414540] bus_add_driver+0xec/0x1f0 [ 0.414540] driver_register+0x59/0x100 [ 0.414540] ? __pfx_virtio_net_driver_init+0x10/0x10 [ 0.414540] virtio_net_driver_init+0x90/0xb0 [ 0.414540] do_one_initcall+0x58/0x230 [ 0.414540] kernel_init_freeable+0x1a3/0x2d0 [ 0.414540] ? __pfx_kernel_init+0x10/0x10 [ 0.414540] kernel_init+0x1a/0x1c0 [ 0.414540] ret_from_fork+0x31/0x50 [ 0.414540] ? __pfx_kernel_init+0x10/0x10 [ 0.414540] ret_from_fork_asm+0x1a/0x30 [ 0.414540] Fix this by calling deleting the current vq when request_irq() fails. Fixes: 0b0f9dc52ed0 ("Revert "virtio_pci: use shared interrupts for virtqueues"") Signed-off-by: Jiri Pirko Message-Id: <20240426150845.3999481-1-jiri@resnulli.us> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_common.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index b655fccaf773..584af7816532 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -348,8 +348,10 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, vring_interrupt, 0, vp_dev->msix_names[msix_vec], vqs[i]); - if (err) + if (err) { + vp_del_vq(vqs[i]); goto error_find; + } } return 0; From c8fae27d141a32a1624d0d0d5419d94252824498 Mon Sep 17 00:00:00 2001 From: Li Zhang Date: Sat, 16 Mar 2024 13:25:54 +0800 Subject: [PATCH 48/48] virtio-pci: Check if is_avq is NULL [bug] In the virtio_pci_common.c function vp_del_vqs, vp_dev->is_avq is involved to determine whether it is admin virtqueue, but this function vp_dev->is_avq may be empty. For installations, virtio_pci_legacy does not assign a value to vp_dev->is_avq. [fix] Check whether it is vp_dev->is_avq before use. [test] Test with virsh Attach device Before this patch, the following command would crash the guest system After applying the patch, everything seems to be working fine. Signed-off-by: Li Zhang Message-Id: <1710566754-3532-1-git-send-email-zhanglikernel@gmail.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 584af7816532..f6b0b00e4599 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -236,7 +236,7 @@ void vp_del_vqs(struct virtio_device *vdev) int i; list_for_each_entry_safe(vq, n, &vdev->vqs, list) { - if (vp_dev->is_avq(vdev, vq->index)) + if (vp_dev->is_avq && vp_dev->is_avq(vdev, vq->index)) continue; if (vp_dev->per_vq_vectors) {