iommu/vt-d: Use device rbtree in iopf reporting path
The I/O page fault handler currently locates the PCI device by calling
pci_get_domain_bus_and_slot(). This function searches the list of all PCI
devices until the desired device is found. To improve lookup efficiency,
replace it with device_rbtree_find(), which searches the device within the
probed device rbtree.

The I/O page fault is initiated by the device, which has no synchronization
mechanism with the software to ensure that the device stays in the probed
device rbtree. Theoretically, a device could be released by the IOMMU
subsystem after device_rbtree_find() and before iopf_get_dev_fault_param(),
which would cause a use-after-free problem.

Add a mutex to synchronize the I/O page fault reporting path and the IOMMU
device release path. This lock doesn't introduce any performance overhead,
as the conflict between I/O page fault reporting and device releasing is
very rare.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20240220065939.121116-3-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
commit def054b01a
parent 1a75cc710b

4 changed files with 15 additions and 8 deletions
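As background for the diff below, here is a minimal, self-contained userspace sketch (plain C with pthreads, not kernel code) of the locking pattern this patch adopts: the fault-reporting path performs the lookup and uses the result under the same mutex that the release path holds while removing the entry, so the entry cannot be freed between lookup and use. Every name in the sketch (struct dev_entry, dev_lock, report_fault(), release_dev()) is illustrative only; the actual patch uses the per-IOMMU device rbtree and iommu->iopf_lock shown in the hunks that follow.

/*
 * Illustrative userspace analogy of the iopf_lock pattern; a singly
 * linked list stands in for the per-IOMMU device rbtree.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct dev_entry {
        unsigned int rid;              /* requester ID used as the lookup key */
        struct dev_entry *next;        /* stand-in for the rbtree linkage */
};

static struct dev_entry *dev_list;
static pthread_mutex_t dev_lock = PTHREAD_MUTEX_INITIALIZER;

/* Fault-reporting path: lookup and use both happen under dev_lock. */
static void report_fault(unsigned int rid)
{
        struct dev_entry *dev;

        pthread_mutex_lock(&dev_lock);
        for (dev = dev_list; dev; dev = dev->next)
                if (dev->rid == rid)
                        break;
        if (!dev) {
                pthread_mutex_unlock(&dev_lock);
                printf("rid %#x: no such device, fault dropped\n", rid);
                return;
        }
        printf("rid %#x: fault reported\n", dev->rid);
        pthread_mutex_unlock(&dev_lock);
}

/* Release path: removal is serialized against report_fault() by dev_lock. */
static void release_dev(unsigned int rid)
{
        struct dev_entry **p, *dev = NULL;

        pthread_mutex_lock(&dev_lock);
        for (p = &dev_list; *p; p = &(*p)->next) {
                if ((*p)->rid == rid) {
                        dev = *p;
                        *p = dev->next;
                        break;
                }
        }
        pthread_mutex_unlock(&dev_lock);
        free(dev);      /* safe: no fault path can still be using the entry */
}

int main(void)
{
        struct dev_entry *dev = calloc(1, sizeof(*dev));

        dev->rid = 0x42;
        dev->next = dev_list;
        dev_list = dev;

        report_fault(0x42);     /* found: entry still present */
        release_dev(0x42);
        report_fault(0x42);     /* misses: entry was removed under the lock */
        return 0;
}

In the kernel patch itself, the unlock happens only after intel_svm_prq_report() or trace_prq_report() has consumed the device pointer, which is what closes the use-after-free window described in the commit message.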
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1097,6 +1097,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
 	iommu->segment = drhd->segment;
 	iommu->device_rbtree = RB_ROOT;
 	spin_lock_init(&iommu->device_rbtree_lock);
+	mutex_init(&iommu->iopf_lock);
 	iommu->node = NUMA_NO_NODE;
 
 	ver = readl(iommu->reg + DMAR_VER_REG);
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4362,8 +4362,11 @@ free:
 static void intel_iommu_release_device(struct device *dev)
 {
 	struct device_domain_info *info = dev_iommu_priv_get(dev);
+	struct intel_iommu *iommu = info->iommu;
 
+	mutex_lock(&iommu->iopf_lock);
 	device_rbtree_remove(info);
+	mutex_unlock(&iommu->iopf_lock);
 	dmar_remove_one_dev_info(dev);
 	intel_pasid_free_table(dev);
 	intel_iommu_debugfs_remove_dev(info);
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -713,6 +713,8 @@ struct intel_iommu {
 #endif
 	struct iopf_queue *iopf_queue;
 	unsigned char iopfq_name[16];
+	/* Synchronization between fault report and iommu device release. */
+	struct mutex iopf_lock;
 	struct q_inval *qi;		/* Queued invalidation info */
 	u32 iommu_state[MAX_SR_DMAR_REGS]; /* Store iommu states between suspend and resume.*/
 
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -643,7 +643,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 	struct intel_iommu *iommu = d;
 	struct page_req_dsc *req;
 	int head, tail, handled;
-	struct pci_dev *pdev;
+	struct device *dev;
 	u64 address;
 
 	/*
@@ -689,23 +689,24 @@ bad_req:
 		if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
 			goto prq_advance;
 
-		pdev = pci_get_domain_bus_and_slot(iommu->segment,
-						   PCI_BUS_NUM(req->rid),
-						   req->rid & 0xff);
 		/*
 		 * If prq is to be handled outside iommu driver via receiver of
 		 * the fault notifiers, we skip the page response here.
 		 */
-		if (!pdev)
+		mutex_lock(&iommu->iopf_lock);
+		dev = device_rbtree_find(iommu, req->rid);
+		if (!dev) {
+			mutex_unlock(&iommu->iopf_lock);
 			goto bad_req;
+		}
 
-		if (intel_svm_prq_report(iommu, &pdev->dev, req))
+		if (intel_svm_prq_report(iommu, dev, req))
 			handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
 		else
-			trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1,
+			trace_prq_report(iommu, dev, req->qw_0, req->qw_1,
 				 req->priv_data[0], req->priv_data[1],
 				 iommu->prq_seq_number++);
-		pci_dev_put(pdev);
+		mutex_unlock(&iommu->iopf_lock);
 prq_advance:
 		head = (head + sizeof(*req)) & PRQ_RING_MASK;
 	}