powerpc/iommu: Add iommu_ops to report capabilities and allow blocking domains
Up until now PPC64 managed to avoid using iommu_ops. The VFIO driver uses a SPAPR TCE sub-driver and all iommu_ops uses were kept in the Type1 VFIO driver. Recent development added 2 uses of iommu_ops to the generic VFIO which broke POWER: - a coherency capability check; - blocking IOMMU domain - iommu_group_dma_owner_claimed()/... This adds a simple iommu_ops which reports support for cache coherency and provides a basic support for blocking domains. No other domain types are implemented so the default domain is NULL. Since now iommu_ops controls the group ownership, this takes it out of VFIO. This adds an IOMMU device into a pci_controller (=PHB) and registers it in the IOMMU subsystem, iommu_ops is registered at this point. This setup is done in postcore_initcall_sync. This replaces iommu_group_add_device() with iommu_probe_device() as the former misses necessary steps in connecting PCI devices to IOMMU devices. This adds a comment about why explicit iommu_probe_device() is still needed. The previous discussion is here: https://lore.kernel.org/r/20220707135552.3688927-1-aik@ozlabs.ru/ https://lore.kernel.org/r/20220701061751.1955857-1-aik@ozlabs.ru/ Fixes:e8ae0e140c
("vfio: Require that devices support DMA cache coherence") Fixes:70693f4708
("vfio: Set DMA ownership for VFIO devices") Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com> Acked-by: Alex Williamson <alex.williamson@redhat.com> [mpe: Fix CONFIG_IOMMU_API=n build] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/2000135730.16998523.1678123860135.JavaMail.zimbra@raptorengineeringinc.com
This commit is contained in:
parent
76f351096c
commit
a940904443
7 changed files with 218 additions and 10 deletions
|
@ -8,6 +8,7 @@
|
||||||
#include <linux/list.h>
|
#include <linux/list.h>
|
||||||
#include <linux/ioport.h>
|
#include <linux/ioport.h>
|
||||||
#include <linux/numa.h>
|
#include <linux/numa.h>
|
||||||
|
#include <linux/iommu.h>
|
||||||
|
|
||||||
struct device_node;
|
struct device_node;
|
||||||
|
|
||||||
|
@ -44,6 +45,9 @@ struct pci_controller_ops {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void (*shutdown)(struct pci_controller *hose);
|
void (*shutdown)(struct pci_controller *hose);
|
||||||
|
|
||||||
|
struct iommu_group *(*device_group)(struct pci_controller *hose,
|
||||||
|
struct pci_dev *pdev);
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -131,6 +135,9 @@ struct pci_controller {
|
||||||
struct irq_domain *dev_domain;
|
struct irq_domain *dev_domain;
|
||||||
struct irq_domain *msi_domain;
|
struct irq_domain *msi_domain;
|
||||||
struct fwnode_handle *fwnode;
|
struct fwnode_handle *fwnode;
|
||||||
|
|
||||||
|
/* iommu_ops support */
|
||||||
|
struct iommu_device iommu;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* These are used for config access before all the PCI probing
|
/* These are used for config access before all the PCI probing
|
||||||
|
|
|
@ -35,6 +35,7 @@
|
||||||
#include <asm/vio.h>
|
#include <asm/vio.h>
|
||||||
#include <asm/tce.h>
|
#include <asm/tce.h>
|
||||||
#include <asm/mmu_context.h>
|
#include <asm/mmu_context.h>
|
||||||
|
#include <asm/ppc-pci.h>
|
||||||
|
|
||||||
#define DBG(...)
|
#define DBG(...)
|
||||||
|
|
||||||
|
@ -1156,8 +1157,14 @@ int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)
|
||||||
|
|
||||||
pr_debug("%s: Adding %s to iommu group %d\n",
|
pr_debug("%s: Adding %s to iommu group %d\n",
|
||||||
__func__, dev_name(dev), iommu_group_id(table_group->group));
|
__func__, dev_name(dev), iommu_group_id(table_group->group));
|
||||||
|
/*
|
||||||
return iommu_group_add_device(table_group->group, dev);
|
* This is still not adding devices via the IOMMU bus notifier because
|
||||||
|
* of pcibios_init() from arch/powerpc/kernel/pci_64.c which calls
|
||||||
|
* pcibios_scan_phb() first (and this guy adds devices and triggers
|
||||||
|
* the notifier) and only then it calls pci_bus_add_devices() which
|
||||||
|
* configures DMA for buses which also creates PEs and IOMMU groups.
|
||||||
|
*/
|
||||||
|
return iommu_probe_device(dev);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(iommu_add_device);
|
EXPORT_SYMBOL_GPL(iommu_add_device);
|
||||||
|
|
||||||
|
@ -1237,6 +1244,7 @@ static long spapr_tce_take_ownership(struct iommu_table_group *table_group)
|
||||||
rc = iommu_take_ownership(tbl);
|
rc = iommu_take_ownership(tbl);
|
||||||
if (!rc)
|
if (!rc)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
for (j = 0; j < i; ++j)
|
for (j = 0; j < i; ++j)
|
||||||
iommu_release_ownership(table_group->tables[j]);
|
iommu_release_ownership(table_group->tables[j]);
|
||||||
return rc;
|
return rc;
|
||||||
|
@ -1269,4 +1277,140 @@ struct iommu_table_group_ops spapr_tce_table_group_ops = {
|
||||||
.release_ownership = spapr_tce_release_ownership,
|
.release_ownership = spapr_tce_release_ownership,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A simple iommu_ops to allow less cruft in generic VFIO code.
|
||||||
|
*/
|
||||||
|
static int spapr_tce_blocking_iommu_attach_dev(struct iommu_domain *dom,
|
||||||
|
struct device *dev)
|
||||||
|
{
|
||||||
|
struct iommu_group *grp = iommu_group_get(dev);
|
||||||
|
struct iommu_table_group *table_group;
|
||||||
|
int ret = -EINVAL;
|
||||||
|
|
||||||
|
if (!grp)
|
||||||
|
return -ENODEV;
|
||||||
|
|
||||||
|
table_group = iommu_group_get_iommudata(grp);
|
||||||
|
ret = table_group->ops->take_ownership(table_group);
|
||||||
|
iommu_group_put(grp);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void spapr_tce_blocking_iommu_set_platform_dma(struct device *dev)
|
||||||
|
{
|
||||||
|
struct iommu_group *grp = iommu_group_get(dev);
|
||||||
|
struct iommu_table_group *table_group;
|
||||||
|
|
||||||
|
table_group = iommu_group_get_iommudata(grp);
|
||||||
|
table_group->ops->release_ownership(table_group);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct iommu_domain_ops spapr_tce_blocking_domain_ops = {
|
||||||
|
.attach_dev = spapr_tce_blocking_iommu_attach_dev,
|
||||||
|
};
|
||||||
|
|
||||||
|
static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap)
|
||||||
|
{
|
||||||
|
switch (cap) {
|
||||||
|
case IOMMU_CAP_CACHE_COHERENCY:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type)
|
||||||
|
{
|
||||||
|
struct iommu_domain *dom;
|
||||||
|
|
||||||
|
if (type != IOMMU_DOMAIN_BLOCKED)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
dom = kzalloc(sizeof(*dom), GFP_KERNEL);
|
||||||
|
if (!dom)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
dom->ops = &spapr_tce_blocking_domain_ops;
|
||||||
|
|
||||||
|
return dom;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev)
|
||||||
|
{
|
||||||
|
struct pci_dev *pdev;
|
||||||
|
struct pci_controller *hose;
|
||||||
|
|
||||||
|
if (!dev_is_pci(dev))
|
||||||
|
return ERR_PTR(-EPERM);
|
||||||
|
|
||||||
|
pdev = to_pci_dev(dev);
|
||||||
|
hose = pdev->bus->sysdata;
|
||||||
|
|
||||||
|
return &hose->iommu;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void spapr_tce_iommu_release_device(struct device *dev)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev)
|
||||||
|
{
|
||||||
|
struct pci_controller *hose;
|
||||||
|
struct pci_dev *pdev;
|
||||||
|
|
||||||
|
pdev = to_pci_dev(dev);
|
||||||
|
hose = pdev->bus->sysdata;
|
||||||
|
|
||||||
|
if (!hose->controller_ops.device_group)
|
||||||
|
return ERR_PTR(-ENOENT);
|
||||||
|
|
||||||
|
return hose->controller_ops.device_group(hose, pdev);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct iommu_ops spapr_tce_iommu_ops = {
|
||||||
|
.capable = spapr_tce_iommu_capable,
|
||||||
|
.domain_alloc = spapr_tce_iommu_domain_alloc,
|
||||||
|
.probe_device = spapr_tce_iommu_probe_device,
|
||||||
|
.release_device = spapr_tce_iommu_release_device,
|
||||||
|
.device_group = spapr_tce_iommu_device_group,
|
||||||
|
.set_platform_dma_ops = spapr_tce_blocking_iommu_set_platform_dma,
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct attribute *spapr_tce_iommu_attrs[] = {
|
||||||
|
NULL,
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct attribute_group spapr_tce_iommu_group = {
|
||||||
|
.name = "spapr-tce-iommu",
|
||||||
|
.attrs = spapr_tce_iommu_attrs,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct attribute_group *spapr_tce_iommu_groups[] = {
|
||||||
|
&spapr_tce_iommu_group,
|
||||||
|
NULL,
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This registers IOMMU devices of PHBs. This needs to happen
|
||||||
|
* after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and
|
||||||
|
* before subsys_initcall(iommu_subsys_init).
|
||||||
|
*/
|
||||||
|
static int __init spapr_tce_setup_phb_iommus_initcall(void)
|
||||||
|
{
|
||||||
|
struct pci_controller *hose;
|
||||||
|
|
||||||
|
list_for_each_entry(hose, &hose_list, list_node) {
|
||||||
|
iommu_device_sysfs_add(&hose->iommu, hose->parent,
|
||||||
|
spapr_tce_iommu_groups, "iommu-phb%04x",
|
||||||
|
hose->global_number);
|
||||||
|
iommu_device_register(&hose->iommu, &spapr_tce_iommu_ops,
|
||||||
|
hose->parent);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
postcore_initcall_sync(spapr_tce_setup_phb_iommus_initcall);
|
||||||
|
|
||||||
#endif /* CONFIG_IOMMU_API */
|
#endif /* CONFIG_IOMMU_API */
|
||||||
|
|
|
@ -1899,6 +1899,13 @@ static long pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
|
||||||
/* Store @tbl as pnv_pci_ioda2_unset_window() resets it */
|
/* Store @tbl as pnv_pci_ioda2_unset_window() resets it */
|
||||||
struct iommu_table *tbl = pe->table_group.tables[0];
|
struct iommu_table *tbl = pe->table_group.tables[0];
|
||||||
|
|
||||||
|
/*
|
||||||
|
* iommu_ops transfers the ownership per a device and we mode
|
||||||
|
* the group ownership with the first device in the group.
|
||||||
|
*/
|
||||||
|
if (!tbl)
|
||||||
|
return 0;
|
||||||
|
|
||||||
pnv_pci_ioda2_set_bypass(pe, false);
|
pnv_pci_ioda2_set_bypass(pe, false);
|
||||||
pnv_pci_ioda2_unset_window(&pe->table_group, 0);
|
pnv_pci_ioda2_unset_window(&pe->table_group, 0);
|
||||||
if (pe->pbus)
|
if (pe->pbus)
|
||||||
|
@ -1915,6 +1922,9 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
|
||||||
struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
|
struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
|
||||||
table_group);
|
table_group);
|
||||||
|
|
||||||
|
/* See the comment about iommu_ops above */
|
||||||
|
if (pe->table_group.tables[0])
|
||||||
|
return;
|
||||||
pnv_pci_ioda2_setup_default_config(pe);
|
pnv_pci_ioda2_setup_default_config(pe);
|
||||||
if (pe->pbus)
|
if (pe->pbus)
|
||||||
pnv_ioda_setup_bus_dma(pe, pe->pbus);
|
pnv_ioda_setup_bus_dma(pe, pe->pbus);
|
||||||
|
@ -2921,6 +2931,27 @@ static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_IOMMU_API
|
||||||
|
static struct iommu_group *pnv_pci_device_group(struct pci_controller *hose,
|
||||||
|
struct pci_dev *pdev)
|
||||||
|
{
|
||||||
|
struct pnv_phb *phb = hose->private_data;
|
||||||
|
struct pnv_ioda_pe *pe;
|
||||||
|
|
||||||
|
if (WARN_ON(!phb))
|
||||||
|
return ERR_PTR(-ENODEV);
|
||||||
|
|
||||||
|
pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8));
|
||||||
|
if (!pe)
|
||||||
|
return ERR_PTR(-ENODEV);
|
||||||
|
|
||||||
|
if (!pe->table_group.group)
|
||||||
|
return ERR_PTR(-ENODEV);
|
||||||
|
|
||||||
|
return iommu_group_ref_get(pe->table_group.group);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
|
static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
|
||||||
.dma_dev_setup = pnv_pci_ioda_dma_dev_setup,
|
.dma_dev_setup = pnv_pci_ioda_dma_dev_setup,
|
||||||
.dma_bus_setup = pnv_pci_ioda_dma_bus_setup,
|
.dma_bus_setup = pnv_pci_ioda_dma_bus_setup,
|
||||||
|
@ -2931,6 +2962,9 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
|
||||||
.setup_bridge = pnv_pci_fixup_bridge_resources,
|
.setup_bridge = pnv_pci_fixup_bridge_resources,
|
||||||
.reset_secondary_bus = pnv_pci_reset_secondary_bus,
|
.reset_secondary_bus = pnv_pci_reset_secondary_bus,
|
||||||
.shutdown = pnv_pci_ioda_shutdown,
|
.shutdown = pnv_pci_ioda_shutdown,
|
||||||
|
#ifdef CONFIG_IOMMU_API
|
||||||
|
.device_group = pnv_pci_device_group,
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {
|
static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {
|
||||||
|
|
|
@ -1729,3 +1729,27 @@ static int __init tce_iommu_bus_notifier_init(void)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);
|
machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);
|
||||||
|
|
||||||
|
#ifdef CONFIG_SPAPR_TCE_IOMMU
|
||||||
|
struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose,
|
||||||
|
struct pci_dev *pdev)
|
||||||
|
{
|
||||||
|
struct device_node *pdn, *dn = pdev->dev.of_node;
|
||||||
|
struct iommu_group *grp;
|
||||||
|
struct pci_dn *pci;
|
||||||
|
|
||||||
|
pdn = pci_dma_find(dn, NULL);
|
||||||
|
if (!pdn || !PCI_DN(pdn))
|
||||||
|
return ERR_PTR(-ENODEV);
|
||||||
|
|
||||||
|
pci = PCI_DN(pdn);
|
||||||
|
if (!pci->table_group)
|
||||||
|
return ERR_PTR(-ENODEV);
|
||||||
|
|
||||||
|
grp = pci->table_group->group;
|
||||||
|
if (!grp)
|
||||||
|
return ERR_PTR(-ENODEV);
|
||||||
|
|
||||||
|
return iommu_group_ref_get(grp);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
|
@ -123,5 +123,9 @@ static inline void pseries_lpar_read_hblkrm_characteristics(void) { }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void pseries_rng_init(void);
|
void pseries_rng_init(void);
|
||||||
|
#ifdef CONFIG_SPAPR_TCE_IOMMU
|
||||||
|
struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose,
|
||||||
|
struct pci_dev *pdev);
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* _PSERIES_PSERIES_H */
|
#endif /* _PSERIES_PSERIES_H */
|
||||||
|
|
|
@ -1118,6 +1118,9 @@ static int pSeries_pci_probe_mode(struct pci_bus *bus)
|
||||||
|
|
||||||
struct pci_controller_ops pseries_pci_controller_ops = {
|
struct pci_controller_ops pseries_pci_controller_ops = {
|
||||||
.probe_mode = pSeries_pci_probe_mode,
|
.probe_mode = pSeries_pci_probe_mode,
|
||||||
|
#ifdef CONFIG_SPAPR_TCE_IOMMU
|
||||||
|
.device_group = pSeries_pci_device_group,
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
define_machine(pseries) {
|
define_machine(pseries) {
|
||||||
|
|
|
@ -1200,8 +1200,6 @@ static void tce_iommu_release_ownership(struct tce_container *container,
|
||||||
for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
|
for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
|
||||||
if (container->tables[i])
|
if (container->tables[i])
|
||||||
table_group->ops->unset_window(table_group, i);
|
table_group->ops->unset_window(table_group, i);
|
||||||
|
|
||||||
table_group->ops->release_ownership(table_group);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static long tce_iommu_take_ownership(struct tce_container *container,
|
static long tce_iommu_take_ownership(struct tce_container *container,
|
||||||
|
@ -1209,10 +1207,6 @@ static long tce_iommu_take_ownership(struct tce_container *container,
|
||||||
{
|
{
|
||||||
long i, ret = 0;
|
long i, ret = 0;
|
||||||
|
|
||||||
ret = table_group->ops->take_ownership(table_group);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
/* Set all windows to the new group */
|
/* Set all windows to the new group */
|
||||||
for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
|
for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
|
||||||
struct iommu_table *tbl = container->tables[i];
|
struct iommu_table *tbl = container->tables[i];
|
||||||
|
@ -1231,8 +1225,6 @@ release_exit:
|
||||||
for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
|
for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
|
||||||
table_group->ops->unset_window(table_group, i);
|
table_group->ops->unset_window(table_group, i);
|
||||||
|
|
||||||
table_group->ops->release_ownership(table_group);
|
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue