We execute certain NPU2 setup code (such as mapping an LPID to a device
in NPU2) unconditionally if an Nvlink bridge is detected. However this
cannot succeed on POWER8NVL machines and errors appear in dmesg. This is
harmless as skiboot returns an error and the only place we check it is
vfio-pci but that code does not get called on P8+ either.
This adds a check to the pnv_npu2_xxx helpers that they are being called
on a machine with NPU2, i.e. one where pnv_npu2_init() has initialized
pci_controller::npu; that pointer is NULL on POWER8/NVL (Naples).
While at it, fix the NULL dereferencing in pnv_npu_peers_take_ownership()/
pnv_npu_peers_release_ownership(), which occurs when GPUs on the mentioned
P8s cause EEH. That happens if "vfio-pci" disables devices using
the D3 power state; vfio-pci's disable_idle_d3 module parameter
controls this and must be set on Naples. The EEH handling clears
the entire pnv_ioda_pe struct in pnv_ioda_free_pe(), hence
the NULL dereferencing. We cannot recover from that but at least we stop
crashing.
Tested on
- POWER9 pvr=004e1201, Ubuntu 19.04 host, Ubuntu 18.04 vm,
NVIDIA GV100 10de:1db1 driver 418.39
- POWER8 pvr=004c0100, RHEL 7.6 host, Ubuntu 16.10 vm,
NVIDIA P100 10de:15f9 driver 396.47
Fixes: 1b785611e1
("powerpc/powernv/npu: Add release_ownership hook")
Cc: stable@vger.kernel.org # 5.0
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201122073828.15446-1-aik@ozlabs.ru
705 lines
18 KiB
C
705 lines
18 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* This file implements the DMA operations for NVLink devices. The NPU
|
|
* devices all point to the same iommu table as the parent PCI device.
|
|
*
|
|
* Copyright Alistair Popple, IBM Corporation 2015.
|
|
*/
|
|
|
|
#include <linux/mmu_notifier.h>
|
|
#include <linux/mmu_context.h>
|
|
#include <linux/of.h>
|
|
#include <linux/pci.h>
|
|
#include <linux/memblock.h>
|
|
#include <linux/sizes.h>
|
|
|
|
#include <asm/debugfs.h>
|
|
#include <asm/powernv.h>
|
|
#include <asm/ppc-pci.h>
|
|
#include <asm/opal.h>
|
|
|
|
#include "pci.h"
|
|
|
|
static struct pci_dev *get_pci_dev(struct device_node *dn)
|
|
{
|
|
struct pci_dn *pdn = PCI_DN(dn);
|
|
struct pci_dev *pdev;
|
|
|
|
pdev = pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus),
|
|
pdn->busno, pdn->devfn);
|
|
|
|
/*
|
|
* pci_get_domain_bus_and_slot() increased the reference count of
|
|
* the PCI device, but callers don't need that actually as the PE
|
|
* already holds a reference to the device. Since callers aren't
|
|
* aware of the reference count change, call pci_dev_put() now to
|
|
* avoid leaks.
|
|
*/
|
|
if (pdev)
|
|
pci_dev_put(pdev);
|
|
|
|
return pdev;
|
|
}
|
|
|
|
/* Given a NPU device get the associated PCI device. */
|
|
struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev)
|
|
{
|
|
struct device_node *dn;
|
|
struct pci_dev *gpdev;
|
|
|
|
if (WARN_ON(!npdev))
|
|
return NULL;
|
|
|
|
if (WARN_ON(!npdev->dev.of_node))
|
|
return NULL;
|
|
|
|
/* Get assoicated PCI device */
|
|
dn = of_parse_phandle(npdev->dev.of_node, "ibm,gpu", 0);
|
|
if (!dn)
|
|
return NULL;
|
|
|
|
gpdev = get_pci_dev(dn);
|
|
of_node_put(dn);
|
|
|
|
return gpdev;
|
|
}
|
|
EXPORT_SYMBOL(pnv_pci_get_gpu_dev);
|
|
|
|
/* Given the real PCI device get a linked NPU device. */
|
|
struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
|
|
{
|
|
struct device_node *dn;
|
|
struct pci_dev *npdev;
|
|
|
|
if (WARN_ON(!gpdev))
|
|
return NULL;
|
|
|
|
/* Not all PCI devices have device-tree nodes */
|
|
if (!gpdev->dev.of_node)
|
|
return NULL;
|
|
|
|
/* Get assoicated PCI device */
|
|
dn = of_parse_phandle(gpdev->dev.of_node, "ibm,npu", index);
|
|
if (!dn)
|
|
return NULL;
|
|
|
|
npdev = get_pci_dev(dn);
|
|
of_node_put(dn);
|
|
|
|
return npdev;
|
|
}
|
|
EXPORT_SYMBOL(pnv_pci_get_npu_dev);
|
|
|
|
#ifdef CONFIG_IOMMU_API
|
|
/*
|
|
* Returns the PE assoicated with the PCI device of the given
|
|
* NPU. Returns the linked pci device if pci_dev != NULL.
|
|
*/
|
|
static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe,
|
|
struct pci_dev **gpdev)
|
|
{
|
|
struct pnv_phb *phb;
|
|
struct pci_controller *hose;
|
|
struct pci_dev *pdev;
|
|
struct pnv_ioda_pe *pe;
|
|
struct pci_dn *pdn;
|
|
|
|
pdev = pnv_pci_get_gpu_dev(npe->pdev);
|
|
if (!pdev)
|
|
return NULL;
|
|
|
|
pdn = pci_get_pdn(pdev);
|
|
if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
|
|
return NULL;
|
|
|
|
hose = pci_bus_to_host(pdev->bus);
|
|
phb = hose->private_data;
|
|
pe = &phb->ioda.pe_array[pdn->pe_number];
|
|
|
|
if (gpdev)
|
|
*gpdev = pdev;
|
|
|
|
return pe;
|
|
}
|
|
|
|
static long pnv_npu_unset_window(struct iommu_table_group *table_group,
|
|
int num);
|
|
|
|
static long pnv_npu_set_window(struct iommu_table_group *table_group, int num,
|
|
struct iommu_table *tbl)
|
|
{
|
|
struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
|
|
table_group);
|
|
struct pnv_phb *phb = npe->phb;
|
|
int64_t rc;
|
|
const unsigned long size = tbl->it_indirect_levels ?
|
|
tbl->it_level_size : tbl->it_size;
|
|
const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
|
|
const __u64 win_size = tbl->it_size << tbl->it_page_shift;
|
|
int num2 = (num == 0) ? 1 : 0;
|
|
|
|
/* NPU has just one TVE so if there is another table, remove it first */
|
|
if (npe->table_group.tables[num2])
|
|
pnv_npu_unset_window(&npe->table_group, num2);
|
|
|
|
pe_info(npe, "Setting up window %llx..%llx pg=%lx\n",
|
|
start_addr, start_addr + win_size - 1,
|
|
IOMMU_PAGE_SIZE(tbl));
|
|
|
|
rc = opal_pci_map_pe_dma_window(phb->opal_id,
|
|
npe->pe_number,
|
|
npe->pe_number,
|
|
tbl->it_indirect_levels + 1,
|
|
__pa(tbl->it_base),
|
|
size << 3,
|
|
IOMMU_PAGE_SIZE(tbl));
|
|
if (rc) {
|
|
pe_err(npe, "Failed to configure TCE table, err %lld\n", rc);
|
|
return rc;
|
|
}
|
|
pnv_pci_ioda2_tce_invalidate_entire(phb, false);
|
|
|
|
/* Add the table to the list so its TCE cache will get invalidated */
|
|
pnv_pci_link_table_and_group(phb->hose->node, num,
|
|
tbl, &npe->table_group);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static long pnv_npu_unset_window(struct iommu_table_group *table_group, int num)
|
|
{
|
|
struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
|
|
table_group);
|
|
struct pnv_phb *phb = npe->phb;
|
|
int64_t rc;
|
|
|
|
if (!npe->table_group.tables[num])
|
|
return 0;
|
|
|
|
pe_info(npe, "Removing DMA window\n");
|
|
|
|
rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
|
|
npe->pe_number,
|
|
0/* levels */, 0/* table address */,
|
|
0/* table size */, 0/* page size */);
|
|
if (rc) {
|
|
pe_err(npe, "Unmapping failed, ret = %lld\n", rc);
|
|
return rc;
|
|
}
|
|
pnv_pci_ioda2_tce_invalidate_entire(phb, false);
|
|
|
|
pnv_pci_unlink_table_and_group(npe->table_group.tables[num],
|
|
&npe->table_group);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Switch ownership from platform code to external user (e.g. VFIO) */
|
|
static void pnv_npu_take_ownership(struct iommu_table_group *table_group)
|
|
{
|
|
struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
|
|
table_group);
|
|
struct pnv_phb *phb = npe->phb;
|
|
int64_t rc;
|
|
struct pci_dev *gpdev = NULL;
|
|
|
|
/*
|
|
* Note: NPU has just a single TVE in the hardware which means that
|
|
* while used by the kernel, it can have either 32bit window or
|
|
* DMA bypass but never both. So we deconfigure 32bit window only
|
|
* if it was enabled at the moment of ownership change.
|
|
*/
|
|
if (npe->table_group.tables[0]) {
|
|
pnv_npu_unset_window(&npe->table_group, 0);
|
|
return;
|
|
}
|
|
|
|
/* Disable bypass */
|
|
rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
|
|
npe->pe_number, npe->pe_number,
|
|
0 /* bypass base */, 0);
|
|
if (rc) {
|
|
pe_err(npe, "Failed to disable bypass, err %lld\n", rc);
|
|
return;
|
|
}
|
|
pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false);
|
|
|
|
get_gpu_pci_dev_and_pe(npe, &gpdev);
|
|
if (gpdev)
|
|
pnv_npu2_unmap_lpar_dev(gpdev);
|
|
}
|
|
|
|
static void pnv_npu_release_ownership(struct iommu_table_group *table_group)
|
|
{
|
|
struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
|
|
table_group);
|
|
struct pci_dev *gpdev = NULL;
|
|
|
|
get_gpu_pci_dev_and_pe(npe, &gpdev);
|
|
if (gpdev)
|
|
pnv_npu2_map_lpar_dev(gpdev, 0, MSR_DR | MSR_PR | MSR_HV);
|
|
}
|
|
|
|
static struct iommu_table_group_ops pnv_pci_npu_ops = {
|
|
.set_window = pnv_npu_set_window,
|
|
.unset_window = pnv_npu_unset_window,
|
|
.take_ownership = pnv_npu_take_ownership,
|
|
.release_ownership = pnv_npu_release_ownership,
|
|
};
|
|
#endif /* CONFIG_IOMMU_API */
|
|
|
|
/*
|
|
* NPU2 ATS
|
|
*/
|
|
/* Maximum possible number of ATSD MMIO registers per NPU */
#define NV_NMMU_ATSD_REGS	8
/* Capacity of the pe[] array in struct npu_comp */
#define NV_NPU_MAX_PE_NUM	16
|
|
|
|
/*
|
|
* A compound NPU IOMMU group which might consist of 1 GPU + 2xNPUs (POWER8) or
|
|
* up to 3 x (GPU + 2xNPUs) (POWER9).
|
|
*/
|
|
struct npu_comp {
|
|
struct iommu_table_group table_group;
|
|
int pe_num;
|
|
struct pnv_ioda_pe *pe[NV_NPU_MAX_PE_NUM];
|
|
};
|
|
|
|
/* An NPU descriptor, valid for POWER9 only */
|
|
struct npu {
|
|
int index;
|
|
struct npu_comp npucomp;
|
|
};
|
|
|
|
#ifdef CONFIG_IOMMU_API
|
|
/*
 * create_table hook of the compound group: delegate table creation to the
 * first PE attached to the compound.
 *
 * Returns -EFAULT when the compound has no PEs, or when its first PE has
 * no ops or no create_table callback; otherwise returns whatever that
 * callback returns.
 */
static long pnv_npu_peers_create_table_userspace(
		struct iommu_table_group *table_group,
		int num, __u32 page_shift, __u64 window_size, __u32 levels,
		struct iommu_table **ptbl)
{
	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
						table_group);
	struct pnv_ioda_pe *first = NULL;

	if (npucomp->pe_num)
		first = npucomp->pe[0];

	if (!first || !first->table_group.ops ||
	    !first->table_group.ops->create_table)
		return -EFAULT;

	return first->table_group.ops->create_table(&first->table_group, num,
						    page_shift, window_size,
						    levels, ptbl);
}
|
|
|
|
static long pnv_npu_peers_set_window(struct iommu_table_group *table_group,
|
|
int num, struct iommu_table *tbl)
|
|
{
|
|
int i, j;
|
|
long ret = 0;
|
|
struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
|
|
table_group);
|
|
|
|
for (i = 0; i < npucomp->pe_num; ++i) {
|
|
struct pnv_ioda_pe *pe = npucomp->pe[i];
|
|
|
|
if (!pe->table_group.ops->set_window)
|
|
continue;
|
|
|
|
ret = pe->table_group.ops->set_window(&pe->table_group,
|
|
num, tbl);
|
|
if (ret)
|
|
break;
|
|
}
|
|
|
|
if (ret) {
|
|
for (j = 0; j < i; ++j) {
|
|
struct pnv_ioda_pe *pe = npucomp->pe[j];
|
|
|
|
if (!pe->table_group.ops->unset_window)
|
|
continue;
|
|
|
|
ret = pe->table_group.ops->unset_window(
|
|
&pe->table_group, num);
|
|
if (ret)
|
|
break;
|
|
}
|
|
} else {
|
|
table_group->tables[num] = iommu_tce_table_get(tbl);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static long pnv_npu_peers_unset_window(struct iommu_table_group *table_group,
|
|
int num)
|
|
{
|
|
int i, j;
|
|
long ret = 0;
|
|
struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
|
|
table_group);
|
|
|
|
for (i = 0; i < npucomp->pe_num; ++i) {
|
|
struct pnv_ioda_pe *pe = npucomp->pe[i];
|
|
|
|
WARN_ON(npucomp->table_group.tables[num] !=
|
|
table_group->tables[num]);
|
|
if (!npucomp->table_group.tables[num])
|
|
continue;
|
|
|
|
if (!pe->table_group.ops->unset_window)
|
|
continue;
|
|
|
|
ret = pe->table_group.ops->unset_window(&pe->table_group, num);
|
|
if (ret)
|
|
break;
|
|
}
|
|
|
|
if (ret) {
|
|
for (j = 0; j < i; ++j) {
|
|
struct pnv_ioda_pe *pe = npucomp->pe[j];
|
|
|
|
if (!npucomp->table_group.tables[num])
|
|
continue;
|
|
|
|
if (!pe->table_group.ops->set_window)
|
|
continue;
|
|
|
|
ret = pe->table_group.ops->set_window(&pe->table_group,
|
|
num, table_group->tables[num]);
|
|
if (ret)
|
|
break;
|
|
}
|
|
} else if (table_group->tables[num]) {
|
|
iommu_tce_table_put(table_group->tables[num]);
|
|
table_group->tables[num] = NULL;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group)
|
|
{
|
|
int i;
|
|
struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
|
|
table_group);
|
|
|
|
for (i = 0; i < npucomp->pe_num; ++i) {
|
|
struct pnv_ioda_pe *pe = npucomp->pe[i];
|
|
|
|
if (!pe->table_group.ops ||
|
|
!pe->table_group.ops->take_ownership)
|
|
continue;
|
|
pe->table_group.ops->take_ownership(&pe->table_group);
|
|
}
|
|
}
|
|
|
|
static void pnv_npu_peers_release_ownership(
|
|
struct iommu_table_group *table_group)
|
|
{
|
|
int i;
|
|
struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
|
|
table_group);
|
|
|
|
for (i = 0; i < npucomp->pe_num; ++i) {
|
|
struct pnv_ioda_pe *pe = npucomp->pe[i];
|
|
|
|
if (!pe->table_group.ops ||
|
|
!pe->table_group.ops->release_ownership)
|
|
continue;
|
|
pe->table_group.ops->release_ownership(&pe->table_group);
|
|
}
|
|
}
|
|
|
|
static struct iommu_table_group_ops pnv_npu_peers_ops = {
|
|
.get_table_size = pnv_pci_ioda2_get_table_size,
|
|
.create_table = pnv_npu_peers_create_table_userspace,
|
|
.set_window = pnv_npu_peers_set_window,
|
|
.unset_window = pnv_npu_peers_unset_window,
|
|
.take_ownership = pnv_npu_peers_take_ownership,
|
|
.release_ownership = pnv_npu_peers_release_ownership,
|
|
};
|
|
|
|
static void pnv_comp_attach_table_group(struct npu_comp *npucomp,
|
|
struct pnv_ioda_pe *pe)
|
|
{
|
|
if (WARN_ON(npucomp->pe_num == NV_NPU_MAX_PE_NUM))
|
|
return;
|
|
|
|
npucomp->pe[npucomp->pe_num] = pe;
|
|
++npucomp->pe_num;
|
|
}
|
|
|
|
static struct iommu_table_group *
|
|
pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
|
|
{
|
|
struct iommu_table_group *compound_group;
|
|
struct npu_comp *npucomp;
|
|
struct pci_dev *gpdev = NULL;
|
|
struct pci_controller *hose;
|
|
struct pci_dev *npdev = NULL;
|
|
|
|
list_for_each_entry(gpdev, &pe->pbus->devices, bus_list) {
|
|
npdev = pnv_pci_get_npu_dev(gpdev, 0);
|
|
if (npdev)
|
|
break;
|
|
}
|
|
|
|
if (!npdev)
|
|
/* It is not an NPU attached device, skip */
|
|
return NULL;
|
|
|
|
hose = pci_bus_to_host(npdev->bus);
|
|
|
|
if (hose->npu) {
|
|
/* P9 case: compound group is per-NPU (all gpus, all links) */
|
|
npucomp = &hose->npu->npucomp;
|
|
} else {
|
|
/* P8 case: Compound group is per-GPU (1 gpu, 2 links) */
|
|
npucomp = pe->npucomp = kzalloc(sizeof(*npucomp), GFP_KERNEL);
|
|
}
|
|
|
|
compound_group = &npucomp->table_group;
|
|
if (!compound_group->group) {
|
|
compound_group->ops = &pnv_npu_peers_ops;
|
|
iommu_register_group(compound_group, hose->global_number,
|
|
pe->pe_number);
|
|
|
|
/* Steal capabilities from a GPU PE */
|
|
compound_group->max_dynamic_windows_supported =
|
|
pe->table_group.max_dynamic_windows_supported;
|
|
compound_group->tce32_start = pe->table_group.tce32_start;
|
|
compound_group->tce32_size = pe->table_group.tce32_size;
|
|
compound_group->max_levels = pe->table_group.max_levels;
|
|
if (!compound_group->pgsizes)
|
|
compound_group->pgsizes = pe->table_group.pgsizes;
|
|
}
|
|
|
|
/*
|
|
* The gpu would have been added to the iommu group that's created
|
|
* for the PE. Pull it out now.
|
|
*/
|
|
iommu_del_device(&gpdev->dev);
|
|
|
|
/*
|
|
* I'm not sure this is strictly required, but it's probably a good idea
|
|
* since the table_group for the PE is going to be attached to the
|
|
* compound table group. If we leave the PE's iommu group active then
|
|
* we might have the same table_group being modifiable via two sepeate
|
|
* iommu groups.
|
|
*/
|
|
iommu_group_put(pe->table_group.group);
|
|
|
|
/* now put the GPU into the compound group */
|
|
pnv_comp_attach_table_group(npucomp, pe);
|
|
iommu_add_device(compound_group, &gpdev->dev);
|
|
|
|
return compound_group;
|
|
}
|
|
|
|
static struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe)
|
|
{
|
|
struct iommu_table_group *table_group;
|
|
struct npu_comp *npucomp;
|
|
struct pci_dev *gpdev = NULL;
|
|
struct pci_dev *npdev;
|
|
struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(pe, &gpdev);
|
|
|
|
WARN_ON(!(pe->flags & PNV_IODA_PE_DEV));
|
|
if (!gpe)
|
|
return NULL;
|
|
|
|
/*
|
|
* IODA2 bridges get this set up from pci_controller_ops::setup_bridge
|
|
* but NPU bridges do not have this hook defined so we do it here.
|
|
* We do not setup other table group parameters as they won't be used
|
|
* anyway - NVLink bridges are subordinate PEs.
|
|
*/
|
|
pe->table_group.ops = &pnv_pci_npu_ops;
|
|
|
|
table_group = iommu_group_get_iommudata(
|
|
iommu_group_get(&gpdev->dev));
|
|
|
|
/*
|
|
* On P9 NPU PHB and PCI PHB support different page sizes,
|
|
* keep only matching. We expect here that NVLink bridge PE pgsizes is
|
|
* initialized by the caller.
|
|
*/
|
|
table_group->pgsizes &= pe->table_group.pgsizes;
|
|
npucomp = container_of(table_group, struct npu_comp, table_group);
|
|
pnv_comp_attach_table_group(npucomp, pe);
|
|
|
|
list_for_each_entry(npdev, &pe->phb->hose->bus->devices, bus_list) {
|
|
struct pci_dev *gpdevtmp = pnv_pci_get_gpu_dev(npdev);
|
|
|
|
if (gpdevtmp != gpdev)
|
|
continue;
|
|
|
|
iommu_add_device(table_group, &npdev->dev);
|
|
}
|
|
|
|
return table_group;
|
|
}
|
|
|
|
void pnv_pci_npu_setup_iommu_groups(void)
|
|
{
|
|
struct pci_controller *hose;
|
|
struct pnv_phb *phb;
|
|
struct pnv_ioda_pe *pe;
|
|
|
|
/*
|
|
* For non-nvlink devices the IOMMU group is registered when the PE is
|
|
* configured and devices are added to the group when the per-device
|
|
* DMA setup is run. That's done in hose->ops.dma_dev_setup() which is
|
|
* only initialise for "normal" IODA PHBs.
|
|
*
|
|
* For NVLink devices we need to ensure the NVLinks and the GPU end up
|
|
* in the same IOMMU group, so that's handled here.
|
|
*/
|
|
list_for_each_entry(hose, &hose_list, list_node) {
|
|
phb = hose->private_data;
|
|
|
|
if (phb->type == PNV_PHB_IODA2)
|
|
list_for_each_entry(pe, &phb->ioda.pe_list, list)
|
|
pnv_try_setup_npu_table_group(pe);
|
|
}
|
|
|
|
/*
|
|
* Now we have all PHBs discovered, time to add NPU devices to
|
|
* the corresponding IOMMU groups.
|
|
*/
|
|
list_for_each_entry(hose, &hose_list, list_node) {
|
|
unsigned long pgsizes;
|
|
|
|
phb = hose->private_data;
|
|
|
|
if (phb->type != PNV_PHB_NPU_NVLINK)
|
|
continue;
|
|
|
|
pgsizes = pnv_ioda_parse_tce_sizes(phb);
|
|
list_for_each_entry(pe, &phb->ioda.pe_list, list) {
|
|
/*
|
|
* IODA2 bridges get this set up from
|
|
* pci_controller_ops::setup_bridge but NPU bridges
|
|
* do not have this hook defined so we do it here.
|
|
*/
|
|
pe->table_group.pgsizes = pgsizes;
|
|
pnv_npu_compound_attach(pe);
|
|
}
|
|
}
|
|
}
|
|
#endif /* CONFIG_IOMMU_API */
|
|
|
|
int pnv_npu2_init(struct pci_controller *hose)
|
|
{
|
|
static int npu_index;
|
|
struct npu *npu;
|
|
int ret;
|
|
|
|
npu = kzalloc(sizeof(*npu), GFP_KERNEL);
|
|
if (!npu)
|
|
return -ENOMEM;
|
|
|
|
npu_index++;
|
|
if (WARN_ON(npu_index >= NV_MAX_NPUS)) {
|
|
ret = -ENOSPC;
|
|
goto fail_exit;
|
|
}
|
|
npu->index = npu_index;
|
|
hose->npu = npu;
|
|
|
|
return 0;
|
|
|
|
fail_exit:
|
|
kfree(npu);
|
|
return ret;
|
|
}
|
|
|
|
int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid,
|
|
unsigned long msr)
|
|
{
|
|
int ret;
|
|
struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
|
|
struct pci_controller *hose;
|
|
struct pnv_phb *nphb;
|
|
|
|
if (!npdev)
|
|
return -ENODEV;
|
|
|
|
hose = pci_bus_to_host(npdev->bus);
|
|
if (hose->npu == NULL) {
|
|
dev_info_once(&npdev->dev, "Nvlink1 does not support contexts");
|
|
return 0;
|
|
}
|
|
|
|
nphb = hose->private_data;
|
|
|
|
dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n",
|
|
nphb->opal_id, lparid);
|
|
/*
|
|
* Currently we only support radix and non-zero LPCR only makes sense
|
|
* for hash tables so skiboot expects the LPCR parameter to be a zero.
|
|
*/
|
|
ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), lparid,
|
|
0 /* LPCR bits */);
|
|
if (ret) {
|
|
dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret);
|
|
return ret;
|
|
}
|
|
|
|
dev_dbg(&gpdev->dev, "init context opalid=%llu msr=%lx\n",
|
|
nphb->opal_id, msr);
|
|
ret = opal_npu_init_context(nphb->opal_id, 0/*__unused*/, msr,
|
|
pci_dev_id(gpdev));
|
|
if (ret < 0)
|
|
dev_err(&gpdev->dev, "Failed to init context: %d\n", ret);
|
|
else
|
|
ret = 0;
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(pnv_npu2_map_lpar_dev);
|
|
|
|
void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr)
|
|
{
|
|
struct pci_dev *gpdev;
|
|
|
|
list_for_each_entry(gpdev, &gpe->pbus->devices, bus_list)
|
|
pnv_npu2_map_lpar_dev(gpdev, 0, msr);
|
|
}
|
|
|
|
int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev)
|
|
{
|
|
int ret;
|
|
struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
|
|
struct pci_controller *hose;
|
|
struct pnv_phb *nphb;
|
|
|
|
if (!npdev)
|
|
return -ENODEV;
|
|
|
|
hose = pci_bus_to_host(npdev->bus);
|
|
if (hose->npu == NULL) {
|
|
dev_info_once(&npdev->dev, "Nvlink1 does not support contexts");
|
|
return 0;
|
|
}
|
|
|
|
nphb = hose->private_data;
|
|
|
|
dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n",
|
|
nphb->opal_id);
|
|
ret = opal_npu_destroy_context(nphb->opal_id, 0/*__unused*/,
|
|
pci_dev_id(gpdev));
|
|
if (ret < 0) {
|
|
dev_err(&gpdev->dev, "Failed to destroy context: %d\n", ret);
|
|
return ret;
|
|
}
|
|
|
|
/* Set LPID to 0 anyway, just to be safe */
|
|
dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=0\n", nphb->opal_id);
|
|
ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), 0 /*LPID*/,
|
|
0 /* LPCR bits */);
|
|
if (ret)
|
|
dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret);
|
|
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL_GPL(pnv_npu2_unmap_lpar_dev);
|