1
0
Fork 0
mirror of synced 2025-03-06 20:59:54 +01:00
linux/drivers/vfio/pci/mlx5/cmd.c
Yishai Hadas 6e97eba8ad vfio: Split migration ops from main device ops
vfio core checks whether the driver sets some migration op (e.g.
set_state/get_state) and accordingly calls its op.

However, currently mlx5 driver sets the above ops without regards to its
migration caps.

This might lead to unexpected usage/Oops if user space may call to the
above ops even if the driver doesn't support migration. As for example,
the migration state_mutex is not initialized in that case.

The cleanest way to manage that seems to split the migration ops from
the main device ops, this will let the driver setting them separately
from the main ops when it's applicable.

As part of that, validate ops construction on registration and include a
check for VFIO_MIGRATION_STOP_COPY since the uAPI claims it must be set
in migration_flags.

HISI driver was changed as well to match this scheme.

This scheme may enable down the road to come with some extra group of
ops (e.g. DMA log) that can be set without regards to the other options
based on driver caps.

Fixes: 6fadb02126 ("vfio/mlx5: Implement vfio_pci driver for mlx5 devices")
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20220628155910.171454-3-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2022-06-30 10:47:22 -06:00

393 lines
10 KiB
C

// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
*/
#include "cmd.h"
static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
u16 *vhca_id);
int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
{
u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {};
u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {};
lockdep_assert_held(&mvdev->state_mutex);
if (mvdev->mdev_detach)
return -ENOTCONN;
MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA);
MLX5_SET(suspend_vhca_in, in, vhca_id, mvdev->vhca_id);
MLX5_SET(suspend_vhca_in, in, op_mod, op_mod);
return mlx5_cmd_exec_inout(mvdev->mdev, suspend_vhca, in, out);
}
int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
{
u32 out[MLX5_ST_SZ_DW(resume_vhca_out)] = {};
u32 in[MLX5_ST_SZ_DW(resume_vhca_in)] = {};
lockdep_assert_held(&mvdev->state_mutex);
if (mvdev->mdev_detach)
return -ENOTCONN;
MLX5_SET(resume_vhca_in, in, opcode, MLX5_CMD_OP_RESUME_VHCA);
MLX5_SET(resume_vhca_in, in, vhca_id, mvdev->vhca_id);
MLX5_SET(resume_vhca_in, in, op_mod, op_mod);
return mlx5_cmd_exec_inout(mvdev->mdev, resume_vhca, in, out);
}
int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
size_t *state_size)
{
u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {};
u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {};
int ret;
lockdep_assert_held(&mvdev->state_mutex);
if (mvdev->mdev_detach)
return -ENOTCONN;
MLX5_SET(query_vhca_migration_state_in, in, opcode,
MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE);
MLX5_SET(query_vhca_migration_state_in, in, vhca_id, mvdev->vhca_id);
MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0);
ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in,
out);
if (ret)
return ret;
*state_size = MLX5_GET(query_vhca_migration_state_out, out,
required_umem_size);
return 0;
}
static int mlx5fv_vf_event(struct notifier_block *nb,
unsigned long event, void *data)
{
struct mlx5vf_pci_core_device *mvdev =
container_of(nb, struct mlx5vf_pci_core_device, nb);
mutex_lock(&mvdev->state_mutex);
switch (event) {
case MLX5_PF_NOTIFY_ENABLE_VF:
mvdev->mdev_detach = false;
break;
case MLX5_PF_NOTIFY_DISABLE_VF:
mlx5vf_disable_fds(mvdev);
mvdev->mdev_detach = true;
break;
default:
break;
}
mlx5vf_state_mutex_unlock(mvdev);
return 0;
}
void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev)
{
if (!mvdev->migrate_cap)
return;
mutex_lock(&mvdev->state_mutex);
mlx5vf_disable_fds(mvdev);
mlx5vf_state_mutex_unlock(mvdev);
}
void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev)
{
if (!mvdev->migrate_cap)
return;
mlx5_sriov_blocking_notifier_unregister(mvdev->mdev, mvdev->vf_id,
&mvdev->nb);
destroy_workqueue(mvdev->cb_wq);
}
void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
const struct vfio_migration_ops *mig_ops)
{
struct pci_dev *pdev = mvdev->core_device.pdev;
int ret;
if (!pdev->is_virtfn)
return;
mvdev->mdev = mlx5_vf_get_core_dev(pdev);
if (!mvdev->mdev)
return;
if (!MLX5_CAP_GEN(mvdev->mdev, migration))
goto end;
mvdev->vf_id = pci_iov_vf_id(pdev);
if (mvdev->vf_id < 0)
goto end;
if (mlx5vf_cmd_get_vhca_id(mvdev->mdev, mvdev->vf_id + 1,
&mvdev->vhca_id))
goto end;
mvdev->cb_wq = alloc_ordered_workqueue("mlx5vf_wq", 0);
if (!mvdev->cb_wq)
goto end;
mutex_init(&mvdev->state_mutex);
spin_lock_init(&mvdev->reset_lock);
mvdev->nb.notifier_call = mlx5fv_vf_event;
ret = mlx5_sriov_blocking_notifier_register(mvdev->mdev, mvdev->vf_id,
&mvdev->nb);
if (ret) {
destroy_workqueue(mvdev->cb_wq);
goto end;
}
mvdev->migrate_cap = 1;
mvdev->core_device.vdev.migration_flags =
VFIO_MIGRATION_STOP_COPY |
VFIO_MIGRATION_P2P;
mvdev->core_device.vdev.mig_ops = mig_ops;
end:
mlx5_vf_put_core_dev(mvdev->mdev);
}
static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
u16 *vhca_id)
{
u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
int out_size;
void *out;
int ret;
out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
out = kzalloc(out_size, GFP_KERNEL);
if (!out)
return -ENOMEM;
MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
MLX5_SET(query_hca_cap_in, in, other_function, 1);
MLX5_SET(query_hca_cap_in, in, function_id, function_id);
MLX5_SET(query_hca_cap_in, in, op_mod,
MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 |
HCA_CAP_OPMOD_GET_CUR);
ret = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
if (ret)
goto err_exec;
*vhca_id = MLX5_GET(query_hca_cap_out, out,
capability.cmd_hca_cap.vhca_id);
err_exec:
kfree(out);
return ret;
}
static int _create_state_mkey(struct mlx5_core_dev *mdev, u32 pdn,
struct mlx5_vf_migration_file *migf, u32 *mkey)
{
size_t npages = DIV_ROUND_UP(migf->total_length, PAGE_SIZE);
struct sg_dma_page_iter dma_iter;
int err = 0, inlen;
__be64 *mtt;
void *mkc;
u32 *in;
inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
sizeof(*mtt) * round_up(npages, 2);
in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
DIV_ROUND_UP(npages, 2));
mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
for_each_sgtable_dma_page(&migf->table.sgt, &dma_iter, 0)
*mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter));
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
MLX5_SET(mkc, mkc, lr, 1);
MLX5_SET(mkc, mkc, lw, 1);
MLX5_SET(mkc, mkc, rr, 1);
MLX5_SET(mkc, mkc, rw, 1);
MLX5_SET(mkc, mkc, pd, pdn);
MLX5_SET(mkc, mkc, bsf_octword_size, 0);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2));
MLX5_SET64(mkc, mkc, len, migf->total_length);
err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
kvfree(in);
return err;
}
void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
{
struct mlx5vf_async_data *async_data = container_of(_work,
struct mlx5vf_async_data, work);
struct mlx5_vf_migration_file *migf = container_of(async_data,
struct mlx5_vf_migration_file, async_data);
struct mlx5_core_dev *mdev = migf->mvdev->mdev;
mutex_lock(&migf->lock);
if (async_data->status) {
migf->is_err = true;
wake_up_interruptible(&migf->poll_wait);
}
mutex_unlock(&migf->lock);
mlx5_core_destroy_mkey(mdev, async_data->mkey);
dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
mlx5_core_dealloc_pd(mdev, async_data->pdn);
kvfree(async_data->out);
fput(migf->filp);
}
static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
{
struct mlx5vf_async_data *async_data = container_of(context,
struct mlx5vf_async_data, cb_work);
struct mlx5_vf_migration_file *migf = container_of(async_data,
struct mlx5_vf_migration_file, async_data);
if (!status) {
WRITE_ONCE(migf->total_length,
MLX5_GET(save_vhca_state_out, async_data->out,
actual_image_size));
wake_up_interruptible(&migf->poll_wait);
}
/*
* The error and the cleanup flows can't run from an
* interrupt context
*/
async_data->status = status;
queue_work(migf->mvdev->cb_wq, &async_data->work);
}
int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
struct mlx5_vf_migration_file *migf)
{
u32 out_size = MLX5_ST_SZ_BYTES(save_vhca_state_out);
u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
struct mlx5vf_async_data *async_data;
struct mlx5_core_dev *mdev;
u32 pdn, mkey;
int err;
lockdep_assert_held(&mvdev->state_mutex);
if (mvdev->mdev_detach)
return -ENOTCONN;
mdev = mvdev->mdev;
err = mlx5_core_alloc_pd(mdev, &pdn);
if (err)
return err;
err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE,
0);
if (err)
goto err_dma_map;
err = _create_state_mkey(mdev, pdn, migf, &mkey);
if (err)
goto err_create_mkey;
MLX5_SET(save_vhca_state_in, in, opcode,
MLX5_CMD_OP_SAVE_VHCA_STATE);
MLX5_SET(save_vhca_state_in, in, op_mod, 0);
MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id);
MLX5_SET(save_vhca_state_in, in, mkey, mkey);
MLX5_SET(save_vhca_state_in, in, size, migf->total_length);
async_data = &migf->async_data;
async_data->out = kvzalloc(out_size, GFP_KERNEL);
if (!async_data->out) {
err = -ENOMEM;
goto err_out;
}
/* no data exists till the callback comes back */
migf->total_length = 0;
get_file(migf->filp);
async_data->mkey = mkey;
async_data->pdn = pdn;
err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in),
async_data->out,
out_size, mlx5vf_save_callback,
&async_data->cb_work);
if (err)
goto err_exec;
return 0;
err_exec:
fput(migf->filp);
kvfree(async_data->out);
err_out:
mlx5_core_destroy_mkey(mdev, mkey);
err_create_mkey:
dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
err_dma_map:
mlx5_core_dealloc_pd(mdev, pdn);
return err;
}
int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
struct mlx5_vf_migration_file *migf)
{
struct mlx5_core_dev *mdev;
u32 out[MLX5_ST_SZ_DW(save_vhca_state_out)] = {};
u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
u32 pdn, mkey;
int err;
lockdep_assert_held(&mvdev->state_mutex);
if (mvdev->mdev_detach)
return -ENOTCONN;
mutex_lock(&migf->lock);
if (!migf->total_length) {
err = -EINVAL;
goto end;
}
mdev = mvdev->mdev;
err = mlx5_core_alloc_pd(mdev, &pdn);
if (err)
goto end;
err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
if (err)
goto err_reg;
err = _create_state_mkey(mdev, pdn, migf, &mkey);
if (err)
goto err_mkey;
MLX5_SET(load_vhca_state_in, in, opcode,
MLX5_CMD_OP_LOAD_VHCA_STATE);
MLX5_SET(load_vhca_state_in, in, op_mod, 0);
MLX5_SET(load_vhca_state_in, in, vhca_id, mvdev->vhca_id);
MLX5_SET(load_vhca_state_in, in, mkey, mkey);
MLX5_SET(load_vhca_state_in, in, size, migf->total_length);
err = mlx5_cmd_exec_inout(mdev, load_vhca_state, in, out);
mlx5_core_destroy_mkey(mdev, mkey);
err_mkey:
dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
err_reg:
mlx5_core_dealloc_pd(mdev, pdn);
end:
mutex_unlock(&migf->lock);
return err;
}