Centos-kernel-stream-9/drivers/infiniband/hw/mlx5/umr.c

828 lines
21 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
#include "umr.h"
#include "wr.h"
/*
* We can't use an array for xlt_emergency_page because dma_map_single doesn't
* work on kernel modules memory
*/
void *xlt_emergency_page;
static DEFINE_MUTEX(xlt_emergency_page_mutex);
static __be64 get_umr_enable_mr_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_KEY |
MLX5_MKEY_MASK_FREE;
return cpu_to_be64(result);
}
static __be64 get_umr_disable_mr_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_FREE;
return cpu_to_be64(result);
}
static __be64 get_umr_update_translation_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_LEN |
MLX5_MKEY_MASK_PAGE_SIZE |
MLX5_MKEY_MASK_START_ADDR;
return cpu_to_be64(result);
}
static __be64 get_umr_update_access_mask(struct mlx5_ib_dev *dev)
{
u64 result;
result = MLX5_MKEY_MASK_LR |
MLX5_MKEY_MASK_LW |
MLX5_MKEY_MASK_RR |
MLX5_MKEY_MASK_RW;
if (MLX5_CAP_GEN(dev->mdev, atomic))
result |= MLX5_MKEY_MASK_A;
if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;
if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;
return cpu_to_be64(result);
}
static __be64 get_umr_update_pd_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_PD;
return cpu_to_be64(result);
}
static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
{
if (mask & MLX5_MKEY_MASK_PAGE_SIZE &&
MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
return -EPERM;
if (mask & MLX5_MKEY_MASK_A &&
MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
return -EPERM;
if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE &&
!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
return -EPERM;
if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ &&
!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
return -EPERM;
return 0;
}
enum {
MAX_UMR_WR = 128,
};
static int mlx5r_umr_qp_rst2rts(struct mlx5_ib_dev *dev, struct ib_qp *qp)
{
struct ib_qp_attr attr = {};
int ret;
attr.qp_state = IB_QPS_INIT;
attr.port_num = 1;
ret = ib_modify_qp(qp, &attr,
IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT);
if (ret) {
mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
return ret;
}
memset(&attr, 0, sizeof(attr));
attr.qp_state = IB_QPS_RTR;
ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
if (ret) {
mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
return ret;
}
memset(&attr, 0, sizeof(attr));
attr.qp_state = IB_QPS_RTS;
ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
if (ret) {
mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
return ret;
}
return 0;
}
int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
{
struct ib_qp_init_attr init_attr = {};
struct ib_cq *cq;
struct ib_qp *qp;
int ret = 0;
/*
* UMR qp is set once, never changed until device unload.
* Avoid taking the mutex if initialization is already done.
*/
if (dev->umrc.qp)
return 0;
mutex_lock(&dev->umrc.init_lock);
/* First user allocates the UMR resources. Skip if already allocated. */
if (dev->umrc.qp)
goto unlock;
cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
if (IS_ERR(cq)) {
mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
ret = PTR_ERR(cq);
goto unlock;
}
init_attr.send_cq = cq;
init_attr.recv_cq = cq;
init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
init_attr.cap.max_send_wr = MAX_UMR_WR;
init_attr.cap.max_send_sge = 1;
init_attr.qp_type = MLX5_IB_QPT_REG_UMR;
init_attr.port_num = 1;
qp = ib_create_qp(dev->umrc.pd, &init_attr);
if (IS_ERR(qp)) {
mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
ret = PTR_ERR(qp);
goto destroy_cq;
}
ret = mlx5r_umr_qp_rst2rts(dev, qp);
if (ret)
goto destroy_qp;
dev->umrc.cq = cq;
sema_init(&dev->umrc.sem, MAX_UMR_WR);
mutex_init(&dev->umrc.lock);
dev->umrc.state = MLX5_UMR_STATE_ACTIVE;
dev->umrc.qp = qp;
mutex_unlock(&dev->umrc.init_lock);
return 0;
destroy_qp:
ib_destroy_qp(qp);
destroy_cq:
ib_free_cq(cq);
unlock:
mutex_unlock(&dev->umrc.init_lock);
return ret;
}
void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
{
if (dev->umrc.state == MLX5_UMR_STATE_UNINIT)
return;
mutex_destroy(&dev->umrc.lock);
/* After device init, UMR cp/qp are not unset during the lifetime. */
ib_destroy_qp(dev->umrc.qp);
ib_free_cq(dev->umrc.cq);
}
int mlx5r_umr_init(struct mlx5_ib_dev *dev)
{
struct ib_pd *pd;
pd = ib_alloc_pd(&dev->ib_dev, 0);
if (IS_ERR(pd)) {
mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
return PTR_ERR(pd);
}
dev->umrc.pd = pd;
mutex_init(&dev->umrc.init_lock);
return 0;
}
void mlx5r_umr_cleanup(struct mlx5_ib_dev *dev)
{
IB/mlx5: Fix UMR pd cleanup on error flow of driver init JIRA: https://issues.redhat.com/browse/RHEL-52869 Upstream-status: v6.12-rc1 commit 112e6e83a894260cc7efe79a1fc47d4d51461742 Author: Chris Mi <cmi@nvidia.com> Date: Mon Sep 2 13:35:40 2024 +0300 IB/mlx5: Fix UMR pd cleanup on error flow of driver init The cited commit moves the pd allocation from function mlx5r_umr_resource_cleanup() to a new function mlx5r_umr_cleanup(). So the fix in commit [1] is broken. In error flow, will hit panic [2]. Fix it by checking pd pointer to avoid panic if it is NULL; [1] RDMA/mlx5: Fix UMR cleanup on error flow of driver init [2] [ 347.567063] infiniband mlx5_0: Couldn't register device with driver model [ 347.591382] BUG: kernel NULL pointer dereference, address: 0000000000000020 [ 347.593438] #PF: supervisor read access in kernel mode [ 347.595176] #PF: error_code(0x0000) - not-present page [ 347.596962] PGD 0 P4D 0 [ 347.601361] RIP: 0010:ib_dealloc_pd_user+0x12/0xc0 [ib_core] [ 347.604171] RSP: 0018:ffff888106293b10 EFLAGS: 00010282 [ 347.604834] RAX: 0000000000000000 RBX: 000000000000000e RCX: 0000000000000000 [ 347.605672] RDX: ffff888106293ad0 RSI: 0000000000000000 RDI: 0000000000000000 [ 347.606529] RBP: 0000000000000000 R08: ffff888106293ae0 R09: ffff888106293ae0 [ 347.607379] R10: 0000000000000a06 R11: 0000000000000000 R12: 0000000000000000 [ 347.608224] R13: ffffffffa0704dc0 R14: 0000000000000001 R15: 0000000000000001 [ 347.609067] FS: 00007fdc720cd9c0(0000) GS:ffff88852c880000(0000) knlGS:0000000000000000 [ 347.610094] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 347.610727] CR2: 0000000000000020 CR3: 0000000103012003 CR4: 0000000000370eb0 [ 347.611421] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 347.612113] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 347.612804] Call Trace: [ 347.613130] <TASK> [ 347.613417] ? __die+0x20/0x60 [ 347.613793] ? page_fault_oops+0x150/0x3e0 [ 347.614243] ? free_msg+0x68/0x80 [mlx5_core] [ 347.614840] ? cmd_exec+0x48f/0x11d0 [mlx5_core] [ 347.615359] ? exc_page_fault+0x74/0x130 [ 347.615808] ? asm_exc_page_fault+0x22/0x30 [ 347.616273] ? ib_dealloc_pd_user+0x12/0xc0 [ib_core] [ 347.616801] mlx5r_umr_cleanup+0x23/0x90 [mlx5_ib] [ 347.617365] mlx5_ib_stage_pre_ib_reg_umr_cleanup+0x36/0x40 [mlx5_ib] [ 347.618025] __mlx5_ib_add+0x96/0xd0 [mlx5_ib] [ 347.618539] mlx5r_probe+0xe9/0x310 [mlx5_ib] [ 347.619032] ? kernfs_add_one+0x107/0x150 [ 347.619478] ? __mlx5_ib_add+0xd0/0xd0 [mlx5_ib] [ 347.619984] auxiliary_bus_probe+0x3e/0x90 [ 347.620448] really_probe+0xc5/0x3a0 [ 347.620857] __driver_probe_device+0x80/0x160 [ 347.621325] driver_probe_device+0x1e/0x90 [ 347.621770] __driver_attach+0xec/0x1c0 [ 347.622213] ? __device_attach_driver+0x100/0x100 [ 347.622724] bus_for_each_dev+0x71/0xc0 [ 347.623151] bus_add_driver+0xed/0x240 [ 347.623570] driver_register+0x58/0x100 [ 347.623998] __auxiliary_driver_register+0x6a/0xc0 [ 347.624499] ? driver_register+0xae/0x100 [ 347.624940] ? 0xffffffffa0893000 [ 347.625329] mlx5_ib_init+0x16a/0x1e0 [mlx5_ib] [ 347.625845] do_one_initcall+0x4a/0x2a0 [ 347.626273] ? gcov_event+0x2e2/0x3a0 [ 347.626706] do_init_module+0x8a/0x260 [ 347.627126] init_module_from_file+0x8b/0xd0 [ 347.627596] __x64_sys_finit_module+0x1ca/0x2f0 [ 347.628089] do_syscall_64+0x4c/0x100 Fixes: 638420115cc4 ("IB/mlx5: Create UMR QP just before first reg_mr occurs") Signed-off-by: Chris Mi <cmi@nvidia.com> Reviewed-by: Jianbo Liu <jianbol@nvidia.com> Link: https://patch.msgid.link/778c40c60287992da5d6ec92bb07b67f7bb5e6ef.1725273295.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Benjamin Poirier <bpoirier@redhat.com>
2024-12-05 15:32:08 +00:00
if (!dev->umrc.pd)
return;
mutex_destroy(&dev->umrc.init_lock);
ib_dealloc_pd(dev->umrc.pd);
}
static int mlx5r_umr_recover(struct mlx5_ib_dev *dev)
{
struct umr_common *umrc = &dev->umrc;
struct ib_qp_attr attr;
int err;
attr.qp_state = IB_QPS_RESET;
err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
if (err) {
mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
goto err;
}
err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
if (err)
goto err;
umrc->state = MLX5_UMR_STATE_ACTIVE;
return 0;
err:
umrc->state = MLX5_UMR_STATE_ERR;
return err;
}
static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
struct mlx5r_umr_wqe *wqe, bool with_data)
{
unsigned int wqe_size =
with_data ? sizeof(struct mlx5r_umr_wqe) :
sizeof(struct mlx5r_umr_wqe) -
sizeof(struct mlx5_wqe_data_seg);
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_wqe_ctrl_seg *ctrl;
union {
struct ib_cqe *ib_cqe;
u64 wr_id;
} id;
void *cur_edge, *seg;
unsigned long flags;
unsigned int idx;
int size, err;
if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
return -EIO;
spin_lock_irqsave(&qp->sq.lock, flags);
err = mlx5r_begin_wqe(qp, &seg, &ctrl, &idx, &size, &cur_edge, 0,
cpu_to_be32(mkey), false, false);
if (WARN_ON(err))
goto out;
qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
mlx5r_memcpy_send_wqe(&qp->sq, &cur_edge, &seg, &size, wqe, wqe_size);
id.ib_cqe = cqe;
mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0,
MLX5_FENCE_MODE_INITIATOR_SMALL, MLX5_OPCODE_UMR);
mlx5r_ring_db(qp, 1, ctrl);
out:
spin_unlock_irqrestore(&qp->sq.lock, flags);
return err;
}
static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct mlx5_ib_umr_context *context =
container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
context->status = wc->status;
complete(&context->done);
}
static inline void mlx5r_umr_init_context(struct mlx5r_umr_context *context)
{
context->cqe.done = mlx5r_umr_done;
init_completion(&context->done);
}
static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
struct mlx5r_umr_wqe *wqe, bool with_data)
{
struct umr_common *umrc = &dev->umrc;
struct mlx5r_umr_context umr_context;
int err;
err = umr_check_mkey_mask(dev, be64_to_cpu(wqe->ctrl_seg.mkey_mask));
if (WARN_ON(err))
return err;
mlx5r_umr_init_context(&umr_context);
down(&umrc->sem);
while (true) {
mutex_lock(&umrc->lock);
if (umrc->state == MLX5_UMR_STATE_ERR) {
mutex_unlock(&umrc->lock);
err = -EFAULT;
break;
}
if (umrc->state == MLX5_UMR_STATE_RECOVER) {
mutex_unlock(&umrc->lock);
usleep_range(3000, 5000);
continue;
}
err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
with_data);
mutex_unlock(&umrc->lock);
if (err) {
mlx5_ib_warn(dev, "UMR post send failed, err %d\n",
err);
break;
}
wait_for_completion(&umr_context.done);
if (umr_context.status == IB_WC_SUCCESS)
break;
if (umr_context.status == IB_WC_WR_FLUSH_ERR)
continue;
WARN_ON_ONCE(1);
mlx5_ib_warn(dev,
"reg umr failed (%u). Trying to recover and resubmit the flushed WQEs, mkey = %u\n",
umr_context.status, mkey);
mutex_lock(&umrc->lock);
err = mlx5r_umr_recover(dev);
mutex_unlock(&umrc->lock);
if (err)
mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n",
err);
err = -EFAULT;
break;
}
up(&umrc->sem);
return err;
}
/**
* mlx5r_umr_revoke_mr - Fence all DMA on the MR
* @mr: The MR to fence
*
* Upon return the NIC will not be doing any DMA to the pages under the MR,
* and any DMA in progress will be completed. Failure of this function
* indicates the HW has failed catastrophically.
*/
int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr)
{
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct mlx5r_umr_wqe wqe = {};
if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
return 0;
wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
wqe.ctrl_seg.mkey_mask |= get_umr_disable_mr_mask();
wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;
MLX5_SET(mkc, &wqe.mkey_seg, free, 1);
MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(dev->umrc.pd)->pdn);
MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
mlx5_mkey_variant(mr->mmkey.key));
return mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
}
static void mlx5r_umr_set_access_flags(struct mlx5_ib_dev *dev,
struct mlx5_mkey_seg *seg,
unsigned int access_flags)
{
RDMA/mlx5: Check pcie_relaxed_ordering_enabled() in UMR Bugzilla: https://bugzilla.redhat.com/2165364 Upstream-status: v6.4-rc1 commit d43b020b0f82c088ef8ff3196ef00575a97d200e Author: Avihai Horon <avihaih@nvidia.com> Date: Mon Apr 10 16:07:51 2023 +0300 RDMA/mlx5: Check pcie_relaxed_ordering_enabled() in UMR relaxed_ordering_read HCA capability is set if both the device supports relaxed ordering (RO) read and RO is set in PCI config space. RO in PCI config space can change during runtime. This will change the value of relaxed_ordering_read HCA capability in FW, but the driver will not see it since it queries the capabilities only once. This can lead to the following scenario: 1. RO in PCI config space is enabled. 2. User creates MKey without RO. 3. RO in PCI config space is disabled. As a result, relaxed_ordering_read HCA capability is turned off in FW but remains on in driver copy of the capabilities. 4. User requests to reconfig the MKey with RO via UMR. 5. Driver will try to reconfig the MKey with RO read although it shouldn't (as relaxed_ordering_read HCA capability is really off). To fix this, check pcie_relaxed_ordering_enabled() before setting RO read in UMR. Fixes: 896ec9735336 ("RDMA/mlx5: Set mkey relaxed ordering by UMR with ConnectX-7") Signed-off-by: Avihai Horon <avihaih@nvidia.com> Reviewed-by: Shay Drory <shayd@nvidia.com> Link: https://lore.kernel.org/r/8d39eb8317e7bed1a354311a20ae707788fd94ed.1681131553.git.leon@kernel.org Reviewed-by: Jacob Keller <jacob.e.keller@intel.com> Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-05-21 13:35:50 +00:00
bool ro_read = (access_flags & IB_ACCESS_RELAXED_ORDERING) &&
RDMA/mlx5: Allow relaxed ordering read in VFs and VMs JIRA: https://issues.redhat.com/browse/RHEL-882 Upstream-status: v6.4-rc1 commit bd4ba605c4a92b46ab414626a4f969a19103f97a Author: Avihai Horon <avihaih@nvidia.com> Date: Mon Apr 10 16:07:53 2023 +0300 RDMA/mlx5: Allow relaxed ordering read in VFs and VMs According to PCIe spec, Enable Relaxed Ordering value in the VF's PCI config space is wired to 0 and PF relaxed ordering (RO) setting should be applied to the VF. In QEMU (and maybe others), when assigning VFs, the RO bit in PCI config space is not emulated properly and is always set to 0. Therefore, pcie_relaxed_ordering_enabled() always returns 0 for VFs and VMs and thus MKeys can't be created with RO read even if the PF supports it. pcie_relaxed_ordering_enabled() check was added to avoid a syndrome when creating a MKey with relaxed ordering (RO) enabled when the driver's relaxed_ordering_read_pci_enabled HCA capability is out of sync with FW. With the new relaxed_ordering_read capability this can't happen, as it's set regardless of RO value in PCI config space and thus can't change during runtime. Hence, to allow RO read in VFs and VMs, use the new HCA capability relaxed_ordering_read without checking pcie_relaxed_ordering_enabled(). The old capability checks are kept for backward compatibility with older FWs. Allowing RO in VFs and VMs is valuable since it can greatly improve performance on some setups. For example, testing throughput of a VF on an AMD EPYC 7763 and ConnectX-6 Dx setup showed roughly 60% performance improvement. Signed-off-by: Avihai Horon <avihaih@nvidia.com> Reviewed-by: Shay Drory <shayd@nvidia.com> Reviewed-by: Aya Levin <ayal@nvidia.com> Link: https://lore.kernel.org/r/e7048640d66c341a8fa0465e099926e7989184bc.1681131553.git.leon@kernel.org Reviewed-by: Jacob Keller <jacob.e.keller@intel.com> Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-12-13 09:06:28 +00:00
(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) ||
pcie_relaxed_ordering_enabled(dev->mdev->pdev));
RDMA/mlx5: Check pcie_relaxed_ordering_enabled() in UMR Bugzilla: https://bugzilla.redhat.com/2165364 Upstream-status: v6.4-rc1 commit d43b020b0f82c088ef8ff3196ef00575a97d200e Author: Avihai Horon <avihaih@nvidia.com> Date: Mon Apr 10 16:07:51 2023 +0300 RDMA/mlx5: Check pcie_relaxed_ordering_enabled() in UMR relaxed_ordering_read HCA capability is set if both the device supports relaxed ordering (RO) read and RO is set in PCI config space. RO in PCI config space can change during runtime. This will change the value of relaxed_ordering_read HCA capability in FW, but the driver will not see it since it queries the capabilities only once. This can lead to the following scenario: 1. RO in PCI config space is enabled. 2. User creates MKey without RO. 3. RO in PCI config space is disabled. As a result, relaxed_ordering_read HCA capability is turned off in FW but remains on in driver copy of the capabilities. 4. User requests to reconfig the MKey with RO via UMR. 5. Driver will try to reconfig the MKey with RO read although it shouldn't (as relaxed_ordering_read HCA capability is really off). To fix this, check pcie_relaxed_ordering_enabled() before setting RO read in UMR. Fixes: 896ec9735336 ("RDMA/mlx5: Set mkey relaxed ordering by UMR with ConnectX-7") Signed-off-by: Avihai Horon <avihaih@nvidia.com> Reviewed-by: Shay Drory <shayd@nvidia.com> Link: https://lore.kernel.org/r/8d39eb8317e7bed1a354311a20ae707788fd94ed.1681131553.git.leon@kernel.org Reviewed-by: Jacob Keller <jacob.e.keller@intel.com> Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-05-21 13:35:50 +00:00
MLX5_SET(mkc, seg, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
MLX5_SET(mkc, seg, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
MLX5_SET(mkc, seg, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
MLX5_SET(mkc, seg, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
MLX5_SET(mkc, seg, lr, 1);
MLX5_SET(mkc, seg, relaxed_ordering_write,
!!(access_flags & IB_ACCESS_RELAXED_ORDERING));
RDMA/mlx5: Check pcie_relaxed_ordering_enabled() in UMR Bugzilla: https://bugzilla.redhat.com/2165364 Upstream-status: v6.4-rc1 commit d43b020b0f82c088ef8ff3196ef00575a97d200e Author: Avihai Horon <avihaih@nvidia.com> Date: Mon Apr 10 16:07:51 2023 +0300 RDMA/mlx5: Check pcie_relaxed_ordering_enabled() in UMR relaxed_ordering_read HCA capability is set if both the device supports relaxed ordering (RO) read and RO is set in PCI config space. RO in PCI config space can change during runtime. This will change the value of relaxed_ordering_read HCA capability in FW, but the driver will not see it since it queries the capabilities only once. This can lead to the following scenario: 1. RO in PCI config space is enabled. 2. User creates MKey without RO. 3. RO in PCI config space is disabled. As a result, relaxed_ordering_read HCA capability is turned off in FW but remains on in driver copy of the capabilities. 4. User requests to reconfig the MKey with RO via UMR. 5. Driver will try to reconfig the MKey with RO read although it shouldn't (as relaxed_ordering_read HCA capability is really off). To fix this, check pcie_relaxed_ordering_enabled() before setting RO read in UMR. Fixes: 896ec9735336 ("RDMA/mlx5: Set mkey relaxed ordering by UMR with ConnectX-7") Signed-off-by: Avihai Horon <avihaih@nvidia.com> Reviewed-by: Shay Drory <shayd@nvidia.com> Link: https://lore.kernel.org/r/8d39eb8317e7bed1a354311a20ae707788fd94ed.1681131553.git.leon@kernel.org Reviewed-by: Jacob Keller <jacob.e.keller@intel.com> Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Mohammad Kabat <mkabat@redhat.com>
2023-05-21 13:35:50 +00:00
MLX5_SET(mkc, seg, relaxed_ordering_read, ro_read);
}
int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
int access_flags)
{
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct mlx5r_umr_wqe wqe = {};
int err;
wqe.ctrl_seg.mkey_mask = get_umr_update_access_mask(dev);
wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
wqe.ctrl_seg.flags = MLX5_UMR_CHECK_FREE;
wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;
mlx5r_umr_set_access_flags(dev, &wqe.mkey_seg, access_flags);
MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(pd)->pdn);
MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
mlx5_mkey_variant(mr->mmkey.key));
err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
if (err)
return err;
mr->access_flags = access_flags;
return 0;
}
#define MLX5_MAX_UMR_CHUNK \
((1 << (MLX5_MAX_UMR_SHIFT + 4)) - MLX5_UMR_FLEX_ALIGNMENT)
#define MLX5_SPARE_UMR_CHUNK 0x10000
/*
* Allocate a temporary buffer to hold the per-page information to transfer to
* HW. For efficiency this should be as large as it can be, but buffer
* allocation failure is not allowed, so try smaller sizes.
*/
static void *mlx5r_umr_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
{
const size_t xlt_chunk_align = MLX5_UMR_FLEX_ALIGNMENT / ent_size;
size_t size;
void *res = NULL;
static_assert(PAGE_SIZE % MLX5_UMR_FLEX_ALIGNMENT == 0);
/*
* MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the
* allocation can't trigger any kind of reclaim.
*/
might_sleep();
gfp_mask |= __GFP_ZERO | __GFP_NORETRY;
/*
* If the system already has a suitable high order page then just use
* that, but don't try hard to create one. This max is about 1M, so a
* free x86 huge page will satisfy it.
*/
size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
MLX5_MAX_UMR_CHUNK);
*nents = size / ent_size;
res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
get_order(size));
if (res)
return res;
if (size > MLX5_SPARE_UMR_CHUNK) {
size = MLX5_SPARE_UMR_CHUNK;
*nents = size / ent_size;
res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
get_order(size));
if (res)
return res;
}
*nents = PAGE_SIZE / ent_size;
res = (void *)__get_free_page(gfp_mask);
if (res)
return res;
mutex_lock(&xlt_emergency_page_mutex);
memset(xlt_emergency_page, 0, PAGE_SIZE);
return xlt_emergency_page;
}
static void mlx5r_umr_free_xlt(void *xlt, size_t length)
{
if (xlt == xlt_emergency_page) {
mutex_unlock(&xlt_emergency_page_mutex);
return;
}
free_pages((unsigned long)xlt, get_order(length));
}
static void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
struct ib_sge *sg)
{
struct device *ddev = &dev->mdev->pdev->dev;
dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
mlx5r_umr_free_xlt(xlt, sg->length);
}
/*
* Create an XLT buffer ready for submission.
*/
static void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg,
size_t nents, size_t ent_size,
unsigned int flags)
{
struct device *ddev = &dev->mdev->pdev->dev;
dma_addr_t dma;
void *xlt;
xlt = mlx5r_umr_alloc_xlt(&nents, ent_size,
flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
GFP_KERNEL);
sg->length = nents * ent_size;
dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
if (dma_mapping_error(ddev, dma)) {
mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
mlx5r_umr_free_xlt(xlt, sg->length);
return NULL;
}
sg->addr = dma;
sg->lkey = dev->umrc.pd->local_dma_lkey;
return xlt;
}
static void
mlx5r_umr_set_update_xlt_ctrl_seg(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
unsigned int flags, struct ib_sge *sg)
{
if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
/* fail if free */
ctrl_seg->flags = MLX5_UMR_CHECK_FREE;
else
/* fail if not free */
ctrl_seg->flags = MLX5_UMR_CHECK_NOT_FREE;
ctrl_seg->xlt_octowords =
cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
}
static void mlx5r_umr_set_update_xlt_mkey_seg(struct mlx5_ib_dev *dev,
struct mlx5_mkey_seg *mkey_seg,
struct mlx5_ib_mr *mr,
unsigned int page_shift)
{
mlx5r_umr_set_access_flags(dev, mkey_seg, mr->access_flags);
MLX5_SET(mkc, mkey_seg, pd, to_mpd(mr->ibmr.pd)->pdn);
MLX5_SET64(mkc, mkey_seg, start_addr, mr->ibmr.iova);
MLX5_SET64(mkc, mkey_seg, len, mr->ibmr.length);
MLX5_SET(mkc, mkey_seg, log_page_size, page_shift);
MLX5_SET(mkc, mkey_seg, qpn, 0xffffff);
MLX5_SET(mkc, mkey_seg, mkey_7_0, mlx5_mkey_variant(mr->mmkey.key));
}
static void
mlx5r_umr_set_update_xlt_data_seg(struct mlx5_wqe_data_seg *data_seg,
struct ib_sge *sg)
{
data_seg->byte_count = cpu_to_be32(sg->length);
data_seg->lkey = cpu_to_be32(sg->lkey);
data_seg->addr = cpu_to_be64(sg->addr);
}
static void mlx5r_umr_update_offset(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
u64 offset)
{
u64 octo_offset = mlx5r_umr_get_xlt_octo(offset);
ctrl_seg->xlt_offset = cpu_to_be16(octo_offset & 0xffff);
ctrl_seg->xlt_offset_47_16 = cpu_to_be32(octo_offset >> 16);
ctrl_seg->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
}
static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev,
struct mlx5r_umr_wqe *wqe,
struct mlx5_ib_mr *mr, struct ib_sge *sg,
unsigned int flags)
{
bool update_pd_access, update_translation;
if (flags & MLX5_IB_UPD_XLT_ENABLE)
wqe->ctrl_seg.mkey_mask |= get_umr_enable_mr_mask();
update_pd_access = flags & MLX5_IB_UPD_XLT_ENABLE ||
flags & MLX5_IB_UPD_XLT_PD ||
flags & MLX5_IB_UPD_XLT_ACCESS;
if (update_pd_access) {
wqe->ctrl_seg.mkey_mask |= get_umr_update_access_mask(dev);
wqe->ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
}
update_translation =
flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR;
if (update_translation) {
wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask();
if (!mr->ibmr.length)
MLX5_SET(mkc, &wqe->mkey_seg, length64, 1);
}
wqe->ctrl_seg.xlt_octowords =
cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
wqe->data_seg.byte_count = cpu_to_be32(sg->length);
}
RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct JIRA: https://issues.redhat.com/browse/RHEL-52869 Upstream-status: v6.12-rc1 commit de8f847a5114ff7cfcdfc114af8485c431dec703 Author: Yishai Hadas <yishaih@nvidia.com> Date: Thu Aug 1 15:05:16 2024 +0300 RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct Add support for DMABUF MR registrations with Data-direct device. Upon userspace calling to register a DMABUF MR with the data direct bit set, the below algorithm will be followed. 1) Obtain a pinned DMABUF umem from the IB core using the user input parameters (FD, offset, length) and the DMA PF device. The DMA PF device is needed to allow the IOMMU to enable the DMA PF to access the user buffer over PCI. 2) Create a KSM MKEY by setting its entries according to the user buffer VA to IOVA mapping, with the MKEY being the data direct device-crossed MKEY. This KSM MKEY is umrable and will be used as part of the MR cache. The PD for creating it is the internal device 'data direct' kernel one. 3) Create a crossing MKEY that points to the KSM MKEY using the crossing access mode. 4) Manage the KSM MKEY by adding it to a list of 'data direct' MKEYs managed on the mlx5_ib device. 5) Return the crossing MKEY to the user, created with its supplied PD. Upon DMA PF unbind flow, the driver will revoke the KSM entries. The final deregistration will occur under the hood once the application deregisters its MKEY. Notes: - This version supports only the PINNED UMEM mode, so there is no dependency on ODP. - The IOVA supplied by the application must be system page aligned due to HW translations of KSM. - The crossing MKEY will not be umrable or part of the MR cache, as we cannot change its crossed (i.e. KSM) MKEY over UMR. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://patch.msgid.link/1f99d8020ed540d9702b9e2252a145a439609ba6.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Benjamin Poirier <bpoirier@redhat.com>
2024-12-05 15:32:08 +00:00
static int
_mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags, bool dd)
{
RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct JIRA: https://issues.redhat.com/browse/RHEL-52869 Upstream-status: v6.12-rc1 commit de8f847a5114ff7cfcdfc114af8485c431dec703 Author: Yishai Hadas <yishaih@nvidia.com> Date: Thu Aug 1 15:05:16 2024 +0300 RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct Add support for DMABUF MR registrations with Data-direct device. Upon userspace calling to register a DMABUF MR with the data direct bit set, the below algorithm will be followed. 1) Obtain a pinned DMABUF umem from the IB core using the user input parameters (FD, offset, length) and the DMA PF device. The DMA PF device is needed to allow the IOMMU to enable the DMA PF to access the user buffer over PCI. 2) Create a KSM MKEY by setting its entries according to the user buffer VA to IOVA mapping, with the MKEY being the data direct device-crossed MKEY. This KSM MKEY is umrable and will be used as part of the MR cache. The PD for creating it is the internal device 'data direct' kernel one. 3) Create a crossing MKEY that points to the KSM MKEY using the crossing access mode. 4) Manage the KSM MKEY by adding it to a list of 'data direct' MKEYs managed on the mlx5_ib device. 5) Return the crossing MKEY to the user, created with its supplied PD. Upon DMA PF unbind flow, the driver will revoke the KSM entries. The final deregistration will occur under the hood once the application deregisters its MKEY. Notes: - This version supports only the PINNED UMEM mode, so there is no dependency on ODP. - The IOVA supplied by the application must be system page aligned due to HW translations of KSM. - The crossing MKEY will not be umrable or part of the MR cache, as we cannot change its crossed (i.e. KSM) MKEY over UMR. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://patch.msgid.link/1f99d8020ed540d9702b9e2252a145a439609ba6.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Benjamin Poirier <bpoirier@redhat.com>
2024-12-05 15:32:08 +00:00
size_t ent_size = dd ? sizeof(struct mlx5_ksm) : sizeof(struct mlx5_mtt);
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct device *ddev = &dev->mdev->pdev->dev;
struct mlx5r_umr_wqe wqe = {};
struct ib_block_iter biter;
RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct JIRA: https://issues.redhat.com/browse/RHEL-52869 Upstream-status: v6.12-rc1 commit de8f847a5114ff7cfcdfc114af8485c431dec703 Author: Yishai Hadas <yishaih@nvidia.com> Date: Thu Aug 1 15:05:16 2024 +0300 RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct Add support for DMABUF MR registrations with Data-direct device. Upon userspace calling to register a DMABUF MR with the data direct bit set, the below algorithm will be followed. 1) Obtain a pinned DMABUF umem from the IB core using the user input parameters (FD, offset, length) and the DMA PF device. The DMA PF device is needed to allow the IOMMU to enable the DMA PF to access the user buffer over PCI. 2) Create a KSM MKEY by setting its entries according to the user buffer VA to IOVA mapping, with the MKEY being the data direct device-crossed MKEY. This KSM MKEY is umrable and will be used as part of the MR cache. The PD for creating it is the internal device 'data direct' kernel one. 3) Create a crossing MKEY that points to the KSM MKEY using the crossing access mode. 4) Manage the KSM MKEY by adding it to a list of 'data direct' MKEYs managed on the mlx5_ib device. 5) Return the crossing MKEY to the user, created with its supplied PD. Upon DMA PF unbind flow, the driver will revoke the KSM entries. The final deregistration will occur under the hood once the application deregisters its MKEY. Notes: - This version supports only the PINNED UMEM mode, so there is no dependency on ODP. - The IOVA supplied by the application must be system page aligned due to HW translations of KSM. - The crossing MKEY will not be umrable or part of the MR cache, as we cannot change its crossed (i.e. KSM) MKEY over UMR. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://patch.msgid.link/1f99d8020ed540d9702b9e2252a145a439609ba6.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Benjamin Poirier <bpoirier@redhat.com>
2024-12-05 15:32:08 +00:00
struct mlx5_ksm *cur_ksm;
struct mlx5_mtt *cur_mtt;
size_t orig_sg_length;
size_t final_size;
RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct JIRA: https://issues.redhat.com/browse/RHEL-52869 Upstream-status: v6.12-rc1 commit de8f847a5114ff7cfcdfc114af8485c431dec703 Author: Yishai Hadas <yishaih@nvidia.com> Date: Thu Aug 1 15:05:16 2024 +0300 RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct Add support for DMABUF MR registrations with Data-direct device. Upon userspace calling to register a DMABUF MR with the data direct bit set, the below algorithm will be followed. 1) Obtain a pinned DMABUF umem from the IB core using the user input parameters (FD, offset, length) and the DMA PF device. The DMA PF device is needed to allow the IOMMU to enable the DMA PF to access the user buffer over PCI. 2) Create a KSM MKEY by setting its entries according to the user buffer VA to IOVA mapping, with the MKEY being the data direct device-crossed MKEY. This KSM MKEY is umrable and will be used as part of the MR cache. The PD for creating it is the internal device 'data direct' kernel one. 3) Create a crossing MKEY that points to the KSM MKEY using the crossing access mode. 4) Manage the KSM MKEY by adding it to a list of 'data direct' MKEYs managed on the mlx5_ib device. 5) Return the crossing MKEY to the user, created with its supplied PD. Upon DMA PF unbind flow, the driver will revoke the KSM entries. The final deregistration will occur under the hood once the application deregisters its MKEY. Notes: - This version supports only the PINNED UMEM mode, so there is no dependency on ODP. - The IOVA supplied by the application must be system page aligned due to HW translations of KSM. - The crossing MKEY will not be umrable or part of the MR cache, as we cannot change its crossed (i.e. KSM) MKEY over UMR. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://patch.msgid.link/1f99d8020ed540d9702b9e2252a145a439609ba6.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Benjamin Poirier <bpoirier@redhat.com>
2024-12-05 15:32:08 +00:00
void *curr_entry;
struct ib_sge sg;
RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct JIRA: https://issues.redhat.com/browse/RHEL-52869 Upstream-status: v6.12-rc1 commit de8f847a5114ff7cfcdfc114af8485c431dec703 Author: Yishai Hadas <yishaih@nvidia.com> Date: Thu Aug 1 15:05:16 2024 +0300 RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct Add support for DMABUF MR registrations with Data-direct device. Upon userspace calling to register a DMABUF MR with the data direct bit set, the below algorithm will be followed. 1) Obtain a pinned DMABUF umem from the IB core using the user input parameters (FD, offset, length) and the DMA PF device. The DMA PF device is needed to allow the IOMMU to enable the DMA PF to access the user buffer over PCI. 2) Create a KSM MKEY by setting its entries according to the user buffer VA to IOVA mapping, with the MKEY being the data direct device-crossed MKEY. This KSM MKEY is umrable and will be used as part of the MR cache. The PD for creating it is the internal device 'data direct' kernel one. 3) Create a crossing MKEY that points to the KSM MKEY using the crossing access mode. 4) Manage the KSM MKEY by adding it to a list of 'data direct' MKEYs managed on the mlx5_ib device. 5) Return the crossing MKEY to the user, created with its supplied PD. Upon DMA PF unbind flow, the driver will revoke the KSM entries. The final deregistration will occur under the hood once the application deregisters its MKEY. Notes: - This version supports only the PINNED UMEM mode, so there is no dependency on ODP. - The IOVA supplied by the application must be system page aligned due to HW translations of KSM. - The crossing MKEY will not be umrable or part of the MR cache, as we cannot change its crossed (i.e. KSM) MKEY over UMR. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://patch.msgid.link/1f99d8020ed540d9702b9e2252a145a439609ba6.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Benjamin Poirier <bpoirier@redhat.com>
2024-12-05 15:32:08 +00:00
void *entry;
u64 offset = 0;
int err = 0;
RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct JIRA: https://issues.redhat.com/browse/RHEL-52869 Upstream-status: v6.12-rc1 commit de8f847a5114ff7cfcdfc114af8485c431dec703 Author: Yishai Hadas <yishaih@nvidia.com> Date: Thu Aug 1 15:05:16 2024 +0300 RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct Add support for DMABUF MR registrations with Data-direct device. Upon userspace calling to register a DMABUF MR with the data direct bit set, the below algorithm will be followed. 1) Obtain a pinned DMABUF umem from the IB core using the user input parameters (FD, offset, length) and the DMA PF device. The DMA PF device is needed to allow the IOMMU to enable the DMA PF to access the user buffer over PCI. 2) Create a KSM MKEY by setting its entries according to the user buffer VA to IOVA mapping, with the MKEY being the data direct device-crossed MKEY. This KSM MKEY is umrable and will be used as part of the MR cache. The PD for creating it is the internal device 'data direct' kernel one. 3) Create a crossing MKEY that points to the KSM MKEY using the crossing access mode. 4) Manage the KSM MKEY by adding it to a list of 'data direct' MKEYs managed on the mlx5_ib device. 5) Return the crossing MKEY to the user, created with its supplied PD. Upon DMA PF unbind flow, the driver will revoke the KSM entries. The final deregistration will occur under the hood once the application deregisters its MKEY. Notes: - This version supports only the PINNED UMEM mode, so there is no dependency on ODP. - The IOVA supplied by the application must be system page aligned due to HW translations of KSM. - The crossing MKEY will not be umrable or part of the MR cache, as we cannot change its crossed (i.e. KSM) MKEY over UMR. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://patch.msgid.link/1f99d8020ed540d9702b9e2252a145a439609ba6.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Benjamin Poirier <bpoirier@redhat.com>
2024-12-05 15:32:08 +00:00
entry = mlx5r_umr_create_xlt(dev, &sg,
ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift),
ent_size, flags);
if (!entry)
return -ENOMEM;
orig_sg_length = sg.length;
mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr,
mr->page_shift);
RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct JIRA: https://issues.redhat.com/browse/RHEL-52869 Upstream-status: v6.12-rc1 commit de8f847a5114ff7cfcdfc114af8485c431dec703 Author: Yishai Hadas <yishaih@nvidia.com> Date: Thu Aug 1 15:05:16 2024 +0300 RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct Add support for DMABUF MR registrations with Data-direct device. Upon userspace calling to register a DMABUF MR with the data direct bit set, the below algorithm will be followed. 1) Obtain a pinned DMABUF umem from the IB core using the user input parameters (FD, offset, length) and the DMA PF device. The DMA PF device is needed to allow the IOMMU to enable the DMA PF to access the user buffer over PCI. 2) Create a KSM MKEY by setting its entries according to the user buffer VA to IOVA mapping, with the MKEY being the data direct device-crossed MKEY. This KSM MKEY is umrable and will be used as part of the MR cache. The PD for creating it is the internal device 'data direct' kernel one. 3) Create a crossing MKEY that points to the KSM MKEY using the crossing access mode. 4) Manage the KSM MKEY by adding it to a list of 'data direct' MKEYs managed on the mlx5_ib device. 5) Return the crossing MKEY to the user, created with its supplied PD. Upon DMA PF unbind flow, the driver will revoke the KSM entries. The final deregistration will occur under the hood once the application deregisters its MKEY. Notes: - This version supports only the PINNED UMEM mode, so there is no dependency on ODP. - The IOVA supplied by the application must be system page aligned due to HW translations of KSM. - The crossing MKEY will not be umrable or part of the MR cache, as we cannot change its crossed (i.e. KSM) MKEY over UMR. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://patch.msgid.link/1f99d8020ed540d9702b9e2252a145a439609ba6.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Benjamin Poirier <bpoirier@redhat.com>
2024-12-05 15:32:08 +00:00
if (dd) {
/* Use the data direct internal kernel PD */
MLX5_SET(mkc, &wqe.mkey_seg, pd, dev->ddr.pdn);
cur_ksm = entry;
} else {
cur_mtt = entry;
}
mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);
RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct JIRA: https://issues.redhat.com/browse/RHEL-52869 Upstream-status: v6.12-rc1 commit de8f847a5114ff7cfcdfc114af8485c431dec703 Author: Yishai Hadas <yishaih@nvidia.com> Date: Thu Aug 1 15:05:16 2024 +0300 RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct Add support for DMABUF MR registrations with Data-direct device. Upon userspace calling to register a DMABUF MR with the data direct bit set, the below algorithm will be followed. 1) Obtain a pinned DMABUF umem from the IB core using the user input parameters (FD, offset, length) and the DMA PF device. The DMA PF device is needed to allow the IOMMU to enable the DMA PF to access the user buffer over PCI. 2) Create a KSM MKEY by setting its entries according to the user buffer VA to IOVA mapping, with the MKEY being the data direct device-crossed MKEY. This KSM MKEY is umrable and will be used as part of the MR cache. The PD for creating it is the internal device 'data direct' kernel one. 3) Create a crossing MKEY that points to the KSM MKEY using the crossing access mode. 4) Manage the KSM MKEY by adding it to a list of 'data direct' MKEYs managed on the mlx5_ib device. 5) Return the crossing MKEY to the user, created with its supplied PD. Upon DMA PF unbind flow, the driver will revoke the KSM entries. The final deregistration will occur under the hood once the application deregisters its MKEY. Notes: - This version supports only the PINNED UMEM mode, so there is no dependency on ODP. - The IOVA supplied by the application must be system page aligned due to HW translations of KSM. - The crossing MKEY will not be umrable or part of the MR cache, as we cannot change its crossed (i.e. KSM) MKEY over UMR. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://patch.msgid.link/1f99d8020ed540d9702b9e2252a145a439609ba6.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Benjamin Poirier <bpoirier@redhat.com>
2024-12-05 15:32:08 +00:00
curr_entry = entry;
rdma_umem_for_each_dma_block(mr->umem, &biter, BIT(mr->page_shift)) {
RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct JIRA: https://issues.redhat.com/browse/RHEL-52869 Upstream-status: v6.12-rc1 commit de8f847a5114ff7cfcdfc114af8485c431dec703 Author: Yishai Hadas <yishaih@nvidia.com> Date: Thu Aug 1 15:05:16 2024 +0300 RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct Add support for DMABUF MR registrations with Data-direct device. Upon userspace calling to register a DMABUF MR with the data direct bit set, the below algorithm will be followed. 1) Obtain a pinned DMABUF umem from the IB core using the user input parameters (FD, offset, length) and the DMA PF device. The DMA PF device is needed to allow the IOMMU to enable the DMA PF to access the user buffer over PCI. 2) Create a KSM MKEY by setting its entries according to the user buffer VA to IOVA mapping, with the MKEY being the data direct device-crossed MKEY. This KSM MKEY is umrable and will be used as part of the MR cache. The PD for creating it is the internal device 'data direct' kernel one. 3) Create a crossing MKEY that points to the KSM MKEY using the crossing access mode. 4) Manage the KSM MKEY by adding it to a list of 'data direct' MKEYs managed on the mlx5_ib device. 5) Return the crossing MKEY to the user, created with its supplied PD. Upon DMA PF unbind flow, the driver will revoke the KSM entries. The final deregistration will occur under the hood once the application deregisters its MKEY. Notes: - This version supports only the PINNED UMEM mode, so there is no dependency on ODP. - The IOVA supplied by the application must be system page aligned due to HW translations of KSM. - The crossing MKEY will not be umrable or part of the MR cache, as we cannot change its crossed (i.e. KSM) MKEY over UMR. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://patch.msgid.link/1f99d8020ed540d9702b9e2252a145a439609ba6.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Benjamin Poirier <bpoirier@redhat.com>
2024-12-05 15:32:08 +00:00
if (curr_entry == entry + sg.length) {
dma_sync_single_for_device(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe,
true);
if (err)
goto err;
dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
offset += sg.length;
mlx5r_umr_update_offset(&wqe.ctrl_seg, offset);
RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct JIRA: https://issues.redhat.com/browse/RHEL-52869 Upstream-status: v6.12-rc1 commit de8f847a5114ff7cfcdfc114af8485c431dec703 Author: Yishai Hadas <yishaih@nvidia.com> Date: Thu Aug 1 15:05:16 2024 +0300 RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct Add support for DMABUF MR registrations with Data-direct device. Upon userspace calling to register a DMABUF MR with the data direct bit set, the below algorithm will be followed. 1) Obtain a pinned DMABUF umem from the IB core using the user input parameters (FD, offset, length) and the DMA PF device. The DMA PF device is needed to allow the IOMMU to enable the DMA PF to access the user buffer over PCI. 2) Create a KSM MKEY by setting its entries according to the user buffer VA to IOVA mapping, with the MKEY being the data direct device-crossed MKEY. This KSM MKEY is umrable and will be used as part of the MR cache. The PD for creating it is the internal device 'data direct' kernel one. 3) Create a crossing MKEY that points to the KSM MKEY using the crossing access mode. 4) Manage the KSM MKEY by adding it to a list of 'data direct' MKEYs managed on the mlx5_ib device. 5) Return the crossing MKEY to the user, created with its supplied PD. Upon DMA PF unbind flow, the driver will revoke the KSM entries. The final deregistration will occur under the hood once the application deregisters its MKEY. Notes: - This version supports only the PINNED UMEM mode, so there is no dependency on ODP. - The IOVA supplied by the application must be system page aligned due to HW translations of KSM. - The crossing MKEY will not be umrable or part of the MR cache, as we cannot change its crossed (i.e. KSM) MKEY over UMR. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://patch.msgid.link/1f99d8020ed540d9702b9e2252a145a439609ba6.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Benjamin Poirier <bpoirier@redhat.com>
2024-12-05 15:32:08 +00:00
if (dd)
cur_ksm = entry;
else
cur_mtt = entry;
}
RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct JIRA: https://issues.redhat.com/browse/RHEL-52869 Upstream-status: v6.12-rc1 commit de8f847a5114ff7cfcdfc114af8485c431dec703 Author: Yishai Hadas <yishaih@nvidia.com> Date: Thu Aug 1 15:05:16 2024 +0300 RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct Add support for DMABUF MR registrations with Data-direct device. Upon userspace calling to register a DMABUF MR with the data direct bit set, the below algorithm will be followed. 1) Obtain a pinned DMABUF umem from the IB core using the user input parameters (FD, offset, length) and the DMA PF device. The DMA PF device is needed to allow the IOMMU to enable the DMA PF to access the user buffer over PCI. 2) Create a KSM MKEY by setting its entries according to the user buffer VA to IOVA mapping, with the MKEY being the data direct device-crossed MKEY. This KSM MKEY is umrable and will be used as part of the MR cache. The PD for creating it is the internal device 'data direct' kernel one. 3) Create a crossing MKEY that points to the KSM MKEY using the crossing access mode. 4) Manage the KSM MKEY by adding it to a list of 'data direct' MKEYs managed on the mlx5_ib device. 5) Return the crossing MKEY to the user, created with its supplied PD. Upon DMA PF unbind flow, the driver will revoke the KSM entries. The final deregistration will occur under the hood once the application deregisters its MKEY. Notes: - This version supports only the PINNED UMEM mode, so there is no dependency on ODP. - The IOVA supplied by the application must be system page aligned due to HW translations of KSM. - The crossing MKEY will not be umrable or part of the MR cache, as we cannot change its crossed (i.e. KSM) MKEY over UMR. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://patch.msgid.link/1f99d8020ed540d9702b9e2252a145a439609ba6.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Benjamin Poirier <bpoirier@redhat.com>
2024-12-05 15:32:08 +00:00
if (dd) {
cur_ksm->va = cpu_to_be64(rdma_block_iter_dma_address(&biter));
cur_ksm->key = cpu_to_be32(dev->ddr.mkey);
cur_ksm++;
curr_entry = cur_ksm;
} else {
cur_mtt->ptag =
cpu_to_be64(rdma_block_iter_dma_address(&biter) |
MLX5_IB_MTT_PRESENT);
if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
cur_mtt->ptag = 0;
cur_mtt++;
curr_entry = cur_mtt;
}
}
RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct JIRA: https://issues.redhat.com/browse/RHEL-52869 Upstream-status: v6.12-rc1 commit de8f847a5114ff7cfcdfc114af8485c431dec703 Author: Yishai Hadas <yishaih@nvidia.com> Date: Thu Aug 1 15:05:16 2024 +0300 RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct Add support for DMABUF MR registrations with Data-direct device. Upon userspace calling to register a DMABUF MR with the data direct bit set, the below algorithm will be followed. 1) Obtain a pinned DMABUF umem from the IB core using the user input parameters (FD, offset, length) and the DMA PF device. The DMA PF device is needed to allow the IOMMU to enable the DMA PF to access the user buffer over PCI. 2) Create a KSM MKEY by setting its entries according to the user buffer VA to IOVA mapping, with the MKEY being the data direct device-crossed MKEY. This KSM MKEY is umrable and will be used as part of the MR cache. The PD for creating it is the internal device 'data direct' kernel one. 3) Create a crossing MKEY that points to the KSM MKEY using the crossing access mode. 4) Manage the KSM MKEY by adding it to a list of 'data direct' MKEYs managed on the mlx5_ib device. 5) Return the crossing MKEY to the user, created with its supplied PD. Upon DMA PF unbind flow, the driver will revoke the KSM entries. The final deregistration will occur under the hood once the application deregisters its MKEY. Notes: - This version supports only the PINNED UMEM mode, so there is no dependency on ODP. - The IOVA supplied by the application must be system page aligned due to HW translations of KSM. - The crossing MKEY will not be umrable or part of the MR cache, as we cannot change its crossed (i.e. KSM) MKEY over UMR. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://patch.msgid.link/1f99d8020ed540d9702b9e2252a145a439609ba6.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Benjamin Poirier <bpoirier@redhat.com>
2024-12-05 15:32:08 +00:00
final_size = curr_entry - entry;
sg.length = ALIGN(final_size, MLX5_UMR_FLEX_ALIGNMENT);
RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct JIRA: https://issues.redhat.com/browse/RHEL-52869 Upstream-status: v6.12-rc1 commit de8f847a5114ff7cfcdfc114af8485c431dec703 Author: Yishai Hadas <yishaih@nvidia.com> Date: Thu Aug 1 15:05:16 2024 +0300 RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct Add support for DMABUF MR registrations with Data-direct device. Upon userspace calling to register a DMABUF MR with the data direct bit set, the below algorithm will be followed. 1) Obtain a pinned DMABUF umem from the IB core using the user input parameters (FD, offset, length) and the DMA PF device. The DMA PF device is needed to allow the IOMMU to enable the DMA PF to access the user buffer over PCI. 2) Create a KSM MKEY by setting its entries according to the user buffer VA to IOVA mapping, with the MKEY being the data direct device-crossed MKEY. This KSM MKEY is umrable and will be used as part of the MR cache. The PD for creating it is the internal device 'data direct' kernel one. 3) Create a crossing MKEY that points to the KSM MKEY using the crossing access mode. 4) Manage the KSM MKEY by adding it to a list of 'data direct' MKEYs managed on the mlx5_ib device. 5) Return the crossing MKEY to the user, created with its supplied PD. Upon DMA PF unbind flow, the driver will revoke the KSM entries. The final deregistration will occur under the hood once the application deregisters its MKEY. Notes: - This version supports only the PINNED UMEM mode, so there is no dependency on ODP. - The IOVA supplied by the application must be system page aligned due to HW translations of KSM. - The crossing MKEY will not be umrable or part of the MR cache, as we cannot change its crossed (i.e. KSM) MKEY over UMR. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://patch.msgid.link/1f99d8020ed540d9702b9e2252a145a439609ba6.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Benjamin Poirier <bpoirier@redhat.com>
2024-12-05 15:32:08 +00:00
memset(curr_entry, 0, sg.length - final_size);
mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);
err:
sg.length = orig_sg_length;
RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct JIRA: https://issues.redhat.com/browse/RHEL-52869 Upstream-status: v6.12-rc1 commit de8f847a5114ff7cfcdfc114af8485c431dec703 Author: Yishai Hadas <yishaih@nvidia.com> Date: Thu Aug 1 15:05:16 2024 +0300 RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct Add support for DMABUF MR registrations with Data-direct device. Upon userspace calling to register a DMABUF MR with the data direct bit set, the below algorithm will be followed. 1) Obtain a pinned DMABUF umem from the IB core using the user input parameters (FD, offset, length) and the DMA PF device. The DMA PF device is needed to allow the IOMMU to enable the DMA PF to access the user buffer over PCI. 2) Create a KSM MKEY by setting its entries according to the user buffer VA to IOVA mapping, with the MKEY being the data direct device-crossed MKEY. This KSM MKEY is umrable and will be used as part of the MR cache. The PD for creating it is the internal device 'data direct' kernel one. 3) Create a crossing MKEY that points to the KSM MKEY using the crossing access mode. 4) Manage the KSM MKEY by adding it to a list of 'data direct' MKEYs managed on the mlx5_ib device. 5) Return the crossing MKEY to the user, created with its supplied PD. Upon DMA PF unbind flow, the driver will revoke the KSM entries. The final deregistration will occur under the hood once the application deregisters its MKEY. Notes: - This version supports only the PINNED UMEM mode, so there is no dependency on ODP. - The IOVA supplied by the application must be system page aligned due to HW translations of KSM. - The crossing MKEY will not be umrable or part of the MR cache, as we cannot change its crossed (i.e. KSM) MKEY over UMR. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://patch.msgid.link/1f99d8020ed540d9702b9e2252a145a439609ba6.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Benjamin Poirier <bpoirier@redhat.com>
2024-12-05 15:32:08 +00:00
mlx5r_umr_unmap_free_xlt(dev, entry, &sg);
return err;
}
RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct JIRA: https://issues.redhat.com/browse/RHEL-52869 Upstream-status: v6.12-rc1 commit de8f847a5114ff7cfcdfc114af8485c431dec703 Author: Yishai Hadas <yishaih@nvidia.com> Date: Thu Aug 1 15:05:16 2024 +0300 RDMA/mlx5: Add support for DMABUF MR registrations with Data-direct Add support for DMABUF MR registrations with Data-direct device. Upon userspace calling to register a DMABUF MR with the data direct bit set, the below algorithm will be followed. 1) Obtain a pinned DMABUF umem from the IB core using the user input parameters (FD, offset, length) and the DMA PF device. The DMA PF device is needed to allow the IOMMU to enable the DMA PF to access the user buffer over PCI. 2) Create a KSM MKEY by setting its entries according to the user buffer VA to IOVA mapping, with the MKEY being the data direct device-crossed MKEY. This KSM MKEY is umrable and will be used as part of the MR cache. The PD for creating it is the internal device 'data direct' kernel one. 3) Create a crossing MKEY that points to the KSM MKEY using the crossing access mode. 4) Manage the KSM MKEY by adding it to a list of 'data direct' MKEYs managed on the mlx5_ib device. 5) Return the crossing MKEY to the user, created with its supplied PD. Upon DMA PF unbind flow, the driver will revoke the KSM entries. The final deregistration will occur under the hood once the application deregisters its MKEY. Notes: - This version supports only the PINNED UMEM mode, so there is no dependency on ODP. - The IOVA supplied by the application must be system page aligned due to HW translations of KSM. - The crossing MKEY will not be umrable or part of the MR cache, as we cannot change its crossed (i.e. KSM) MKEY over UMR. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://patch.msgid.link/1f99d8020ed540d9702b9e2252a145a439609ba6.1722512548.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Benjamin Poirier <bpoirier@redhat.com>
2024-12-05 15:32:08 +00:00
int mlx5r_umr_update_data_direct_ksm_pas(struct mlx5_ib_mr *mr, unsigned int flags)
{
/* No invalidation flow is expected */
if (WARN_ON(!mr->umem->is_dmabuf) || (flags & MLX5_IB_UPD_XLT_ZAP))
return -EINVAL;
return _mlx5r_umr_update_mr_pas(mr, flags, true);
}
/*
* Send the DMA list to the HW for a normal MR using UMR.
* Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
* flag may be used.
*/
int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
{
if (WARN_ON(mr->umem->is_odp))
return -EINVAL;
return _mlx5r_umr_update_mr_pas(mr, flags, false);
}
static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
{
return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
}
int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags)
{
int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
? sizeof(struct mlx5_klm)
: sizeof(struct mlx5_mtt);
const int page_align = MLX5_UMR_FLEX_ALIGNMENT / desc_size;
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct device *ddev = &dev->mdev->pdev->dev;
const int page_mask = page_align - 1;
struct mlx5r_umr_wqe wqe = {};
size_t pages_mapped = 0;
size_t pages_to_map = 0;
size_t size_to_map = 0;
size_t orig_sg_length;
size_t pages_iter;
struct ib_sge sg;
int err = 0;
void *xlt;
if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
!umr_can_use_indirect_mkey(dev))
return -EPERM;
if (WARN_ON(!mr->umem->is_odp))
return -EINVAL;
/* UMR copies MTTs in units of MLX5_UMR_FLEX_ALIGNMENT bytes,
* so we need to align the offset and length accordingly
*/
if (idx & page_mask) {
npages += idx & page_mask;
idx &= ~page_mask;
}
pages_to_map = ALIGN(npages, page_align);
xlt = mlx5r_umr_create_xlt(dev, &sg, npages, desc_size, flags);
if (!xlt)
return -ENOMEM;
pages_iter = sg.length / desc_size;
orig_sg_length = sg.length;
if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
pages_to_map = min_t(size_t, pages_to_map, max_pages);
}
mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, page_shift);
mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);
for (pages_mapped = 0;
pages_mapped < pages_to_map && !err;
pages_mapped += pages_iter, idx += pages_iter) {
npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
size_to_map = npages * desc_size;
dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
dma_sync_single_for_device(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
sg.length = ALIGN(size_to_map, MLX5_UMR_FLEX_ALIGNMENT);
if (pages_mapped + pages_iter >= pages_to_map)
mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
mlx5r_umr_update_offset(&wqe.ctrl_seg, idx * desc_size);
err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);
}
sg.length = orig_sg_length;
mlx5r_umr_unmap_free_xlt(dev, xlt, &sg);
return err;
}