RDMA/mlx5: Use ib_umem_find_best_pgsz() for devx
Since devx uses the new rdma_for_each_block() to fill the PAS, it can also
use ib_umem_find_best_pgsz().

However, the umem construction in devx is complicated: the umem must still
respect all the HW limits, such as page_offset_quantized and the IOVA
alignment.

Since we don't know what the user intends to use the umem for, we have to
limit it to PAGE_SIZE.

There are users trying to mix umems with mkeys, so this makes them work
reliably, at least for an identity IOVA, by ensuring the IOVA matches the
selected page size.

This is the last user of mlx5_ib_cont_pages(), so that helper can also be
removed.
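In code, the selection this patch adds boils down to the following
(excerpted from devx_umem_reg_cmd_alloc() in the diff below): PAGE_SIZE is
the only candidate offered, and the page_offset bitmask (quantization 0)
lets the core reject any umem whose layout the HW field cannot describe.

	page_size = ib_umem_find_best_pgoff(
		obj->umem, PAGE_SIZE,
		__mlx5_page_offset_to_bitmask(__mlx5_bit_sz(umem, page_offset),
					      0));
	if (!page_size)		/* no admissible page size exists */
		return -EINVAL;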
Fixes: aeae94579c ("IB/mlx5: Add DEVX support for memory registration")
Link: https://lore.kernel.org/r/20201115114311.136250-7-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent c08fbdc577
commit 878f7b31c3
drivers/infiniband/hw/mlx5/devx.c

@@ -93,8 +93,6 @@ struct devx_async_event_file {
 struct devx_umem {
 	struct mlx5_core_dev *mdev;
 	struct ib_umem *umem;
-	u32 page_offset;
-	int page_shift;
 	u32 dinlen;
 	u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)];
 };
@@ -2057,7 +2055,6 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
 	size_t size;
 	u32 access;
 	int err;
-	u32 page_mask;
 
 	if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) ||
 	    uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN))
@@ -2078,46 +2075,55 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
 	obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access);
 	if (IS_ERR(obj->umem))
 		return PTR_ERR(obj->umem);
-
-	mlx5_ib_cont_pages(obj->umem, obj->umem->address,
-			   MLX5_MKEY_PAGE_SHIFT_MASK, &obj->page_shift);
-	page_mask = (1 << obj->page_shift) - 1;
-	obj->page_offset = obj->umem->address & page_mask;
-
 	return 0;
 }
 
-static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs,
+static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev,
+				   struct uverbs_attr_bundle *attrs,
 				   struct devx_umem *obj,
 				   struct devx_umem_reg_cmd *cmd)
 {
-	cmd->inlen =
-		MLX5_ST_SZ_BYTES(create_umem_in) +
-		(MLX5_ST_SZ_BYTES(mtt) *
-		 ib_umem_num_dma_blocks(obj->umem, 1UL << obj->page_shift));
-	cmd->in = uverbs_zalloc(attrs, cmd->inlen);
-	return PTR_ERR_OR_ZERO(cmd->in);
-}
-
-static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
-				    struct devx_umem *obj,
-				    struct devx_umem_reg_cmd *cmd)
-{
-	void *umem;
+	unsigned int page_size;
 	__be64 *mtt;
+	void *umem;
+
+	/*
+	 * We don't know what the user intends to use this umem for, but the HW
+	 * restrictions must be met. MR, doorbell records, QP, WQ and CQ all
+	 * have different requirements. Since we have no idea how to sort this
+	 * out, only support PAGE_SIZE with the expectation that userspace will
+	 * provide the necessary alignments inside the known PAGE_SIZE and that
+	 * FW will check everything.
+	 */
+	page_size = ib_umem_find_best_pgoff(
+		obj->umem, PAGE_SIZE,
+		__mlx5_page_offset_to_bitmask(__mlx5_bit_sz(umem, page_offset),
+					      0));
+	if (!page_size)
+		return -EINVAL;
+
+	cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) +
+		     (MLX5_ST_SZ_BYTES(mtt) *
+		      ib_umem_num_dma_blocks(obj->umem, page_size));
+	cmd->in = uverbs_zalloc(attrs, cmd->inlen);
+	if (IS_ERR(cmd->in))
+		return PTR_ERR(cmd->in);
 
 	umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem);
 	mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt);
 
 	MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM);
 	MLX5_SET64(umem, umem, num_of_mtt,
-		   ib_umem_num_dma_blocks(obj->umem, 1UL << obj->page_shift));
-	MLX5_SET(umem, umem, log_page_size, obj->page_shift -
-					    MLX5_ADAPTER_PAGE_SHIFT);
-	MLX5_SET(umem, umem, page_offset, obj->page_offset);
-	mlx5_ib_populate_pas(obj->umem, 1UL << obj->page_shift, mtt,
+		   ib_umem_num_dma_blocks(obj->umem, page_size));
+	MLX5_SET(umem, umem, log_page_size,
+		 order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
+	MLX5_SET(umem, umem, page_offset,
+		 ib_umem_dma_offset(obj->umem, page_size));
+
+	mlx5_ib_populate_pas(obj->umem, page_size, mtt,
 			     (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) |
 			     MLX5_IB_MTT_READ);
+	return 0;
 }
 
 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
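For reference, ib_umem_find_best_pgoff() (introduced earlier in this
series) is a thin wrapper over ib_umem_find_best_pgsz(): it computes the
umem's starting DMA offset and masks it with the HW's allowed page_offset
bits, so only page sizes whose resulting offset is representable survive.
A paraphrased sketch, close to the include/rdma/ib_umem.h definition of
this era (check the tree for the authoritative version):

	static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem,
							    unsigned long pgsz_bitmap,
							    u64 pgoff_bitmask)
	{
		struct scatterlist *sg = umem->sg_head.sgl;
		dma_addr_t dma_addr;

		/* Start of the umem's DMA mapping, incl. the sub-page offset */
		dma_addr = sg_dma_address(sg) + (umem->address & ~PAGE_MASK);
		/* Only page sizes whose page_offset fits the mask survive */
		return ib_umem_find_best_pgsz(umem, pgsz_bitmap,
					      dma_addr & pgoff_bitmask);
	}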
@@ -2144,12 +2150,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
 	if (err)
 		goto err_obj_free;
 
-	err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd);
+	err = devx_umem_reg_cmd_alloc(dev, attrs, obj, &cmd);
 	if (err)
 		goto err_umem_release;
 
-	devx_umem_reg_cmd_build(dev, obj, &cmd);
-
 	MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid);
 	err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out,
 			    sizeof(cmd.out));
drivers/infiniband/hw/mlx5/mem.c

@@ -36,61 +36,6 @@
 #include "mlx5_ib.h"
 #include <linux/jiffies.h>
 
-/* @umem: umem object to scan
- * @addr: ib virtual address requested by the user
- * @max_page_shift: high limit for page_shift - 0 means no limit
- * @shift: page shift for the compound pages found in the region
- */
-void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
-			unsigned long max_page_shift, int *shift)
-{
-	unsigned long tmp;
-	unsigned long m;
-	u64 base = ~0, p = 0;
-	u64 len, pfn;
-	int i = 0;
-	struct scatterlist *sg;
-	int entry;
-
-	if (umem->is_odp) {
-		struct ib_umem_odp *odp = to_ib_umem_odp(umem);
-
-		*shift = odp->page_shift;
-		return;
-	}
-
-	addr = addr >> PAGE_SHIFT;
-	tmp = (unsigned long)addr;
-	m = find_first_bit(&tmp, BITS_PER_LONG);
-	if (max_page_shift)
-		m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m);
-
-	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-		len = sg_dma_len(sg) >> PAGE_SHIFT;
-		pfn = sg_dma_address(sg) >> PAGE_SHIFT;
-		if (base + p != pfn) {
-			/* If either the offset or the new
-			 * base are unaligned update m
-			 */
-			tmp = (unsigned long)(pfn | p);
-			if (!IS_ALIGNED(tmp, 1 << m))
-				m = find_first_bit(&tmp, BITS_PER_LONG);
-
-			base = pfn;
-			p = 0;
-		}
-
-		p += len;
-		i += len;
-	}
-
-	if (i)
-		m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
-	else
-		m = 0;
-	*shift = PAGE_SHIFT + m;
-}
-
 /*
  * Fill in a physical address list. ib_umem_num_dma_blocks() entries will be
  * filled in the pas array.
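The driver-local scan removed above is superseded by the core's
ib_umem_find_best_pgsz(), which does the same job with bit arithmetic over
the scatterlist. A simplified illustration of the idea, not the verbatim
core code (see drivers/infiniband/core/umem.c for the real implementation):

	/* Simplified sketch of the core's page-size search: any bit
	 * position where a block's DMA address disagrees with its expected
	 * IOVA, or where an interior block boundary is unaligned,
	 * invalidates that page size and every larger one.
	 */
	static unsigned long best_pgsz_sketch(struct ib_umem *umem,
					      unsigned long pgsz_bitmap,
					      u64 iova)
	{
		struct scatterlist *sg;
		u64 mask = 0, va = iova;
		int i;

		for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
			mask |= sg_dma_address(sg) ^ va; /* VA/DMA mismatches */
			va += sg_dma_len(sg);
			if (i != umem->nmap - 1)
				mask |= va;	/* interior boundaries, too */
		}

		/* Keep only page sizes below the first broken bit, then
		 * return the largest surviving candidate.
		 */
		if (mask)
			pgsz_bitmap &= GENMASK_ULL(__ffs(mask), 0);
		return pgsz_bitmap ? rounddown_pow_of_two(pgsz_bitmap) : 0;
	}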
drivers/infiniband/hw/mlx5/mlx5_ib.h

@@ -40,8 +40,6 @@
 #define MLX5_IB_DEFAULT_UIDX 0xffffff
 #define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)
 
-#define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size)
-
 static __always_inline unsigned long
 __mlx5_log_page_size_to_bitmap(unsigned int log_pgsz_bits,
 			       unsigned int pgsz_shift)
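The page_offset counterpart used by the devx change,
__mlx5_page_offset_to_bitmask(), builds the mask of offset values the HW
field can encode; this hunk only shows its log_page_size sibling, so the
following is a hedged sketch of its likely shape (verify against
mlx5_ib.h in the tree):

	/* Sketch: a page_offset field of page_offset_bits bits, quantized
	 * by offset_quantized, can represent offsets whose set bits fall
	 * inside this mask. devx passes offset_quantized = 0 (no scaling).
	 */
	static __always_inline unsigned long
	__mlx5_page_offset_to_bitmask(unsigned int page_offset_bits,
				      unsigned int offset_quantized)
	{
		return GENMASK(page_offset_bits - 1, 0) << offset_quantized;
	}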
@@ -1296,9 +1294,6 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
 			    struct ib_port_attr *props);
 int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
 		       struct ib_port_attr *props);
-void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
-			unsigned long max_page_shift,
-			int *shift);
 void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
 			  u64 access_flags);
 void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);