RDMA/mlx5: Store in the cache mkeys instead of mrs

Currently, the driver stores the whole mlx5_ib_mr struct in the cache
entries, although the only part of the cached MR that is actually used
is its mkey. Store only the mkey in the cache.

Link: https://lore.kernel.org/r/20220726071911.122765-5-michaelgur@nvidia.com
Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
 2 files changed, 97 insertions(+), 129 deletions(-)
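
Since an mkey is a plain u32, the per-entry xarray can hold it directly as a
tagged integer (xa_mk_value()) instead of a pointer to a kzalloc'd
mlx5_ib_mr, which is the idiom the patch below builds on. A minimal,
self-contained sketch of that idiom; the demo_* names are illustrative and
not part of the patch:

#include <linux/xarray.h>

static DEFINE_XARRAY(demo_mkeys);

/* Store a u32 mkey in the xarray as a value entry rather than a pointer. */
static int demo_push_mkey(unsigned long index, u32 mkey)
{
        return xa_err(xa_store(&demo_mkeys, index, xa_mk_value(mkey),
                               GFP_KERNEL));
}

/* Erase the entry and unpack the integer again with xa_to_value(). */
static u32 demo_pop_mkey(unsigned long index)
{
        void *entry = xa_erase(&demo_mkeys, index);

        return entry ? (u32)xa_to_value(entry) : 0;
}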

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h

@@ -619,6 +619,7 @@ struct mlx5_ib_mkey {
         unsigned int ndescs;
         struct wait_queue_head wait;
         refcount_t usecount;
+        struct mlx5_cache_ent *cache_ent;
 };
 
 #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
@@ -641,18 +642,9 @@ struct mlx5_ib_mr {
         struct ib_mr ibmr;
         struct mlx5_ib_mkey mmkey;
 
-        /* User MR data */
-        struct mlx5_cache_ent *cache_ent;
-        /* Everything after cache_ent is zero'd when MR allocated */
         struct ib_umem *umem;
 
         union {
-                /* Used only while the MR is in the cache */
-                struct {
-                        u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
-                        struct mlx5_async_work cb_work;
-                };
                 /* Used only by kernel MRs (umem == NULL) */
                 struct {
                         void *descs;
@@ -692,12 +684,6 @@ struct mlx5_ib_mr {
         };
 };
 
-/* Zero the fields in the mr that are variant depending on usage */
-static inline void mlx5_clear_mr(struct mlx5_ib_mr *mr)
-{
-        memset_after(mr, 0, cache_ent);
-}
-
 static inline bool is_odp_mr(struct mlx5_ib_mr *mr)
 {
         return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem &&
@@ -768,6 +754,16 @@ struct mlx5_cache_ent {
         struct delayed_work dwork;
 };
 
+struct mlx5r_async_create_mkey {
+        union {
+                u32 in[MLX5_ST_SZ_BYTES(create_mkey_in)];
+                u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
+        };
+        struct mlx5_async_work cb_work;
+        struct mlx5_cache_ent *ent;
+        u32 mkey;
+};
+
 struct mlx5_mr_cache {
         struct workqueue_struct *wq;
         struct mlx5_cache_ent ent[MAX_MR_CACHE_ENTRIES];

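With no in-cache mlx5_ib_mr left to embed the command buffers in, the new
mlx5r_async_create_mkey struct above carries the CREATE_MKEY input/output
buffers and the async work item on its own, and the completion path recovers
it with container_of(). A minimal sketch of that pattern, assuming
simplified fields and demo_* names that are not the driver's:

#include <linux/mlx5/driver.h>
#include <linux/slab.h>

struct demo_async_req {
        u32 out[16];                    /* stands in for create_mkey_out */
        struct mlx5_async_work cb_work;
        u32 mkey;
};

static void demo_create_callback(int status, struct mlx5_async_work *context)
{
        /* Recover the request from the embedded work item. */
        struct demo_async_req *req =
                container_of(context, struct demo_async_req, cb_work);

        if (status)
                pr_warn("async mkey create failed: %d\n", status);
        else
                pr_debug("mkey 0x%x created\n", req->mkey);
        kfree(req);     /* the request owns its own lifetime */
}
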
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c

@@ -82,15 +82,14 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
         MLX5_SET64(mkc, mkc, start_addr, start_addr);
 }
 
-static void assign_mkey_variant(struct mlx5_ib_dev *dev,
-                                struct mlx5_ib_mkey *mkey, u32 *in)
+static void assign_mkey_variant(struct mlx5_ib_dev *dev, u32 *mkey, u32 *in)
 {
         u8 key = atomic_inc_return(&dev->mkey_var);
         void *mkc;
 
         mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
         MLX5_SET(mkc, mkc, mkey_7_0, key);
-        mkey->key = key;
+        *mkey = key;
 }
 
 static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev,
@@ -98,7 +97,7 @@ static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev,
 {
         int ret;
 
-        assign_mkey_variant(dev, mkey, in);
+        assign_mkey_variant(dev, &mkey->key, in);
         ret = mlx5_core_create_mkey(dev->mdev, &mkey->key, in, inlen);
         if (!ret)
                 init_waitqueue_head(&mkey->wait);
@@ -106,17 +105,18 @@ static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev,
         return ret;
 }
 
-static int
-mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev,
-                       struct mlx5_ib_mkey *mkey,
-                       struct mlx5_async_ctx *async_ctx,
-                       u32 *in, int inlen, u32 *out, int outlen,
-                       struct mlx5_async_work *context)
+static int mlx5_ib_create_mkey_cb(struct mlx5r_async_create_mkey *async_create)
 {
-        MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
-        assign_mkey_variant(dev, mkey, in);
-        return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen,
-                                create_mkey_callback, context);
+        struct mlx5_ib_dev *dev = async_create->ent->dev;
+        size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+        size_t outlen = MLX5_ST_SZ_BYTES(create_mkey_out);
+
+        MLX5_SET(create_mkey_in, async_create->in, opcode,
+                 MLX5_CMD_OP_CREATE_MKEY);
+        assign_mkey_variant(dev, &async_create->mkey, async_create->in);
+        return mlx5_cmd_exec_cb(&dev->async_ctx, async_create->in, inlen,
+                                async_create->out, outlen, create_mkey_callback,
+                                &async_create->cb_work);
 }
 
 static int mr_cache_max_order(struct mlx5_ib_dev *dev);
@@ -209,48 +209,47 @@ static void undo_push_reserve_mkey(struct mlx5_cache_ent *ent)
         WARN_ON(old);
 }
 
-static void push_to_reserved(struct mlx5_cache_ent *ent, struct mlx5_ib_mr *mr)
+static void push_to_reserved(struct mlx5_cache_ent *ent, u32 mkey)
 {
         void *old;
 
-        old = __xa_store(&ent->mkeys, ent->stored, mr, 0);
+        old = __xa_store(&ent->mkeys, ent->stored, xa_mk_value(mkey), 0);
         WARN_ON(old);
         ent->stored++;
 }
 
-static struct mlx5_ib_mr *pop_stored_mkey(struct mlx5_cache_ent *ent)
+static u32 pop_stored_mkey(struct mlx5_cache_ent *ent)
 {
-        struct mlx5_ib_mr *mr;
-        void *old;
+        void *old, *xa_mkey;
 
         ent->stored--;
         ent->reserved--;
 
         if (ent->stored == ent->reserved) {
-                mr = __xa_erase(&ent->mkeys, ent->stored);
-                WARN_ON(!mr);
-                return mr;
+                xa_mkey = __xa_erase(&ent->mkeys, ent->stored);
+                WARN_ON(!xa_mkey);
+                return (u32)xa_to_value(xa_mkey);
         }
 
-        mr = __xa_store(&ent->mkeys, ent->stored, XA_ZERO_ENTRY,
-                        GFP_KERNEL);
-        WARN_ON(!mr || xa_is_err(mr));
+        xa_mkey = __xa_store(&ent->mkeys, ent->stored, XA_ZERO_ENTRY,
+                             GFP_KERNEL);
+        WARN_ON(!xa_mkey || xa_is_err(xa_mkey));
         old = __xa_erase(&ent->mkeys, ent->reserved);
         WARN_ON(old);
-        return mr;
+        return (u32)xa_to_value(xa_mkey);
 }
 
 static void create_mkey_callback(int status, struct mlx5_async_work *context)
 {
-        struct mlx5_ib_mr *mr =
-                container_of(context, struct mlx5_ib_mr, cb_work);
-        struct mlx5_cache_ent *ent = mr->cache_ent;
+        struct mlx5r_async_create_mkey *mkey_out =
+                container_of(context, struct mlx5r_async_create_mkey, cb_work);
+        struct mlx5_cache_ent *ent = mkey_out->ent;
         struct mlx5_ib_dev *dev = ent->dev;
         unsigned long flags;
 
         if (status) {
-                create_mkey_warn(dev, status, mr->out);
-                kfree(mr);
+                create_mkey_warn(dev, status, mkey_out->out);
+                kfree(mkey_out);
                 xa_lock_irqsave(&ent->mkeys, flags);
                 undo_push_reserve_mkey(ent);
                 WRITE_ONCE(dev->fill_delay, 1);
@@ -259,18 +258,16 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context)
                 return;
         }
 
-        mr->mmkey.type = MLX5_MKEY_MR;
-        mr->mmkey.key |= mlx5_idx_to_mkey(
-                MLX5_GET(create_mkey_out, mr->out, mkey_index));
-        init_waitqueue_head(&mr->mmkey.wait);
+        mkey_out->mkey |= mlx5_idx_to_mkey(
+                MLX5_GET(create_mkey_out, mkey_out->out, mkey_index));
         WRITE_ONCE(dev->cache.last_add, jiffies);
 
         xa_lock_irqsave(&ent->mkeys, flags);
-        push_to_reserved(ent, mr);
+        push_to_reserved(ent, mkey_out->mkey);
         /* If we are doing fill_to_high_water then keep going. */
         queue_adjust_cache_locked(ent);
         xa_unlock_irqrestore(&ent->mkeys, flags);
+        kfree(mkey_out);
 }
 
 static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs)
@@ -292,15 +289,8 @@ static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs)
         return ret;
 }
 
-static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
+static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc)
 {
-        struct mlx5_ib_mr *mr;
-
-        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
-        if (!mr)
-                return NULL;
-        mr->cache_ent = ent;
-
         set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
         MLX5_SET(mkc, mkc, free, 1);
         MLX5_SET(mkc, mkc, umr_en, 1);
@@ -310,106 +300,82 @@ static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
         MLX5_SET(mkc, mkc, translations_octword_size,
                  get_mkc_octo_size(ent->access_mode, ent->ndescs));
         MLX5_SET(mkc, mkc, log_page_size, ent->page);
-        return mr;
 }
 
 /* Asynchronously schedule new MRs to be populated in the cache. */
 static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
 {
-        size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
-        struct mlx5_ib_mr *mr;
+        struct mlx5r_async_create_mkey *async_create;
         void *mkc;
-        u32 *in;
         int err = 0;
         int i;
 
-        in = kzalloc(inlen, GFP_KERNEL);
-        if (!in)
-                return -ENOMEM;
-
-        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
         for (i = 0; i < num; i++) {
-                mr = alloc_cache_mr(ent, mkc);
-                if (!mr) {
-                        err = -ENOMEM;
-                        goto free_in;
-                }
+                async_create = kzalloc(sizeof(struct mlx5r_async_create_mkey),
+                                       GFP_KERNEL);
+                if (!async_create)
+                        return -ENOMEM;
+                mkc = MLX5_ADDR_OF(create_mkey_in, async_create->in,
+                                   memory_key_mkey_entry);
+                set_cache_mkc(ent, mkc);
+                async_create->ent = ent;
 
                 err = push_mkey(ent, true, NULL);
                 if (err)
-                        goto free_mr;
+                        goto free_async_create;
 
-                err = mlx5_ib_create_mkey_cb(ent->dev, &mr->mmkey,
-                                             &ent->dev->async_ctx, in, inlen,
-                                             mr->out, sizeof(mr->out),
-                                             &mr->cb_work);
+                err = mlx5_ib_create_mkey_cb(async_create);
                 if (err) {
                         mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err);
                         goto err_undo_reserve;
                 }
         }
 
-        kfree(in);
         return 0;
 
 err_undo_reserve:
         xa_lock_irq(&ent->mkeys);
         undo_push_reserve_mkey(ent);
         xa_unlock_irq(&ent->mkeys);
-free_mr:
-        kfree(mr);
-free_in:
-        kfree(in);
+free_async_create:
+        kfree(async_create);
         return err;
 }
 
 /* Synchronously create a MR in the cache */
-static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent)
+static int create_cache_mkey(struct mlx5_cache_ent *ent, u32 *mkey)
 {
         size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
-        struct mlx5_ib_mr *mr;
         void *mkc;
         u32 *in;
         int err;
 
         in = kzalloc(inlen, GFP_KERNEL);
         if (!in)
-                return ERR_PTR(-ENOMEM);
+                return -ENOMEM;
         mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+        set_cache_mkc(ent, mkc);
 
-        mr = alloc_cache_mr(ent, mkc);
-        if (!mr) {
-                err = -ENOMEM;
-                goto free_in;
-        }
-
-        err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey.key, in, inlen);
+        err = mlx5_core_create_mkey(ent->dev->mdev, mkey, in, inlen);
         if (err)
-                goto free_mr;
+                goto free_in;
 
-        init_waitqueue_head(&mr->mmkey.wait);
-        mr->mmkey.type = MLX5_MKEY_MR;
         WRITE_ONCE(ent->dev->cache.last_add, jiffies);
-        kfree(in);
-        return mr;
-
-free_mr:
-        kfree(mr);
 free_in:
         kfree(in);
-        return ERR_PTR(err);
+        return err;
 }
 
 static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
 {
-        struct mlx5_ib_mr *mr;
+        u32 mkey;
 
         lockdep_assert_held(&ent->mkeys.xa_lock);
         if (!ent->stored)
                 return;
-        mr = pop_stored_mkey(ent);
+        mkey = pop_stored_mkey(ent);
         xa_unlock_irq(&ent->mkeys);
-        mlx5_core_destroy_mkey(ent->dev->mdev, mr->mmkey.key);
-        kfree(mr);
+        mlx5_core_destroy_mkey(ent->dev->mdev, mkey);
         xa_lock_irq(&ent->mkeys);
 }
@@ -678,11 +644,15 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
                                        int access_flags)
 {
         struct mlx5_ib_mr *mr;
+        int err;
 
         /* Matches access in alloc_cache_mr() */
         if (!mlx5r_umr_can_reconfig(dev, 0, access_flags))
                 return ERR_PTR(-EOPNOTSUPP);
 
+        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+        if (!mr)
+                return ERR_PTR(-ENOMEM);
+
         xa_lock_irq(&ent->mkeys);
         ent->in_use++;
@@ -690,20 +660,22 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
                 queue_adjust_cache_locked(ent);
                 ent->miss++;
                 xa_unlock_irq(&ent->mkeys);
-                mr = create_cache_mr(ent);
-                if (IS_ERR(mr)) {
+                err = create_cache_mkey(ent, &mr->mmkey.key);
+                if (err) {
                         xa_lock_irq(&ent->mkeys);
                         ent->in_use--;
                         xa_unlock_irq(&ent->mkeys);
-                        return mr;
+                        kfree(mr);
+                        return ERR_PTR(err);
                 }
         } else {
-                mr = pop_stored_mkey(ent);
+                mr->mmkey.key = pop_stored_mkey(ent);
                 queue_adjust_cache_locked(ent);
                 xa_unlock_irq(&ent->mkeys);
-
-                mlx5_clear_mr(mr);
         }
+        mr->mmkey.cache_ent = ent;
+        mr->mmkey.type = MLX5_MKEY_MR;
+        init_waitqueue_head(&mr->mmkey.wait);
         return mr;
 }
@@ -711,15 +683,14 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
 {
         struct mlx5_mr_cache *cache = &dev->cache;
         struct mlx5_cache_ent *ent = &cache->ent[c];
-        struct mlx5_ib_mr *mr;
+        u32 mkey;
 
         cancel_delayed_work(&ent->dwork);
         xa_lock_irq(&ent->mkeys);
         while (ent->stored) {
-                mr = pop_stored_mkey(ent);
+                mkey = pop_stored_mkey(ent);
                 xa_unlock_irq(&ent->mkeys);
-                mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key);
-                kfree(mr);
+                mlx5_core_destroy_mkey(dev->mdev, mkey);
                 xa_lock_irq(&ent->mkeys);
         }
         xa_unlock_irq(&ent->mkeys);
@@ -1391,7 +1362,7 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
         struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
 
         /* We only track the allocated sizes of MRs from the cache */
-        if (!mr->cache_ent)
+        if (!mr->mmkey.cache_ent)
                 return false;
         if (!mlx5r_umr_can_load_pas(dev, new_umem->length))
                 return false;
@@ -1400,7 +1371,7 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
                 mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova);
         if (WARN_ON(!*page_size))
                 return false;
-        return (1ULL << mr->cache_ent->order) >=
+        return (1ULL << mr->mmkey.cache_ent->order) >=
                 ib_umem_num_dma_blocks(new_umem, *page_size);
 }
@@ -1641,16 +1612,17 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
         }
 
         /* Stop DMA */
-        if (mr->cache_ent) {
-                xa_lock_irq(&mr->cache_ent->mkeys);
-                mr->cache_ent->in_use--;
-                xa_unlock_irq(&mr->cache_ent->mkeys);
+        if (mr->mmkey.cache_ent) {
+                xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
+                mr->mmkey.cache_ent->in_use--;
+                xa_unlock_irq(&mr->mmkey.cache_ent->mkeys);
 
                 if (mlx5r_umr_revoke_mr(mr) ||
-                    push_mkey(mr->cache_ent, false, mr))
-                        mr->cache_ent = NULL;
+                    push_mkey(mr->mmkey.cache_ent, false,
+                              xa_mk_value(mr->mmkey.key)))
+                        mr->mmkey.cache_ent = NULL;
         }
 
-        if (!mr->cache_ent) {
+        if (!mr->mmkey.cache_ent) {
                 rc = destroy_mkey(to_mdev(mr->ibmr.device), mr);
                 if (rc)
                         return rc;
@@ -1667,10 +1639,10 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
                 mlx5_ib_free_odp_mr(mr);
         }
 
-        if (!mr->cache_ent) {
+        if (!mr->mmkey.cache_ent)
                 mlx5_free_priv_descs(mr);
-                kfree(mr);
-        }
+        kfree(mr);
 
         return 0;
 }
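
Taken together, the allocation path now splits cleanly: the mlx5_ib_mr
wrapper is always kzalloc'd per request, and only the raw mkey comes out of
(or goes back into) the cache. A condensed, illustrative restatement of the
new mlx5_mr_cache_alloc() flow, with the locking and miss accounting trimmed;
it reuses the helpers introduced by this patch and is not compilable outside
the driver:

static struct mlx5_ib_mr *demo_cache_alloc(struct mlx5_cache_ent *ent)
{
        struct mlx5_ib_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        int err;

        if (!mr)
                return ERR_PTR(-ENOMEM);
        if (ent->stored) {
                /* Cache hit: take a ready-made mkey out of the xarray. */
                mr->mmkey.key = pop_stored_mkey(ent);
        } else {
                /* Cache miss: create one synchronously into the new mr. */
                err = create_cache_mkey(ent, &mr->mmkey.key);
                if (err) {
                        kfree(mr);
                        return ERR_PTR(err);
                }
        }
        mr->mmkey.cache_ent = ent;
        mr->mmkey.type = MLX5_MKEY_MR;
        init_waitqueue_head(&mr->mmkey.wait);
        return mr;
}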