rpcrdma: Implement generic device removal

Commit e87a911fed ("nvme-rdma: use ib_client API to detect device
removal") explains the benefits of handling device removal outside
of the CM event handler.

Sketch in an IB device removal notification mechanism that can be
used by both the client and server side RPC-over-RDMA transport
implementations.

Suggested-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
This commit is contained in:
Chuck Lever 2024-06-04 15:45:24 -04:00 committed by Anna Schumaker
parent acd9f2dd23
commit 7e86845a03
5 changed files with 258 additions and 4 deletions

View File

@ -0,0 +1,27 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2024, Oracle and/or its affiliates.
*/
#ifndef _LINUX_SUNRPC_RDMA_RN_H
#define _LINUX_SUNRPC_RDMA_RN_H
#include <rdma/ib_verbs.h>
/**
* struct rpcrdma_notification - request removal notification
* @rn_done: callback stored by rpcrdma_rn_register(); it is invoked
*           with this object as its argument when the monitored
*           device is being removed
* @rn_index: index at which this object is stored in the per-device
*            set of registrants; filled in by rpcrdma_rn_register()
*            and used by rpcrdma_rn_unregister() to erase the entry
*
* One of these is handed to rpcrdma_rn_register() by each consumer
* that wants to hear about removal of a particular ib_device.
*/
struct rpcrdma_notification {
void (*rn_done)(struct rpcrdma_notification *rn);
u32 rn_index;
};
/*
* Per-registrant API: start and stop watching one ib_device for
* removal. @done is the callback to run when @device goes away.
*/
int rpcrdma_rn_register(struct ib_device *device,
struct rpcrdma_notification *rn,
void (*done)(struct rpcrdma_notification *rn));
void rpcrdma_rn_unregister(struct ib_device *device,
struct rpcrdma_notification *rn);
/*
* Module-wide API: attach and detach the "rpcrdma" ib_client.
* Registration returns zero on success or a negative errno.
*/
int rpcrdma_ib_client_register(void);
void rpcrdma_ib_client_unregister(void);
#endif /* _LINUX_SUNRPC_RDMA_RN_H */

View File

@ -2220,6 +2220,40 @@ TRACE_EVENT(svcrdma_sq_post_err,
)
);
/*
* Event class shared by the rpcrdma ib_client tracepoints. Each
* event takes an ib_device, records a copy of its name, and prints
* it as "device=<name>".
*/
DECLARE_EVENT_CLASS(rpcrdma_client_device_class,
TP_PROTO(
const struct ib_device *device
),
TP_ARGS(device),
TP_STRUCT__entry(
__string(name, device->name)
),
TP_fast_assign(
__assign_str(name);
),
TP_printk("device=%s",
__get_str(name)
)
);
/*
* Helper that instantiates one event of rpcrdma_client_device_class
* named @name, taking a single ib_device argument.
*/
#define DEFINE_CLIENT_DEVICE_EVENT(name) \
DEFINE_EVENT(rpcrdma_client_device_class, name, \
TP_PROTO( \
const struct ib_device *device \
), \
TP_ARGS(device) \
)
/*
* Where the ib_client code fires these events:
*   rpcrdma_client_completion      - rpcrdma_rn_release(), before complete()
*   rpcrdma_client_add_one         - end of rpcrdma_add_one()
*   rpcrdma_client_remove_one      - entry to rpcrdma_remove_one()
*   rpcrdma_client_wait_on         - rpcrdma_remove_one(), before it waits
*   rpcrdma_client_remove_one_done - rpcrdma_remove_one(), before kfree()
*/
DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_completion);
DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_add_one);
DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one);
DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_wait_on);
DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one_done);
#endif /* _TRACE_RPCRDMA_H */
#include <trace/define_trace.h>

View File

@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
# Objects linked into rpcrdma.o. ib_client.o provides the ib_client
# registration and device removal notification code.
rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o ib_client.o \
	svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
	svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \
	svc_rdma_pcl.o module.o

View File

@ -0,0 +1,181 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright (c) 2024 Oracle. All rights reserved.
*/
/* #include <linux/module.h>
#include <linux/slab.h> */
#include <linux/xarray.h>
#include <linux/types.h>
#include <linux/kref.h>
#include <linux/completion.h>
#include <linux/sunrpc/svc_rdma.h>
#include <linux/sunrpc/rdma_rn.h>
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
/*
* Per-ib_device private data for rpcrdma. One is allocated in
* rpcrdma_add_one() and freed in rpcrdma_remove_one().
*/
struct rpcrdma_device {
/* Initialized in rpcrdma_add_one(); rpcrdma_rn_register() takes a
* reference and rpcrdma_rn_unregister() drops one. */
struct kref rd_kref;
/* Bit flags; see RPCRDMA_RD_F_REMOVING */
unsigned long rd_flags;
/* The ib_device this data belongs to; used for tracing */
struct ib_device *rd_device;
/* Registered rpcrdma_notification objects, keyed by rn_index */
struct xarray rd_xa;
/* Completed by rpcrdma_rn_release() when rd_kref drops to zero;
* rpcrdma_remove_one() waits on it. */
struct completion rd_done;
};
/*
* Bit number in rd_flags. Set by rpcrdma_remove_one(); while it is
* set, rpcrdma_rn_register() refuses new registrations.
*/
#define RPCRDMA_RD_F_REMOVING (0)
/* Forward declaration: rpcrdma_get_client_data() needs its address */
static struct ib_client rpcrdma_ib_client;
/*
 * Look up rpcrdma's private data for @device.
 *
 * A listener has no device of its own and is therefore never
 * registered, so @device can be NULL here. ib_get_client_data()
 * does not make that NULL check on our behalf, hence the guard.
 */
static struct rpcrdma_device *rpcrdma_get_client_data(struct ib_device *device)
{
	return device ? ib_get_client_data(device, &rpcrdma_ib_client) : NULL;
}
/* Defined later in this file; needed here to undo kref_get() on failure */
static void rpcrdma_rn_release(struct kref *kref);

/**
 * rpcrdma_rn_register - register to get device removal notifications
 * @device: device to monitor
 * @rn: notification object that wishes to be notified
 * @done: callback to notify caller of device removal
 *
 * Returns zero on success. The callback in rn_done is guaranteed
 * to be invoked when the device is removed, unless this notification
 * is unregistered first.
 *
 * On failure, a negative errno is returned and no reference on the
 * device's private data is retained:
 *   %-ENETUNREACH - @device has no rpcrdma private data (this
 *                   includes a NULL @device) or is being removed
 *   %-ENOMEM      - @rn could not be added to the set of registrants
 */
int rpcrdma_rn_register(struct ib_device *device,
			struct rpcrdma_notification *rn,
			void (*done)(struct rpcrdma_notification *rn))
{
	struct rpcrdma_device *rd = rpcrdma_get_client_data(device);

	if (!rd || test_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags))
		return -ENETUNREACH;

	/*
	 * Install the callback before @rn becomes visible in rd_xa:
	 * rpcrdma_remove_one() calls rn_done on every entry it finds.
	 */
	rn->rn_done = done;

	/* Hold a reference across xa_alloc(), which may sleep. */
	kref_get(&rd->rd_kref);
	if (xa_alloc(&rd->rd_xa, &rn->rn_index, rn, xa_limit_32b,
		     GFP_KERNEL) < 0) {
		/*
		 * Nothing was registered, so drop the reference again.
		 * Otherwise rpcrdma_remove_one() would wait forever for
		 * an unregister call that never comes.
		 */
		kref_put(&rd->rd_kref, rpcrdma_rn_release);
		return -ENOMEM;
	}
	return 0;
}
/*
 * kref release callback for rd_kref: runs when the last reference
 * is dropped and wakes up the waiter in rpcrdma_remove_one().
 */
static void rpcrdma_rn_release(struct kref *kref)
{
	struct rpcrdma_device *rd;

	rd = container_of(kref, struct rpcrdma_device, rd_kref);

	trace_rpcrdma_client_completion(rd->rd_device);
	complete(&rd->rd_done);
}
/**
 * rpcrdma_rn_unregister - stop device removal notifications
 * @device: monitored device
 * @rn: notification object that no longer wishes to be notified
 *
 * Erases @rn from the device's set of registrants and drops the
 * reference taken at registration time. Does nothing when @device
 * has no rpcrdma private data.
 */
void rpcrdma_rn_unregister(struct ib_device *device,
			   struct rpcrdma_notification *rn)
{
	struct rpcrdma_device *rd;

	rd = rpcrdma_get_client_data(device);
	if (rd) {
		xa_erase(&rd->rd_xa, rn->rn_index);
		kref_put(&rd->rd_kref, rpcrdma_rn_release);
	}
}
/**
 * rpcrdma_add_one - ib_client device insertion callback
 * @device: device about to be inserted
 *
 * Allocates and initializes the rpcrdma private data for @device
 * and attaches it as this client's data.
 *
 * Returns zero on success, or %-ENOMEM if the private data could
 * not be allocated.
 */
static int rpcrdma_add_one(struct ib_device *device)
{
	struct rpcrdma_device *rd = kzalloc(sizeof(*rd), GFP_KERNEL);

	if (!rd)
		return -ENOMEM;

	rd->rd_device = device;
	kref_init(&rd->rd_kref);
	init_completion(&rd->rd_done);
	xa_init_flags(&rd->rd_xa, XA_FLAGS_ALLOC1);

	ib_set_client_data(device, &rpcrdma_ib_client, rd);
	trace_rpcrdma_client_add_one(device);
	return 0;
}
/**
 * rpcrdma_remove_one - ib_client device removal callback
 * @device: device about to be removed
 * @client_data: this module's private per-device data
 *
 * Upon return, all transports associated with @device have divested
 * themselves from IB hardware resources.
 */
static void rpcrdma_remove_one(struct ib_device *device,
			       void *client_data)
{
	struct rpcrdma_device *rd = client_data;
	struct rpcrdma_notification *rn;
	unsigned long index;

	trace_rpcrdma_client_remove_one(device);

	/* From here on, rpcrdma_rn_register() refuses new registrants. */
	set_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags);
	xa_for_each(&rd->rd_xa, index, rn)
		rn->rn_done(rn);

	/*
	 * Drop the reference taken by kref_init() in rpcrdma_add_one().
	 * Wait only if there are still outstanding notification
	 * registrants for this device.
	 */
	if (!refcount_dec_and_test(&rd->rd_kref.refcount)) {
		trace_rpcrdma_client_wait_on(device);
		wait_for_completion(&rd->rd_done);
	}

	trace_rpcrdma_client_remove_one_done(device);

	/*
	 * Pairs with xa_init_flags() in rpcrdma_add_one(): release any
	 * memory the xarray accumulated before freeing its container.
	 */
	xa_destroy(&rd->rd_xa);
	kfree(rd);
}
/*
* The ib_client instance for this module: names the client "rpcrdma"
* and supplies the per-device add and remove callbacks.
*/
static struct ib_client rpcrdma_ib_client = {
.name = "rpcrdma",
.add = rpcrdma_add_one,
.remove = rpcrdma_remove_one,
};
/**
* rpcrdma_ib_client_unregister - unregister ib_client for xprtrdma
*
* Passes rpcrdma_ib_client to ib_unregister_client().
*
* NOTE (cel): watch for orphaned rpcrdma_device objects on module
* unload.
*/
void rpcrdma_ib_client_unregister(void)
{
ib_unregister_client(&rpcrdma_ib_client);
}
/**
* rpcrdma_ib_client_register - register ib_client for rpcrdma
*
* Passes rpcrdma_ib_client to ib_register_client() and hands its
* result back to the caller unchanged.
*
* Returns zero on success, or a negative errno.
*/
int rpcrdma_ib_client_register(void)
{
return ib_register_client(&rpcrdma_ib_client);
}

View File

@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sunrpc/svc_rdma.h>
#include <linux/sunrpc/rdma_rn.h>
#include <asm/swab.h>
@ -30,21 +31,32 @@ static void __exit rpc_rdma_cleanup(void)
{
/*
* Tear down in the reverse of the order used by rpc_rdma_init():
* both transports first, then the ib_client.
*/
xprt_rdma_cleanup();
svc_rdma_cleanup();
rpcrdma_ib_client_unregister();
}
/*
 * Module init: register the ib_client, then initialize svc_rdma,
 * then xprt_rdma. If a step fails, the steps that already succeeded
 * are undone in reverse order and that step's error code is
 * returned. Returns zero when all three steps succeed.
 */
static int __init rpc_rdma_init(void)
{
	int rc;

	rc = rpcrdma_ib_client_register();
	if (rc)
		goto out_rc;

	rc = svc_rdma_init();
	if (rc)
		goto out_ib_client;

	rc = xprt_rdma_init();
	if (rc)
		goto out_svc_rdma;

	return 0;

out_svc_rdma:
	svc_rdma_cleanup();
out_ib_client:
	rpcrdma_ib_client_unregister();
out_rc:
	return rc;
}