RDMA/rtrs-srv: Don't guard the whole __alloc_srv with srv_mutex
The purpose of srv_mutex is to protect srv_list, as in put_srv, so there
is no need to hold it while allocating memory for srv, which can be time
consuming. Otherwise, on a machine with limited memory,
rtrs_srv_close_work can be blocked for a long time, because
get_or_create_srv holds the mutex while it is still waiting for memory,
as the hung task report below shows.
INFO: task kworker/1:1:27478 blocked for more than 120 seconds.
Tainted: G O 4.14.171-1-storage #4.14.171-1.3~deb9
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kworker/1:1 D 0 27478 2 0x80000000
Workqueue: rtrs_server_wq rtrs_srv_close_work [rtrs_server]
Call Trace:
? __schedule+0x38c/0x7e0
schedule+0x32/0x80
schedule_preempt_disabled+0xa/0x10
__mutex_lock.isra.2+0x25e/0x4d0
? put_srv+0x44/0x100 [rtrs_server]
put_srv+0x44/0x100 [rtrs_server]
rtrs_srv_close_work+0x16c/0x280 [rtrs_server]
process_one_work+0x1c5/0x3c0
worker_thread+0x47/0x3e0
kthread+0xfc/0x130
? trace_event_raw_event_workqueue_execute_start+0xa0/0xa0
? kthread_create_on_node+0x70/0x70
ret_from_fork+0x1f/0x30
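
To make the failure mode concrete, here is a minimal hypothetical sketch
of the pattern (the obj/obj_ctx types and function names are
illustrative, not the rtrs code itself):

#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/slab.h>

struct obj { struct list_head entry; };
struct obj_ctx { struct mutex lock; struct list_head list; };	/* init elided */

/* Thread A: allocates under the mutex. kzalloc(GFP_KERNEL) may sleep
 * in reclaim for a long time on a memory-starved machine. */
static struct obj *create_locked(struct obj_ctx *ctx)
{
	struct obj *o;

	mutex_lock(&ctx->lock);
	o = kzalloc(sizeof(*o), GFP_KERNEL);	/* may block with lock held */
	if (o)
		list_add(&o->entry, &ctx->list);
	mutex_unlock(&ctx->lock);
	return o;
}

/* Thread B (the close work): stuck in mutex_lock() for as long as
 * thread A sits in the allocator, which is what the trace above
 * shows for put_srv(). */
static void destroy(struct obj_ctx *ctx, struct obj *o)
{
	mutex_lock(&ctx->lock);
	list_del(&o->entry);
	mutex_unlock(&ctx->lock);
	kfree(o);
}

Moving the allocation out of the locked region, as this patch does,
bounds how long the close path can be forced to wait.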
Let's move all the logic from __find_srv_and_get and __alloc_srv into
get_or_create_srv and remove the two functions. Then it is safe for
multiple processes to access the same srv, since it is protected by
srv_mutex.
And since we don't want to allocate chunks while srv_mutex is held,
check srv->refcount after getting srv, because the chunks might not
have been allocated yet.
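
Condensed from the hunks below, the resulting order is: publish the srv
on ctx->srv_list with its refcount still zero, drop srv_mutex, do the
slow chunk allocation unlocked, and only then set the refcount to one.
A sketch of the three cooperating fragments (alloc_chunks() is an
illustrative stand-in for the kcalloc()/mempool_alloc() loop, and error
unwinding is elided):

	/* creator, under ctx->srv_mutex: kzalloc() leaves refcount at 0 */
	srv = kzalloc(sizeof(*srv), GFP_KERNEL);
	list_add(&srv->ctx_list, &ctx->srv_list);	/* visible, not yet usable */
	mutex_unlock(&ctx->srv_mutex);

	if (alloc_chunks(srv))			/* slow, no lock held */
		return NULL;			/* error unwinding elided */
	refcount_set(&srv->refcount, 1);	/* now fully initialized */

	/* concurrent lookup, under ctx->srv_mutex: a half-built srv is
	 * skipped because refcount_inc_not_zero() fails while it is 0 */

	/* connect path: a freshly created srv may still have refcount == 0,
	 * so treat it like an allocation failure and reject for now */
	if (!srv || refcount_read(&srv->refcount) == 0)
		goto reject_w_err;		/* err = -ENOMEM */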
Fixes: 9cb8374804 ("RDMA/rtrs: server: main functionality")
Link: https://lore.kernel.org/r/20201023074353.21946-6-jinpu.wang@cloud.ionos.com
Signed-off-by: Guoqing Jiang <guoqing.jiang@cloud.ionos.com>
Signed-off-by: Jack Wang <jinpu.wang@cloud.ionos.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
commit d715ff8acb (parent f553e7601d)
@@ -1328,51 +1328,6 @@ static void rtrs_srv_dev_release(struct device *dev)
 	kfree(srv);
 }
 
-static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx,
-				    const uuid_t *paths_uuid)
-{
-	struct rtrs_srv *srv;
-	int i;
-
-	srv = kzalloc(sizeof(*srv), GFP_KERNEL);
-	if (!srv)
-		return NULL;
-
-	refcount_set(&srv->refcount, 1);
-	INIT_LIST_HEAD(&srv->paths_list);
-	mutex_init(&srv->paths_mutex);
-	mutex_init(&srv->paths_ev_mutex);
-	uuid_copy(&srv->paths_uuid, paths_uuid);
-	srv->queue_depth = sess_queue_depth;
-	srv->ctx = ctx;
-	device_initialize(&srv->dev);
-	srv->dev.release = rtrs_srv_dev_release;
-
-	srv->chunks = kcalloc(srv->queue_depth, sizeof(*srv->chunks),
-			      GFP_KERNEL);
-	if (!srv->chunks)
-		goto err_free_srv;
-
-	for (i = 0; i < srv->queue_depth; i++) {
-		srv->chunks[i] = mempool_alloc(chunk_pool, GFP_KERNEL);
-		if (!srv->chunks[i])
-			goto err_free_chunks;
-	}
-	list_add(&srv->ctx_list, &ctx->srv_list);
-
-	return srv;
-
-err_free_chunks:
-	while (i--)
-		mempool_free(srv->chunks[i], chunk_pool);
-	kfree(srv->chunks);
-
-err_free_srv:
-	kfree(srv);
-
-	return NULL;
-}
-
 static void free_srv(struct rtrs_srv *srv)
 {
 	int i;
@@ -1387,32 +1342,61 @@ static void free_srv(struct rtrs_srv *srv)
 	put_device(&srv->dev);
 }
 
-static inline struct rtrs_srv *__find_srv_and_get(struct rtrs_srv_ctx *ctx,
-						  const uuid_t *paths_uuid)
-{
-	struct rtrs_srv *srv;
-
-	list_for_each_entry(srv, &ctx->srv_list, ctx_list) {
-		if (uuid_equal(&srv->paths_uuid, paths_uuid) &&
-		    refcount_inc_not_zero(&srv->refcount))
-			return srv;
-	}
-
-	return NULL;
-}
-
 static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx,
 					  const uuid_t *paths_uuid)
 {
 	struct rtrs_srv *srv;
+	int i;
 
 	mutex_lock(&ctx->srv_mutex);
-	srv = __find_srv_and_get(ctx, paths_uuid);
-	if (!srv)
-		srv = __alloc_srv(ctx, paths_uuid);
-	mutex_unlock(&ctx->srv_mutex);
+	list_for_each_entry(srv, &ctx->srv_list, ctx_list) {
+		if (uuid_equal(&srv->paths_uuid, paths_uuid) &&
+		    refcount_inc_not_zero(&srv->refcount)) {
+			mutex_unlock(&ctx->srv_mutex);
+			return srv;
+		}
+	}
 
-	return srv;
-}
+	/* need to allocate a new srv */
+	srv = kzalloc(sizeof(*srv), GFP_KERNEL);
+	if (!srv) {
+		mutex_unlock(&ctx->srv_mutex);
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&srv->paths_list);
+	mutex_init(&srv->paths_mutex);
+	mutex_init(&srv->paths_ev_mutex);
+	uuid_copy(&srv->paths_uuid, paths_uuid);
+	srv->queue_depth = sess_queue_depth;
+	srv->ctx = ctx;
+	device_initialize(&srv->dev);
+	srv->dev.release = rtrs_srv_dev_release;
+	list_add(&srv->ctx_list, &ctx->srv_list);
+	mutex_unlock(&ctx->srv_mutex);
+
+	srv->chunks = kcalloc(srv->queue_depth, sizeof(*srv->chunks),
+			      GFP_KERNEL);
+	if (!srv->chunks)
+		goto err_free_srv;
+
+	for (i = 0; i < srv->queue_depth; i++) {
+		srv->chunks[i] = mempool_alloc(chunk_pool, GFP_KERNEL);
+		if (!srv->chunks[i])
+			goto err_free_chunks;
+	}
+	refcount_set(&srv->refcount, 1);
+
+	return srv;
+
+err_free_chunks:
+	while (i--)
+		mempool_free(srv->chunks[i], chunk_pool);
+	kfree(srv->chunks);
+
+err_free_srv:
+	kfree(srv);
+	return NULL;
+}
 
 static void put_srv(struct rtrs_srv *srv)
@@ -1813,7 +1797,11 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id,
 	}
 	recon_cnt = le16_to_cpu(msg->recon_cnt);
 	srv = get_or_create_srv(ctx, &msg->paths_uuid);
-	if (!srv) {
+	/*
+	 * "refcount == 0" happens if a previous thread calls get_or_create_srv
+	 * allocate srv, but chunks of srv are not allocated yet.
+	 */
+	if (!srv || refcount_read(&srv->refcount) == 0) {
 		err = -ENOMEM;
 		goto reject_w_err;
 	}