mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-12-25 20:14:25 +08:00
ipc/mqueue: use get_tree_nodev() in mqueue_get_tree()
[ Upstream commitd60c4d01a9
] When running the stress-ng clone benchmark with multiple testing threads, it was found that there were significant spinlock contention in sget_fc(). The contended spinlock was the sb_lock. It is under heavy contention because the following code in the critcal section of sget_fc(): hlist_for_each_entry(old, &fc->fs_type->fs_supers, s_instances) { if (test(old, fc)) goto share_extant_sb; } After testing with added instrumentation code, it was found that the benchmark could generate thousands of ipc namespaces with the corresponding number of entries in the mqueue's fs_supers list where the namespaces are the key for the search. This leads to excessive time in scanning the list for a match. Looking back at the mqueue calling sequence leading to sget_fc(): mq_init_ns() => mq_create_mount() => fc_mount() => vfs_get_tree() => mqueue_get_tree() => get_tree_keyed() => vfs_get_super() => sget_fc() Currently, mq_init_ns() is the only mqueue function that will indirectly call mqueue_get_tree() with a newly allocated ipc namespace as the key for searching. As a result, there will never be a match with the exising ipc namespaces stored in the mqueue's fs_supers list. So using get_tree_keyed() to do an existing ipc namespace search is just a waste of time. Instead, we could use get_tree_nodev() to eliminate the useless search. By doing so, we can greatly reduce the sb_lock hold time and avoid the spinlock contention problem in case a large number of ipc namespaces are present. Of course, if the code is modified in the future to allow mqueue_get_tree() to be called with an existing ipc namespace instead of a new one, we will have to use get_tree_keyed() in this case. The following stress-ng clone benchmark command was run on a 2-socket 48-core Intel system: ./stress-ng --clone 32 --verbose --oomable --metrics-brief -t 20 The "bogo ops/s" increased from 5948.45 before patch to 9137.06 after patch. This is an increase of 54% in performance. Link: https://lkml.kernel.org/r/20220121172315.19652-1-longman@redhat.com Fixes:935c6912b1
("ipc: Convert mqueue fs to fs_context") Signed-off-by: Waiman Long <longman@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: David Howells <dhowells@redhat.com> Cc: Manfred Spraul <manfred@colorfullife.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
parent
f588b92591
commit
6df8af611d
14
ipc/mqueue.c
14
ipc/mqueue.c
@ -45,6 +45,7 @@
|
||||
|
||||
struct mqueue_fs_context {
|
||||
struct ipc_namespace *ipc_ns;
|
||||
bool newns; /* Set if newly created ipc namespace */
|
||||
};
|
||||
|
||||
#define MQUEUE_MAGIC 0x19800202
|
||||
@ -427,6 +428,14 @@ static int mqueue_get_tree(struct fs_context *fc)
|
||||
{
|
||||
struct mqueue_fs_context *ctx = fc->fs_private;
|
||||
|
||||
/*
|
||||
* With a newly created ipc namespace, we don't need to do a search
|
||||
* for an ipc namespace match, but we still need to set s_fs_info.
|
||||
*/
|
||||
if (ctx->newns) {
|
||||
fc->s_fs_info = ctx->ipc_ns;
|
||||
return get_tree_nodev(fc, mqueue_fill_super);
|
||||
}
|
||||
return get_tree_keyed(fc, mqueue_fill_super, ctx->ipc_ns);
|
||||
}
|
||||
|
||||
@ -454,6 +463,10 @@ static int mqueue_init_fs_context(struct fs_context *fc)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* mq_init_ns() is currently the only caller of mq_create_mount().
|
||||
* So the ns parameter is always a newly created ipc namespace.
|
||||
*/
|
||||
static struct vfsmount *mq_create_mount(struct ipc_namespace *ns)
|
||||
{
|
||||
struct mqueue_fs_context *ctx;
|
||||
@ -465,6 +478,7 @@ static struct vfsmount *mq_create_mount(struct ipc_namespace *ns)
|
||||
return ERR_CAST(fc);
|
||||
|
||||
ctx = fc->fs_private;
|
||||
ctx->newns = true;
|
||||
put_ipc_ns(ctx->ipc_ns);
|
||||
ctx->ipc_ns = get_ipc_ns(ns);
|
||||
put_user_ns(fc->user_ns);
|
||||
|
Loading…
Reference in New Issue
Block a user