nfsd: protect concurrent access to nfsd stats counters

nfsd stats counters can be updated by concurrent nfsd threads without any
protection.

Convert some nfsd_stats and nfsd_net struct members to use percpu counters.

The longest_chain* members of struct nfsd_net remain unprotected.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
This commit is contained in:
Amir Goldstein 2021-01-06 09:52:35 +02:00 committed by Chuck Lever
parent 1b76d1df1a
commit e567b98ce9
8 changed files with 194 additions and 55 deletions

View File

@ -10,6 +10,7 @@
#include <net/net_namespace.h> #include <net/net_namespace.h>
#include <net/netns/generic.h> #include <net/netns/generic.h>
#include <linux/percpu_counter.h>
/* Hash tables for nfs4_clientid state */ /* Hash tables for nfs4_clientid state */
#define CLIENT_HASH_BITS 4 #define CLIENT_HASH_BITS 4
@ -21,6 +22,14 @@
struct cld_net; struct cld_net;
struct nfsd4_client_tracking_ops; struct nfsd4_client_tracking_ops;
enum {
/* cache misses due only to checksum comparison failures */
NFSD_NET_PAYLOAD_MISSES,
/* amount of memory (in bytes) currently consumed by the DRC */
NFSD_NET_DRC_MEM_USAGE,
NFSD_NET_COUNTERS_NUM
};
/* /*
* Represents a nfsd "container". With respect to nfsv4 state tracking, the * Represents a nfsd "container". With respect to nfsv4 state tracking, the
* fields of interest are the *_id_hashtbls and the *_name_tree. These track * fields of interest are the *_id_hashtbls and the *_name_tree. These track
@ -149,20 +158,16 @@ struct nfsd_net {
/* /*
* Stats and other tracking of on the duplicate reply cache. * Stats and other tracking of on the duplicate reply cache.
* These fields and the "rc" fields in nfsdstats are modified * The longest_chain* fields are modified with only the per-bucket
* with only the per-bucket cache lock, which isn't really safe * cache lock, which isn't really safe and should be fixed if we want
* and should be fixed if we want the statistics to be * these statistics to be completely accurate.
* completely accurate.
*/ */
/* total number of entries */ /* total number of entries */
atomic_t num_drc_entries; atomic_t num_drc_entries;
/* cache misses due only to checksum comparison failures */ /* Per-netns stats counters */
unsigned int payload_misses; struct percpu_counter counter[NFSD_NET_COUNTERS_NUM];
/* amount of memory (in bytes) currently consumed by the DRC */
unsigned int drc_mem_usage;
/* longest hash chain seen */ /* longest hash chain seen */
unsigned int longest_chain; unsigned int longest_chain;

View File

@ -2174,7 +2174,7 @@ nfsd4_proc_null(struct svc_rqst *rqstp)
static inline void nfsd4_increment_op_stats(u32 opnum) static inline void nfsd4_increment_op_stats(u32 opnum)
{ {
if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP) if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP)
nfsdstats.nfs4_opcount[opnum]++; percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_NFS4_OP(opnum)]);
} }
static const struct nfsd4_operation nfsd4_ops[]; static const struct nfsd4_operation nfsd4_ops[];

View File

@ -121,14 +121,14 @@ nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
struct nfsd_net *nn) struct nfsd_net *nn)
{ {
if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) { if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) {
nn->drc_mem_usage -= rp->c_replvec.iov_len; nfsd_stats_drc_mem_usage_sub(nn, rp->c_replvec.iov_len);
kfree(rp->c_replvec.iov_base); kfree(rp->c_replvec.iov_base);
} }
if (rp->c_state != RC_UNUSED) { if (rp->c_state != RC_UNUSED) {
rb_erase(&rp->c_node, &b->rb_head); rb_erase(&rp->c_node, &b->rb_head);
list_del(&rp->c_lru); list_del(&rp->c_lru);
atomic_dec(&nn->num_drc_entries); atomic_dec(&nn->num_drc_entries);
nn->drc_mem_usage -= sizeof(*rp); nfsd_stats_drc_mem_usage_sub(nn, sizeof(*rp));
} }
kmem_cache_free(drc_slab, rp); kmem_cache_free(drc_slab, rp);
} }
@ -154,6 +154,16 @@ void nfsd_drc_slab_free(void)
kmem_cache_destroy(drc_slab); kmem_cache_destroy(drc_slab);
} }
static int nfsd_reply_cache_stats_init(struct nfsd_net *nn)
{
return nfsd_percpu_counters_init(nn->counter, NFSD_NET_COUNTERS_NUM);
}
static void nfsd_reply_cache_stats_destroy(struct nfsd_net *nn)
{
nfsd_percpu_counters_destroy(nn->counter, NFSD_NET_COUNTERS_NUM);
}
int nfsd_reply_cache_init(struct nfsd_net *nn) int nfsd_reply_cache_init(struct nfsd_net *nn)
{ {
unsigned int hashsize; unsigned int hashsize;
@ -165,12 +175,16 @@ int nfsd_reply_cache_init(struct nfsd_net *nn)
hashsize = nfsd_hashsize(nn->max_drc_entries); hashsize = nfsd_hashsize(nn->max_drc_entries);
nn->maskbits = ilog2(hashsize); nn->maskbits = ilog2(hashsize);
status = nfsd_reply_cache_stats_init(nn);
if (status)
goto out_nomem;
nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan; nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan;
nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count; nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count;
nn->nfsd_reply_cache_shrinker.seeks = 1; nn->nfsd_reply_cache_shrinker.seeks = 1;
status = register_shrinker(&nn->nfsd_reply_cache_shrinker); status = register_shrinker(&nn->nfsd_reply_cache_shrinker);
if (status) if (status)
goto out_nomem; goto out_stats_destroy;
nn->drc_hashtbl = kvzalloc(array_size(hashsize, nn->drc_hashtbl = kvzalloc(array_size(hashsize,
sizeof(*nn->drc_hashtbl)), GFP_KERNEL); sizeof(*nn->drc_hashtbl)), GFP_KERNEL);
@ -186,6 +200,8 @@ int nfsd_reply_cache_init(struct nfsd_net *nn)
return 0; return 0;
out_shrinker: out_shrinker:
unregister_shrinker(&nn->nfsd_reply_cache_shrinker); unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
out_stats_destroy:
nfsd_reply_cache_stats_destroy(nn);
out_nomem: out_nomem:
printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
return -ENOMEM; return -ENOMEM;
@ -196,6 +212,7 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
struct svc_cacherep *rp; struct svc_cacherep *rp;
unsigned int i; unsigned int i;
nfsd_reply_cache_stats_destroy(nn);
unregister_shrinker(&nn->nfsd_reply_cache_shrinker); unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
for (i = 0; i < nn->drc_hashsize; i++) { for (i = 0; i < nn->drc_hashsize; i++) {
@ -324,7 +341,7 @@ nfsd_cache_key_cmp(const struct svc_cacherep *key,
{ {
if (key->c_key.k_xid == rp->c_key.k_xid && if (key->c_key.k_xid == rp->c_key.k_xid &&
key->c_key.k_csum != rp->c_key.k_csum) { key->c_key.k_csum != rp->c_key.k_csum) {
++nn->payload_misses; nfsd_stats_payload_misses_inc(nn);
trace_nfsd_drc_mismatch(nn, key, rp); trace_nfsd_drc_mismatch(nn, key, rp);
} }
@ -407,7 +424,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
rqstp->rq_cacherep = NULL; rqstp->rq_cacherep = NULL;
if (type == RC_NOCACHE) { if (type == RC_NOCACHE) {
nfsdstats.rcnocache++; nfsd_stats_rc_nocache_inc();
goto out; goto out;
} }
@ -429,12 +446,12 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
goto found_entry; goto found_entry;
} }
nfsdstats.rcmisses++; nfsd_stats_rc_misses_inc();
rqstp->rq_cacherep = rp; rqstp->rq_cacherep = rp;
rp->c_state = RC_INPROG; rp->c_state = RC_INPROG;
atomic_inc(&nn->num_drc_entries); atomic_inc(&nn->num_drc_entries);
nn->drc_mem_usage += sizeof(*rp); nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp));
/* go ahead and prune the cache */ /* go ahead and prune the cache */
prune_bucket(b, nn); prune_bucket(b, nn);
@ -446,7 +463,7 @@ out:
found_entry: found_entry:
/* We found a matching entry which is either in progress or done. */ /* We found a matching entry which is either in progress or done. */
nfsdstats.rchits++; nfsd_stats_rc_hits_inc();
rtn = RC_DROPIT; rtn = RC_DROPIT;
/* Request being processed */ /* Request being processed */
@ -548,7 +565,7 @@ void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
return; return;
} }
spin_lock(&b->cache_lock); spin_lock(&b->cache_lock);
nn->drc_mem_usage += bufsize; nfsd_stats_drc_mem_usage_add(nn, bufsize);
lru_put_end(b, rp); lru_put_end(b, rp);
rp->c_secure = test_bit(RQ_SECURE, &rqstp->rq_flags); rp->c_secure = test_bit(RQ_SECURE, &rqstp->rq_flags);
rp->c_type = cachetype; rp->c_type = cachetype;
@ -588,13 +605,18 @@ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
seq_printf(m, "max entries: %u\n", nn->max_drc_entries); seq_printf(m, "max entries: %u\n", nn->max_drc_entries);
seq_printf(m, "num entries: %u\n", seq_printf(m, "num entries: %u\n",
atomic_read(&nn->num_drc_entries)); atomic_read(&nn->num_drc_entries));
seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits); seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits);
seq_printf(m, "mem usage: %u\n", nn->drc_mem_usage); seq_printf(m, "mem usage: %lld\n",
seq_printf(m, "cache hits: %u\n", nfsdstats.rchits); percpu_counter_sum_positive(&nn->counter[NFSD_NET_DRC_MEM_USAGE]));
seq_printf(m, "cache misses: %u\n", nfsdstats.rcmisses); seq_printf(m, "cache hits: %lld\n",
seq_printf(m, "not cached: %u\n", nfsdstats.rcnocache); percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS]));
seq_printf(m, "payload misses: %u\n", nn->payload_misses); seq_printf(m, "cache misses: %lld\n",
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES]));
seq_printf(m, "not cached: %lld\n",
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]));
seq_printf(m, "payload misses: %lld\n",
percpu_counter_sum_positive(&nn->counter[NFSD_NET_PAYLOAD_MISSES]));
seq_printf(m, "longest chain len: %u\n", nn->longest_chain); seq_printf(m, "longest chain len: %u\n", nn->longest_chain);
seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize); seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize);
return 0; return 0;

View File

@ -1534,7 +1534,9 @@ static int __init init_nfsd(void)
retval = nfsd4_init_pnfs(); retval = nfsd4_init_pnfs();
if (retval) if (retval)
goto out_free_slabs; goto out_free_slabs;
nfsd_stat_init(); /* Statistics */ retval = nfsd_stat_init(); /* Statistics */
if (retval)
goto out_free_pnfs;
retval = nfsd_drc_slab_create(); retval = nfsd_drc_slab_create();
if (retval) if (retval)
goto out_free_stat; goto out_free_stat;
@ -1554,6 +1556,7 @@ out_free_lockd:
nfsd_drc_slab_free(); nfsd_drc_slab_free();
out_free_stat: out_free_stat:
nfsd_stat_shutdown(); nfsd_stat_shutdown();
out_free_pnfs:
nfsd4_exit_pnfs(); nfsd4_exit_pnfs();
out_free_slabs: out_free_slabs:
nfsd4_free_slabs(); nfsd4_free_slabs();

View File

@ -422,7 +422,7 @@ skip_pseudoflavor_check:
} }
out: out:
if (error == nfserr_stale) if (error == nfserr_stale)
nfsdstats.fh_stale++; nfsd_stats_fh_stale_inc();
return error; return error;
} }

View File

@ -36,13 +36,13 @@ static int nfsd_proc_show(struct seq_file *seq, void *v)
{ {
int i; int i;
seq_printf(seq, "rc %u %u %u\nfh %u 0 0 0 0\nio %u %u\n", seq_printf(seq, "rc %lld %lld %lld\nfh %lld 0 0 0 0\nio %lld %lld\n",
nfsdstats.rchits, percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS]),
nfsdstats.rcmisses, percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES]),
nfsdstats.rcnocache, percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]),
nfsdstats.fh_stale, percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_FH_STALE]),
nfsdstats.io_read, percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_READ]),
nfsdstats.io_write); percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_WRITE]));
/* thread usage: */ /* thread usage: */
seq_printf(seq, "th %u 0", nfsdstats.th_cnt); seq_printf(seq, "th %u 0", nfsdstats.th_cnt);
@ -61,8 +61,10 @@ static int nfsd_proc_show(struct seq_file *seq, void *v)
/* Show count for individual nfsv4 operations */ /* Show count for individual nfsv4 operations */
/* Writing operation numbers 0 1 2 also for maintaining uniformity */ /* Writing operation numbers 0 1 2 also for maintaining uniformity */
seq_printf(seq,"proc4ops %u", LAST_NFS4_OP + 1); seq_printf(seq,"proc4ops %u", LAST_NFS4_OP + 1);
for (i = 0; i <= LAST_NFS4_OP; i++) for (i = 0; i <= LAST_NFS4_OP; i++) {
seq_printf(seq, " %u", nfsdstats.nfs4_opcount[i]); seq_printf(seq, " %lld",
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_NFS4_OP(i)]));
}
seq_putc(seq, '\n'); seq_putc(seq, '\n');
#endif #endif
@ -82,14 +84,63 @@ static const struct proc_ops nfsd_proc_ops = {
.proc_release = single_release, .proc_release = single_release,
}; };
void int nfsd_percpu_counters_init(struct percpu_counter counters[], int num)
nfsd_stat_init(void)
{ {
svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_ops); int i, err = 0;
for (i = 0; !err && i < num; i++)
err = percpu_counter_init(&counters[i], 0, GFP_KERNEL);
if (!err)
return 0;
for (; i > 0; i--)
percpu_counter_destroy(&counters[i-1]);
return err;
} }
void void nfsd_percpu_counters_reset(struct percpu_counter counters[], int num)
nfsd_stat_shutdown(void)
{ {
int i;
for (i = 0; i < num; i++)
percpu_counter_set(&counters[i], 0);
}
void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num)
{
int i;
for (i = 0; i < num; i++)
percpu_counter_destroy(&counters[i]);
}
static int nfsd_stat_counters_init(void)
{
return nfsd_percpu_counters_init(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM);
}
static void nfsd_stat_counters_destroy(void)
{
nfsd_percpu_counters_destroy(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM);
}
int nfsd_stat_init(void)
{
int err;
err = nfsd_stat_counters_init();
if (err)
return err;
svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_ops);
return 0;
}
void nfsd_stat_shutdown(void)
{
nfsd_stat_counters_destroy();
svc_proc_unregister(&init_net, "nfsd"); svc_proc_unregister(&init_net, "nfsd");
} }

View File

@ -8,27 +8,85 @@
#define _NFSD_STATS_H #define _NFSD_STATS_H
#include <uapi/linux/nfsd/stats.h> #include <uapi/linux/nfsd/stats.h>
#include <linux/percpu_counter.h>
enum {
NFSD_STATS_RC_HITS, /* repcache hits */
NFSD_STATS_RC_MISSES, /* repcache misses */
NFSD_STATS_RC_NOCACHE, /* uncached reqs */
NFSD_STATS_FH_STALE, /* FH stale error */
NFSD_STATS_IO_READ, /* bytes returned to read requests */
NFSD_STATS_IO_WRITE, /* bytes passed in write requests */
#ifdef CONFIG_NFSD_V4
NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */
NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP,
#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op))
#endif
NFSD_STATS_COUNTERS_NUM
};
struct nfsd_stats { struct nfsd_stats {
unsigned int rchits; /* repcache hits */ struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM];
unsigned int rcmisses; /* repcache hits */
unsigned int rcnocache; /* uncached reqs */
unsigned int fh_stale; /* FH stale error */
unsigned int io_read; /* bytes returned to read requests */
unsigned int io_write; /* bytes passed in write requests */
unsigned int th_cnt; /* number of available threads */
#ifdef CONFIG_NFSD_V4
unsigned int nfs4_opcount[LAST_NFS4_OP + 1]; /* count of individual nfsv4 operations */
#endif
/* Protected by nfsd_mutex */
unsigned int th_cnt; /* number of available threads */
}; };
extern struct nfsd_stats nfsdstats; extern struct nfsd_stats nfsdstats;
extern struct svc_stat nfsd_svcstats; extern struct svc_stat nfsd_svcstats;
void nfsd_stat_init(void); int nfsd_percpu_counters_init(struct percpu_counter counters[], int num);
void nfsd_stat_shutdown(void); void nfsd_percpu_counters_reset(struct percpu_counter counters[], int num);
void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num);
int nfsd_stat_init(void);
void nfsd_stat_shutdown(void);
static inline void nfsd_stats_rc_hits_inc(void)
{
percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_HITS]);
}
static inline void nfsd_stats_rc_misses_inc(void)
{
percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_MISSES]);
}
static inline void nfsd_stats_rc_nocache_inc(void)
{
percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]);
}
static inline void nfsd_stats_fh_stale_inc(void)
{
percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_FH_STALE]);
}
static inline void nfsd_stats_io_read_add(s64 amount)
{
percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_READ], amount);
}
static inline void nfsd_stats_io_write_add(s64 amount)
{
percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_WRITE], amount);
}
static inline void nfsd_stats_payload_misses_inc(struct nfsd_net *nn)
{
percpu_counter_inc(&nn->counter[NFSD_NET_PAYLOAD_MISSES]);
}
static inline void nfsd_stats_drc_mem_usage_add(struct nfsd_net *nn, s64 amount)
{
percpu_counter_add(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount);
}
static inline void nfsd_stats_drc_mem_usage_sub(struct nfsd_net *nn, s64 amount)
{
percpu_counter_sub(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount);
}
#endif /* _NFSD_STATS_H */ #endif /* _NFSD_STATS_H */

View File

@ -889,7 +889,7 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned long *count, u32 *eof, ssize_t host_err) unsigned long *count, u32 *eof, ssize_t host_err)
{ {
if (host_err >= 0) { if (host_err >= 0) {
nfsdstats.io_read += host_err; nfsd_stats_io_read_add(host_err);
*eof = nfsd_eof_on_read(file, offset, host_err, *count); *eof = nfsd_eof_on_read(file, offset, host_err, *count);
*count = host_err; *count = host_err;
fsnotify_access(file); fsnotify_access(file);
@ -1040,7 +1040,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
goto out_nfserr; goto out_nfserr;
} }
*cnt = host_err; *cnt = host_err;
nfsdstats.io_write += *cnt; nfsd_stats_io_write_add(*cnt);
fsnotify_modify(file); fsnotify_modify(file);
if (stable && use_wgather) { if (stable && use_wgather) {