GFS2: Use resizable hash table for glocks

This patch changes the glock hash table from a fixed-size hash table to
a resizable hash table (rhashtable), which scales better. It also
simplifies a significant amount of code.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Acked-by: Steven Whitehouse <swhiteho@redhat.com>
This commit is contained in:
Bob Peterson 2015-03-16 11:02:46 -05:00
parent 15562c439d
commit 88ffbf3e03
2 changed files with 102 additions and 166 deletions

View File

@ -34,6 +34,7 @@
#include <linux/percpu.h> #include <linux/percpu.h>
#include <linux/list_sort.h> #include <linux/list_sort.h>
#include <linux/lockref.h> #include <linux/lockref.h>
#include <linux/rhashtable.h>
#include "gfs2.h" #include "gfs2.h"
#include "incore.h" #include "incore.h"
@ -50,9 +51,8 @@
#include "trace_gfs2.h" #include "trace_gfs2.h"
struct gfs2_glock_iter { struct gfs2_glock_iter {
int hash; /* hash bucket index */
unsigned nhash; /* Index within current bucket */
struct gfs2_sbd *sdp; /* incore superblock */ struct gfs2_sbd *sdp; /* incore superblock */
struct rhashtable_iter hti; /* rhashtable iterator */
struct gfs2_glock *gl; /* current glock struct */ struct gfs2_glock *gl; /* current glock struct */
loff_t last_pos; /* last position */ loff_t last_pos; /* last position */
}; };
@ -70,44 +70,19 @@ static DEFINE_SPINLOCK(lru_lock);
#define GFS2_GL_HASH_SHIFT 15 #define GFS2_GL_HASH_SHIFT 15
#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1)
static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE]; static struct rhashtable_params ht_parms = {
static struct dentry *gfs2_root; .nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4,
.key_len = sizeof(struct lm_lockname),
.key_offset = offsetof(struct gfs2_glock, gl_name),
.head_offset = offsetof(struct gfs2_glock, gl_node),
};
/** static struct rhashtable gl_hash_table;
* gl_hash() - Turn glock number into hash bucket number
* @lock: The glock number
*
* Returns: The number of the corresponding hash bucket
*/
static unsigned int gl_hash(const struct gfs2_sbd *sdp, void gfs2_glock_free(struct gfs2_glock *gl)
const struct lm_lockname *name)
{ {
unsigned int h; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
h = jhash(&name->ln_number, sizeof(u64), 0);
h = jhash(&name->ln_type, sizeof(unsigned int), h);
h = jhash(&sdp, sizeof(struct gfs2_sbd *), h);
h &= GFS2_GL_HASH_MASK;
return h;
}
static inline void spin_lock_bucket(unsigned int hash)
{
hlist_bl_lock(&gl_hash_table[hash]);
}
static inline void spin_unlock_bucket(unsigned int hash)
{
hlist_bl_unlock(&gl_hash_table[hash]);
}
static void gfs2_glock_dealloc(struct rcu_head *rcu)
{
struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
if (gl->gl_ops->go_flags & GLOF_ASPACE) { if (gl->gl_ops->go_flags & GLOF_ASPACE) {
kmem_cache_free(gfs2_glock_aspace_cachep, gl); kmem_cache_free(gfs2_glock_aspace_cachep, gl);
@ -115,13 +90,6 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu)
kfree(gl->gl_lksb.sb_lvbptr); kfree(gl->gl_lksb.sb_lvbptr);
kmem_cache_free(gfs2_glock_cachep, gl); kmem_cache_free(gfs2_glock_cachep, gl);
} }
}
void gfs2_glock_free(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
if (atomic_dec_and_test(&sdp->sd_glock_disposal)) if (atomic_dec_and_test(&sdp->sd_glock_disposal))
wake_up(&sdp->sd_glock_wait); wake_up(&sdp->sd_glock_wait);
} }
@ -202,39 +170,13 @@ void gfs2_glock_put(struct gfs2_glock *gl)
gfs2_glock_remove_from_lru(gl); gfs2_glock_remove_from_lru(gl);
spin_unlock(&gl->gl_lockref.lock); spin_unlock(&gl->gl_lockref.lock);
spin_lock_bucket(gl->gl_hash); rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
hlist_bl_del_rcu(&gl->gl_list);
spin_unlock_bucket(gl->gl_hash);
GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
GLOCK_BUG_ON(gl, mapping && mapping->nrpages); GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
trace_gfs2_glock_put(gl); trace_gfs2_glock_put(gl);
sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
} }
/**
* search_bucket() - Find struct gfs2_glock by lock number
* @bucket: the bucket to search
* @name: The lock name
*
* Returns: NULL, or the struct gfs2_glock with the requested number
*/
static struct gfs2_glock *search_bucket(unsigned int hash,
const struct lm_lockname *name)
{
struct gfs2_glock *gl;
struct hlist_bl_node *h;
hlist_bl_for_each_entry_rcu(gl, h, &gl_hash_table[hash], gl_list) {
if (!lm_name_equal(&gl->gl_name, name))
continue;
if (lockref_get_not_dead(&gl->gl_lockref))
return gl;
}
return NULL;
}
/** /**
* may_grant - check if its ok to grant a new lock * may_grant - check if its ok to grant a new lock
* @gl: The glock * @gl: The glock
@ -704,14 +646,14 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
struct lm_lockname name = { .ln_number = number, struct lm_lockname name = { .ln_number = number,
.ln_type = glops->go_type, .ln_type = glops->go_type,
.ln_sbd = sdp }; .ln_sbd = sdp };
struct gfs2_glock *gl, *tmp; struct gfs2_glock *gl, *tmp = NULL;
unsigned int hash = gl_hash(sdp, &name);
struct address_space *mapping; struct address_space *mapping;
struct kmem_cache *cachep; struct kmem_cache *cachep;
int ret, tries = 0;
rcu_read_lock(); gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
gl = search_bucket(hash, &name); if (gl && !lockref_get_not_dead(&gl->gl_lockref))
rcu_read_unlock(); gl = NULL;
*glp = gl; *glp = gl;
if (gl) if (gl)
@ -738,13 +680,13 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
} }
atomic_inc(&sdp->sd_glock_disposal); atomic_inc(&sdp->sd_glock_disposal);
gl->gl_node.next = NULL;
gl->gl_flags = 0; gl->gl_flags = 0;
gl->gl_name = name; gl->gl_name = name;
gl->gl_lockref.count = 1; gl->gl_lockref.count = 1;
gl->gl_state = LM_ST_UNLOCKED; gl->gl_state = LM_ST_UNLOCKED;
gl->gl_target = LM_ST_UNLOCKED; gl->gl_target = LM_ST_UNLOCKED;
gl->gl_demote_state = LM_ST_EXCLUSIVE; gl->gl_demote_state = LM_ST_EXCLUSIVE;
gl->gl_hash = hash;
gl->gl_ops = glops; gl->gl_ops = glops;
gl->gl_dstamp = ktime_set(0, 0); gl->gl_dstamp = ktime_set(0, 0);
preempt_disable(); preempt_disable();
@ -769,22 +711,34 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
mapping->writeback_index = 0; mapping->writeback_index = 0;
} }
spin_lock_bucket(hash); again:
tmp = search_bucket(hash, &name); ret = rhashtable_lookup_insert_fast(&gl_hash_table, &gl->gl_node,
if (tmp) { ht_parms);
spin_unlock_bucket(hash); if (ret == 0) {
kfree(gl->gl_lksb.sb_lvbptr); *glp = gl;
kmem_cache_free(cachep, gl); return 0;
atomic_dec(&sdp->sd_glock_disposal);
gl = tmp;
} else {
hlist_bl_add_head_rcu(&gl->gl_list, &gl_hash_table[hash]);
spin_unlock_bucket(hash);
} }
*glp = gl; if (ret == -EEXIST) {
ret = 0;
tmp = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
if (tmp == NULL || !lockref_get_not_dead(&tmp->gl_lockref)) {
if (++tries < 100) {
cond_resched();
goto again;
}
tmp = NULL;
ret = -ENOMEM;
}
} else {
WARN_ON_ONCE(ret);
}
kfree(gl->gl_lksb.sb_lvbptr);
kmem_cache_free(cachep, gl);
atomic_dec(&sdp->sd_glock_disposal);
*glp = tmp;
return 0; return ret;
} }
/** /**
@ -1460,31 +1414,26 @@ static struct shrinker glock_shrinker = {
* *
*/ */
static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp, static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
unsigned int hash)
{ {
struct gfs2_glock *gl; struct gfs2_glock *gl;
struct hlist_bl_head *head = &gl_hash_table[hash]; struct rhash_head *pos, *next;
struct hlist_bl_node *pos; const struct bucket_table *tbl;
int i;
rcu_read_lock(); rcu_read_lock();
hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) { tbl = rht_dereference_rcu(gl_hash_table.tbl, &gl_hash_table);
if ((gl->gl_name.ln_sbd == sdp) && lockref_get_not_dead(&gl->gl_lockref)) for (i = 0; i < tbl->size; i++) {
examiner(gl); rht_for_each_entry_safe(gl, pos, next, tbl, i, gl_node) {
if ((gl->gl_name.ln_sbd == sdp) &&
lockref_get_not_dead(&gl->gl_lockref))
examiner(gl);
}
} }
rcu_read_unlock(); rcu_read_unlock();
cond_resched(); cond_resched();
} }
static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
{
unsigned x;
for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
examine_bucket(examiner, sdp, x);
}
/** /**
* thaw_glock - thaw out a glock which has an unprocessed reply waiting * thaw_glock - thaw out a glock which has an unprocessed reply waiting
* @gl: The glock to thaw * @gl: The glock to thaw
@ -1802,20 +1751,24 @@ static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr)
int __init gfs2_glock_init(void) int __init gfs2_glock_init(void)
{ {
unsigned i; int ret;
for(i = 0; i < GFS2_GL_HASH_SIZE; i++) {
INIT_HLIST_BL_HEAD(&gl_hash_table[i]); ret = rhashtable_init(&gl_hash_table, &ht_parms);
} if (ret < 0)
return ret;
glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
WQ_HIGHPRI | WQ_FREEZABLE, 0); WQ_HIGHPRI | WQ_FREEZABLE, 0);
if (!glock_workqueue) if (!glock_workqueue) {
rhashtable_destroy(&gl_hash_table);
return -ENOMEM; return -ENOMEM;
}
gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
WQ_MEM_RECLAIM | WQ_FREEZABLE, WQ_MEM_RECLAIM | WQ_FREEZABLE,
0); 0);
if (!gfs2_delete_workqueue) { if (!gfs2_delete_workqueue) {
destroy_workqueue(glock_workqueue); destroy_workqueue(glock_workqueue);
rhashtable_destroy(&gl_hash_table);
return -ENOMEM; return -ENOMEM;
} }
@ -1827,72 +1780,41 @@ int __init gfs2_glock_init(void)
void gfs2_glock_exit(void) void gfs2_glock_exit(void)
{ {
unregister_shrinker(&glock_shrinker); unregister_shrinker(&glock_shrinker);
rhashtable_destroy(&gl_hash_table);
destroy_workqueue(glock_workqueue); destroy_workqueue(glock_workqueue);
destroy_workqueue(gfs2_delete_workqueue); destroy_workqueue(gfs2_delete_workqueue);
} }
static inline struct gfs2_glock *glock_hash_chain(unsigned hash) static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
{ {
return hlist_bl_entry(hlist_bl_first_rcu(&gl_hash_table[hash]),
struct gfs2_glock, gl_list);
}
static inline struct gfs2_glock *glock_hash_next(struct gfs2_glock *gl)
{
return hlist_bl_entry(rcu_dereference(gl->gl_list.next),
struct gfs2_glock, gl_list);
}
static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
{
struct gfs2_glock *gl;
do { do {
gl = gi->gl; gi->gl = rhashtable_walk_next(&gi->hti);
if (gl) { if (IS_ERR(gi->gl)) {
gi->gl = glock_hash_next(gl); if (PTR_ERR(gi->gl) == -EAGAIN)
gi->nhash++; continue;
} else { gi->gl = NULL;
if (gi->hash >= GFS2_GL_HASH_SIZE) {
rcu_read_unlock();
return 1;
}
gi->gl = glock_hash_chain(gi->hash);
gi->nhash = 0;
}
while (gi->gl == NULL) {
gi->hash++;
if (gi->hash >= GFS2_GL_HASH_SIZE) {
rcu_read_unlock();
return 1;
}
gi->gl = glock_hash_chain(gi->hash);
gi->nhash = 0;
} }
/* Skip entries for other sb and dead entries */ /* Skip entries for other sb and dead entries */
} while (gi->sdp != gi->gl->gl_name.ln_sbd || } while ((gi->gl) && ((gi->sdp != gi->gl->gl_name.ln_sbd) ||
__lockref_is_dead(&gi->gl->gl_lockref)); __lockref_is_dead(&gi->gl->gl_lockref)));
return 0;
} }
static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
{ {
struct gfs2_glock_iter *gi = seq->private; struct gfs2_glock_iter *gi = seq->private;
loff_t n = *pos; loff_t n = *pos;
int ret;
if (gi->last_pos <= *pos) if (gi->last_pos <= *pos)
n = gi->nhash + (*pos - gi->last_pos); n = (*pos - gi->last_pos);
else
gi->hash = 0;
gi->nhash = 0; ret = rhashtable_walk_start(&gi->hti);
rcu_read_lock(); if (ret)
return NULL;
do { do {
if (gfs2_glock_iter_next(gi)) gfs2_glock_iter_next(gi);
return NULL; } while (gi->gl && n--);
} while (n--);
gi->last_pos = *pos; gi->last_pos = *pos;
return gi->gl; return gi->gl;
@ -1905,9 +1827,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
(*pos)++; (*pos)++;
gi->last_pos = *pos; gi->last_pos = *pos;
if (gfs2_glock_iter_next(gi)) gfs2_glock_iter_next(gi);
return NULL;
return gi->gl; return gi->gl;
} }
@ -1915,9 +1835,8 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
{ {
struct gfs2_glock_iter *gi = seq->private; struct gfs2_glock_iter *gi = seq->private;
if (gi->gl)
rcu_read_unlock();
gi->gl = NULL; gi->gl = NULL;
rhashtable_walk_stop(&gi->hti);
} }
static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
@ -1978,14 +1897,28 @@ static int gfs2_glocks_open(struct inode *inode, struct file *file)
if (ret == 0) { if (ret == 0) {
struct seq_file *seq = file->private_data; struct seq_file *seq = file->private_data;
struct gfs2_glock_iter *gi = seq->private; struct gfs2_glock_iter *gi = seq->private;
gi->sdp = inode->i_private; gi->sdp = inode->i_private;
gi->last_pos = 0;
seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
if (seq->buf) if (seq->buf)
seq->size = GFS2_SEQ_GOODSIZE; seq->size = GFS2_SEQ_GOODSIZE;
gi->gl = NULL;
ret = rhashtable_walk_init(&gl_hash_table, &gi->hti);
} }
return ret; return ret;
} }
static int gfs2_glocks_release(struct inode *inode, struct file *file)
{
struct seq_file *seq = file->private_data;
struct gfs2_glock_iter *gi = seq->private;
gi->gl = NULL;
rhashtable_walk_exit(&gi->hti);
return seq_release_private(inode, file);
}
static int gfs2_glstats_open(struct inode *inode, struct file *file) static int gfs2_glstats_open(struct inode *inode, struct file *file)
{ {
int ret = seq_open_private(file, &gfs2_glstats_seq_ops, int ret = seq_open_private(file, &gfs2_glstats_seq_ops,
@ -1994,9 +1927,12 @@ static int gfs2_glstats_open(struct inode *inode, struct file *file)
struct seq_file *seq = file->private_data; struct seq_file *seq = file->private_data;
struct gfs2_glock_iter *gi = seq->private; struct gfs2_glock_iter *gi = seq->private;
gi->sdp = inode->i_private; gi->sdp = inode->i_private;
gi->last_pos = 0;
seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
if (seq->buf) if (seq->buf)
seq->size = GFS2_SEQ_GOODSIZE; seq->size = GFS2_SEQ_GOODSIZE;
gi->gl = NULL;
ret = rhashtable_walk_init(&gl_hash_table, &gi->hti);
} }
return ret; return ret;
} }
@ -2016,7 +1952,7 @@ static const struct file_operations gfs2_glocks_fops = {
.open = gfs2_glocks_open, .open = gfs2_glocks_open,
.read = seq_read, .read = seq_read,
.llseek = seq_lseek, .llseek = seq_lseek,
.release = seq_release_private, .release = gfs2_glocks_release,
}; };
static const struct file_operations gfs2_glstats_fops = { static const struct file_operations gfs2_glstats_fops = {
@ -2024,7 +1960,7 @@ static const struct file_operations gfs2_glstats_fops = {
.open = gfs2_glstats_open, .open = gfs2_glstats_open,
.read = seq_read, .read = seq_read,
.llseek = seq_lseek, .llseek = seq_lseek,
.release = seq_release_private, .release = gfs2_glocks_release,
}; };
static const struct file_operations gfs2_sbstats_fops = { static const struct file_operations gfs2_sbstats_fops = {

View File

@ -22,6 +22,7 @@
#include <linux/ktime.h> #include <linux/ktime.h>
#include <linux/percpu.h> #include <linux/percpu.h>
#include <linux/lockref.h> #include <linux/lockref.h>
#include <linux/rhashtable.h>
#define DIO_WAIT 0x00000010 #define DIO_WAIT 0x00000010
#define DIO_METADATA 0x00000020 #define DIO_METADATA 0x00000020
@ -342,7 +343,6 @@ struct gfs2_glock {
gl_req:2, /* State in last dlm request */ gl_req:2, /* State in last dlm request */
gl_reply:8; /* Last reply from the dlm */ gl_reply:8; /* Last reply from the dlm */
unsigned int gl_hash;
unsigned long gl_demote_time; /* time of first demote request */ unsigned long gl_demote_time; /* time of first demote request */
long gl_hold_time; long gl_hold_time;
struct list_head gl_holders; struct list_head gl_holders;
@ -368,7 +368,7 @@ struct gfs2_glock {
loff_t end; loff_t end;
} gl_vm; } gl_vm;
}; };
struct rcu_head gl_rcu; struct rhash_head gl_node;
}; };
#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */