[DLM] fix requestqueue race

Red Hat BZ 211914

There's a race between dlm_recoverd (1) enabling locking and (2) clearing
out the requestqueue, and dlm_recvd (1) checking if locking is enabled and
(2) adding a message to the requestqueue.  An order of recoverd(1),
recvd(1), recvd(2), recoverd(2) will result in a message being left on the
requestqueue.  The fix is to have dlm_recvd check, after taking the mutex
for the requestqueue, whether dlm_recoverd has enabled locking and, if it
has, process the message instead of queueing it.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
This commit is contained in:
David Teigland 2006-10-31 11:55:56 -06:00 committed by Steven Whitehouse
parent 435618b75b
commit d4400156d4
3 changed files with 29 additions and 9 deletions

View File

@ -3028,10 +3028,17 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
while (1) { while (1) {
if (dlm_locking_stopped(ls)) { if (dlm_locking_stopped(ls)) {
if (!recovery) if (recovery) {
dlm_add_requestqueue(ls, nodeid, hd); error = -EINTR;
error = -EINTR; goto out;
goto out; }
error = dlm_add_requestqueue(ls, nodeid, hd);
if (error == -EAGAIN)
continue;
else {
error = -EINTR;
goto out;
}
} }
if (lock_recovery_try(ls)) if (lock_recovery_try(ls))

View File

@ -30,26 +30,39 @@ struct rq_entry {
* lockspace is enabled on some while still suspended on others. * lockspace is enabled on some while still suspended on others.
*/ */
void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd) int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
{ {
struct rq_entry *e; struct rq_entry *e;
int length = hd->h_length; int length = hd->h_length;
int rv = 0;
if (dlm_is_removed(ls, nodeid)) if (dlm_is_removed(ls, nodeid))
return; return 0;
e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL); e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL);
if (!e) { if (!e) {
log_print("dlm_add_requestqueue: out of memory\n"); log_print("dlm_add_requestqueue: out of memory\n");
return; return 0;
} }
e->nodeid = nodeid; e->nodeid = nodeid;
memcpy(e->request, hd, length); memcpy(e->request, hd, length);
/* We need to check dlm_locking_stopped() after taking the mutex to
avoid a race where dlm_recoverd enables locking and runs
process_requestqueue between our earlier dlm_locking_stopped check
and this addition to the requestqueue. */
mutex_lock(&ls->ls_requestqueue_mutex); mutex_lock(&ls->ls_requestqueue_mutex);
list_add_tail(&e->list, &ls->ls_requestqueue); if (dlm_locking_stopped(ls))
list_add_tail(&e->list, &ls->ls_requestqueue);
else {
log_debug(ls, "dlm_add_requestqueue skip from %d", nodeid);
kfree(e);
rv = -EAGAIN;
}
mutex_unlock(&ls->ls_requestqueue_mutex); mutex_unlock(&ls->ls_requestqueue_mutex);
return rv;
} }
int dlm_process_requestqueue(struct dlm_ls *ls) int dlm_process_requestqueue(struct dlm_ls *ls)

View File

@ -13,7 +13,7 @@
#ifndef __REQUESTQUEUE_DOT_H__ #ifndef __REQUESTQUEUE_DOT_H__
#define __REQUESTQUEUE_DOT_H__ #define __REQUESTQUEUE_DOT_H__
void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd); int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd);
int dlm_process_requestqueue(struct dlm_ls *ls); int dlm_process_requestqueue(struct dlm_ls *ls);
void dlm_wait_requestqueue(struct dlm_ls *ls); void dlm_wait_requestqueue(struct dlm_ls *ls);
void dlm_purge_requestqueue(struct dlm_ls *ls); void dlm_purge_requestqueue(struct dlm_ls *ls);