mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-12-11 21:14:07 +08:00
um: time-travel: fix signal blocking race/hang
[ Upstream commit2cf3a3c4b8
] When signals are hard-blocked in order to do time-travel socket processing, we set signals_blocked and then handle SIGIO signals by setting the SIGIO bit in signals_pending. When unblocking, we first set signals_blocked to 0, and then handle all pending signals. We have to set it first, so that we can again properly block/unblock inside the unblock, if the time-travel handlers need to be processed. Unfortunately, this is racy. We can get into this situation: // signals_pending = SIGIO_MASK unblock_signals_hard() signals_blocked = 0; if (signals_pending && signals_enabled) { block_signals(); unblock_signals() ... sig_handler_common(SIGIO, NULL, NULL); sigio_handler() ... sigio_reg_handler() irq_do_timetravel_handler() reg->timetravel_handler() == vu_req_interrupt_comm_handler() vu_req_read_message() vhost_user_recv_req() vhost_user_recv() vhost_user_recv_header() // reads 12 bytes header of // 20 bytes message <-- receive SIGIO here <-- sig_handler() int enabled = signals_enabled; // 1 if ((signals_blocked || !enabled) && (sig == SIGIO)) { if (!signals_blocked && time_travel_mode == TT_MODE_EXTERNAL) sigio_run_timetravel_handlers() _sigio_handler() sigio_reg_handler() ... as above ... vhost_user_recv_header() // reads 8 bytes that were message payload // as if it were header - but aborts since // it then gets -EAGAIN ... --> end signal handler --> // continue in vhost_user_recv() // full_read() for 8 bytes payload busy loops // entire process hangs here Conceptually, to fix this, we need to ensure that the signal handler cannot run while we hard-unblock signals. The thing that makes this more complex is that we can be doing hard-block/unblock while unblocking. Introduce a new signals_blocked_pending variable that we can keep at non-zero as long as pending signals are being processed, then we only need to ensure it's decremented safely and the signal handler will only increment it if it's already non-zero (or signals_blocked is set, of course.) Note also that only the outermost call to hard-unblock is allowed to decrement signals_blocked_pending, since it could otherwise reach zero in an inner call, and leave the same race happening if the timetravel_handler loops, but that's basically required of it. Fixes:d6b399a0e0
("um: time-travel/signals: fix ndelay() in interrupt") Link: https://patch.msgid.link/20240703110144.28034-2-johannes@sipsolutions.net Signed-off-by: Johannes Berg <johannes.berg@intel.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
parent
f66d436204
commit
0aa0284818
@ -8,6 +8,7 @@
|
|||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
|
#include <stdbool.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
@ -65,9 +66,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
|
|||||||
|
|
||||||
int signals_enabled;
|
int signals_enabled;
|
||||||
#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
|
#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
|
||||||
static int signals_blocked;
|
static int signals_blocked, signals_blocked_pending;
|
||||||
#else
|
|
||||||
#define signals_blocked 0
|
|
||||||
#endif
|
#endif
|
||||||
static unsigned int signals_pending;
|
static unsigned int signals_pending;
|
||||||
static unsigned int signals_active = 0;
|
static unsigned int signals_active = 0;
|
||||||
@ -76,14 +75,27 @@ void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
|
|||||||
{
|
{
|
||||||
int enabled = signals_enabled;
|
int enabled = signals_enabled;
|
||||||
|
|
||||||
if ((signals_blocked || !enabled) && (sig == SIGIO)) {
|
#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
|
||||||
|
if ((signals_blocked ||
|
||||||
|
__atomic_load_n(&signals_blocked_pending, __ATOMIC_SEQ_CST)) &&
|
||||||
|
(sig == SIGIO)) {
|
||||||
|
/* increment so unblock will do another round */
|
||||||
|
__atomic_add_fetch(&signals_blocked_pending, 1,
|
||||||
|
__ATOMIC_SEQ_CST);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (!enabled && (sig == SIGIO)) {
|
||||||
/*
|
/*
|
||||||
* In TT_MODE_EXTERNAL, need to still call time-travel
|
* In TT_MODE_EXTERNAL, need to still call time-travel
|
||||||
* handlers unless signals are also blocked for the
|
* handlers. This will mark signals_pending by itself
|
||||||
* external time message processing. This will mark
|
* (only if necessary.)
|
||||||
* signals_pending by itself (only if necessary.)
|
* Note we won't get here if signals are hard-blocked
|
||||||
|
* (which is handled above), in that case the hard-
|
||||||
|
* unblock will handle things.
|
||||||
*/
|
*/
|
||||||
if (!signals_blocked && time_travel_mode == TT_MODE_EXTERNAL)
|
if (time_travel_mode == TT_MODE_EXTERNAL)
|
||||||
sigio_run_timetravel_handlers();
|
sigio_run_timetravel_handlers();
|
||||||
else
|
else
|
||||||
signals_pending |= SIGIO_MASK;
|
signals_pending |= SIGIO_MASK;
|
||||||
@ -380,33 +392,99 @@ int um_set_signals_trace(int enable)
|
|||||||
#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
|
#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
|
||||||
void mark_sigio_pending(void)
|
void mark_sigio_pending(void)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* It would seem that this should be atomic so
|
||||||
|
* it isn't a read-modify-write with a signal
|
||||||
|
* that could happen in the middle, losing the
|
||||||
|
* value set by the signal.
|
||||||
|
*
|
||||||
|
* However, this function is only called when in
|
||||||
|
* time-travel=ext simulation mode, in which case
|
||||||
|
* the only signal ever pending is SIGIO, which
|
||||||
|
* is blocked while this can be called, and the
|
||||||
|
* timer signal (SIGALRM) cannot happen.
|
||||||
|
*/
|
||||||
signals_pending |= SIGIO_MASK;
|
signals_pending |= SIGIO_MASK;
|
||||||
}
|
}
|
||||||
|
|
||||||
void block_signals_hard(void)
|
void block_signals_hard(void)
|
||||||
{
|
{
|
||||||
if (signals_blocked)
|
signals_blocked++;
|
||||||
return;
|
|
||||||
signals_blocked = 1;
|
|
||||||
barrier();
|
barrier();
|
||||||
}
|
}
|
||||||
|
|
||||||
void unblock_signals_hard(void)
|
void unblock_signals_hard(void)
|
||||||
{
|
{
|
||||||
|
static bool unblocking;
|
||||||
|
|
||||||
if (!signals_blocked)
|
if (!signals_blocked)
|
||||||
|
panic("unblocking signals while not blocked");
|
||||||
|
|
||||||
|
if (--signals_blocked)
|
||||||
return;
|
return;
|
||||||
/* Must be set to 0 before we check the pending bits etc. */
|
/*
|
||||||
signals_blocked = 0;
|
* Must be set to 0 before we check pending so the
|
||||||
|
* SIGIO handler will run as normal unless we're still
|
||||||
|
* going to process signals_blocked_pending.
|
||||||
|
*/
|
||||||
barrier();
|
barrier();
|
||||||
|
|
||||||
if (signals_pending && signals_enabled) {
|
/*
|
||||||
/* this is a bit inefficient, but that's not really important */
|
* Note that block_signals_hard()/unblock_signals_hard() can be called
|
||||||
block_signals();
|
* within the unblock_signals()/sigio_run_timetravel_handlers() below.
|
||||||
unblock_signals();
|
* This would still be prone to race conditions since it's actually a
|
||||||
} else if (signals_pending & SIGIO_MASK) {
|
* call _within_ e.g. vu_req_read_message(), where we observed this
|
||||||
/* we need to run time-travel handlers even if not enabled */
|
* issue, which loops. Thus, if the inner call handles the recorded
|
||||||
sigio_run_timetravel_handlers();
|
* pending signals, we can get out of the inner call with the real
|
||||||
|
* signal hander no longer blocked, and still have a race. Thus don't
|
||||||
|
* handle unblocking in the inner call, if it happens, but only in
|
||||||
|
* the outermost call - 'unblocking' serves as an ownership for the
|
||||||
|
* signals_blocked_pending decrement.
|
||||||
|
*/
|
||||||
|
if (unblocking)
|
||||||
|
return;
|
||||||
|
unblocking = true;
|
||||||
|
|
||||||
|
while (__atomic_load_n(&signals_blocked_pending, __ATOMIC_SEQ_CST)) {
|
||||||
|
if (signals_enabled) {
|
||||||
|
/* signals are enabled so we can touch this */
|
||||||
|
signals_pending |= SIGIO_MASK;
|
||||||
|
/*
|
||||||
|
* this is a bit inefficient, but that's
|
||||||
|
* not really important
|
||||||
|
*/
|
||||||
|
block_signals();
|
||||||
|
unblock_signals();
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* we need to run time-travel handlers even
|
||||||
|
* if not enabled
|
||||||
|
*/
|
||||||
|
sigio_run_timetravel_handlers();
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The decrement of signals_blocked_pending must be atomic so
|
||||||
|
* that the signal handler will either happen before or after
|
||||||
|
* the decrement, not during a read-modify-write:
|
||||||
|
* - If it happens before, it can increment it and we'll
|
||||||
|
* decrement it and do another round in the loop.
|
||||||
|
* - If it happens after it'll see 0 for both signals_blocked
|
||||||
|
* and signals_blocked_pending and thus run the handler as
|
||||||
|
* usual (subject to signals_enabled, but that's unrelated.)
|
||||||
|
*
|
||||||
|
* Note that a call to unblock_signals_hard() within the calls
|
||||||
|
* to unblock_signals() or sigio_run_timetravel_handlers() above
|
||||||
|
* will do nothing due to the 'unblocking' state, so this cannot
|
||||||
|
* underflow as the only one decrementing will be the outermost
|
||||||
|
* one.
|
||||||
|
*/
|
||||||
|
if (__atomic_sub_fetch(&signals_blocked_pending, 1,
|
||||||
|
__ATOMIC_SEQ_CST) < 0)
|
||||||
|
panic("signals_blocked_pending underflow");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unblocking = false;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user