s390: Clarify pagefault interrupt

While looking at set_task_state() users I stumbled over the s390 pfault
interrupt code.  Since Heiko provided a great explanation on how it
worked, I figured we ought to preserve this.

Also make a few little tweaks to the code to aid in readability and
explicitly comment the unusual blocking scheme.

Based-on-text-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
This commit is contained in:
Peter Zijlstra 2016-03-22 21:42:53 +01:00 committed by Martin Schwidefsky
parent 5a3b7b1128
commit 0227f7c42d

View File

@ -631,6 +631,29 @@ void pfault_fini(void)
static DEFINE_SPINLOCK(pfault_lock); static DEFINE_SPINLOCK(pfault_lock);
static LIST_HEAD(pfault_list); static LIST_HEAD(pfault_list);
#define PF_COMPLETE 0x0080
/*
* The mechanism of our pfault code: if Linux is running as a guest and runs a
* user space process, and that process accesses a page that the host has
* paged out, we get a pfault interrupt.
*
* This allows us, within the guest, to schedule a different process. Without
* this mechanism the host would have to suspend the whole virtual cpu until
* the page has been paged in.
*
* So when we get such an interrupt, we set the state of the current task
* to uninterruptible and also set the need_resched flag. Both happen within
* interrupt context(!). If we later on want to return to user space we
* recognize the need_resched flag and then call schedule(). It's not very
* obvious how this works...
*
* Of course we have a lot of additional fun with the completion interrupt (->
* host signals that a page of a process has been paged in and the process can
* continue to run). This interrupt can arrive on any cpu and, since we have
* virtual cpus, actually appear before the interrupt that signals that a page
* is missing.
*/
static void pfault_interrupt(struct ext_code ext_code, static void pfault_interrupt(struct ext_code ext_code,
unsigned int param32, unsigned long param64) unsigned int param32, unsigned long param64)
{ {
@ -639,10 +662,9 @@ static void pfault_interrupt(struct ext_code ext_code,
pid_t pid; pid_t pid;
/* /*
* Get the external interruption subcode & pfault * Get the external interruption subcode & pfault initial/completion
* initial/completion signal bit. VM stores this * signal bit. VM stores this in the 'cpu address' field associated
* in the 'cpu address' field associated with the * with the external interrupt.
* external interrupt.
*/ */
subcode = ext_code.subcode; subcode = ext_code.subcode;
if ((subcode & 0xff00) != __SUBCODE_MASK) if ((subcode & 0xff00) != __SUBCODE_MASK)
@ -658,7 +680,7 @@ static void pfault_interrupt(struct ext_code ext_code,
if (!tsk) if (!tsk)
return; return;
spin_lock(&pfault_lock); spin_lock(&pfault_lock);
if (subcode & 0x0080) { if (subcode & PF_COMPLETE) {
/* signal bit is set -> a page has been swapped in by VM */ /* signal bit is set -> a page has been swapped in by VM */
if (tsk->thread.pfault_wait == 1) { if (tsk->thread.pfault_wait == 1) {
/* Initial interrupt was faster than the completion /* Initial interrupt was faster than the completion
@ -687,8 +709,7 @@ static void pfault_interrupt(struct ext_code ext_code,
goto out; goto out;
if (tsk->thread.pfault_wait == 1) { if (tsk->thread.pfault_wait == 1) {
/* Already on the list with a reference: put to sleep */ /* Already on the list with a reference: put to sleep */
__set_task_state(tsk, TASK_UNINTERRUPTIBLE); goto block;
set_tsk_need_resched(tsk);
} else if (tsk->thread.pfault_wait == -1) { } else if (tsk->thread.pfault_wait == -1) {
/* Completion interrupt was faster than the initial /* Completion interrupt was faster than the initial
* interrupt (pfault_wait == -1). Set pfault_wait * interrupt (pfault_wait == -1). Set pfault_wait
@ -703,7 +724,11 @@ static void pfault_interrupt(struct ext_code ext_code,
get_task_struct(tsk); get_task_struct(tsk);
tsk->thread.pfault_wait = 1; tsk->thread.pfault_wait = 1;
list_add(&tsk->thread.list, &pfault_list); list_add(&tsk->thread.list, &pfault_list);
__set_task_state(tsk, TASK_UNINTERRUPTIBLE); block:
/* Since this must be a userspace fault, there
* is no kernel task state to trample. Rely on the
* return to userspace schedule() to block. */
__set_current_state(TASK_UNINTERRUPTIBLE);
set_tsk_need_resched(tsk); set_tsk_need_resched(tsk);
} }
} }