linux/arch/powerpc/mm/mmu_context.c


/*
 * Common implementation of switch_mm_irqs_off
 *
 * Copyright IBM Corp. 2017
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/sched/mm.h>
#include <asm/mmu_context.h>
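
/*
 * Each platform caches the current PGD pointer in a different place for
 * fast access at context-switch time: the thread struct on 32-bit, the
 * PACA on 64-bit Book3E, and nowhere at all on the remaining (64-bit
 * Book3S) platforms, which have no extra tracking to do here.
 */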
#if defined(CONFIG_PPC32)
static inline void switch_mm_pgdir(struct task_struct *tsk,
				   struct mm_struct *mm)
{
	/* 32-bit keeps track of the current PGDIR in the thread struct */
	tsk->thread.pgdir = mm->pgd;
}
#elif defined(CONFIG_PPC_BOOK3E_64)
static inline void switch_mm_pgdir(struct task_struct *tsk,
				   struct mm_struct *mm)
{
	/* 64-bit Book3E keeps track of the current PGD in the PACA */
	get_paca()->pgd = mm->pgd;
}
#else
static inline void switch_mm_pgdir(struct task_struct *tsk,
				   struct mm_struct *mm) { }
#endif
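
/*
 * Switch this CPU from the prev mm to the next mm on behalf of tsk.
 * Interrupts must already be disabled by the caller, hence the
 * _irqs_off suffix.
 */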
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
			struct task_struct *tsk)
{
	bool new_on_cpu = false;

	/* Mark this context as having been used on the new CPU */
	if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) {
		cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
		inc_mm_active_cpus(next);

		/*
		 * This full barrier orders the store to the cpumask above vs
		 * a subsequent operation which allows this CPU to begin
		 * loading translations for next.
		 *
		 * When using the radix MMU, that operation is the load of
		 * the MMU context id, which is then moved to SPRN_PID.
		 *
		 * For the hash MMU it is either the first load from
		 * slb_cache in switch_slb(), and/or the store of
		 * paca->mm_ctx_id in copy_mm_to_paca().
		 *
		 * On the other side, the barrier is in mm/tlb-radix.c for
		 * radix, which orders earlier stores that clear the PTEs vs
		 * the load of mm_cpumask; pte_xchg does the same thing for
		 * hash.
		 *
		 * This full barrier is also needed by membarrier when
		 * switching between processes, after the store to rq->curr
		 * and before user-space memory accesses.
		 */
		smp_mb();

		new_on_cpu = true;
	}

	/* Some subarchs need to track the PGD elsewhere */
	switch_mm_pgdir(tsk, next);

	/* Nothing else to do if we aren't actually switching */
	if (prev == next)
		return;

	/*
	 * We must stop all AltiVec streams before changing the HW
	 * context.
	 */
	if (cpu_has_feature(CPU_FTR_ALTIVEC))
		asm volatile ("dssall");
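
	/*
	 * If this mm is new on the CPU, the smp_mb() above already provides
	 * the full barrier that membarrier requires, so only the radix KVM
	 * prefetch workaround needs to run; otherwise, let membarrier decide
	 * whether a barrier has to be issued for this switch.
	 */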
	if (new_on_cpu)
		radix_kvm_prefetch_workaround(next);
	else
		membarrier_arch_switch_mm(prev, next, tsk);

	/*
	 * The actual HW switching method differs between the various
	 * sub-architectures. Out of line for now.
	 */
	switch_mmu_context(prev, next, tsk);
}