2019-05-27 14:55:01 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
2005-10-18 09:17:58 +08:00
|
|
|
* Signal handling for 32bit PPC and 32bit tasks on 64bit PPC
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2005-10-18 09:17:58 +08:00
|
|
|
* PowerPC version
|
|
|
|
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
|
2005-04-17 06:20:36 +08:00
|
|
|
* Copyright (C) 2001 IBM
|
|
|
|
* Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
|
|
|
|
* Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
|
|
|
|
*
|
2005-10-18 09:17:58 +08:00
|
|
|
* Derived from "arch/i386/kernel/signal.c"
|
|
|
|
* Copyright (C) 1991, 1992 Linus Torvalds
|
|
|
|
* 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/sched.h>
|
2005-10-18 09:17:58 +08:00
|
|
|
#include <linux/mm.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/smp.h>
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/signal.h>
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/elf.h>
|
2007-12-14 07:56:06 +08:00
|
|
|
#include <linux/ptrace.h>
|
2018-04-25 00:04:25 +08:00
|
|
|
#include <linux/pagemap.h>
|
2011-06-04 13:36:54 +08:00
|
|
|
#include <linux/ratelimit.h>
|
2005-10-18 09:17:58 +08:00
|
|
|
#include <linux/syscalls.h>
|
2018-05-02 21:20:47 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/compat.h>
|
2005-10-18 09:17:58 +08:00
|
|
|
#else
|
|
|
|
#include <linux/wait.h>
|
|
|
|
#include <linux/unistd.h>
|
|
|
|
#include <linux/stddef.h>
|
|
|
|
#include <linux/tty.h>
|
|
|
|
#include <linux/binfmts.h>
|
|
|
|
#endif
|
|
|
|
|
2016-12-25 03:46:01 +08:00
|
|
|
#include <linux/uaccess.h>
|
2005-10-18 09:17:58 +08:00
|
|
|
#include <asm/cacheflush.h>
|
2006-03-23 07:00:08 +08:00
|
|
|
#include <asm/syscalls.h>
|
2005-11-09 08:21:07 +08:00
|
|
|
#include <asm/sigcontext.h>
|
2005-11-11 18:15:21 +08:00
|
|
|
#include <asm/vdso.h>
|
2012-03-29 01:30:02 +08:00
|
|
|
#include <asm/switch_to.h>
|
2013-02-14 00:21:41 +08:00
|
|
|
#include <asm/tm.h>
|
2016-09-06 13:32:43 +08:00
|
|
|
#include <asm/asm-prototypes.h>
|
2005-10-18 09:17:58 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
2005-11-03 12:32:07 +08:00
|
|
|
#include "ppc32.h"
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <asm/unistd.h>
|
2005-10-18 09:17:58 +08:00
|
|
|
#else
|
|
|
|
#include <asm/ucontext.h>
|
|
|
|
#include <asm/pgtable.h>
|
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2007-06-04 13:15:49 +08:00
|
|
|
#include "signal.h"
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2005-10-18 09:17:58 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
|
|
|
#define old_sigaction old_sigaction32
|
|
|
|
#define sigcontext sigcontext32
|
|
|
|
#define mcontext mcontext32
|
|
|
|
#define ucontext ucontext32
|
|
|
|
|
2012-12-23 16:26:46 +08:00
|
|
|
#define __save_altstack __compat_save_altstack
|
|
|
|
|
2008-07-08 16:43:41 +08:00
|
|
|
/*
|
|
|
|
* Userspace code may pass a ucontext which doesn't include VSX added
|
|
|
|
* at the end. We need to check for this case.
|
|
|
|
*/
|
|
|
|
#define UCONTEXTSIZEWITHOUTVSX \
|
|
|
|
(sizeof(struct ucontext) - sizeof(elf_vsrreghalf_t32))
|
|
|
|
|
2005-10-18 09:17:58 +08:00
|
|
|
/*
|
|
|
|
* Returning 0 means we return to userspace via
|
|
|
|
* ret_from_except and thus restore all user
|
|
|
|
* registers from *regs. This is what we need
|
|
|
|
* to do when a signal has been delivered.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define GP_REGS_SIZE min(sizeof(elf_gregset_t32), sizeof(struct pt_regs32))
|
|
|
|
#undef __SIGNAL_FRAMESIZE
|
|
|
|
#define __SIGNAL_FRAMESIZE __SIGNAL_FRAMESIZE32
|
|
|
|
#undef ELF_NVRREG
|
|
|
|
#define ELF_NVRREG ELF_NVRREG32
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Functions for flipping sigsets (thanks to brain dead generic
|
|
|
|
* implementation that makes things simple for little endian only)
|
|
|
|
*/
|
|
|
|
/*
 * Write the kernel signal set @set out to the user-space compat signal
 * set @uset.  Returns whatever put_compat_sigset() returns.
 */
static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set)
{
	int err;

	err = put_compat_sigset(uset, set, sizeof(*uset));
	return err;
}
|
|
|
|
|
2005-10-19 21:13:04 +08:00
|
|
|
static inline int get_sigset_t(sigset_t *set,
|
|
|
|
const compat_sigset_t __user *uset)
|
2005-10-18 09:17:58 +08:00
|
|
|
{
|
2017-09-05 00:17:38 +08:00
|
|
|
return get_compat_sigset(set, uset);
|
2005-10-18 09:17:58 +08:00
|
|
|
}
|
|
|
|
|
2006-02-01 18:28:09 +08:00
|
|
|
#define to_user_ptr(p) ptr_to_compat(p)
|
2005-10-18 09:17:58 +08:00
|
|
|
#define from_user_ptr(p) compat_ptr(p)
|
|
|
|
|
|
|
|
static inline int save_general_regs(struct pt_regs *regs,
|
|
|
|
struct mcontext __user *frame)
|
|
|
|
{
|
|
|
|
elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
|
|
|
|
int i;
|
2017-08-21 01:58:24 +08:00
|
|
|
/* Force usr to alway see softe as 1 (interrupts enabled) */
|
|
|
|
elf_greg_t64 softe = 0x1;
|
2005-10-18 09:17:58 +08:00
|
|
|
|
2006-03-08 10:24:22 +08:00
|
|
|
WARN_ON(!FULL_REGS(regs));
|
[PATCH] syscall entry/exit revamp
This cleanup patch speeds up the null syscall path on ppc64 by about 3%,
and brings the ppc32 and ppc64 code slightly closer together.
The ppc64 code was checking current_thread_info()->flags twice in the
syscall exit path; once for TIF_SYSCALL_T_OR_A before disabling
interrupts, and then again for TIF_SIGPENDING|TIF_NEED_RESCHED etc after
disabling interrupts. Now we do the same as ppc32 -- check the flags
only once in the fast path, and re-enable interrupts if necessary in the
ptrace case.
The patch abolishes the 'syscall_noerror' member of struct thread_info
and replaces it with a TIF_NOERROR bit in the flags, which is handled in
the slow path. This shortens the syscall entry code, which no longer
needs to clear syscall_noerror.
The patch adds a TIF_SAVE_NVGPRS flag which causes the syscall exit slow
path to save the non-volatile GPRs into a signal frame. This removes the
need for the assembly wrappers around sys_sigsuspend(),
sys_rt_sigsuspend(), et al which existed solely to save those registers
in advance. It also means I don't have to add new wrappers for ppoll()
and pselect(), which is what I was supposed to be doing when I got
distracted into this...
Finally, it unifies the ppc64 and ppc32 methods of handling syscall exit
directly into a signal handler (as required by sigsuspend et al) by
introducing a TIF_RESTOREALL flag which causes _all_ the registers to be
reloaded from the pt_regs by taking the ret_from_exception path, instead
of the normal syscall exit path which stomps on the callee-saved GPRs.
It appears to pass an LTP test run on ppc64, and passes basic testing on
ppc32 too. Brief tests of ptrace functionality with strace and gdb also
appear OK. I wouldn't send it to Linus for 2.6.15 just yet though :)
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2005-11-16 02:52:18 +08:00
|
|
|
|
|
|
|
for (i = 0; i <= PT_RESULT; i ++) {
|
|
|
|
if (i == 14 && !FULL_REGS(regs))
|
|
|
|
i = 32;
|
2017-08-21 01:58:24 +08:00
|
|
|
if ( i == PT_SOFTE) {
|
|
|
|
if(__put_user((unsigned int)softe, &frame->mc_gregs[i]))
|
|
|
|
return -EFAULT;
|
|
|
|
else
|
|
|
|
continue;
|
|
|
|
}
|
2005-10-18 09:17:58 +08:00
|
|
|
if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i]))
|
|
|
|
return -EFAULT;
|
[PATCH] syscall entry/exit revamp
This cleanup patch speeds up the null syscall path on ppc64 by about 3%,
and brings the ppc32 and ppc64 code slightly closer together.
The ppc64 code was checking current_thread_info()->flags twice in the
syscall exit path; once for TIF_SYSCALL_T_OR_A before disabling
interrupts, and then again for TIF_SIGPENDING|TIF_NEED_RESCHED etc after
disabling interrupts. Now we do the same as ppc32 -- check the flags
only once in the fast path, and re-enable interrupts if necessary in the
ptrace case.
The patch abolishes the 'syscall_noerror' member of struct thread_info
and replaces it with a TIF_NOERROR bit in the flags, which is handled in
the slow path. This shortens the syscall entry code, which no longer
needs to clear syscall_noerror.
The patch adds a TIF_SAVE_NVGPRS flag which causes the syscall exit slow
path to save the non-volatile GPRs into a signal frame. This removes the
need for the assembly wrappers around sys_sigsuspend(),
sys_rt_sigsuspend(), et al which existed solely to save those registers
in advance. It also means I don't have to add new wrappers for ppoll()
and pselect(), which is what I was supposed to be doing when I got
distracted into this...
Finally, it unifies the ppc64 and ppc32 methods of handling syscall exit
directly into a signal handler (as required by sigsuspend et al) by
introducing a TIF_RESTOREALL flag which causes _all_ the registers to be
reloaded from the pt_regs by taking the ret_from_exception path, instead
of the normal syscall exit path which stomps on the callee-saved GPRs.
It appears to pass an LTP test run on ppc64, and passes basic testing on
ppc32 too. Brief tests of ptrace functionality with strace and gdb also
appear OK. I wouldn't send it to Linus for 2.6.15 just yet though :)
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2005-11-16 02:52:18 +08:00
|
|
|
}
|
2005-10-18 09:17:58 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int restore_general_regs(struct pt_regs *regs,
|
|
|
|
struct mcontext __user *sr)
|
|
|
|
{
|
|
|
|
elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i <= PT_RESULT; i++) {
|
|
|
|
if ((i == PT_MSR) || (i == PT_SOFTE))
|
|
|
|
continue;
|
|
|
|
if (__get_user(gregs[i], &sr->mc_gregs[i]))
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#else /* CONFIG_PPC64 */
|
|
|
|
|
|
|
|
#define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs))
|
|
|
|
|
|
|
|
/*
 * Copy the kernel signal set @set to the user-space signal set @uset.
 * Returns the result of copy_to_user() (zero when everything was copied).
 */
static inline int put_sigset_t(sigset_t __user *uset, sigset_t *set)
{
	unsigned long left;

	left = copy_to_user(uset, set, sizeof(*uset));
	return left;
}
|
|
|
|
|
2005-10-19 21:13:04 +08:00
|
|
|
/*
 * Copy the user-space signal set @uset into the kernel signal set @set.
 * Returns the result of copy_from_user() (zero when everything was
 * copied).
 */
static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset)
{
	unsigned long left;

	left = copy_from_user(set, uset, sizeof(*uset));
	return left;
}
|
|
|
|
|
2006-02-01 18:28:09 +08:00
|
|
|
#define to_user_ptr(p) ((unsigned long)(p))
|
|
|
|
#define from_user_ptr(p) ((void __user *)(p))
|
2005-10-18 09:17:58 +08:00
|
|
|
|
|
|
|
static inline int save_general_regs(struct pt_regs *regs,
|
|
|
|
struct mcontext __user *frame)
|
|
|
|
{
|
2006-03-08 10:24:22 +08:00
|
|
|
WARN_ON(!FULL_REGS(regs));
|
2005-10-18 09:17:58 +08:00
|
|
|
return __copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int restore_general_regs(struct pt_regs *regs,
|
|
|
|
struct mcontext __user *sr)
|
|
|
|
{
|
|
|
|
/* copy up to but not including MSR */
|
|
|
|
if (__copy_from_user(regs, &sr->mc_gregs,
|
|
|
|
PT_MSR * sizeof(elf_greg_t)))
|
|
|
|
return -EFAULT;
|
|
|
|
/* copy from orig_r3 (the word after the MSR) up to the end */
|
|
|
|
if (__copy_from_user(®s->orig_gpr3, &sr->mc_gregs[PT_ORIG_R3],
|
|
|
|
GP_REGS_SIZE - PT_ORIG_R3 * sizeof(elf_greg_t)))
|
|
|
|
return -EFAULT;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* When we have signals to deliver, we set up on the
|
|
|
|
* user stack, going down from the original stack pointer:
|
2007-06-04 15:22:48 +08:00
|
|
|
* an ABI gap of 56 words
|
|
|
|
* an mcontext struct
|
2005-10-18 09:17:58 +08:00
|
|
|
* a sigcontext struct
|
|
|
|
* a gap of __SIGNAL_FRAMESIZE bytes
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2007-06-04 15:22:48 +08:00
|
|
|
* Each of these things must be a multiple of 16 bytes in size. The following
|
|
|
|
* structure represents all of this except the __SIGNAL_FRAMESIZE gap
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
|
|
|
*/
|
2007-06-04 15:22:48 +08:00
|
|
|
struct sigframe {
|
|
|
|
struct sigcontext sctx; /* the sigcontext */
|
2005-10-18 09:17:58 +08:00
|
|
|
struct mcontext mctx; /* all the register values */
|
2013-02-14 00:21:41 +08:00
|
|
|
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
|
|
|
struct sigcontext sctx_transact;
|
|
|
|
struct mcontext mctx_transact;
|
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Programs using the rs6000/xcoff abi can save up to 19 gp
|
|
|
|
* regs and 18 fp regs below sp before decrementing it.
|
|
|
|
*/
|
|
|
|
int abigap[56];
|
|
|
|
};
|
|
|
|
|
|
|
|
/* We use the mc_pad field for the signal return trampoline. */
|
|
|
|
#define tramp mc_pad
|
|
|
|
|
|
|
|
/*
|
|
|
|
* When we have rt signals to deliver, we set up on the
|
|
|
|
* user stack, going down from the original stack pointer:
|
2005-10-18 09:17:58 +08:00
|
|
|
* one rt_sigframe struct (siginfo + ucontext + ABI gap)
|
|
|
|
* a gap of __SIGNAL_FRAMESIZE+16 bytes
|
|
|
|
* (the +16 is to get the siginfo and ucontext in the same
|
2005-04-17 06:20:36 +08:00
|
|
|
* positions as in older kernels).
|
|
|
|
*
|
|
|
|
* Each of these things must be a multiple of 16 bytes in size.
|
|
|
|
*
|
|
|
|
*/
|
2005-10-18 09:17:58 +08:00
|
|
|
struct rt_sigframe {
|
|
|
|
#ifdef CONFIG_PPC64
|
|
|
|
compat_siginfo_t info;
|
|
|
|
#else
|
|
|
|
struct siginfo info;
|
|
|
|
#endif
|
|
|
|
struct ucontext uc;
|
2013-02-14 00:21:41 +08:00
|
|
|
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
|
|
|
struct ucontext uc_transact;
|
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Programs using the rs6000/xcoff abi can save up to 19 gp
|
|
|
|
* regs and 18 fp regs below sp before decrementing it.
|
|
|
|
*/
|
|
|
|
int abigap[56];
|
|
|
|
};
|
|
|
|
|
2008-07-02 12:06:37 +08:00
|
|
|
#ifdef CONFIG_VSX
|
|
|
|
/*
 * Copy the floating point registers of @task, followed by its FPSCR, to
 * the user buffer @to.  The values are first gathered from the
 * thread_struct into a local buffer of ELF_NFPREG doublewords, which is
 * then written to user space in a single copy.
 *
 * Returns the result of __copy_to_user() (zero on success).
 */
unsigned long copy_fpr_to_user(void __user *to,
			       struct task_struct *task)
{
	u64 staged[ELF_NFPREG];
	int n = 0;

	/* gather the FPRs; the last slot is reserved for the FPSCR */
	while (n < ELF_NFPREG - 1) {
		staged[n] = task->thread.TS_FPR(n);
		n++;
	}
	staged[ELF_NFPREG - 1] = task->thread.fp_state.fpscr;

	return __copy_to_user(to, staged, sizeof(staged));
}
|
|
|
|
|
|
|
|
unsigned long copy_fpr_from_user(struct task_struct *task,
|
|
|
|
void __user *from)
|
|
|
|
{
|
2013-09-10 18:20:42 +08:00
|
|
|
u64 buf[ELF_NFPREG];
|
2008-07-02 12:06:37 +08:00
|
|
|
int i;
|
|
|
|
|
|
|
|
if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
|
|
|
|
return 1;
|
|
|
|
for (i = 0; i < (ELF_NFPREG - 1) ; i++)
|
|
|
|
task->thread.TS_FPR(i) = buf[i];
|
2013-09-10 18:20:42 +08:00
|
|
|
task->thread.fp_state.fpscr = buf[i];
|
2008-07-02 12:06:37 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Copy the TS_VSRLOWOFFSET doubleword of each of the ELF_NVSRHALFREG
 * entries in @task's fp_state.fpr[] to the user buffer @to.  The values
 * are gathered into a local buffer and written to user space in a
 * single copy.
 *
 * Returns the result of __copy_to_user() (zero on success).
 */
unsigned long copy_vsx_to_user(void __user *to,
			       struct task_struct *task)
{
	u64 staged[ELF_NVSRHALFREG];
	int n = 0;

	while (n < ELF_NVSRHALFREG) {
		staged[n] = task->thread.fp_state.fpr[n][TS_VSRLOWOFFSET];
		n++;
	}

	return __copy_to_user(to, staged, sizeof(staged));
}
|
|
|
|
|
|
|
|
unsigned long copy_vsx_from_user(struct task_struct *task,
|
|
|
|
void __user *from)
|
|
|
|
{
|
2013-09-10 18:20:42 +08:00
|
|
|
u64 buf[ELF_NVSRHALFREG];
|
2008-07-02 12:06:37 +08:00
|
|
|
int i;
|
|
|
|
|
|
|
|
if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
|
|
|
|
return 1;
|
|
|
|
for (i = 0; i < ELF_NVSRHALFREG ; i++)
|
2013-09-10 18:20:42 +08:00
|
|
|
task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
|
2008-07-02 12:06:37 +08:00
|
|
|
return 0;
|
|
|
|
}
|
2013-02-14 00:21:41 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
2016-09-23 14:18:25 +08:00
|
|
|
/*
 * Copy the TS_CKFPR() floating point registers of @task, followed by
 * ckfp_state.fpscr, to the user buffer @to.  The values are gathered
 * into a local buffer of ELF_NFPREG doublewords and written to user
 * space in a single copy.
 *
 * Returns the result of __copy_to_user() (zero on success).
 */
unsigned long copy_ckfpr_to_user(void __user *to,
				 struct task_struct *task)
{
	u64 staged[ELF_NFPREG];
	int n = 0;

	/* gather the FPRs; the last slot is reserved for the FPSCR */
	while (n < ELF_NFPREG - 1) {
		staged[n] = task->thread.TS_CKFPR(n);
		n++;
	}
	staged[ELF_NFPREG - 1] = task->thread.ckfp_state.fpscr;

	return __copy_to_user(to, staged, sizeof(staged));
}
|
|
|
|
|
2016-09-23 14:18:25 +08:00
|
|
|
unsigned long copy_ckfpr_from_user(struct task_struct *task,
|
2013-02-14 00:21:41 +08:00
|
|
|
void __user *from)
|
|
|
|
{
|
2013-09-10 18:20:42 +08:00
|
|
|
u64 buf[ELF_NFPREG];
|
2013-02-14 00:21:41 +08:00
|
|
|
int i;
|
|
|
|
|
|
|
|
if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
|
|
|
|
return 1;
|
|
|
|
for (i = 0; i < (ELF_NFPREG - 1) ; i++)
|
2016-09-23 14:18:25 +08:00
|
|
|
task->thread.TS_CKFPR(i) = buf[i];
|
|
|
|
task->thread.ckfp_state.fpscr = buf[i];
|
2013-02-14 00:21:41 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-09-23 14:18:25 +08:00
|
|
|
/*
 * Copy the TS_VSRLOWOFFSET doubleword of each of the ELF_NVSRHALFREG
 * entries in @task's ckfp_state.fpr[] to the user buffer @to.  The
 * values are gathered into a local buffer and written to user space in
 * a single copy.
 *
 * Returns the result of __copy_to_user() (zero on success).
 */
unsigned long copy_ckvsx_to_user(void __user *to,
				 struct task_struct *task)
{
	u64 staged[ELF_NVSRHALFREG];
	int n = 0;

	while (n < ELF_NVSRHALFREG) {
		staged[n] = task->thread.ckfp_state.fpr[n][TS_VSRLOWOFFSET];
		n++;
	}

	return __copy_to_user(to, staged, sizeof(staged));
}
|
|
|
|
|
2016-09-23 14:18:25 +08:00
|
|
|
unsigned long copy_ckvsx_from_user(struct task_struct *task,
|
2013-02-14 00:21:41 +08:00
|
|
|
void __user *from)
|
|
|
|
{
|
2013-09-10 18:20:42 +08:00
|
|
|
u64 buf[ELF_NVSRHALFREG];
|
2013-02-14 00:21:41 +08:00
|
|
|
int i;
|
|
|
|
|
|
|
|
if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
|
|
|
|
return 1;
|
|
|
|
for (i = 0; i < ELF_NVSRHALFREG ; i++)
|
2016-09-23 14:18:25 +08:00
|
|
|
task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
|
2013-02-14 00:21:41 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
|
2008-07-02 12:06:37 +08:00
|
|
|
#else
|
|
|
|
/*
 * Copy ELF_NFPREG doublewords straight from @task's fp_state.fpr to the
 * user buffer @to.
 *
 * Returns the result of __copy_to_user() (zero on success).
 */
inline unsigned long copy_fpr_to_user(void __user *to,
				      struct task_struct *task)
{
	unsigned long left;

	left = __copy_to_user(to, task->thread.fp_state.fpr,
			      ELF_NFPREG * sizeof(double));
	return left;
}
|
|
|
|
|
|
|
|
inline unsigned long copy_fpr_from_user(struct task_struct *task,
|
|
|
|
void __user *from)
|
|
|
|
{
|
2013-09-10 18:20:42 +08:00
|
|
|
return __copy_from_user(task->thread.fp_state.fpr, from,
|
2008-07-02 12:06:37 +08:00
|
|
|
ELF_NFPREG * sizeof(double));
|
|
|
|
}
|
2013-02-14 00:21:41 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
2016-09-23 14:18:25 +08:00
|
|
|
/*
 * Copy ELF_NFPREG doublewords straight from @task's ckfp_state.fpr to
 * the user buffer @to.
 *
 * Returns the result of __copy_to_user() (zero on success).
 */
inline unsigned long copy_ckfpr_to_user(void __user *to,
					struct task_struct *task)
{
	unsigned long left;

	left = __copy_to_user(to, task->thread.ckfp_state.fpr,
			      ELF_NFPREG * sizeof(double));
	return left;
}
|
|
|
|
|
2016-09-23 14:18:25 +08:00
|
|
|
inline unsigned long copy_ckfpr_from_user(struct task_struct *task,
|
2013-02-14 00:21:41 +08:00
|
|
|
void __user *from)
|
|
|
|
{
|
2016-09-23 14:18:25 +08:00
|
|
|
return __copy_from_user(task->thread.ckfp_state.fpr, from,
|
2013-02-14 00:21:41 +08:00
|
|
|
ELF_NFPREG * sizeof(double));
|
|
|
|
}
|
|
|
|
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
|
2008-07-02 12:06:37 +08:00
|
|
|
#endif
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Save the current user registers on the user stack.
|
2005-10-18 09:17:58 +08:00
|
|
|
* We only save the altivec/spe registers if the process has used
|
|
|
|
* altivec/spe instructions at some point.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
2005-10-18 09:17:58 +08:00
|
|
|
static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
|
2013-06-09 19:23:15 +08:00
|
|
|
struct mcontext __user *tm_frame, int sigret,
|
|
|
|
int ctx_has_vsx_region)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2008-06-25 12:07:17 +08:00
|
|
|
unsigned long msr = regs->msr;
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/* Make sure floating point registers are stored in regs */
|
|
|
|
flush_fp_to_thread(current);
|
|
|
|
|
2008-06-25 12:07:18 +08:00
|
|
|
/* save general registers */
|
|
|
|
if (save_general_regs(regs, frame))
|
2005-04-17 06:20:36 +08:00
|
|
|
return 1;
|
|
|
|
|
|
|
|
#ifdef CONFIG_ALTIVEC
|
|
|
|
/* save altivec registers */
|
|
|
|
if (current->thread.used_vr) {
|
|
|
|
flush_altivec_to_thread(current);
|
2013-09-10 18:20:42 +08:00
|
|
|
if (__copy_to_user(&frame->mc_vregs, ¤t->thread.vr_state,
|
2005-10-18 09:17:58 +08:00
|
|
|
ELF_NVRREG * sizeof(vector128)))
|
2005-04-17 06:20:36 +08:00
|
|
|
return 1;
|
|
|
|
/* set MSR_VEC in the saved MSR value to indicate that
|
|
|
|
frame->mc_vregs contains valid data */
|
2008-06-25 12:07:17 +08:00
|
|
|
msr |= MSR_VEC;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
/* else assert((regs->msr & MSR_VEC) == 0) */
|
|
|
|
|
|
|
|
/* We always copy to/from vrsave, it's 0 if we don't have or don't
|
|
|
|
* use altivec. Since VSCR only contains 32 bits saved in the least
|
|
|
|
* significant bits of a vector, we "cheat" and stuff VRSAVE in the
|
|
|
|
* most significant bits of that same vector. --BenH
|
2013-08-05 12:13:16 +08:00
|
|
|
* Note that the current VRSAVE value is in the SPR at this point.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
2013-08-05 12:13:16 +08:00
|
|
|
if (cpu_has_feature(CPU_FTR_ALTIVEC))
|
|
|
|
current->thread.vrsave = mfspr(SPRN_VRSAVE);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (__put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32]))
|
|
|
|
return 1;
|
|
|
|
#endif /* CONFIG_ALTIVEC */
|
2008-07-02 12:06:37 +08:00
|
|
|
if (copy_fpr_to_user(&frame->mc_fregs, current))
|
2008-06-25 12:07:18 +08:00
|
|
|
return 1;
|
2013-11-25 08:12:20 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Clear the MSR VSX bit to indicate there is no valid state attached
|
|
|
|
* to this context, except in the specific case below where we set it.
|
|
|
|
*/
|
|
|
|
msr &= ~MSR_VSX;
|
2008-07-02 12:06:37 +08:00
|
|
|
#ifdef CONFIG_VSX
|
2008-06-25 12:07:18 +08:00
|
|
|
/*
|
|
|
|
* Copy VSR 0-31 upper half from thread_struct to local
|
|
|
|
* buffer, then write that to userspace. Also set MSR_VSX in
|
|
|
|
* the saved MSR value to indicate that frame->mc_vregs
|
|
|
|
* contains valid data
|
|
|
|
*/
|
2008-10-23 08:42:36 +08:00
|
|
|
if (current->thread.used_vsr && ctx_has_vsx_region) {
|
2015-10-29 08:44:02 +08:00
|
|
|
flush_vsx_to_thread(current);
|
2008-07-02 12:06:37 +08:00
|
|
|
if (copy_vsx_to_user(&frame->mc_vsregs, current))
|
2008-06-25 12:07:18 +08:00
|
|
|
return 1;
|
|
|
|
msr |= MSR_VSX;
|
2013-11-25 08:12:20 +08:00
|
|
|
}
|
2008-06-25 12:07:18 +08:00
|
|
|
#endif /* CONFIG_VSX */
|
2005-10-18 09:17:58 +08:00
|
|
|
#ifdef CONFIG_SPE
|
|
|
|
/* save spe registers */
|
|
|
|
if (current->thread.used_spe) {
|
|
|
|
flush_spe_to_thread(current);
|
|
|
|
if (__copy_to_user(&frame->mc_vregs, current->thread.evr,
|
|
|
|
ELF_NEVRREG * sizeof(u32)))
|
|
|
|
return 1;
|
|
|
|
/* set MSR_SPE in the saved MSR value to indicate that
|
|
|
|
frame->mc_vregs contains valid data */
|
2008-06-25 12:07:17 +08:00
|
|
|
msr |= MSR_SPE;
|
2005-10-18 09:17:58 +08:00
|
|
|
}
|
|
|
|
/* else assert((regs->msr & MSR_SPE) == 0) */
|
|
|
|
|
|
|
|
/* We always copy to/from spefscr */
|
|
|
|
if (__put_user(current->thread.spefscr, (u32 __user *)&frame->mc_vregs + ELF_NEVRREG))
|
|
|
|
return 1;
|
|
|
|
#endif /* CONFIG_SPE */
|
|
|
|
|
2008-06-25 12:07:17 +08:00
|
|
|
if (__put_user(msr, &frame->mc_gregs[PT_MSR]))
|
|
|
|
return 1;
|
2013-06-09 19:23:15 +08:00
|
|
|
/* We need to write 0 the MSR top 32 bits in the tm frame so that we
|
|
|
|
* can check it on the restore to see if TM is active
|
|
|
|
*/
|
|
|
|
if (tm_frame && __put_user(0, &tm_frame->mc_gregs[PT_MSR]))
|
|
|
|
return 1;
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
if (sigret) {
|
2018-11-10 01:33:28 +08:00
|
|
|
/* Set up the sigreturn trampoline: li 0,sigret; sc */
|
|
|
|
if (__put_user(PPC_INST_ADDI + sigret, &frame->tramp[0])
|
|
|
|
|| __put_user(PPC_INST_SC, &frame->tramp[1]))
|
2005-04-17 06:20:36 +08:00
|
|
|
return 1;
|
|
|
|
flush_icache_range((unsigned long) &frame->tramp[0],
|
|
|
|
(unsigned long) &frame->tramp[2]);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-02-14 00:21:41 +08:00
|
|
|
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
|
|
|
/*
|
|
|
|
* Save the current user registers on the user stack.
|
|
|
|
* We only save the altivec/spe registers if the process has used
|
|
|
|
* altivec/spe instructions at some point.
|
|
|
|
* We also save the transactional registers to a second ucontext in the
|
|
|
|
* frame.
|
|
|
|
*
|
|
|
|
* See save_user_regs() and signal_64.c:setup_tm_sigcontexts().
|
|
|
|
*/
|
|
|
|
static int save_tm_user_regs(struct pt_regs *regs,
|
|
|
|
struct mcontext __user *frame,
|
|
|
|
struct mcontext __user *tm_frame, int sigret)
|
|
|
|
{
|
|
|
|
unsigned long msr = regs->msr;
|
|
|
|
|
2017-10-12 18:17:19 +08:00
|
|
|
WARN_ON(tm_suspend_disabled);
|
|
|
|
|
powerpc: Don't corrupt transactional state when using FP/VMX in kernel
Currently, when we have a process using the transactional memory
facilities on POWER8 (that is, the processor is in transactional
or suspended state), and the process enters the kernel and the
kernel then uses the floating-point or vector (VMX/Altivec) facility,
we end up corrupting the user-visible FP/VMX/VSX state. This
happens, for example, if a page fault causes a copy-on-write
operation, because the copy_page function will use VMX to do the
copy on POWER8. The test program below demonstrates the bug.
The bug happens because when FP/VMX state for a transactional process
is stored in the thread_struct, we store the checkpointed state in
.fp_state/.vr_state and the transactional (current) state in
.transact_fp/.transact_vr. However, when the kernel wants to use
FP/VMX, it calls enable_kernel_fp() or enable_kernel_altivec(),
which saves the current state in .fp_state/.vr_state. Furthermore,
when we return to the user process we return with FP/VMX/VSX
disabled. The next time the process uses FP/VMX/VSX, we don't know
which set of state (the current register values, .fp_state/.vr_state,
or .transact_fp/.transact_vr) we should be using, since we have no
way to tell if we are still in the same transaction, and if not,
whether the previous transaction succeeded or failed.
Thus it is necessary to strictly adhere to the rule that if FP has
been enabled at any point in a transaction, we must keep FP enabled
for the user process with the current transactional state in the
FP registers, until we detect that it is no longer in a transaction.
Similarly for VMX; once enabled it must stay enabled until the
process is no longer transactional.
In order to keep this rule, we add a new thread_info flag which we
test when returning from the kernel to userspace, called TIF_RESTORE_TM.
This flag indicates that there is FP/VMX/VSX state to be restored
before entering userspace, and when it is set the .tm_orig_msr field
in the thread_struct indicates what state needs to be restored.
The restoration is done by restore_tm_state(). The TIF_RESTORE_TM
bit is set by new giveup_fpu/altivec_maybe_transactional helpers,
which are called from enable_kernel_fp/altivec, giveup_vsx, and
flush_fp/altivec_to_thread instead of giveup_fpu/altivec.
The other thing to be done is to get the transactional FP/VMX/VSX
state from .fp_state/.vr_state when doing reclaim, if that state
has been saved there by giveup_fpu/altivec_maybe_transactional.
Having done this, we set the FP/VMX bit in the thread's MSR after
reclaim to indicate that that part of the state is now valid
(having been reclaimed from the processor's checkpointed state).
Finally, in the signal handling code, we move the clearing of the
transactional state bits in the thread's MSR a bit earlier, before
calling flush_fp_to_thread(), so that we don't unnecessarily set
the TIF_RESTORE_TM bit.
This is the test program:
/* Michael Neuling 4/12/2013
*
* See if the altivec state is leaked out of an aborted transaction due to
* kernel vmx copy loops.
*
* gcc -m64 htm_vmxcopy.c -o htm_vmxcopy
*
*/
/* We don't use all of these, but for reference: */
int main(int argc, char *argv[])
{
long double vecin = 1.3;
long double vecout;
unsigned long pgsize = getpagesize();
int i;
int fd;
int size = pgsize*16;
char tmpfile[] = "/tmp/page_faultXXXXXX";
char buf[pgsize];
char *a;
uint64_t aborted = 0;
fd = mkstemp(tmpfile);
assert(fd >= 0);
memset(buf, 0, pgsize);
for (i = 0; i < size; i += pgsize)
assert(write(fd, buf, pgsize) == pgsize);
unlink(tmpfile);
a = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
assert(a != MAP_FAILED);
asm __volatile__(
"lxvd2x 40,0,%[vecinptr] ; " // set 40 to initial value
TBEGIN
"beq 3f ;"
TSUSPEND
"xxlxor 40,40,40 ; " // set 40 to 0
"std 5, 0(%[map]) ;" // cause kernel vmx copy page
TABORT
TRESUME
TEND
"li %[res], 0 ;"
"b 5f ;"
"3: ;" // Abort handler
"li %[res], 1 ;"
"5: ;"
"stxvd2x 40,0,%[vecoutptr] ; "
: [res]"=r"(aborted)
: [vecinptr]"r"(&vecin),
[vecoutptr]"r"(&vecout),
[map]"r"(a)
: "memory", "r0", "r3", "r4", "r5", "r6", "r7");
if (aborted && (vecin != vecout)){
printf("FAILED: vector state leaked on abort %f != %f\n",
(double)vecin, (double)vecout);
exit(1);
}
munmap(a, size);
close(fd);
printf("PASSED!\n");
return 0;
}
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2014-01-13 12:56:29 +08:00
|
|
|
/* Remove TM bits from thread's MSR. The MSR in the sigcontext
|
|
|
|
* just indicates to userland that we were doing a transaction, but we
|
|
|
|
* don't want to return in transactional state. This also ensures
|
|
|
|
* that flush_fp_to_thread won't set TIF_RESTORE_TM again.
|
|
|
|
*/
|
|
|
|
regs->msr &= ~MSR_TS_MASK;
|
|
|
|
|
2013-02-14 00:21:41 +08:00
|
|
|
/* Save both sets of general registers */
|
|
|
|
if (save_general_regs(¤t->thread.ckpt_regs, frame)
|
|
|
|
|| save_general_regs(regs, tm_frame))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
/* Stash the top half of the 64bit MSR into the 32bit MSR word
|
|
|
|
* of the transactional mcontext. This way we have a backward-compatible
|
|
|
|
* MSR in the 'normal' (checkpointed) mcontext and additionally one can
|
|
|
|
* also look at what type of transaction (T or S) was active at the
|
|
|
|
* time of the signal.
|
|
|
|
*/
|
|
|
|
if (__put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR]))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
#ifdef CONFIG_ALTIVEC
|
|
|
|
/* save altivec registers */
|
|
|
|
if (current->thread.used_vr) {
|
2016-09-23 14:18:25 +08:00
|
|
|
if (__copy_to_user(&frame->mc_vregs, ¤t->thread.ckvr_state,
|
2013-02-14 00:21:41 +08:00
|
|
|
ELF_NVRREG * sizeof(vector128)))
|
|
|
|
return 1;
|
|
|
|
if (msr & MSR_VEC) {
|
|
|
|
if (__copy_to_user(&tm_frame->mc_vregs,
|
2016-09-23 14:18:24 +08:00
|
|
|
¤t->thread.vr_state,
|
2013-02-14 00:21:41 +08:00
|
|
|
ELF_NVRREG * sizeof(vector128)))
|
|
|
|
return 1;
|
|
|
|
} else {
|
|
|
|
if (__copy_to_user(&tm_frame->mc_vregs,
|
2016-09-23 14:18:25 +08:00
|
|
|
¤t->thread.ckvr_state,
|
2013-02-14 00:21:41 +08:00
|
|
|
ELF_NVRREG * sizeof(vector128)))
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* set MSR_VEC in the saved MSR value to indicate that
|
|
|
|
* frame->mc_vregs contains valid data
|
|
|
|
*/
|
|
|
|
msr |= MSR_VEC;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We always copy to/from vrsave, it's 0 if we don't have or don't
|
|
|
|
* use altivec. Since VSCR only contains 32 bits saved in the least
|
|
|
|
* significant bits of a vector, we "cheat" and stuff VRSAVE in the
|
|
|
|
* most significant bits of that same vector. --BenH
|
|
|
|
*/
|
2013-08-05 12:13:16 +08:00
|
|
|
if (cpu_has_feature(CPU_FTR_ALTIVEC))
|
2016-09-23 14:18:25 +08:00
|
|
|
current->thread.ckvrsave = mfspr(SPRN_VRSAVE);
|
|
|
|
if (__put_user(current->thread.ckvrsave,
|
2013-02-14 00:21:41 +08:00
|
|
|
(u32 __user *)&frame->mc_vregs[32]))
|
|
|
|
return 1;
|
|
|
|
if (msr & MSR_VEC) {
|
2016-09-23 14:18:24 +08:00
|
|
|
if (__put_user(current->thread.vrsave,
|
2013-02-14 00:21:41 +08:00
|
|
|
(u32 __user *)&tm_frame->mc_vregs[32]))
|
|
|
|
return 1;
|
|
|
|
} else {
|
2016-09-23 14:18:25 +08:00
|
|
|
if (__put_user(current->thread.ckvrsave,
|
2013-02-14 00:21:41 +08:00
|
|
|
(u32 __user *)&tm_frame->mc_vregs[32]))
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
#endif /* CONFIG_ALTIVEC */
|
|
|
|
|
2016-09-23 14:18:25 +08:00
|
|
|
if (copy_ckfpr_to_user(&frame->mc_fregs, current))
|
2013-02-14 00:21:41 +08:00
|
|
|
return 1;
|
|
|
|
if (msr & MSR_FP) {
|
2016-09-23 14:18:24 +08:00
|
|
|
if (copy_fpr_to_user(&tm_frame->mc_fregs, current))
|
2013-02-14 00:21:41 +08:00
|
|
|
return 1;
|
|
|
|
} else {
|
2016-09-23 14:18:25 +08:00
|
|
|
if (copy_ckfpr_to_user(&tm_frame->mc_fregs, current))
|
2013-02-14 00:21:41 +08:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_VSX
|
|
|
|
/*
|
|
|
|
* Copy VSR 0-31 upper half from thread_struct to local
|
|
|
|
* buffer, then write that to userspace. Also set MSR_VSX in
|
|
|
|
* the saved MSR value to indicate that frame->mc_vregs
|
|
|
|
* contains valid data
|
|
|
|
*/
|
|
|
|
if (current->thread.used_vsr) {
|
2016-09-23 14:18:25 +08:00
|
|
|
if (copy_ckvsx_to_user(&frame->mc_vsregs, current))
|
2013-02-14 00:21:41 +08:00
|
|
|
return 1;
|
|
|
|
if (msr & MSR_VSX) {
|
2016-09-23 14:18:24 +08:00
|
|
|
if (copy_vsx_to_user(&tm_frame->mc_vsregs,
|
2013-02-14 00:21:41 +08:00
|
|
|
current))
|
|
|
|
return 1;
|
|
|
|
} else {
|
2016-09-23 14:18:25 +08:00
|
|
|
if (copy_ckvsx_to_user(&tm_frame->mc_vsregs, current))
|
2013-02-14 00:21:41 +08:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
msr |= MSR_VSX;
|
|
|
|
}
|
|
|
|
#endif /* CONFIG_VSX */
|
|
|
|
#ifdef CONFIG_SPE
|
|
|
|
/* SPE regs are not checkpointed with TM, so this section is
|
|
|
|
* simply the same as in save_user_regs().
|
|
|
|
*/
|
|
|
|
if (current->thread.used_spe) {
|
|
|
|
flush_spe_to_thread(current);
|
|
|
|
if (__copy_to_user(&frame->mc_vregs, current->thread.evr,
|
|
|
|
ELF_NEVRREG * sizeof(u32)))
|
|
|
|
return 1;
|
|
|
|
/* set MSR_SPE in the saved MSR value to indicate that
|
|
|
|
* frame->mc_vregs contains valid data */
|
|
|
|
msr |= MSR_SPE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We always copy to/from spefscr */
|
|
|
|
if (__put_user(current->thread.spefscr, (u32 __user *)&frame->mc_vregs + ELF_NEVRREG))
|
|
|
|
return 1;
|
|
|
|
#endif /* CONFIG_SPE */
|
|
|
|
|
|
|
|
if (__put_user(msr, &frame->mc_gregs[PT_MSR]))
|
|
|
|
return 1;
|
|
|
|
if (sigret) {
|
2018-11-10 01:33:28 +08:00
|
|
|
/* Set up the sigreturn trampoline: li 0,sigret; sc */
|
|
|
|
if (__put_user(PPC_INST_ADDI + sigret, &frame->tramp[0])
|
|
|
|
|| __put_user(PPC_INST_SC, &frame->tramp[1]))
|
2013-02-14 00:21:41 +08:00
|
|
|
return 1;
|
|
|
|
flush_icache_range((unsigned long) &frame->tramp[0],
|
|
|
|
(unsigned long) &frame->tramp[2]);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Restore the current user register values from the user stack,
|
|
|
|
* (except for MSR).
|
|
|
|
*/
|
|
|
|
static long restore_user_regs(struct pt_regs *regs,
|
2005-10-18 09:17:58 +08:00
|
|
|
struct mcontext __user *sr, int sig)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2005-10-18 09:17:58 +08:00
|
|
|
long err;
|
2005-04-17 06:20:36 +08:00
|
|
|
unsigned int save_r2 = 0;
|
|
|
|
unsigned long msr;
|
2008-06-25 12:07:18 +08:00
|
|
|
#ifdef CONFIG_VSX
|
|
|
|
int i;
|
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* restore general registers but not including MSR or SOFTE. Also
|
|
|
|
* take care of keeping r2 (TLS) intact if not a signal
|
|
|
|
*/
|
|
|
|
if (!sig)
|
|
|
|
save_r2 = (unsigned int)regs->gpr[2];
|
2005-10-18 09:17:58 +08:00
|
|
|
err = restore_general_regs(regs, sr);
|
2010-09-21 04:48:57 +08:00
|
|
|
regs->trap = 0;
|
2006-06-07 14:14:40 +08:00
|
|
|
err |= __get_user(msr, &sr->mc_gregs[PT_MSR]);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (!sig)
|
|
|
|
regs->gpr[2] = (unsigned long) save_r2;
|
|
|
|
if (err)
|
|
|
|
return 1;
|
|
|
|
|
2006-06-07 14:14:40 +08:00
|
|
|
/* if doing signal return, restore the previous little-endian mode */
|
|
|
|
if (sig)
|
|
|
|
regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
#ifdef CONFIG_ALTIVEC
|
2008-06-25 12:07:18 +08:00
|
|
|
/*
|
|
|
|
* Force the process to reload the altivec registers from
|
|
|
|
* current->thread when it next does altivec instructions
|
|
|
|
*/
|
2005-04-17 06:20:36 +08:00
|
|
|
regs->msr &= ~MSR_VEC;
|
2006-06-07 14:14:40 +08:00
|
|
|
if (msr & MSR_VEC) {
|
2005-04-17 06:20:36 +08:00
|
|
|
/* restore altivec registers from the stack */
|
2013-09-10 18:20:42 +08:00
|
|
|
if (__copy_from_user(¤t->thread.vr_state, &sr->mc_vregs,
|
2005-04-17 06:20:36 +08:00
|
|
|
sizeof(sr->mc_vregs)))
|
|
|
|
return 1;
|
2016-07-26 16:06:01 +08:00
|
|
|
current->thread.used_vr = true;
|
2005-04-17 06:20:36 +08:00
|
|
|
} else if (current->thread.used_vr)
|
2013-09-10 18:20:42 +08:00
|
|
|
memset(¤t->thread.vr_state, 0,
|
|
|
|
ELF_NVRREG * sizeof(vector128));
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* Always get VRSAVE back */
|
|
|
|
if (__get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32]))
|
|
|
|
return 1;
|
2013-08-05 12:13:16 +08:00
|
|
|
if (cpu_has_feature(CPU_FTR_ALTIVEC))
|
|
|
|
mtspr(SPRN_VRSAVE, current->thread.vrsave);
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif /* CONFIG_ALTIVEC */
|
2008-07-02 12:06:37 +08:00
|
|
|
if (copy_fpr_from_user(current, &sr->mc_fregs))
|
|
|
|
return 1;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-06-25 12:07:18 +08:00
|
|
|
#ifdef CONFIG_VSX
|
2008-06-25 12:07:18 +08:00
|
|
|
/*
|
|
|
|
* Force the process to reload the VSX registers from
|
|
|
|
* current->thread when it next does VSX instruction.
|
|
|
|
*/
|
|
|
|
regs->msr &= ~MSR_VSX;
|
|
|
|
if (msr & MSR_VSX) {
|
|
|
|
/*
|
|
|
|
* Restore altivec registers from the stack to a local
|
|
|
|
* buffer, then write this out to the thread_struct
|
|
|
|
*/
|
2008-07-02 12:06:37 +08:00
|
|
|
if (copy_vsx_from_user(current, &sr->mc_vsregs))
|
2008-06-25 12:07:18 +08:00
|
|
|
return 1;
|
2016-07-26 16:06:01 +08:00
|
|
|
current->thread.used_vsr = true;
|
2008-06-25 12:07:18 +08:00
|
|
|
} else if (current->thread.used_vsr)
|
|
|
|
for (i = 0; i < 32 ; i++)
|
2013-09-10 18:20:42 +08:00
|
|
|
current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
|
2008-06-25 12:07:18 +08:00
|
|
|
#endif /* CONFIG_VSX */
|
|
|
|
/*
|
|
|
|
* force the process to reload the FP registers from
|
|
|
|
* current->thread when it next does FP instructions
|
|
|
|
*/
|
|
|
|
regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
|
|
|
|
|
2005-10-18 09:17:58 +08:00
|
|
|
#ifdef CONFIG_SPE
|
|
|
|
/* force the process to reload the spe registers from
|
|
|
|
current->thread when it next does spe instructions */
|
|
|
|
regs->msr &= ~MSR_SPE;
|
2006-06-07 14:14:40 +08:00
|
|
|
if (msr & MSR_SPE) {
|
2005-10-18 09:17:58 +08:00
|
|
|
/* restore spe registers from the stack */
|
|
|
|
if (__copy_from_user(current->thread.evr, &sr->mc_vregs,
|
|
|
|
ELF_NEVRREG * sizeof(u32)))
|
|
|
|
return 1;
|
2016-07-26 16:06:01 +08:00
|
|
|
current->thread.used_spe = true;
|
2005-10-18 09:17:58 +08:00
|
|
|
} else if (current->thread.used_spe)
|
|
|
|
memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32));
|
|
|
|
|
|
|
|
/* Always get SPEFSCR back */
|
|
|
|
if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG))
|
|
|
|
return 1;
|
|
|
|
#endif /* CONFIG_SPE */
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-02-14 00:21:41 +08:00
|
|
|
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
 * Restore the current user register values from the user stack, except for
 * MSR, and recheckpoint the original checkpointed register state for processes
 * in transactions.
 *
 * @regs:  register frame of the current task to be filled in.
 * @sr:    user context holding the checkpointed state.
 * @tm_sr: user context holding the transactional (live) state.
 *
 * Returns 0 on success; 1 if TM suspend is disabled, if any access to the
 * user contexts failed, or if the MSR TM bits in @tm_sr hold the reserved
 * value.
 */
static long restore_tm_user_regs(struct pt_regs *regs,
				 struct mcontext __user *sr,
				 struct mcontext __user *tm_sr)
{
	long err;
	unsigned long msr, msr_hi;
#ifdef CONFIG_VSX
	int i;
#endif

	if (tm_suspend_disabled)
		return 1;
	/*
	 * restore general registers but not including MSR or SOFTE. Also
	 * take care of keeping r2 (TLS) intact if not a signal.
	 * See comment in signal_64.c:restore_tm_sigcontexts();
	 * TFHAR is restored from the checkpointed NIP; TEXASR and TFIAR
	 * were set by the signal delivery.
	 */
	err = restore_general_regs(regs, tm_sr);
	err |= restore_general_regs(&current->thread.ckpt_regs, sr);

	err |= __get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP]);

	err |= __get_user(msr, &sr->mc_gregs[PT_MSR]);
	if (err)
		return 1;

	/* Restore the previous little-endian mode */
	regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);

#ifdef CONFIG_ALTIVEC
	regs->msr &= ~MSR_VEC;
	if (msr & MSR_VEC) {
		/*
		 * restore altivec registers from the stack: checkpointed
		 * state from sr, live state from tm_sr
		 */
		if (__copy_from_user(&current->thread.ckvr_state, &sr->mc_vregs,
				     sizeof(sr->mc_vregs)) ||
		    __copy_from_user(&current->thread.vr_state,
				     &tm_sr->mc_vregs,
				     sizeof(sr->mc_vregs)))
			return 1;
		current->thread.used_vr = true;
	} else if (current->thread.used_vr) {
		memset(&current->thread.vr_state, 0,
		       ELF_NVRREG * sizeof(vector128));
		memset(&current->thread.ckvr_state, 0,
		       ELF_NVRREG * sizeof(vector128));
	}

	/* Always get VRSAVE back */
	if (__get_user(current->thread.ckvrsave,
		       (u32 __user *)&sr->mc_vregs[32]) ||
	    __get_user(current->thread.vrsave,
		       (u32 __user *)&tm_sr->mc_vregs[32]))
		return 1;
	if (cpu_has_feature(CPU_FTR_ALTIVEC))
		mtspr(SPRN_VRSAVE, current->thread.ckvrsave);
#endif /* CONFIG_ALTIVEC */

	regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);

	/*
	 * As for VEC and VSX, the live FP state comes from tm_sr and the
	 * checkpointed FP state from sr; this mirrors save_tm_user_regs(),
	 * which writes the checkpointed FP registers to the first frame and
	 * the live ones to the transactional frame.
	 */
	if (copy_fpr_from_user(current, &tm_sr->mc_fregs) ||
	    copy_ckfpr_from_user(current, &sr->mc_fregs))
		return 1;

#ifdef CONFIG_VSX
	regs->msr &= ~MSR_VSX;
	if (msr & MSR_VSX) {
		/*
		 * Restore VSX registers from the stack to a local
		 * buffer, then write this out to the thread_struct
		 */
		if (copy_vsx_from_user(current, &tm_sr->mc_vsregs) ||
		    copy_ckvsx_from_user(current, &sr->mc_vsregs))
			return 1;
		current->thread.used_vsr = true;
	} else if (current->thread.used_vsr) {
		for (i = 0; i < 32 ; i++) {
			current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
			current->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
		}
	}
#endif /* CONFIG_VSX */

#ifdef CONFIG_SPE
	/*
	 * SPE regs are not checkpointed with TM, so this section is
	 * simply the same as in restore_user_regs().
	 */
	regs->msr &= ~MSR_SPE;
	if (msr & MSR_SPE) {
		if (__copy_from_user(current->thread.evr, &sr->mc_vregs,
				     ELF_NEVRREG * sizeof(u32)))
			return 1;
		current->thread.used_spe = true;
	} else if (current->thread.used_spe) {
		memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32));
	}

	/* Always get SPEFSCR back */
	if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs
		       + ELF_NEVRREG))
		return 1;
#endif /* CONFIG_SPE */

	/* Get the top half of the MSR from the user context */
	if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR]))
		return 1;
	msr_hi <<= 32;
	/* If TM bits are set to the reserved value, it's an invalid context */
	if (MSR_TM_RESV(msr_hi))
		return 1;

	/*
	 * Disabling preemption, since it is unsafe to be preempted
	 * with MSR[TS] set without recheckpointing.
	 */
	preempt_disable();

	/*
	 * CAUTION:
	 * After regs->MSR[TS] being updated, make sure that get_user(),
	 * put_user() or similar functions are *not* called. These
	 * functions can generate page faults which will cause the process
	 * to be de-scheduled with MSR[TS] set but without calling
	 * tm_recheckpoint(). This can cause a bug.
	 *
	 * Pull in the MSR TM bits from the user context
	 */
	regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK);
	/*
	 * Now, recheckpoint.  This loads up all of the checkpointed (older)
	 * registers, including FP and V[S]Rs.  After recheckpointing, the
	 * transactional versions should be loaded.
	 */
	tm_enable();
	/* Make sure the transaction is marked as failed */
	current->thread.tm_texasr |= TEXASR_FS;
	/* This loads the checkpointed FP/VEC state, if used */
	tm_recheckpoint(&current->thread);

	/* This loads the speculative FP/VEC state, if used */
	msr_check_and_set(msr & (MSR_FP | MSR_VEC));
	if (msr & MSR_FP) {
		load_fp_state(&current->thread.fp_state);
		regs->msr |= (MSR_FP | current->thread.fpexc_mode);
	}
#ifdef CONFIG_ALTIVEC
	if (msr & MSR_VEC) {
		load_vr_state(&current->thread.vr_state);
		regs->msr |= MSR_VEC;
	}
#endif

	preempt_enable();

	return 0;
}
#endif
|
|
|
|
|
2005-10-18 09:17:58 +08:00
|
|
|
#ifdef CONFIG_PPC64

/*
 * On 64-bit kernels this file serves 32-bit tasks, so route siginfo
 * copies in the code below through the 32-bit variant.
 */
#define copy_siginfo_to_user	copy_siginfo_to_user32

#endif /* CONFIG_PPC64 */
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Set up a signal frame for a "real-time" signal handler
|
|
|
|
* (one which gets siginfo).
|
|
|
|
*/
|
2014-03-02 21:46:11 +08:00
|
|
|
int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
|
2016-09-23 14:18:12 +08:00
|
|
|
struct task_struct *tsk)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2005-10-18 09:17:58 +08:00
|
|
|
struct rt_sigframe __user *rt_sf;
|
|
|
|
struct mcontext __user *frame;
|
2013-06-09 19:23:15 +08:00
|
|
|
struct mcontext __user *tm_frame = NULL;
|
2007-10-12 08:20:07 +08:00
|
|
|
void __user *addr;
|
2007-06-04 15:22:48 +08:00
|
|
|
unsigned long newsp = 0;
|
2013-02-14 00:21:41 +08:00
|
|
|
int sigret;
|
|
|
|
unsigned long tramp;
|
2016-09-23 14:18:12 +08:00
|
|
|
struct pt_regs *regs = tsk->thread.regs;
|
|
|
|
|
|
|
|
BUG_ON(tsk != current);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* Set up Signal Frame */
|
|
|
|
/* Put a Real Time Context onto stack */
|
2016-09-23 14:18:12 +08:00
|
|
|
rt_sf = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*rt_sf), 1);
|
2007-10-12 08:20:07 +08:00
|
|
|
addr = rt_sf;
|
2007-06-04 15:22:48 +08:00
|
|
|
if (unlikely(rt_sf == NULL))
|
2005-04-17 06:20:36 +08:00
|
|
|
goto badframe;
|
|
|
|
|
|
|
|
/* Put the siginfo & fill in most of the ucontext */
|
2014-03-02 21:46:11 +08:00
|
|
|
if (copy_siginfo_to_user(&rt_sf->info, &ksig->info)
|
2005-04-17 06:20:36 +08:00
|
|
|
|| __put_user(0, &rt_sf->uc.uc_flags)
|
2012-12-23 16:26:46 +08:00
|
|
|
|| __save_altstack(&rt_sf->uc.uc_stack, regs->gpr[1])
|
2005-10-18 09:17:58 +08:00
|
|
|
|| __put_user(to_user_ptr(&rt_sf->uc.uc_mcontext),
|
|
|
|
&rt_sf->uc.uc_regs)
|
|
|
|
|| put_sigset_t(&rt_sf->uc.uc_sigmask, oldset))
|
2005-04-17 06:20:36 +08:00
|
|
|
goto badframe;
|
|
|
|
|
|
|
|
/* Save user registers on the stack */
|
|
|
|
frame = &rt_sf->uc.uc_mcontext;
|
2007-10-12 08:20:07 +08:00
|
|
|
addr = frame;
|
2016-09-23 14:18:12 +08:00
|
|
|
if (vdso32_rt_sigtramp && tsk->mm->context.vdso_base) {
|
2013-02-14 00:21:41 +08:00
|
|
|
sigret = 0;
|
2016-09-23 14:18:12 +08:00
|
|
|
tramp = tsk->mm->context.vdso_base + vdso32_rt_sigtramp;
|
2005-11-11 18:15:21 +08:00
|
|
|
} else {
|
2013-02-14 00:21:41 +08:00
|
|
|
sigret = __NR_rt_sigreturn;
|
|
|
|
tramp = (unsigned long) frame->tramp;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
2013-06-09 19:23:15 +08:00
|
|
|
tm_frame = &rt_sf->uc_transact.uc_mcontext;
|
2013-02-14 00:21:41 +08:00
|
|
|
if (MSR_TM_ACTIVE(regs->msr)) {
|
2014-01-29 13:33:56 +08:00
|
|
|
if (__put_user((unsigned long)&rt_sf->uc_transact,
|
|
|
|
&rt_sf->uc.uc_link) ||
|
|
|
|
__put_user((unsigned long)tm_frame,
|
|
|
|
&rt_sf->uc_transact.uc_regs))
|
|
|
|
goto badframe;
|
2013-06-09 19:23:15 +08:00
|
|
|
if (save_tm_user_regs(regs, frame, tm_frame, sigret))
|
2005-04-17 06:20:36 +08:00
|
|
|
goto badframe;
|
|
|
|
}
|
2013-02-14 00:21:41 +08:00
|
|
|
else
|
|
|
|
#endif
|
2013-06-09 19:23:15 +08:00
|
|
|
{
|
2014-01-29 13:33:56 +08:00
|
|
|
if (__put_user(0, &rt_sf->uc.uc_link))
|
|
|
|
goto badframe;
|
2013-06-09 19:23:15 +08:00
|
|
|
if (save_user_regs(regs, frame, tm_frame, sigret, 1))
|
2013-02-14 00:21:41 +08:00
|
|
|
goto badframe;
|
2013-06-09 19:23:15 +08:00
|
|
|
}
|
2013-02-14 00:21:41 +08:00
|
|
|
regs->link = tramp;
|
|
|
|
|
2016-09-23 14:18:12 +08:00
|
|
|
tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */
|
2005-11-14 18:55:15 +08:00
|
|
|
|
2007-06-04 15:22:48 +08:00
|
|
|
/* create a stack frame for the caller of the handler */
|
|
|
|
newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16);
|
2007-10-12 08:20:07 +08:00
|
|
|
addr = (void __user *)regs->gpr[1];
|
2005-10-22 12:46:33 +08:00
|
|
|
if (put_user(regs->gpr[1], (u32 __user *)newsp))
|
2005-10-18 09:17:58 +08:00
|
|
|
goto badframe;
|
2007-06-04 15:22:48 +08:00
|
|
|
|
|
|
|
/* Fill registers for signal handler */
|
2005-10-18 09:17:58 +08:00
|
|
|
regs->gpr[1] = newsp;
|
2014-03-02 21:46:11 +08:00
|
|
|
regs->gpr[3] = ksig->sig;
|
2005-04-17 06:20:36 +08:00
|
|
|
regs->gpr[4] = (unsigned long) &rt_sf->info;
|
|
|
|
regs->gpr[5] = (unsigned long) &rt_sf->uc;
|
|
|
|
regs->gpr[6] = (unsigned long) rt_sf;
|
2014-03-02 21:46:11 +08:00
|
|
|
regs->nip = (unsigned long) ksig->ka.sa.sa_handler;
|
2013-09-23 10:04:43 +08:00
|
|
|
/* enter the signal handler in native-endian mode */
|
2006-06-07 14:14:40 +08:00
|
|
|
regs->msr &= ~MSR_LE;
|
2013-09-23 10:04:43 +08:00
|
|
|
regs->msr |= (MSR_KERNEL & MSR_LE);
|
2014-03-02 21:46:11 +08:00
|
|
|
return 0;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
badframe:
|
2011-06-04 13:36:54 +08:00
|
|
|
if (show_unhandled_signals)
|
|
|
|
printk_ratelimited(KERN_INFO
|
|
|
|
"%s[%d]: bad frame in handle_rt_signal32: "
|
|
|
|
"%p nip %08lx lr %08lx\n",
|
2016-09-23 14:18:12 +08:00
|
|
|
tsk->comm, tsk->pid,
|
2011-06-04 13:36:54 +08:00
|
|
|
addr, regs->nip, regs->link);
|
2007-10-12 08:20:07 +08:00
|
|
|
|
2014-03-02 21:46:11 +08:00
|
|
|
return 1;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2005-10-18 09:17:58 +08:00
|
|
|
/*
 * Install the signal mask and register state described by a user ucontext.
 *
 * @ucp:  user ucontext to read the signal mask and mcontext pointer from.
 * @regs: register frame to restore into.
 * @sig:  passed through to restore_user_regs().
 *
 * Returns 0 on success, -EFAULT if the context could not be read or the
 * registers could not be restored. Note that the signal mask is already
 * installed by the time the register restore is attempted.
 */
static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int sig)
{
	sigset_t set;
	struct mcontext __user *mcp;

	if (get_sigset_t(&set, &ucp->uc_sigmask))
		return -EFAULT;
#ifdef CONFIG_PPC64
	{
		u32 cmcp;

		if (__get_user(cmcp, &ucp->uc_regs))
			return -EFAULT;
		mcp = (struct mcontext __user *)(u64)cmcp;
		/* no need to check access_ok(mcp), since mcp < 4GB */
	}
#else
	if (__get_user(mcp, &ucp->uc_regs))
		return -EFAULT;
	if (!access_ok(mcp, sizeof(*mcp)))
		return -EFAULT;
#endif
	set_current_blocked(&set);
	if (restore_user_regs(regs, mcp, sig))
		return -EFAULT;

	return 0;
}
|
|
|
|
|
2013-02-14 00:21:41 +08:00
|
|
|
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
 * Transactional-memory variant of do_setcontext(): install the signal mask
 * from @ucp and restore register state from a pair of user contexts.
 *
 * @ucp:    user ucontext supplying the signal mask and the first mcontext
 *          pointer (handed to restore_tm_user_regs() as its 'sr' argument).
 * @tm_ucp: user ucontext supplying the second mcontext pointer (handed to
 *          restore_tm_user_regs() as its 'tm_sr' argument).
 * @regs:   register frame to restore into.
 *
 * Returns 0 on success, -EFAULT if either context could not be read or
 * the registers could not be restored.
 */
static int do_setcontext_tm(struct ucontext __user *ucp,
			    struct ucontext __user *tm_ucp,
			    struct pt_regs *regs)
{
	sigset_t set;
	struct mcontext __user *mcp;
	struct mcontext __user *tm_mcp;
	u32 cmcp;
	u32 tm_cmcp;

	if (get_sigset_t(&set, &ucp->uc_sigmask))
		return -EFAULT;

	if (__get_user(cmcp, &ucp->uc_regs) ||
	    __get_user(tm_cmcp, &tm_ucp->uc_regs))
		return -EFAULT;
	mcp = (struct mcontext __user *)(u64)cmcp;
	tm_mcp = (struct mcontext __user *)(u64)tm_cmcp;
	/* no need to check access_ok(mcp), since mcp < 4GB */

	set_current_blocked(&set);
	if (restore_tm_user_regs(regs, mcp, tm_mcp))
		return -EFAULT;

	return 0;
}
#endif
|
|
|
|
|
2018-05-02 21:20:47 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
|
|
|
COMPAT_SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
|
|
|
|
struct ucontext __user *, new_ctx, int, ctx_size)
|
|
|
|
#else
|
|
|
|
SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
|
|
|
|
struct ucontext __user *, new_ctx, long, ctx_size)
|
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2018-05-02 21:20:47 +08:00
|
|
|
struct pt_regs *regs = current_pt_regs();
|
2008-10-23 08:42:36 +08:00
|
|
|
int ctx_has_vsx_region = 0;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-07-08 16:43:41 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
|
|
|
unsigned long new_msr = 0;
|
|
|
|
|
2008-11-06 08:49:00 +08:00
|
|
|
if (new_ctx) {
|
|
|
|
struct mcontext __user *mcp;
|
|
|
|
u32 cmcp;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get pointer to the real mcontext. No need for
|
|
|
|
* access_ok since we are dealing with compat
|
|
|
|
* pointers.
|
|
|
|
*/
|
|
|
|
if (__get_user(cmcp, &new_ctx->uc_regs))
|
|
|
|
return -EFAULT;
|
|
|
|
mcp = (struct mcontext __user *)(u64)cmcp;
|
|
|
|
if (__get_user(new_msr, &mcp->mc_gregs[PT_MSR]))
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
2008-07-08 16:43:41 +08:00
|
|
|
/*
|
|
|
|
* Check that the context is not smaller than the original
|
|
|
|
* size (with VMX but without VSX)
|
|
|
|
*/
|
|
|
|
if (ctx_size < UCONTEXTSIZEWITHOUTVSX)
|
|
|
|
return -EINVAL;
|
|
|
|
/*
|
|
|
|
* If the new context state sets the MSR VSX bits but
|
|
|
|
* it doesn't provide VSX state.
|
|
|
|
*/
|
|
|
|
if ((ctx_size < sizeof(struct ucontext)) &&
|
|
|
|
(new_msr & MSR_VSX))
|
|
|
|
return -EINVAL;
|
2008-10-23 08:42:36 +08:00
|
|
|
/* Does the context have enough room to store VSX data? */
|
|
|
|
if (ctx_size >= sizeof(struct ucontext))
|
|
|
|
ctx_has_vsx_region = 1;
|
2008-07-08 16:43:41 +08:00
|
|
|
#else
|
2005-04-17 06:20:36 +08:00
|
|
|
/* Context size is for future use. Right now, we only make sure
|
|
|
|
* we are passed something we understand
|
|
|
|
*/
|
2005-10-18 09:17:58 +08:00
|
|
|
if (ctx_size < sizeof(struct ucontext))
|
2005-04-17 06:20:36 +08:00
|
|
|
return -EINVAL;
|
2008-07-08 16:43:41 +08:00
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
if (old_ctx != NULL) {
|
2006-12-20 10:57:06 +08:00
|
|
|
struct mcontext __user *mctx;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* old_ctx might not be 16-byte aligned, in which
|
|
|
|
* case old_ctx->uc_mcontext won't be either.
|
|
|
|
* Because we have the old_ctx->uc_pad2 field
|
|
|
|
* before old_ctx->uc_mcontext, we need to round down
|
|
|
|
* from &old_ctx->uc_mcontext to a 16-byte boundary.
|
|
|
|
*/
|
|
|
|
mctx = (struct mcontext __user *)
|
|
|
|
((unsigned long) &old_ctx->uc_mcontext & ~0xfUL);
|
Remove 'type' argument from access_ok() function
Nobody has actually used the type (VERIFY_READ vs VERIFY_WRITE) argument
of the user address range verification function since we got rid of the
old racy i386-only code to walk page tables by hand.
It existed because the original 80386 would not honor the write protect
bit when in kernel mode, so you had to do COW by hand before doing any
user access. But we haven't supported that in a long time, and these
days the 'type' argument is a purely historical artifact.
A discussion about extending 'user_access_begin()' to do the range
checking resulted this patch, because there is no way we're going to
move the old VERIFY_xyz interface to that model. And it's best done at
the end of the merge window when I've done most of my merges, so let's
just get this done once and for all.
This patch was mostly done with a sed-script, with manual fix-ups for
the cases that weren't of the trivial 'access_ok(VERIFY_xyz' form.
There were a couple of notable cases:
- csky still had the old "verify_area()" name as an alias.
- the iter_iov code had magical hardcoded knowledge of the actual
values of VERIFY_{READ,WRITE} (not that they mattered, since nothing
really used it)
- microblaze used the type argument for a debug printout
but other than those oddities this should be a total no-op patch.
I tried to fix up all architectures, did fairly extensive grepping for
access_ok() uses, and the changes are trivial, but I may have missed
something. Any missed conversion should be trivially fixable, though.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-01-04 10:57:57 +08:00
|
|
|
if (!access_ok(old_ctx, ctx_size)
|
2013-06-09 19:23:15 +08:00
|
|
|
|| save_user_regs(regs, mctx, NULL, 0, ctx_has_vsx_region)
|
2005-10-18 09:17:58 +08:00
|
|
|
|| put_sigset_t(&old_ctx->uc_sigmask, ¤t->blocked)
|
2006-12-20 10:57:06 +08:00
|
|
|
|| __put_user(to_user_ptr(mctx), &old_ctx->uc_regs))
|
2005-04-17 06:20:36 +08:00
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
if (new_ctx == NULL)
|
|
|
|
return 0;
|
Remove 'type' argument from access_ok() function
Nobody has actually used the type (VERIFY_READ vs VERIFY_WRITE) argument
of the user address range verification function since we got rid of the
old racy i386-only code to walk page tables by hand.
It existed because the original 80386 would not honor the write protect
bit when in kernel mode, so you had to do COW by hand before doing any
user access. But we haven't supported that in a long time, and these
days the 'type' argument is a purely historical artifact.
A discussion about extending 'user_access_begin()' to do the range
checking resulted this patch, because there is no way we're going to
move the old VERIFY_xyz interface to that model. And it's best done at
the end of the merge window when I've done most of my merges, so let's
just get this done once and for all.
This patch was mostly done with a sed-script, with manual fix-ups for
the cases that weren't of the trivial 'access_ok(VERIFY_xyz' form.
There were a couple of notable cases:
- csky still had the old "verify_area()" name as an alias.
- the iter_iov code had magical hardcoded knowledge of the actual
values of VERIFY_{READ,WRITE} (not that they mattered, since nothing
really used it)
- microblaze used the type argument for a debug printout
but other than those oddities this should be a total no-op patch.
I tried to fix up all architectures, did fairly extensive grepping for
access_ok() uses, and the changes are trivial, but I may have missed
something. Any missed conversion should be trivially fixable, though.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-01-04 10:57:57 +08:00
|
|
|
if (!access_ok(new_ctx, ctx_size) ||
|
2018-04-25 00:04:25 +08:00
|
|
|
fault_in_pages_readable((u8 __user *)new_ctx, ctx_size))
|
2005-04-17 06:20:36 +08:00
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we get a fault copying the context into the kernel's
|
|
|
|
* image of the user's registers, we can't just return -EFAULT
|
|
|
|
* because the user's registers will be corrupted. For instance
|
|
|
|
* the NIP value may have been updated but not some of the
|
|
|
|
* other registers. Given that we have done the access_ok
|
|
|
|
* and successfully read the first and last bytes of the region
|
|
|
|
* above, this should only happen in an out-of-memory situation
|
|
|
|
* or if another thread unmaps the region containing the context.
|
|
|
|
* We kill the task with a SIGSEGV in this situation.
|
|
|
|
*/
|
2005-10-18 09:17:58 +08:00
|
|
|
if (do_setcontext(new_ctx, regs, 0))
|
2005-04-17 06:20:36 +08:00
|
|
|
do_exit(SIGSEGV);
|
[PATCH] syscall entry/exit revamp
This cleanup patch speeds up the null syscall path on ppc64 by about 3%,
and brings the ppc32 and ppc64 code slightly closer together.
The ppc64 code was checking current_thread_info()->flags twice in the
syscall exit path; once for TIF_SYSCALL_T_OR_A before disabling
interrupts, and then again for TIF_SIGPENDING|TIF_NEED_RESCHED etc after
disabling interrupts. Now we do the same as ppc32 -- check the flags
only once in the fast path, and re-enable interrupts if necessary in the
ptrace case.
The patch abolishes the 'syscall_noerror' member of struct thread_info
and replaces it with a TIF_NOERROR bit in the flags, which is handled in
the slow path. This shortens the syscall entry code, which no longer
needs to clear syscall_noerror.
The patch adds a TIF_SAVE_NVGPRS flag which causes the syscall exit slow
path to save the non-volatile GPRs into a signal frame. This removes the
need for the assembly wrappers around sys_sigsuspend(),
sys_rt_sigsuspend(), et al which existed solely to save those registers
in advance. It also means I don't have to add new wrappers for ppoll()
and pselect(), which is what I was supposed to be doing when I got
distracted into this...
Finally, it unifies the ppc64 and ppc32 methods of handling syscall exit
directly into a signal handler (as required by sigsuspend et al) by
introducing a TIF_RESTOREALL flag which causes _all_ the registers to be
reloaded from the pt_regs by taking the ret_from_exception path, instead
of the normal syscall exit path which stomps on the callee-saved GPRs.
It appears to pass an LTP test run on ppc64, and passes basic testing on
ppc32 too. Brief tests of ptrace functionality with strace and gdb also
appear OK. I wouldn't send it to Linus for 2.6.15 just yet though :)
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2005-11-16 02:52:18 +08:00
|
|
|
|
|
|
|
set_thread_flag(TIF_RESTOREALL);
|
2005-04-17 06:20:36 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-05-02 21:20:47 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
|
|
|
COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
|
|
|
|
#else
|
|
|
|
SYSCALL_DEFINE0(rt_sigreturn)
|
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2005-10-18 09:17:58 +08:00
|
|
|
struct rt_sigframe __user *rt_sf;
|
2018-05-02 21:20:47 +08:00
|
|
|
struct pt_regs *regs = current_pt_regs();
|
powerpc/tm: Unset MSR[TS] if not recheckpointing
There is a TM Bad Thing bug that can be caused when you return from a
signal context in a suspended transaction but with ucontext MSR[TS] unset.
This forces regs->msr[TS] to be set at syscall entrance (since the CPU
state is transactional). It also calls treclaim() to flush the transaction
state, which is done based on the live (mfmsr) MSR state.
Since user context MSR[TS] is not set, then restore_tm_sigcontexts() is not
called, thus, not executing recheckpoint, keeping the CPU state as not
transactional. When calling rfid, SRR1 will have MSR[TS] set, but the CPU
state is non transactional, causing the TM Bad Thing with the following
stack:
[ 33.862316] Bad kernel stack pointer 3fffd9dce3e0 at c00000000000c47c
cpu 0x8: Vector: 700 (Program Check) at [c00000003ff7fd40]
pc: c00000000000c47c: fast_exception_return+0xac/0xb4
lr: 00003fff865f442c
sp: 3fffd9dce3e0
msr: 8000000102a03031
current = 0xc00000041f68b700
paca = 0xc00000000fb84800 softe: 0 irq_happened: 0x01
pid = 1721, comm = tm-signal-sigre
Linux version 4.9.0-3-powerpc64le (debian-kernel@lists.debian.org) (gcc version 6.3.0 20170516 (Debian 6.3.0-18) ) #1 SMP Debian 4.9.30-2+deb9u2 (2017-06-26)
WARNING: exception is not recoverable, can't continue
The same problem happens on 32-bits signal handler, and the fix is very
similar, if tm_recheckpoint() is not executed, then regs->msr[TS] should be
zeroed.
This patch also fixes a sparse warning related to lack of indentation when
CONFIG_PPC_TRANSACTIONAL_MEM is set.
Fixes: 2b0a576d15e0e ("powerpc: Add new transactional memory state to the signal context")
CC: Stable <stable@vger.kernel.org> # 3.10+
Signed-off-by: Breno Leitao <leitao@debian.org>
Tested-by: Michal Suchánek <msuchanek@suse.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-11-27 04:12:00 +08:00
|
|
|
int tm_restore = 0;
|
2013-02-14 00:21:41 +08:00
|
|
|
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
|
|
|
struct ucontext __user *uc_transact;
|
|
|
|
unsigned long msr_hi;
|
|
|
|
unsigned long tmp;
|
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
/* Always make any pending restarted system calls return -EINTR */
|
2015-02-13 07:01:14 +08:00
|
|
|
current->restart_block.fn = do_no_restart_syscall;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2005-10-18 09:17:58 +08:00
|
|
|
rt_sf = (struct rt_sigframe __user *)
|
|
|
|
(regs->gpr[1] + __SIGNAL_FRAMESIZE + 16);
|
Remove 'type' argument from access_ok() function
Nobody has actually used the type (VERIFY_READ vs VERIFY_WRITE) argument
of the user address range verification function since we got rid of the
old racy i386-only code to walk page tables by hand.
It existed because the original 80386 would not honor the write protect
bit when in kernel mode, so you had to do COW by hand before doing any
user access. But we haven't supported that in a long time, and these
days the 'type' argument is a purely historical artifact.
A discussion about extending 'user_access_begin()' to do the range
checking resulted this patch, because there is no way we're going to
move the old VERIFY_xyz interface to that model. And it's best done at
the end of the merge window when I've done most of my merges, so let's
just get this done once and for all.
This patch was mostly done with a sed-script, with manual fix-ups for
the cases that weren't of the trivial 'access_ok(VERIFY_xyz' form.
There were a couple of notable cases:
- csky still had the old "verify_area()" name as an alias.
- the iter_iov code had magical hardcoded knowledge of the actual
values of VERIFY_{READ,WRITE} (not that they mattered, since nothing
really used it)
- microblaze used the type argument for a debug printout
but other than those oddities this should be a total no-op patch.
I tried to fix up all architectures, did fairly extensive grepping for
access_ok() uses, and the changes are trivial, but I may have missed
something. Any missed conversion should be trivially fixable, though.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-01-04 10:57:57 +08:00
|
|
|
if (!access_ok(rt_sf, sizeof(*rt_sf)))
|
2005-04-17 06:20:36 +08:00
|
|
|
goto bad;
|
powerpc: signals: Discard transaction state from signal frames
Userspace can begin and suspend a transaction within the signal
handler which means they might enter sys_rt_sigreturn() with the
processor in suspended state.
sys_rt_sigreturn() wants to restore process context (which may have
been in a transaction before signal delivery). To do this it must
restore TM SPRS. To achieve this, any transaction initiated within the
signal frame must be discarded in order to be able to restore TM SPRs
as TM SPRs can only be manipulated non-transactionally..
>From the PowerPC ISA:
TM Bad Thing Exception [Category: Transactional Memory]
An attempt is made to execute a mtspr targeting a TM register in
other than Non-transactional state.
Not doing so results in a TM Bad Thing:
[12045.221359] Kernel BUG at c000000000050a40 [verbose debug info unavailable]
[12045.221470] Unexpected TM Bad Thing exception at c000000000050a40 (msr 0x201033)
[12045.221540] Oops: Unrecoverable exception, sig: 6 [#1]
[12045.221586] SMP NR_CPUS=2048 NUMA PowerNV
[12045.221634] Modules linked in: xt_CHECKSUM iptable_mangle ipt_MASQUERADE
nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4
xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp bridge stp llc ebtable_filter
ebtables ip6table_filter ip6_tables iptable_filter ip_tables x_tables kvm_hv kvm
uio_pdrv_genirq ipmi_powernv uio powernv_rng ipmi_msghandler autofs4 ses enclosure
scsi_transport_sas bnx2x ipr mdio libcrc32c
[12045.222167] CPU: 68 PID: 6178 Comm: sigreturnpanic Not tainted 4.7.0 #34
[12045.222224] task: c0000000fce38600 ti: c0000000fceb4000 task.ti: c0000000fceb4000
[12045.222293] NIP: c000000000050a40 LR: c0000000000163bc CTR: 0000000000000000
[12045.222361] REGS: c0000000fceb7ac0 TRAP: 0700 Not tainted (4.7.0)
[12045.222418] MSR: 9000000300201033 <SF,HV,ME,IR,DR,RI,LE,TM[SE]> CR: 28444280 XER: 20000000
[12045.222625] CFAR: c0000000000163b8 SOFTE: 0 PACATMSCRATCH: 900000014280f033
GPR00: 01100000b8000001 c0000000fceb7d40 c00000000139c100 c0000000fce390d0
GPR04: 900000034280f033 0000000000000000 0000000000000000 0000000000000000
GPR08: 0000000000000000 b000000000001033 0000000000000001 0000000000000000
GPR12: 0000000000000000 c000000002926400 0000000000000000 0000000000000000
GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
GPR24: 0000000000000000 00003ffff98cadd0 00003ffff98cb470 0000000000000000
GPR28: 900000034280f033 c0000000fceb7ea0 0000000000000001 c0000000fce390d0
[12045.223535] NIP [c000000000050a40] tm_restore_sprs+0xc/0x1c
[12045.223584] LR [c0000000000163bc] tm_recheckpoint+0x5c/0xa0
[12045.223630] Call Trace:
[12045.223655] [c0000000fceb7d80] [c000000000026e74] sys_rt_sigreturn+0x494/0x6c0
[12045.223738] [c0000000fceb7e30] [c0000000000092e0] system_call+0x38/0x108
[12045.223806] Instruction dump:
[12045.223841] 7c800164 4e800020 7c0022a6 f80304a8 7c0222a6 f80304b0 7c0122a6 f80304b8
[12045.223955] 4e800020 e80304a8 7c0023a6 e80304b0 <7c0223a6> e80304b8 7c0123a6 4e800020
[12045.224074] ---[ end trace cb8002ee240bae76 ]---
It isn't clear exactly if there is really a use case for userspace
returning with a suspended transaction, however, doing so doesn't (on
its own) constitute a bad frame. As such, this patch simply discards
the transactional state of the context calling the sigreturn and
continues.
Reported-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Signed-off-by: Cyril Bur <cyrilbur@gmail.com>
Tested-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Reviewed-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Acked-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2016-08-23 08:46:17 +08:00
|
|
|
|
2013-02-14 00:21:41 +08:00
|
|
|
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
powerpc: signals: Discard transaction state from signal frames
Userspace can begin and suspend a transaction within the signal
handler which means they might enter sys_rt_sigreturn() with the
processor in suspended state.
sys_rt_sigreturn() wants to restore process context (which may have
been in a transaction before signal delivery). To do this it must
restore TM SPRS. To achieve this, any transaction initiated within the
signal frame must be discarded in order to be able to restore TM SPRs
as TM SPRs can only be manipulated non-transactionally..
>From the PowerPC ISA:
TM Bad Thing Exception [Category: Transactional Memory]
An attempt is made to execute a mtspr targeting a TM register in
other than Non-transactional state.
Not doing so results in a TM Bad Thing:
[12045.221359] Kernel BUG at c000000000050a40 [verbose debug info unavailable]
[12045.221470] Unexpected TM Bad Thing exception at c000000000050a40 (msr 0x201033)
[12045.221540] Oops: Unrecoverable exception, sig: 6 [#1]
[12045.221586] SMP NR_CPUS=2048 NUMA PowerNV
[12045.221634] Modules linked in: xt_CHECKSUM iptable_mangle ipt_MASQUERADE
nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4
xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp bridge stp llc ebtable_filter
ebtables ip6table_filter ip6_tables iptable_filter ip_tables x_tables kvm_hv kvm
uio_pdrv_genirq ipmi_powernv uio powernv_rng ipmi_msghandler autofs4 ses enclosure
scsi_transport_sas bnx2x ipr mdio libcrc32c
[12045.222167] CPU: 68 PID: 6178 Comm: sigreturnpanic Not tainted 4.7.0 #34
[12045.222224] task: c0000000fce38600 ti: c0000000fceb4000 task.ti: c0000000fceb4000
[12045.222293] NIP: c000000000050a40 LR: c0000000000163bc CTR: 0000000000000000
[12045.222361] REGS: c0000000fceb7ac0 TRAP: 0700 Not tainted (4.7.0)
[12045.222418] MSR: 9000000300201033 <SF,HV,ME,IR,DR,RI,LE,TM[SE]> CR: 28444280 XER: 20000000
[12045.222625] CFAR: c0000000000163b8 SOFTE: 0 PACATMSCRATCH: 900000014280f033
GPR00: 01100000b8000001 c0000000fceb7d40 c00000000139c100 c0000000fce390d0
GPR04: 900000034280f033 0000000000000000 0000000000000000 0000000000000000
GPR08: 0000000000000000 b000000000001033 0000000000000001 0000000000000000
GPR12: 0000000000000000 c000000002926400 0000000000000000 0000000000000000
GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
GPR24: 0000000000000000 00003ffff98cadd0 00003ffff98cb470 0000000000000000
GPR28: 900000034280f033 c0000000fceb7ea0 0000000000000001 c0000000fce390d0
[12045.223535] NIP [c000000000050a40] tm_restore_sprs+0xc/0x1c
[12045.223584] LR [c0000000000163bc] tm_recheckpoint+0x5c/0xa0
[12045.223630] Call Trace:
[12045.223655] [c0000000fceb7d80] [c000000000026e74] sys_rt_sigreturn+0x494/0x6c0
[12045.223738] [c0000000fceb7e30] [c0000000000092e0] system_call+0x38/0x108
[12045.223806] Instruction dump:
[12045.223841] 7c800164 4e800020 7c0022a6 f80304a8 7c0222a6 f80304b0 7c0122a6 f80304b8
[12045.223955] 4e800020 e80304a8 7c0023a6 e80304b0 <7c0223a6> e80304b8 7c0123a6 4e800020
[12045.224074] ---[ end trace cb8002ee240bae76 ]---
It isn't clear exactly if there is really a use case for userspace
returning with a suspended transaction, however, doing so doesn't (on
its own) constitute a bad frame. As such, this patch simply discards
the transactional state of the context calling the sigreturn and
continues.
Reported-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Signed-off-by: Cyril Bur <cyrilbur@gmail.com>
Tested-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Reviewed-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Acked-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2016-08-23 08:46:17 +08:00
|
|
|
/*
|
|
|
|
* If there is a transactional state then throw it away.
|
|
|
|
* The purpose of a sigreturn is to destroy all traces of the
|
|
|
|
* signal frame, this includes any transactional state created
|
|
|
|
* within in. We only check for suspended as we can never be
|
|
|
|
* active in the kernel, we are active, there is nothing better to
|
|
|
|
* do than go ahead and Bad Thing later.
|
|
|
|
* The cause is not important as there will never be a
|
|
|
|
* recheckpoint so it's not user visible.
|
|
|
|
*/
|
|
|
|
if (MSR_TM_SUSPENDED(mfmsr()))
|
|
|
|
tm_reclaim_current(0);
|
|
|
|
|
2013-02-14 00:21:41 +08:00
|
|
|
if (__get_user(tmp, &rt_sf->uc.uc_link))
|
|
|
|
goto bad;
|
|
|
|
uc_transact = (struct ucontext __user *)(uintptr_t)tmp;
|
|
|
|
if (uc_transact) {
|
|
|
|
u32 cmcp;
|
|
|
|
struct mcontext __user *mcp;
|
|
|
|
|
|
|
|
if (__get_user(cmcp, &uc_transact->uc_regs))
|
|
|
|
return -EFAULT;
|
|
|
|
mcp = (struct mcontext __user *)(u64)cmcp;
|
|
|
|
/* The top 32 bits of the MSR are stashed in the transactional
|
|
|
|
* ucontext. */
|
|
|
|
if (__get_user(msr_hi, &mcp->mc_gregs[PT_MSR]))
|
|
|
|
goto bad;
|
|
|
|
|
2013-06-09 19:23:18 +08:00
|
|
|
if (MSR_TM_ACTIVE(msr_hi<<32)) {
|
2013-02-14 00:21:41 +08:00
|
|
|
/* We only recheckpoint on return if we're
|
|
|
|
* transaction.
|
|
|
|
*/
|
|
|
|
tm_restore = 1;
|
|
|
|
if (do_setcontext_tm(&rt_sf->uc, uc_transact, regs))
|
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
}
|
powerpc/tm: Unset MSR[TS] if not recheckpointing
There is a TM Bad Thing bug that can be caused when you return from a
signal context in a suspended transaction but with ucontext MSR[TS] unset.
This forces regs->msr[TS] to be set at syscall entrance (since the CPU
state is transactional). It also calls treclaim() to flush the transaction
state, which is done based on the live (mfmsr) MSR state.
Since user context MSR[TS] is not set, then restore_tm_sigcontexts() is not
called, thus, not executing recheckpoint, keeping the CPU state as not
transactional. When calling rfid, SRR1 will have MSR[TS] set, but the CPU
state is non transactional, causing the TM Bad Thing with the following
stack:
[ 33.862316] Bad kernel stack pointer 3fffd9dce3e0 at c00000000000c47c
cpu 0x8: Vector: 700 (Program Check) at [c00000003ff7fd40]
pc: c00000000000c47c: fast_exception_return+0xac/0xb4
lr: 00003fff865f442c
sp: 3fffd9dce3e0
msr: 8000000102a03031
current = 0xc00000041f68b700
paca = 0xc00000000fb84800 softe: 0 irq_happened: 0x01
pid = 1721, comm = tm-signal-sigre
Linux version 4.9.0-3-powerpc64le (debian-kernel@lists.debian.org) (gcc version 6.3.0 20170516 (Debian 6.3.0-18) ) #1 SMP Debian 4.9.30-2+deb9u2 (2017-06-26)
WARNING: exception is not recoverable, can't continue
The same problem happens on 32-bits signal handler, and the fix is very
similar, if tm_recheckpoint() is not executed, then regs->msr[TS] should be
zeroed.
This patch also fixes a sparse warning related to lack of indentation when
CONFIG_PPC_TRANSACTIONAL_MEM is set.
Fixes: 2b0a576d15e0e ("powerpc: Add new transactional memory state to the signal context")
CC: Stable <stable@vger.kernel.org> # 3.10+
Signed-off-by: Breno Leitao <leitao@debian.org>
Tested-by: Michal Suchánek <msuchanek@suse.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-11-27 04:12:00 +08:00
|
|
|
if (!tm_restore) {
|
|
|
|
/*
|
|
|
|
* Unset regs->msr because ucontext MSR TS is not
|
|
|
|
* set, and recheckpoint was not called. This avoid
|
|
|
|
* hitting a TM Bad thing at RFID
|
|
|
|
*/
|
|
|
|
regs->msr &= ~MSR_TS_MASK;
|
|
|
|
}
|
|
|
|
/* Fall through, for non-TM restore */
|
2013-02-14 00:21:41 +08:00
|
|
|
#endif
|
powerpc/tm: Unset MSR[TS] if not recheckpointing
There is a TM Bad Thing bug that can be caused when you return from a
signal context in a suspended transaction but with ucontext MSR[TS] unset.
This forces regs->msr[TS] to be set at syscall entrance (since the CPU
state is transactional). It also calls treclaim() to flush the transaction
state, which is done based on the live (mfmsr) MSR state.
Since user context MSR[TS] is not set, then restore_tm_sigcontexts() is not
called, thus, not executing recheckpoint, keeping the CPU state as not
transactional. When calling rfid, SRR1 will have MSR[TS] set, but the CPU
state is non transactional, causing the TM Bad Thing with the following
stack:
[ 33.862316] Bad kernel stack pointer 3fffd9dce3e0 at c00000000000c47c
cpu 0x8: Vector: 700 (Program Check) at [c00000003ff7fd40]
pc: c00000000000c47c: fast_exception_return+0xac/0xb4
lr: 00003fff865f442c
sp: 3fffd9dce3e0
msr: 8000000102a03031
current = 0xc00000041f68b700
paca = 0xc00000000fb84800 softe: 0 irq_happened: 0x01
pid = 1721, comm = tm-signal-sigre
Linux version 4.9.0-3-powerpc64le (debian-kernel@lists.debian.org) (gcc version 6.3.0 20170516 (Debian 6.3.0-18) ) #1 SMP Debian 4.9.30-2+deb9u2 (2017-06-26)
WARNING: exception is not recoverable, can't continue
The same problem happens on 32-bits signal handler, and the fix is very
similar, if tm_recheckpoint() is not executed, then regs->msr[TS] should be
zeroed.
This patch also fixes a sparse warning related to lack of indentation when
CONFIG_PPC_TRANSACTIONAL_MEM is set.
Fixes: 2b0a576d15e0e ("powerpc: Add new transactional memory state to the signal context")
CC: Stable <stable@vger.kernel.org> # 3.10+
Signed-off-by: Breno Leitao <leitao@debian.org>
Tested-by: Michal Suchánek <msuchanek@suse.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-11-27 04:12:00 +08:00
|
|
|
if (!tm_restore)
|
|
|
|
if (do_setcontext(&rt_sf->uc, regs, 1))
|
|
|
|
goto bad;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* It's not clear whether or why it is desirable to save the
|
|
|
|
* sigaltstack setting on signal delivery and restore it on
|
|
|
|
* signal return. But other architectures do this and we have
|
|
|
|
* always done it up until now so it is probably better not to
|
|
|
|
* change it. -- paulus
|
2005-10-18 09:17:58 +08:00
|
|
|
*/
|
|
|
|
#ifdef CONFIG_PPC64
|
2012-12-23 16:26:46 +08:00
|
|
|
if (compat_restore_altstack(&rt_sf->uc.uc_stack))
|
|
|
|
goto bad;
|
2005-10-18 09:17:58 +08:00
|
|
|
#else
|
2012-12-23 16:26:46 +08:00
|
|
|
if (restore_altstack(&rt_sf->uc.uc_stack))
|
|
|
|
goto bad;
|
2005-10-18 09:17:58 +08:00
|
|
|
#endif
|
[PATCH] syscall entry/exit revamp
This cleanup patch speeds up the null syscall path on ppc64 by about 3%,
and brings the ppc32 and ppc64 code slightly closer together.
The ppc64 code was checking current_thread_info()->flags twice in the
syscall exit path; once for TIF_SYSCALL_T_OR_A before disabling
interrupts, and then again for TIF_SIGPENDING|TIF_NEED_RESCHED etc after
disabling interrupts. Now we do the same as ppc32 -- check the flags
only once in the fast path, and re-enable interrupts if necessary in the
ptrace case.
The patch abolishes the 'syscall_noerror' member of struct thread_info
and replaces it with a TIF_NOERROR bit in the flags, which is handled in
the slow path. This shortens the syscall entry code, which no longer
needs to clear syscall_noerror.
The patch adds a TIF_SAVE_NVGPRS flag which causes the syscall exit slow
path to save the non-volatile GPRs into a signal frame. This removes the
need for the assembly wrappers around sys_sigsuspend(),
sys_rt_sigsuspend(), et al which existed solely to save those registers
in advance. It also means I don't have to add new wrappers for ppoll()
and pselect(), which is what I was supposed to be doing when I got
distracted into this...
Finally, it unifies the ppc64 and ppc32 methods of handling syscall exit
directly into a signal handler (as required by sigsuspend et al) by
introducing a TIF_RESTOREALL flag which causes _all_ the registers to be
reloaded from the pt_regs by taking the ret_from_exception path, instead
of the normal syscall exit path which stomps on the callee-saved GPRs.
It appears to pass an LTP test run on ppc64, and passes basic testing on
ppc32 too. Brief tests of ptrace functionality with strace and gdb also
appear OK. I wouldn't send it to Linus for 2.6.15 just yet though :)
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2005-11-16 02:52:18 +08:00
|
|
|
set_thread_flag(TIF_RESTOREALL);
|
|
|
|
return 0;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
bad:
|
2011-06-04 13:36:54 +08:00
|
|
|
if (show_unhandled_signals)
|
|
|
|
printk_ratelimited(KERN_INFO
|
|
|
|
"%s[%d]: bad frame in sys_rt_sigreturn: "
|
|
|
|
"%p nip %08lx lr %08lx\n",
|
|
|
|
current->comm, current->pid,
|
|
|
|
rt_sf, regs->nip, regs->link);
|
2007-10-12 08:20:07 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
force_sig(SIGSEGV, current);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2005-10-18 09:17:58 +08:00
|
|
|
#ifdef CONFIG_PPC32
|
2018-05-02 21:20:47 +08:00
|
|
|
SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
|
|
|
|
int, ndbg, struct sig_dbg_op __user *, dbg)
|
2005-10-18 09:17:58 +08:00
|
|
|
{
|
2018-05-02 21:20:47 +08:00
|
|
|
struct pt_regs *regs = current_pt_regs();
|
2005-10-18 09:17:58 +08:00
|
|
|
struct sig_dbg_op op;
|
|
|
|
int i;
|
|
|
|
unsigned long new_msr = regs->msr;
|
2010-02-08 19:50:57 +08:00
|
|
|
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
|
2013-07-04 14:15:46 +08:00
|
|
|
unsigned long new_dbcr0 = current->thread.debug.dbcr0;
|
2005-10-18 09:17:58 +08:00
|
|
|
#endif
|
|
|
|
|
|
|
|
for (i=0; i<ndbg; i++) {
|
2006-06-09 11:02:59 +08:00
|
|
|
if (copy_from_user(&op, dbg + i, sizeof(op)))
|
2005-10-18 09:17:58 +08:00
|
|
|
return -EFAULT;
|
|
|
|
switch (op.dbg_type) {
|
|
|
|
case SIG_DBG_SINGLE_STEPPING:
|
2010-02-08 19:50:57 +08:00
|
|
|
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
|
2005-10-18 09:17:58 +08:00
|
|
|
if (op.dbg_value) {
|
|
|
|
new_msr |= MSR_DE;
|
|
|
|
new_dbcr0 |= (DBCR0_IDM | DBCR0_IC);
|
|
|
|
} else {
|
2010-02-08 19:51:18 +08:00
|
|
|
new_dbcr0 &= ~DBCR0_IC;
|
|
|
|
if (!DBCR_ACTIVE_EVENTS(new_dbcr0,
|
2013-07-04 14:15:46 +08:00
|
|
|
current->thread.debug.dbcr1)) {
|
2010-02-08 19:51:18 +08:00
|
|
|
new_msr &= ~MSR_DE;
|
|
|
|
new_dbcr0 &= ~DBCR0_IDM;
|
|
|
|
}
|
2005-10-18 09:17:58 +08:00
|
|
|
}
|
|
|
|
#else
|
|
|
|
if (op.dbg_value)
|
|
|
|
new_msr |= MSR_SE;
|
|
|
|
else
|
|
|
|
new_msr &= ~MSR_SE;
|
|
|
|
#endif
|
|
|
|
break;
|
|
|
|
case SIG_DBG_BRANCH_TRACING:
|
2010-02-08 19:50:57 +08:00
|
|
|
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
|
2005-10-18 09:17:58 +08:00
|
|
|
return -EINVAL;
|
|
|
|
#else
|
|
|
|
if (op.dbg_value)
|
|
|
|
new_msr |= MSR_BE;
|
|
|
|
else
|
|
|
|
new_msr &= ~MSR_BE;
|
|
|
|
#endif
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We wait until here to actually install the values in the
|
|
|
|
registers so if we fail in the above loop, it will not
|
|
|
|
affect the contents of these registers. After this point,
|
|
|
|
failure is a problem, anyway, and it's very unlikely unless
|
|
|
|
the user is really doing something wrong. */
|
|
|
|
regs->msr = new_msr;
|
2010-02-08 19:50:57 +08:00
|
|
|
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
|
2013-07-04 14:15:46 +08:00
|
|
|
current->thread.debug.dbcr0 = new_dbcr0;
|
2005-10-18 09:17:58 +08:00
|
|
|
#endif
|
|
|
|
|
Remove 'type' argument from access_ok() function
Nobody has actually used the type (VERIFY_READ vs VERIFY_WRITE) argument
of the user address range verification function since we got rid of the
old racy i386-only code to walk page tables by hand.
It existed because the original 80386 would not honor the write protect
bit when in kernel mode, so you had to do COW by hand before doing any
user access. But we haven't supported that in a long time, and these
days the 'type' argument is a purely historical artifact.
A discussion about extending 'user_access_begin()' to do the range
checking resulted in this patch, because there is no way we're going to
move the old VERIFY_xyz interface to that model. And it's best done at
the end of the merge window when I've done most of my merges, so let's
just get this done once and for all.
This patch was mostly done with a sed-script, with manual fix-ups for
the cases that weren't of the trivial 'access_ok(VERIFY_xyz' form.
There were a couple of notable cases:
- csky still had the old "verify_area()" name as an alias.
- the iter_iov code had magical hardcoded knowledge of the actual
values of VERIFY_{READ,WRITE} (not that they mattered, since nothing
really used it)
- microblaze used the type argument for a debug printout
but other than those oddities this should be a total no-op patch.
I tried to fix up all architectures, did fairly extensive grepping for
access_ok() uses, and the changes are trivial, but I may have missed
something. Any missed conversion should be trivially fixable, though.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-01-04 10:57:57 +08:00
|
|
|
if (!access_ok(ctx, sizeof(*ctx)) ||
|
2018-04-25 00:04:25 +08:00
|
|
|
fault_in_pages_readable((u8 __user *)ctx, sizeof(*ctx)))
|
2006-06-09 11:02:59 +08:00
|
|
|
return -EFAULT;
|
|
|
|
|
2005-10-18 09:17:58 +08:00
|
|
|
/*
|
|
|
|
* If we get a fault copying the context into the kernel's
|
|
|
|
* image of the user's registers, we can't just return -EFAULT
|
|
|
|
* because the user's registers will be corrupted. For instance
|
|
|
|
* the NIP value may have been updated but not some of the
|
|
|
|
* other registers. Given that we have done the access_ok
|
|
|
|
* and successfully read the first and last bytes of the region
|
|
|
|
* above, this should only happen in an out-of-memory situation
|
|
|
|
* or if another thread unmaps the region containing the context.
|
|
|
|
* We kill the task with a SIGSEGV in this situation.
|
|
|
|
*/
|
|
|
|
if (do_setcontext(ctx, regs, 1)) {
|
2011-06-04 13:36:54 +08:00
|
|
|
if (show_unhandled_signals)
|
|
|
|
printk_ratelimited(KERN_INFO "%s[%d]: bad frame in "
|
|
|
|
"sys_debug_setcontext: %p nip %08lx "
|
|
|
|
"lr %08lx\n",
|
|
|
|
current->comm, current->pid,
|
|
|
|
ctx, regs->nip, regs->link);
|
2007-10-12 08:20:07 +08:00
|
|
|
|
2005-10-18 09:17:58 +08:00
|
|
|
force_sig(SIGSEGV, current);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* It's not clear whether or why it is desirable to save the
|
|
|
|
* sigaltstack setting on signal delivery and restore it on
|
|
|
|
* signal return. But other architectures do this and we have
|
|
|
|
* always done it up until now so it is probably better not to
|
|
|
|
* change it. -- paulus
|
|
|
|
*/
|
2012-12-23 16:26:46 +08:00
|
|
|
restore_altstack(&ctx->uc_stack);
|
2005-10-18 09:17:58 +08:00
|
|
|
|
[PATCH] syscall entry/exit revamp
This cleanup patch speeds up the null syscall path on ppc64 by about 3%,
and brings the ppc32 and ppc64 code slightly closer together.
The ppc64 code was checking current_thread_info()->flags twice in the
syscall exit path; once for TIF_SYSCALL_T_OR_A before disabling
interrupts, and then again for TIF_SIGPENDING|TIF_NEED_RESCHED etc after
disabling interrupts. Now we do the same as ppc32 -- check the flags
only once in the fast path, and re-enable interrupts if necessary in the
ptrace case.
The patch abolishes the 'syscall_noerror' member of struct thread_info
and replaces it with a TIF_NOERROR bit in the flags, which is handled in
the slow path. This shortens the syscall entry code, which no longer
needs to clear syscall_noerror.
The patch adds a TIF_SAVE_NVGPRS flag which causes the syscall exit slow
path to save the non-volatile GPRs into a signal frame. This removes the
need for the assembly wrappers around sys_sigsuspend(),
sys_rt_sigsuspend(), et al which existed solely to save those registers
in advance. It also means I don't have to add new wrappers for ppoll()
and pselect(), which is what I was supposed to be doing when I got
distracted into this...
Finally, it unifies the ppc64 and ppc32 methods of handling syscall exit
directly into a signal handler (as required by sigsuspend et al) by
introducing a TIF_RESTOREALL flag which causes _all_ the registers to be
reloaded from the pt_regs by taking the ret_from_exception path, instead
of the normal syscall exit path which stomps on the callee-saved GPRs.
It appears to pass an LTP test run on ppc64, and passes basic testing on
ppc32 too. Brief tests of ptrace functionality with strace and gdb also
appear OK. I wouldn't send it to Linus for 2.6.15 just yet though :)
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2005-11-16 02:52:18 +08:00
|
|
|
set_thread_flag(TIF_RESTOREALL);
|
2005-10-18 09:17:58 +08:00
|
|
|
out:
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* OK, we're invoking a handler
|
|
|
|
*/
|
2016-09-23 14:18:12 +08:00
|
|
|
int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
|
|
|
|
struct task_struct *tsk)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2005-10-18 09:17:58 +08:00
|
|
|
struct sigcontext __user *sc;
|
2007-06-04 15:22:48 +08:00
|
|
|
struct sigframe __user *frame;
|
2013-06-09 19:23:15 +08:00
|
|
|
struct mcontext __user *tm_mctx = NULL;
|
2007-06-04 15:22:48 +08:00
|
|
|
unsigned long newsp = 0;
|
2013-02-14 00:21:41 +08:00
|
|
|
int sigret;
|
|
|
|
unsigned long tramp;
|
2016-09-23 14:18:12 +08:00
|
|
|
struct pt_regs *regs = tsk->thread.regs;
|
|
|
|
|
|
|
|
BUG_ON(tsk != current);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* Set up Signal Frame */
|
2016-09-23 14:18:12 +08:00
|
|
|
frame = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*frame), 1);
|
2007-06-04 15:22:48 +08:00
|
|
|
if (unlikely(frame == NULL))
|
2005-04-17 06:20:36 +08:00
|
|
|
goto badframe;
|
2007-06-04 15:22:48 +08:00
|
|
|
sc = (struct sigcontext __user *) &frame->sctx;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
#if _NSIG != 64
|
2005-10-18 09:17:58 +08:00
|
|
|
#error "Please adjust handle_signal()"
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|
2014-03-02 21:46:11 +08:00
|
|
|
if (__put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler)
|
2005-04-17 06:20:36 +08:00
|
|
|
|| __put_user(oldset->sig[0], &sc->oldmask)
|
2005-10-18 09:17:58 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
2005-04-17 06:20:36 +08:00
|
|
|
|| __put_user((oldset->sig[0] >> 32), &sc->_unused[3])
|
2005-10-18 09:17:58 +08:00
|
|
|
#else
|
|
|
|
|| __put_user(oldset->sig[1], &sc->_unused[3])
|
|
|
|
#endif
|
2007-06-04 15:22:48 +08:00
|
|
|
|| __put_user(to_user_ptr(&frame->mctx), &sc->regs)
|
2014-03-02 21:46:11 +08:00
|
|
|
|| __put_user(ksig->sig, &sc->signal))
|
2005-04-17 06:20:36 +08:00
|
|
|
goto badframe;
|
|
|
|
|
2016-09-23 14:18:12 +08:00
|
|
|
if (vdso32_sigtramp && tsk->mm->context.vdso_base) {
|
2013-02-14 00:21:41 +08:00
|
|
|
sigret = 0;
|
2016-09-23 14:18:12 +08:00
|
|
|
tramp = tsk->mm->context.vdso_base + vdso32_sigtramp;
|
2005-11-11 18:15:21 +08:00
|
|
|
} else {
|
2013-02-14 00:21:41 +08:00
|
|
|
sigret = __NR_sigreturn;
|
|
|
|
tramp = (unsigned long) frame->mctx.tramp;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
2013-06-09 19:23:15 +08:00
|
|
|
tm_mctx = &frame->mctx_transact;
|
2013-02-14 00:21:41 +08:00
|
|
|
if (MSR_TM_ACTIVE(regs->msr)) {
|
|
|
|
if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact,
|
|
|
|
sigret))
|
2005-04-17 06:20:36 +08:00
|
|
|
goto badframe;
|
|
|
|
}
|
2013-02-14 00:21:41 +08:00
|
|
|
else
|
|
|
|
#endif
|
2013-06-09 19:23:15 +08:00
|
|
|
{
|
|
|
|
if (save_user_regs(regs, &frame->mctx, tm_mctx, sigret, 1))
|
2013-02-14 00:21:41 +08:00
|
|
|
goto badframe;
|
2013-06-09 19:23:15 +08:00
|
|
|
}
|
2013-02-14 00:21:41 +08:00
|
|
|
|
|
|
|
regs->link = tramp;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2016-09-23 14:18:12 +08:00
|
|
|
tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */
|
2005-11-14 18:55:15 +08:00
|
|
|
|
2007-06-04 15:22:48 +08:00
|
|
|
/* create a stack frame for the caller of the handler */
|
|
|
|
newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
|
2005-05-01 01:01:40 +08:00
|
|
|
if (put_user(regs->gpr[1], (u32 __user *)newsp))
|
2005-04-17 06:20:36 +08:00
|
|
|
goto badframe;
|
2007-06-04 15:22:48 +08:00
|
|
|
|
2005-10-18 09:17:58 +08:00
|
|
|
regs->gpr[1] = newsp;
|
2014-03-02 21:46:11 +08:00
|
|
|
regs->gpr[3] = ksig->sig;
|
2005-04-17 06:20:36 +08:00
|
|
|
regs->gpr[4] = (unsigned long) sc;
|
2014-03-02 21:46:11 +08:00
|
|
|
regs->nip = (unsigned long) (unsigned long)ksig->ka.sa.sa_handler;
|
2006-06-07 14:14:40 +08:00
|
|
|
/* enter the signal handler in big-endian mode */
|
|
|
|
regs->msr &= ~MSR_LE;
|
2014-03-02 21:46:11 +08:00
|
|
|
return 0;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
badframe:
|
2011-06-04 13:36:54 +08:00
|
|
|
if (show_unhandled_signals)
|
|
|
|
printk_ratelimited(KERN_INFO
|
|
|
|
"%s[%d]: bad frame in handle_signal32: "
|
|
|
|
"%p nip %08lx lr %08lx\n",
|
2016-09-23 14:18:12 +08:00
|
|
|
tsk->comm, tsk->pid,
|
2011-06-04 13:36:54 +08:00
|
|
|
frame, regs->nip, regs->link);
|
2007-10-12 08:20:07 +08:00
|
|
|
|
2014-03-02 21:46:11 +08:00
|
|
|
return 1;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Do a signal return; undo the signal stack.
|
|
|
|
*/
|
2018-05-02 21:20:47 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
|
|
|
COMPAT_SYSCALL_DEFINE0(sigreturn)
|
|
|
|
#else
|
|
|
|
SYSCALL_DEFINE0(sigreturn)
|
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2018-05-02 21:20:47 +08:00
|
|
|
struct pt_regs *regs = current_pt_regs();
|
2013-06-09 19:23:16 +08:00
|
|
|
struct sigframe __user *sf;
|
2005-10-18 09:17:58 +08:00
|
|
|
struct sigcontext __user *sc;
|
|
|
|
struct sigcontext sigctx;
|
|
|
|
struct mcontext __user *sr;
|
2007-10-12 08:20:07 +08:00
|
|
|
void __user *addr;
|
2005-04-17 06:20:36 +08:00
|
|
|
sigset_t set;
|
2013-06-09 19:23:16 +08:00
|
|
|
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
|
|
|
struct mcontext __user *mcp, *tm_mcp;
|
|
|
|
unsigned long msr_hi;
|
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* Always make any pending restarted system calls return -EINTR */
|
2015-02-13 07:01:14 +08:00
|
|
|
current->restart_block.fn = do_no_restart_syscall;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2013-06-09 19:23:16 +08:00
|
|
|
sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE);
|
|
|
|
sc = &sf->sctx;
|
2007-10-12 08:20:07 +08:00
|
|
|
addr = sc;
|
2005-04-17 06:20:36 +08:00
|
|
|
if (copy_from_user(&sigctx, sc, sizeof(sigctx)))
|
|
|
|
goto badframe;
|
|
|
|
|
2005-10-18 09:17:58 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Note that PPC32 puts the upper 32 bits of the sigmask in the
|
|
|
|
* unused part of the signal stackframe
|
|
|
|
*/
|
|
|
|
set.sig[0] = sigctx.oldmask + ((long)(sigctx._unused[3]) << 32);
|
2005-10-18 09:17:58 +08:00
|
|
|
#else
|
|
|
|
set.sig[0] = sigctx.oldmask;
|
|
|
|
set.sig[1] = sigctx._unused[3];
|
|
|
|
#endif
|
2012-04-28 02:09:19 +08:00
|
|
|
set_current_blocked(&set);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2013-06-09 19:23:16 +08:00
|
|
|
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
|
|
|
mcp = (struct mcontext __user *)&sf->mctx;
|
|
|
|
tm_mcp = (struct mcontext __user *)&sf->mctx_transact;
|
|
|
|
if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR]))
|
2005-04-17 06:20:36 +08:00
|
|
|
goto badframe;
|
2013-06-09 19:23:16 +08:00
|
|
|
if (MSR_TM_ACTIVE(msr_hi<<32)) {
|
|
|
|
if (!cpu_has_feature(CPU_FTR_TM))
|
|
|
|
goto badframe;
|
|
|
|
if (restore_tm_user_regs(regs, mcp, tm_mcp))
|
|
|
|
goto badframe;
|
|
|
|
} else
|
|
|
|
#endif
|
|
|
|
{
|
|
|
|
sr = (struct mcontext __user *)from_user_ptr(sigctx.regs);
|
|
|
|
addr = sr;
|
Remove 'type' argument from access_ok() function
Nobody has actually used the type (VERIFY_READ vs VERIFY_WRITE) argument
of the user address range verification function since we got rid of the
old racy i386-only code to walk page tables by hand.
It existed because the original 80386 would not honor the write protect
bit when in kernel mode, so you had to do COW by hand before doing any
user access. But we haven't supported that in a long time, and these
days the 'type' argument is a purely historical artifact.
A discussion about extending 'user_access_begin()' to do the range
checking resulted this patch, because there is no way we're going to
move the old VERIFY_xyz interface to that model. And it's best done at
the end of the merge window when I've done most of my merges, so let's
just get this done once and for all.
This patch was mostly done with a sed-script, with manual fix-ups for
the cases that weren't of the trivial 'access_ok(VERIFY_xyz' form.
There were a couple of notable cases:
- csky still had the old "verify_area()" name as an alias.
- the iter_iov code had magical hardcoded knowledge of the actual
values of VERIFY_{READ,WRITE} (not that they mattered, since nothing
really used it)
- microblaze used the type argument for a debug printout
but other than those oddities this should be a total no-op patch.
I tried to fix up all architectures, did fairly extensive grepping for
access_ok() uses, and the changes are trivial, but I may have missed
something. Any missed conversion should be trivially fixable, though.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-01-04 10:57:57 +08:00
|
|
|
if (!access_ok(sr, sizeof(*sr))
|
2013-06-09 19:23:16 +08:00
|
|
|
|| restore_user_regs(regs, sr, 1))
|
|
|
|
goto badframe;
|
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
[PATCH] syscall entry/exit revamp
This cleanup patch speeds up the null syscall path on ppc64 by about 3%,
and brings the ppc32 and ppc64 code slightly closer together.
The ppc64 code was checking current_thread_info()->flags twice in the
syscall exit path; once for TIF_SYSCALL_T_OR_A before disabling
interrupts, and then again for TIF_SIGPENDING|TIF_NEED_RESCHED etc after
disabling interrupts. Now we do the same as ppc32 -- check the flags
only once in the fast path, and re-enable interrupts if necessary in the
ptrace case.
The patch abolishes the 'syscall_noerror' member of struct thread_info
and replaces it with a TIF_NOERROR bit in the flags, which is handled in
the slow path. This shortens the syscall entry code, which no longer
needs to clear syscall_noerror.
The patch adds a TIF_SAVE_NVGPRS flag which causes the syscall exit slow
path to save the non-volatile GPRs into a signal frame. This removes the
need for the assembly wrappers around sys_sigsuspend(),
sys_rt_sigsuspend(), et al which existed solely to save those registers
in advance. It also means I don't have to add new wrappers for ppoll()
and pselect(), which is what I was supposed to be doing when I got
distracted into this...
Finally, it unifies the ppc64 and ppc32 methods of handling syscall exit
directly into a signal handler (as required by sigsuspend et al) by
introducing a TIF_RESTOREALL flag which causes _all_ the registers to be
reloaded from the pt_regs by taking the ret_from_exception path, instead
of the normal syscall exit path which stomps on the callee-saved GPRs.
It appears to pass an LTP test run on ppc64, and passes basic testing on
ppc32 too. Brief tests of ptrace functionality with strace and gdb also
appear OK. I wouldn't send it to Linus for 2.6.15 just yet though :)
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2005-11-16 02:52:18 +08:00
|
|
|
set_thread_flag(TIF_RESTOREALL);
|
2005-10-18 09:17:58 +08:00
|
|
|
return 0;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
badframe:
|
2011-06-04 13:36:54 +08:00
|
|
|
if (show_unhandled_signals)
|
|
|
|
printk_ratelimited(KERN_INFO
|
|
|
|
"%s[%d]: bad frame in sys_sigreturn: "
|
|
|
|
"%p nip %08lx lr %08lx\n",
|
|
|
|
current->comm, current->pid,
|
|
|
|
addr, regs->nip, regs->link);
|
2007-10-12 08:20:07 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
force_sig(SIGSEGV, current);
|
|
|
|
return 0;
|
|
|
|
}
|