diff --git a/arch/um/Kconfig_i386 b/arch/um/Kconfig_i386 index e41f3748d30f..27c18a8d9d17 100644 --- a/arch/um/Kconfig_i386 +++ b/arch/um/Kconfig_i386 @@ -19,6 +19,18 @@ config 3_LEVEL_PGTABLES memory. All the memory that can't be mapped directly will be treated as high memory. +config STUB_CODE + hex + default 0xbfffe000 + +config STUB_DATA + hex + default 0xbffff000 + +config STUB_START + hex + default STUB_CODE + config ARCH_HAS_SC_SIGNALS bool default y diff --git a/arch/um/Kconfig_x86_64 b/arch/um/Kconfig_x86_64 index f162f50f0b17..735a047c890c 100644 --- a/arch/um/Kconfig_x86_64 +++ b/arch/um/Kconfig_x86_64 @@ -14,6 +14,18 @@ config 3_LEVEL_PGTABLES bool default y +config STUB_CODE + hex + default 0x7fbfffe000 + +config STUB_DATA + hex + default 0x7fbffff000 + +config STUB_START + hex + default STUB_CODE + config ARCH_HAS_SC_SIGNALS bool default n diff --git a/arch/um/Makefile-i386 b/arch/um/Makefile-i386 index 29e182d5a83a..301059062a3e 100644 --- a/arch/um/Makefile-i386 +++ b/arch/um/Makefile-i386 @@ -8,7 +8,7 @@ ifeq ($(CONFIG_MODE_SKAS),y) endif endif -CFLAGS += -U__$(SUBARCH)__ -U$(SUBARCH) +CFLAGS += -U__$(SUBARCH)__ -U$(SUBARCH) $(STUB_CFLAGS) ARCH_USER_CFLAGS := ifneq ($(CONFIG_GPROF),y) diff --git a/arch/um/Makefile-x86_64 b/arch/um/Makefile-x86_64 index 32144562c279..d80bd0052e6b 100644 --- a/arch/um/Makefile-x86_64 +++ b/arch/um/Makefile-x86_64 @@ -4,7 +4,7 @@ SUBARCH_LIBS := arch/um/sys-x86_64/ START := 0x60000000 -CFLAGS += -U__$(SUBARCH)__ -fno-builtin +CFLAGS += -U__$(SUBARCH)__ -fno-builtin $(STUB_CFLAGS) ARCH_USER_CFLAGS := -D__x86_64__ ELF_ARCH := i386:x86-64 diff --git a/arch/um/defconfig b/arch/um/defconfig index 4067c3aa5b60..80d30d19d750 100644 --- a/arch/um/defconfig +++ b/arch/um/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc3-skas3-v9-pre2 -# Sun Apr 24 19:46:10 2005 +# Linux kernel version: 2.6.12-rc6-mm1 +# Tue Jun 14 18:22:21 2005 # CONFIG_GENERIC_HARDIRQS=y CONFIG_UML=y @@ -13,23 +13,32 @@ CONFIG_GENERIC_CALIBRATE_DELAY=y # # UML-specific options # -CONFIG_MODE_TT=y +# CONFIG_MODE_TT is not set +# CONFIG_STATIC_LINK is not set CONFIG_MODE_SKAS=y CONFIG_UML_X86=y # CONFIG_64BIT is not set CONFIG_TOP_ADDR=0xc0000000 # CONFIG_3_LEVEL_PGTABLES is not set +CONFIG_STUB_CODE=0xbfffe000 +CONFIG_STUB_DATA=0xbffff000 +CONFIG_STUB_START=0xbfffe000 CONFIG_ARCH_HAS_SC_SIGNALS=y CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA=y -CONFIG_LD_SCRIPT_STATIC=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +CONFIG_LD_SCRIPT_DYN=y CONFIG_NET=y CONFIG_BINFMT_ELF=y CONFIG_BINFMT_MISC=m -CONFIG_HOSTFS=y +# CONFIG_HOSTFS is not set CONFIG_MCONSOLE=y # CONFIG_MAGIC_SYSRQ is not set # CONFIG_HOST_2G_2G is not set -# CONFIG_SMP is not set CONFIG_NEST_LEVEL=0 CONFIG_KERNEL_HALF_GIGS=1 # CONFIG_HIGHMEM is not set @@ -63,6 +72,8 @@ CONFIG_IKCONFIG_PROC=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set CONFIG_KALLSYMS_EXTRA_PASS=y +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -81,6 +92,7 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_FORCE_UNLOAD is not set CONFIG_OBSOLETE_MODPARM=y +# CONFIG_MODVERSIONS is not set # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_KMOD=y @@ -115,6 +127,7 @@ CONFIG_UML_SOUND=m CONFIG_SOUND=m CONFIG_HOSTAUDIO=m CONFIG_UML_RANDOM=y +# CONFIG_MMAPPER is not set # # Block devices @@ -176,6 +189,17 @@ CONFIG_INET=y # CONFIG_INET_TUNNEL is not set CONFIG_IP_TCPDIAG=y # CONFIG_IP_TCPDIAG_IPV6 is not set + +# +# TCP congestion control +# +CONFIG_TCP_CONG_BIC=y +CONFIG_TCP_CONG_WESTWOOD=y +CONFIG_TCP_CONG_HTCP=y +# CONFIG_TCP_CONG_HSTCP is not set +# CONFIG_TCP_CONG_HYBLA is not set +# CONFIG_TCP_CONG_VEGAS is not set +# CONFIG_TCP_CONG_SCALABLE is not set # CONFIG_IPV6 is not set # CONFIG_NETFILTER is not set @@ -206,11 +230,15 @@ CONFIG_IP_TCPDIAG=y # Network testing # # CONFIG_NET_PKTGEN is not set +# CONFIG_KGDBOE is not set # CONFIG_NETPOLL is not set +# CONFIG_NETPOLL_RX is not set +# CONFIG_NETPOLL_TRAP is not set # CONFIG_NET_POLL_CONTROLLER is not set # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set +# CONFIG_IEEE80211 is not set CONFIG_DUMMY=m # CONFIG_BONDING is not set # CONFIG_EQUALIZER is not set @@ -227,6 +255,7 @@ CONFIG_PPP=m # CONFIG_PPP_SYNC_TTY is not set # CONFIG_PPP_DEFLATE is not set # CONFIG_PPP_BSDCOMP is not set +# CONFIG_PPP_MPPE is not set # CONFIG_PPPOE is not set CONFIG_SLIP=m # CONFIG_SLIP_COMPRESSED is not set @@ -240,10 +269,12 @@ CONFIG_SLIP=m # CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set +# CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y # CONFIG_EXT3_FS_XATTR is not set CONFIG_JBD=y # CONFIG_JBD_DEBUG is not set +# CONFIG_REISER4_FS is not set CONFIG_REISERFS_FS=y # CONFIG_REISERFS_CHECK is not set # CONFIG_REISERFS_PROC_INFO is not set @@ -256,6 +287,7 @@ CONFIG_REISERFS_FS=y # CONFIG_XFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set +CONFIG_INOTIFY=y CONFIG_QUOTA=y # CONFIG_QFMT_V1 is not set # CONFIG_QFMT_V2 is not set @@ -264,6 +296,12 @@ CONFIG_DNOTIFY=y CONFIG_AUTOFS_FS=m CONFIG_AUTOFS4_FS=m +# +# Caches +# +# CONFIG_FSCACHE is not set +# CONFIG_FUSE_FS is not set + # # CD-ROM/DVD Filesystems # @@ -291,6 +329,8 @@ CONFIG_TMPFS=y # CONFIG_TMPFS_XATTR is not set # CONFIG_HUGETLB_PAGE is not set CONFIG_RAMFS=y +# CONFIG_CONFIGFS_FS is not set +# CONFIG_RELAYFS_FS is not set # # Miscellaneous filesystems @@ -319,6 +359,7 @@ CONFIG_RAMFS=y # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set # # Partition Types @@ -404,14 +445,15 @@ CONFIG_CRC32=m # CONFIG_PRINTK_TIME is not set CONFIG_DEBUG_KERNEL=y CONFIG_LOG_BUF_SHIFT=14 +CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_SCHEDSTATS is not set -# CONFIG_DEBUG_SLAB is not set +CONFIG_DEBUG_SLAB=y # CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_KOBJECT is not set CONFIG_DEBUG_INFO=y # CONFIG_DEBUG_FS is not set CONFIG_FRAME_POINTER=y -CONFIG_PT_PROXY=y +# CONFIG_GPROF is not set # CONFIG_GCOV is not set # CONFIG_SYSCALL_DEBUG is not set diff --git a/arch/um/include/mem.h b/arch/um/include/mem.h index 10c46c38949a..99d3ad4a03e5 100644 --- a/arch/um/include/mem.h +++ b/arch/um/include/mem.h @@ -13,6 +13,7 @@ extern int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w); extern int is_remapped(void *virt); extern int physmem_remove_mapping(void *virt); extern void physmem_forget_descriptor(int fd); +extern unsigned long to_phys(void *virt); #endif diff --git a/arch/um/include/registers.h b/arch/um/include/registers.h index 8744abb5224f..0a35e6d0baa0 100644 --- a/arch/um/include/registers.h +++ b/arch/um/include/registers.h @@ -14,6 +14,7 @@ extern int restore_fp_registers(int pid, unsigned long *fp_regs); extern void save_registers(int pid, union uml_pt_regs *regs); extern void restore_registers(int pid, union uml_pt_regs *regs); extern void init_registers(int pid); +extern void get_safe_registers(unsigned long * regs); #endif diff --git a/arch/um/include/sysdep-i386/ptrace_user.h b/arch/um/include/sysdep-i386/ptrace_user.h index eca8066e7a43..899aa4b2a78d 100644 --- a/arch/um/include/sysdep-i386/ptrace_user.h +++ b/arch/um/include/sysdep-i386/ptrace_user.h @@ -20,11 +20,24 @@ #define PT_SYSCALL_ARG3_OFFSET PT_OFFSET(EDX) #define PT_SYSCALL_ARG4_OFFSET PT_OFFSET(ESI) #define PT_SYSCALL_ARG5_OFFSET PT_OFFSET(EDI) +#define PT_SYSCALL_ARG6_OFFSET PT_OFFSET(EBP) #define PT_SYSCALL_RET_OFFSET PT_OFFSET(EAX) +#define REGS_SYSCALL_NR EAX /* This is used before a system call */ +#define REGS_SYSCALL_ARG1 EBX +#define REGS_SYSCALL_ARG2 ECX +#define REGS_SYSCALL_ARG3 EDX +#define REGS_SYSCALL_ARG4 ESI +#define REGS_SYSCALL_ARG5 EDI +#define REGS_SYSCALL_ARG6 EBP + +#define REGS_IP_INDEX EIP +#define REGS_SP_INDEX UESP + #define PT_IP_OFFSET PT_OFFSET(EIP) #define PT_IP(regs) ((regs)[EIP]) +#define PT_SP_OFFSET PT_OFFSET(UESP) #define PT_SP(regs) ((regs)[UESP]) #ifndef FRAME_SIZE diff --git a/arch/um/include/sysdep-i386/stub.h b/arch/um/include/sysdep-i386/stub.h new file mode 100644 index 000000000000..fed9ff1cea52 --- /dev/null +++ b/arch/um/include/sysdep-i386/stub.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_STUB_H +#define __SYSDEP_STUB_H + +#include +#include + +extern void stub_segv_handler(int sig); + +#define STUB_SYSCALL_RET EAX +#define STUB_MMAP_NR __NR_mmap2 +#define MMAP_OFFSET(o) ((o) >> PAGE_SHIFT) + +#endif diff --git a/arch/um/include/sysdep-x86_64/ptrace_user.h b/arch/um/include/sysdep-x86_64/ptrace_user.h index 31729973fb14..128faf027364 100644 --- a/arch/um/include/sysdep-x86_64/ptrace_user.h +++ b/arch/um/include/sysdep-x86_64/ptrace_user.h @@ -55,6 +55,20 @@ #define PTRACE_OLDSETOPTIONS 21 #endif +/* These are before the system call, so the the system call number is RAX + * rather than ORIG_RAX, and arg4 is R10 rather than RCX + */ +#define REGS_SYSCALL_NR PT_INDEX(RAX) +#define REGS_SYSCALL_ARG1 PT_INDEX(RDI) +#define REGS_SYSCALL_ARG2 PT_INDEX(RSI) +#define REGS_SYSCALL_ARG3 PT_INDEX(RDX) +#define REGS_SYSCALL_ARG4 PT_INDEX(R10) +#define REGS_SYSCALL_ARG5 PT_INDEX(R8) +#define REGS_SYSCALL_ARG6 PT_INDEX(R9) + +#define REGS_IP_INDEX PT_INDEX(RIP) +#define REGS_SP_INDEX PT_INDEX(RSP) + #endif /* diff --git a/arch/um/include/sysdep-x86_64/stub.h b/arch/um/include/sysdep-x86_64/stub.h new file mode 100644 index 000000000000..6b5447ad590d --- /dev/null +++ b/arch/um/include/sysdep-x86_64/stub.h @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_STUB_H +#define __SYSDEP_STUB_H + +#include +#include +#include + +extern void stub_segv_handler(int sig); + +#define STUB_SYSCALL_RET PT_INDEX(RAX) +#define STUB_MMAP_NR __NR_mmap +#define MMAP_OFFSET(o) (o) + +#endif diff --git a/arch/um/include/tlb.h b/arch/um/include/tlb.h index da1097285b8c..c6f9628f39bf 100644 --- a/arch/um/include/tlb.h +++ b/arch/um/include/tlb.h @@ -37,31 +37,25 @@ struct host_vm_op { extern void mprotect_kernel_vm(int w); extern void force_flush_all(void); extern void fix_range_common(struct mm_struct *mm, unsigned long start_addr, - unsigned long end_addr, int force, int data, - void (*do_ops)(int, struct host_vm_op *, int)); + unsigned long end_addr, int force, + void (*do_ops)(union mm_context *, + struct host_vm_op *, int)); extern int flush_tlb_kernel_range_common(unsigned long start, unsigned long end); extern int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, int r, int w, int x, struct host_vm_op *ops, int index, - int last_filled, int data, - void (*do_ops)(int, struct host_vm_op *, int)); + int last_filled, union mm_context *mmu, + void (*do_ops)(union mm_context *, struct host_vm_op *, + int)); extern int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *ops, int index, int last_filled, - int data, void (*do_ops)(int, struct host_vm_op *, int)); + union mm_context *mmu, + void (*do_ops)(union mm_context *, struct host_vm_op *, + int)); extern int add_mprotect(unsigned long addr, unsigned long len, int r, int w, int x, struct host_vm_op *ops, int index, - int last_filled, int data, - void (*do_ops)(int, struct host_vm_op *, int)); + int last_filled, union mm_context *mmu, + void (*do_ops)(union mm_context *, struct host_vm_op *, + int)); #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index 715b0838a68c..3942a5f245de 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -67,6 +67,12 @@ SECTIONS *(.stub .text.* .gnu.linkonce.t.*) /* .gnu.warning sections are handled specially by elf32.em. */ *(.gnu.warning) + + . = ALIGN(4096); + __syscall_stub_start = .; + *(.__syscall_stub*) + __syscall_stub_end = .; + . = ALIGN(4096); } =0x90909090 .fini : { KEEP (*(.fini)) diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index 420e6d51fa0f..a24e3b7f4bf0 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -353,6 +353,8 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len, #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) +extern int __syscall_stub_start, __binary_start; + void setup_physmem(unsigned long start, unsigned long reserve_end, unsigned long len, unsigned long highmem) { @@ -371,6 +373,12 @@ void setup_physmem(unsigned long start, unsigned long reserve_end, exit(1); } + /* Special kludge - This page will be mapped in to userspace processes + * from physmem_fd, so it needs to be written out there. + */ + os_seek_file(physmem_fd, __pa(&__syscall_stub_start)); + os_write_file(physmem_fd, &__syscall_stub_start, PAGE_SIZE); + bootmap_size = init_bootmem(pfn, pfn + delta); free_bootmem(__pa(reserve_end) + bootmap_size, len - bootmap_size - reserve); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 1b5ef3e96c71..c45a60e9c92d 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -32,6 +32,7 @@ #include "uml-config.h" #include "choose-mode.h" #include "mode.h" +#include "tempfile.h" #ifdef UML_CONFIG_MODE_SKAS #include "skas.h" #include "skas_ptrace.h" @@ -358,11 +359,16 @@ void forward_pending_sigio(int target) kill(target, SIGIO); } +int ptrace_faultinfo = 0; +int proc_mm = 1; + +extern void *__syscall_stub_start, __syscall_stub_end; + #ifdef UML_CONFIG_MODE_SKAS -static inline int check_skas3_ptrace_support(void) +static inline void check_skas3_ptrace_support(void) { struct ptrace_faultinfo fi; - int pid, n, ret = 1; + int pid, n; printf("Checking for the skas3 patch in the host..."); pid = start_ptraced_child(); @@ -374,33 +380,31 @@ static inline int check_skas3_ptrace_support(void) else { perror("not found"); } - ret = 0; - } else { + } + else { + ptrace_faultinfo = 1; printf("found\n"); } init_registers(pid); stop_ptraced_child(pid, 1, 1); - - return(ret); } int can_do_skas(void) { - int ret = 1; - printf("Checking for /proc/mm..."); if (os_access("/proc/mm", OS_ACC_W_OK) < 0) { + proc_mm = 0; printf("not found\n"); - ret = 0; goto out; - } else { + } + else { printf("found\n"); } - ret = check_skas3_ptrace_support(); out: - return ret; + check_skas3_ptrace_support(); + return 1; } #else int can_do_skas(void) diff --git a/arch/um/kernel/skas/exec_kern.c b/arch/um/kernel/skas/exec_kern.c index c6b4d5dba789..77ed7bbab219 100644 --- a/arch/um/kernel/skas/exec_kern.c +++ b/arch/um/kernel/skas/exec_kern.c @@ -18,7 +18,7 @@ void flush_thread_skas(void) { force_flush_all(); - switch_mm_skas(current->mm->context.skas.mm_fd); + switch_mm_skas(¤t->mm->context.skas.id); } void start_thread_skas(struct pt_regs *regs, unsigned long eip, diff --git a/arch/um/kernel/skas/include/mm_id.h b/arch/um/kernel/skas/include/mm_id.h new file mode 100644 index 000000000000..48dd0989ddaa --- /dev/null +++ b/arch/um/kernel/skas/include/mm_id.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2005 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __MM_ID_H +#define __MM_ID_H + +struct mm_id { + union { + int mm_fd; + int pid; + } u; + unsigned long stack; +}; + +#endif diff --git a/arch/um/kernel/skas/include/mmu-skas.h b/arch/um/kernel/skas/include/mmu-skas.h index 4cd60d7213f3..278b72f1d9ad 100644 --- a/arch/um/kernel/skas/include/mmu-skas.h +++ b/arch/um/kernel/skas/include/mmu-skas.h @@ -6,10 +6,15 @@ #ifndef __SKAS_MMU_H #define __SKAS_MMU_H +#include "mm_id.h" + struct mmu_context_skas { - int mm_fd; + struct mm_id id; + unsigned long last_page_table; }; +extern void switch_mm_skas(struct mm_id * mm_idp); + #endif /* diff --git a/arch/um/kernel/skas/include/skas.h b/arch/um/kernel/skas/include/skas.h index 96b51dba3471..d91a60f3830a 100644 --- a/arch/um/kernel/skas/include/skas.h +++ b/arch/um/kernel/skas/include/skas.h @@ -6,9 +6,11 @@ #ifndef __SKAS_H #define __SKAS_H +#include "mm_id.h" #include "sysdep/ptrace.h" extern int userspace_pid[]; +extern int proc_mm, ptrace_faultinfo; extern void switch_threads(void *me, void *next); extern void thread_wait(void *sw, void *fb); @@ -22,16 +24,17 @@ extern void new_thread_proc(void *stack, void (*handler)(int sig)); extern void remove_sigstack(void); extern void new_thread_handler(int sig); extern void handle_syscall(union uml_pt_regs *regs); -extern void map(int fd, unsigned long virt, unsigned long len, int r, int w, - int x, int phys_fd, unsigned long long offset); -extern int unmap(int fd, void *addr, unsigned long len); -extern int protect(int fd, unsigned long addr, unsigned long len, - int r, int w, int x); +extern int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, + int r, int w, int x, int phys_fd, unsigned long long offset); +extern int unmap(struct mm_id * mm_idp, void *addr, unsigned long len); +extern int protect(struct mm_id * mm_idp, unsigned long addr, + unsigned long len, int r, int w, int x); extern void user_signal(int sig, union uml_pt_regs *regs, int pid); extern int new_mm(int from); -extern void start_userspace(int cpu); +extern int start_userspace(unsigned long stub_stack); extern void get_skas_faultinfo(int pid, struct faultinfo * fi); extern long execute_syscall_skas(void *r); +extern unsigned long current_stub_stack(void); #endif diff --git a/arch/um/kernel/skas/mem.c b/arch/um/kernel/skas/mem.c index 438db2f43456..147466d7ff4f 100644 --- a/arch/um/kernel/skas/mem.c +++ b/arch/um/kernel/skas/mem.c @@ -5,7 +5,9 @@ #include "linux/config.h" #include "linux/mm.h" +#include "asm/pgtable.h" #include "mem_user.h" +#include "skas.h" unsigned long set_task_sizes_skas(int arg, unsigned long *host_size_out, unsigned long *task_size_out) @@ -18,7 +20,9 @@ unsigned long set_task_sizes_skas(int arg, unsigned long *host_size_out, *task_size_out = CONFIG_HOST_TASK_SIZE; #else *host_size_out = top; - *task_size_out = top; + if (proc_mm && ptrace_faultinfo) + *task_size_out = top; + else *task_size_out = CONFIG_STUB_START & PGDIR_MASK; #endif return(((unsigned long) set_task_sizes_skas) & ~0xffffff); } diff --git a/arch/um/kernel/skas/mem_user.c b/arch/um/kernel/skas/mem_user.c index 1310bf1e88d1..b0980ff3bd95 100644 --- a/arch/um/kernel/skas/mem_user.c +++ b/arch/um/kernel/skas/mem_user.c @@ -3,100 +3,171 @@ * Licensed under the GPL */ +#include #include #include +#include +#include +#include #include "mem_user.h" #include "mem.h" +#include "mm_id.h" #include "user.h" #include "os.h" #include "proc_mm.h" +#include "ptrace_user.h" +#include "user_util.h" +#include "kern_util.h" +#include "task.h" +#include "registers.h" +#include "uml-config.h" +#include "sysdep/ptrace.h" +#include "sysdep/stub.h" +#include "skas.h" -void map(int fd, unsigned long virt, unsigned long len, int r, int w, - int x, int phys_fd, unsigned long long offset) +extern unsigned long syscall_stub, __syscall_stub_start; + +extern void wait_stub_done(int pid, int sig, char * fname); + +static long run_syscall_stub(struct mm_id * mm_idp, int syscall, + unsigned long *args) { - struct proc_mm_op map; - int prot, n; + int n, pid = mm_idp->u.pid; + unsigned long regs[MAX_REG_NR]; - prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | - (x ? PROT_EXEC : 0); + get_safe_registers(regs); + regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE + + ((unsigned long) &syscall_stub - + (unsigned long) &__syscall_stub_start); + /* XXX Don't have a define for starting a syscall */ + regs[REGS_SYSCALL_NR] = syscall; + regs[REGS_SYSCALL_ARG1] = args[0]; + regs[REGS_SYSCALL_ARG2] = args[1]; + regs[REGS_SYSCALL_ARG3] = args[2]; + regs[REGS_SYSCALL_ARG4] = args[3]; + regs[REGS_SYSCALL_ARG5] = args[4]; + regs[REGS_SYSCALL_ARG6] = args[5]; + n = ptrace_setregs(pid, regs); + if(n < 0){ + printk("run_syscall_stub : PTRACE_SETREGS failed, " + "errno = %d\n", n); + return(n); + } - map = ((struct proc_mm_op) { .op = MM_MMAP, - .u = - { .mmap = - { .addr = virt, - .len = len, - .prot = prot, - .flags = MAP_SHARED | - MAP_FIXED, - .fd = phys_fd, - .offset = offset - } } } ); - n = os_write_file(fd, &map, sizeof(map)); - if(n != sizeof(map)) - printk("map : /proc/mm map failed, err = %d\n", -n); + wait_stub_done(pid, 0, "run_syscall_stub"); + + return(*((unsigned long *) mm_idp->stack)); } -int unmap(int fd, void *addr, unsigned long len) +int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, + int r, int w, int x, int phys_fd, unsigned long long offset) { - struct proc_mm_op unmap; - int n; + int prot, n; - unmap = ((struct proc_mm_op) { .op = MM_MUNMAP, - .u = - { .munmap = - { .addr = (unsigned long) addr, - .len = len } } } ); - n = os_write_file(fd, &unmap, sizeof(unmap)); - if(n != sizeof(unmap)) { - if(n < 0) - return(n); - else if(n > 0) - return(-EIO); - } + prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | + (x ? PROT_EXEC : 0); - return(0); + if(proc_mm){ + struct proc_mm_op map; + int fd = mm_idp->u.mm_fd; + map = ((struct proc_mm_op) { .op = MM_MMAP, + .u = + { .mmap = + { .addr = virt, + .len = len, + .prot = prot, + .flags = MAP_SHARED | + MAP_FIXED, + .fd = phys_fd, + .offset= offset + } } } ); + n = os_write_file(fd, &map, sizeof(map)); + if(n != sizeof(map)) + printk("map : /proc/mm map failed, err = %d\n", -n); + } + else { + long res; + unsigned long args[] = { virt, len, prot, + MAP_SHARED | MAP_FIXED, phys_fd, + MMAP_OFFSET(offset) }; + + res = run_syscall_stub(mm_idp, STUB_MMAP_NR, args); + if((void *) res == MAP_FAILED) + printk("mmap stub failed, errno = %d\n", res); + } + + return 0; } -int protect(int fd, unsigned long addr, unsigned long len, int r, int w, - int x, int must_succeed) +int unmap(struct mm_id *mm_idp, void *addr, unsigned long len) { - struct proc_mm_op protect; - int prot, n; + int n; - prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | - (x ? PROT_EXEC : 0); + if(proc_mm){ + struct proc_mm_op unmap; + int fd = mm_idp->u.mm_fd; + unmap = ((struct proc_mm_op) { .op = MM_MUNMAP, + .u = + { .munmap = + { .addr = + (unsigned long) addr, + .len = len } } } ); + n = os_write_file(fd, &unmap, sizeof(unmap)); + if(n != sizeof(unmap)) { + if(n < 0) + return(n); + else if(n > 0) + return(-EIO); + } + } + else { + int res; + unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0, + 0 }; - protect = ((struct proc_mm_op) { .op = MM_MPROTECT, - .u = - { .mprotect = - { .addr = (unsigned long) addr, - .len = len, - .prot = prot } } } ); + res = run_syscall_stub(mm_idp, __NR_munmap, args); + if(res < 0) + printk("munmap stub failed, errno = %d\n", res); + } - n = os_write_file(fd, &protect, sizeof(protect)); - if(n != sizeof(protect)) { - if(n == 0) return(0); + return(0); +} - if(must_succeed) - panic("protect failed, err = %d", -n); +int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len, + int r, int w, int x) +{ + struct proc_mm_op protect; + int prot, n; - return(-EIO); - } + prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | + (x ? PROT_EXEC : 0); - return(0); + if(proc_mm){ + int fd = mm_idp->u.mm_fd; + protect = ((struct proc_mm_op) { .op = MM_MPROTECT, + .u = + { .mprotect = + { .addr = + (unsigned long) addr, + .len = len, + .prot = prot } } } ); + + n = os_write_file(fd, &protect, sizeof(protect)); + if(n != sizeof(protect)) + panic("protect failed, err = %d", -n); + } + else { + int res; + unsigned long args[] = { addr, len, prot, 0, 0, 0 }; + + res = run_syscall_stub(mm_idp, __NR_mprotect, args); + if(res < 0) + panic("mprotect stub failed, errno = %d\n", res); + } + + return(0); } void before_mem_skas(unsigned long unused) { } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 6cb9a6d028a9..511a855c9ec0 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -3,46 +3,138 @@ * Licensed under the GPL */ +#include "linux/config.h" #include "linux/sched.h" #include "linux/list.h" #include "linux/spinlock.h" #include "linux/slab.h" +#include "linux/errno.h" +#include "linux/mm.h" #include "asm/current.h" #include "asm/segment.h" #include "asm/mmu.h" +#include "asm/pgalloc.h" +#include "asm/pgtable.h" #include "os.h" #include "skas.h" +extern int __syscall_stub_start; + +static int init_stub_pte(struct mm_struct *mm, unsigned long proc, + unsigned long kernel) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + spin_lock(&mm->page_table_lock); + pgd = pgd_offset(mm, proc); + pud = pud_alloc(mm, pgd, proc); + if (!pud) + goto out; + + pmd = pmd_alloc(mm, pud, proc); + if (!pmd) + goto out_pmd; + + pte = pte_alloc_map(mm, pmd, proc); + if (!pte) + goto out_pte; + + /* There's an interaction between the skas0 stub pages, stack + * randomization, and the BUG at the end of exit_mmap. exit_mmap + * checks that the number of page tables freed is the same as had + * been allocated. If the stack is on the last page table page, + * then the stack pte page will be freed, and if not, it won't. To + * avoid having to know where the stack is, or if the process mapped + * something at the top of its address space for some other reason, + * we set TASK_SIZE to end at the start of the last page table. + * This keeps exit_mmap off the last page, but introduces a leak + * of that page. So, we hang onto it here and free it in + * destroy_context_skas. + */ + + mm->context.skas.last_page_table = pmd_page_kernel(*pmd); + + *pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT)); + *pte = pte_mkexec(*pte); + *pte = pte_wrprotect(*pte); + spin_unlock(&mm->page_table_lock); + return(0); + + out_pmd: + pud_free(pud); + out_pte: + pmd_free(pmd); + out: + spin_unlock(&mm->page_table_lock); + return(-ENOMEM); +} + int init_new_context_skas(struct task_struct *task, struct mm_struct *mm) { - int from; + struct mm_struct *cur_mm = current->mm; + struct mm_id *mm_id = &mm->context.skas.id; + unsigned long stack; + int from, ret; - if((current->mm != NULL) && (current->mm != &init_mm)) - from = current->mm->context.skas.mm_fd; - else from = -1; + if(proc_mm){ + if((cur_mm != NULL) && (cur_mm != &init_mm)) + from = cur_mm->context.skas.id.u.mm_fd; + else from = -1; - mm->context.skas.mm_fd = new_mm(from); - if(mm->context.skas.mm_fd < 0){ - printk("init_new_context_skas - new_mm failed, errno = %d\n", - mm->context.skas.mm_fd); - return(mm->context.skas.mm_fd); + ret = new_mm(from); + if(ret < 0){ + printk("init_new_context_skas - new_mm failed, " + "errno = %d\n", ret); + return ret; + } + mm_id->u.mm_fd = ret; + } + else { + /* This zeros the entry that pgd_alloc didn't, needed since + * we are about to reinitialize it, and want mm.nr_ptes to + * be accurate. + */ + mm->pgd[USER_PTRS_PER_PGD] = __pgd(0); + + ret = init_stub_pte(mm, CONFIG_STUB_CODE, + (unsigned long) &__syscall_stub_start); + if(ret) + goto out; + + ret = -ENOMEM; + stack = get_zeroed_page(GFP_KERNEL); + if(stack == 0) + goto out; + mm_id->stack = stack; + + ret = init_stub_pte(mm, CONFIG_STUB_DATA, stack); + if(ret) + goto out_free; + + mm->nr_ptes--; + mm_id->u.pid = start_userspace(stack); } - return(0); + return 0; + + out_free: + free_page(mm_id->stack); + out: + return ret; } void destroy_context_skas(struct mm_struct *mm) { - os_close_file(mm->context.skas.mm_fd); -} + struct mmu_context_skas *mmu = &mm->context.skas; -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ + if(proc_mm) + os_close_file(mmu->id.u.mm_fd); + else { + os_kill_ptraced_process(mmu->id.u.pid, 1); + free_page(mmu->id.stack); + free_page(mmu->last_page_table); + } +} diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 773cd2b525fc..1647abb0d1aa 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2002- 2004 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -14,6 +14,7 @@ #include #include #include +#include #include "user.h" #include "ptrace_user.h" #include "time_user.h" @@ -21,13 +22,17 @@ #include "user_util.h" #include "kern_util.h" #include "skas.h" +#include "mm_id.h" #include "sysdep/sigcontext.h" +#include "sysdep/stub.h" #include "os.h" #include "proc_mm.h" #include "skas_ptrace.h" #include "chan_user.h" #include "signal_user.h" #include "registers.h" +#include "mem.h" +#include "uml-config.h" #include "process.h" int is_skas_winch(int pid, int fd, void *data) @@ -39,20 +44,55 @@ int is_skas_winch(int pid, int fd, void *data) return(1); } +void wait_stub_done(int pid, int sig, char * fname) +{ + int n, status, err; + + do { + if ( sig != -1 ) { + err = ptrace(PTRACE_CONT, pid, 0, sig); + if(err) + panic("%s : continue failed, errno = %d\n", + fname, errno); + } + sig = 0; + + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + } while((n >= 0) && WIFSTOPPED(status) && + (WSTOPSIG(status) == SIGVTALRM)); + + if((n < 0) || !WIFSTOPPED(status) || + (WSTOPSIG(status) != SIGUSR1 && WSTOPSIG(status != SIGTRAP))){ + panic("%s : failed to wait for SIGUSR1/SIGTRAP, " + "pid = %d, n = %d, errno = %d, status = 0x%x\n", + fname, pid, n, errno, status); + } +} + void get_skas_faultinfo(int pid, struct faultinfo * fi) { - int err; + int err; - err = ptrace(PTRACE_FAULTINFO, pid, 0, fi); - if(err) - panic("get_skas_faultinfo - PTRACE_FAULTINFO failed, " - "errno = %d\n", errno); + if(ptrace_faultinfo){ + err = ptrace(PTRACE_FAULTINFO, pid, 0, fi); + if(err) + panic("get_skas_faultinfo - PTRACE_FAULTINFO failed, " + "errno = %d\n", errno); - /* Special handling for i386, which has different structs */ - if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo)) - memset((char *)fi + sizeof(struct ptrace_faultinfo), 0, - sizeof(struct faultinfo) - - sizeof(struct ptrace_faultinfo)); + /* Special handling for i386, which has different structs */ + if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo)) + memset((char *)fi + sizeof(struct ptrace_faultinfo), 0, + sizeof(struct faultinfo) - + sizeof(struct ptrace_faultinfo)); + } + else { + wait_stub_done(pid, SIGSEGV, "get_skas_faultinfo"); + + /* faultinfo is prepared by the stub-segv-handler at start of + * the stub stack page. We just have to copy it. + */ + memcpy(fi, (void *)current_stub_stack(), sizeof(*fi)); + } } static void handle_segv(int pid, union uml_pt_regs * regs) @@ -91,11 +131,56 @@ static void handle_trap(int pid, union uml_pt_regs *regs, int local_using_sysemu handle_syscall(regs); } -static int userspace_tramp(void *arg) +extern int __syscall_stub_start; + +static int userspace_tramp(void *stack) { - init_new_thread_signals(0); - enable_timer(); + void *addr; + ptrace(PTRACE_TRACEME, 0, 0, 0); + + init_new_thread_signals(1); + enable_timer(); + + if(!proc_mm){ + /* This has a pte, but it can't be mapped in with the usual + * tlb_flush mechanism because this is part of that mechanism + */ + int fd; + __u64 offset; + + fd = phys_mapping(to_phys(&__syscall_stub_start), &offset); + addr = mmap64((void *) UML_CONFIG_STUB_CODE, page_size(), + PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); + if(addr == MAP_FAILED){ + printk("mapping mmap stub failed, errno = %d\n", + errno); + exit(1); + } + + if(stack != NULL){ + fd = phys_mapping(to_phys(stack), &offset); + addr = mmap((void *) UML_CONFIG_STUB_DATA, page_size(), + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_SHARED, fd, offset); + if(addr == MAP_FAILED){ + printk("mapping segfault stack failed, " + "errno = %d\n", errno); + exit(1); + } + } + } + if(!ptrace_faultinfo && (stack != NULL)){ + unsigned long v = UML_CONFIG_STUB_CODE + + (unsigned long) stub_segv_handler - + (unsigned long) &__syscall_stub_start; + + set_sigstack((void *) UML_CONFIG_STUB_DATA, page_size()); + set_handler(SIGSEGV, (void *) v, SA_ONSTACK, + SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, + SIGUSR1, -1); + } + os_stop_process(os_getpid()); return(0); } @@ -105,11 +190,11 @@ static int userspace_tramp(void *arg) #define NR_CPUS 1 int userspace_pid[NR_CPUS]; -void start_userspace(int cpu) +int start_userspace(unsigned long stub_stack) { void *stack; unsigned long sp; - int pid, status, n; + int pid, status, n, flags; stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); @@ -117,8 +202,9 @@ void start_userspace(int cpu) panic("start_userspace : mmap failed, errno = %d", errno); sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *); - pid = clone(userspace_tramp, (void *) sp, - CLONE_FILES | CLONE_VM | SIGCHLD, NULL); + flags = CLONE_FILES | SIGCHLD; + if(proc_mm) flags |= CLONE_VM; + pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack); if(pid < 0) panic("start_userspace : clone failed, errno = %d", errno); @@ -140,7 +226,7 @@ void start_userspace(int cpu) if(munmap(stack, PAGE_SIZE) < 0) panic("start_userspace : munmap failed, errno = %d\n", errno); - userspace_pid[cpu] = pid; + return(pid); } void userspace(union uml_pt_regs *regs) @@ -174,7 +260,9 @@ void userspace(union uml_pt_regs *regs) if(WIFSTOPPED(status)){ switch(WSTOPSIG(status)){ case SIGSEGV: - handle_segv(pid, regs); + if(PTRACE_FULL_FAULTINFO || !ptrace_faultinfo) + user_signal(SIGSEGV, regs, pid); + else handle_segv(pid, regs); break; case SIGTRAP + 0x80: handle_trap(pid, regs, local_using_sysemu); @@ -194,6 +282,7 @@ void userspace(union uml_pt_regs *regs) printk("userspace - child stopped with signal " "%d\n", WSTOPSIG(status)); } + pid = userspace_pid[0]; interrupt_end(); /* Avoid -ERESTARTSYS handling in host */ @@ -334,21 +423,19 @@ void reboot_skas(void) siglongjmp(initial_jmpbuf, INIT_JMP_REBOOT); } -void switch_mm_skas(int mm_fd) +void switch_mm_skas(struct mm_id *mm_idp) { int err; #warning need cpu pid in switch_mm_skas - err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, mm_fd); - if(err) - panic("switch_mm_skas - PTRACE_SWITCH_MM failed, errno = %d\n", - errno); -} - -void kill_off_processes_skas(void) -{ -#warning need to loop over userspace_pids in kill_off_processes_skas - os_kill_ptraced_process(userspace_pid[0], 1); + if(proc_mm){ + err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, + mm_idp->u.mm_fd); + if(err) + panic("switch_mm_skas - PTRACE_SWITCH_MM failed, " + "errno = %d\n", errno); + } + else userspace_pid[0] = mm_idp->u.pid; } /* diff --git a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c index 0a7b8aa55db8..cbabab104ac3 100644 --- a/arch/um/kernel/skas/process_kern.c +++ b/arch/um/kernel/skas/process_kern.c @@ -175,9 +175,12 @@ static int start_kernel_proc(void *unused) return(0); } +extern int userspace_pid[]; + int start_uml_skas(void) { - start_userspace(0); + if(proc_mm) + userspace_pid[0] = start_userspace(0); init_new_thread_signals(1); @@ -199,3 +202,31 @@ int thread_pid_skas(struct task_struct *task) #warning Need to look up userspace_pid by cpu return(userspace_pid[0]); } + +void kill_off_processes_skas(void) +{ + if(proc_mm) +#warning need to loop over userspace_pids in kill_off_processes_skas + os_kill_ptraced_process(userspace_pid[0], 1); + else { + struct task_struct *p; + int pid, me; + + me = os_getpid(); + for_each_process(p){ + if(p->mm == NULL) + continue; + + pid = p->mm->context.skas.id.u.pid; + os_kill_ptraced_process(pid, 1); + } + } +} + +unsigned long current_stub_stack(void) +{ + if(current->mm == NULL) + return(0); + + return(current->mm->context.skas.id.stack); +} diff --git a/arch/um/kernel/skas/tlb.c b/arch/um/kernel/skas/tlb.c index 18f9a7711de1..6230999c672c 100644 --- a/arch/um/kernel/skas/tlb.c +++ b/arch/um/kernel/skas/tlb.c @@ -6,6 +6,7 @@ #include "linux/stddef.h" #include "linux/sched.h" +#include "linux/config.h" #include "linux/mm.h" #include "asm/page.h" #include "asm/pgtable.h" @@ -17,7 +18,7 @@ #include "os.h" #include "tlb.h" -static void do_ops(int fd, struct host_vm_op *ops, int last) +static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last) { struct host_vm_op *op; int i; @@ -26,18 +27,18 @@ static void do_ops(int fd, struct host_vm_op *ops, int last) op = &ops[i]; switch(op->type){ case MMAP: - map(fd, op->u.mmap.addr, op->u.mmap.len, + map(&mmu->skas.id, op->u.mmap.addr, op->u.mmap.len, op->u.mmap.r, op->u.mmap.w, op->u.mmap.x, op->u.mmap.fd, op->u.mmap.offset); break; case MUNMAP: - unmap(fd, (void *) op->u.munmap.addr, + unmap(&mmu->skas.id, (void *) op->u.munmap.addr, op->u.munmap.len); break; case MPROTECT: - protect(fd, op->u.mprotect.addr, op->u.mprotect.len, - op->u.mprotect.r, op->u.mprotect.w, - op->u.mprotect.x); + protect(&mmu->skas.id, op->u.mprotect.addr, + op->u.mprotect.len, op->u.mprotect.r, + op->u.mprotect.w, op->u.mprotect.x); break; default: printk("Unknown op type %d in do_ops\n", op->type); @@ -46,12 +47,15 @@ static void do_ops(int fd, struct host_vm_op *ops, int last) } } +extern int proc_mm; + static void fix_range(struct mm_struct *mm, unsigned long start_addr, unsigned long end_addr, int force) { - int fd = mm->context.skas.mm_fd; + if(!proc_mm && (end_addr > CONFIG_STUB_START)) + end_addr = CONFIG_STUB_START; - fix_range_common(mm, start_addr, end_addr, force, fd, do_ops); + fix_range_common(mm, start_addr, end_addr, force, do_ops); } void __flush_tlb_one_skas(unsigned long addr) @@ -69,16 +73,20 @@ void flush_tlb_range_skas(struct vm_area_struct *vma, unsigned long start, void flush_tlb_mm_skas(struct mm_struct *mm) { + unsigned long end; + /* Don't bother flushing if this address space is about to be * destroyed. */ if(atomic_read(&mm->mm_users) == 0) return; - fix_range(mm, 0, host_task_size, 0); + end = proc_mm ? task_size : CONFIG_STUB_START; + fix_range(mm, 0, end, 0); } void force_flush_all_skas(void) { - fix_range(current->mm, 0, host_task_size, 1); + unsigned long end = proc_mm ? task_size : CONFIG_STUB_START; + fix_range(current->mm, 0, end, 1); } diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index eda477edfdf5..83ec8d4747fd 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -18,13 +18,15 @@ #define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1)) void fix_range_common(struct mm_struct *mm, unsigned long start_addr, - unsigned long end_addr, int force, int data, - void (*do_ops)(int, struct host_vm_op *, int)) + unsigned long end_addr, int force, + void (*do_ops)(union mm_context *, struct host_vm_op *, + int)) { pgd_t *npgd; pud_t *npud; pmd_t *npmd; pte_t *npte; + union mm_context *mmu = &mm->context; unsigned long addr, end; int r, w, x; struct host_vm_op ops[16]; @@ -40,7 +42,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, end = end_addr; if(force || pgd_newpage(*npgd)){ op_index = add_munmap(addr, end - addr, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); pgd_mkuptodate(*npgd); } @@ -55,7 +57,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, end = end_addr; if(force || pud_newpage(*npud)){ op_index = add_munmap(addr, end - addr, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); pud_mkuptodate(*npud); } @@ -70,7 +72,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, end = end_addr; if(force || pmd_newpage(*npmd)){ op_index = add_munmap(addr, end - addr, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); pmd_mkuptodate(*npmd); } @@ -93,21 +95,21 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, op_index = add_mmap(addr, pte_val(*npte) & PAGE_MASK, PAGE_SIZE, r, w, x, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); else op_index = add_munmap(addr, PAGE_SIZE, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); } else if(pte_newprot(*npte)) op_index = add_mprotect(addr, PAGE_SIZE, r, w, x, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); *npte = pte_mkuptodate(*npte); addr += PAGE_SIZE; } - (*do_ops)(data, ops, op_index); + (*do_ops)(mmu, ops, op_index); } int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) @@ -195,51 +197,6 @@ int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) return(updated); } -void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) -{ - address &= PAGE_MASK; - flush_tlb_range(vma, address, address + PAGE_SIZE); -} - -void flush_tlb_all(void) -{ - flush_tlb_mm(current->mm); -} - -void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - CHOOSE_MODE_PROC(flush_tlb_kernel_range_tt, - flush_tlb_kernel_range_common, start, end); -} - -void flush_tlb_kernel_vm(void) -{ - CHOOSE_MODE(flush_tlb_kernel_vm_tt(), - flush_tlb_kernel_range_common(start_vm, end_vm)); -} - -void __flush_tlb_one(unsigned long addr) -{ - CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr); -} - -void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, vma, start, - end); -} - -void flush_tlb_mm(struct mm_struct *mm) -{ - CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm); -} - -void force_flush_all(void) -{ - CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas()); -} - pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address) { return(pgd_offset(mm, address)); @@ -270,9 +227,9 @@ pte_t *addr_pte(struct task_struct *task, unsigned long addr) } int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, - int r, int w, int x, struct host_vm_op *ops, int index, - int last_filled, int data, - void (*do_ops)(int, struct host_vm_op *, int)) + int r, int w, int x, struct host_vm_op *ops, int index, + int last_filled, union mm_context *mmu, + void (*do_ops)(union mm_context *, struct host_vm_op *, int)) { __u64 offset; struct host_vm_op *last; @@ -292,7 +249,7 @@ int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, } if(index == last_filled){ - (*do_ops)(data, ops, last_filled); + (*do_ops)(mmu, ops, last_filled); index = -1; } @@ -310,8 +267,8 @@ int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, } int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *ops, - int index, int last_filled, int data, - void (*do_ops)(int, struct host_vm_op *, int)) + int index, int last_filled, union mm_context *mmu, + void (*do_ops)(union mm_context *, struct host_vm_op *, int)) { struct host_vm_op *last; @@ -325,7 +282,7 @@ int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *ops, } if(index == last_filled){ - (*do_ops)(data, ops, last_filled); + (*do_ops)(mmu, ops, last_filled); index = -1; } @@ -337,8 +294,9 @@ int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *ops, } int add_mprotect(unsigned long addr, unsigned long len, int r, int w, int x, - struct host_vm_op *ops, int index, int last_filled, int data, - void (*do_ops)(int, struct host_vm_op *, int)) + struct host_vm_op *ops, int index, int last_filled, + union mm_context *mmu, + void (*do_ops)(union mm_context *, struct host_vm_op *, int)) { struct host_vm_op *last; @@ -354,7 +312,7 @@ int add_mprotect(unsigned long addr, unsigned long len, int r, int w, int x, } if(index == last_filled){ - (*do_ops)(data, ops, last_filled); + (*do_ops)(mmu, ops, last_filled); index = -1; } @@ -367,3 +325,49 @@ int add_mprotect(unsigned long addr, unsigned long len, int r, int w, int x, .x = x } } }); return(index); } + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) +{ + address &= PAGE_MASK; + flush_tlb_range(vma, address, address + PAGE_SIZE); +} + +void flush_tlb_all(void) +{ + flush_tlb_mm(current->mm); +} + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + CHOOSE_MODE_PROC(flush_tlb_kernel_range_tt, + flush_tlb_kernel_range_common, start, end); +} + +void flush_tlb_kernel_vm(void) +{ + CHOOSE_MODE(flush_tlb_kernel_vm_tt(), + flush_tlb_kernel_range_common(start_vm, end_vm)); +} + +void __flush_tlb_one(unsigned long addr) +{ + CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr); +} + +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, vma, start, + end); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm); +} + +void force_flush_all(void) +{ + CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas()); +} + diff --git a/arch/um/kernel/tt/tlb.c b/arch/um/kernel/tt/tlb.c index 203216ad86f1..2eefb43bc9c2 100644 --- a/arch/um/kernel/tt/tlb.c +++ b/arch/um/kernel/tt/tlb.c @@ -17,7 +17,7 @@ #include "os.h" #include "tlb.h" -static void do_ops(int unused, struct host_vm_op *ops, int last) +static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last) { struct host_vm_op *op; int i; @@ -55,7 +55,7 @@ static void fix_range(struct mm_struct *mm, unsigned long start_addr, panic("fix_range fixing wrong address space, current = 0x%p", current); - fix_range_common(mm, start_addr, end_addr, force, 0, do_ops); + fix_range_common(mm, start_addr, end_addr, force, do_ops); } atomic_t vmchange_seq = ATOMIC_INIT(1); diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 61dfd4fef752..163476a8cb1b 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -30,6 +30,7 @@ SECTIONS _einittext = .; } . = ALIGN(4096); + .text : { *(.text) @@ -39,6 +40,12 @@ SECTIONS /* .gnu.warning sections are handled specially by elf32.em. */ *(.gnu.warning) *(.gnu.linkonce.t*) + + . = ALIGN(4096); + __syscall_stub_start = .; + *(.__syscall_stub*) + __syscall_stub_end = .; + . = ALIGN(4096); } #include "asm/common.lds.S" diff --git a/arch/um/os-Linux/sys-i386/registers.c b/arch/um/os-Linux/sys-i386/registers.c index 9a0ad094d926..3125d320722c 100644 --- a/arch/um/os-Linux/sys-i386/registers.c +++ b/arch/um/os-Linux/sys-i386/registers.c @@ -121,6 +121,11 @@ void init_registers(int pid) err); } +void get_safe_registers(unsigned long *regs) +{ + memcpy(regs, exec_regs, HOST_FRAME_SIZE * sizeof(unsigned long)); +} + /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically diff --git a/arch/um/os-Linux/sys-x86_64/registers.c b/arch/um/os-Linux/sys-x86_64/registers.c index 6286c974bbeb..44438d15c3d6 100644 --- a/arch/um/os-Linux/sys-x86_64/registers.c +++ b/arch/um/os-Linux/sys-x86_64/registers.c @@ -69,6 +69,11 @@ void init_registers(int pid) err); } +void get_safe_registers(unsigned long *regs) +{ + memcpy(regs, exec_regs, HOST_FRAME_SIZE * sizeof(unsigned long)); +} + /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically diff --git a/arch/um/scripts/Makefile.rules b/arch/um/scripts/Makefile.rules index 7459d09c233e..17f305b6bade 100644 --- a/arch/um/scripts/Makefile.rules +++ b/arch/um/scripts/Makefile.rules @@ -16,6 +16,11 @@ define unprofile endef +# The stubs and unmap.o can't try to call mcount or update basic block data +define unprofile + $(patsubst -pg,,$(patsubst -fprofile-arcs -ftest-coverage,,$(1))) +endef + quiet_cmd_make_link = SYMLINK $@ cmd_make_link = ln -sf $(srctree)/arch/$(SUBARCH)/$($(notdir $@)-dir)/$(notdir $@) $@ diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile index 095bcdb0b9cc..77c3c4d29f55 100644 --- a/arch/um/sys-i386/Makefile +++ b/arch/um/sys-i386/Makefile @@ -1,6 +1,6 @@ obj-y = bitops.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \ - ptrace_user.o semaphore.o signal.o sigcontext.o syscalls.o sysrq.o \ - sys_call_table.o + ptrace_user.o semaphore.o signal.o sigcontext.o stub.o stub_segv.o \ + syscalls.o sysrq.o sys_call_table.o obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_MODULES) += module.o @@ -16,6 +16,14 @@ semaphore.c-dir = kernel highmem.c-dir = mm module.c-dir = kernel +STUB_CFLAGS = -Wp,-MD,$(depfile) $(call unprofile,$(USER_CFLAGS)) + +# _cflags works with kernel files, not with userspace ones, but c_flags does, +# why ask why? +$(obj)/stub_segv.o : c_flags = $(STUB_CFLAGS) + +$(obj)/stub.o : a_flags = $(STUB_CFLAGS) + subdir- := util include arch/um/scripts/Makefile.unmap diff --git a/arch/um/sys-i386/stub.S b/arch/um/sys-i386/stub.S new file mode 100644 index 000000000000..2f2c70a8f043 --- /dev/null +++ b/arch/um/sys-i386/stub.S @@ -0,0 +1,8 @@ +#include "uml-config.h" + + .globl syscall_stub +.section .__syscall_stub, "x" +syscall_stub: + int $0x80 + mov %eax, UML_CONFIG_STUB_DATA + int3 diff --git a/arch/um/sys-i386/stub_segv.c b/arch/um/sys-i386/stub_segv.c new file mode 100644 index 000000000000..b251442ad0b1 --- /dev/null +++ b/arch/um/sys-i386/stub_segv.c @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include "uml-config.h" +#include "sysdep/sigcontext.h" +#include "sysdep/faultinfo.h" + +void __attribute__ ((__section__ (".__syscall_stub"))) +stub_segv_handler(int sig) +{ + struct sigcontext *sc = (struct sigcontext *) (&sig + 1); + + GET_FAULTINFO_FROM_SC(*((struct faultinfo *) UML_CONFIG_STUB_DATA), + sc); + + __asm__("movl %0, %%eax ; int $0x80": : "g" (__NR_getpid)); + __asm__("movl %%eax, %%ebx ; movl %0, %%eax ; movl %1, %%ecx ;" + "int $0x80": : "g" (__NR_kill), "g" (SIGUSR1)); + /* Pop the frame pointer and return address since we need to leave + * the stack in its original form when we do the sigreturn here, by + * hand. + */ + __asm__("popl %%eax ; popl %%eax ; popl %%eax ; movl %0, %%eax ; " + "int $0x80" : : "g" (__NR_sigreturn)); +} diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile index 2bc6f6849010..7488206ce6f4 100644 --- a/arch/um/sys-x86_64/Makefile +++ b/arch/um/sys-x86_64/Makefile @@ -6,8 +6,8 @@ #XXX: why into lib-y? lib-y = bitops.o bugs.o csum-partial.o delay.o fault.o mem.o memcpy.o \ - ptrace.o ptrace_user.o semaphore.o sigcontext.o signal.o \ - syscalls.o sysrq.o thunk.o syscall_table.o + ptrace.o ptrace_user.o semaphore.o sigcontext.o signal.o stub.o \ + stub_segv.o syscalls.o syscall_table.o sysrq.o thunk.o obj-y := ksyms.o obj-$(CONFIG_MODULES) += module.o um_module.o @@ -28,6 +28,14 @@ semaphore.c-dir = kernel thunk.S-dir = lib module.c-dir = kernel +STUB_CFLAGS = -Wp,-MD,$(depfile) $(call unprofile,$(USER_CFLAGS)) + +# _cflags works with kernel files, not with userspace ones, but c_flags does, +# why ask why? +$(obj)/stub_segv.o : c_flags = $(STUB_CFLAGS) + +$(obj)/stub.o : a_flags = $(STUB_CFLAGS) + subdir- := util include arch/um/scripts/Makefile.unmap diff --git a/arch/um/sys-x86_64/stub.S b/arch/um/sys-x86_64/stub.S new file mode 100644 index 000000000000..31c14925716b --- /dev/null +++ b/arch/um/sys-x86_64/stub.S @@ -0,0 +1,15 @@ +#include "uml-config.h" + + .globl syscall_stub +.section .__syscall_stub, "x" +syscall_stub: + syscall + /* We don't have 64-bit constants, so this constructs the address + * we need. + */ + movq $(UML_CONFIG_STUB_DATA >> 32), %rbx + salq $32, %rbx + movq $(UML_CONFIG_STUB_DATA & 0xffffffff), %rcx + or %rcx, %rbx + movq %rax, (%rbx) + int3 diff --git a/arch/um/sys-x86_64/stub_segv.c b/arch/um/sys-x86_64/stub_segv.c new file mode 100644 index 000000000000..161d1fe9c034 --- /dev/null +++ b/arch/um/sys-x86_64/stub_segv.c @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include "uml-config.h" +#include "sysdep/sigcontext.h" +#include "sysdep/faultinfo.h" + +void __attribute__ ((__section__ (".__syscall_stub"))) +stub_segv_handler(int sig) +{ + struct ucontext *uc; + + __asm__("movq %%rdx, %0" : "=g" (uc) :); + GET_FAULTINFO_FROM_SC(*((struct faultinfo *) UML_CONFIG_STUB_DATA), + &uc->uc_mcontext); + + __asm__("movq %0, %%rax ; syscall": : "g" (__NR_getpid)); + __asm__("movq %%rax, %%rdi ; movq %0, %%rax ; movq %1, %%rsi ;" + "syscall": : "g" (__NR_kill), "g" (SIGUSR1)); + /* Two popqs to restore the stack to the state just before entering + * the handler, one pops the return address, the other pops the frame + * pointer. + */ + __asm__("popq %%rax ; popq %%rax ; movq %0, %%rax ; syscall" : : "g" + (__NR_rt_sigreturn)); +} diff --git a/include/asm-um/mmu_context.h b/include/asm-um/mmu_context.h index 89bff310b7a9..7529c9c853dd 100644 --- a/include/asm-um/mmu_context.h +++ b/include/asm-um/mmu_context.h @@ -7,7 +7,9 @@ #define __UM_MMU_CONTEXT_H #include "linux/sched.h" +#include "linux/config.h" #include "choose-mode.h" +#include "um_mmu.h" #define get_mmu_context(task) do ; while(0) #define activate_context(tsk) do ; while(0) @@ -18,8 +20,6 @@ static inline void activate_mm(struct mm_struct *old, struct mm_struct *new) { } -extern void switch_mm_skas(int mm_fd); - static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { @@ -30,7 +30,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, cpu_set(cpu, next->cpu_vm_mask); if(next != &init_mm) CHOOSE_MODE((void) 0, - switch_mm_skas(next->context.skas.mm_fd)); + switch_mm_skas(&next->context.skas.id)); } }