2
0
mirror of https://github.com/edk2-porting/linux-next.git synced 2025-01-10 22:54:11 +08:00
linux-next/arch/powerpc/include/asm/systbl.h

359 lines
8.6 KiB
C
Raw Normal View History

/*
* List of powerpc syscalls. For the meaning of the _SPU suffix see
* arch/powerpc/platforms/cell/spu_callbacks.c
*/
SYSCALL(restart_syscall)
SYSCALL(exit)
PPC_SYS(fork)
SYSCALL_SPU(read)
SYSCALL_SPU(write)
COMPAT_SYS_SPU(open)
SYSCALL_SPU(close)
COMPAT_SYS_SPU(waitpid)
COMPAT_SYS_SPU(creat)
SYSCALL_SPU(link)
SYSCALL_SPU(unlink)
COMPAT_SYS(execve)
SYSCALL_SPU(chdir)
COMPAT_SYS_SPU(time)
SYSCALL_SPU(mknod)
SYSCALL_SPU(chmod)
SYSCALL_SPU(lchown)
SYSCALL(ni_syscall)
OLDSYS(stat)
SYSX_SPU(sys_lseek,ppc32_lseek,sys_lseek)
SYSCALL_SPU(getpid)
COMPAT_SYS(mount)
SYSX(sys_ni_syscall,sys_oldumount,sys_oldumount)
SYSCALL_SPU(setuid)
SYSCALL_SPU(getuid)
COMPAT_SYS_SPU(stime)
COMPAT_SYS(ptrace)
SYSCALL_SPU(alarm)
OLDSYS(fstat)
SYSCALL(pause)
COMPAT_SYS(utime)
SYSCALL(ni_syscall)
SYSCALL(ni_syscall)
COMPAT_SYS_SPU(access)
COMPAT_SYS_SPU(nice)
SYSCALL(ni_syscall)
SYSCALL_SPU(sync)
COMPAT_SYS_SPU(kill)
SYSCALL_SPU(rename)
COMPAT_SYS_SPU(mkdir)
SYSCALL_SPU(rmdir)
SYSCALL_SPU(dup)
SYSCALL_SPU(pipe)
COMPAT_SYS_SPU(times)
SYSCALL(ni_syscall)
SYSCALL_SPU(brk)
SYSCALL_SPU(setgid)
SYSCALL_SPU(getgid)
SYSCALL(signal)
SYSCALL_SPU(geteuid)
SYSCALL_SPU(getegid)
SYSCALL(acct)
SYSCALL(umount)
SYSCALL(ni_syscall)
COMPAT_SYS_SPU(ioctl)
COMPAT_SYS_SPU(fcntl)
SYSCALL(ni_syscall)
COMPAT_SYS_SPU(setpgid)
SYSCALL(ni_syscall)
SYSX(sys_ni_syscall,sys_olduname, sys_olduname)
COMPAT_SYS_SPU(umask)
SYSCALL_SPU(chroot)
COMPAT_SYS(ustat)
SYSCALL_SPU(dup2)
SYSCALL_SPU(getppid)
SYSCALL_SPU(getpgrp)
SYSCALL_SPU(setsid)
SYS32ONLY(sigaction)
SYSCALL_SPU(sgetmask)
COMPAT_SYS_SPU(ssetmask)
SYSCALL_SPU(setreuid)
SYSCALL_SPU(setregid)
SYS32ONLY(sigsuspend)
COMPAT_SYS(sigpending)
COMPAT_SYS_SPU(sethostname)
COMPAT_SYS_SPU(setrlimit)
COMPAT_SYS(old_getrlimit)
COMPAT_SYS_SPU(getrusage)
COMPAT_SYS_SPU(gettimeofday)
COMPAT_SYS_SPU(settimeofday)
COMPAT_SYS_SPU(getgroups)
COMPAT_SYS_SPU(setgroups)
SYSX(sys_ni_syscall,sys_ni_syscall,ppc_select)
SYSCALL_SPU(symlink)
OLDSYS(lstat)
COMPAT_SYS_SPU(readlink)
SYSCALL(uselib)
SYSCALL(swapon)
SYSCALL(reboot)
SYSX(sys_ni_syscall,compat_sys_old_readdir,sys_old_readdir)
SYSCALL_SPU(mmap)
SYSCALL_SPU(munmap)
COMPAT_SYS_SPU(truncate)
COMPAT_SYS_SPU(ftruncate)
SYSCALL_SPU(fchmod)
SYSCALL_SPU(fchown)
COMPAT_SYS_SPU(getpriority)
COMPAT_SYS_SPU(setpriority)
SYSCALL(ni_syscall)
COMPAT_SYS(statfs)
COMPAT_SYS(fstatfs)
SYSCALL(ni_syscall)
COMPAT_SYS_SPU(socketcall)
COMPAT_SYS_SPU(syslog)
COMPAT_SYS_SPU(setitimer)
COMPAT_SYS_SPU(getitimer)
COMPAT_SYS_SPU(newstat)
COMPAT_SYS_SPU(newlstat)
COMPAT_SYS_SPU(newfstat)
SYSX(sys_ni_syscall,sys_uname,sys_uname)
SYSCALL(ni_syscall)
SYSCALL_SPU(vhangup)
SYSCALL(ni_syscall)
SYSCALL(ni_syscall)
COMPAT_SYS_SPU(wait4)
SYSCALL(swapoff)
COMPAT_SYS_SPU(sysinfo)
COMPAT_SYS(ipc)
SYSCALL_SPU(fsync)
SYS32ONLY(sigreturn)
PPC_SYS(clone)
COMPAT_SYS_SPU(setdomainname)
SYSCALL_SPU(newuname)
SYSCALL(ni_syscall)
COMPAT_SYS_SPU(adjtimex)
SYSCALL_SPU(mprotect)
SYSX(sys_ni_syscall,compat_sys_sigprocmask,sys_sigprocmask)
SYSCALL(ni_syscall)
SYSCALL(init_module)
SYSCALL(delete_module)
SYSCALL(ni_syscall)
SYSCALL(quotactl)
COMPAT_SYS_SPU(getpgid)
SYSCALL_SPU(fchdir)
SYSCALL_SPU(bdflush)
COMPAT_SYS(sysfs)
SYSX_SPU(ppc64_personality,ppc64_personality,sys_personality)
SYSCALL(ni_syscall)
SYSCALL_SPU(setfsuid)
SYSCALL_SPU(setfsgid)
SYSCALL_SPU(llseek)
COMPAT_SYS_SPU(getdents)
SYSX_SPU(sys_select,ppc32_select,sys_select)
SYSCALL_SPU(flock)
SYSCALL_SPU(msync)
COMPAT_SYS_SPU(readv)
COMPAT_SYS_SPU(writev)
COMPAT_SYS_SPU(getsid)
SYSCALL_SPU(fdatasync)
COMPAT_SYS(sysctl)
SYSCALL_SPU(mlock)
SYSCALL_SPU(munlock)
SYSCALL_SPU(mlockall)
SYSCALL_SPU(munlockall)
COMPAT_SYS_SPU(sched_setparam)
COMPAT_SYS_SPU(sched_getparam)
COMPAT_SYS_SPU(sched_setscheduler)
COMPAT_SYS_SPU(sched_getscheduler)
SYSCALL_SPU(sched_yield)
COMPAT_SYS_SPU(sched_get_priority_max)
COMPAT_SYS_SPU(sched_get_priority_min)
COMPAT_SYS_SPU(sched_rr_get_interval)
COMPAT_SYS_SPU(nanosleep)
SYSCALL_SPU(mremap)
SYSCALL_SPU(setresuid)
SYSCALL_SPU(getresuid)
SYSCALL(ni_syscall)
SYSCALL_SPU(poll)
SYSCALL(ni_syscall)
SYSCALL_SPU(setresgid)
SYSCALL_SPU(getresgid)
COMPAT_SYS_SPU(prctl)
COMPAT_SYS(rt_sigreturn)
COMPAT_SYS(rt_sigaction)
COMPAT_SYS(rt_sigprocmask)
COMPAT_SYS(rt_sigpending)
COMPAT_SYS(rt_sigtimedwait)
COMPAT_SYS(rt_sigqueueinfo)
COMPAT_SYS(rt_sigsuspend)
COMPAT_SYS_SPU(pread64)
COMPAT_SYS_SPU(pwrite64)
SYSCALL_SPU(chown)
SYSCALL_SPU(getcwd)
SYSCALL_SPU(capget)
SYSCALL_SPU(capset)
COMPAT_SYS(sigaltstack)
SYSX_SPU(sys_sendfile64,compat_sys_sendfile,sys_sendfile)
SYSCALL(ni_syscall)
SYSCALL(ni_syscall)
PPC_SYS(vfork)
COMPAT_SYS_SPU(getrlimit)
COMPAT_SYS_SPU(readahead)
SYS32ONLY(mmap2)
SYS32ONLY(truncate64)
SYS32ONLY(ftruncate64)
SYSX(sys_ni_syscall,sys_stat64,sys_stat64)
SYSX(sys_ni_syscall,sys_lstat64,sys_lstat64)
SYSX(sys_ni_syscall,sys_fstat64,sys_fstat64)
SYSCALL(pciconfig_read)
SYSCALL(pciconfig_write)
SYSCALL(pciconfig_iobase)
SYSCALL(ni_syscall)
SYSCALL_SPU(getdents64)
SYSCALL_SPU(pivot_root)
SYSX(sys_ni_syscall,compat_sys_fcntl64,sys_fcntl64)
SYSCALL_SPU(madvise)
SYSCALL_SPU(mincore)
SYSCALL_SPU(gettid)
SYSCALL_SPU(tkill)
SYSCALL_SPU(setxattr)
SYSCALL_SPU(lsetxattr)
SYSCALL_SPU(fsetxattr)
SYSCALL_SPU(getxattr)
SYSCALL_SPU(lgetxattr)
SYSCALL_SPU(fgetxattr)
SYSCALL_SPU(listxattr)
SYSCALL_SPU(llistxattr)
SYSCALL_SPU(flistxattr)
SYSCALL_SPU(removexattr)
SYSCALL_SPU(lremovexattr)
SYSCALL_SPU(fremovexattr)
COMPAT_SYS_SPU(futex)
COMPAT_SYS_SPU(sched_setaffinity)
COMPAT_SYS_SPU(sched_getaffinity)
SYSCALL(ni_syscall)
SYSCALL(ni_syscall)
SYS32ONLY(sendfile64)
COMPAT_SYS_SPU(io_setup)
SYSCALL_SPU(io_destroy)
COMPAT_SYS_SPU(io_getevents)
COMPAT_SYS_SPU(io_submit)
SYSCALL_SPU(io_cancel)
SYSCALL(set_tid_address)
SYSX_SPU(sys_fadvise64,ppc32_fadvise64,sys_fadvise64)
SYSCALL(exit_group)
SYSX(sys_lookup_dcookie,ppc32_lookup_dcookie,sys_lookup_dcookie)
SYSCALL_SPU(epoll_create)
SYSCALL_SPU(epoll_ctl)
SYSCALL_SPU(epoll_wait)
SYSCALL_SPU(remap_file_pages)
SYSX_SPU(sys_timer_create,compat_sys_timer_create,sys_timer_create)
COMPAT_SYS_SPU(timer_settime)
COMPAT_SYS_SPU(timer_gettime)
SYSCALL_SPU(timer_getoverrun)
SYSCALL_SPU(timer_delete)
COMPAT_SYS_SPU(clock_settime)
COMPAT_SYS_SPU(clock_gettime)
COMPAT_SYS_SPU(clock_getres)
COMPAT_SYS_SPU(clock_nanosleep)
SYSX(ppc64_swapcontext,ppc32_swapcontext,ppc_swapcontext)
COMPAT_SYS_SPU(tgkill)
COMPAT_SYS_SPU(utimes)
COMPAT_SYS_SPU(statfs64)
COMPAT_SYS_SPU(fstatfs64)
SYSX(sys_ni_syscall, ppc_fadvise64_64, ppc_fadvise64_64)
PPC_SYS_SPU(rtas)
OLDSYS(debug_setcontext)
SYSCALL(ni_syscall)
COMPAT_SYS(migrate_pages)
COMPAT_SYS(mbind)
COMPAT_SYS(get_mempolicy)
COMPAT_SYS(set_mempolicy)
COMPAT_SYS(mq_open)
SYSCALL(mq_unlink)
COMPAT_SYS(mq_timedsend)
COMPAT_SYS(mq_timedreceive)
COMPAT_SYS(mq_notify)
COMPAT_SYS(mq_getsetattr)
COMPAT_SYS(kexec_load)
COMPAT_SYS(add_key)
COMPAT_SYS(request_key)
COMPAT_SYS(keyctl)
COMPAT_SYS(waitid)
COMPAT_SYS(ioprio_set)
COMPAT_SYS(ioprio_get)
SYSCALL(inotify_init)
SYSCALL(inotify_add_watch)
SYSCALL(inotify_rm_watch)
SYSCALL(spu_run)
SYSCALL(spu_create)
COMPAT_SYS(pselect6)
COMPAT_SYS(ppoll)
SYSCALL_SPU(unshare)
SYSCALL_SPU(splice)
SYSCALL_SPU(tee)
COMPAT_SYS_SPU(vmsplice)
COMPAT_SYS_SPU(openat)
SYSCALL_SPU(mkdirat)
SYSCALL_SPU(mknodat)
SYSCALL_SPU(fchownat)
COMPAT_SYS_SPU(futimesat)
SYSX_SPU(sys_newfstatat, sys_fstatat64, sys_fstatat64)
SYSCALL_SPU(unlinkat)
SYSCALL_SPU(renameat)
SYSCALL_SPU(linkat)
SYSCALL_SPU(symlinkat)
SYSCALL_SPU(readlinkat)
SYSCALL_SPU(fchmodat)
SYSCALL_SPU(faccessat)
COMPAT_SYS_SPU(get_robust_list)
COMPAT_SYS_SPU(set_robust_list)
COMPAT_SYS_SPU(move_pages)
SYSCALL_SPU(getcpu)
COMPAT_SYS(epoll_pwait)
COMPAT_SYS_SPU(utimensat)
COMPAT_SYS_SPU(signalfd)
SYSCALL_SPU(timerfd_create)
SYSCALL_SPU(eventfd)
COMPAT_SYS_SPU(sync_file_range2)
COMPAT_SYS(fallocate)
[POWERPC] Provide a way to protect 4k subpages when using 64k pages Using 64k pages on 64-bit PowerPC systems makes life difficult for emulators that are trying to emulate an ISA, such as x86, which use a smaller page size, since the emulator can no longer use the MMU and the normal system calls for controlling page protections. Of course, the emulator can emulate the MMU by checking and possibly remapping the address for each memory access in software, but that is pretty slow. This provides a facility for such programs to control the access permissions on individual 4k sub-pages of 64k pages. The idea is that the emulator supplies an array of protection masks to apply to a specified range of virtual addresses. These masks are applied at the level where hardware PTEs are inserted into the hardware page table based on the Linux PTEs, so the Linux PTEs are not affected. Note that this new mechanism does not allow any access that would otherwise be prohibited; it can only prohibit accesses that would otherwise be allowed. This new facility is only available on 64-bit PowerPC and only when the kernel is configured for 64k pages. The masks are supplied using a new subpage_prot system call, which takes a starting virtual address and length, and a pointer to an array of protection masks in memory. The array has a 32-bit word per 64k page to be protected; each 32-bit word consists of 16 2-bit fields, for which 0 allows any access (that is otherwise allowed), 1 prevents write accesses, and 2 or 3 prevent any access. Implicit in this is that the regions of the address space that are protected are switched to use 4k hardware pages rather than 64k hardware pages (on machines with hardware 64k page support). In fact the whole process is switched to use 4k hardware pages when the subpage_prot system call is used, but this could be improved in future to switch only the affected segments. The subpage protection bits are stored in a 3 level tree akin to the page table tree. The top level of this tree is stored in a structure that is appended to the top level of the page table tree, i.e., the pgd array. Since it will often only be 32-bit addresses (below 4GB) that are protected, the pointers to the first four bottom level pages are also stored in this structure (each bottom level page contains the protection bits for 1GB of address space), so the protection bits for addresses below 4GB can be accessed with one fewer loads than those for higher addresses. Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-01-24 05:35:13 +08:00
SYSCALL(subpage_prot)
COMPAT_SYS_SPU(timerfd_settime)
COMPAT_SYS_SPU(timerfd_gettime)
COMPAT_SYS_SPU(signalfd4)
SYSCALL_SPU(eventfd2)
SYSCALL_SPU(epoll_create1)
SYSCALL_SPU(dup3)
SYSCALL_SPU(pipe2)
SYSCALL(inotify_init1)
perf: Do the big rename: Performance Counters -> Performance Events Bye-bye Performance Counters, welcome Performance Events! In the past few months the perfcounters subsystem has grown out its initial role of counting hardware events, and has become (and is becoming) a much broader generic event enumeration, reporting, logging, monitoring, analysis facility. Naming its core object 'perf_counter' and naming the subsystem 'perfcounters' has become more and more of a misnomer. With pending code like hw-breakpoints support the 'counter' name is less and less appropriate. All in one, we've decided to rename the subsystem to 'performance events' and to propagate this rename through all fields, variables and API names. (in an ABI compatible fashion) The word 'event' is also a bit shorter than 'counter' - which makes it slightly more convenient to write/handle as well. Thanks goes to Stephane Eranian who first observed this misnomer and suggested a rename. User-space tooling and ABI compatibility is not affected - this patch should be function-invariant. (Also, defconfigs were not touched to keep the size down.) This patch has been generated via the following script: FILES=$(find * -type f | grep -vE 'oprofile|[^K]config') sed -i \ -e 's/PERF_EVENT_/PERF_RECORD_/g' \ -e 's/PERF_COUNTER/PERF_EVENT/g' \ -e 's/perf_counter/perf_event/g' \ -e 's/nb_counters/nb_events/g' \ -e 's/swcounter/swevent/g' \ -e 's/tpcounter_event/tp_event/g' \ $FILES for N in $(find . -name perf_counter.[ch]); do M=$(echo $N | sed 's/perf_counter/perf_event/g') mv $N $M done FILES=$(find . -name perf_event.*) sed -i \ -e 's/COUNTER_MASK/REG_MASK/g' \ -e 's/COUNTER/EVENT/g' \ -e 's/\<event\>/event_id/g' \ -e 's/counter/event/g' \ -e 's/Counter/Event/g' \ $FILES ... to keep it as correct as possible. This script can also be used by anyone who has pending perfcounters patches - it converts a Linux kernel tree over to the new naming. We tried to time this change to the point in time where the amount of pending patches is the smallest: the end of the merge window. Namespace clashes were fixed up in a preparatory patch - and some stylistic fallout will be fixed up in a subsequent patch. ( NOTE: 'counters' are still the proper terminology when we deal with hardware registers - and these sed scripts are a bit over-eager in renaming them. I've undone some of that, but in case there's something left where 'counter' would be better than 'event' we can undo that on an individual basis instead of touching an otherwise nicely automated patch. ) Suggested-by: Stephane Eranian <eranian@google.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Acked-by: Paul Mackerras <paulus@samba.org> Reviewed-by: Arjan van de Ven <arjan@linux.intel.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: David Howells <dhowells@redhat.com> Cc: Kyle McMartin <kyle@mcmartin.ca> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: <linux-arch@vger.kernel.org> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
SYSCALL_SPU(perf_event_open)
COMPAT_SYS_SPU(preadv)
COMPAT_SYS_SPU(pwritev)
COMPAT_SYS(rt_tgsigqueueinfo)
SYSCALL(fanotify_init)
COMPAT_SYS(fanotify_mark)
SYSCALL_SPU(prlimit64)
SYSCALL_SPU(socket)
SYSCALL_SPU(bind)
SYSCALL_SPU(connect)
SYSCALL_SPU(listen)
SYSCALL_SPU(accept)
SYSCALL_SPU(getsockname)
SYSCALL_SPU(getpeername)
SYSCALL_SPU(socketpair)
SYSCALL_SPU(send)
SYSCALL_SPU(sendto)
COMPAT_SYS_SPU(recv)
COMPAT_SYS_SPU(recvfrom)
SYSCALL_SPU(shutdown)
COMPAT_SYS_SPU(setsockopt)
COMPAT_SYS_SPU(getsockopt)
COMPAT_SYS_SPU(sendmsg)
COMPAT_SYS_SPU(recvmsg)
COMPAT_SYS_SPU(recvmmsg)
SYSCALL_SPU(accept4)
SYSCALL_SPU(name_to_handle_at)
COMPAT_SYS_SPU(open_by_handle_at)
COMPAT_SYS_SPU(clock_adjtime)
SYSCALL_SPU(syncfs)
COMPAT_SYS_SPU(sendmmsg)
ns: Wire up the setns system call 32bit and 64bit on x86 are tested and working. The rest I have looked at closely and I can't find any problems. setns is an easy system call to wire up. It just takes two ints so I don't expect any weird architecture porting problems. While doing this I have noticed that we have some architectures that are very slow to get new system calls. cris seems to be the slowest where the last system calls wired up were preadv and pwritev. avr32 is weird in that recvmmsg was wired up but never declared in unistd.h. frv is behind with perf_event_open being the last syscall wired up. On h8300 the last system call wired up was epoll_wait. On m32r the last system call wired up was fallocate. mn10300 has recvmmsg as the last system call wired up. The rest seem to at least have syncfs wired up which was new in the 2.6.39. v2: Most of the architecture support added by Daniel Lezcano <dlezcano@fr.ibm.com> v3: ported to v2.6.36-rc4 by: Eric W. Biederman <ebiederm@xmission.com> v4: Moved wiring up of the system call to another patch v5: ported to v2.6.39-rc6 v6: rebased onto parisc-next and net-next to avoid syscall conflicts. v7: ported to Linus's latest post 2.6.39 tree. >  arch/blackfin/include/asm/unistd.h     |    3 ++- >  arch/blackfin/mach-common/entry.S      |    1 + Acked-by: Mike Frysinger <vapier@gentoo.org> Oh - ia64 wiring looks good. Acked-by: Tony Luck <tony.luck@intel.com> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-05-28 10:28:27 +08:00
SYSCALL_SPU(setns)
Cross Memory Attach The basic idea behind cross memory attach is to allow MPI programs doing intra-node communication to do a single copy of the message rather than a double copy of the message via shared memory. The following patch attempts to achieve this by allowing a destination process, given an address and size from a source process, to copy memory directly from the source process into its own address space via a system call. There is also a symmetrical ability to copy from the current process's address space into a destination process's address space. - Use of /proc/pid/mem has been considered, but there are issues with using it: - Does not allow for specifying iovecs for both src and dest, assuming preadv or pwritev was implemented either the area read from or written to would need to be contiguous. - Currently mem_read allows only processes who are currently ptrace'ing the target and are still able to ptrace the target to read from the target. This check could possibly be moved to the open call, but its not clear exactly what race this restriction is stopping (reason appears to have been lost) - Having to send the fd of /proc/self/mem via SCM_RIGHTS on unix domain socket is a bit ugly from a userspace point of view, especially when you may have hundreds if not (eventually) thousands of processes that all need to do this with each other - Doesn't allow for some future use of the interface we would like to consider adding in the future (see below) - Interestingly reading from /proc/pid/mem currently actually involves two copies! (But this could be fixed pretty easily) As mentioned previously use of vmsplice instead was considered, but has problems. Since you need the reader and writer working co-operatively if the pipe is not drained then you block. Which requires some wrapping to do non blocking on the send side or polling on the receive. In all to all communication it requires ordering otherwise you can deadlock. And in the example of many MPI tasks writing to one MPI task vmsplice serialises the copying. There are some cases of MPI collectives where even a single copy interface does not get us the performance gain we could. For example in an MPI_Reduce rather than copy the data from the source we would like to instead use it directly in a mathops (say the reduce is doing a sum) as this would save us doing a copy. We don't need to keep a copy of the data from the source. I haven't implemented this, but I think this interface could in the future do all this through the use of the flags - eg could specify the math operation and type and the kernel rather than just copying the data would apply the specified operation between the source and destination and store it in the destination. Although we don't have a "second user" of the interface (though I've had some nibbles from people who may be interested in using it for intra process messaging which is not MPI). This interface is something which hardware vendors are already doing for their custom drivers to implement fast local communication. And so in addition to this being useful for OpenMPI it would mean the driver maintainers don't have to fix things up when the mm changes. There was some discussion about how much faster a true zero copy would go. Here's a link back to the email with some testing I did on that: http://marc.info/?l=linux-mm&m=130105930902915&w=2 There is a basic man page for the proposed interface here: http://ozlabs.org/~cyeoh/cma/process_vm_readv.txt This has been implemented for x86 and powerpc, other architecture should mainly (I think) just need to add syscall numbers for the process_vm_readv and process_vm_writev. There are 32 bit compatibility versions for 64-bit kernels. For arch maintainers there are some simple tests to be able to quickly verify that the syscalls are working correctly here: http://ozlabs.org/~cyeoh/cma/cma-test-20110718.tgz Signed-off-by: Chris Yeoh <yeohc@au1.ibm.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: David Howells <dhowells@redhat.com> Cc: James Morris <jmorris@namei.org> Cc: <linux-man@vger.kernel.org> Cc: <linux-arch@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-11-01 08:06:39 +08:00
COMPAT_SYS(process_vm_readv)
COMPAT_SYS(process_vm_writev)