2005-09-26 14:04:21 +08:00
|
|
|
/*
|
|
|
|
* This program is used to generate definitions needed by
|
|
|
|
* assembly language modules.
|
|
|
|
*
|
|
|
|
* We use the technique used in the OSF Mach kernel code:
|
|
|
|
* generate asm statements containing #defines,
|
|
|
|
* compile this file to assembler, and then extract the
|
|
|
|
* #defines from the assembly-language output.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/signal.h>
|
|
|
|
#include <linux/sched.h>
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/string.h>
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/mman.h>
|
|
|
|
#include <linux/mm.h>
|
2007-05-03 20:31:38 +08:00
|
|
|
#include <linux/suspend.h>
|
2008-02-05 13:16:48 +08:00
|
|
|
#include <linux/hrtimer.h>
|
2005-09-28 22:35:31 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
2005-09-26 14:04:21 +08:00
|
|
|
#include <linux/time.h>
|
|
|
|
#include <linux/hardirq.h>
|
2005-09-28 22:35:31 +08:00
|
|
|
#endif
|
2008-04-29 16:04:08 +08:00
|
|
|
#include <linux/kbuild.h>
|
2005-09-28 22:35:31 +08:00
|
|
|
|
2005-09-26 14:04:21 +08:00
|
|
|
#include <asm/io.h>
|
|
|
|
#include <asm/page.h>
|
|
|
|
#include <asm/pgtable.h>
|
|
|
|
#include <asm/processor.h>
|
|
|
|
#include <asm/cputable.h>
|
|
|
|
#include <asm/thread_info.h>
|
2005-10-26 15:05:24 +08:00
|
|
|
#include <asm/rtas.h>
|
2005-11-11 18:15:21 +08:00
|
|
|
#include <asm/vdso_datapage.h>
|
2005-09-26 14:04:21 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
|
|
|
#include <asm/paca.h>
|
|
|
|
#include <asm/lppaca.h>
|
|
|
|
#include <asm/cache.h>
|
|
|
|
#include <asm/compat.h>
|
2006-08-09 15:00:30 +08:00
|
|
|
#include <asm/mmu.h>
|
2006-09-14 02:32:39 +08:00
|
|
|
#include <asm/hvcall.h>
|
2005-09-26 14:04:21 +08:00
|
|
|
#endif
|
2008-04-10 14:39:18 +08:00
|
|
|
#ifdef CONFIG_PPC_ISERIES
|
|
|
|
#include <asm/iseries/alpaca.h>
|
|
|
|
#endif
|
2010-08-30 18:01:56 +08:00
|
|
|
#if defined(CONFIG_KVM) || defined(CONFIG_KVM_GUEST)
|
2009-01-04 06:23:08 +08:00
|
|
|
#include <linux/kvm_host.h>
|
2010-04-16 06:11:44 +08:00
|
|
|
#endif
|
2010-08-30 18:01:56 +08:00
|
|
|
#if defined(CONFIG_KVM) && defined(CONFIG_PPC_BOOK3S)
|
|
|
|
#include <asm/kvm_book3s.h>
|
2008-11-05 23:36:18 +08:00
|
|
|
#endif
|
2005-09-26 14:04:21 +08:00
|
|
|
|
2009-07-28 09:59:34 +08:00
|
|
|
#ifdef CONFIG_PPC32
|
2008-04-30 18:23:21 +08:00
|
|
|
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
|
|
|
|
#include "head_booke.h"
|
|
|
|
#endif
|
2009-07-28 09:59:34 +08:00
|
|
|
#endif
|
2008-04-30 18:23:21 +08:00
|
|
|
|
2009-10-17 07:48:40 +08:00
|
|
|
#if defined(CONFIG_PPC_FSL_BOOK3E)
|
2008-12-09 11:34:55 +08:00
|
|
|
#include "../mm/mmu_decl.h"
|
|
|
|
#endif
|
|
|
|
|
2005-09-26 14:04:21 +08:00
|
|
|
int main(void)
|
|
|
|
{
|
2005-09-28 22:35:31 +08:00
|
|
|
DEFINE(THREAD, offsetof(struct task_struct, thread));
|
|
|
|
DEFINE(MM, offsetof(struct task_struct, mm));
|
2008-12-19 03:13:24 +08:00
|
|
|
DEFINE(MMCONTEXTID, offsetof(struct mm_struct, context.id));
|
2005-09-26 14:04:21 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
2005-09-28 22:35:31 +08:00
|
|
|
DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context));
|
powerpc: Allow perf_counters to access user memory at interrupt time
This provides a mechanism to allow the perf_counters code to access
user memory in a PMU interrupt routine. Such an access can cause
various kinds of interrupt: SLB miss, MMU hash table miss, segment
table miss, or TLB miss, depending on the processor. This commit
only deals with 64-bit classic/server processors, which use an MMU
hash table. 32-bit processors are already able to access user memory
at interrupt time. Since we don't soft-disable on 32-bit, we avoid
the possibility of reentering hash_page or the TLB miss handlers,
since they run with interrupts disabled.
On 64-bit processors, an SLB miss interrupt on a user address will
update the slb_cache and slb_cache_ptr fields in the paca. This is
OK except in the case where a PMU interrupt occurs in switch_slb,
which also accesses those fields. To prevent this, we hard-disable
interrupts in switch_slb. Interrupts are already soft-disabled at
this point, and will get hard-enabled when they get soft-enabled
later.
This also reworks slb_flush_and_rebolt: to avoid hard-disabling twice,
and to make sure that it clears the slb_cache_ptr when called from
other callers than switch_slb, the existing routine is renamed to
__slb_flush_and_rebolt, which is called by switch_slb and the new
version of slb_flush_and_rebolt.
Similarly, switch_stab (used on POWER3 and RS64 processors) gets a
hard_irq_disable() to protect the per-cpu variables used there and
in ste_allocate.
If an MMU hash table miss interrupt occurs, normally we would call
hash_page to look up the Linux PTE for the address and create a HPTE.
However, hash_page is fairly complex and takes some locks, so to
avoid the possibility of deadlock, we check the preemption count
to see if we are in a (pseudo-)NMI handler, and if so, we don't call
hash_page but instead treat it like a bad access that will get
reported up through the exception table mechanism. An interrupt
whose handler runs even though the interrupt occurred when
soft-disabled (such as the PMU interrupt) is considered a pseudo-NMI
handler, which should use nmi_enter()/nmi_exit() rather than
irq_enter()/irq_exit().
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-08-17 13:17:54 +08:00
|
|
|
DEFINE(SIGSEGV, SIGSEGV);
|
|
|
|
DEFINE(NMI_MASK, NMI_MASK);
|
2011-03-02 23:18:48 +08:00
|
|
|
DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr));
|
2005-09-28 22:35:31 +08:00
|
|
|
#else
|
2007-05-09 17:35:17 +08:00
|
|
|
DEFINE(THREAD_INFO, offsetof(struct task_struct, stack));
|
2005-09-28 22:35:31 +08:00
|
|
|
#endif /* CONFIG_PPC64 */
|
|
|
|
|
2005-09-26 14:04:21 +08:00
|
|
|
DEFINE(KSP, offsetof(struct thread_struct, ksp));
|
2008-04-28 14:21:22 +08:00
|
|
|
DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit));
|
2005-09-26 14:04:21 +08:00
|
|
|
DEFINE(PT_REGS, offsetof(struct thread_struct, regs));
|
|
|
|
DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode));
|
|
|
|
DEFINE(THREAD_FPR0, offsetof(struct thread_struct, fpr[0]));
|
|
|
|
DEFINE(THREAD_FPSCR, offsetof(struct thread_struct, fpscr));
|
|
|
|
#ifdef CONFIG_ALTIVEC
|
|
|
|
DEFINE(THREAD_VR0, offsetof(struct thread_struct, vr[0]));
|
|
|
|
DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave));
|
|
|
|
DEFINE(THREAD_VSCR, offsetof(struct thread_struct, vscr));
|
|
|
|
DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr));
|
|
|
|
#endif /* CONFIG_ALTIVEC */
|
2008-06-25 12:07:18 +08:00
|
|
|
#ifdef CONFIG_VSX
|
|
|
|
DEFINE(THREAD_VSR0, offsetof(struct thread_struct, fpr));
|
|
|
|
DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr));
|
|
|
|
#endif /* CONFIG_VSX */
|
2005-09-28 22:35:31 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
|
|
|
DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid));
|
|
|
|
#else /* CONFIG_PPC64 */
|
|
|
|
DEFINE(PGDIR, offsetof(struct thread_struct, pgdir));
|
|
|
|
#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
|
|
|
|
DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0));
|
|
|
|
#endif
|
2005-09-26 14:04:21 +08:00
|
|
|
#ifdef CONFIG_SPE
|
|
|
|
DEFINE(THREAD_EVR0, offsetof(struct thread_struct, evr[0]));
|
|
|
|
DEFINE(THREAD_ACC, offsetof(struct thread_struct, acc));
|
|
|
|
DEFINE(THREAD_SPEFSCR, offsetof(struct thread_struct, spefscr));
|
|
|
|
DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe));
|
|
|
|
#endif /* CONFIG_SPE */
|
2005-09-28 22:35:31 +08:00
|
|
|
#endif /* CONFIG_PPC64 */
|
2010-04-16 06:11:51 +08:00
|
|
|
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
|
|
|
|
DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu));
|
|
|
|
#endif
|
2005-09-28 22:35:31 +08:00
|
|
|
|
|
|
|
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
|
2006-04-18 19:49:11 +08:00
|
|
|
DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
|
2005-09-28 22:35:31 +08:00
|
|
|
DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
|
|
|
|
DEFINE(TI_TASK, offsetof(struct thread_info, task));
|
|
|
|
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
|
|
|
|
|
|
|
|
#ifdef CONFIG_PPC64
|
|
|
|
DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size));
|
|
|
|
DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_dline_size));
|
|
|
|
DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, dlines_per_page));
|
|
|
|
DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size));
|
|
|
|
DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size));
|
|
|
|
DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page));
|
|
|
|
/* paca */
|
|
|
|
DEFINE(PACA_SIZE, sizeof(struct paca_struct));
|
|
|
|
DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index));
|
|
|
|
DEFINE(PACAPROCSTART, offsetof(struct paca_struct, cpu_start));
|
|
|
|
DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack));
|
|
|
|
DEFINE(PACACURRENT, offsetof(struct paca_struct, __current));
|
|
|
|
DEFINE(PACASAVEDMSR, offsetof(struct paca_struct, saved_msr));
|
|
|
|
DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_rr));
|
|
|
|
DEFINE(PACAR1, offsetof(struct paca_struct, saved_r1));
|
|
|
|
DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc));
|
2008-08-30 09:40:24 +08:00
|
|
|
DEFINE(PACAKBASE, offsetof(struct paca_struct, kernelbase));
|
|
|
|
DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
|
[POWERPC] Lazy interrupt disabling for 64-bit machines
This implements a lazy strategy for disabling interrupts. This means
that local_irq_disable() et al. just clear the 'interrupts are
enabled' flag in the paca. If an interrupt comes along, the interrupt
entry code notices that interrupts are supposed to be disabled, and
clears the EE bit in SRR1, clears the 'interrupts are hard-enabled'
flag in the paca, and returns. This means that interrupts only
actually get disabled in the processor when an interrupt comes along.
When interrupts are enabled by local_irq_enable() et al., the code
sets the interrupts-enabled flag in the paca, and then checks whether
interrupts got hard-disabled. If so, it also sets the EE bit in the
MSR to hard-enable the interrupts.
This has the potential to improve performance, and also makes it
easier to make a kernel that can boot on iSeries and on other 64-bit
machines, since this lazy-disable strategy is very similar to the
soft-disable strategy that iSeries already uses.
This version renames paca->proc_enabled to paca->soft_enabled, and
changes a couple of soft-disables in the kexec code to hard-disables,
which should fix the crash that Michael Ellerman saw. This doesn't
yet use a reserved CR field for the soft_enabled and hard_enabled
flags. This applies on top of Stephen Rothwell's patches to make it
possible to build a combined iSeries/other kernel.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-10-04 14:47:49 +08:00
|
|
|
DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
|
|
|
|
DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
|
2005-09-28 22:35:31 +08:00
|
|
|
DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
|
2007-05-08 14:27:27 +08:00
|
|
|
#ifdef CONFIG_PPC_MM_SLICES
|
|
|
|
DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
|
|
|
|
context.low_slices_psize));
|
|
|
|
DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct,
|
|
|
|
context.high_slices_psize));
|
|
|
|
DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
|
2009-06-03 05:17:41 +08:00
|
|
|
#endif /* CONFIG_PPC_MM_SLICES */
|
2009-07-24 07:15:42 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_PPC_BOOK3E
|
|
|
|
DEFINE(PACAPGD, offsetof(struct paca_struct, pgd));
|
|
|
|
DEFINE(PACA_KERNELPGD, offsetof(struct paca_struct, kernel_pgd));
|
|
|
|
DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
|
|
|
|
DEFINE(PACA_EXTLB, offsetof(struct paca_struct, extlb));
|
|
|
|
DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
|
|
|
|
DEFINE(PACA_EXCRIT, offsetof(struct paca_struct, excrit));
|
|
|
|
DEFINE(PACA_EXDBG, offsetof(struct paca_struct, exdbg));
|
|
|
|
DEFINE(PACA_MC_STACK, offsetof(struct paca_struct, mc_kstack));
|
|
|
|
DEFINE(PACA_CRIT_STACK, offsetof(struct paca_struct, crit_kstack));
|
|
|
|
DEFINE(PACA_DBG_STACK, offsetof(struct paca_struct, dbg_kstack));
|
|
|
|
#endif /* CONFIG_PPC_BOOK3E */
|
|
|
|
|
2009-06-03 05:17:41 +08:00
|
|
|
#ifdef CONFIG_PPC_STD_MMU_64
|
|
|
|
DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real));
|
|
|
|
DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr));
|
|
|
|
DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
|
|
|
|
DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
|
|
|
|
DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp));
|
|
|
|
#ifdef CONFIG_PPC_MM_SLICES
|
2007-05-08 14:27:27 +08:00
|
|
|
DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp));
|
|
|
|
#else
|
|
|
|
DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp));
|
|
|
|
#endif /* CONFIG_PPC_MM_SLICES */
|
2005-09-28 22:35:31 +08:00
|
|
|
DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
|
|
|
|
DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
|
|
|
|
DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb));
|
2006-01-13 07:26:42 +08:00
|
|
|
DEFINE(PACALPPACAPTR, offsetof(struct paca_struct, lppaca_ptr));
|
2006-08-07 14:19:19 +08:00
|
|
|
DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr));
|
2006-08-09 15:00:30 +08:00
|
|
|
DEFINE(SLBSHADOW_STACKVSID,
|
|
|
|
offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid));
|
|
|
|
DEFINE(SLBSHADOW_STACKESID,
|
|
|
|
offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid));
|
powerpc: Account time using timebase rather than PURR
Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the
PURR register for measuring the user and system time used by
processes, as well as other related times such as hardirq and
softirq times. This turns out to be quite confusing for users
because it means that a program will often be measured as taking
less time when run on a multi-threaded processor (SMT2 or SMT4 mode)
than it does when run on a single-threaded processor (ST mode), even
though the program takes longer to finish. The discrepancy is
accounted for as stolen time, which is also confusing, particularly
when there are no other partitions running.
This changes the accounting to use the timebase instead, meaning that
the reported user and system times are the actual number of real-time
seconds that the program was executing on the processor thread,
regardless of which SMT mode the processor is in. Thus a program will
generally show greater user and system times when run on a
multi-threaded processor than on a single-threaded processor.
On pSeries systems on POWER5 or later processors, we measure the
stolen time (time when this partition wasn't running) using the
hypervisor dispatch trace log. We check for new entries in the
log on every entry from user mode and on every transition from
kernel process context to soft or hard IRQ context (i.e. when
account_system_vtime() gets called). So that we can correctly
distinguish time stolen from user time and time stolen from system
time, without having to check the log on every exit to user mode,
we store separate timestamps for exit to user mode and entry from
user mode.
On systems that have a SPURR (POWER6 and POWER7), we read the SPURR
in account_system_vtime() (as before), and then apportion the SPURR
ticks since the last time we read it between scaled user time and
scaled system time according to the relative proportions of user
time and system time over the same interval. This avoids having to
read the SPURR on every kernel entry and exit. On systems that have
PURR but not SPURR (i.e., POWER5), we do the same using the PURR
rather than the SPURR.
This disables the DTL user interface in /sys/kernel/debug/powerpc/dtl
for now since it conflicts with the use of the dispatch trace log
by the time accounting code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-08-27 03:56:43 +08:00
|
|
|
DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area));
|
2005-09-28 22:35:31 +08:00
|
|
|
DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
|
|
|
|
DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
|
|
|
|
DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int));
|
|
|
|
DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int));
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
DEFINE(LPPACA_PMCINUSE, offsetof(struct lppaca, pmcregs_in_use));
|
powerpc: Account time using timebase rather than PURR
Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the
PURR register for measuring the user and system time used by
processes, as well as other related times such as hardirq and
softirq times. This turns out to be quite confusing for users
because it means that a program will often be measured as taking
less time when run on a multi-threaded processor (SMT2 or SMT4 mode)
than it does when run on a single-threaded processor (ST mode), even
though the program takes longer to finish. The discrepancy is
accounted for as stolen time, which is also confusing, particularly
when there are no other partitions running.
This changes the accounting to use the timebase instead, meaning that
the reported user and system times are the actual number of real-time
seconds that the program was executing on the processor thread,
regardless of which SMT mode the processor is in. Thus a program will
generally show greater user and system times when run on a
multi-threaded processor than on a single-threaded processor.
On pSeries systems on POWER5 or later processors, we measure the
stolen time (time when this partition wasn't running) using the
hypervisor dispatch trace log. We check for new entries in the
log on every entry from user mode and on every transition from
kernel process context to soft or hard IRQ context (i.e. when
account_system_vtime() gets called). So that we can correctly
distinguish time stolen from user time and time stolen from system
time, without having to check the log on every exit to user mode,
we store separate timestamps for exit to user mode and entry from
user mode.
On systems that have a SPURR (POWER6 and POWER7), we read the SPURR
in account_system_vtime() (as before), and then apportion the SPURR
ticks since the last time we read it between scaled user time and
scaled system time according to the relative proportions of user
time and system time over the same interval. This avoids having to
read the SPURR on every kernel entry and exit. On systems that have
PURR but not SPURR (i.e., POWER5), we do the same using the PURR
rather than the SPURR.
This disables the DTL user interface in /sys/kernel/debug/powerpc/dtl
for now since it conflicts with the use of the dispatch trace log
by the time accounting code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-08-27 03:56:43 +08:00
|
|
|
DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx));
|
2011-06-29 08:22:05 +08:00
|
|
|
DEFINE(LPPACA_YIELDCOUNT, offsetof(struct lppaca, yield_count));
|
powerpc: Account time using timebase rather than PURR
Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the
PURR register for measuring the user and system time used by
processes, as well as other related times such as hardirq and
softirq times. This turns out to be quite confusing for users
because it means that a program will often be measured as taking
less time when run on a multi-threaded processor (SMT2 or SMT4 mode)
than it does when run on a single-threaded processor (ST mode), even
though the program takes longer to finish. The discrepancy is
accounted for as stolen time, which is also confusing, particularly
when there are no other partitions running.
This changes the accounting to use the timebase instead, meaning that
the reported user and system times are the actual number of real-time
seconds that the program was executing on the processor thread,
regardless of which SMT mode the processor is in. Thus a program will
generally show greater user and system times when run on a
multi-threaded processor than on a single-threaded processor.
On pSeries systems on POWER5 or later processors, we measure the
stolen time (time when this partition wasn't running) using the
hypervisor dispatch trace log. We check for new entries in the
log on every entry from user mode and on every transition from
kernel process context to soft or hard IRQ context (i.e. when
account_system_vtime() gets called). So that we can correctly
distinguish time stolen from user time and time stolen from system
time, without having to check the log on every exit to user mode,
we store separate timestamps for exit to user mode and entry from
user mode.
On systems that have a SPURR (POWER6 and POWER7), we read the SPURR
in account_system_vtime() (as before), and then apportion the SPURR
ticks since the last time we read it between scaled user time and
scaled system time according to the relative proportions of user
time and system time over the same interval. This avoids having to
read the SPURR on every kernel entry and exit. On systems that have
PURR but not SPURR (i.e., POWER5), we do the same using the PURR
rather than the SPURR.
This disables the DTL user interface in /sys/kernel/debug/powerpc/dtl
for now since it conflicts with the use of the dispatch trace log
by the time accounting code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-08-27 03:56:43 +08:00
|
|
|
DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx));
|
2009-06-03 05:17:41 +08:00
|
|
|
#endif /* CONFIG_PPC_STD_MMU_64 */
|
|
|
|
DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
|
|
|
|
DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
|
2010-05-14 03:40:11 +08:00
|
|
|
DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
|
powerpc: Account time using timebase rather than PURR
Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the
PURR register for measuring the user and system time used by
processes, as well as other related times such as hardirq and
softirq times. This turns out to be quite confusing for users
because it means that a program will often be measured as taking
less time when run on a multi-threaded processor (SMT2 or SMT4 mode)
than it does when run on a single-threaded processor (ST mode), even
though the program takes longer to finish. The discrepancy is
accounted for as stolen time, which is also confusing, particularly
when there are no other partitions running.
This changes the accounting to use the timebase instead, meaning that
the reported user and system times are the actual number of real-time
seconds that the program was executing on the processor thread,
regardless of which SMT mode the processor is in. Thus a program will
generally show greater user and system times when run on a
multi-threaded processor than on a single-threaded processor.
On pSeries systems on POWER5 or later processors, we measure the
stolen time (time when this partition wasn't running) using the
hypervisor dispatch trace log. We check for new entries in the
log on every entry from user mode and on every transition from
kernel process context to soft or hard IRQ context (i.e. when
account_system_vtime() gets called). So that we can correctly
distinguish time stolen from user time and time stolen from system
time, without having to check the log on every exit to user mode,
we store separate timestamps for exit to user mode and entry from
user mode.
On systems that have a SPURR (POWER6 and POWER7), we read the SPURR
in account_system_vtime() (as before), and then apportion the SPURR
ticks since the last time we read it between scaled user time and
scaled system time according to the relative proportions of user
time and system time over the same interval. This avoids having to
read the SPURR on every kernel entry and exit. On systems that have
PURR but not SPURR (i.e., POWER5), we do the same using the PURR
rather than the SPURR.
This disables the DTL user interface in /sys/kernel/debug/powerpc/dtl
for now since it conflicts with the use of the dispatch trace log
by the time accounting code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-08-27 03:56:43 +08:00
|
|
|
DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime));
|
|
|
|
DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user));
|
2009-06-03 05:17:41 +08:00
|
|
|
DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
|
|
|
|
DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
|
|
|
|
DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save));
|
2005-10-26 15:05:24 +08:00
|
|
|
#endif /* CONFIG_PPC64 */
|
2005-09-28 22:35:31 +08:00
|
|
|
|
|
|
|
/* RTAS */
|
|
|
|
DEFINE(RTASBASE, offsetof(struct rtas_t, base));
|
|
|
|
DEFINE(RTASENTRY, offsetof(struct rtas_t, entry));
|
|
|
|
|
2005-09-26 14:04:21 +08:00
|
|
|
/* Interrupt register frame */
|
2008-04-24 04:33:49 +08:00
|
|
|
DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
|
2005-09-26 14:04:21 +08:00
|
|
|
DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
|
2010-04-16 06:11:55 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
2005-09-28 22:35:31 +08:00
|
|
|
/* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
|
|
|
|
DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
|
|
|
|
DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
|
2006-09-07 07:23:12 +08:00
|
|
|
|
|
|
|
/* hcall statistics */
|
|
|
|
DEFINE(HCALL_STAT_SIZE, sizeof(struct hcall_stats));
|
|
|
|
DEFINE(HCALL_STAT_CALLS, offsetof(struct hcall_stats, num_calls));
|
|
|
|
DEFINE(HCALL_STAT_TB, offsetof(struct hcall_stats, tb_total));
|
|
|
|
DEFINE(HCALL_STAT_PURR, offsetof(struct hcall_stats, purr_total));
|
2005-09-28 22:35:31 +08:00
|
|
|
#endif /* CONFIG_PPC64 */
|
2005-09-26 14:04:21 +08:00
|
|
|
DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0]));
|
|
|
|
DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1]));
|
|
|
|
DEFINE(GPR2, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[2]));
|
|
|
|
DEFINE(GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[3]));
|
|
|
|
DEFINE(GPR4, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[4]));
|
|
|
|
DEFINE(GPR5, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[5]));
|
|
|
|
DEFINE(GPR6, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[6]));
|
|
|
|
DEFINE(GPR7, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[7]));
|
|
|
|
DEFINE(GPR8, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[8]));
|
|
|
|
DEFINE(GPR9, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[9]));
|
|
|
|
DEFINE(GPR10, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[10]));
|
|
|
|
DEFINE(GPR11, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[11]));
|
|
|
|
DEFINE(GPR12, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[12]));
|
|
|
|
DEFINE(GPR13, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[13]));
|
2005-09-28 22:35:31 +08:00
|
|
|
#ifndef CONFIG_PPC64
|
2005-09-26 14:04:21 +08:00
|
|
|
DEFINE(GPR14, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[14]));
|
|
|
|
DEFINE(GPR15, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[15]));
|
|
|
|
DEFINE(GPR16, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[16]));
|
|
|
|
DEFINE(GPR17, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[17]));
|
|
|
|
DEFINE(GPR18, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[18]));
|
|
|
|
DEFINE(GPR19, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[19]));
|
|
|
|
DEFINE(GPR20, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[20]));
|
|
|
|
DEFINE(GPR21, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[21]));
|
|
|
|
DEFINE(GPR22, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[22]));
|
|
|
|
DEFINE(GPR23, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[23]));
|
|
|
|
DEFINE(GPR24, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[24]));
|
|
|
|
DEFINE(GPR25, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[25]));
|
|
|
|
DEFINE(GPR26, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[26]));
|
|
|
|
DEFINE(GPR27, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[27]));
|
|
|
|
DEFINE(GPR28, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[28]));
|
|
|
|
DEFINE(GPR29, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[29]));
|
|
|
|
DEFINE(GPR30, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[30]));
|
|
|
|
DEFINE(GPR31, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[31]));
|
2005-09-28 22:35:31 +08:00
|
|
|
#endif /* ! CONFIG_PPC64 */
|
2005-09-26 14:04:21 +08:00
|
|
|
/*
|
|
|
|
* Note: these symbols include _ because they overlap with special
|
|
|
|
* register names
|
|
|
|
*/
|
|
|
|
DEFINE(_NIP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, nip));
|
|
|
|
DEFINE(_MSR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, msr));
|
|
|
|
DEFINE(_CTR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ctr));
|
|
|
|
DEFINE(_LINK, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, link));
|
|
|
|
DEFINE(_CCR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ccr));
|
|
|
|
DEFINE(_XER, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, xer));
|
|
|
|
DEFINE(_DAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar));
|
|
|
|
DEFINE(_DSISR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr));
|
2005-09-28 22:35:31 +08:00
|
|
|
DEFINE(ORIG_GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, orig_gpr3));
|
|
|
|
DEFINE(RESULT, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, result));
|
2005-10-28 20:45:25 +08:00
|
|
|
DEFINE(_TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap));
|
2005-09-28 22:35:31 +08:00
|
|
|
#ifndef CONFIG_PPC64
|
|
|
|
DEFINE(_MQ, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, mq));
|
|
|
|
/*
|
|
|
|
* The PowerPC 400-class & Book-E processors have neither the DAR
|
|
|
|
* nor the DSISR SPRs. Hence, we overload them to hold the similar
|
|
|
|
* DEAR and ESR SPRs for such processors. For critical interrupts
|
|
|
|
* we use them to hold SRR0 and SRR1.
|
2005-09-26 14:04:21 +08:00
|
|
|
*/
|
|
|
|
DEFINE(_DEAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar));
|
|
|
|
DEFINE(_ESR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr));
|
2005-09-28 22:35:31 +08:00
|
|
|
#else /* CONFIG_PPC64 */
|
|
|
|
DEFINE(SOFTE, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, softe));
|
|
|
|
|
|
|
|
/* These are _only_ to be used with {PROM,RTAS}_FRAME_SIZE!!! */
|
|
|
|
DEFINE(_SRR0, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs));
|
|
|
|
DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8);
|
|
|
|
#endif /* CONFIG_PPC64 */
|
|
|
|
|
2009-07-28 09:59:34 +08:00
|
|
|
#if defined(CONFIG_PPC32)
|
2008-04-30 18:23:21 +08:00
|
|
|
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
|
|
|
|
DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE);
|
|
|
|
DEFINE(MAS0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0));
|
|
|
|
/* we overload MMUCR for 44x on MAS0 since they are mutually exclusive */
|
|
|
|
DEFINE(MMUCR, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0));
|
|
|
|
DEFINE(MAS1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas1));
|
|
|
|
DEFINE(MAS2, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas2));
|
|
|
|
DEFINE(MAS3, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas3));
|
|
|
|
DEFINE(MAS6, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas6));
|
|
|
|
DEFINE(MAS7, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas7));
|
|
|
|
DEFINE(_SRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr0));
|
|
|
|
DEFINE(_SRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr1));
|
|
|
|
DEFINE(_CSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr0));
|
|
|
|
DEFINE(_CSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr1));
|
|
|
|
DEFINE(_DSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr0));
|
|
|
|
DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1));
|
|
|
|
DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, saved_ksp_limit));
|
|
|
|
#endif
|
2009-07-28 09:59:34 +08:00
|
|
|
#endif
|
2005-09-26 14:04:21 +08:00
|
|
|
DEFINE(CLONE_VM, CLONE_VM);
|
|
|
|
DEFINE(CLONE_UNTRACED, CLONE_UNTRACED);
|
2005-09-28 22:35:31 +08:00
|
|
|
|
|
|
|
#ifndef CONFIG_PPC64
|
2005-09-26 14:04:21 +08:00
|
|
|
DEFINE(MM_PGD, offsetof(struct mm_struct, pgd));
|
2005-09-28 22:35:31 +08:00
|
|
|
#endif /* ! CONFIG_PPC64 */
|
2005-09-26 14:04:21 +08:00
|
|
|
|
|
|
|
/* About the CPU features table */
|
|
|
|
DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features));
|
|
|
|
DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup));
|
2006-08-11 13:07:08 +08:00
|
|
|
DEFINE(CPU_SPEC_RESTORE, offsetof(struct cpu_spec, cpu_restore));
|
2005-09-26 14:04:21 +08:00
|
|
|
|
2005-09-28 22:35:31 +08:00
|
|
|
DEFINE(pbe_address, offsetof(struct pbe, address));
|
|
|
|
DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
|
|
|
|
DEFINE(pbe_next, offsetof(struct pbe, next));
|
2005-09-26 14:04:21 +08:00
|
|
|
|
2007-05-03 20:31:38 +08:00
|
|
|
#ifndef CONFIG_PPC64
|
2005-10-11 20:08:12 +08:00
|
|
|
DEFINE(TASK_SIZE, TASK_SIZE);
|
2005-09-28 22:35:31 +08:00
|
|
|
DEFINE(NUM_USER_SEGMENTS, TASK_SIZE>>28);
|
2005-11-11 18:15:21 +08:00
|
|
|
#endif /* ! CONFIG_PPC64 */
|
2005-09-26 14:04:21 +08:00
|
|
|
|
2005-11-11 18:15:21 +08:00
|
|
|
/* datapage offsets for use by vdso */
|
|
|
|
DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct vdso_data, tb_orig_stamp));
|
|
|
|
DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct vdso_data, tb_ticks_per_sec));
|
|
|
|
DEFINE(CFG_TB_TO_XS, offsetof(struct vdso_data, tb_to_xs));
|
|
|
|
DEFINE(CFG_STAMP_XSEC, offsetof(struct vdso_data, stamp_xsec));
|
|
|
|
DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct vdso_data, tb_update_count));
|
|
|
|
DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct vdso_data, tz_minuteswest));
|
|
|
|
DEFINE(CFG_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
|
|
|
|
DEFINE(CFG_SYSCALL_MAP32, offsetof(struct vdso_data, syscall_map_32));
|
|
|
|
DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec));
|
|
|
|
DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
|
2008-10-28 07:56:03 +08:00
|
|
|
DEFINE(STAMP_XTIME, offsetof(struct vdso_data, stamp_xtime));
|
powerpc: Rework VDSO gettimeofday to prevent time going backwards
Currently it is possible for userspace to see the result of
gettimeofday() going backwards by 1 microsecond, assuming that
userspace is using the gettimeofday() in the VDSO. The VDSO
gettimeofday() algorithm computes the time in "xsecs", which are
units of 2^-20 seconds, or approximately 0.954 microseconds,
using the algorithm
now = (timebase - tb_orig_stamp) * tb_to_xs + stamp_xsec
and then converts the time in xsecs to seconds and microseconds.
The kernel updates the tb_orig_stamp and stamp_xsec values every
tick in update_vsyscall(). If the length of the tick is not an
integer number of xsecs, then some precision is lost in converting
the current time to xsecs. For example, with CONFIG_HZ=1000, the
tick is 1ms long, which is 1048.576 xsecs. That means that
stamp_xsec will advance by either 1048 or 1049 on each tick.
With the right conditions, it is possible for userspace to get
(timebase - tb_orig_stamp) * tb_to_xs being 1049 if the kernel is
slightly late in updating the vdso_datapage, and then for stamp_xsec
to advance by 1048 when the kernel does update it, and for userspace
to then see (timebase - tb_orig_stamp) * tb_to_xs being zero due to
integer truncation. The result is that time appears to go backwards
by 1 microsecond.
To fix this we change the VDSO gettimeofday to use a new field in the
VDSO datapage which stores the nanoseconds part of the time as a
fractional number of seconds in a 0.32 binary fraction format.
(Or put another way, as a 32-bit number in units of 0.23283 ns.)
This is convenient because we can use the mulhwu instruction to
convert it to either microseconds or nanoseconds.
Since it turns out that computing the time of day using this new field
is simpler than either using stamp_xsec (as gettimeofday does) or
stamp_xtime.tv_nsec (as clock_gettime does), this converts both
gettimeofday and clock_gettime to use the new field. The existing
__do_get_tspec function is converted to use the new field and take
a parameter in r7 that indicates the desired resolution, 1,000,000
for microseconds or 1,000,000,000 for nanoseconds. The __do_get_xsec
function is then unused and is deleted.
The new algorithm is
now = ((timebase - tb_orig_stamp) << 12) * tb_to_xs
+ (stamp_xtime_seconds << 32) + stamp_sec_fraction
with 'now' in units of 2^-32 seconds. That is then converted to
seconds and either microseconds or nanoseconds with
seconds = now >> 32
partseconds = ((now & 0xffffffff) * resolution) >> 32
The 32-bit VDSO code also makes a further simplification: it ignores
the bottom 32 bits of the tb_to_xs value, which is a 0.64 format binary
fraction. Doing so gets rid of 4 multiply instructions. Assuming
a timebase frequency of 1GHz or less and an update interval of no
more than 10ms, the upper 32 bits of tb_to_xs will be at least
4503599, so the error from ignoring the low 32 bits will be at most
2.2ns, which is more than an order of magnitude less than the time
taken to do gettimeofday or clock_gettime on our fastest processors,
so there is no possibility of seeing inconsistent values due to this.
This also moves update_gtod() down next to its only caller, and makes
update_vsyscall use the time passed in via the wall_time argument rather
than accessing xtime directly. At present, wall_time always points to
xtime, but that could change in future.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-06-21 03:03:08 +08:00
|
|
|
DEFINE(STAMP_SEC_FRAC, offsetof(struct vdso_data, stamp_sec_fraction));
|
2007-11-20 09:24:45 +08:00
|
|
|
DEFINE(CFG_ICACHE_BLOCKSZ, offsetof(struct vdso_data, icache_block_size));
|
|
|
|
DEFINE(CFG_DCACHE_BLOCKSZ, offsetof(struct vdso_data, dcache_block_size));
|
|
|
|
DEFINE(CFG_ICACHE_LOGBLOCKSZ, offsetof(struct vdso_data, icache_log_block_size));
|
|
|
|
DEFINE(CFG_DCACHE_LOGBLOCKSZ, offsetof(struct vdso_data, dcache_log_block_size));
|
2005-11-11 18:15:21 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
|
|
|
DEFINE(CFG_SYSCALL_MAP64, offsetof(struct vdso_data, syscall_map_64));
|
2005-09-26 14:04:21 +08:00
|
|
|
DEFINE(TVAL64_TV_SEC, offsetof(struct timeval, tv_sec));
|
|
|
|
DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec));
|
|
|
|
DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec));
|
|
|
|
DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec));
|
2005-11-14 11:55:58 +08:00
|
|
|
DEFINE(TSPC64_TV_SEC, offsetof(struct timespec, tv_sec));
|
|
|
|
DEFINE(TSPC64_TV_NSEC, offsetof(struct timespec, tv_nsec));
|
2005-11-11 18:15:21 +08:00
|
|
|
DEFINE(TSPC32_TV_SEC, offsetof(struct compat_timespec, tv_sec));
|
|
|
|
DEFINE(TSPC32_TV_NSEC, offsetof(struct compat_timespec, tv_nsec));
|
|
|
|
#else
|
|
|
|
DEFINE(TVAL32_TV_SEC, offsetof(struct timeval, tv_sec));
|
|
|
|
DEFINE(TVAL32_TV_USEC, offsetof(struct timeval, tv_usec));
|
2005-11-14 11:55:58 +08:00
|
|
|
DEFINE(TSPC32_TV_SEC, offsetof(struct timespec, tv_sec));
|
|
|
|
DEFINE(TSPC32_TV_NSEC, offsetof(struct timespec, tv_nsec));
|
2005-11-11 18:15:21 +08:00
|
|
|
#endif
|
|
|
|
/* timeval/timezone offsets for use by vdso */
|
2005-09-26 14:04:21 +08:00
|
|
|
DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
|
|
|
|
DEFINE(TZONE_TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
|
2005-11-11 18:15:21 +08:00
|
|
|
|
|
|
|
/* Other bits used by the vdso */
|
|
|
|
DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
|
|
|
|
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
|
|
|
|
DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
|
2008-02-08 06:24:52 +08:00
|
|
|
DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
|
2005-11-11 18:15:21 +08:00
|
|
|
|
2007-01-02 02:45:34 +08:00
|
|
|
#ifdef CONFIG_BUG
|
|
|
|
DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
|
|
|
|
#endif
|
2007-08-20 12:58:36 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_PPC_ISERIES
|
|
|
|
/* the assembler miscalculates the VSID values */
|
|
|
|
DEFINE(PAGE_OFFSET_ESID, GET_ESID(PAGE_OFFSET));
|
|
|
|
DEFINE(PAGE_OFFSET_VSID, KERNEL_VSID(PAGE_OFFSET));
|
|
|
|
DEFINE(VMALLOC_START_ESID, GET_ESID(VMALLOC_START));
|
|
|
|
DEFINE(VMALLOC_START_VSID, KERNEL_VSID(VMALLOC_START));
|
2008-04-10 14:39:18 +08:00
|
|
|
|
|
|
|
/* alpaca */
|
|
|
|
DEFINE(ALPACA_SIZE, sizeof(struct alpaca));
|
2007-08-20 12:58:36 +08:00
|
|
|
#endif
|
2007-09-18 15:22:59 +08:00
|
|
|
|
|
|
|
DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE);
|
2008-09-25 00:01:24 +08:00
|
|
|
DEFINE(PTE_SIZE, sizeof(pte_t));
|
2007-12-07 03:11:04 +08:00
|
|
|
|
2008-04-17 12:28:09 +08:00
|
|
|
#ifdef CONFIG_KVM
|
|
|
|
DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
|
|
|
|
DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
|
|
|
|
DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
|
2011-04-28 06:24:10 +08:00
|
|
|
DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave));
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fpr));
|
|
|
|
DEFINE(VCPU_FPSCR, offsetof(struct kvm_vcpu, arch.fpscr));
|
|
|
|
#ifdef CONFIG_ALTIVEC
|
|
|
|
DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr));
|
|
|
|
DEFINE(VCPU_VSCR, offsetof(struct kvm_vcpu, arch.vscr));
|
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_VSX
|
|
|
|
DEFINE(VCPU_VSRS, offsetof(struct kvm_vcpu, arch.vsr));
|
|
|
|
#endif
|
|
|
|
DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
|
|
|
|
DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
|
|
|
|
DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
|
|
|
|
DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
|
|
|
|
DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
|
|
|
|
#ifdef CONFIG_KVM_BOOK3S_64_HV
|
|
|
|
DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.shregs.msr));
|
|
|
|
DEFINE(VCPU_SRR0, offsetof(struct kvm_vcpu, arch.shregs.srr0));
|
|
|
|
DEFINE(VCPU_SRR1, offsetof(struct kvm_vcpu, arch.shregs.srr1));
|
|
|
|
DEFINE(VCPU_SPRG0, offsetof(struct kvm_vcpu, arch.shregs.sprg0));
|
|
|
|
DEFINE(VCPU_SPRG1, offsetof(struct kvm_vcpu, arch.shregs.sprg1));
|
|
|
|
DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2));
|
|
|
|
DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3));
|
|
|
|
#endif
|
2008-04-17 12:28:09 +08:00
|
|
|
DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4));
|
|
|
|
DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5));
|
|
|
|
DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6));
|
|
|
|
DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7));
|
2008-07-26 02:54:53 +08:00
|
|
|
DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid));
|
2011-06-15 07:35:14 +08:00
|
|
|
DEFINE(VCPU_SHADOW_PID1, offsetof(struct kvm_vcpu, arch.shadow_pid1));
|
2010-07-29 20:47:42 +08:00
|
|
|
DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared));
|
2010-07-29 20:47:43 +08:00
|
|
|
DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
|
2011-06-15 07:34:29 +08:00
|
|
|
DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
|
2008-04-17 12:28:09 +08:00
|
|
|
|
2010-04-16 06:11:42 +08:00
|
|
|
/* book3s */
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
#ifdef CONFIG_KVM_BOOK3S_64_HV
|
|
|
|
DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));
|
|
|
|
DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1));
|
|
|
|
DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
|
|
|
|
DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
|
|
|
|
DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1));
|
|
|
|
DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock));
|
|
|
|
DEFINE(KVM_ONLINE_CPUS, offsetof(struct kvm, online_vcpus.counter));
|
|
|
|
DEFINE(KVM_LAST_VCPU, offsetof(struct kvm, arch.last_vcpu));
|
|
|
|
DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
|
|
|
|
DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
|
|
|
|
#endif
|
2010-04-16 06:11:42 +08:00
|
|
|
#ifdef CONFIG_PPC_BOOK3S
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
|
|
|
|
DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
|
2009-10-30 13:47:18 +08:00
|
|
|
DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip));
|
|
|
|
DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr));
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr));
|
|
|
|
DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr));
|
|
|
|
DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr));
|
|
|
|
DEFINE(VCPU_AMR, offsetof(struct kvm_vcpu, arch.amr));
|
|
|
|
DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor));
|
|
|
|
DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl));
|
|
|
|
DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr));
|
2009-10-30 13:47:18 +08:00
|
|
|
DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem));
|
|
|
|
DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter));
|
|
|
|
DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler));
|
2010-01-08 09:58:06 +08:00
|
|
|
DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall));
|
2009-10-30 13:47:18 +08:00
|
|
|
DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags));
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec));
|
|
|
|
DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires));
|
|
|
|
DEFINE(VCPU_LPCR, offsetof(struct kvm_vcpu, arch.lpcr));
|
2011-06-29 08:22:05 +08:00
|
|
|
DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa));
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
|
|
|
|
DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
|
|
|
|
DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
|
|
|
|
DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max));
|
|
|
|
DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
|
|
|
|
DEFINE(VCPU_LAST_CPU, offsetof(struct kvm_vcpu, arch.last_cpu));
|
|
|
|
DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
|
|
|
|
DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
|
|
|
|
DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
|
|
|
|
DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
|
|
|
|
DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) -
|
|
|
|
offsetof(struct kvmppc_vcpu_book3s, vcpu));
|
|
|
|
DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
|
|
|
|
DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv));
|
|
|
|
DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb));
|
2011-06-29 08:20:58 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_PPC_BOOK3S_64
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
#ifdef CONFIG_KVM_BOOK3S_PR
|
2011-06-29 08:20:58 +08:00
|
|
|
/* CONFIG_PPC_BOOK3S_64 with CONFIG_KVM_BOOK3S_PR: emit x as the offset of field f inside the shadow_vcpu member of struct paca_struct. */
# define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f))
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
#else
|
|
|
|
/* CONFIG_KVM_BOOK3S_PR not set: SVCPU_FIELD expands to nothing, so the SVCPU_FIELD(...) entries further down emit no definitions in this configuration. */
# define SVCPU_FIELD(x, f)
|
|
|
|
#endif /* CONFIG_KVM_BOOK3S_PR */
|
2011-06-29 08:20:58 +08:00
|
|
|
/* CONFIG_PPC_BOOK3S_64: emit x as the offset of field f inside the kvm_hstate member of struct paca_struct. */
# define HSTATE_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, kvm_hstate.f))
|
|
|
|
#else /* 32-bit */
|
|
|
|
/* CONFIG_PPC_BOOK3S_64 not set: emit x as the offset of field f measured from the start of struct kvmppc_book3s_shadow_vcpu (not from the PACA). */
# define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct kvmppc_book3s_shadow_vcpu, f))
|
|
|
|
/* CONFIG_PPC_BOOK3S_64 not set: emit x as the offset of field f inside the hstate member of struct kvmppc_book3s_shadow_vcpu. */
# define HSTATE_FIELD(x, f) DEFINE(x, offsetof(struct kvmppc_book3s_shadow_vcpu, hstate.f))
|
|
|
|
#endif /* CONFIG_PPC_BOOK3S_64 */
|
|
|
|
|
|
|
|
SVCPU_FIELD(SVCPU_CR, cr);
|
|
|
|
SVCPU_FIELD(SVCPU_XER, xer);
|
|
|
|
SVCPU_FIELD(SVCPU_CTR, ctr);
|
|
|
|
SVCPU_FIELD(SVCPU_LR, lr);
|
|
|
|
SVCPU_FIELD(SVCPU_PC, pc);
|
|
|
|
SVCPU_FIELD(SVCPU_R0, gpr[0]);
|
|
|
|
SVCPU_FIELD(SVCPU_R1, gpr[1]);
|
|
|
|
SVCPU_FIELD(SVCPU_R2, gpr[2]);
|
|
|
|
SVCPU_FIELD(SVCPU_R3, gpr[3]);
|
|
|
|
SVCPU_FIELD(SVCPU_R4, gpr[4]);
|
|
|
|
SVCPU_FIELD(SVCPU_R5, gpr[5]);
|
|
|
|
SVCPU_FIELD(SVCPU_R6, gpr[6]);
|
|
|
|
SVCPU_FIELD(SVCPU_R7, gpr[7]);
|
|
|
|
SVCPU_FIELD(SVCPU_R8, gpr[8]);
|
|
|
|
SVCPU_FIELD(SVCPU_R9, gpr[9]);
|
|
|
|
SVCPU_FIELD(SVCPU_R10, gpr[10]);
|
|
|
|
SVCPU_FIELD(SVCPU_R11, gpr[11]);
|
|
|
|
SVCPU_FIELD(SVCPU_R12, gpr[12]);
|
|
|
|
SVCPU_FIELD(SVCPU_R13, gpr[13]);
|
|
|
|
SVCPU_FIELD(SVCPU_FAULT_DSISR, fault_dsisr);
|
|
|
|
SVCPU_FIELD(SVCPU_FAULT_DAR, fault_dar);
|
|
|
|
SVCPU_FIELD(SVCPU_LAST_INST, last_inst);
|
|
|
|
SVCPU_FIELD(SVCPU_SHADOW_SRR1, shadow_srr1);
|
2010-04-16 06:11:44 +08:00
|
|
|
#ifdef CONFIG_PPC_BOOK3S_32
|
2011-06-29 08:20:58 +08:00
|
|
|
SVCPU_FIELD(SVCPU_SR, sr);
|
2010-04-16 06:11:44 +08:00
|
|
|
#endif
|
2011-06-29 08:20:58 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
|
|
|
SVCPU_FIELD(SVCPU_SLB, slb);
|
|
|
|
SVCPU_FIELD(SVCPU_SLB_MAX, slb_max);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
HSTATE_FIELD(HSTATE_HOST_R1, host_r1);
|
|
|
|
HSTATE_FIELD(HSTATE_HOST_R2, host_r2);
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
HSTATE_FIELD(HSTATE_HOST_MSR, host_msr);
|
2011-06-29 08:20:58 +08:00
|
|
|
HSTATE_FIELD(HSTATE_VMHANDLER, vmhandler);
|
|
|
|
HSTATE_FIELD(HSTATE_SCRATCH0, scratch0);
|
|
|
|
HSTATE_FIELD(HSTATE_SCRATCH1, scratch1);
|
|
|
|
HSTATE_FIELD(HSTATE_IN_GUEST, in_guest);
|
|
|
|
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
#ifdef CONFIG_KVM_BOOK3S_64_HV
|
|
|
|
HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
|
|
|
|
HSTATE_FIELD(HSTATE_MMCR, host_mmcr);
|
|
|
|
HSTATE_FIELD(HSTATE_PMC, host_pmc);
|
|
|
|
HSTATE_FIELD(HSTATE_PURR, host_purr);
|
|
|
|
HSTATE_FIELD(HSTATE_SPURR, host_spurr);
|
|
|
|
HSTATE_FIELD(HSTATE_DSCR, host_dscr);
|
|
|
|
HSTATE_FIELD(HSTATE_DABR, dabr);
|
|
|
|
HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
|
|
|
|
#endif /* CONFIG_KVM_BOOK3S_64_HV */
|
|
|
|
|
2011-06-29 08:20:58 +08:00
|
|
|
#else /* CONFIG_PPC_BOOK3S */
|
2010-01-08 09:58:03 +08:00
|
|
|
DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
|
|
|
|
DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
|
2010-04-16 06:11:44 +08:00
|
|
|
DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
|
|
|
|
DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
|
|
|
|
DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
|
|
|
|
DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
|
|
|
|
DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
|
|
|
|
DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
|
2010-04-16 06:11:42 +08:00
|
|
|
#endif /* CONFIG_PPC_BOOK3S */
|
2011-06-29 08:20:58 +08:00
|
|
|
#endif /* CONFIG_KVM */
|
2010-07-29 20:47:57 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_KVM_GUEST
|
|
|
|
DEFINE(KVM_MAGIC_SCRATCH1, offsetof(struct kvm_vcpu_arch_shared,
|
|
|
|
scratch1));
|
|
|
|
DEFINE(KVM_MAGIC_SCRATCH2, offsetof(struct kvm_vcpu_arch_shared,
|
|
|
|
scratch2));
|
|
|
|
DEFINE(KVM_MAGIC_SCRATCH3, offsetof(struct kvm_vcpu_arch_shared,
|
|
|
|
scratch3));
|
|
|
|
DEFINE(KVM_MAGIC_INT, offsetof(struct kvm_vcpu_arch_shared,
|
|
|
|
int_pending));
|
|
|
|
DEFINE(KVM_MAGIC_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
|
|
|
|
DEFINE(KVM_MAGIC_CRITICAL, offsetof(struct kvm_vcpu_arch_shared,
|
|
|
|
critical));
|
2010-08-03 16:39:35 +08:00
|
|
|
DEFINE(KVM_MAGIC_SR, offsetof(struct kvm_vcpu_arch_shared, sr));
|
2010-07-29 20:47:57 +08:00
|
|
|
#endif
|
|
|
|
|
2008-12-11 09:55:41 +08:00
|
|
|
#ifdef CONFIG_44x
|
|
|
|
DEFINE(PGD_T_LOG2, PGD_T_LOG2);
|
|
|
|
DEFINE(PTE_T_LOG2, PTE_T_LOG2);
|
|
|
|
#endif
|
2009-10-17 07:48:40 +08:00
|
|
|
#ifdef CONFIG_PPC_FSL_BOOK3E
|
2010-05-14 03:38:21 +08:00
|
|
|
DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam));
|
|
|
|
DEFINE(TLBCAM_MAS0, offsetof(struct tlbcam, MAS0));
|
|
|
|
DEFINE(TLBCAM_MAS1, offsetof(struct tlbcam, MAS1));
|
|
|
|
DEFINE(TLBCAM_MAS2, offsetof(struct tlbcam, MAS2));
|
|
|
|
DEFINE(TLBCAM_MAS3, offsetof(struct tlbcam, MAS3));
|
|
|
|
DEFINE(TLBCAM_MAS7, offsetof(struct tlbcam, MAS7));
|
|
|
|
#endif
|
2008-04-17 12:28:09 +08:00
|
|
|
|
2011-06-15 07:34:31 +08:00
|
|
|
#if defined(CONFIG_KVM) && defined(CONFIG_SPE)
|
|
|
|
DEFINE(VCPU_EVR, offsetof(struct kvm_vcpu, arch.evr[0]));
|
|
|
|
DEFINE(VCPU_ACC, offsetof(struct kvm_vcpu, arch.acc));
|
|
|
|
DEFINE(VCPU_SPEFSCR, offsetof(struct kvm_vcpu, arch.spefscr));
|
|
|
|
DEFINE(VCPU_HOST_SPEFSCR, offsetof(struct kvm_vcpu, arch.host_spefscr));
|
|
|
|
#endif
|
|
|
|
|
2008-12-03 05:51:57 +08:00
|
|
|
#ifdef CONFIG_KVM_EXIT_TIMING
|
|
|
|
DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
|
|
|
|
arch.timing_exit.tv32.tbu));
|
|
|
|
DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu,
|
|
|
|
arch.timing_exit.tv32.tbl));
|
|
|
|
DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu,
|
|
|
|
arch.timing_last_enter.tv32.tbu));
|
|
|
|
DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu,
|
|
|
|
arch.timing_last_enter.tv32.tbl));
|
|
|
|
#endif
|
|
|
|
|
2005-09-26 14:04:21 +08:00
|
|
|
return 0;
|
|
|
|
}
|