Merge tag 'kvm-3.8-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Marcelo Tosatti:
 "Considerable KVM/PPC work, x86 kvmclock vsyscall support,
  IA32_TSC_ADJUST MSR emulation, amongst others."

Fix up trivial conflict in kernel/sched/core.c due to cross-cpu
migration notifier added next to rq migration call-back.

* tag 'kvm-3.8-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (156 commits)
  KVM: emulator: fix real mode segment checks in address linearization
  VMX: remove unneeded enable_unrestricted_guest check
  KVM: VMX: fix DPL during entry to protected mode
  x86/kexec: crash_vmclear_local_vmcss needs __rcu
  kvm: Fix irqfd resampler list walk
  KVM: VMX: provide the vmclear function and a bitmap to support VMCLEAR in kdump
  x86/kexec: VMCLEAR VMCSs loaded on all cpus if necessary
  KVM: MMU: optimize for set_spte
  KVM: PPC: booke: Get/set guest EPCR register using ONE_REG interface
  KVM: PPC: bookehv: Add EPCR support in mtspr/mfspr emulation
  KVM: PPC: bookehv: Add guest computation mode for irq delivery
  KVM: PPC: Make EPCR a valid field for booke64 and bookehv
  KVM: PPC: booke: Extend MAS2 EPN mask for 64-bit
  KVM: PPC: e500: Mask MAS2 EPN high 32-bits in 32/64 tlbwe emulation
  KVM: PPC: Mask ea's high 32-bits in 32/64 instr emulation
  KVM: PPC: e500: Add emulation helper for getting instruction ea
  KVM: PPC: bookehv64: Add support for interrupt handling
  KVM: PPC: bookehv: Remove GET_VCPU macro from exception handler
  KVM: PPC: booke: Fix get_tb() compile error on 64-bit
  KVM: PPC: e500: Silence bogus GCC warning in tlb code
  ...
commit 66cdd0ceaf
@@ -1194,12 +1194,15 @@ struct kvm_ppc_pvinfo {

This ioctl fetches PV specific information that needs to be passed to the
guest using the device tree or other means from vm context.

-For now the only implemented piece of information distributed here is an array
-of 4 instructions that make up a hypercall.
+The hcall array defines 4 instructions that make up a hypercall.

If any additional field gets added to this structure later on, a bit for that
additional piece of information will be set in the flags bitmap.

The flags bitmap is defined as:

   /* the host supports the ePAPR idle hcall */
   #define KVM_PPC_PVINFO_FLAGS_EV_IDLE   (1<<0)
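
As a hedged illustration of consuming this ioctl from userspace — the vm_fd
variable, helper name and error handling below are assumptions made for the
example, not part of the documented interface:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Sketch: query PV info and test the ePAPR idle flag.
     * vm_fd is an already-open KVM VM file descriptor (assumed). */
    static int host_has_ev_idle(int vm_fd)
    {
            struct kvm_ppc_pvinfo pvinfo;

            if (ioctl(vm_fd, KVM_PPC_GET_PVINFO, &pvinfo) < 0)
                    return 0;       /* ioctl unsupported or failed */

            /* hcall[] holds the 4 hypercall instructions;
             * the flags bitmap advertises optional features. */
            return !!(pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE);
    }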

4.48 KVM_ASSIGN_PCI_DEVICE

@@ -1731,7 +1734,46 @@ registers, find a list below:

  Arch  |       Register        | Width (bits)
        |                       |
  PPC   | KVM_REG_PPC_HIOR      | 64
+ PPC   | KVM_REG_PPC_IAC1      | 64
+ PPC   | KVM_REG_PPC_IAC2      | 64
+ PPC   | KVM_REG_PPC_IAC3      | 64
+ PPC   | KVM_REG_PPC_IAC4      | 64
+ PPC   | KVM_REG_PPC_DAC1      | 64
+ PPC   | KVM_REG_PPC_DAC2      | 64
+ PPC   | KVM_REG_PPC_DABR      | 64
+ PPC   | KVM_REG_PPC_DSCR      | 64
+ PPC   | KVM_REG_PPC_PURR      | 64
+ PPC   | KVM_REG_PPC_SPURR     | 64
+ PPC   | KVM_REG_PPC_DAR       | 64
+ PPC   | KVM_REG_PPC_DSISR     | 32
+ PPC   | KVM_REG_PPC_AMR       | 64
+ PPC   | KVM_REG_PPC_UAMOR     | 64
+ PPC   | KVM_REG_PPC_MMCR0     | 64
+ PPC   | KVM_REG_PPC_MMCR1     | 64
+ PPC   | KVM_REG_PPC_MMCRA     | 64
+ PPC   | KVM_REG_PPC_PMC1      | 32
+ PPC   | KVM_REG_PPC_PMC2      | 32
+ PPC   | KVM_REG_PPC_PMC3      | 32
+ PPC   | KVM_REG_PPC_PMC4      | 32
+ PPC   | KVM_REG_PPC_PMC5      | 32
+ PPC   | KVM_REG_PPC_PMC6      | 32
+ PPC   | KVM_REG_PPC_PMC7      | 32
+ PPC   | KVM_REG_PPC_PMC8      | 32
+ PPC   | KVM_REG_PPC_FPR0      | 64
+ ...
+ PPC   | KVM_REG_PPC_FPR31     | 64
+ PPC   | KVM_REG_PPC_VR0       | 128
+ ...
+ PPC   | KVM_REG_PPC_VR31      | 128
+ PPC   | KVM_REG_PPC_VSR0      | 128
+ ...
+ PPC   | KVM_REG_PPC_VSR31     | 128
+ PPC   | KVM_REG_PPC_FPSCR     | 64
+ PPC   | KVM_REG_PPC_VSCR      | 32
+ PPC   | KVM_REG_PPC_VPA_ADDR  | 64
+ PPC   | KVM_REG_PPC_VPA_SLB   | 128
+ PPC   | KVM_REG_PPC_VPA_DTL   | 128
+ PPC   | KVM_REG_PPC_EPCR      | 32

4.69 KVM_GET_ONE_REG

@@ -1747,7 +1789,7 @@ kvm_one_reg struct passed in. On success, the register value can be found
at the memory location pointed to by "addr".

The list of registers accessible using this interface is identical to the
-list in 4.64.
+list in 4.68.
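
For illustration, a minimal sketch of fetching a single register through
this interface; vcpu_fd and the choice of KVM_REG_PPC_HIOR are assumptions
made for the example:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Sketch: read one 64-bit register via KVM_GET_ONE_REG. */
    static int read_hior(int vcpu_fd, __u64 *value)
    {
            struct kvm_one_reg reg = {
                    .id   = KVM_REG_PPC_HIOR,
                    .addr = (__u64)(unsigned long)value, /* filled on success */
            };

            return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
    }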

4.70 KVM_KVMCLOCK_CTRL

@@ -1997,6 +2039,93 @@ return the hash table order in the parameter. (If the guest is using
the virtualized real-mode area (VRMA) facility, the kernel will
re-create the VRMA HPTEs on the next KVM_RUN of any vcpu.)

4.77 KVM_S390_INTERRUPT

Capability: basic
Architectures: s390
Type: vm ioctl, vcpu ioctl
Parameters: struct kvm_s390_interrupt (in)
Returns: 0 on success, -1 on error

Allows userspace to inject an interrupt into the guest. Interrupts can be
floating (vm ioctl) or per cpu (vcpu ioctl), depending on the interrupt type.

Interrupt parameters are passed via kvm_s390_interrupt:

struct kvm_s390_interrupt {
        __u32 type;
        __u32 parm;
        __u64 parm64;
};

type can be one of the following:

KVM_S390_SIGP_STOP (vcpu) - sigp stop
KVM_S390_PROGRAM_INT (vcpu) - program check; code in parm
KVM_S390_SIGP_SET_PREFIX (vcpu) - sigp set prefix; prefix address in parm
KVM_S390_RESTART (vcpu) - restart
KVM_S390_INT_VIRTIO (vm) - virtio external interrupt; external interrupt
                           parameters in parm and parm64
KVM_S390_INT_SERVICE (vm) - sclp external interrupt; sclp parameter in parm
KVM_S390_INT_EMERGENCY (vcpu) - sigp emergency; source cpu in parm
KVM_S390_INT_EXTERNAL_CALL (vcpu) - sigp external call; source cpu in parm

Note that the vcpu ioctl is asynchronous to vcpu execution.
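
As a hedged sketch of injecting one of the vcpu-local interrupt types;
vcpu_fd and the program-check code value are assumptions for the example:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Sketch: queue a program check on a vcpu (asynchronous, see note). */
    static int inject_program_check(int vcpu_fd, __u32 code)
    {
            struct kvm_s390_interrupt irq = {
                    .type = KVM_S390_PROGRAM_INT,
                    .parm = code,   /* "code in parm", per the list above */
            };

            return ioctl(vcpu_fd, KVM_S390_INTERRUPT, &irq);
    }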

4.78 KVM_PPC_GET_HTAB_FD

Capability: KVM_CAP_PPC_HTAB_FD
Architectures: powerpc
Type: vm ioctl
Parameters: Pointer to struct kvm_get_htab_fd (in)
Returns: file descriptor number (>= 0) on success, -1 on error

This returns a file descriptor that can be used either to read out the
entries in the guest's hashed page table (HPT), or to write entries to
initialize the HPT.  The returned fd can only be written to if the
KVM_GET_HTAB_WRITE bit is set in the flags field of the argument, and
can only be read if that bit is clear.  The argument struct looks like
this:

/* For KVM_PPC_GET_HTAB_FD */
struct kvm_get_htab_fd {
        __u64   flags;
        __u64   start_index;
        __u64   reserved[2];
};

/* Values for kvm_get_htab_fd.flags */
#define KVM_GET_HTAB_BOLTED_ONLY        ((__u64)0x1)
#define KVM_GET_HTAB_WRITE              ((__u64)0x2)

The `start_index' field gives the index in the HPT of the entry at
which to start reading.  It is ignored when writing.

Reads on the fd will initially supply information about all
"interesting" HPT entries.  Interesting entries are those with the
bolted bit set, if the KVM_GET_HTAB_BOLTED_ONLY bit is set, otherwise
all entries.  When the end of the HPT is reached, the read() will
return.  If read() is called again on the fd, it will start again from
the beginning of the HPT, but will only return HPT entries that have
changed since they were last read.

Data read or written is structured as a header (8 bytes) followed by a
series of valid HPT entries (16 bytes) each.  The header indicates how
many valid HPT entries there are and how many invalid entries follow
the valid entries.  The invalid entries are not represented explicitly
in the stream.  The header format is:

struct kvm_get_htab_header {
        __u32   index;
        __u16   n_valid;
        __u16   n_invalid;
};

Writes to the fd create HPT entries starting at the index given in the
header; first `n_valid' valid entries with contents from the data
written, then `n_invalid' invalid entries, invalidating any previously
valid entries found.
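
To make the read path concrete, a hedged sketch of draining the bolted HPT
entries; vm_fd, the buffer size and the omitted record parsing are
assumptions for the example:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    /* Sketch: open the HPT fd read-only and drain it once. */
    static int dump_bolted_hptes(int vm_fd)
    {
            struct kvm_get_htab_fd ghf = {
                    .flags = KVM_GET_HTAB_BOLTED_ONLY, /* bolted entries only */
                    .start_index = 0,
            };
            char buf[4096];
            ssize_t n;
            int fd = ioctl(vm_fd, KVM_PPC_GET_HTAB_FD, &ghf);

            if (fd < 0)
                    return -1;
            /* each chunk holds kvm_get_htab_header records, each followed
             * by n_valid 16-byte HPT entries */
            while ((n = read(fd, buf, sizeof(buf))) > 0)
                    ;       /* parse the records in buf here */
            close(fd);
            return 0;
    }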

5. The kvm_run structure
------------------------

@@ -2109,7 +2238,8 @@ executed a memory-mapped I/O instruction which could not be satisfied
by kvm.  The 'data' member contains the written data if 'is_write' is
true, and should be filled by application code otherwise.

-NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO and KVM_EXIT_OSI, the corresponding
+NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_DCR
+      and KVM_EXIT_PAPR the corresponding
operations are complete (and guest state is consistent) only after userspace
has re-entered the kernel with KVM_RUN.  The kernel side will first finish
incomplete operations and then check for pending signals.  Userspace

@@ -4314,10 +4314,10 @@ F: include/linux/kvm*
F:      virt/kvm/

KERNEL VIRTUAL MACHINE (KVM) FOR AMD-V
-M:     Joerg Roedel <joerg.roedel@amd.com>
+M:     Joerg Roedel <joro@8bytes.org>
L:      kvm@vger.kernel.org
W:      http://kvm.qumranet.com
-S:     Supported
+S:     Maintained
F:      arch/x86/include/asm/svm.h
F:      arch/x86/kvm/svm.c

@@ -4325,6 +4325,7 @@ KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC
M:      Alexander Graf <agraf@suse.de>
L:      kvm-ppc@vger.kernel.org
W:      http://kvm.qumranet.com
+T:     git git://github.com/agraf/linux-2.6.git
S:      Supported
F:      arch/powerpc/include/asm/kvm*
F:      arch/powerpc/kvm/

@@ -1330,6 +1330,11 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
        return 0;
}

+int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+       return 0;
+}
+
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
        return -EINVAL;
@@ -1362,11 +1367,9 @@ static void kvm_release_vm_pages(struct kvm *kvm)
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
        int j;
-       unsigned long base_gfn;

        slots = kvm_memslots(kvm);
        kvm_for_each_memslot(memslot, slots) {
-               base_gfn = memslot->base_gfn;
                for (j = 0; j < memslot->npages; j++) {
                        if (memslot->rmap[j])
                                put_page((struct page *)memslot->rmap[j]);

@@ -1,5 +1,4 @@
generic-y += clkdev.h
generic-y += rwsem.h
generic-y += trace_clock.h

@@ -50,64 +50,13 @@
#ifndef _EPAPR_HCALLS_H
#define _EPAPR_HCALLS_H

+#include <uapi/asm/epapr_hcalls.h>
+
#ifndef __ASSEMBLY__
#include <linux/types.h>
#include <linux/errno.h>
#include <asm/byteorder.h>

-#define EV_BYTE_CHANNEL_SEND           1
-#define EV_BYTE_CHANNEL_RECEIVE        2
-#define EV_BYTE_CHANNEL_POLL           3
-#define EV_INT_SET_CONFIG              4
-#define EV_INT_GET_CONFIG              5
-#define EV_INT_SET_MASK                6
-#define EV_INT_GET_MASK                7
-#define EV_INT_IACK                    9
-#define EV_INT_EOI                     10
-#define EV_INT_SEND_IPI                11
-#define EV_INT_SET_TASK_PRIORITY       12
-#define EV_INT_GET_TASK_PRIORITY       13
-#define EV_DOORBELL_SEND               14
-#define EV_MSGSND                      15
-#define EV_IDLE                        16
-
-/* vendor ID: epapr */
-#define EV_LOCAL_VENDOR_ID             0       /* for private use */
-#define EV_EPAPR_VENDOR_ID             1
-#define EV_FSL_VENDOR_ID               2       /* Freescale Semiconductor */
-#define EV_IBM_VENDOR_ID               3       /* IBM */
-#define EV_GHS_VENDOR_ID               4       /* Green Hills Software */
-#define EV_ENEA_VENDOR_ID              5       /* Enea */
-#define EV_WR_VENDOR_ID                6       /* Wind River Systems */
-#define EV_AMCC_VENDOR_ID              7       /* Applied Micro Circuits */
-#define EV_KVM_VENDOR_ID               42      /* KVM */
-
-/* The max number of bytes that a byte channel can send or receive per call */
-#define EV_BYTE_CHANNEL_MAX_BYTES      16
-
-#define _EV_HCALL_TOKEN(id, num)       (((id) << 16) | (num))
-#define EV_HCALL_TOKEN(hcall_num)      _EV_HCALL_TOKEN(EV_EPAPR_VENDOR_ID, hcall_num)
-
-/* epapr error codes */
-#define EV_EPERM               1       /* Operation not permitted */
-#define EV_ENOENT              2       /* Entry Not Found */
-#define EV_EIO                 3       /* I/O error occurred */
-#define EV_EAGAIN              4       /* The operation had insufficient
-                                        * resources to complete and should be
-                                        * retried
-                                        */
-#define EV_ENOMEM              5       /* There was insufficient memory to
-                                        * complete the operation */
-#define EV_EFAULT              6       /* Bad guest address */
-#define EV_ENODEV              7       /* No such device */
-#define EV_EINVAL              8       /* An argument supplied to the hcall
-                                          was out of range or invalid */
-#define EV_INTERNAL            9       /* An internal error occurred */
-#define EV_CONFIG              10      /* A configuration error was detected */
-#define EV_INVALID_STATE       11      /* The object is in an invalid state */
-#define EV_UNIMPLEMENTED       12      /* Unimplemented hypercall */
-#define EV_BUFFER_OVERFLOW     13      /* Caller-supplied buffer too small */

/*
 * Hypercall register clobber list
 *
@@ -193,7 +142,7 @@ static inline unsigned int ev_int_set_config(unsigned int interrupt,
        r5  = priority;
        r6  = destination;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6)
                : : EV_HCALL_CLOBBERS4
        );
@@ -222,7 +171,7 @@ static inline unsigned int ev_int_get_config(unsigned int interrupt,
        r11 = EV_HCALL_TOKEN(EV_INT_GET_CONFIG);
        r3 = interrupt;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5), "=r" (r6)
                : : EV_HCALL_CLOBBERS4
        );
@@ -252,7 +201,7 @@ static inline unsigned int ev_int_set_mask(unsigned int interrupt,
        r3 = interrupt;
        r4 = mask;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3), "+r" (r4)
                : : EV_HCALL_CLOBBERS2
        );
@@ -277,7 +226,7 @@ static inline unsigned int ev_int_get_mask(unsigned int interrupt,
        r11 = EV_HCALL_TOKEN(EV_INT_GET_MASK);
        r3 = interrupt;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3), "=r" (r4)
                : : EV_HCALL_CLOBBERS2
        );
@@ -305,7 +254,7 @@ static inline unsigned int ev_int_eoi(unsigned int interrupt)
        r11 = EV_HCALL_TOKEN(EV_INT_EOI);
        r3 = interrupt;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3)
                : : EV_HCALL_CLOBBERS1
        );
@@ -344,7 +293,7 @@ static inline unsigned int ev_byte_channel_send(unsigned int handle,
        r7 = be32_to_cpu(p[2]);
        r8 = be32_to_cpu(p[3]);

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3),
                  "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), "+r" (r8)
                : : EV_HCALL_CLOBBERS6
@@ -383,7 +332,7 @@ static inline unsigned int ev_byte_channel_receive(unsigned int handle,
        r3 = handle;
        r4 = *count;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3), "+r" (r4),
                  "=r" (r5), "=r" (r6), "=r" (r7), "=r" (r8)
                : : EV_HCALL_CLOBBERS6
@@ -421,7 +370,7 @@ static inline unsigned int ev_byte_channel_poll(unsigned int handle,
        r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_POLL);
        r3 = handle;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5)
                : : EV_HCALL_CLOBBERS3
        );
@@ -454,7 +403,7 @@ static inline unsigned int ev_int_iack(unsigned int handle,
        r11 = EV_HCALL_TOKEN(EV_INT_IACK);
        r3 = handle;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3), "=r" (r4)
                : : EV_HCALL_CLOBBERS2
        );
@@ -478,7 +427,7 @@ static inline unsigned int ev_doorbell_send(unsigned int handle)
        r11 = EV_HCALL_TOKEN(EV_DOORBELL_SEND);
        r3 = handle;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3)
                : : EV_HCALL_CLOBBERS1
        );
@@ -498,12 +447,12 @@ static inline unsigned int ev_idle(void)

        r11 = EV_HCALL_TOKEN(EV_IDLE);

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "=r" (r3)
                : : EV_HCALL_CLOBBERS1
        );

        return r3;
}

#endif
#endif /* !__ASSEMBLY__ */
#endif /* _EPAPR_HCALLS_H */

@@ -96,7 +96,7 @@ static inline unsigned int fh_send_nmi(unsigned int vcpu_mask)
        r11 = FH_HCALL_TOKEN(FH_SEND_NMI);
        r3 = vcpu_mask;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3)
                : : EV_HCALL_CLOBBERS1
        );
@@ -151,7 +151,7 @@ static inline unsigned int fh_partition_get_dtprop(int handle,
        r9 = (uint32_t)propvalue_addr;
        r10 = *propvalue_len;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11),
                  "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7),
                  "+r" (r8), "+r" (r9), "+r" (r10)
@@ -205,7 +205,7 @@ static inline unsigned int fh_partition_set_dtprop(int handle,
        r9 = (uint32_t)propvalue_addr;
        r10 = propvalue_len;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11),
                  "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7),
                  "+r" (r8), "+r" (r9), "+r" (r10)
@@ -229,7 +229,7 @@ static inline unsigned int fh_partition_restart(unsigned int partition)
        r11 = FH_HCALL_TOKEN(FH_PARTITION_RESTART);
        r3 = partition;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3)
                : : EV_HCALL_CLOBBERS1
        );
@@ -262,7 +262,7 @@ static inline unsigned int fh_partition_get_status(unsigned int partition,
        r11 = FH_HCALL_TOKEN(FH_PARTITION_GET_STATUS);
        r3 = partition;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3), "=r" (r4)
                : : EV_HCALL_CLOBBERS2
        );
@@ -295,7 +295,7 @@ static inline unsigned int fh_partition_start(unsigned int partition,
        r4 = entry_point;
        r5 = load;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5)
                : : EV_HCALL_CLOBBERS3
        );
@@ -317,7 +317,7 @@ static inline unsigned int fh_partition_stop(unsigned int partition)
        r11 = FH_HCALL_TOKEN(FH_PARTITION_STOP);
        r3 = partition;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3)
                : : EV_HCALL_CLOBBERS1
        );
@@ -376,7 +376,7 @@ static inline unsigned int fh_partition_memcpy(unsigned int source,
#endif
        r7 = count;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11),
                  "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7)
                : : EV_HCALL_CLOBBERS5
@@ -399,7 +399,7 @@ static inline unsigned int fh_dma_enable(unsigned int liodn)
        r11 = FH_HCALL_TOKEN(FH_DMA_ENABLE);
        r3 = liodn;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3)
                : : EV_HCALL_CLOBBERS1
        );
@@ -421,7 +421,7 @@ static inline unsigned int fh_dma_disable(unsigned int liodn)
        r11 = FH_HCALL_TOKEN(FH_DMA_DISABLE);
        r3 = liodn;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3)
                : : EV_HCALL_CLOBBERS1
        );
@@ -447,7 +447,7 @@ static inline unsigned int fh_vmpic_get_msir(unsigned int interrupt,
        r11 = FH_HCALL_TOKEN(FH_VMPIC_GET_MSIR);
        r3 = interrupt;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3), "=r" (r4)
                : : EV_HCALL_CLOBBERS2
        );
@@ -469,7 +469,7 @@ static inline unsigned int fh_system_reset(void)

        r11 = FH_HCALL_TOKEN(FH_SYSTEM_RESET);

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "=r" (r3)
                : : EV_HCALL_CLOBBERS1
        );
@@ -506,7 +506,7 @@ static inline unsigned int fh_err_get_info(int queue, uint32_t *bufsize,
        r6 = addr_lo;
        r7 = peek;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6),
                  "+r" (r7)
                : : EV_HCALL_CLOBBERS5
@@ -542,7 +542,7 @@ static inline unsigned int fh_get_core_state(unsigned int handle,
        r3 = handle;
        r4 = vcpu;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3), "+r" (r4)
                : : EV_HCALL_CLOBBERS2
        );
@@ -572,7 +572,7 @@ static inline unsigned int fh_enter_nap(unsigned int handle, unsigned int vcpu)
        r3 = handle;
        r4 = vcpu;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3), "+r" (r4)
                : : EV_HCALL_CLOBBERS2
        );
@@ -597,7 +597,7 @@ static inline unsigned int fh_exit_nap(unsigned int handle, unsigned int vcpu)
        r3 = handle;
        r4 = vcpu;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3), "+r" (r4)
                : : EV_HCALL_CLOBBERS2
        );
@@ -618,7 +618,7 @@ static inline unsigned int fh_claim_device(unsigned int handle)
        r11 = FH_HCALL_TOKEN(FH_CLAIM_DEVICE);
        r3 = handle;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3)
                : : EV_HCALL_CLOBBERS1
        );
@@ -645,7 +645,7 @@ static inline unsigned int fh_partition_stop_dma(unsigned int handle)
        r11 = FH_HCALL_TOKEN(FH_PARTITION_STOP_DMA);
        r3 = handle;

-       __asm__ __volatile__ ("sc 1"
+       asm volatile("bl        epapr_hypercall_start"
                : "+r" (r11), "+r" (r3)
                : : EV_HCALL_CLOBBERS1
        );

@@ -118,6 +118,7 @@

#define RESUME_FLAG_NV          (1<<0)  /* Reload guest nonvolatile state? */
#define RESUME_FLAG_HOST        (1<<1)  /* Resume host? */
+#define RESUME_FLAG_ARCH1      (1<<2)

#define RESUME_GUEST            0
#define RESUME_GUEST_NV         RESUME_FLAG_NV

@@ -81,6 +81,8 @@ struct kvmppc_vcpu_book3s {
        u64 sdr1;
        u64 hior;
        u64 msr_mask;
+       u64 purr_offset;
+       u64 spurr_offset;
#ifdef CONFIG_PPC_BOOK3S_32
        u32 vsid_pool[VSID_POOL_SIZE];
        u32 vsid_next;
@@ -157,10 +159,14 @@ extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
                        long pte_index, unsigned long pteh, unsigned long ptel);
extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
                        long pte_index, unsigned long pteh, unsigned long ptel);
extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
                        long pte_index, unsigned long pteh, unsigned long ptel,
                        pgd_t *pgdir, bool realmode, unsigned long *idx_ret);
extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
                        unsigned long pte_index, unsigned long avpn,
                        unsigned long *hpret);
extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
-                       struct kvm_memory_slot *memslot);
+                       struct kvm_memory_slot *memslot, unsigned long *map);

extern void kvmppc_entry_trampoline(void);
extern void kvmppc_hv_entry_trampoline(void);

@@ -50,6 +50,15 @@ extern int kvm_hpt_order; /* order of preallocated HPTs */
#define HPTE_V_HVLOCK   0x40UL
#define HPTE_V_ABSENT   0x20UL

+/*
+ * We use this bit in the guest_rpte field of the revmap entry
+ * to indicate a modified HPTE.
+ */
+#define HPTE_GR_MODIFIED       (1ul << 62)
+
+/* These bits are reserved in the guest view of the HPTE */
+#define HPTE_GR_RESERVED       HPTE_GR_MODIFIED
+
static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits)
{
        unsigned long tmp, old;
@@ -60,7 +69,7 @@ static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits)
        "       ori     %0,%0,%4\n"
        "       stdcx.  %0,0,%2\n"
        "       beq+    2f\n"
-       "       li      %1,%3\n"
+       "       mr      %1,%3\n"
        "2:     isync"
        : "=&r" (tmp), "=&r" (old)
        : "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK)
@@ -237,4 +246,26 @@ static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
        return !(memslot->base_gfn & mask) && !(memslot->npages & mask);
}

+/*
+ * This works for 4k, 64k and 16M pages on POWER7,
+ * and 4k and 16M pages on PPC970.
+ */
+static inline unsigned long slb_pgsize_encoding(unsigned long psize)
+{
+       unsigned long senc = 0;
+
+       if (psize > 0x1000) {
+               senc = SLB_VSID_L;
+               if (psize == 0x10000)
+                       senc |= SLB_VSID_LP_01;
+       }
+       return senc;
+}
+
+static inline int is_vrma_hpte(unsigned long hpte_v)
+{
+       return (hpte_v & ~0xffffffUL) ==
+               (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)));
+}
+
#endif /* __ASM_KVM_BOOK3S_64_H__ */

@@ -17,6 +17,7 @@
 * there are no exceptions for which we fall through directly to
 * the normal host handler.
 *
+ * 32-bit host
 * Expected inputs (normal exceptions):
 *   SCRATCH0 = saved r10
 *   r10 = thread struct
@@ -33,14 +34,38 @@
 *   *(r8 + GPR9) = saved r9
 *   *(r8 + GPR10) = saved r10 (r10 not yet clobbered)
 *   *(r8 + GPR11) = saved r11
 *
+ * 64-bit host
+ * Expected inputs (GEN/GDBELL/DBG/MC exception types):
+ *  r10 = saved CR
+ *  r13 = PACA_POINTER
+ *  *(r13 + PACA_EX##type + EX_R10) = saved r10
+ *  *(r13 + PACA_EX##type + EX_R11) = saved r11
+ *  SPRN_SPRG_##type##_SCRATCH = saved r13
+ *
+ * Expected inputs (CRIT exception type):
+ *  r10 = saved CR
+ *  r13 = PACA_POINTER
+ *  *(r13 + PACA_EX##type + EX_R10) = saved r10
+ *  *(r13 + PACA_EX##type + EX_R11) = saved r11
+ *  *(r13 + PACA_EX##type + EX_R13) = saved r13
+ *
+ * Expected inputs (TLB exception type):
+ *  r10 = saved CR
+ *  r13 = PACA_POINTER
+ *  *(r13 + PACA_EX##type + EX_TLB_R10) = saved r10
+ *  *(r13 + PACA_EX##type + EX_TLB_R11) = saved r11
+ *  SPRN_SPRG_GEN_SCRATCH = saved r13
+ *
+ * Only the bolted version of TLB miss exception handlers is supported now.
 */
.macro DO_KVM intno srr1
#ifdef CONFIG_KVM_BOOKE_HV
BEGIN_FTR_SECTION
        mtocrf  0x80, r11       /* check MSR[GS] without clobbering reg */
-       bf      3, kvmppc_resume_\intno\()_\srr1
+       bf      3, 1975f
        b       kvmppc_handler_\intno\()_\srr1
-kvmppc_resume_\intno\()_\srr1:
+1975:
END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
#endif
.endm

@@ -46,7 +46,7 @@
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#endif

-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#if !defined(CONFIG_KVM_440)
#include <linux/mmu_notifier.h>

#define KVM_ARCH_WANT_MMU_NOTIFIER
@@ -204,7 +204,7 @@ struct revmap_entry {
};

/*
- * We use the top bit of each memslot->rmap entry as a lock bit,
+ * We use the top bit of each memslot->arch.rmap entry as a lock bit,
 * and bit 32 as a present flag.  The bottom 32 bits are the
 * index in the guest HPT of a HPTE that points to the page.
 */
@@ -215,14 +215,17 @@ struct revmap_entry {
#define KVMPPC_RMAP_PRESENT     0x100000000ul
#define KVMPPC_RMAP_INDEX       0xfffffffful

-/* Low-order bits in kvm->arch.slot_phys[][] */
+/* Low-order bits in memslot->arch.slot_phys[] */
#define KVMPPC_PAGE_ORDER_MASK  0x1f
#define KVMPPC_PAGE_NO_CACHE    HPTE_R_I        /* 0x20 */
#define KVMPPC_PAGE_WRITETHRU   HPTE_R_W        /* 0x40 */
#define KVMPPC_GOT_PAGE         0x80

struct kvm_arch_memory_slot {
+#ifdef CONFIG_KVM_BOOK3S_64_HV
        unsigned long *rmap;
+       unsigned long *slot_phys;
+#endif /* CONFIG_KVM_BOOK3S_64_HV */
};

struct kvm_arch {
@@ -243,12 +246,12 @@ struct kvm_arch {
        int using_mmu_notifiers;
        u32 hpt_order;
        atomic_t vcpus_running;
        u32 online_vcores;
        unsigned long hpt_npte;
        unsigned long hpt_mask;
+       atomic_t hpte_mod_interest;
        spinlock_t slot_phys_lock;
-       unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
-       int slot_npages[KVM_MEM_SLOTS_NUM];
-       unsigned short last_vcpu[NR_CPUS];
+       cpumask_t need_tlb_flush;
        struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
        struct kvmppc_linear_info *hpt_li;
#endif /* CONFIG_KVM_BOOK3S_64_HV */
@@ -273,6 +276,7 @@ struct kvmppc_vcore {
        int nap_count;
        int napping_threads;
        u16 pcpu;
+       u16 last_cpu;
        u8 vcore_state;
        u8 in_guest;
        struct list_head runnable_threads;
@@ -288,9 +292,10 @@ struct kvmppc_vcore {

/* Values for vcore_state */
#define VCORE_INACTIVE  0
-#define VCORE_RUNNING  1
-#define VCORE_EXITING  2
-#define VCORE_SLEEPING 3
+#define VCORE_SLEEPING 1
+#define VCORE_STARTING 2
+#define VCORE_RUNNING  3
+#define VCORE_EXITING  4

/*
 * Struct used to manage memory for a virtual processor area
@@ -346,6 +351,27 @@ struct kvmppc_slb {
        bool class      : 1;
};

+# ifdef CONFIG_PPC_FSL_BOOK3E
+#define KVMPPC_BOOKE_IAC_NUM   2
+#define KVMPPC_BOOKE_DAC_NUM   2
+# else
+#define KVMPPC_BOOKE_IAC_NUM   4
+#define KVMPPC_BOOKE_DAC_NUM   2
+# endif
+#define KVMPPC_BOOKE_MAX_IAC   4
+#define KVMPPC_BOOKE_MAX_DAC   2
+
+struct kvmppc_booke_debug_reg {
+       u32 dbcr0;
+       u32 dbcr1;
+       u32 dbcr2;
+#ifdef CONFIG_KVM_E500MC
+       u32 dbcr4;
+#endif
+       u64 iac[KVMPPC_BOOKE_MAX_IAC];
+       u64 dac[KVMPPC_BOOKE_MAX_DAC];
+};
+
struct kvm_vcpu_arch {
        ulong host_stack;
        u32 host_pid;
@@ -380,13 +406,18 @@ struct kvm_vcpu_arch {
        u32 host_mas4;
        u32 host_mas6;
        u32 shadow_epcr;
-       u32 epcr;
        u32 shadow_msrp;
        u32 eplc;
        u32 epsc;
        u32 oldpir;
#endif

+#if defined(CONFIG_BOOKE)
+#if defined(CONFIG_KVM_BOOKE_HV) || defined(CONFIG_64BIT)
+       u32 epcr;
+#endif
+#endif
+
#ifdef CONFIG_PPC_BOOK3S
        /* For Gekko paired singles */
        u32 qpr[32];
@@ -440,8 +471,6 @@ struct kvm_vcpu_arch {

        u32 ccr0;
        u32 ccr1;
-       u32 dbcr0;
-       u32 dbcr1;
        u32 dbsr;

        u64 mmcr[3];
@@ -471,9 +500,12 @@ struct kvm_vcpu_arch {
        ulong fault_esr;
        ulong queued_dear;
        ulong queued_esr;
+       spinlock_t wdt_lock;
+       struct timer_list wdt_timer;
        u32 tlbcfg[4];
        u32 mmucfg;
        u32 epr;
+       struct kvmppc_booke_debug_reg dbg_reg;
#endif
        gpa_t paddr_accessed;
        gva_t vaddr_accessed;
@@ -486,6 +518,7 @@ struct kvm_vcpu_arch {
        u8 osi_needed;
        u8 osi_enabled;
        u8 papr_enabled;
+       u8 watchdog_enabled;
        u8 sane;
        u8 cpu_type;
        u8 hcall_needed;
@@ -497,7 +530,6 @@ struct kvm_vcpu_arch {
        u64 dec_jiffies;
        u64 dec_expires;
        unsigned long pending_exceptions;
-       u16 last_cpu;
        u8 ceded;
        u8 prodded;
        u32 last_inst;
@@ -534,13 +566,17 @@ struct kvm_vcpu_arch {
        unsigned long dtl_index;
        u64 stolen_logged;
        struct kvmppc_vpa slb_shadow;
+
+       spinlock_t tbacct_lock;
+       u64 busy_stolen;
+       u64 busy_preempt;
#endif
};

/* Values for vcpu->arch.state */
-#define KVMPPC_VCPU_STOPPED            0
-#define KVMPPC_VCPU_BUSY_IN_HOST       1
-#define KVMPPC_VCPU_RUNNABLE           2
+#define KVMPPC_VCPU_NOTREADY           0
+#define KVMPPC_VCPU_RUNNABLE           1
+#define KVMPPC_VCPU_BUSY_IN_HOST       2

/* Values for vcpu->arch.io_gpr */
#define KVM_MMIO_REG_MASK       0x001f

@@ -21,7 +21,6 @@

#include <uapi/asm/kvm_para.h>

#ifdef CONFIG_KVM_GUEST

#include <linux/of.h>
@@ -55,7 +54,7 @@ static unsigned long kvm_hypercall(unsigned long *in,
                                   unsigned long *out,
                                   unsigned long nr)
{
-       return HC_EV_UNIMPLEMENTED;
+       return EV_UNIMPLEMENTED;
}

#endif
@@ -66,7 +65,7 @@ static inline long kvm_hypercall0_1(unsigned int nr, unsigned long *r2)
        unsigned long out[8];
        unsigned long r;

-       r = kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+       r = kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
        *r2 = out[0];

        return r;
@@ -77,7 +76,7 @@ static inline long kvm_hypercall0(unsigned int nr)
        unsigned long in[8];
        unsigned long out[8];

-       return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+       return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
}

static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
@@ -86,7 +85,7 @@ static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
        unsigned long out[8];

        in[0] = p1;
-       return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+       return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
}

static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
@@ -97,7 +96,7 @@ static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,

        in[0] = p1;
        in[1] = p2;
-       return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+       return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
}

static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
@@ -109,7 +108,7 @@ static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
        in[0] = p1;
        in[1] = p2;
        in[2] = p3;
-       return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+       return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
}

static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
@@ -123,7 +122,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
        in[1] = p2;
        in[2] = p3;
        in[3] = p4;
-       return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+       return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
}

@@ -28,6 +28,7 @@
#include <linux/types.h>
#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
+#include <linux/bug.h>
#ifdef CONFIG_PPC_BOOK3S
#include <asm/kvm_book3s.h>
#else
@@ -68,6 +69,8 @@ extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
extern void kvmppc_decrementer_func(unsigned long data);
extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
+extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
+extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu);

/* Core-specific hooks */

@@ -104,6 +107,7 @@ extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                       struct kvm_interrupt *irq);
extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
                                         struct kvm_interrupt *irq);
+extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu);

extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                                  unsigned int op, int *advance);
@@ -111,6 +115,7 @@ extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn,
                                     ulong val);
extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn,
                                     ulong *val);
+extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu);

extern int kvmppc_booke_init(void);
extern void kvmppc_booke_exit(void);
@@ -139,16 +144,28 @@ extern struct kvmppc_linear_info *kvm_alloc_hpt(void);
extern void kvm_release_hpt(struct kvmppc_linear_info *li);
extern int kvmppc_core_init_vm(struct kvm *kvm);
extern void kvmppc_core_destroy_vm(struct kvm *kvm);
+extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+                            struct kvm_memory_slot *dont);
+extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+                            unsigned long npages);
extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+                               struct kvm_memory_slot *memslot,
                                struct kvm_userspace_memory_region *mem);
extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
-                               struct kvm_userspace_memory_region *mem);
+                               struct kvm_userspace_memory_region *mem,
+                               struct kvm_memory_slot old);
extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
                                      struct kvm_ppc_smmu_info *info);
+extern void kvmppc_core_flush_memslot(struct kvm *kvm,
+                                     struct kvm_memory_slot *memslot);

extern int kvmppc_bookehv_init(void);
extern void kvmppc_bookehv_exit(void);

extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);

+extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
+
/*
 * Cuts out inst bits with ordering according to spec.
 * That means the leftmost bit is zero. All given bits are included.
@@ -182,6 +199,41 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value)
        return r;
}

+union kvmppc_one_reg {
+       u32     wval;
+       u64     dval;
+       vector128 vval;
+       u64     vsxval[2];
+       struct {
+               u64     addr;
+               u64     length;
+       }       vpaval;
+};
+
+#define one_reg_size(id)       \
+       (1ul << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
+#define get_reg_val(id, reg)   ({              \
+       union kvmppc_one_reg __u;               \
+       switch (one_reg_size(id)) {             \
+       case 4: __u.wval = (reg); break;        \
+       case 8: __u.dval = (reg); break;        \
+       default: BUG();                         \
+       }                                       \
+       __u;                                    \
+})
+
+#define set_reg_val(id, val)   ({              \
+       u64 __v;                                \
+       switch (one_reg_size(id)) {             \
+       case 4: __v = (val).wval; break;        \
+       case 8: __v = (val).dval; break;        \
+       default: BUG();                         \
+       }                                       \
+       __v;                                    \
+})
+
void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);

@@ -190,6 +242,8 @@ int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);

int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg);
int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg);
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *);
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *);

void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);

@@ -230,5 +284,36 @@ static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
        }
}

+/* Please call after prepare_to_enter. This function puts the lazy ee state
+   back to normal mode, without actually enabling interrupts. */
+static inline void kvmppc_lazy_ee_enable(void)
+{
+#ifdef CONFIG_PPC64
+       /* Only need to enable IRQs by hard enabling them after this */
+       local_paca->irq_happened = 0;
+       local_paca->soft_enabled = 1;
+#endif
+}
+
+static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
+{
+       ulong ea;
+       ulong msr_64bit = 0;
+
+       ea = kvmppc_get_gpr(vcpu, rb);
+       if (ra)
+               ea += kvmppc_get_gpr(vcpu, ra);
+
+#if defined(CONFIG_PPC_BOOK3E_64)
+       msr_64bit = MSR_CM;
+#elif defined(CONFIG_PPC_BOOK3S_64)
+       msr_64bit = MSR_SF;
+#endif
+
+       if (!(vcpu->arch.shared->msr & msr_64bit))
+               ea = (uint32_t)ea;
+
+       return ea;
+}
+
#endif /* __POWERPC_KVM_PPC_H__ */

@@ -59,7 +59,7 @@
#define MAS1_TSIZE_SHIFT        7
#define MAS1_TSIZE(x)   (((x) << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK)

-#define MAS2_EPN       0xFFFFF000
+#define MAS2_EPN       (~0xFFFUL)
#define MAS2_X0         0x00000040
#define MAS2_X1         0x00000020
#define MAS2_W          0x00000010

@@ -121,6 +121,16 @@ extern char initial_stab[];
#define PP_RXRX 3       /* Supervisor read,       User read */
#define PP_RXXX (HPTE_R_PP0 | 2)        /* Supervisor read, user none */

+/* Fields for tlbiel instruction in architecture 2.06 */
+#define TLBIEL_INVAL_SEL_MASK  0xc00   /* invalidation selector */
+#define  TLBIEL_INVAL_PAGE     0x000   /* invalidate a single page */
+#define  TLBIEL_INVAL_SET_LPID 0x800   /* invalidate a set for current LPID */
+#define  TLBIEL_INVAL_SET      0xc00   /* invalidate a set for all LPIDs */
+#define TLBIEL_INVAL_SET_MASK  0xfff000        /* set number to inval. */
+#define TLBIEL_INVAL_SET_SHIFT 12
+
+#define POWER7_TLB_SETS                128     /* # sets in POWER7 TLB */
+
#ifndef __ASSEMBLY__

struct hash_pte {

@@ -518,6 +518,7 @@
#define SRR1_WS_DEEPER          0x00020000 /* Some resources not maintained */
#define SRR1_WS_DEEP            0x00010000 /* All resources maintained */
+#define SRR1_PROGFPE           0x00100000 /* Floating Point Enabled */
#define SRR1_PROGILL            0x00080000 /* Illegal instruction */
#define SRR1_PROGPRIV           0x00040000 /* Privileged instruction */
#define SRR1_PROGTRAP           0x00020000 /* Trap */
#define SRR1_PROGADDR           0x00010000 /* SRR0 contains subsequent addr */

@@ -539,6 +539,13 @@
#define TCR_FIE         0x00800000      /* FIT Interrupt Enable */
#define TCR_ARE         0x00400000      /* Auto Reload Enable */

+#ifdef CONFIG_E500
+#define TCR_GET_WP(tcr)  ((((tcr) & 0xC0000000) >> 30) | \
+                             (((tcr) & 0x1E0000) >> 15))
+#else
+#define TCR_GET_WP(tcr)  (((tcr) & 0xC0000000) >> 30)
+#endif
+
/* Bit definitions for the TSR. */
#define TSR_ENW         0x80000000      /* Enable Next Watchdog */
#define TSR_WIS         0x40000000      /* WDT Interrupt Status */

@@ -67,6 +67,14 @@ void generic_mach_cpu_die(void);
void generic_set_cpu_dead(unsigned int cpu);
void generic_set_cpu_up(unsigned int cpu);
int generic_check_cpu_restart(unsigned int cpu);
+
+extern void inhibit_secondary_onlining(void);
+extern void uninhibit_secondary_onlining(void);
+
#else /* HOTPLUG_CPU */
+static inline void inhibit_secondary_onlining(void) {}
+static inline void uninhibit_secondary_onlining(void) {}
+
#endif

#ifdef CONFIG_PPC64

@@ -7,6 +7,7 @@ header-y += bootx.h
header-y += byteorder.h
header-y += cputable.h
header-y += elf.h
+header-y += epapr_hcalls.h
header-y += errno.h
header-y += fcntl.h
header-y += ioctl.h

arch/powerpc/include/uapi/asm/epapr_hcalls.h (new file, 98 lines)
@@ -0,0 +1,98 @@
/*
 * ePAPR hcall interface
 *
 * Copyright 2008-2011 Freescale Semiconductor, Inc.
 *
 * Author: Timur Tabi <timur@freescale.com>
 *
 * This file is provided under a dual BSD/GPL license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of Freescale Semiconductor nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 *
 * ALTERNATIVELY, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") as published by the Free Software
 * Foundation, either version 2 of that License or (at your option) any
 * later version.
 *
 * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef _UAPI_ASM_POWERPC_EPAPR_HCALLS_H
#define _UAPI_ASM_POWERPC_EPAPR_HCALLS_H

#define EV_BYTE_CHANNEL_SEND            1
#define EV_BYTE_CHANNEL_RECEIVE         2
#define EV_BYTE_CHANNEL_POLL            3
#define EV_INT_SET_CONFIG               4
#define EV_INT_GET_CONFIG               5
#define EV_INT_SET_MASK                 6
#define EV_INT_GET_MASK                 7
#define EV_INT_IACK                     9
#define EV_INT_EOI                      10
#define EV_INT_SEND_IPI                 11
#define EV_INT_SET_TASK_PRIORITY        12
#define EV_INT_GET_TASK_PRIORITY        13
#define EV_DOORBELL_SEND                14
#define EV_MSGSND                       15
#define EV_IDLE                         16

/* vendor ID: epapr */
#define EV_LOCAL_VENDOR_ID              0       /* for private use */
#define EV_EPAPR_VENDOR_ID              1
#define EV_FSL_VENDOR_ID                2       /* Freescale Semiconductor */
#define EV_IBM_VENDOR_ID                3       /* IBM */
#define EV_GHS_VENDOR_ID                4       /* Green Hills Software */
#define EV_ENEA_VENDOR_ID               5       /* Enea */
#define EV_WR_VENDOR_ID                 6       /* Wind River Systems */
#define EV_AMCC_VENDOR_ID               7       /* Applied Micro Circuits */
#define EV_KVM_VENDOR_ID                42      /* KVM */

/* The max number of bytes that a byte channel can send or receive per call */
#define EV_BYTE_CHANNEL_MAX_BYTES       16

#define _EV_HCALL_TOKEN(id, num)        (((id) << 16) | (num))
#define EV_HCALL_TOKEN(hcall_num)       _EV_HCALL_TOKEN(EV_EPAPR_VENDOR_ID, hcall_num)

/* epapr return codes */
#define EV_SUCCESS              0
#define EV_EPERM                1       /* Operation not permitted */
#define EV_ENOENT               2       /* Entry Not Found */
#define EV_EIO                  3       /* I/O error occurred */
#define EV_EAGAIN               4       /* The operation had insufficient
                                         * resources to complete and should be
                                         * retried
                                         */
#define EV_ENOMEM               5       /* There was insufficient memory to
                                         * complete the operation */
#define EV_EFAULT               6       /* Bad guest address */
#define EV_ENODEV               7       /* No such device */
#define EV_EINVAL               8       /* An argument supplied to the hcall
                                           was out of range or invalid */
#define EV_INTERNAL             9       /* An internal error occurred */
#define EV_CONFIG               10      /* A configuration error was detected */
#define EV_INVALID_STATE        11      /* The object is in an invalid state */
#define EV_UNIMPLEMENTED        12      /* Unimplemented hypercall */
#define EV_BUFFER_OVERFLOW      13      /* Caller-supplied buffer too small */

#endif /* _UAPI_ASM_POWERPC_EPAPR_HCALLS_H */

@@ -221,6 +221,12 @@ struct kvm_sregs {

        __u32 dbsr;     /* KVM_SREGS_E_UPDATE_DBSR */
        __u32 dbcr[3];
+       /*
+        * iac/dac registers are 64bit wide, while this API
+        * interface provides only lower 32 bits on 64 bit
+        * processors. ONE_REG interface is added for 64bit
+        * iac/dac registers.
+        */
        __u32 iac[4];
        __u32 dac[2];
        __u32 dvc[2];
@@ -325,6 +331,86 @@ struct kvm_book3e_206_tlb_params {
        __u32 reserved[8];
};

+/* For KVM_PPC_GET_HTAB_FD */
+struct kvm_get_htab_fd {
+       __u64   flags;
+       __u64   start_index;
+       __u64   reserved[2];
+};
+
+/* Values for kvm_get_htab_fd.flags */
+#define KVM_GET_HTAB_BOLTED_ONLY       ((__u64)0x1)
+#define KVM_GET_HTAB_WRITE             ((__u64)0x2)
+
+/*
+ * Data read on the file descriptor is formatted as a series of
+ * records, each consisting of a header followed by a series of
+ * `n_valid' HPTEs (16 bytes each), which are all valid.  Following
+ * those valid HPTEs there are `n_invalid' invalid HPTEs, which
+ * are not represented explicitly in the stream.  The same format
+ * is used for writing.
+ */
+struct kvm_get_htab_header {
+       __u32   index;
+       __u16   n_valid;
+       __u16   n_invalid;
+};
+
+#define KVM_REG_PPC_HIOR       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1)
+#define KVM_REG_PPC_IAC1       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2)
+#define KVM_REG_PPC_IAC2       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3)
+#define KVM_REG_PPC_IAC3       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x4)
+#define KVM_REG_PPC_IAC4       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x5)
+#define KVM_REG_PPC_DAC1       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x6)
+#define KVM_REG_PPC_DAC2       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x7)
+#define KVM_REG_PPC_DABR       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8)
+#define KVM_REG_PPC_DSCR       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9)
+#define KVM_REG_PPC_PURR       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa)
+#define KVM_REG_PPC_SPURR      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb)
+#define KVM_REG_PPC_DAR        (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc)
+#define KVM_REG_PPC_DSISR      (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xd)
+#define KVM_REG_PPC_AMR        (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xe)
+#define KVM_REG_PPC_UAMOR      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xf)
+
+#define KVM_REG_PPC_MMCR0      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x10)
+#define KVM_REG_PPC_MMCR1      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x11)
+#define KVM_REG_PPC_MMCRA      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x12)
+
+#define KVM_REG_PPC_PMC1       (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x18)
+#define KVM_REG_PPC_PMC2       (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x19)
+#define KVM_REG_PPC_PMC3       (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1a)
+#define KVM_REG_PPC_PMC4       (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1b)
+#define KVM_REG_PPC_PMC5       (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1c)
+#define KVM_REG_PPC_PMC6       (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1d)
+#define KVM_REG_PPC_PMC7       (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1e)
+#define KVM_REG_PPC_PMC8       (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1f)
+
+/* 32 floating-point registers */
+#define KVM_REG_PPC_FPR0       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x20)
+#define KVM_REG_PPC_FPR(n)     (KVM_REG_PPC_FPR0 + (n))
+#define KVM_REG_PPC_FPR31      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3f)
+
+/* 32 VMX/Altivec vector registers */
+#define KVM_REG_PPC_VR0        (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x40)
+#define KVM_REG_PPC_VR(n)      (KVM_REG_PPC_VR0 + (n))
+#define KVM_REG_PPC_VR31       (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x5f)
+
+/* 32 double-width FP registers for VSX */
+/* High-order halves overlap with FP regs */
+#define KVM_REG_PPC_VSR0       (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x60)
+#define KVM_REG_PPC_VSR(n)     (KVM_REG_PPC_VSR0 + (n))
+#define KVM_REG_PPC_VSR31      (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x7f)
+
+/* FP and vector status/control registers */
+#define KVM_REG_PPC_FPSCR      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80)
+#define KVM_REG_PPC_VSCR       (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81)
+
+/* Virtual processor areas */
+/* For SLB & DTL, address in high (first) half, length in low half */
+#define KVM_REG_PPC_VPA_ADDR   (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x82)
+#define KVM_REG_PPC_VPA_SLB    (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x83)
+#define KVM_REG_PPC_VPA_DTL    (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x84)
+
+#define KVM_REG_PPC_EPCR       (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85)
+
#endif /* __LINUX_KVM_POWERPC_H */

@@ -75,9 +75,10 @@ struct kvm_vcpu_arch_shared {
};

#define KVM_SC_MAGIC_R0         0x4b564d21 /* "KVM!" */
-#define HC_VENDOR_KVM          (42 << 16)
-#define HC_EV_SUCCESS          0
-#define HC_EV_UNIMPLEMENTED    12
+
+#define KVM_HCALL_TOKEN(num)   _EV_HCALL_TOKEN(EV_KVM_VENDOR_ID, num)
+
+#include <uapi/asm/epapr_hcalls.h>

#define KVM_FEATURE_MAGIC_PAGE  1

@@ -441,8 +441,7 @@ int main(void)
        DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
        DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1));
        DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock));
        DEFINE(KVM_ONLINE_CPUS, offsetof(struct kvm, online_vcpus.counter));
-       DEFINE(KVM_LAST_VCPU, offsetof(struct kvm, arch.last_vcpu));
+       DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
        DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
        DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor));
        DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
@@ -470,7 +469,6 @@ int main(void)
        DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
        DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max));
        DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
-       DEFINE(VCPU_LAST_CPU, offsetof(struct kvm_vcpu, arch.last_cpu));
        DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
        DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
        DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));

@@ -8,13 +8,41 @@
 */

#include <linux/threads.h>
+#include <asm/epapr_hcalls.h>
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-compat.h>
#include <asm/asm-offsets.h>

+/* epapr_ev_idle() was derived from e500_idle() */
+_GLOBAL(epapr_ev_idle)
+       CURRENT_THREAD_INFO(r3, r1)
+       PPC_LL  r4, TI_LOCAL_FLAGS(r3)  /* set napping bit */
+       ori     r4, r4,_TLF_NAPPING     /* so when we take an exception */
+       PPC_STL r4, TI_LOCAL_FLAGS(r3)  /* it will return to our caller */
+
+       wrteei  1
+
+idle_loop:
+       LOAD_REG_IMMEDIATE(r11, EV_HCALL_TOKEN(EV_IDLE))
+
+.global epapr_ev_idle_start
+epapr_ev_idle_start:
+       li      r3, -1
+       nop
+       nop
+       nop
+
+       /*
+        * Guard against spurious wakeups from a hypervisor --
+        * only interrupt will cause us to return to LR due to
+        * _TLF_NAPPING.
+        */
+       b       idle_loop
+
/* Hypercall entry point. Will be patched with device tree instructions. */
.global epapr_hypercall_start
epapr_hypercall_start:

@ -21,6 +21,10 @@
#include <asm/epapr_hcalls.h>
#include <asm/cacheflush.h>
#include <asm/code-patching.h>
#include <asm/machdep.h>

extern void epapr_ev_idle(void);
extern u32 epapr_ev_idle_start[];

bool epapr_paravirt_enabled;

@ -41,8 +45,13 @@ static int __init epapr_paravirt_init(void)
    if (len % 4 || len > (4 * 4))
        return -ENODEV;

    for (i = 0; i < (len / 4); i++)
    for (i = 0; i < (len / 4); i++) {
        patch_instruction(epapr_hypercall_start + i, insts[i]);
        patch_instruction(epapr_ev_idle_start + i, insts[i]);
    }

    if (of_get_property(hyper_node, "has-idle", NULL))
        ppc_md.power_save = epapr_ev_idle;

    epapr_paravirt_enabled = true;
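A note on what the patched epapr_hypercall_start page is for: a guest enters the hypervisor by branching to it with the hcall token in r11 and arguments starting at r3, per the ePAPR convention. The wrapper below is only a hedged illustration of that convention, not the in-tree helper; the function name is made up and the clobber list is abbreviated (the real ABI clobbers more GPRs and condition registers).

    #include <asm/epapr_hcalls.h>

    /* Illustrative only: issue the EV_IDLE hcall through the patched page. */
    static unsigned long ev_idle_hcall_sketch(void)
    {
        register unsigned long r11 __asm__("r11") = EV_HCALL_TOKEN(EV_IDLE);
        register unsigned long r3 __asm__("r3");

        __asm__ __volatile__("bl epapr_hypercall_start"
                             : "=r" (r3)
                             : "r" (r11)
                             : "lr", "ctr", "memory"); /* abbreviated clobbers */
        return r3; /* 0 (EV_SUCCESS) on success */
    }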
@ -419,7 +419,7 @@ static void kvm_map_magic_page(void *data)
    in[0] = KVM_MAGIC_PAGE;
    in[1] = KVM_MAGIC_PAGE;

    kvm_hypercall(in, out, HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE);
    kvm_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE));

    *features = out[0];
}
@ -43,6 +43,7 @@
#include <asm/dcr.h>
#include <asm/ftrace.h>
#include <asm/switch_to.h>
#include <asm/epapr_hcalls.h>

#ifdef CONFIG_PPC32
extern void transfer_to_handler(void);
@ -191,3 +192,7 @@ EXPORT_SYMBOL(__arch_hweight64);
#ifdef CONFIG_PPC_BOOK3S_64
EXPORT_SYMBOL_GPL(mmu_psize_defs);
#endif

#ifdef CONFIG_EPAPR_PARAVIRT
EXPORT_SYMBOL(epapr_hypercall_start);
#endif
@ -427,6 +427,45 @@ int generic_check_cpu_restart(unsigned int cpu)
{
    return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE;
}

static atomic_t secondary_inhibit_count;

/*
 * Don't allow secondary CPU threads to come online
 */
void inhibit_secondary_onlining(void)
{
    /*
     * This makes secondary_inhibit_count stable during cpu
     * online/offline operations.
     */
    get_online_cpus();

    atomic_inc(&secondary_inhibit_count);
    put_online_cpus();
}
EXPORT_SYMBOL_GPL(inhibit_secondary_onlining);

/*
 * Allow secondary CPU threads to come online again
 */
void uninhibit_secondary_onlining(void)
{
    get_online_cpus();
    atomic_dec(&secondary_inhibit_count);
    put_online_cpus();
}
EXPORT_SYMBOL_GPL(uninhibit_secondary_onlining);

static int secondaries_inhibited(void)
{
    return atomic_read(&secondary_inhibit_count);
}

#else /* HOTPLUG_CPU */

#define secondaries_inhibited() 0

#endif

static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
@ -445,6 +484,13 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
    int rc, c;

    /*
     * Don't allow secondary threads to come online if inhibited
     */
    if (threads_per_core > 1 && secondaries_inhibited() &&
        cpu % threads_per_core != 0)
        return -EBUSY;

    if (smp_ops == NULL ||
        (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)))
        return -EINVAL;
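The intended use of this pair is to bracket the lifetime of an HV guest so that secondary SMT threads stay offline while it runs; the __cpu_up() check above is what enforces it. A minimal sketch of the pairing, with hypothetical caller names:

    /* Sketch; my_hv_init_vm()/my_hv_destroy_vm() are hypothetical callers. */
    void my_hv_init_vm(void)
    {
        /* from here on, onlining cpu N fails with -EBUSY
         * whenever N % threads_per_core != 0 */
        inhibit_secondary_onlining();
    }

    void my_hv_destroy_vm(void)
    {
        uninhibit_secondary_onlining();
    }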
@ -83,6 +83,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
        vcpu_44x->shadow_refs[i].gtlb_index = -1;

    vcpu->arch.cpu_type = KVM_CPU_440;
    vcpu->arch.pvr = mfspr(SPRN_PVR);

    return 0;
}
@ -27,12 +27,70 @@
#include "booke.h"
#include "44x_tlb.h"

#define XOP_MFDCRX 259
#define XOP_MFDCR 323
#define XOP_MTDCRX 387
#define XOP_MTDCR 451
#define XOP_TLBSX 914
#define XOP_ICCCI 966
#define XOP_TLBWE 978

static int emulate_mtdcr(struct kvm_vcpu *vcpu, int rs, int dcrn)
{
    /* emulate some access in kernel */
    switch (dcrn) {
    case DCRN_CPR0_CONFIG_ADDR:
        vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs);
        return EMULATE_DONE;
    default:
        vcpu->run->dcr.dcrn = dcrn;
        vcpu->run->dcr.data = kvmppc_get_gpr(vcpu, rs);
        vcpu->run->dcr.is_write = 1;
        vcpu->arch.dcr_is_write = 1;
        vcpu->arch.dcr_needed = 1;
        kvmppc_account_exit(vcpu, DCR_EXITS);
        return EMULATE_DO_DCR;
    }
}

static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn)
{
    /* The guest may access CPR0 registers to determine the timebase
     * frequency, and it must know the real host frequency because it
     * can directly access the timebase registers.
     *
     * It would be possible to emulate those accesses in userspace,
     * but userspace can really only figure out the end frequency.
     * We could decompose that into the factors that compute it, but
     * that's tricky math, and it's easier to just report the real
     * CPR0 values.
     */
    switch (dcrn) {
    case DCRN_CPR0_CONFIG_ADDR:
        kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr);
        break;
    case DCRN_CPR0_CONFIG_DATA:
        local_irq_disable();
        mtdcr(DCRN_CPR0_CONFIG_ADDR,
              vcpu->arch.cpr0_cfgaddr);
        kvmppc_set_gpr(vcpu, rt,
                       mfdcr(DCRN_CPR0_CONFIG_DATA));
        local_irq_enable();
        break;
    default:
        vcpu->run->dcr.dcrn = dcrn;
        vcpu->run->dcr.data = 0;
        vcpu->run->dcr.is_write = 0;
        vcpu->arch.dcr_is_write = 0;
        vcpu->arch.io_gpr = rt;
        vcpu->arch.dcr_needed = 1;
        kvmppc_account_exit(vcpu, DCR_EXITS);
        return EMULATE_DO_DCR;
    }

    return EMULATE_DONE;
}

int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                           unsigned int inst, int *advance)
{
@ -50,55 +108,21 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
    switch (get_xop(inst)) {

    case XOP_MFDCR:
        /* The guest may access CPR0 registers to determine the timebase
         * frequency, and it must know the real host frequency because it
         * can directly access the timebase registers.
         *
         * It would be possible to emulate those accesses in userspace,
         * but userspace can really only figure out the end frequency.
         * We could decompose that into the factors that compute it, but
         * that's tricky math, and it's easier to just report the real
         * CPR0 values.
         */
        switch (dcrn) {
        case DCRN_CPR0_CONFIG_ADDR:
            kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr);
            break;
        case DCRN_CPR0_CONFIG_DATA:
            local_irq_disable();
            mtdcr(DCRN_CPR0_CONFIG_ADDR,
                  vcpu->arch.cpr0_cfgaddr);
            kvmppc_set_gpr(vcpu, rt,
                           mfdcr(DCRN_CPR0_CONFIG_DATA));
            local_irq_enable();
            break;
        default:
            run->dcr.dcrn = dcrn;
            run->dcr.data = 0;
            run->dcr.is_write = 0;
            vcpu->arch.io_gpr = rt;
            vcpu->arch.dcr_needed = 1;
            kvmppc_account_exit(vcpu, DCR_EXITS);
            emulated = EMULATE_DO_DCR;
        }
        emulated = emulate_mfdcr(vcpu, rt, dcrn);
        break;

    case XOP_MFDCRX:
        emulated = emulate_mfdcr(vcpu, rt,
                                 kvmppc_get_gpr(vcpu, ra));
        break;

    case XOP_MTDCR:
        /* emulate some access in kernel */
        switch (dcrn) {
        case DCRN_CPR0_CONFIG_ADDR:
            vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs);
            break;
        default:
            run->dcr.dcrn = dcrn;
            run->dcr.data = kvmppc_get_gpr(vcpu, rs);
            run->dcr.is_write = 1;
            vcpu->arch.dcr_needed = 1;
            kvmppc_account_exit(vcpu, DCR_EXITS);
            emulated = EMULATE_DO_DCR;
        }
        emulated = emulate_mtdcr(vcpu, rs, dcrn);
        break;

    case XOP_MTDCRX:
        emulated = emulate_mtdcr(vcpu, rs,
                                 kvmppc_get_gpr(vcpu, ra));
        break;

    case XOP_TLBWE:
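A brief aside on what EMULATE_DO_DCR means for userspace: the vcpu exits to the VMM with exit reason KVM_EXIT_DCR, and the run structure carries the dcrn/data/is_write triple filled in above. A rough sketch of the userspace side, with error handling elided and the device-model callbacks (my_dcr_read/my_dcr_write) assumed rather than real:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Hypothetical device-model hooks. */
    extern __u32 my_dcr_read(__u32 dcrn);
    extern void my_dcr_write(__u32 dcrn, __u32 data);

    void run_vcpu(int vcpu_fd, struct kvm_run *run)
    {
        for (;;) {
            ioctl(vcpu_fd, KVM_RUN, 0);
            switch (run->exit_reason) {
            case KVM_EXIT_DCR:
                if (run->dcr.is_write)
                    my_dcr_write(run->dcr.dcrn, run->dcr.data);
                else
                    run->dcr.data = my_dcr_read(run->dcr.dcrn);
                break; /* KVM completes the instruction on re-entry */
            default:
                return;
            }
        }
    }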
@ -20,6 +20,10 @@ config KVM
    bool
    select PREEMPT_NOTIFIERS
    select ANON_INODES
    select HAVE_KVM_EVENTFD

config KVM_BOOK3S_HANDLER
    bool
@ -36,6 +37,7 @@ config KVM_BOOK3S_64_HANDLER
config KVM_BOOK3S_PR
    bool
    select KVM_MMIO
    select MMU_NOTIFIER

config KVM_BOOK3S_32
    tristate "KVM support for PowerPC book3s_32 processors"
@ -123,6 +125,7 @@ config KVM_E500V2
    depends on EXPERIMENTAL && E500 && !PPC_E500MC
    select KVM
    select KVM_MMIO
    select MMU_NOTIFIER
    ---help---
      Support running unmodified E500 guest kernels in virtual machines on
      E500v2 host processors.
@ -138,6 +141,7 @@ config KVM_E500MC
    select KVM
    select KVM_MMIO
    select KVM_BOOKE_HV
    select MMU_NOTIFIER
    ---help---
      Support running unmodified E500MC/E5500 (32-bit) guest kernels in
      virtual machines on E500MC/E5500 host processors.
@ -6,7 +6,8 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror

ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm

common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \
                eventfd.o)

CFLAGS_44x_tlb.o := -I.
CFLAGS_e500_tlb.o := -I.
@ -72,10 +73,12 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
    book3s_hv_rmhandlers.o \
    book3s_hv_rm_mmu.o \
    book3s_64_vio_hv.o \
    book3s_hv_ras.o \
    book3s_hv_builtin.o

kvm-book3s_64-module-objs := \
    ../../../virt/kvm/kvm_main.o \
    ../../../virt/kvm/eventfd.o \
    powerpc.o \
    emulate.o \
    book3s.o \
@ -411,6 +411,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
    return 0;
}

int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
{
    return 0;
}

void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
    int i;
@ -476,6 +485,122 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
    return -ENOTSUPP;
}

int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
    int r;
    union kvmppc_one_reg val;
    int size;
    long int i;

    size = one_reg_size(reg->id);
    if (size > sizeof(val))
        return -EINVAL;

    r = kvmppc_get_one_reg(vcpu, reg->id, &val);

    if (r == -EINVAL) {
        r = 0;
        switch (reg->id) {
        case KVM_REG_PPC_DAR:
            val = get_reg_val(reg->id, vcpu->arch.shared->dar);
            break;
        case KVM_REG_PPC_DSISR:
            val = get_reg_val(reg->id, vcpu->arch.shared->dsisr);
            break;
        case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
            i = reg->id - KVM_REG_PPC_FPR0;
            val = get_reg_val(reg->id, vcpu->arch.fpr[i]);
            break;
        case KVM_REG_PPC_FPSCR:
            val = get_reg_val(reg->id, vcpu->arch.fpscr);
            break;
#ifdef CONFIG_ALTIVEC
        case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
            if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
                r = -ENXIO;
                break;
            }
            val.vval = vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0];
            break;
        case KVM_REG_PPC_VSCR:
            if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
                r = -ENXIO;
                break;
            }
            val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]);
            break;
#endif /* CONFIG_ALTIVEC */
        default:
            r = -EINVAL;
            break;
        }
    }
    if (r)
        return r;

    if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
        r = -EFAULT;

    return r;
}

int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
    int r;
    union kvmppc_one_reg val;
    int size;
    long int i;

    size = one_reg_size(reg->id);
    if (size > sizeof(val))
        return -EINVAL;

    if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
        return -EFAULT;

    r = kvmppc_set_one_reg(vcpu, reg->id, &val);

    if (r == -EINVAL) {
        r = 0;
        switch (reg->id) {
        case KVM_REG_PPC_DAR:
            vcpu->arch.shared->dar = set_reg_val(reg->id, val);
            break;
        case KVM_REG_PPC_DSISR:
            vcpu->arch.shared->dsisr = set_reg_val(reg->id, val);
            break;
        case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
            i = reg->id - KVM_REG_PPC_FPR0;
            vcpu->arch.fpr[i] = set_reg_val(reg->id, val);
            break;
        case KVM_REG_PPC_FPSCR:
            vcpu->arch.fpscr = set_reg_val(reg->id, val);
            break;
#ifdef CONFIG_ALTIVEC
        case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
            if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
                r = -ENXIO;
                break;
            }
            vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
            break;
        case KVM_REG_PPC_VSCR:
            if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
                r = -ENXIO;
                break;
            }
            vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val);
            break;
#endif /* CONFIG_ALTIVEC */
        default:
            r = -EINVAL;
            break;
        }
    }

    return r;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
                                  struct kvm_translation *tr)
{
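To make the new ONE_REG path concrete, here is a minimal userspace sketch of fetching one of the registers listed earlier through KVM_GET_ONE_REG; the vcpu_fd is assumed to come from the usual KVM_CREATE_VCPU sequence.

    #include <linux/kvm.h>
    #include <stdint.h>
    #include <sys/ioctl.h>

    /* Read KVM_REG_PPC_FPSCR (a 64-bit register) from a vcpu. */
    int get_fpscr(int vcpu_fd, uint64_t *out)
    {
        struct kvm_one_reg reg = {
            .id = KVM_REG_PPC_FPSCR,
            .addr = (uintptr_t)out, /* kernel copies the value here */
        };
        return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
    }

Setting a register is symmetric: fill the buffer first and call KVM_SET_ONE_REG with the same id/addr pair.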
@ -155,7 +155,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)

    /* Get host physical address for gpa */
    hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
    if (is_error_pfn(hpaddr)) {
    if (is_error_noslot_pfn(hpaddr)) {
        printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n",
               orig_pte->eaddr);
        r = -EINVAL;
@ -254,6 +254,7 @@ next_pteg:

    kvmppc_mmu_hpte_cache_map(vcpu, pte);

    kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
out:
    return r;
}
@ -93,7 +93,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)

    /* Get host physical address for gpa */
    hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
    if (is_error_pfn(hpaddr)) {
    if (is_error_noslot_pfn(hpaddr)) {
        printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr);
        r = -EINVAL;
        goto out;
@ -171,6 +171,7 @@ map_again:

        kvmppc_mmu_hpte_cache_map(vcpu, pte);
    }
    kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);

out:
    return r;
@ -24,6 +24,9 @@
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/vmalloc.h>
#include <linux/srcu.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>

#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
@ -40,6 +43,11 @@
/* Power architecture requires HPT is at least 256kB */
#define PPC_MIN_HPT_ORDER 18

static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
                                       long pte_index, unsigned long pteh,
                                       unsigned long ptel, unsigned long *pte_idx_ret);
static void kvmppc_rmap_reset(struct kvm *kvm);

long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
{
    unsigned long hpt;
@ -137,10 +145,11 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
        /* Set the entire HPT to 0, i.e. invalid HPTEs */
        memset((void *)kvm->arch.hpt_virt, 0, 1ul << order);
        /*
         * Set the whole last_vcpu array to an invalid vcpu number.
         * This ensures that each vcpu will flush its TLB on next entry.
         * Reset all the reverse-mapping chains for all memslots
         */
        memset(kvm->arch.last_vcpu, 0xff, sizeof(kvm->arch.last_vcpu));
        kvmppc_rmap_reset(kvm);
        /* Ensure that each vcpu will flush its TLB on next entry. */
        cpumask_setall(&kvm->arch.need_tlb_flush);
        *htab_orderp = order;
        err = 0;
    } else {
@ -184,6 +193,7 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
    unsigned long addr, hash;
    unsigned long psize;
    unsigned long hp0, hp1;
    unsigned long idx_ret;
    long ret;
    struct kvm *kvm = vcpu->kvm;

@ -215,7 +225,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
        hash = (hash << 3) + 7;
        hp_v = hp0 | ((addr >> 16) & ~0x7fUL);
        hp_r = hp1 | addr;
        ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r);
        ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, hash, hp_v, hp_r,
                                         &idx_ret);
        if (ret != H_SUCCESS) {
            pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
                   addr, ret);
@ -260,7 +271,7 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)

/*
 * This is called to get a reference to a guest page if there isn't
 * one already in the kvm->arch.slot_phys[][] arrays.
 * one already in the memslot->arch.slot_phys[] array.
 */
static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
                                  struct kvm_memory_slot *memslot,
@ -275,7 +286,7 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
    struct vm_area_struct *vma;
    unsigned long pfn, i, npages;

    physp = kvm->arch.slot_phys[memslot->id];
    physp = memslot->arch.slot_phys;
    if (!physp)
        return -EINVAL;
    if (physp[gfn - memslot->base_gfn])
@ -353,15 +364,10 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
    return err;
}

/*
 * We come here on a H_ENTER call from the guest when we are not
 * using mmu notifiers and we don't have the requested page pinned
 * already.
 */
long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
        long pte_index, unsigned long pteh, unsigned long ptel)
long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
        long pte_index, unsigned long pteh,
        unsigned long ptel, unsigned long *pte_idx_ret)
{
    struct kvm *kvm = vcpu->kvm;
    unsigned long psize, gpa, gfn;
    struct kvm_memory_slot *memslot;
    long ret;
@ -389,8 +395,8 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 do_insert:
    /* Protect linux PTE lookup from page table destruction */
    rcu_read_lock_sched(); /* this disables preemption too */
    vcpu->arch.pgdir = current->mm->pgd;
    ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel);
    ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
                            current->mm->pgd, false, pte_idx_ret);
    rcu_read_unlock_sched();
    if (ret == H_TOO_HARD) {
        /* this can't happen */
@ -401,6 +407,19 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,

}

/*
 * We come here on a H_ENTER call from the guest when we are not
 * using mmu notifiers and we don't have the requested page pinned
 * already.
 */
long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
                             long pte_index, unsigned long pteh,
                             unsigned long ptel)
{
    return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index,
                                      pteh, ptel, &vcpu->arch.gpr[4]);
}

static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
                                                         gva_t eaddr)
{
@ -570,7 +589,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
    struct kvm *kvm = vcpu->kvm;
    unsigned long *hptep, hpte[3], r;
    unsigned long mmu_seq, psize, pte_size;
    unsigned long gfn, hva, pfn;
    unsigned long gpa, gfn, hva, pfn;
    struct kvm_memory_slot *memslot;
    unsigned long *rmap;
    struct revmap_entry *rev;
@ -608,15 +627,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,

    /* Translate the logical address and get the page */
    psize = hpte_page_size(hpte[0], r);
    gfn = hpte_rpn(r, psize);
    gpa = (r & HPTE_R_RPN & ~(psize - 1)) | (ea & (psize - 1));
    gfn = gpa >> PAGE_SHIFT;
    memslot = gfn_to_memslot(kvm, gfn);

    /* No memslot means it's an emulated MMIO region */
    if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
        unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1));
    if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
        return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
                                      dsisr & DSISR_ISSTORE);
    }

    if (!kvm->arch.using_mmu_notifiers)
        return -EFAULT; /* should never get here */
@ -710,7 +728,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,

    /* Check if we might have been invalidated; let the guest retry if so */
    ret = RESUME_GUEST;
    if (mmu_notifier_retry(vcpu, mmu_seq)) {
    if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
        unlock_rmap(rmap);
        goto out_unlock;
    }
@ -756,6 +774,25 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
    goto out_put;
}

static void kvmppc_rmap_reset(struct kvm *kvm)
{
    struct kvm_memslots *slots;
    struct kvm_memory_slot *memslot;
    int srcu_idx;

    srcu_idx = srcu_read_lock(&kvm->srcu);
    slots = kvm->memslots;
    kvm_for_each_memslot(memslot, slots) {
        /*
         * This assumes it is acceptable to lose reference and
         * change bits across a reset.
         */
        memset(memslot->arch.rmap, 0,
               memslot->npages * sizeof(*memslot->arch.rmap));
    }
    srcu_read_unlock(&kvm->srcu, srcu_idx);
}

static int kvm_handle_hva_range(struct kvm *kvm,
                                unsigned long start,
                                unsigned long end,
@ -850,7 +887,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
        psize = hpte_page_size(hptep[0], ptel);
        if ((hptep[0] & HPTE_V_VALID) &&
            hpte_rpn(ptel, psize) == gfn) {
            hptep[0] |= HPTE_V_ABSENT;
            if (kvm->arch.using_mmu_notifiers)
                hptep[0] |= HPTE_V_ABSENT;
            kvmppc_invalidate_hpte(kvm, hptep, i);
            /* Harvest R and C */
            rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
@ -877,6 +915,28 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
    return 0;
}

void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
    unsigned long *rmapp;
    unsigned long gfn;
    unsigned long n;

    rmapp = memslot->arch.rmap;
    gfn = memslot->base_gfn;
    for (n = memslot->npages; n; --n) {
        /*
         * Testing the present bit without locking is OK because
         * the memslot has been marked invalid already, and hence
         * no new HPTEs referencing this page can be created,
         * thus the present bit can't go from 0 to 1.
         */
        if (*rmapp & KVMPPC_RMAP_PRESENT)
            kvm_unmap_rmapp(kvm, rmapp, gfn);
        ++rmapp;
        ++gfn;
    }
}

static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
                         unsigned long gfn)
{
@ -1030,16 +1090,16 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
    return ret;
}

long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
                             unsigned long *map)
{
    unsigned long i;
    unsigned long *rmapp, *map;
    unsigned long *rmapp;

    preempt_disable();
    rmapp = memslot->arch.rmap;
    map = memslot->dirty_bitmap;
    for (i = 0; i < memslot->npages; ++i) {
        if (kvm_test_clear_dirty(kvm, rmapp))
        if (kvm_test_clear_dirty(kvm, rmapp) && map)
            __set_bit_le(i, map);
        ++rmapp;
    }
@ -1057,20 +1117,22 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
    unsigned long hva, psize, offset;
    unsigned long pa;
    unsigned long *physp;
    int srcu_idx;

    srcu_idx = srcu_read_lock(&kvm->srcu);
    memslot = gfn_to_memslot(kvm, gfn);
    if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
        return NULL;
        goto err;
    if (!kvm->arch.using_mmu_notifiers) {
        physp = kvm->arch.slot_phys[memslot->id];
        physp = memslot->arch.slot_phys;
        if (!physp)
            return NULL;
            goto err;
        physp += gfn - memslot->base_gfn;
        pa = *physp;
        if (!pa) {
            if (kvmppc_get_guest_page(kvm, gfn, memslot,
                                      PAGE_SIZE) < 0)
                return NULL;
                goto err;
            pa = *physp;
        }
        page = pfn_to_page(pa >> PAGE_SHIFT);
@ -1079,9 +1141,11 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
        hva = gfn_to_hva_memslot(memslot, gfn);
        npages = get_user_pages_fast(hva, 1, 1, pages);
        if (npages < 1)
            return NULL;
            goto err;
        page = pages[0];
    }
    srcu_read_unlock(&kvm->srcu, srcu_idx);

    psize = PAGE_SIZE;
    if (PageHuge(page)) {
        page = compound_head(page);
@ -1091,6 +1155,10 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
    if (nb_ret)
        *nb_ret = psize - offset;
    return page_address(page) + offset;

 err:
    srcu_read_unlock(&kvm->srcu, srcu_idx);
    return NULL;
}

void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
@ -1100,6 +1168,348 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
    put_page(page);
}

/*
 * Functions for reading and writing the hash table via reads and
 * writes on a file descriptor.
 *
 * Reads return the guest view of the hash table, which has to be
 * pieced together from the real hash table and the guest_rpte
 * values in the revmap array.
 *
 * On writes, each HPTE written is considered in turn, and if it
 * is valid, it is written to the HPT as if an H_ENTER with the
 * exact flag set was done. When the invalid count is non-zero
 * in the header written to the stream, the kernel will make
 * sure that that many HPTEs are invalid, and invalidate them
 * if not.
 */

struct kvm_htab_ctx {
    unsigned long index;
    unsigned long flags;
    struct kvm *kvm;
    int first_pass;
};

#define HPTE_SIZE (2 * sizeof(unsigned long))

static long record_hpte(unsigned long flags, unsigned long *hptp,
                        unsigned long *hpte, struct revmap_entry *revp,
                        int want_valid, int first_pass)
{
    unsigned long v, r;
    int ok = 1;
    int valid, dirty;

    /* Unmodified entries are uninteresting except on the first pass */
    dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
    if (!first_pass && !dirty)
        return 0;

    valid = 0;
    if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) {
        valid = 1;
        if ((flags & KVM_GET_HTAB_BOLTED_ONLY) &&
            !(hptp[0] & HPTE_V_BOLTED))
            valid = 0;
    }
    if (valid != want_valid)
        return 0;

    v = r = 0;
    if (valid || dirty) {
        /* lock the HPTE so it's stable and read it */
        preempt_disable();
        while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
            cpu_relax();
        v = hptp[0];
        if (v & HPTE_V_ABSENT) {
            v &= ~HPTE_V_ABSENT;
            v |= HPTE_V_VALID;
        }
        /* re-evaluate valid and dirty from synchronized HPTE value */
        valid = !!(v & HPTE_V_VALID);
        if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED))
            valid = 0;
        r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C));
        dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
        /* only clear modified if this is the right sort of entry */
        if (valid == want_valid && dirty) {
            r &= ~HPTE_GR_MODIFIED;
            revp->guest_rpte = r;
        }
        asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
        hptp[0] &= ~HPTE_V_HVLOCK;
        preempt_enable();
        if (!(valid == want_valid && (first_pass || dirty)))
            ok = 0;
    }
    hpte[0] = v;
    hpte[1] = r;
    return ok;
}

static ssize_t kvm_htab_read(struct file *file, char __user *buf,
                             size_t count, loff_t *ppos)
{
    struct kvm_htab_ctx *ctx = file->private_data;
    struct kvm *kvm = ctx->kvm;
    struct kvm_get_htab_header hdr;
    unsigned long *hptp;
    struct revmap_entry *revp;
    unsigned long i, nb, nw;
    unsigned long __user *lbuf;
    struct kvm_get_htab_header __user *hptr;
    unsigned long flags;
    int first_pass;
    unsigned long hpte[2];

    if (!access_ok(VERIFY_WRITE, buf, count))
        return -EFAULT;

    first_pass = ctx->first_pass;
    flags = ctx->flags;

    i = ctx->index;
    hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
    revp = kvm->arch.revmap + i;
    lbuf = (unsigned long __user *)buf;

    nb = 0;
    while (nb + sizeof(hdr) + HPTE_SIZE < count) {
        /* Initialize header */
        hptr = (struct kvm_get_htab_header __user *)buf;
        hdr.n_valid = 0;
        hdr.n_invalid = 0;
        nw = nb;
        nb += sizeof(hdr);
        lbuf = (unsigned long __user *)(buf + sizeof(hdr));

        /* Skip uninteresting entries, i.e. clean on not-first pass */
        if (!first_pass) {
            while (i < kvm->arch.hpt_npte &&
                   !(revp->guest_rpte & HPTE_GR_MODIFIED)) {
                ++i;
                hptp += 2;
                ++revp;
            }
        }
        hdr.index = i;

        /* Grab a series of valid entries */
        while (i < kvm->arch.hpt_npte &&
               hdr.n_valid < 0xffff &&
               nb + HPTE_SIZE < count &&
               record_hpte(flags, hptp, hpte, revp, 1, first_pass)) {
            /* valid entry, write it out */
            ++hdr.n_valid;
            if (__put_user(hpte[0], lbuf) ||
                __put_user(hpte[1], lbuf + 1))
                return -EFAULT;
            nb += HPTE_SIZE;
            lbuf += 2;
            ++i;
            hptp += 2;
            ++revp;
        }
        /* Now skip invalid entries while we can */
        while (i < kvm->arch.hpt_npte &&
               hdr.n_invalid < 0xffff &&
               record_hpte(flags, hptp, hpte, revp, 0, first_pass)) {
            /* found an invalid entry */
            ++hdr.n_invalid;
            ++i;
            hptp += 2;
            ++revp;
        }

        if (hdr.n_valid || hdr.n_invalid) {
            /* write back the header */
            if (__copy_to_user(hptr, &hdr, sizeof(hdr)))
                return -EFAULT;
            nw = nb;
            buf = (char __user *)lbuf;
        } else {
            nb = nw;
        }

        /* Check if we've wrapped around the hash table */
        if (i >= kvm->arch.hpt_npte) {
            i = 0;
            ctx->first_pass = 0;
            break;
        }
    }

    ctx->index = i;

    return nb;
}

static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
                              size_t count, loff_t *ppos)
{
    struct kvm_htab_ctx *ctx = file->private_data;
    struct kvm *kvm = ctx->kvm;
    struct kvm_get_htab_header hdr;
    unsigned long i, j;
    unsigned long v, r;
    unsigned long __user *lbuf;
    unsigned long *hptp;
    unsigned long tmp[2];
    ssize_t nb;
    long int err, ret;
    int rma_setup;

    if (!access_ok(VERIFY_READ, buf, count))
        return -EFAULT;

    /* lock out vcpus from running while we're doing this */
    mutex_lock(&kvm->lock);
    rma_setup = kvm->arch.rma_setup_done;
    if (rma_setup) {
        kvm->arch.rma_setup_done = 0; /* temporarily */
        /* order rma_setup_done vs. vcpus_running */
        smp_mb();
        if (atomic_read(&kvm->arch.vcpus_running)) {
            kvm->arch.rma_setup_done = 1;
            mutex_unlock(&kvm->lock);
            return -EBUSY;
        }
    }

    err = 0;
    for (nb = 0; nb + sizeof(hdr) <= count; ) {
        err = -EFAULT;
        if (__copy_from_user(&hdr, buf, sizeof(hdr)))
            break;

        err = 0;
        if (nb + hdr.n_valid * HPTE_SIZE > count)
            break;

        nb += sizeof(hdr);
        buf += sizeof(hdr);

        err = -EINVAL;
        i = hdr.index;
        if (i >= kvm->arch.hpt_npte ||
            i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte)
            break;

        hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
        lbuf = (unsigned long __user *)buf;
        for (j = 0; j < hdr.n_valid; ++j) {
            err = -EFAULT;
            if (__get_user(v, lbuf) || __get_user(r, lbuf + 1))
                goto out;
            err = -EINVAL;
            if (!(v & HPTE_V_VALID))
                goto out;
            lbuf += 2;
            nb += HPTE_SIZE;

            if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT))
                kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
            err = -EIO;
            ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r,
                                             tmp);
            if (ret != H_SUCCESS) {
                pr_err("kvm_htab_write ret %ld i=%ld v=%lx "
                       "r=%lx\n", ret, i, v, r);
                goto out;
            }
            if (!rma_setup && is_vrma_hpte(v)) {
                unsigned long psize = hpte_page_size(v, r);
                unsigned long senc = slb_pgsize_encoding(psize);
                unsigned long lpcr;

                kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
                        (VRMA_VSID << SLB_VSID_SHIFT_1T);
                lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
                lpcr |= senc << (LPCR_VRMASD_SH - 4);
                kvm->arch.lpcr = lpcr;
                rma_setup = 1;
            }
            ++i;
            hptp += 2;
        }

        for (j = 0; j < hdr.n_invalid; ++j) {
            if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT))
                kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
            ++i;
            hptp += 2;
        }
        err = 0;
    }

 out:
    /* Order HPTE updates vs. rma_setup_done */
    smp_wmb();
    kvm->arch.rma_setup_done = rma_setup;
    mutex_unlock(&kvm->lock);

    if (err)
        return err;
    return nb;
}

static int kvm_htab_release(struct inode *inode, struct file *filp)
{
    struct kvm_htab_ctx *ctx = filp->private_data;

    filp->private_data = NULL;
    if (!(ctx->flags & KVM_GET_HTAB_WRITE))
        atomic_dec(&ctx->kvm->arch.hpte_mod_interest);
    kvm_put_kvm(ctx->kvm);
    kfree(ctx);
    return 0;
}

static struct file_operations kvm_htab_fops = {
    .read = kvm_htab_read,
    .write = kvm_htab_write,
    .llseek = default_llseek,
    .release = kvm_htab_release,
};

int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
{
    int ret;
    struct kvm_htab_ctx *ctx;
    int rwflag;

    /* reject flags we don't recognize */
    if (ghf->flags & ~(KVM_GET_HTAB_BOLTED_ONLY | KVM_GET_HTAB_WRITE))
        return -EINVAL;
    ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
    if (!ctx)
        return -ENOMEM;
    kvm_get_kvm(kvm);
    ctx->kvm = kvm;
    ctx->index = ghf->start_index;
    ctx->flags = ghf->flags;
    ctx->first_pass = 1;

    rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY;
    ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag);
    if (ret < 0) {
        kvm_put_kvm(kvm);
        return ret;
    }

    if (rwflag == O_RDONLY) {
        mutex_lock(&kvm->slots_lock);
        atomic_inc(&kvm->arch.hpte_mod_interest);
        /* make sure kvmppc_do_h_enter etc. see the increment */
        synchronize_srcu_expedited(&kvm->srcu);
        mutex_unlock(&kvm->slots_lock);
    }

    return ret;
}

void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
{
    struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
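For orientation, here is a hedged userspace sketch of draining the guest hash table through the file descriptor returned by KVM_PPC_GET_HTAB_FD, following the stream format described in the comment block above (a kvm_get_htab_header followed by n_valid HPTEs of two 64-bit words each). The chunking and termination policy is simplified; a real migration loop would keep re-reading to pick up entries dirtied after the first pass.

    #include <linux/kvm.h>
    #include <stdint.h>
    #include <string.h>
    #include <unistd.h>

    void drain_htab(int htab_fd)
    {
        char buf[16384];
        ssize_t n;

        while ((n = read(htab_fd, buf, sizeof(buf))) > 0) {
            char *p = buf;
            while (p < buf + n) {
                struct kvm_get_htab_header hdr;
                memcpy(&hdr, p, sizeof(hdr));
                /* ship hdr plus its HPTEs to the destination here */
                p += sizeof(hdr) + hdr.n_valid * 2 * sizeof(uint64_t);
            }
        }
    }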
@ -22,6 +22,7 @@
#include <asm/kvm_book3s.h>
#include <asm/reg.h>
#include <asm/switch_to.h>
#include <asm/time.h>

#define OP_19_XOP_RFID 18
#define OP_19_XOP_RFI 50
@ -395,6 +396,12 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
            (mfmsr() & MSR_HV))
            vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
        break;
    case SPRN_PURR:
        to_book3s(vcpu)->purr_offset = spr_val - get_tb();
        break;
    case SPRN_SPURR:
        to_book3s(vcpu)->spurr_offset = spr_val - get_tb();
        break;
    case SPRN_GQR0:
    case SPRN_GQR1:
    case SPRN_GQR2:
@ -412,6 +419,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
    case SPRN_CTRLF:
    case SPRN_CTRLT:
    case SPRN_L2CR:
    case SPRN_DSCR:
    case SPRN_MMCR0_GEKKO:
    case SPRN_MMCR1_GEKKO:
    case SPRN_PMC1_GEKKO:
@ -483,9 +491,15 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
        *spr_val = to_book3s(vcpu)->hid[5];
        break;
    case SPRN_CFAR:
    case SPRN_PURR:
    case SPRN_DSCR:
        *spr_val = 0;
        break;
    case SPRN_PURR:
        *spr_val = get_tb() + to_book3s(vcpu)->purr_offset;
        break;
    case SPRN_SPURR:
        *spr_val = get_tb() + to_book3s(vcpu)->spurr_offset;
        break;
    case SPRN_GQR0:
    case SPRN_GQR1:
    case SPRN_GQR2:
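The PURR/SPURR handling above uses a classic offset trick: on mtspr, store the difference between the written value and the timebase; on mfspr, add the current timebase back. A tiny self-contained model (not kernel code) of why the register then appears to advance at timebase rate:

    /* Toy model of the purr_offset/spurr_offset scheme above. */
    struct vspr { unsigned long offset; };

    static void vspr_write(struct vspr *s, unsigned long val, unsigned long tb)
    {
        s->offset = val - tb; /* remember distance from the timebase */
    }

    static unsigned long vspr_read(const struct vspr *s, unsigned long tb)
    {
        return tb + s->offset; /* == val + (tb_now - tb_at_write) */
    }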
@ -28,8 +28,5 @@ EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
#ifdef CONFIG_ALTIVEC
EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
#endif
#ifdef CONFIG_VSX
EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx);
#endif
#endif
File diff suppressed because it is too large
@ -157,8 +157,8 @@ static void __init kvm_linear_init_one(ulong size, int count, int type)
    linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info));
    for (i = 0; i < count; ++i) {
        linear = alloc_bootmem_align(size, size);
        pr_info("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
                size >> 20);
        pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
                 size >> 20);
        linear_info[i].base_virt = linear;
        linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT;
        linear_info[i].npages = npages;
arch/powerpc/kvm/book3s_hv_ras.c (new file, 144 lines)
@ -0,0 +1,144 @@
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * Copyright 2012 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 */

#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/kernel.h>
#include <asm/opal.h>

/* SRR1 bits for machine check on POWER7 */
#define SRR1_MC_LDSTERR (1ul << (63-42))
#define SRR1_MC_IFETCH_SH (63-45)
#define SRR1_MC_IFETCH_MASK 0x7
#define SRR1_MC_IFETCH_SLBPAR 2       /* SLB parity error */
#define SRR1_MC_IFETCH_SLBMULTI 3     /* SLB multi-hit */
#define SRR1_MC_IFETCH_SLBPARMULTI 4  /* SLB parity + multi-hit */
#define SRR1_MC_IFETCH_TLBMULTI 5     /* I-TLB multi-hit */

/* DSISR bits for machine check on POWER7 */
#define DSISR_MC_DERAT_MULTI 0x800    /* D-ERAT multi-hit */
#define DSISR_MC_TLB_MULTI 0x400      /* D-TLB multi-hit */
#define DSISR_MC_SLB_PARITY 0x100     /* SLB parity error */
#define DSISR_MC_SLB_MULTI 0x080      /* SLB multi-hit */
#define DSISR_MC_SLB_PARMULTI 0x040   /* SLB parity + multi-hit */

/* POWER7 SLB flush and reload */
static void reload_slb(struct kvm_vcpu *vcpu)
{
    struct slb_shadow *slb;
    unsigned long i, n;

    /* First clear out SLB */
    asm volatile("slbmte %0,%0; slbia" : : "r" (0));

    /* Do they have an SLB shadow buffer registered? */
    slb = vcpu->arch.slb_shadow.pinned_addr;
    if (!slb)
        return;

    /* Sanity check */
    n = min_t(u32, slb->persistent, SLB_MIN_SIZE);
    if ((void *) &slb->save_area[n] > vcpu->arch.slb_shadow.pinned_end)
        return;

    /* Load up the SLB from that */
    for (i = 0; i < n; ++i) {
        unsigned long rb = slb->save_area[i].esid;
        unsigned long rs = slb->save_area[i].vsid;

        rb = (rb & ~0xFFFul) | i; /* insert entry number */
        asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
    }
}

/* POWER7 TLB flush */
static void flush_tlb_power7(struct kvm_vcpu *vcpu)
{
    unsigned long i, rb;

    rb = TLBIEL_INVAL_SET_LPID;
    for (i = 0; i < POWER7_TLB_SETS; ++i) {
        asm volatile("tlbiel %0" : : "r" (rb));
        rb += 1 << TLBIEL_INVAL_SET_SHIFT;
    }
}

/*
 * On POWER7, see if we can handle a machine check that occurred inside
 * the guest in real mode, without switching to the host partition.
 *
 * Returns: 0 => exit guest, 1 => deliver machine check to guest
 */
static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
{
    unsigned long srr1 = vcpu->arch.shregs.msr;
    struct opal_machine_check_event *opal_evt;
    long handled = 1;

    if (srr1 & SRR1_MC_LDSTERR) {
        /* error on load/store */
        unsigned long dsisr = vcpu->arch.shregs.dsisr;

        if (dsisr & (DSISR_MC_SLB_PARMULTI | DSISR_MC_SLB_MULTI |
                     DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI)) {
            /* flush and reload SLB; flushes D-ERAT too */
            reload_slb(vcpu);
            dsisr &= ~(DSISR_MC_SLB_PARMULTI | DSISR_MC_SLB_MULTI |
                       DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI);
        }
        if (dsisr & DSISR_MC_TLB_MULTI) {
            flush_tlb_power7(vcpu);
            dsisr &= ~DSISR_MC_TLB_MULTI;
        }
        /* Any other errors we don't understand? */
        if (dsisr & 0xffffffffUL)
            handled = 0;
    }

    switch ((srr1 >> SRR1_MC_IFETCH_SH) & SRR1_MC_IFETCH_MASK) {
    case 0:
        break;
    case SRR1_MC_IFETCH_SLBPAR:
    case SRR1_MC_IFETCH_SLBMULTI:
    case SRR1_MC_IFETCH_SLBPARMULTI:
        reload_slb(vcpu);
        break;
    case SRR1_MC_IFETCH_TLBMULTI:
        flush_tlb_power7(vcpu);
        break;
    default:
        handled = 0;
    }

    /*
     * See if OPAL has already handled the condition.
     * We assume that if the condition is recovered then OPAL
     * will have generated an error log event that we will pick
     * up and log later.
     */
    opal_evt = local_paca->opal_mc_evt;
    if (opal_evt->version == OpalMCE_V1 &&
        (opal_evt->severity == OpalMCE_SEV_NO_ERROR ||
         opal_evt->disposition == OpalMCE_DISPOSITION_RECOVERED))
        handled = 1;

    if (handled)
        opal_evt->in_use = 0;

    return handled;
}

long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
{
    if (cpu_has_feature(CPU_FTR_ARCH_206))
        return kvmppc_realmode_mc_power7(vcpu);

    return 0;
}
@ -35,6 +35,37 @@ static void *real_vmalloc_addr(void *x)
    return __va(addr);
}

/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
static int global_invalidates(struct kvm *kvm, unsigned long flags)
{
    int global;

    /*
     * If there is only one vcore, and it's currently running,
     * we can use tlbiel as long as we mark all other physical
     * cores as potentially having stale TLB entries for this lpid.
     * If we're not using MMU notifiers, we never take pages away
     * from the guest, so we can use tlbiel if requested.
     * Otherwise, don't use tlbiel.
     */
    if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcore)
        global = 0;
    else if (kvm->arch.using_mmu_notifiers)
        global = 1;
    else
        global = !(flags & H_LOCAL);

    if (!global) {
        /* any other core might now have stale TLB entries... */
        smp_wmb();
        cpumask_setall(&kvm->arch.need_tlb_flush);
        cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu,
                          &kvm->arch.need_tlb_flush);
    }

    return global;
}

/*
 * Add this HPTE into the chain for the real page.
 * Must be called with the chain locked; it unlocks the chain.
@ -59,13 +90,24 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
        head->back = pte_index;
    } else {
        rev->forw = rev->back = pte_index;
        i = pte_index;
        *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
                pte_index | KVMPPC_RMAP_PRESENT;
    }
    smp_wmb();
    *rmap = i | KVMPPC_RMAP_REFERENCED | KVMPPC_RMAP_PRESENT; /* unlock */
    unlock_rmap(rmap);
}
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);

/*
 * Note modification of an HPTE; set the HPTE modified bit
 * if anyone is interested.
 */
static inline void note_hpte_modification(struct kvm *kvm,
                                          struct revmap_entry *rev)
{
    if (atomic_read(&kvm->arch.hpte_mod_interest))
        rev->guest_rpte |= HPTE_GR_MODIFIED;
}

/* Remove this HPTE from the chain for a real page */
static void remove_revmap_chain(struct kvm *kvm, long pte_index,
                                struct revmap_entry *rev,
@ -81,7 +123,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
    ptel = rev->guest_rpte |= rcbits;
    gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
    memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
    if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
    if (!memslot)
        return;

    rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
@ -103,14 +145,14 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
    unlock_rmap(rmap);
}

static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva,
static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
                              int writing, unsigned long *pte_sizep)
{
    pte_t *ptep;
    unsigned long ps = *pte_sizep;
    unsigned int shift;

    ptep = find_linux_pte_or_hugepte(vcpu->arch.pgdir, hva, &shift);
    ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
    if (!ptep)
        return __pte(0);
    if (shift)
@ -130,15 +172,15 @@ static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
    hpte[0] = hpte_v;
}

long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
        long pte_index, unsigned long pteh, unsigned long ptel)
long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
        long pte_index, unsigned long pteh, unsigned long ptel,
        pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
{
    struct kvm *kvm = vcpu->kvm;
    unsigned long i, pa, gpa, gfn, psize;
    unsigned long slot_fn, hva;
    unsigned long *hpte;
    struct revmap_entry *rev;
    unsigned long g_ptel = ptel;
    unsigned long g_ptel;
    struct kvm_memory_slot *memslot;
    unsigned long *physp, pte_size;
    unsigned long is_io;
@ -147,13 +189,14 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
    unsigned int writing;
    unsigned long mmu_seq;
    unsigned long rcbits;
    bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;

    psize = hpte_page_size(pteh, ptel);
    if (!psize)
        return H_PARAMETER;
    writing = hpte_is_writable(ptel);
    pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
    ptel &= ~HPTE_GR_RESERVED;
    g_ptel = ptel;

    /* used later to detect if we might have been invalidated */
    mmu_seq = kvm->mmu_notifier_seq;
@ -183,7 +226,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
    rmap = &memslot->arch.rmap[slot_fn];

    if (!kvm->arch.using_mmu_notifiers) {
        physp = kvm->arch.slot_phys[memslot->id];
        physp = memslot->arch.slot_phys;
        if (!physp)
            return H_PARAMETER;
        physp += slot_fn;
@ -201,7 +244,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,

        /* Look up the Linux PTE for the backing page */
        pte_size = psize;
        pte = lookup_linux_pte(vcpu, hva, writing, &pte_size);
        pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
        if (pte_present(pte)) {
            if (writing && !pte_write(pte))
                /* make the actual HPTE be read-only */
@ -210,6 +253,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
            pa = pte_pfn(pte) << PAGE_SHIFT;
        }
    }

    if (pte_size < psize)
        return H_PARAMETER;
    if (pa && pte_size > psize)
@ -287,8 +331,10 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
    rev = &kvm->arch.revmap[pte_index];
    if (realmode)
        rev = real_vmalloc_addr(rev);
    if (rev)
    if (rev) {
        rev->guest_rpte = g_ptel;
        note_hpte_modification(kvm, rev);
    }

    /* Link HPTE into reverse-map chain */
    if (pteh & HPTE_V_VALID) {
@ -297,7 +343,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
        lock_rmap(rmap);
        /* Check for pending invalidations under the rmap chain lock */
        if (kvm->arch.using_mmu_notifiers &&
            mmu_notifier_retry(vcpu, mmu_seq)) {
            mmu_notifier_retry(kvm, mmu_seq)) {
            /* inval in progress, write a non-present HPTE */
            pteh |= HPTE_V_ABSENT;
            pteh &= ~HPTE_V_VALID;
@ -318,10 +364,17 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
    hpte[0] = pteh;
    asm volatile("ptesync" : : : "memory");

    vcpu->arch.gpr[4] = pte_index;
    *pte_idx_ret = pte_index;
    return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_enter);
EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);

long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
                    long pte_index, unsigned long pteh, unsigned long ptel)
{
    return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
                             vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]);
}

#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))

@ -343,11 +396,10 @@ static inline int try_lock_tlbie(unsigned int *lock)
    return old == 0;
}

long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
                     unsigned long pte_index, unsigned long avpn,
                     unsigned long va)
long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
                        unsigned long pte_index, unsigned long avpn,
                        unsigned long *hpret)
{
    struct kvm *kvm = vcpu->kvm;
    unsigned long *hpte;
    unsigned long v, r, rb;
    struct revmap_entry *rev;
@ -369,7 +421,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
    if (v & HPTE_V_VALID) {
        hpte[0] &= ~HPTE_V_VALID;
        rb = compute_tlbie_rb(v, hpte[1], pte_index);
        if (!(flags & H_LOCAL) && atomic_read(&kvm->online_vcpus) > 1) {
        if (global_invalidates(kvm, flags)) {
            while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
                cpu_relax();
            asm volatile("ptesync" : : : "memory");
@ -385,13 +437,22 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
        /* Read PTE low word after tlbie to get final R/C values */
        remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
    }
    r = rev->guest_rpte;
    r = rev->guest_rpte & ~HPTE_GR_RESERVED;
    note_hpte_modification(kvm, rev);
    unlock_hpte(hpte, 0);

    vcpu->arch.gpr[4] = v;
    vcpu->arch.gpr[5] = r;
    hpret[0] = v;
    hpret[1] = r;
    return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_do_h_remove);

long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
                     unsigned long pte_index, unsigned long avpn)
{
    return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
                              &vcpu->arch.gpr[4]);
}

long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
{
@ -459,6 +520,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)

        args[j] = ((0x80 | flags) << 56) + pte_index;
        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
        note_hpte_modification(kvm, rev);

        if (!(hp[0] & HPTE_V_VALID)) {
            /* insert R and C bits from PTE */
@ -534,8 +596,6 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
        return H_NOT_FOUND;
    }

    if (atomic_read(&kvm->online_vcpus) == 1)
        flags |= H_LOCAL;
    v = hpte[0];
    bits = (flags << 55) & HPTE_R_PP0;
    bits |= (flags << 48) & HPTE_R_KEY_HI;
@ -548,6 +608,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
    if (rev) {
        r = (rev->guest_rpte & ~mask) | bits;
        rev->guest_rpte = r;
        note_hpte_modification(kvm, rev);
    }
    r = (hpte[1] & ~mask) | bits;

@ -555,7 +616,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
    if (v & HPTE_V_VALID) {
        rb = compute_tlbie_rb(v, r, pte_index);
        hpte[0] = v & ~HPTE_V_VALID;
        if (!(flags & H_LOCAL)) {
        if (global_invalidates(kvm, flags)) {
            while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
                cpu_relax();
            asm volatile("ptesync" : : : "memory");
@ -568,6 +629,28 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
            asm volatile("tlbiel %0" : : "r" (rb));
            asm volatile("ptesync" : : : "memory");
        }
        /*
         * If the host has this page as readonly but the guest
         * wants to make it read/write, reduce the permissions.
         * Checking the host permissions involves finding the
         * memslot and then the Linux PTE for the page.
         */
        if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) {
            unsigned long psize, gfn, hva;
            struct kvm_memory_slot *memslot;
            pgd_t *pgdir = vcpu->arch.pgdir;
            pte_t pte;

            psize = hpte_page_size(v, r);
            gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
            memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
            if (memslot) {
                hva = __gfn_to_hva_memslot(memslot, gfn);
                pte = lookup_linux_pte(pgdir, hva, 1, &psize);
                if (pte_present(pte) && !pte_write(pte))
                    r = hpte_make_readonly(r);
            }
        }
    }
    hpte[1] = r;
    eieio();
@ -599,8 +682,10 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
            v &= ~HPTE_V_ABSENT;
            v |= HPTE_V_VALID;
        }
        if (v & HPTE_V_VALID)
        if (v & HPTE_V_VALID) {
            r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
            r &= ~HPTE_GR_RESERVED;
        }
        vcpu->arch.gpr[4 + i * 2] = v;
        vcpu->arch.gpr[5 + i * 2] = r;
    }
@ -27,6 +27,7 @@
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/exception-64s.h>
|
||||
#include <asm/kvm_book3s_asm.h>
|
||||
#include <asm/mmu-hash64.h>
|
||||
|
||||
/*****************************************************************************
|
||||
* *
|
||||
@ -134,8 +135,11 @@ kvm_start_guest:
|
||||
|
||||
27: /* XXX should handle hypervisor maintenance interrupts etc. here */
|
||||
|
||||
/* reload vcpu pointer after clearing the IPI */
|
||||
ld r4,HSTATE_KVM_VCPU(r13)
|
||||
cmpdi r4,0
|
||||
/* if we have no vcpu to run, go back to sleep */
|
||||
beq cr1,kvm_no_guest
|
||||
beq kvm_no_guest
|
||||
|
||||
/* were we napping due to cede? */
|
||||
lbz r0,HSTATE_NAPPING(r13)
|
||||
@ -310,7 +314,33 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
|
||||
mtspr SPRN_SDR1,r6 /* switch to partition page table */
|
||||
mtspr SPRN_LPID,r7
|
||||
isync
|
||||
|
||||
/* See if we need to flush the TLB */
|
||||
lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */
|
||||
clrldi r7,r6,64-6 /* extract bit number (6 bits) */
|
||||
srdi r6,r6,6 /* doubleword number */
|
||||
sldi r6,r6,3 /* address offset */
|
||||
add r6,r6,r9
|
||||
addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */
|
||||
li r0,1
|
||||
sld r0,r0,r7
|
||||
ld r7,0(r6)
|
||||
and. r7,r7,r0
|
||||
beq 22f
|
||||
23: ldarx r7,0,r6 /* if set, clear the bit */
|
||||
andc r7,r7,r0
|
||||
stdcx. r7,0,r6
|
||||
bne 23b
|
||||
li r6,128 /* and flush the TLB */
|
||||
mtctr r6
|
||||
li r7,0x800 /* IS field = 0b10 */
|
||||
ptesync
|
||||
28: tlbiel r7
|
||||
addi r7,r7,0x1000
|
||||
bdnz 28b
|
||||
ptesync
|
||||
|
||||
22: li r0,1
|
||||
stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */
|
||||
b 10f
|
||||
|
||||
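The assembly above open-codes a test-and-clear of this CPU's bit in kvm->arch.need_tlb_flush: the CPU index is split into a doubleword number and a bit number, the bit is cleared with a larx/stcx. loop, and 128 tlbiel iterations then flush the core's TLB. A rough C equivalent of the index arithmetic (a sketch only; the real code must run in real mode with atomics):

    #include <stdio.h>

    /* Split a CPU number the way the asm does: srdi/sldi give the byte
     * offset of the doubleword within the bitmap, clrldi gives the bit. */
    static void need_tlb_flush_index(unsigned int cpu,
                                     unsigned long *dword_offset,
                                     unsigned long *mask)
    {
        *dword_offset = (cpu >> 6) * 8;   /* srdi 6; sldi 3 */
        *mask = 1UL << (cpu & 63);        /* clrldi 64-6 */
    }

    int main(void)
    {
        unsigned long off, mask;
        need_tlb_flush_index(77, &off, &mask);
        printf("cpu 77 -> byte offset %lu, mask 0x%lx\n", off, mask);
        return 0;
    }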
@ -333,36 +363,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
mr r9,r4
blt hdec_soon

/*
* Invalidate the TLB if we could possibly have stale TLB
* entries for this partition on this core due to the use
* of tlbiel.
* XXX maybe only need this on primary thread?
*/
ld r9,VCPU_KVM(r4) /* pointer to struct kvm */
lwz r5,VCPU_VCPUID(r4)
lhz r6,PACAPACAINDEX(r13)
rldimi r6,r5,0,62 /* XXX map as if threads 1:1 p:v */
lhz r8,VCPU_LAST_CPU(r4)
sldi r7,r6,1 /* see if this is the same vcpu */
add r7,r7,r9 /* as last ran on this pcpu */
lhz r0,KVM_LAST_VCPU(r7)
cmpw r6,r8 /* on the same cpu core as last time? */
bne 3f
cmpw r0,r5 /* same vcpu as this core last ran? */
beq 1f
3: sth r6,VCPU_LAST_CPU(r4) /* if not, invalidate partition TLB */
sth r5,KVM_LAST_VCPU(r7)
li r6,128
mtctr r6
li r7,0x800 /* IS field = 0b10 */
ptesync
2: tlbiel r7
addi r7,r7,0x1000
bdnz 2b
ptesync
1:

/* Save purr/spurr */
mfspr r5,SPRN_PURR
mfspr r6,SPRN_SPURR
@ -679,8 +679,7 @@ BEGIN_FTR_SECTION
1:
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)

nohpte_cont:
hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
/* Save DEC */
mfspr r5,SPRN_DEC
mftb r6
@ -701,6 +700,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
std r6, VCPU_FAULT_DAR(r9)
stw r7, VCPU_FAULT_DSISR(r9)

/* See if it is a machine check */
cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
beq machine_check_realmode
mc_cont:

/* Save guest CTRL register, set runlatch to 1 */
6: mfspr r6,SPRN_CTRLF
stw r6,VCPU_CTRL(r9)
@ -1113,38 +1117,41 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
/*
* For external and machine check interrupts, we need
* to call the Linux handler to process the interrupt.
* We do that by jumping to the interrupt vector address
* which we have in r12. The [h]rfid at the end of the
* We do that by jumping to absolute address 0x500 for
* external interrupts, or the machine_check_fwnmi label
* for machine checks (since firmware might have patched
* the vector area at 0x200). The [h]rfid at the end of the
* handler will return to the book3s_hv_interrupts.S code.
* For other interrupts we do the rfid to get back
* to the book3s_interrupts.S code here.
* to the book3s_hv_interrupts.S code here.
*/
ld r8, HSTATE_VMHANDLER(r13)
ld r7, HSTATE_HOST_MSR(r13)

cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
BEGIN_FTR_SECTION
beq 11f
cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)

/* RFI into the highmem handler, or branch to interrupt handler */
12: mfmsr r6
mtctr r12
mfmsr r6
li r0, MSR_RI
andc r6, r6, r0
mtmsrd r6, 1 /* Clear RI in MSR */
mtsrr0 r8
mtsrr1 r7
beqctr
beqa 0x500 /* external interrupt (PPC970) */
beq cr1, 13f /* machine check */
RFI

11:
BEGIN_FTR_SECTION
b 12b
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
mtspr SPRN_HSRR0, r8
/* On POWER7, we have external interrupts set to use HSRR0/1 */
11: mtspr SPRN_HSRR0, r8
mtspr SPRN_HSRR1, r7
ba 0x500

13: b machine_check_fwnmi

/*
* Check whether an HDSI is an HPTE not found fault or something else.
* If it is an HPTE not found fault that is due to the guest accessing
@ -1177,7 +1184,7 @@ kvmppc_hdsi:
cmpdi r3, 0 /* retry the instruction */
beq 6f
cmpdi r3, -1 /* handle in kernel mode */
beq nohpte_cont
beq guest_exit_cont
cmpdi r3, -2 /* MMIO emulation; need instr word */
beq 2f

@ -1191,6 +1198,7 @@ kvmppc_hdsi:
li r10, BOOK3S_INTERRUPT_DATA_STORAGE
li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
rotldi r11, r11, 63
fast_interrupt_c_return:
6: ld r7, VCPU_CTR(r9)
lwz r8, VCPU_XER(r9)
mtctr r7
@ -1223,7 +1231,7 @@ kvmppc_hdsi:
/* Unset guest mode. */
li r0, KVM_GUEST_MODE_NONE
stb r0, HSTATE_IN_GUEST(r13)
b nohpte_cont
b guest_exit_cont

/*
* Similarly for an HISI, reflect it to the guest as an ISI unless
@ -1249,9 +1257,9 @@ kvmppc_hisi:
ld r11, VCPU_MSR(r9)
li r12, BOOK3S_INTERRUPT_H_INST_STORAGE
cmpdi r3, 0 /* retry the instruction */
beq 6f
beq fast_interrupt_c_return
cmpdi r3, -1 /* handle in kernel mode */
beq nohpte_cont
beq guest_exit_cont

/* Synthesize an ISI for the guest */
mr r11, r3
@ -1260,12 +1268,7 @@ kvmppc_hisi:
li r10, BOOK3S_INTERRUPT_INST_STORAGE
li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
rotldi r11, r11, 63
6: ld r7, VCPU_CTR(r9)
lwz r8, VCPU_XER(r9)
mtctr r7
mtxer r8
mr r4, r9
b fast_guest_return
b fast_interrupt_c_return

3: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */
ld r5, KVM_VRMA_SLB_V(r6)
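The cmpdi chains in kvmppc_hdsi and kvmppc_hisi dispatch on the fault-resolution result: 0 retries the instruction, -1 leaves for kernel-mode handling via guest_exit_cont, -2 asks for MMIO emulation, and anything else becomes a synthesized DSI/ISI for the guest. A hedged C sketch of that convention (the enum names are illustrative, not the kernel's):

    #include <stdio.h>

    /* Illustrative encoding of the hpte-fault resolution used above. */
    enum fault_action {
        FAULT_RETRY = 0,      /* page now present: retry the instruction */
        FAULT_TO_KERNEL = -1, /* exit to the host kernel (guest_exit_cont) */
        FAULT_MMIO = -2,      /* emulate: need the faulting instruction word */
    };

    static const char *dispatch(int rc)
    {
        switch (rc) {
        case FAULT_RETRY:     return "retry instruction";
        case FAULT_TO_KERNEL: return "handle in kernel mode";
        case FAULT_MMIO:      return "fetch instr word, emulate MMIO";
        default:              return "synthesize DSI/ISI for the guest";
        }
    }

    int main(void)
    {
        for (int rc = 1; rc >= -2; rc--)
            printf("rc=%d -> %s\n", rc, dispatch(rc));
        return 0;
    }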
@ -1281,14 +1284,14 @@ kvmppc_hisi:
hcall_try_real_mode:
ld r3,VCPU_GPR(R3)(r9)
andi. r0,r11,MSR_PR
bne hcall_real_cont
bne guest_exit_cont
clrrdi r3,r3,2
cmpldi r3,hcall_real_table_end - hcall_real_table
bge hcall_real_cont
bge guest_exit_cont
LOAD_REG_ADDR(r4, hcall_real_table)
lwzx r3,r3,r4
cmpwi r3,0
beq hcall_real_cont
beq guest_exit_cont
add r3,r3,r4
mtctr r3
mr r3,r9 /* get vcpu pointer */
@ -1309,7 +1312,7 @@ hcall_real_fallback:
li r12,BOOK3S_INTERRUPT_SYSCALL
ld r9, HSTATE_KVM_VCPU(r13)

b hcall_real_cont
b guest_exit_cont

.globl hcall_real_table
hcall_real_table:
@ -1568,6 +1571,21 @@ kvm_cede_exit:
li r3,H_TOO_HARD
blr

/* Try to handle a machine check in real mode */
machine_check_realmode:
mr r3, r9 /* get vcpu pointer */
bl .kvmppc_realmode_machine_check
nop
cmpdi r3, 0 /* continue exiting from guest? */
ld r9, HSTATE_KVM_VCPU(r13)
li r12, BOOK3S_INTERRUPT_MACHINE_CHECK
beq mc_cont
/* If not, deliver a machine check. SRR0/1 are already set */
li r10, BOOK3S_INTERRUPT_MACHINE_CHECK
li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
rotldi r11, r11, 63
b fast_interrupt_c_return
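hcall_real_table is a table of 32-bit offsets indexed by hypercall number divided by 4; a zero entry means the hcall cannot be handled in real mode and control falls through to guest_exit_cont. A minimal C sketch of the same dispatch shape, with invented handler names standing in for the real-mode implementations:

    #include <stdio.h>

    typedef long (*hcall_fn)(void);

    /* Hypothetical real-mode handlers, for illustration only. */
    static long h_enter(void)  { return 0; }
    static long h_remove(void) { return 0; }

    #define MAX_HCALL 8
    static const hcall_fn real_mode_table[MAX_HCALL] = {
        [1] = h_enter,   /* slot = hcall number / 4 */
        [2] = h_remove,  /* unset slots stay NULL: fall back */
    };

    static long try_real_mode(unsigned long hcall_nr)
    {
        unsigned long idx = hcall_nr / 4; /* clrrdi + table scaling */
        if (idx >= MAX_HCALL || !real_mode_table[idx])
            return -1; /* leave real mode, let the full handler run */
        return real_mode_table[idx]();
    }

    int main(void)
    {
        printf("hcall 4 -> %ld, hcall 28 -> %ld\n",
               try_real_mode(4), try_real_mode(28));
        return 0;
    }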
secondary_too_late:
ld r5,HSTATE_KVM_VCORE(r13)
HMT_LOW
@ -1587,6 +1605,10 @@ secondary_too_late:
.endr

secondary_nap:
/* Clear our vcpu pointer so we don't come back in early */
li r0, 0
std r0, HSTATE_KVM_VCPU(r13)
lwsync
/* Clear any pending IPI - assume we're a secondary thread */
ld r5, HSTATE_XICS_PHYS(r13)
li r7, XICS_XIRR
@ -1612,8 +1634,6 @@ secondary_nap:
kvm_no_guest:
li r0, KVM_HWTHREAD_IN_NAP
stb r0, HSTATE_HWTHREAD_STATE(r13)
li r0, 0
std r0, HSTATE_KVM_VCPU(r13)

li r3, LPCR_PECE0
mfspr r4, SPRN_LPCR

@ -114,11 +114,6 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
hlist_del_init_rcu(&pte->list_vpte);
hlist_del_init_rcu(&pte->list_vpte_long);

if (pte->pte.may_write)
kvm_release_pfn_dirty(pte->pfn);
else
kvm_release_pfn_clean(pte->pfn);

spin_unlock(&vcpu3s->mmu_lock);

vcpu3s->hpte_cache_count--;

@ -52,8 +52,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
#define MSR_USER32 MSR_USER
#define MSR_USER64 MSR_USER
#define HW_PAGE_SIZE PAGE_SIZE
#define __hard_irq_disable local_irq_disable
#define __hard_irq_enable local_irq_enable
#endif

void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@ -66,7 +64,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
svcpu_put(svcpu);
#endif

vcpu->cpu = smp_processor_id();
#ifdef CONFIG_PPC_BOOK3S_32
current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu;
#endif
@ -83,17 +81,71 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
svcpu_put(svcpu);
#endif

kvmppc_giveup_ext(vcpu, MSR_FP);
kvmppc_giveup_ext(vcpu, MSR_VEC);
kvmppc_giveup_ext(vcpu, MSR_VSX);
kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
vcpu->cpu = -1;
}

int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
{
int r = 1; /* Indicate we want to get back into the guest */

/* We misuse TLB_FLUSH to indicate that we want to clear
all shadow cache entries */
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
kvmppc_mmu_pte_flush(vcpu, 0, 0);

return r;
}

/************* MMU Notifiers *************/

int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
trace_kvm_unmap_hva(hva);

/*
* Flush all shadow tlb entries everywhere. This is slow, but
* we are 100% sure that we catch the to be unmapped page
*/
kvm_flush_remote_tlbs(kvm);

return 0;
}

int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
{
/* kvm_unmap_hva flushes everything anyways */
kvm_unmap_hva(kvm, start);

return 0;
}

int kvm_age_hva(struct kvm *kvm, unsigned long hva)
{
/* XXX could be more clever ;) */
return 0;
}

int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
/* XXX could be more clever ;) */
return 0;
}

void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
/* The page will get remapped properly on its next fault */
kvm_unmap_hva(kvm, hva);
}

/*****************************************/

static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
{
ulong smsr = vcpu->arch.shared->msr;

/* Guest MSR values */
smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE;
smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE;
/* Process MSR values */
smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
/* External providers the guest reserved */
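kvmppc_recalc_shadow_msr builds the MSR the guest actually runs under: a handful of guest-controlled bits pass through, while host-required bits are forced on. A standalone sketch of that split; the bit values below are placeholders, not the real PowerPC MSR layout:

    #include <stdio.h>
    #include <stdint.h>

    /* Placeholder bit assignments, for illustration only. */
    #define MSR_FE0 0x001
    #define MSR_FE1 0x002
    #define MSR_SE  0x004
    #define MSR_BE  0x008
    #define MSR_PR  0x010
    #define MSR_IR  0x020
    #define MSR_DR  0x040
    #define MSR_EE  0x080
    #define MSR_ME  0x100

    static uint32_t recalc_shadow_msr(uint32_t guest_msr)
    {
        /* guest-visible trap/trace bits pass through... */
        uint32_t smsr = guest_msr & (MSR_FE0 | MSR_FE1 | MSR_SE | MSR_BE);
        /* ...while translation, interrupts and problem state are forced on */
        smsr |= MSR_ME | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
        return smsr;
    }

    int main(void)
    {
        printf("shadow msr = 0x%x\n", recalc_shadow_msr(MSR_SE));
        return 0;
    }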
@ -379,10 +431,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,

static inline int get_fpr_index(int i)
{
#ifdef CONFIG_VSX
i *= 2;
#endif
return i;
return i * TS_FPRWIDTH;
}

/* Give up external provider (FPU, Altivec, VSX) */
@ -396,41 +445,49 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
u64 *thread_fpr = (u64*)t->fpr;
int i;

if (!(vcpu->arch.guest_owned_ext & msr))
/*
* VSX instructions can access FP and vector registers, so if
* we are giving up VSX, make sure we give up FP and VMX as well.
*/
if (msr & MSR_VSX)
msr |= MSR_FP | MSR_VEC;

msr &= vcpu->arch.guest_owned_ext;
if (!msr)
return;

#ifdef DEBUG_EXT
printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
#endif

switch (msr) {
case MSR_FP:
if (msr & MSR_FP) {
/*
* Note that on CPUs with VSX, giveup_fpu stores
* both the traditional FP registers and the added VSX
* registers into thread.fpr[].
*/
giveup_fpu(current);
for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];

vcpu->arch.fpscr = t->fpscr.val;
break;
case MSR_VEC:

#ifdef CONFIG_VSX
if (cpu_has_feature(CPU_FTR_VSX))
for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++)
vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
#endif
}

#ifdef CONFIG_ALTIVEC
if (msr & MSR_VEC) {
giveup_altivec(current);
memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
vcpu->arch.vscr = t->vscr;
#endif
break;
case MSR_VSX:
#ifdef CONFIG_VSX
__giveup_vsx(current);
for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
#endif
break;
default:
BUG();
}
#endif

vcpu->arch.guest_owned_ext &= ~msr;
current->thread.regs->msr &= ~msr;
vcpu->arch.guest_owned_ext &= ~(msr | MSR_VSX);
kvmppc_recalc_shadow_msr(vcpu);
}

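get_fpr_index now scales by TS_FPRWIDTH: with VSX, each architected FP register occupies the even doubleword of a 128-bit slot in thread.fpr[], and the odd doubleword (index + 1) holds the VSX companion half. A standalone C sketch of that layout, with invented contents:

    #include <stdio.h>

    #define TS_FPRWIDTH 2 /* doublewords per FP slot when VSX is present */

    static int get_fpr_index(int i)
    {
        return i * TS_FPRWIDTH;
    }

    int main(void)
    {
        unsigned long long thread_fpr[64] = { 0 };
        /* FP register 3 lives in the even doubleword... */
        thread_fpr[get_fpr_index(3)] = 0x4008000000000000ULL; /* 3.0 */
        /* ...and its VSX companion in the odd one */
        thread_fpr[get_fpr_index(3) + 1] = 0xdeadbeefULL;
        printf("fpr3 at slot %d, vsx half at slot %d\n",
               get_fpr_index(3), get_fpr_index(3) + 1);
        return 0;
    }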
@ -490,47 +547,56 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
return RESUME_GUEST;
}

/* We already own the ext */
if (vcpu->arch.guest_owned_ext & msr) {
return RESUME_GUEST;
if (msr == MSR_VSX) {
/* No VSX? Give an illegal instruction interrupt */
#ifdef CONFIG_VSX
if (!cpu_has_feature(CPU_FTR_VSX))
#endif
{
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
return RESUME_GUEST;
}

/*
* We have to load up all the FP and VMX registers before
* we can let the guest use VSX instructions.
*/
msr = MSR_FP | MSR_VEC | MSR_VSX;
}

/* See if we already own all the ext(s) needed */
msr &= ~vcpu->arch.guest_owned_ext;
if (!msr)
return RESUME_GUEST;

#ifdef DEBUG_EXT
printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
#endif

current->thread.regs->msr |= msr;

switch (msr) {
case MSR_FP:
if (msr & MSR_FP) {
for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];

#ifdef CONFIG_VSX
for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++)
thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
#endif
t->fpscr.val = vcpu->arch.fpscr;
t->fpexc_mode = 0;
kvmppc_load_up_fpu();
break;
case MSR_VEC:
}

if (msr & MSR_VEC) {
#ifdef CONFIG_ALTIVEC
memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
t->vscr = vcpu->arch.vscr;
t->vrsave = -1;
kvmppc_load_up_altivec();
#endif
break;
case MSR_VSX:
#ifdef CONFIG_VSX
for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
kvmppc_load_up_vsx();
#endif
break;
default:
BUG();
}

vcpu->arch.guest_owned_ext |= msr;

kvmppc_recalc_shadow_msr(vcpu);

return RESUME_GUEST;
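The facility-ownership bookkeeping above reduces to two mask operations: widen a VSX request to include FP and Altivec, then drop whatever is already owned. A tiny sketch with placeholder bit values:

    #include <stdio.h>

    #define MSR_FP  0x1
    #define MSR_VEC 0x2
    #define MSR_VSX 0x4 /* placeholder bit values, for illustration */

    static unsigned long exts_to_load(unsigned long msr, unsigned long owned)
    {
        if (msr & MSR_VSX)            /* VSX touches FP and vector state too */
            msr |= MSR_FP | MSR_VEC;
        return msr & ~owned;          /* only load what we don't own yet */
    }

    int main(void)
    {
        /* guest already owns FP; a VSX fault must still load VEC and VSX */
        printf("to load: 0x%lx\n", exts_to_load(MSR_VSX, MSR_FP));
        return 0;
    }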
@ -540,18 +606,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int exit_nr)
{
int r = RESUME_HOST;
int s;

vcpu->stat.sum_exits++;

run->exit_reason = KVM_EXIT_UNKNOWN;
run->ready_for_interrupt_injection = 1;

/* We get here with MSR.EE=0, so enable it to be a nice citizen */
__hard_irq_enable();
/* We get here with MSR.EE=1 */

trace_kvm_exit(exit_nr, vcpu);
kvm_guest_exit();

trace_kvm_book3s_exit(exit_nr, vcpu);
preempt_enable();
kvm_resched(vcpu);
switch (exit_nr) {
case BOOK3S_INTERRUPT_INST_STORAGE:
{
@ -802,7 +868,6 @@ program_interrupt:
}
}

preempt_disable();
if (!(r & RESUME_HOST)) {
/* To avoid clobbering exit_reason, only check for signals if
* we aren't already exiting to userspace for some other
@ -814,20 +879,13 @@ program_interrupt:
* and if we really did time things so badly, then we just exit
* again due to a host external interrupt.
*/
__hard_irq_disable();
if (signal_pending(current)) {
__hard_irq_enable();
#ifdef EXIT_DEBUG
printk(KERN_EMERG "KVM: Going back to host\n");
#endif
vcpu->stat.signal_exits++;
run->exit_reason = KVM_EXIT_INTR;
r = -EINTR;
local_irq_disable();
s = kvmppc_prepare_to_enter(vcpu);
if (s <= 0) {
local_irq_enable();
r = s;
} else {
/* In case an interrupt came in that was triggered
* from userspace (like DEC), we need to check what
* to inject now! */
kvmppc_core_prepare_to_enter(vcpu);
kvmppc_lazy_ee_enable();
}
}

@ -899,34 +957,59 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return 0;
}

int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
{
int r = -EINVAL;
int r = 0;

switch (reg->id) {
switch (id) {
case KVM_REG_PPC_HIOR:
r = copy_to_user((u64 __user *)(long)reg->addr,
&to_book3s(vcpu)->hior, sizeof(u64));
*val = get_reg_val(id, to_book3s(vcpu)->hior);
break;
#ifdef CONFIG_VSX
case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: {
long int i = id - KVM_REG_PPC_VSR0;

if (!cpu_has_feature(CPU_FTR_VSX)) {
r = -ENXIO;
break;
}
val->vsxval[0] = vcpu->arch.fpr[i];
val->vsxval[1] = vcpu->arch.vsr[i];
break;
}
#endif /* CONFIG_VSX */
default:
r = -EINVAL;
break;
}

return r;
}

int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
{
int r = -EINVAL;
int r = 0;

switch (reg->id) {
switch (id) {
case KVM_REG_PPC_HIOR:
r = copy_from_user(&to_book3s(vcpu)->hior,
(u64 __user *)(long)reg->addr, sizeof(u64));
if (!r)
to_book3s(vcpu)->hior_explicit = true;
to_book3s(vcpu)->hior = set_reg_val(id, *val);
to_book3s(vcpu)->hior_explicit = true;
break;
#ifdef CONFIG_VSX
case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: {
long int i = id - KVM_REG_PPC_VSR0;

if (!cpu_has_feature(CPU_FTR_VSX)) {
r = -ENXIO;
break;
}
vcpu->arch.fpr[i] = val->vsxval[0];
vcpu->arch.vsr[i] = val->vsxval[1];
break;
}
#endif /* CONFIG_VSX */
default:
r = -EINVAL;
break;
}

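From userspace these per-register accessors are reached through the ONE_REG ioctls documented earlier: the caller passes a register id and a pointer to a buffer. A hedged sketch of reading KVM_REG_PPC_HIOR (error handling trimmed; it assumes a powerpc host whose headers define the PPC register ids, and an already-created vcpu file descriptor):

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Read one 64-bit register via KVM_GET_ONE_REG; vcpu_fd must be an
     * open KVM vcpu file descriptor obtained via KVM_CREATE_VCPU. */
    static int get_hior(int vcpu_fd, uint64_t *hior)
    {
        struct kvm_one_reg reg = {
            .id   = KVM_REG_PPC_HIOR,
            .addr = (uintptr_t)hior,
        };
        return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
    }

The same shape works for every id in the register table earlier in this document; only the id and the width of the user buffer change.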
@ -1020,8 +1103,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
#endif
ulong ext_msr;

preempt_disable();

/* Check if we can run the vcpu at all */
if (!vcpu->arch.sane) {
kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
@ -1029,21 +1110,16 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
goto out;
}

kvmppc_core_prepare_to_enter(vcpu);

/*
* Interrupts could be timers for the guest which we have to inject
* again, so let's postpone them until we're in the guest and if we
* really did time things so badly, then we just exit again due to
* a host external interrupt.
*/
__hard_irq_disable();

/* No need to go into the guest when all we do is going out */
if (signal_pending(current)) {
__hard_irq_enable();
kvm_run->exit_reason = KVM_EXIT_INTR;
ret = -EINTR;
local_irq_disable();
ret = kvmppc_prepare_to_enter(vcpu);
if (ret <= 0) {
local_irq_enable();
goto out;
}

@ -1070,7 +1146,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
/* Save VSX state in stack */
used_vsr = current->thread.used_vsr;
if (used_vsr && (current->thread.regs->msr & MSR_VSX))
__giveup_vsx(current);
__giveup_vsx(current);
#endif

/* Remember the MSR with disabled extensions */
@ -1080,20 +1156,19 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
if (vcpu->arch.shared->msr & MSR_FP)
kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);

kvm_guest_enter();
kvmppc_lazy_ee_enable();

ret = __kvmppc_vcpu_run(kvm_run, vcpu);

kvm_guest_exit();
/* No need for kvm_guest_exit. It's done in handle_exit.
We also get here with interrupts enabled. */

/* Make sure we save the guest FPU/Altivec/VSX state */
kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);

current->thread.regs->msr = ext_msr;

/* Make sure we save the guest FPU/Altivec/VSX state */
kvmppc_giveup_ext(vcpu, MSR_FP);
kvmppc_giveup_ext(vcpu, MSR_VEC);
kvmppc_giveup_ext(vcpu, MSR_VSX);

/* Restore FPU state from stack */
/* Restore FPU/VSX state from stack */
memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
current->thread.fpscr.val = fpscr;
current->thread.fpexc_mode = fpexc_mode;
@ -1113,7 +1188,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
#endif

out:
preempt_enable();
vcpu->mode = OUTSIDE_GUEST_MODE;
return ret;
}

@ -1181,14 +1256,31 @@ int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
}
#endif /* CONFIG_PPC64 */

void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
}

int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
unsigned long npages)
{
return 0;
}

int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem)
{
return 0;
}

void kvmppc_core_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem)
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old)
{
}

void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
}

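The entry path now follows one convention across book3s_pr and booke: disable interrupts, call kvmppc_prepare_to_enter, treat a return value of zero or below as "bail out to userspace with that code", and otherwise enter the guest with lazy EE enabled. A hedged sketch of that shape (the names mirror the diff; the bodies are stubs standing in for the kernel primitives):

    #include <stdio.h>

    /* Stubs standing in for the real kernel primitives. */
    static void local_irq_disable(void) { }
    static void local_irq_enable(void)  { }
    static int  prepare_to_enter(void)  { return 1; } /* >0: ok to enter */
    static void lazy_ee_enable(void)    { }
    static int  run_guest(void)         { return 0; }

    static int vcpu_run(void)
    {
        int s;

        local_irq_disable();
        s = prepare_to_enter();
        if (s <= 0) {            /* signal/request pending: exit to user */
            local_irq_enable();
            return s;
        }
        lazy_ee_enable();        /* hard-enable EE lazily on guest entry */
        return run_guest();
    }

    int main(void)
    {
        printf("vcpu_run -> %d\n", vcpu_run());
        return 0;
    }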

@ -170,20 +170,21 @@ kvmppc_handler_skip_ins:
* Call kvmppc_handler_trampoline_enter in real mode
*
* On entry, r4 contains the guest shadow MSR
* MSR.EE has to be 0 when calling this function
*/
_GLOBAL(kvmppc_entry_trampoline)
mfmsr r5
LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter)
toreal(r7)

li r9, MSR_RI
ori r9, r9, MSR_EE
andc r9, r5, r9 /* Clear EE and RI in MSR value */
li r6, MSR_IR | MSR_DR
ori r6, r6, MSR_EE
andc r6, r5, r6 /* Clear EE, DR and IR in MSR value */
MTMSR_EERI(r9) /* Clear EE and RI in MSR */
mtsrr0 r7 /* before we set srr0/1 */
andc r6, r5, r6 /* Clear DR and IR in MSR value */
/*
* Set EE in HOST_MSR so that it's enabled when we get into our
* C exit handler function
*/
ori r5, r5, MSR_EE
mtsrr0 r7
mtsrr1 r6
RFI

@ -233,8 +234,5 @@ define_load_up(fpu)
#ifdef CONFIG_ALTIVEC
define_load_up(altivec)
#endif
#ifdef CONFIG_VSX
define_load_up(vsx)
#endif

#include "book3s_segment.S"
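The reworked trampoline derives three MSR values from the current one: an intermediate value with EE and RI cleared (installed immediately), a real-mode value with DR and IR also cleared (installed by the RFI), and a host value with EE forced on for the C exit handler. A sketch of those derivations with placeholder bit values, for illustration only:

    #include <stdio.h>
    #include <stdint.h>

    /* Placeholder MSR bits, for illustration only. */
    #define MSR_EE 0x8000
    #define MSR_RI 0x0002
    #define MSR_IR 0x0020
    #define MSR_DR 0x0010

    int main(void)
    {
        uint64_t msr = MSR_EE | MSR_RI | MSR_IR | MSR_DR;
        uint64_t no_ee_ri  = msr & ~(MSR_EE | MSR_RI); /* MTMSR_EERI */
        uint64_t real_mode = msr & ~(MSR_DR | MSR_IR); /* mtsrr1; RFI */
        uint64_t host_msr  = msr | MSR_EE;             /* for the C exit path */

        printf("0x%llx 0x%llx 0x%llx\n",
               (unsigned long long)no_ee_ri,
               (unsigned long long)real_mode,
               (unsigned long long)host_msr);
        return 0;
    }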

@ -36,9 +36,11 @@
#include <asm/dbell.h>
#include <asm/hw_irq.h>
#include <asm/irq.h>
#include <asm/time.h>

#include "timing.h"
#include "booke.h"
#include "trace.h"

unsigned long kvmppc_booke_handlers;

@ -62,6 +64,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ "doorbell", VCPU_STAT(dbell_exits) },
{ "guest doorbell", VCPU_STAT(gdbell_exits) },
{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
{ NULL }
};

@ -120,6 +123,16 @@ static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
}
#endif

static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
{
#if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV)
/* We always treat the FP bit as enabled from the host
perspective, so only need to adjust the shadow MSR */
vcpu->arch.shadow_msr &= ~MSR_FP;
vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_FP;
#endif
}

/*
* Helper function for "full" MSR writes. No need to call this if only
* EE/CE/ME/DE/RI are changing.
@ -136,11 +149,13 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)

kvmppc_mmu_msr_notify(vcpu, old_msr);
kvmppc_vcpu_sync_spe(vcpu);
kvmppc_vcpu_sync_fpu(vcpu);
}

static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
unsigned int priority)
{
trace_kvm_booke_queue_irqprio(vcpu, priority);
set_bit(priority, &vcpu->arch.pending_exceptions);
}

@ -206,6 +221,16 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
}

static void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
{
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
}

static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
{
clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
}

static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
{
#ifdef CONFIG_KVM_BOOKE_HV
@ -287,6 +312,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
bool crit;
bool keep_irq = false;
enum int_class int_class;
ulong new_msr = vcpu->arch.shared->msr;

/* Truncate crit indicators in 32 bit mode */
if (!(vcpu->arch.shared->msr & MSR_SF)) {
@ -325,6 +351,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
msr_mask = MSR_CE | MSR_ME | MSR_DE;
int_class = INT_CLASS_NONCRIT;
break;
case BOOKE_IRQPRIO_WATCHDOG:
case BOOKE_IRQPRIO_CRITICAL:
case BOOKE_IRQPRIO_DBELL_CRIT:
allowed = vcpu->arch.shared->msr & MSR_CE;
@ -381,7 +408,13 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
set_guest_esr(vcpu, vcpu->arch.queued_esr);
if (update_dear == true)
set_guest_dear(vcpu, vcpu->arch.queued_dear);
kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask);

new_msr &= msr_mask;
#if defined(CONFIG_64BIT)
if (vcpu->arch.epcr & SPRN_EPCR_ICM)
new_msr |= MSR_CM;
#endif
kvmppc_set_msr(vcpu, new_msr);

if (!keep_irq)
clear_bit(priority, &vcpu->arch.pending_exceptions);
@ -404,12 +437,121 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
return allowed;
}

/*
* Return the number of jiffies until the next timeout. If the timeout is
* longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA
* because the larger value can break the timer APIs.
*/
static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
{
u64 tb, wdt_tb, wdt_ticks = 0;
u64 nr_jiffies = 0;
u32 period = TCR_GET_WP(vcpu->arch.tcr);

wdt_tb = 1ULL << (63 - period);
tb = get_tb();
/*
* The watchdog timeout will happen when the TB bit corresponding
* to the watchdog toggles from 0 to 1.
*/
if (tb & wdt_tb)
wdt_ticks = wdt_tb;

wdt_ticks += wdt_tb - (tb & (wdt_tb - 1));

/* Convert timebase ticks to jiffies */
nr_jiffies = wdt_ticks;

if (do_div(nr_jiffies, tb_ticks_per_jiffy))
nr_jiffies++;

return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
}

static void arm_next_watchdog(struct kvm_vcpu *vcpu)
{
unsigned long nr_jiffies;
unsigned long flags;

/*
* If TSR_ENW and TSR_WIS are not set then no need to exit to
* userspace, so clear the KVM_REQ_WATCHDOG request.
*/
if ((vcpu->arch.tsr & (TSR_ENW | TSR_WIS)) != (TSR_ENW | TSR_WIS))
clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);

spin_lock_irqsave(&vcpu->arch.wdt_lock, flags);
nr_jiffies = watchdog_next_timeout(vcpu);
/*
* If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA
* then do not run the watchdog timer as this can break timer APIs.
*/
if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
else
del_timer(&vcpu->arch.wdt_timer);
spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags);
}

void kvmppc_watchdog_func(unsigned long data)
{
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
u32 tsr, new_tsr;
int final;

do {
new_tsr = tsr = vcpu->arch.tsr;
final = 0;

/* Time out event */
if (tsr & TSR_ENW) {
if (tsr & TSR_WIS)
final = 1;
else
new_tsr = tsr | TSR_WIS;
} else {
new_tsr = tsr | TSR_ENW;
}
} while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr);

if (new_tsr & TSR_WIS) {
smp_wmb();
kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
kvm_vcpu_kick(vcpu);
}

/*
* If this is final watchdog expiry and some action is required
* then exit to userspace.
*/
if (final && (vcpu->arch.tcr & TCR_WRC_MASK) &&
vcpu->arch.watchdog_enabled) {
smp_wmb();
kvm_make_request(KVM_REQ_WATCHDOG, vcpu);
kvm_vcpu_kick(vcpu);
}

/*
* Stop running the watchdog timer after final expiration to
* prevent the host from being flooded with timers if the
* guest sets a short period.
* Timers will resume when TSR/TCR is updated next time.
*/
if (!final)
arm_next_watchdog(vcpu);
}

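watchdog_next_timeout's arithmetic is worth seeing in isolation: with watchdog period bit p (wdt_tb = 1 << (63 - p)), the next 0-to-1 toggle of that timebase bit is one full wdt_tb period away if the bit is currently set, plus the distance to the next wdt_tb boundary. A standalone check of that math:

    #include <stdio.h>
    #include <stdint.h>

    /* Ticks until timebase bit (1ULL << (63 - period)) next toggles 0 -> 1. */
    static uint64_t wdt_ticks_remaining(uint64_t tb, unsigned int period)
    {
        uint64_t wdt_tb = 1ULL << (63 - period);
        uint64_t ticks = (tb & wdt_tb) ? wdt_tb : 0;

        return ticks + wdt_tb - (tb & (wdt_tb - 1));
    }

    int main(void)
    {
        /* period 60 -> bit 3 of the timebase, i.e. a toggle every 8 ticks */
        for (uint64_t tb = 0; tb < 16; tb += 5)
            printf("tb=%llu -> %llu ticks\n",
                   (unsigned long long)tb,
                   (unsigned long long)wdt_ticks_remaining(tb, 60));
        return 0;
    }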
static void update_timer_ints(struct kvm_vcpu *vcpu)
{
if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
kvmppc_core_queue_dec(vcpu);
else
kvmppc_core_dequeue_dec(vcpu);

if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS))
kvmppc_core_queue_watchdog(vcpu);
else
kvmppc_core_dequeue_watchdog(vcpu);
}

static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
@ -417,13 +559,6 @@ static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
unsigned long *pending = &vcpu->arch.pending_exceptions;
unsigned int priority;

if (vcpu->requests) {
if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) {
smp_mb();
update_timer_ints(vcpu);
}
}

priority = __ffs(*pending);
while (priority < BOOKE_IRQPRIO_MAX) {
if (kvmppc_booke_irqprio_deliver(vcpu, priority))
@ -459,37 +594,20 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
return r;
}

/*
* Common checks before entering the guest world. Call with interrupts
* disabled.
*
* returns !0 if a signal is pending and check_signal is true
*/
static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
{
int r = 0;
int r = 1; /* Indicate we want to get back into the guest */

WARN_ON_ONCE(!irqs_disabled());
while (true) {
if (need_resched()) {
local_irq_enable();
cond_resched();
local_irq_disable();
continue;
}
if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu))
update_timer_ints(vcpu);
#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
kvmppc_core_flush_tlb(vcpu);
#endif

if (signal_pending(current)) {
r = 1;
break;
}

if (kvmppc_core_prepare_to_enter(vcpu)) {
/* interrupts got enabled in between, so we
are back at square 1 */
continue;
}

break;
if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_WATCHDOG;
r = 0;
}

return r;
@ -497,7 +615,7 @@ static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)

int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
int ret;
int ret, s;
#ifdef CONFIG_PPC_FPU
unsigned int fpscr;
int fpexc_mode;
@ -510,11 +628,13 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
}

local_irq_disable();
if (kvmppc_prepare_to_enter(vcpu)) {
kvm_run->exit_reason = KVM_EXIT_INTR;
ret = -EINTR;
s = kvmppc_prepare_to_enter(vcpu);
if (s <= 0) {
local_irq_enable();
ret = s;
goto out;
}
kvmppc_lazy_ee_enable();

kvm_guest_enter();

@ -542,6 +662,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)

ret = __kvmppc_vcpu_run(kvm_run, vcpu);

/* No need for kvm_guest_exit. It's done in handle_exit.
We also get here with interrupts enabled. */

#ifdef CONFIG_PPC_FPU
kvmppc_save_guest_fp(vcpu);

@ -557,10 +680,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
current->thread.fpexc_mode = fpexc_mode;
#endif

kvm_guest_exit();

out:
local_irq_enable();
vcpu->mode = OUTSIDE_GUEST_MODE;
return ret;
}

@ -668,6 +789,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int exit_nr)
{
int r = RESUME_HOST;
int s;

/* update before a new last_exit_type is rewritten */
kvmppc_update_timing_stats(vcpu);
@ -677,6 +799,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,

local_irq_enable();

trace_kvm_exit(exit_nr, vcpu);
kvm_guest_exit();

run->exit_reason = KVM_EXIT_UNKNOWN;
run->ready_for_interrupt_injection = 1;

@ -971,10 +1096,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
if (!(r & RESUME_HOST)) {
local_irq_disable();
if (kvmppc_prepare_to_enter(vcpu)) {
run->exit_reason = KVM_EXIT_INTR;
r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
kvmppc_account_exit(vcpu, SIGNAL_EXITS);
s = kvmppc_prepare_to_enter(vcpu);
if (s <= 0) {
local_irq_enable();
r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
} else {
kvmppc_lazy_ee_enable();
}
}

@ -1011,6 +1138,21 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
return r;
}

int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
{
/* setup watchdog timer once */
spin_lock_init(&vcpu->arch.wdt_lock);
setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func,
(unsigned long)vcpu);

return 0;
}

void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
del_timer_sync(&vcpu->arch.wdt_timer);
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
@ -1106,7 +1248,13 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
}

if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
u32 old_tsr = vcpu->arch.tsr;

vcpu->arch.tsr = sregs->u.e.tsr;

if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
arm_next_watchdog(vcpu);

update_timer_ints(vcpu);
}

@ -1221,12 +1369,70 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,

int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
return -EINVAL;
int r = -EINVAL;

switch (reg->id) {
case KVM_REG_PPC_IAC1:
case KVM_REG_PPC_IAC2:
case KVM_REG_PPC_IAC3:
case KVM_REG_PPC_IAC4: {
int iac = reg->id - KVM_REG_PPC_IAC1;
r = copy_to_user((u64 __user *)(long)reg->addr,
&vcpu->arch.dbg_reg.iac[iac], sizeof(u64));
break;
}
case KVM_REG_PPC_DAC1:
case KVM_REG_PPC_DAC2: {
int dac = reg->id - KVM_REG_PPC_DAC1;
r = copy_to_user((u64 __user *)(long)reg->addr,
&vcpu->arch.dbg_reg.dac[dac], sizeof(u64));
break;
}
#if defined(CONFIG_64BIT)
case KVM_REG_PPC_EPCR:
r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr);
break;
#endif
default:
break;
}
return r;
}

int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
return -EINVAL;
int r = -EINVAL;

switch (reg->id) {
case KVM_REG_PPC_IAC1:
case KVM_REG_PPC_IAC2:
case KVM_REG_PPC_IAC3:
case KVM_REG_PPC_IAC4: {
int iac = reg->id - KVM_REG_PPC_IAC1;
r = copy_from_user(&vcpu->arch.dbg_reg.iac[iac],
(u64 __user *)(long)reg->addr, sizeof(u64));
break;
}
case KVM_REG_PPC_DAC1:
case KVM_REG_PPC_DAC2: {
int dac = reg->id - KVM_REG_PPC_DAC1;
r = copy_from_user(&vcpu->arch.dbg_reg.dac[dac],
(u64 __user *)(long)reg->addr, sizeof(u64));
break;
}
#if defined(CONFIG_64BIT)
case KVM_REG_PPC_EPCR: {
u32 new_epcr;
r = get_user(new_epcr, (u32 __user *)(long)reg->addr);
if (r == 0)
kvmppc_set_epcr(vcpu, new_epcr);
break;
}
#endif
default:
break;
}
return r;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
@ -1253,20 +1459,50 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
return -ENOTSUPP;
}

void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
}

int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
unsigned long npages)
{
return 0;
}

int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem)
{
return 0;
}

void kvmppc_core_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem)
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old)
{
}

void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
}

void kvmppc_set_epcr(struct kvm_vcpu *vcpu, u32 new_epcr)
{
#if defined(CONFIG_64BIT)
vcpu->arch.epcr = new_epcr;
#ifdef CONFIG_KVM_BOOKE_HV
vcpu->arch.shadow_epcr &= ~SPRN_EPCR_GICM;
if (vcpu->arch.epcr & SPRN_EPCR_ICM)
vcpu->arch.shadow_epcr |= SPRN_EPCR_GICM;
#endif
#endif
}

void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
{
vcpu->arch.tcr = new_tcr;
arm_next_watchdog(vcpu);
update_timer_ints(vcpu);
}

@ -1281,6 +1517,14 @@ void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
{
clear_bits(tsr_bits, &vcpu->arch.tsr);

/*
* We may have stopped the watchdog due to
* being stuck on final expiration.
*/
if (tsr_bits & (TSR_ENW | TSR_WIS))
arm_next_watchdog(vcpu);

update_timer_ints(vcpu);
}

@ -1298,12 +1542,14 @@ void kvmppc_decrementer_func(unsigned long data)

void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
vcpu->cpu = smp_processor_id();
current->thread.kvm_vcpu = vcpu;
}

void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
{
current->thread.kvm_vcpu = NULL;
vcpu->cpu = -1;
}

int __init kvmppc_booke_init(void)

@ -69,6 +69,7 @@ extern unsigned long kvmppc_booke_handlers;
void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr);
void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr);

void kvmppc_set_epcr(struct kvm_vcpu *vcpu, u32 new_epcr);
void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr);
void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);

@ -133,10 +133,10 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
vcpu->arch.csrr1 = spr_val;
break;
case SPRN_DBCR0:
vcpu->arch.dbcr0 = spr_val;
vcpu->arch.dbg_reg.dbcr0 = spr_val;
break;
case SPRN_DBCR1:
vcpu->arch.dbcr1 = spr_val;
vcpu->arch.dbg_reg.dbcr1 = spr_val;
break;
case SPRN_DBSR:
vcpu->arch.dbsr &= ~spr_val;
@ -145,6 +145,14 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
kvmppc_clr_tsr_bits(vcpu, spr_val);
break;
case SPRN_TCR:
/*
* WRC is a 2-bit field that is supposed to preserve its
* value once written to non-zero.
*/
if (vcpu->arch.tcr & TCR_WRC_MASK) {
spr_val &= ~TCR_WRC_MASK;
spr_val |= vcpu->arch.tcr & TCR_WRC_MASK;
}
kvmppc_set_tcr(vcpu, spr_val);
break;

@ -229,7 +237,17 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
case SPRN_IVOR15:
vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = spr_val;
break;

case SPRN_MCSR:
vcpu->arch.mcsr &= ~spr_val;
break;
#if defined(CONFIG_64BIT)
case SPRN_EPCR:
kvmppc_set_epcr(vcpu, spr_val);
#ifdef CONFIG_KVM_BOOKE_HV
mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr);
#endif
break;
#endif
default:
emulated = EMULATE_FAIL;
}
@ -258,10 +276,10 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
*spr_val = vcpu->arch.csrr1;
break;
case SPRN_DBCR0:
*spr_val = vcpu->arch.dbcr0;
*spr_val = vcpu->arch.dbg_reg.dbcr0;
break;
case SPRN_DBCR1:
*spr_val = vcpu->arch.dbcr1;
*spr_val = vcpu->arch.dbg_reg.dbcr1;
break;
case SPRN_DBSR:
*spr_val = vcpu->arch.dbsr;
@ -321,6 +339,14 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
case SPRN_IVOR15:
*spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
break;
case SPRN_MCSR:
*spr_val = vcpu->arch.mcsr;
break;
#if defined(CONFIG_64BIT)
case SPRN_EPCR:
*spr_val = vcpu->arch.epcr;
break;
#endif

default:
emulated = EMULATE_FAIL;
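The SPRN_TCR emulation above implements a write-once field: once WRC is non-zero, later writes keep the old WRC bits. A standalone check of that rule (the TCR_WRC_MASK value here is illustrative, not the architected one):

    #include <stdio.h>
    #include <stdint.h>

    #define TCR_WRC_MASK 0x30000000u /* assumed position of the 2-bit WRC field */

    static uint32_t tcr_write(uint32_t old_tcr, uint32_t new_val)
    {
        if (old_tcr & TCR_WRC_MASK) {       /* WRC already set: preserve it */
            new_val &= ~TCR_WRC_MASK;
            new_val |= old_tcr & TCR_WRC_MASK;
        }
        return new_val;
    }

    int main(void)
    {
        uint32_t tcr = tcr_write(0, 0x10000000);  /* first non-zero write sticks */
        tcr = tcr_write(tcr, 0x20000000);         /* later write is ignored */
        printf("tcr = 0x%x\n", tcr);              /* still 0x10000000 */
        return 0;
    }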

@ -16,6 +16,7 @@
*
* Author: Varun Sethi <varun.sethi@freescale.com>
* Author: Scott Wood <scottwood@freescale.com>
* Author: Mihai Caraman <mihai.caraman@freescale.com>
*
* This file is derived from arch/powerpc/kvm/booke_interrupts.S
*/
@ -30,31 +31,33 @@
#include <asm/bitsperlong.h>
#include <asm/thread_info.h>

#ifdef CONFIG_64BIT
#include <asm/exception-64e.h>
#else
#include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */

#define GET_VCPU(vcpu, thread) \
PPC_LL vcpu, THREAD_KVM_VCPU(thread)
#endif

#define LONGBYTES (BITS_PER_LONG / 8)

#define VCPU_GUEST_SPRG(n) (VCPU_GUEST_SPRGS + (n * LONGBYTES))

/* The host stack layout: */
#define HOST_R1 (0 * LONGBYTES) /* Implied by stwu. */
#define HOST_CALLEE_LR (1 * LONGBYTES)
#define HOST_RUN (2 * LONGBYTES) /* struct kvm_run */
#define HOST_R1 0 /* Implied by stwu. */
#define HOST_CALLEE_LR PPC_LR_STKOFF
#define HOST_RUN (HOST_CALLEE_LR + LONGBYTES)
/*
* r2 is special: it holds 'current', and it made nonvolatile in the
* kernel with the -ffixed-r2 gcc option.
*/
#define HOST_R2 (3 * LONGBYTES)
#define HOST_CR (4 * LONGBYTES)
#define HOST_NV_GPRS (5 * LONGBYTES)
#define HOST_R2 (HOST_RUN + LONGBYTES)
#define HOST_CR (HOST_R2 + LONGBYTES)
#define HOST_NV_GPRS (HOST_CR + LONGBYTES)
#define __HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * LONGBYTES))
#define HOST_NV_GPR(n) __HOST_NV_GPR(__REG_##n)
#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + LONGBYTES)
#define HOST_STACK_SIZE ((HOST_MIN_STACK_SIZE + 15) & ~15) /* Align. */
#define HOST_STACK_LR (HOST_STACK_SIZE + LONGBYTES) /* In caller stack frame. */
/* LR in caller stack frame. */
#define HOST_STACK_LR (HOST_STACK_SIZE + PPC_LR_STKOFF)

#define NEED_EMU 0x00000001 /* emulation -- save nv regs */
#define NEED_DEAR 0x00000002 /* save faulting DEAR */
@ -201,12 +204,128 @@
b kvmppc_resume_host
.endm

#ifdef CONFIG_64BIT
/* Exception types */
#define EX_GEN 1
#define EX_GDBELL 2
#define EX_DBG 3
#define EX_MC 4
#define EX_CRIT 5
#define EX_TLB 6

/*
* For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
*/
.macro kvm_handler intno type scratch, paca_ex, ex_r10, ex_r11, srr0, srr1, flags
_GLOBAL(kvmppc_handler_\intno\()_\srr1)
mr r11, r4
/*
* Get vcpu from Paca: paca->__current.thread->kvm_vcpu
*/
PPC_LL r4, PACACURRENT(r13)
PPC_LL r4, (THREAD + THREAD_KVM_VCPU)(r4)
stw r10, VCPU_CR(r4)
PPC_STL r11, VCPU_GPR(R4)(r4)
PPC_STL r5, VCPU_GPR(R5)(r4)
.if \type == EX_CRIT
PPC_LL r5, (\paca_ex + EX_R13)(r13)
.else
mfspr r5, \scratch
.endif
PPC_STL r6, VCPU_GPR(R6)(r4)
PPC_STL r8, VCPU_GPR(R8)(r4)
PPC_STL r9, VCPU_GPR(R9)(r4)
PPC_STL r5, VCPU_GPR(R13)(r4)
PPC_LL r6, (\paca_ex + \ex_r10)(r13)
PPC_LL r8, (\paca_ex + \ex_r11)(r13)
PPC_STL r3, VCPU_GPR(R3)(r4)
PPC_STL r7, VCPU_GPR(R7)(r4)
PPC_STL r12, VCPU_GPR(R12)(r4)
PPC_STL r6, VCPU_GPR(R10)(r4)
PPC_STL r8, VCPU_GPR(R11)(r4)
mfctr r5
PPC_STL r5, VCPU_CTR(r4)
mfspr r5, \srr0
mfspr r6, \srr1
kvm_handler_common \intno, \srr0, \flags
.endm

#define EX_PARAMS(type) \
EX_##type, \
SPRN_SPRG_##type##_SCRATCH, \
PACA_EX##type, \
EX_R10, \
EX_R11

#define EX_PARAMS_TLB \
EX_TLB, \
SPRN_SPRG_GEN_SCRATCH, \
PACA_EXTLB, \
EX_TLB_R10, \
EX_TLB_R11

kvm_handler BOOKE_INTERRUPT_CRITICAL, EX_PARAMS(CRIT), \
SPRN_CSRR0, SPRN_CSRR1, 0
kvm_handler BOOKE_INTERRUPT_MACHINE_CHECK, EX_PARAMS(MC), \
SPRN_MCSRR0, SPRN_MCSRR1, 0
kvm_handler BOOKE_INTERRUPT_DATA_STORAGE, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1,(NEED_EMU | NEED_DEAR | NEED_ESR)
kvm_handler BOOKE_INTERRUPT_INST_STORAGE, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, NEED_ESR
kvm_handler BOOKE_INTERRUPT_EXTERNAL, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_ALIGNMENT, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1,(NEED_DEAR | NEED_ESR)
kvm_handler BOOKE_INTERRUPT_PROGRAM, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1,NEED_ESR
kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_DECREMENTER, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_FIT, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_WATCHDOG, EX_PARAMS(CRIT),\
SPRN_CSRR0, SPRN_CSRR1, 0
/*
* Only bolted TLB miss exception handlers are supported for now
*/
kvm_handler BOOKE_INTERRUPT_DTLB_MISS, EX_PARAMS_TLB, \
SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
kvm_handler BOOKE_INTERRUPT_ITLB_MISS, EX_PARAMS_TLB, \
SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_DOORBELL, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, EX_PARAMS(CRIT), \
SPRN_CSRR0, SPRN_CSRR1, 0
kvm_handler BOOKE_INTERRUPT_HV_PRIV, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, NEED_EMU
kvm_handler BOOKE_INTERRUPT_HV_SYSCALL, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_GUEST_DBELL, EX_PARAMS(GDBELL), \
SPRN_GSRR0, SPRN_GSRR1, 0
kvm_handler BOOKE_INTERRUPT_GUEST_DBELL_CRIT, EX_PARAMS(CRIT), \
SPRN_CSRR0, SPRN_CSRR1, 0
kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \
SPRN_DSRR0, SPRN_DSRR1, 0
kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \
SPRN_CSRR0, SPRN_CSRR1, 0
#else
/*
* For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
*/
.macro kvm_handler intno srr0, srr1, flags
_GLOBAL(kvmppc_handler_\intno\()_\srr1)
GET_VCPU(r11, r10)
PPC_LL r11, THREAD_KVM_VCPU(r10)
PPC_STL r3, VCPU_GPR(R3)(r11)
mfspr r3, SPRN_SPRG_RSCRATCH0
PPC_STL r4, VCPU_GPR(R4)(r11)
@ -233,7 +352,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
.macro kvm_lvl_handler intno scratch srr0, srr1, flags
_GLOBAL(kvmppc_handler_\intno\()_\srr1)
mfspr r10, SPRN_SPRG_THREAD
GET_VCPU(r11, r10)
PPC_LL r11, THREAD_KVM_VCPU(r10)
PPC_STL r3, VCPU_GPR(R3)(r11)
mfspr r3, \scratch
PPC_STL r4, VCPU_GPR(R4)(r11)
@ -295,7 +414,7 @@ kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
SPRN_SPRG_RSCRATCH_DBG, SPRN_DSRR0, SPRN_DSRR1, 0

#endif

/* Registers:
* SPRG_SCRATCH0: guest r10

@ -27,8 +27,7 @@
#define E500_TLB_NUM 2

#define E500_TLB_VALID 1
#define E500_TLB_DIRTY 2
#define E500_TLB_BITMAP 4
#define E500_TLB_BITMAP 2

struct tlbe_ref {
pfn_t pfn;
@ -130,9 +129,9 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500,
ulong value);
int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu);
int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu);
int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb);
int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb);
int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb);
int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea);
int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int type, gva_t ea);
int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea);
int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500);
void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);

@ -155,7 +154,7 @@ get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe)

static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe)
{
return tlbe->mas2 & 0xfffff000;
return tlbe->mas2 & MAS2_EPN;
}

static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe)
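get_tlb_eaddr now masks with MAS2_EPN instead of a hard-coded 0xfffff000, which matters on 64-bit where the effective page number extends above bit 31. A small sketch of the difference; the 64-bit mask below is an assumption for illustration, not the architected definition:

    #include <stdio.h>
    #include <stdint.h>

    #define OLD_MASK 0xfffff000ULL /* the old 32-bit-only EPN mask */
    #define MAS2_EPN (~0xfffULL)   /* assumed 64-bit EPN mask, keeps the high half */

    int main(void)
    {
        uint64_t mas2 = 0x0000004500003045ULL; /* EPN above 4 GiB + attribute bits */
        printf("old: 0x%llx\nnew: 0x%llx\n",
               (unsigned long long)(mas2 & OLD_MASK),   /* high bits lost */
               (unsigned long long)(mas2 & MAS2_EPN));  /* high bits kept */
        return 0;
    }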

@ -89,6 +89,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
int ra = get_ra(inst);
int rb = get_rb(inst);
int rt = get_rt(inst);
gva_t ea;

switch (get_op(inst)) {
case 31:
@ -113,15 +114,20 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;

case XOP_TLBSX:
emulated = kvmppc_e500_emul_tlbsx(vcpu,rb);
ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
emulated = kvmppc_e500_emul_tlbsx(vcpu, ea);
break;

case XOP_TLBILX:
emulated = kvmppc_e500_emul_tlbilx(vcpu, rt, ra, rb);
case XOP_TLBILX: {
int type = rt & 0x3;
ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
emulated = kvmppc_e500_emul_tlbilx(vcpu, type, ea);
break;
}

case XOP_TLBIVAX:
emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb);
ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
emulated = kvmppc_e500_emul_tlbivax(vcpu, ea);
break;

default:
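All three TLB-management instructions now compute their effective address in one place before calling the emulation helpers. The helper mirrors the Book E indexed addressing form EA = (rA|0) + (rB), where r0 as the base register reads as zero. A sketch of that calculation (the gpr array below is a stand-in for the vcpu register file):

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t gpr[32]; /* stand-in for the guest GPR file */

    /* EA = (rA|0) + (rB): ra == 0 means "no base register", not gpr[0]. */
    static uint64_t get_ea_indexed(int ra, int rb)
    {
        return (ra ? gpr[ra] : 0) + gpr[rb];
    }

    int main(void)
    {
        gpr[3] = 0x1000;
        gpr[4] = 0x234;
        printf("ea(ra=3,rb=4)=0x%llx  ea(ra=0,rb=4)=0x%llx\n",
               (unsigned long long)get_ea_indexed(3, 4),
               (unsigned long long)get_ea_indexed(0, 4));
        return 0;
    }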
|
@ -304,17 +304,13 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
|
||||
ref->flags = E500_TLB_VALID;
|
||||
|
||||
if (tlbe_is_writable(gtlbe))
|
||||
ref->flags |= E500_TLB_DIRTY;
|
||||
kvm_set_pfn_dirty(pfn);
|
||||
}
|
||||
|
||||
static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
|
||||
{
|
||||
if (ref->flags & E500_TLB_VALID) {
|
||||
if (ref->flags & E500_TLB_DIRTY)
|
||||
kvm_release_pfn_dirty(ref->pfn);
|
||||
else
|
||||
kvm_release_pfn_clean(ref->pfn);
|
||||
|
||||
trace_kvm_booke206_ref_release(ref->pfn, ref->flags);
|
||||
ref->flags = 0;
|
||||
}
|
||||
}
|
||||
@@ -357,6 +353,13 @@ static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500)
	clear_tlb_privs(vcpu_e500);
}

+void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	clear_tlb_refs(vcpu_e500);
+	clear_tlb1_bitmap(vcpu_e500);
+}
+
static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
		unsigned int eaddr, int as)
{
@@ -412,7 +415,8 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
	struct tlbe_ref *ref)
{
	struct kvm_memory_slot *slot;
-	unsigned long pfn, hva;
+	unsigned long pfn = 0; /* silence GCC warning */
+	unsigned long hva;
	int pfnmap = 0;
	int tsize = BOOK3E_PAGESZ_4K;

@@ -521,7 +525,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
	if (likely(!pfnmap)) {
		unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
		pfn = gfn_to_pfn_memslot(slot, gfn);
-		if (is_error_pfn(pfn)) {
+		if (is_error_noslot_pfn(pfn)) {
			printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
					(long)gfn);
			return;
@@ -541,6 +545,9 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,

	/* Clear i-cache for new pages */
	kvmppc_mmu_flush_icache(pfn);
+
+	/* Drop refcount on page, so that mmu notifiers can clear it */
+	kvm_release_pfn_clean(pfn);
}

/* XXX only map the one-one case, for now use TLB0 */
@@ -682,14 +689,11 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
	return EMULATE_DONE;
}

-int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
+int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea)
{
	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
	unsigned int ia;
	int esel, tlbsel;
-	gva_t ea;
-
-	ea = ((ra) ? kvmppc_get_gpr(vcpu, ra) : 0) + kvmppc_get_gpr(vcpu, rb);

	ia = (ea >> 2) & 0x1;

@@ -716,7 +720,7 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
}

static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
-		       int pid, int rt)
+		       int pid, int type)
{
	struct kvm_book3e_206_tlb_entry *tlbe;
	int tid, esel;
@@ -725,7 +729,7 @@ static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
	for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; esel++) {
		tlbe = get_entry(vcpu_e500, tlbsel, esel);
		tid = get_tlb_tid(tlbe);
-		if (rt == 0 || tid == pid) {
+		if (type == 0 || tid == pid) {
			inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
			kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
		}
@@ -733,14 +737,9 @@ static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
}

static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid,
-		       int ra, int rb)
+		       gva_t ea)
{
	int tlbsel, esel;
-	gva_t ea;
-
-	ea = kvmppc_get_gpr(&vcpu_e500->vcpu, rb);
-	if (ra)
-		ea += kvmppc_get_gpr(&vcpu_e500->vcpu, ra);

	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
		esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, -1);
@@ -752,16 +751,16 @@ static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid,
	}
}

-int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb)
+int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int type, gva_t ea)
{
	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
	int pid = get_cur_spid(vcpu);

-	if (rt == 0 || rt == 1) {
-		tlbilx_all(vcpu_e500, 0, pid, rt);
-		tlbilx_all(vcpu_e500, 1, pid, rt);
-	} else if (rt == 3) {
-		tlbilx_one(vcpu_e500, pid, ra, rb);
+	if (type == 0 || type == 1) {
+		tlbilx_all(vcpu_e500, 0, pid, type);
+		tlbilx_all(vcpu_e500, 1, pid, type);
+	} else if (type == 3) {
+		tlbilx_one(vcpu_e500, pid, ea);
	}

	return EMULATE_DONE;
@@ -786,16 +785,13 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
	return EMULATE_DONE;
}

-int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
+int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea)
{
	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
	int as = !!get_cur_sas(vcpu);
	unsigned int pid = get_cur_spid(vcpu);
	int esel, tlbsel;
	struct kvm_book3e_206_tlb_entry *gtlbe = NULL;
-	gva_t ea;
-
-	ea = kvmppc_get_gpr(vcpu, rb);

	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
		esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
@@ -875,6 +871,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)

	gtlbe->mas1 = vcpu->arch.shared->mas1;
	gtlbe->mas2 = vcpu->arch.shared->mas2;
+	if (!(vcpu->arch.shared->msr & MSR_CM))
+		gtlbe->mas2 &= 0xffffffffUL;
	gtlbe->mas7_3 = vcpu->arch.shared->mas7_3;

	trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1,
@@ -1039,8 +1037,12 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
		sesel = 0; /* unused */
		priv = &vcpu_e500->gtlb_priv[tlbsel][esel];

-		kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K,
-					&priv->ref, eaddr, &stlbe);
+		/* Only triggers after clear_tlb_refs */
+		if (unlikely(!(priv->ref.flags & E500_TLB_VALID)))
+			kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe);
+		else
+			kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K,
+						&priv->ref, eaddr, &stlbe);
		break;

	case 1: {
@@ -1060,6 +1062,49 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
	write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel);
}

+/************* MMU Notifiers *************/
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+	trace_kvm_unmap_hva(hva);
+
+	/*
+	 * Flush all shadow tlb entries everywhere. This is slow, but
+	 * we are 100% sure that we catch the to be unmapped page
+	 */
+	kvm_flush_remote_tlbs(kvm);
+
+	return 0;
+}
+
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+	/* kvm_unmap_hva flushes everything anyways */
+	kvm_unmap_hva(kvm, start);
+
+	return 0;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	/* XXX could be more clever ;) */
+	return 0;
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	/* XXX could be more clever ;) */
+	return 0;
+}
+
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+	/* The page will get remapped properly on its next fault */
+	kvm_unmap_hva(kvm, hva);
+}
+
+/*****************************************/
+
static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
{
	int i;
@@ -1081,6 +1126,8 @@ static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
	}

		vcpu_e500->num_shared_tlb_pages = 0;
+
+		kfree(vcpu_e500->shared_tlb_pages);
		vcpu_e500->shared_tlb_pages = NULL;
	} else {
		kfree(vcpu_e500->gtlb_arch);
@@ -1178,21 +1225,27 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
	}

	virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);
-	if (!virt)
+	if (!virt) {
+		ret = -ENOMEM;
		goto err_put_page;
+	}

	privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0],
			   GFP_KERNEL);
	privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1],
			   GFP_KERNEL);

-	if (!privs[0] || !privs[1])
-		goto err_put_page;
+	if (!privs[0] || !privs[1]) {
+		ret = -ENOMEM;
+		goto err_privs;
+	}

	g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1],
			     GFP_KERNEL);
-	if (!g2h_bitmap)
-		goto err_put_page;
+	if (!g2h_bitmap) {
+		ret = -ENOMEM;
+		goto err_privs;
+	}

	free_gtlb(vcpu_e500);

@@ -1232,10 +1285,11 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
	kvmppc_recalc_tlb1map_range(vcpu_e500);
	return 0;

-err_put_page:
+err_privs:
	kfree(privs[0]);
	kfree(privs[1]);

+err_put_page:
	for (i = 0; i < num_pages; i++)
		put_page(pages[i]);

@@ -1332,7 +1386,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
	if (!vcpu_e500->gtlb_priv[1])
		goto err;

-	vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(unsigned int) *
+	vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(u64) *
					  vcpu_e500->gtlb_params[1].entries,
					  GFP_KERNEL);
	if (!vcpu_e500->g2h_tlb1_map)

@@ -131,6 +131,125 @@ u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
	return vcpu->arch.dec - jd;
}

+static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+	enum emulation_result emulated = EMULATE_DONE;
+	ulong spr_val = kvmppc_get_gpr(vcpu, rs);
+
+	switch (sprn) {
+	case SPRN_SRR0:
+		vcpu->arch.shared->srr0 = spr_val;
+		break;
+	case SPRN_SRR1:
+		vcpu->arch.shared->srr1 = spr_val;
+		break;
+
+	/* XXX We need to context-switch the timebase for
+	 * watchdog and FIT. */
+	case SPRN_TBWL: break;
+	case SPRN_TBWU: break;
+
+	case SPRN_MSSSR0: break;
+
+	case SPRN_DEC:
+		vcpu->arch.dec = spr_val;
+		kvmppc_emulate_dec(vcpu);
+		break;
+
+	case SPRN_SPRG0:
+		vcpu->arch.shared->sprg0 = spr_val;
+		break;
+	case SPRN_SPRG1:
+		vcpu->arch.shared->sprg1 = spr_val;
+		break;
+	case SPRN_SPRG2:
+		vcpu->arch.shared->sprg2 = spr_val;
+		break;
+	case SPRN_SPRG3:
+		vcpu->arch.shared->sprg3 = spr_val;
+		break;
+
+	default:
+		emulated = kvmppc_core_emulate_mtspr(vcpu, sprn,
+						     spr_val);
+		if (emulated == EMULATE_FAIL)
+			printk(KERN_INFO "mtspr: unknown spr "
+				"0x%x\n", sprn);
+		break;
+	}
+
+	kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
+
+	return emulated;
+}
+
+static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+	enum emulation_result emulated = EMULATE_DONE;
+	ulong spr_val = 0;
+
+	switch (sprn) {
+	case SPRN_SRR0:
+		spr_val = vcpu->arch.shared->srr0;
+		break;
+	case SPRN_SRR1:
+		spr_val = vcpu->arch.shared->srr1;
+		break;
+	case SPRN_PVR:
+		spr_val = vcpu->arch.pvr;
+		break;
+	case SPRN_PIR:
+		spr_val = vcpu->vcpu_id;
+		break;
+	case SPRN_MSSSR0:
+		spr_val = 0;
+		break;
+
+	/* Note: mftb and TBRL/TBWL are user-accessible, so
+	 * the guest can always access the real TB anyways.
+	 * In fact, we probably will never see these traps. */
+	case SPRN_TBWL:
+		spr_val = get_tb() >> 32;
+		break;
+	case SPRN_TBWU:
+		spr_val = get_tb();
+		break;
+
+	case SPRN_SPRG0:
+		spr_val = vcpu->arch.shared->sprg0;
+		break;
+	case SPRN_SPRG1:
+		spr_val = vcpu->arch.shared->sprg1;
+		break;
+	case SPRN_SPRG2:
+		spr_val = vcpu->arch.shared->sprg2;
+		break;
+	case SPRN_SPRG3:
+		spr_val = vcpu->arch.shared->sprg3;
+		break;
+	/* Note: SPRG4-7 are user-readable, so we don't get
+	 * a trap. */
+
+	case SPRN_DEC:
+		spr_val = kvmppc_get_dec(vcpu, get_tb());
+		break;
+	default:
+		emulated = kvmppc_core_emulate_mfspr(vcpu, sprn,
+						     &spr_val);
+		if (unlikely(emulated == EMULATE_FAIL)) {
+			printk(KERN_INFO "mfspr: unknown spr "
+				"0x%x\n", sprn);
+		}
+		break;
+	}
+
+	if (emulated == EMULATE_DONE)
+		kvmppc_set_gpr(vcpu, rt, spr_val);
+	kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
+
+	return emulated;
+}
+
/* XXX to do:
 * lhax
 * lhaux
@@ -156,7 +275,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
	int sprn = get_sprn(inst);
	enum emulation_result emulated = EMULATE_DONE;
	int advance = 1;
-	ulong spr_val = 0;

	/* this default type might be overwritten by subcategories */
	kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
@@ -236,62 +354,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
		break;

	case OP_31_XOP_MFSPR:
-		switch (sprn) {
-		case SPRN_SRR0:
-			spr_val = vcpu->arch.shared->srr0;
-			break;
-		case SPRN_SRR1:
-			spr_val = vcpu->arch.shared->srr1;
-			break;
-		case SPRN_PVR:
-			spr_val = vcpu->arch.pvr;
-			break;
-		case SPRN_PIR:
-			spr_val = vcpu->vcpu_id;
-			break;
-		case SPRN_MSSSR0:
-			spr_val = 0;
-			break;
-
-		/* Note: mftb and TBRL/TBWL are user-accessible, so
-		 * the guest can always access the real TB anyways.
-		 * In fact, we probably will never see these traps. */
-		case SPRN_TBWL:
-			spr_val = get_tb() >> 32;
-			break;
-		case SPRN_TBWU:
-			spr_val = get_tb();
-			break;
-
-		case SPRN_SPRG0:
-			spr_val = vcpu->arch.shared->sprg0;
-			break;
-		case SPRN_SPRG1:
-			spr_val = vcpu->arch.shared->sprg1;
-			break;
-		case SPRN_SPRG2:
-			spr_val = vcpu->arch.shared->sprg2;
-			break;
-		case SPRN_SPRG3:
-			spr_val = vcpu->arch.shared->sprg3;
-			break;
-		/* Note: SPRG4-7 are user-readable, so we don't get
-		 * a trap. */
-
-		case SPRN_DEC:
-			spr_val = kvmppc_get_dec(vcpu, get_tb());
-			break;
-		default:
-			emulated = kvmppc_core_emulate_mfspr(vcpu, sprn,
-							     &spr_val);
-			if (unlikely(emulated == EMULATE_FAIL)) {
-				printk(KERN_INFO "mfspr: unknown spr "
-					"0x%x\n", sprn);
-			}
-			break;
-		}
-		kvmppc_set_gpr(vcpu, rt, spr_val);
-		kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
+		emulated = kvmppc_emulate_mfspr(vcpu, sprn, rt);
		break;

	case OP_31_XOP_STHX:
@@ -308,49 +371,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
		break;

	case OP_31_XOP_MTSPR:
-		spr_val = kvmppc_get_gpr(vcpu, rs);
-		switch (sprn) {
-		case SPRN_SRR0:
-			vcpu->arch.shared->srr0 = spr_val;
-			break;
-		case SPRN_SRR1:
-			vcpu->arch.shared->srr1 = spr_val;
-			break;
-
-		/* XXX We need to context-switch the timebase for
-		 * watchdog and FIT. */
-		case SPRN_TBWL: break;
-		case SPRN_TBWU: break;
-
-		case SPRN_MSSSR0: break;
-
-		case SPRN_DEC:
-			vcpu->arch.dec = spr_val;
-			kvmppc_emulate_dec(vcpu);
-			break;
-
-		case SPRN_SPRG0:
-			vcpu->arch.shared->sprg0 = spr_val;
-			break;
-		case SPRN_SPRG1:
-			vcpu->arch.shared->sprg1 = spr_val;
-			break;
-		case SPRN_SPRG2:
-			vcpu->arch.shared->sprg2 = spr_val;
-			break;
-		case SPRN_SPRG3:
-			vcpu->arch.shared->sprg3 = spr_val;
-			break;
-
-		default:
-			emulated = kvmppc_core_emulate_mtspr(vcpu, sprn,
-							     spr_val);
-			if (emulated == EMULATE_FAIL)
-				printk(KERN_INFO "mtspr: unknown spr "
-					"0x%x\n", sprn);
-			break;
-		}
-		kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
+		emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs);
		break;

	case OP_31_XOP_DCBI:

@@ -30,6 +30,7 @@
#include <asm/kvm_ppc.h>
#include <asm/tlbflush.h>
#include <asm/cputhreads.h>
+#include <asm/irqflags.h>
#include "timing.h"
#include "../mm/mmu_decl.h"

@@ -38,8 +39,7 @@

int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
-	return !(v->arch.shared->msr & MSR_WE) ||
-	       !!(v->arch.pending_exceptions) ||
+	return !!(v->arch.pending_exceptions) ||
	       v->requests;
}

@@ -48,6 +48,85 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
	return 1;
}

+#ifndef CONFIG_KVM_BOOK3S_64_HV
+/*
+ * Common checks before entering the guest world. Call with interrupts
+ * disabled.
+ *
+ * returns:
+ *
+ * == 1 if we're ready to go into guest state
+ * <= 0 if we need to go back to the host with return value
+ */
+int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
+{
+	int r = 1;
+
+	WARN_ON_ONCE(!irqs_disabled());
+	while (true) {
+		if (need_resched()) {
+			local_irq_enable();
+			cond_resched();
+			local_irq_disable();
+			continue;
+		}
+
+		if (signal_pending(current)) {
+			kvmppc_account_exit(vcpu, SIGNAL_EXITS);
+			vcpu->run->exit_reason = KVM_EXIT_INTR;
+			r = -EINTR;
+			break;
+		}
+
+		vcpu->mode = IN_GUEST_MODE;
+
+		/*
+		 * Reading vcpu->requests must happen after setting vcpu->mode,
+		 * so we don't miss a request because the requester sees
+		 * OUTSIDE_GUEST_MODE and assumes we'll be checking requests
+		 * before next entering the guest (and thus doesn't IPI).
+		 */
+		smp_mb();
+
+		if (vcpu->requests) {
+			/* Make sure we process requests preemptable */
+			local_irq_enable();
+			trace_kvm_check_requests(vcpu);
+			r = kvmppc_core_check_requests(vcpu);
+			local_irq_disable();
+			if (r > 0)
+				continue;
+			break;
+		}
+
+		if (kvmppc_core_prepare_to_enter(vcpu)) {
+			/* interrupts got enabled in between, so we
+			   are back at square 1 */
+			continue;
+		}
+
+#ifdef CONFIG_PPC64
+		/* lazy EE magic */
+		hard_irq_disable();
+		if (lazy_irq_pending()) {
+			/* Got an interrupt in between, try again */
+			local_irq_enable();
+			local_irq_disable();
+			kvm_guest_exit();
+			continue;
+		}
+
+		trace_hardirqs_on();
+#endif
+
+		kvm_guest_enter();
+		break;
+	}
+
+	return r;
+}
+#endif /* CONFIG_KVM_BOOK3S_64_HV */

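A hedged sketch of the expected caller pattern for the helper above (the exact call site lives in each subarch's vcpu-run path and is an assumption here): interrupts are disabled first, and a non-positive return short-circuits back to the host with run->exit_reason already filled in.

	local_irq_disable();
	r = kvmppc_prepare_to_enter(vcpu);
	if (r <= 0) {
		local_irq_enable();
		return r;	/* e.g. -EINTR, exit_reason = KVM_EXIT_INTR */
	}
	/* interrupts stay hard-disabled; enter the guest next */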
int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
{
	int nr = kvmppc_get_gpr(vcpu, 11);
@@ -67,18 +146,18 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
	}

	switch (nr) {
-	case HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE:
+	case KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE):
	{
		vcpu->arch.magic_page_pa = param1;
		vcpu->arch.magic_page_ea = param2;

		r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7;

-		r = HC_EV_SUCCESS;
+		r = EV_SUCCESS;
		break;
	}
-	case HC_VENDOR_KVM | KVM_HC_FEATURES:
-		r = HC_EV_SUCCESS;
+	case KVM_HCALL_TOKEN(KVM_HC_FEATURES):
+		r = EV_SUCCESS;
#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2)
		/* XXX Missing magic page on 44x */
		r2 |= (1 << KVM_FEATURE_MAGIC_PAGE);
@@ -86,8 +165,13 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)

		/* Second return value is in r4 */
		break;
+	case EV_HCALL_TOKEN(EV_IDLE):
+		r = EV_SUCCESS;
+		kvm_vcpu_block(vcpu);
+		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+		break;
	default:
-		r = HC_EV_UNIMPLEMENTED;
+		r = EV_UNIMPLEMENTED;
		break;
	}

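The switch above moves from ad-hoc HC_VENDOR_KVM constants to ePAPR-style tokens. A sketch of the token encoding this implies, with the macro bodies and vendor IDs stated as assumptions from the ePAPR hypercall ABI rather than shown in these hunks:

/* Assumed shape of the token macros (see epapr_hcalls.h for the real ones). */
#define _EV_HCALL_TOKEN(id, num)	(((id) << 16) | (num))
#define EV_EPAPR_VENDOR_ID	0
#define EV_KVM_VENDOR_ID	42
#define EV_HCALL_TOKEN(num)	_EV_HCALL_TOKEN(EV_EPAPR_VENDOR_ID, (num))
#define KVM_HCALL_TOKEN(num)	_EV_HCALL_TOKEN(EV_KVM_VENDOR_ID, (num))

Under this encoding the old HC_VENDOR_KVM | nr form and the new KVM_HCALL_TOKEN(nr) form name the same number; EV_IDLE lands in the ePAPR vendor space, which is why it gets its own case.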
@@ -220,6 +304,7 @@ int kvm_dev_ioctl_check_extension(long ext)
	switch (ext) {
#ifdef CONFIG_BOOKE
	case KVM_CAP_PPC_BOOKE_SREGS:
+	case KVM_CAP_PPC_BOOKE_WATCHDOG:
#else
	case KVM_CAP_PPC_SEGSTATE:
	case KVM_CAP_PPC_HIOR:
@@ -229,6 +314,7 @@ int kvm_dev_ioctl_check_extension(long ext)
	case KVM_CAP_PPC_IRQ_LEVEL:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_ONE_REG:
+	case KVM_CAP_IOEVENTFD:
		r = 1;
		break;
#ifndef CONFIG_KVM_BOOK3S_64_HV
@@ -260,10 +346,22 @@ int kvm_dev_ioctl_check_extension(long ext)
		if (cpu_has_feature(CPU_FTR_ARCH_201))
			r = 2;
		break;
#endif
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_BOOK3S_64_HV
		r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
		r = 1;
#else
		r = 0;
		break;
#endif
#ifdef CONFIG_KVM_BOOK3S_64_HV
	case KVM_CAP_PPC_HTAB_FD:
		r = 1;
		break;
#endif
		break;
	case KVM_CAP_NR_VCPUS:
		/*
		 * Recommending a number of CPUs is somewhat arbitrary; we
@@ -302,19 +400,12 @@ long kvm_arch_dev_ioctl(struct file *filp,
void kvm_arch_free_memslot(struct kvm_memory_slot *free,
			   struct kvm_memory_slot *dont)
{
-	if (!dont || free->arch.rmap != dont->arch.rmap) {
-		vfree(free->arch.rmap);
-		free->arch.rmap = NULL;
-	}
+	kvmppc_core_free_memslot(free, dont);
}

int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
{
-	slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
-	if (!slot->arch.rmap)
-		return -ENOMEM;
-
-	return 0;
+	return kvmppc_core_create_memslot(slot, npages);
}

int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -323,7 +414,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_userspace_memory_region *mem,
				   int user_alloc)
{
-	return kvmppc_core_prepare_memory_region(kvm, mem);
+	return kvmppc_core_prepare_memory_region(kvm, memslot, mem);
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
@@ -331,7 +422,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot old,
				   int user_alloc)
{
-	kvmppc_core_commit_memory_region(kvm, mem);
+	kvmppc_core_commit_memory_region(kvm, mem, old);
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
@@ -341,6 +432,7 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
+	kvmppc_core_flush_memslot(kvm, slot);
}

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
@@ -354,6 +446,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
	return vcpu;
}

+int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
	/* Make sure we're not using the vcpu anymore */
@@ -390,6 +487,8 @@ enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
+	int ret;
+
	hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
	tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
	vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
@@ -398,13 +497,14 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
#ifdef CONFIG_KVM_EXIT_TIMING
	mutex_init(&vcpu->arch.exit_timing_lock);
#endif

-	return 0;
+	ret = kvmppc_subarch_vcpu_init(vcpu);
+	return ret;
}

void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	kvmppc_mmu_destroy(vcpu);
+	kvmppc_subarch_vcpu_uninit(vcpu);
}

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -420,7 +520,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
	mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
#endif
	kvmppc_core_vcpu_load(vcpu, cpu);
-	vcpu->cpu = smp_processor_id();
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -429,7 +528,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
#ifdef CONFIG_BOOKE
	vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
#endif
-	vcpu->cpu = -1;
}

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
@@ -527,6 +625,13 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
	vcpu->mmio_is_write = 0;
	vcpu->arch.mmio_sign_extend = 0;

+	if (!kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+			     bytes, &run->mmio.data)) {
+		kvmppc_complete_mmio_load(vcpu, run);
+		vcpu->mmio_needed = 0;
+		return EMULATE_DONE;
+	}
+
	return EMULATE_DO_MMIO;
}

@@ -536,8 +641,8 @@ int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
{
	int r;

-	r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian);
	vcpu->arch.mmio_sign_extend = 1;
+	r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian);

	return r;
}
@@ -575,6 +680,13 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
		}
	}

+	if (!kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+			      bytes, &run->mmio.data)) {
+		kvmppc_complete_mmio_load(vcpu, run);
+		vcpu->mmio_needed = 0;
+		return EMULATE_DONE;
+	}
+
	return EMULATE_DO_MMIO;
}

@@ -649,6 +761,12 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
		r = 0;
		vcpu->arch.papr_enabled = true;
		break;
+#ifdef CONFIG_BOOKE
+	case KVM_CAP_PPC_BOOKE_WATCHDOG:
+		r = 0;
+		vcpu->arch.watchdog_enabled = true;
+		break;
+#endif
#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
	case KVM_CAP_SW_TLB: {
		struct kvm_config_tlb cfg;
@@ -751,9 +869,16 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)

static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
{
+	u32 inst_nop = 0x60000000;
+#ifdef CONFIG_KVM_BOOKE_HV
+	u32 inst_sc1 = 0x44000022;
+	pvinfo->hcall[0] = inst_sc1;
+	pvinfo->hcall[1] = inst_nop;
+	pvinfo->hcall[2] = inst_nop;
+	pvinfo->hcall[3] = inst_nop;
+#else
	u32 inst_lis = 0x3c000000;
	u32 inst_ori = 0x60000000;
-	u32 inst_nop = 0x60000000;
	u32 inst_sc = 0x44000002;
	u32 inst_imm_mask = 0xffff;

@@ -770,6 +895,9 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
	pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask);
	pvinfo->hcall[2] = inst_sc;
	pvinfo->hcall[3] = inst_nop;
+#endif

+	pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE;
+
	return 0;
}
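With the hunk above, every kernel that builds this code reports the ePAPR idle capability. A minimal user-space sketch of how a VMM might consume it (vmfd is an assumed open VM file descriptor; error handling trimmed to essentials):

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int guest_may_use_ev_idle(int vmfd)
{
	struct kvm_ppc_pvinfo pvinfo;

	if (ioctl(vmfd, KVM_PPC_GET_PVINFO, &pvinfo) < 0)
		return 0;
	/* hcall[0..3] still carries the 4-instruction hypercall sequence */
	return !!(pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE);
}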
@@ -832,6 +960,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
		r = 0;
		break;
	}

+	case KVM_PPC_GET_HTAB_FD: {
+		struct kvm *kvm = filp->private_data;
+		struct kvm_get_htab_fd ghf;
+
+		r = -EFAULT;
+		if (copy_from_user(&ghf, argp, sizeof(ghf)))
+			break;
+		r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf);
+		break;
+	}
#endif /* CONFIG_KVM_BOOK3S_64_HV */

#ifdef CONFIG_PPC_BOOK3S_64

@@ -31,6 +31,126 @@ TRACE_EVENT(kvm_ppc_instr,
		  __entry->inst, __entry->pc, __entry->emulate)
);

+#ifdef CONFIG_PPC_BOOK3S
+#define kvm_trace_symbol_exit \
+	{0x100, "SYSTEM_RESET"}, \
+	{0x200, "MACHINE_CHECK"}, \
+	{0x300, "DATA_STORAGE"}, \
+	{0x380, "DATA_SEGMENT"}, \
+	{0x400, "INST_STORAGE"}, \
+	{0x480, "INST_SEGMENT"}, \
+	{0x500, "EXTERNAL"}, \
+	{0x501, "EXTERNAL_LEVEL"}, \
+	{0x502, "EXTERNAL_HV"}, \
+	{0x600, "ALIGNMENT"}, \
+	{0x700, "PROGRAM"}, \
+	{0x800, "FP_UNAVAIL"}, \
+	{0x900, "DECREMENTER"}, \
+	{0x980, "HV_DECREMENTER"}, \
+	{0xc00, "SYSCALL"}, \
+	{0xd00, "TRACE"}, \
+	{0xe00, "H_DATA_STORAGE"}, \
+	{0xe20, "H_INST_STORAGE"}, \
+	{0xe40, "H_EMUL_ASSIST"}, \
+	{0xf00, "PERFMON"}, \
+	{0xf20, "ALTIVEC"}, \
+	{0xf40, "VSX"}
+#else
+#define kvm_trace_symbol_exit \
+	{0, "CRITICAL"}, \
+	{1, "MACHINE_CHECK"}, \
+	{2, "DATA_STORAGE"}, \
+	{3, "INST_STORAGE"}, \
+	{4, "EXTERNAL"}, \
+	{5, "ALIGNMENT"}, \
+	{6, "PROGRAM"}, \
+	{7, "FP_UNAVAIL"}, \
+	{8, "SYSCALL"}, \
+	{9, "AP_UNAVAIL"}, \
+	{10, "DECREMENTER"}, \
+	{11, "FIT"}, \
+	{12, "WATCHDOG"}, \
+	{13, "DTLB_MISS"}, \
+	{14, "ITLB_MISS"}, \
+	{15, "DEBUG"}, \
+	{32, "SPE_UNAVAIL"}, \
+	{33, "SPE_FP_DATA"}, \
+	{34, "SPE_FP_ROUND"}, \
+	{35, "PERFORMANCE_MONITOR"}, \
+	{36, "DOORBELL"}, \
+	{37, "DOORBELL_CRITICAL"}, \
+	{38, "GUEST_DBELL"}, \
+	{39, "GUEST_DBELL_CRIT"}, \
+	{40, "HV_SYSCALL"}, \
+	{41, "HV_PRIV"}
+#endif
+
+TRACE_EVENT(kvm_exit,
+	TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
+	TP_ARGS(exit_nr, vcpu),
+
+	TP_STRUCT__entry(
+		__field( unsigned int, exit_nr )
+		__field( unsigned long, pc )
+		__field( unsigned long, msr )
+		__field( unsigned long, dar )
+#ifdef CONFIG_KVM_BOOK3S_PR
+		__field( unsigned long, srr1 )
+#endif
+		__field( unsigned long, last_inst )
+	),
+
+	TP_fast_assign(
+#ifdef CONFIG_KVM_BOOK3S_PR
+		struct kvmppc_book3s_shadow_vcpu *svcpu;
+#endif
+		__entry->exit_nr = exit_nr;
+		__entry->pc = kvmppc_get_pc(vcpu);
+		__entry->dar = kvmppc_get_fault_dar(vcpu);
+		__entry->msr = vcpu->arch.shared->msr;
+#ifdef CONFIG_KVM_BOOK3S_PR
+		svcpu = svcpu_get(vcpu);
+		__entry->srr1 = svcpu->shadow_srr1;
+		svcpu_put(svcpu);
+#endif
+		__entry->last_inst = vcpu->arch.last_inst;
+	),
+
+	TP_printk("exit=%s"
+		" | pc=0x%lx"
+		" | msr=0x%lx"
+		" | dar=0x%lx"
+#ifdef CONFIG_KVM_BOOK3S_PR
+		" | srr1=0x%lx"
+#endif
+		" | last_inst=0x%lx"
+		,
+		__print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit),
+		__entry->pc,
+		__entry->msr,
+		__entry->dar,
+#ifdef CONFIG_KVM_BOOK3S_PR
+		__entry->srr1,
+#endif
+		__entry->last_inst
+		)
+);
+
+TRACE_EVENT(kvm_unmap_hva,
+	TP_PROTO(unsigned long hva),
+	TP_ARGS(hva),
+
+	TP_STRUCT__entry(
+		__field( unsigned long, hva )
+	),
+
+	TP_fast_assign(
+		__entry->hva = hva;
+	),
+
+	TP_printk("unmap hva 0x%lx\n", __entry->hva)
+);
+
TRACE_EVENT(kvm_stlb_inval,
	TP_PROTO(unsigned int stlb_index),
	TP_ARGS(stlb_index),
@@ -98,6 +218,24 @@ TRACE_EVENT(kvm_gtlb_write,
		__entry->word1, __entry->word2)
);

+TRACE_EVENT(kvm_check_requests,
+	TP_PROTO(struct kvm_vcpu *vcpu),
+	TP_ARGS(vcpu),
+
+	TP_STRUCT__entry(
+		__field( __u32, cpu_nr )
+		__field( __u32, requests )
+	),
+
+	TP_fast_assign(
+		__entry->cpu_nr = vcpu->vcpu_id;
+		__entry->requests = vcpu->requests;
+	),
+
+	TP_printk("vcpu=%x requests=%x",
+		__entry->cpu_nr, __entry->requests)
+);
+

/*************************************************************************
 *                         Book3S trace points                           *
@@ -105,34 +243,6 @@ TRACE_EVENT(kvm_gtlb_write,

#ifdef CONFIG_KVM_BOOK3S_PR

-TRACE_EVENT(kvm_book3s_exit,
-	TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
-	TP_ARGS(exit_nr, vcpu),
-
-	TP_STRUCT__entry(
-		__field( unsigned int, exit_nr )
-		__field( unsigned long, pc )
-		__field( unsigned long, msr )
-		__field( unsigned long, dar )
-		__field( unsigned long, srr1 )
-	),
-
-	TP_fast_assign(
-		struct kvmppc_book3s_shadow_vcpu *svcpu;
-		__entry->exit_nr = exit_nr;
-		__entry->pc = kvmppc_get_pc(vcpu);
-		__entry->dar = kvmppc_get_fault_dar(vcpu);
-		__entry->msr = vcpu->arch.shared->msr;
-		svcpu = svcpu_get(vcpu);
-		__entry->srr1 = svcpu->shadow_srr1;
-		svcpu_put(svcpu);
-	),
-
-	TP_printk("exit=0x%x | pc=0x%lx | msr=0x%lx | dar=0x%lx | srr1=0x%lx",
-		__entry->exit_nr, __entry->pc, __entry->msr, __entry->dar,
-		__entry->srr1)
-);
-
TRACE_EVENT(kvm_book3s_reenter,
	TP_PROTO(int r, struct kvm_vcpu *vcpu),
	TP_ARGS(r, vcpu),
@@ -395,6 +505,44 @@ TRACE_EVENT(kvm_booke206_gtlb_write,
		__entry->mas2, __entry->mas7_3)
);

+TRACE_EVENT(kvm_booke206_ref_release,
+	TP_PROTO(__u64 pfn, __u32 flags),
+	TP_ARGS(pfn, flags),
+
+	TP_STRUCT__entry(
+		__field( __u64, pfn )
+		__field( __u32, flags )
+	),
+
+	TP_fast_assign(
+		__entry->pfn = pfn;
+		__entry->flags = flags;
+	),
+
+	TP_printk("pfn=%llx flags=%x",
+		__entry->pfn, __entry->flags)
+);
+
+TRACE_EVENT(kvm_booke_queue_irqprio,
+	TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
+	TP_ARGS(vcpu, priority),
+
+	TP_STRUCT__entry(
+		__field( __u32, cpu_nr )
+		__field( __u32, priority )
+		__field( unsigned long, pending )
+	),
+
+	TP_fast_assign(
+		__entry->cpu_nr = vcpu->vcpu_id;
+		__entry->priority = priority;
+		__entry->pending = vcpu->arch.pending_exceptions;
+	),
+
+	TP_printk("vcpu=%x prio=%x pending=%lx",
+		__entry->cpu_nr, __entry->priority, __entry->pending)
+);
+
#endif

#endif /* _TRACE_KVM_H */

@@ -90,6 +90,7 @@ config MPIC
config PPC_EPAPR_HV_PIC
	bool
	default n
+	select EPAPR_PARAVIRT

config MPIC_WEIRD
	bool

@@ -236,7 +236,6 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc)
	u32 intr_index;
	u32 have_shift = 0;
	struct fsl_msi_cascade_data *cascade_data;
-	unsigned int ret;

	cascade_data = irq_get_handler_data(irq);
	msi_data = cascade_data->msi_data;
@@ -268,7 +267,9 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc)
	case FSL_PIC_IP_IPIC:
		msir_value = fsl_msi_read(msi_data->msi_regs, msir_index * 0x4);
		break;
-	case FSL_PIC_IP_VMPIC:
+#ifdef CONFIG_EPAPR_PARAVIRT
+	case FSL_PIC_IP_VMPIC: {
+		unsigned int ret;
		ret = fh_vmpic_get_msir(virq_to_hw(irq), &msir_value);
		if (ret) {
			pr_err("fsl-msi: fh_vmpic_get_msir() failed for "
@@ -277,6 +278,8 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc)
		}
		break;
+	}
+#endif
	}

	while (msir_value) {
		intr_index = ffs(msir_value) - 1;
@@ -508,10 +511,12 @@ static const struct of_device_id fsl_of_msi_ids[] = {
		.compatible = "fsl,ipic-msi",
		.data = &ipic_msi_feature,
	},
+#ifdef CONFIG_EPAPR_PARAVIRT
	{
		.compatible = "fsl,vmpic-msi",
		.data = &vmpic_msi_feature,
	},
+#endif
	{}
};

@@ -253,6 +253,7 @@ struct platform_diu_data_ops diu_ops;
EXPORT_SYMBOL(diu_ops);
#endif

+#ifdef CONFIG_EPAPR_PARAVIRT
/*
 * Restart the current partition
 *
@@ -278,3 +279,4 @@ void fsl_hv_halt(void)
	pr_info("hv exit\n");
	fh_partition_stop(-1);
}
+#endif

@@ -629,11 +629,28 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
		break;
	case KVM_S390_SIGP_STOP:
	case KVM_S390_RESTART:
-	case KVM_S390_INT_EXTERNAL_CALL:
-	case KVM_S390_INT_EMERGENCY:
		VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
		inti->type = s390int->type;
		break;
+	case KVM_S390_INT_EXTERNAL_CALL:
+		if (s390int->parm & 0xffff0000) {
+			kfree(inti);
+			return -EINVAL;
+		}
+		VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
+			   s390int->parm);
+		inti->type = s390int->type;
+		inti->extcall.code = s390int->parm;
+		break;
+	case KVM_S390_INT_EMERGENCY:
+		if (s390int->parm & 0xffff0000) {
+			kfree(inti);
+			return -EINVAL;
+		}
+		VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", s390int->parm);
+		inti->type = s390int->type;
+		inti->emerg.code = s390int->parm;
+		break;
	case KVM_S390_INT_VIRTIO:
	case KVM_S390_INT_SERVICE:
	default:

@@ -355,6 +355,11 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
}

+int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
@@ -993,7 +998,7 @@ static int __init kvm_s390_init(void)
	}
	memcpy(facilities, S390_lowcore.stfle_fac_list, 16);
	facilities[0] &= 0xff00fff3f47c0000ULL;
-	facilities[1] &= 0x201c000000000000ULL;
+	facilities[1] &= 0x001c000000000000ULL;
	return 0;
}

@@ -8,6 +8,7 @@
#define VCLOCK_NONE 0 /* No vDSO clock available. */
#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
+#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */

struct arch_clocksource_data {
	int vclock_mode;

@@ -202,6 +202,7 @@

/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
+#define X86_FEATURE_TSC_ADJUST (9*32+ 1) /* TSC adjustment MSR 0x3b */
#define X86_FEATURE_BMI1 (9*32+ 3) /* 1st group bit manipulation extensions */
#define X86_FEATURE_HLE (9*32+ 4) /* Hardware Lock Elision */
#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */

@@ -19,6 +19,7 @@
#include <asm/acpi.h>
#include <asm/apicdef.h>
#include <asm/page.h>
+#include <asm/pvclock.h>
#ifdef CONFIG_X86_32
#include <linux/threads.h>
#include <asm/kmap_types.h>
@@ -80,6 +81,10 @@ enum fixed_addresses {
			+ ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
	VVAR_PAGE,
	VSYSCALL_HPET,
#endif
+#ifdef CONFIG_PARAVIRT_CLOCK
+	PVCLOCK_FIXMAP_BEGIN,
+	PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
+#endif
	FIX_DBGP_BASE,
	FIX_EARLYCON_MEM_BASE,

@@ -163,6 +163,9 @@ struct kimage_arch {
};
#endif

+typedef void crash_vmclear_fn(void);
+extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
+
#endif /* __ASSEMBLY__ */

#endif /* _ASM_X86_KEXEC_H */

arch/x86/include/asm/kvm_guest.h (new file, 6 lines)
@@ -0,0 +1,6 @@
+#ifndef _ASM_X86_KVM_GUEST_H
+#define _ASM_X86_KVM_GUEST_H
+
+int kvm_setup_vsyscall_timeinfo(void);
+
+#endif /* _ASM_X86_KVM_GUEST_H */

@@ -22,6 +22,8 @@
#include <linux/kvm_para.h>
#include <linux/kvm_types.h>
#include <linux/perf_event.h>
+#include <linux/pvclock_gtod.h>
+#include <linux/clocksource.h>

#include <asm/pvclock-abi.h>
#include <asm/desc.h>
@@ -442,6 +444,7 @@ struct kvm_vcpu_arch {
	s8 virtual_tsc_shift;
	u32 virtual_tsc_mult;
	u32 virtual_tsc_khz;
+	s64 ia32_tsc_adjust_msr;

	atomic_t nmi_queued;  /* unprocessed asynchronous NMIs */
	unsigned nmi_pending; /* NMI queued after currently running handler */
@@ -559,6 +562,12 @@ struct kvm_arch {
	u64 cur_tsc_write;
	u64 cur_tsc_offset;
	u8 cur_tsc_generation;
+	int nr_vcpus_matched_tsc;
+
+	spinlock_t pvclock_gtod_sync_lock;
+	bool use_master_clock;
+	u64 master_kernel_ns;
+	cycle_t master_cycle_now;

	struct kvm_xen_hvm_config xen_hvm_config;

@@ -612,6 +621,12 @@ struct kvm_vcpu_stat {

struct x86_instruction_info;

+struct msr_data {
+	bool host_initiated;
+	u32 index;
+	u64 data;
+};
+
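The new struct threads the origin of a write alongside the value, so MSR emulation can tell host-initiated writes (for instance a migration target restoring IA32_TSC_ADJUST) apart from guest WRMSRs. An illustrative call under the new signature shown below (the surrounding context is assumed):

	struct msr_data msr = {
		.host_initiated	= true,		/* not a guest WRMSR */
		.index		= MSR_IA32_TSC_ADJUST,
		.data		= 0,
	};

	kvm_set_msr(vcpu, &msr);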
struct kvm_x86_ops {
	int (*cpu_has_kvm_support)(void);          /* __init */
	int (*disabled_by_bios)(void);             /* __init */
@@ -634,7 +649,7 @@ struct kvm_x86_ops {

	void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu);
	int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
-	int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
+	int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
	u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
	void (*get_segment)(struct kvm_vcpu *vcpu,
			    struct kvm_segment *var, int seg);
@@ -697,10 +712,11 @@ struct kvm_x86_ops {
	bool (*has_wbinvd_exit)(void);

	void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale);
+	u64 (*read_tsc_offset)(struct kvm_vcpu *vcpu);
	void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
+
	u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc);
-	u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu);
+	u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu, u64 host_tsc);

	void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);

@@ -785,7 +801,7 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu,

void kvm_enable_efer_bits(u64);
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data);
-int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
+int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);

struct x86_emulate_ctxt;

@@ -812,7 +828,7 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr);

int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
-int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);

unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);

@@ -236,6 +236,7 @@
#define MSR_IA32_EBL_CR_POWERON 0x0000002a
#define MSR_EBC_FREQUENCY_ID 0x0000002c
#define MSR_IA32_FEATURE_CONTROL 0x0000003a
+#define MSR_IA32_TSC_ADJUST 0x0000003b

#define FEATURE_CONTROL_LOCKED (1<<0)
#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)

@@ -6,6 +6,7 @@

/* some helper functions for xen and kvm pv clock sources */
cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
+u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src);
void pvclock_set_flags(u8 flags);
unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
@@ -56,4 +57,50 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
	return product;
}

+static __always_inline
+u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src)
+{
+	u64 delta = __native_read_tsc() - src->tsc_timestamp;
+	return pvclock_scale_delta(delta, src->tsc_to_system_mul,
+				   src->tsc_shift);
+}
+
+static __always_inline
+unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
+			       cycle_t *cycles, u8 *flags)
+{
+	unsigned version;
+	cycle_t ret, offset;
+	u8 ret_flags;
+
+	version = src->version;
+	/* Note: emulated platforms which do not advertise SSE2 support
+	 * result in kvmclock not using the necessary RDTSC barriers.
+	 * Without barriers, it is possible that RDTSC instruction reads from
+	 * the time stamp counter outside rdtsc_barrier protected section
+	 * below, resulting in violation of monotonicity.
+	 */
+	rdtsc_barrier();
+	offset = pvclock_get_nsec_offset(src);
+	ret = src->system_time + offset;
+	ret_flags = src->flags;
+	rdtsc_barrier();
+
+	*cycles = ret;
+	*flags = ret_flags;
+	return version;
+}
+
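__pvclock_read_cycles() returns the pre-read version and leaves retry policy to the caller; the pvclock.c hunks further down use exactly this seqcount-style loop, where an odd version means the hypervisor had an update in flight:

	do {
		version = __pvclock_read_cycles(src, &ret, &flags);
	} while ((src->version & 1) || version != src->version);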
+struct pvclock_vsyscall_time_info {
+	struct pvclock_vcpu_time_info pvti;
+	u32 migrate_count;
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+#define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
+#define PVCLOCK_VSYSCALL_NR_PAGES (((NR_CPUS-1)/(PAGE_SIZE/PVTI_SIZE))+1)
+
+int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
+				 int size);
+struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu);
+
#endif /* _ASM_X86_PVCLOCK_H */

@@ -445,8 +445,7 @@ enum vmcs_field {
#define VMX_EPTP_WB_BIT (1ull << 14)
#define VMX_EPT_2MB_PAGE_BIT (1ull << 16)
#define VMX_EPT_1GB_PAGE_BIT (1ull << 17)
-#define VMX_EPT_AD_BIT (1ull << 21)
-#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24)
+#define VMX_EPT_AD_BIT (1ull << 21)
#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)

@@ -33,6 +33,26 @@ extern void map_vsyscall(void);
 */
extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);

+#ifdef CONFIG_X86_64
+
+#define VGETCPU_CPU_MASK 0xfff
+
+static inline unsigned int __getcpu(void)
+{
+	unsigned int p;
+
+	if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
+		/* Load per CPU data from RDTSCP */
+		native_read_tscp(&p);
+	} else {
+		/* Load per CPU data from GDT */
+		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
+	}
+
+	return p;
+}
+#endif /* CONFIG_X86_64 */
+
#endif /* __KERNEL__ */

#endif /* _ASM_X86_VSYSCALL_H */

@@ -16,6 +16,7 @@
#include <linux/delay.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
+#include <linux/module.h>

#include <asm/processor.h>
#include <asm/hardirq.h>
@@ -30,6 +31,27 @@

int in_crash_kexec;

+/*
+ * This is used to VMCLEAR all VMCSs loaded on the
+ * processor. And when loading kvm_intel module, the
+ * callback function pointer will be assigned.
+ *
+ * protected by rcu.
+ */
+crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
+EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
+
+static inline void cpu_crash_vmclear_loaded_vmcss(void)
+{
+	crash_vmclear_fn *do_vmclear_operation = NULL;
+
+	rcu_read_lock();
+	do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
+	if (do_vmclear_operation)
+		do_vmclear_operation();
+	rcu_read_unlock();
+}
+
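The callback pointer above is meant to be filled in by kvm_intel at module load and cleared, with a grace period, at unload. A sketch of that handshake; the vmx-side function name is assumed from the commit subjects in this merge rather than shown in these hunks:

/* In kvm_intel (sketch; the exact vmx.c naming is an assumption): */
static void crash_vmclear_local_loaded_vmcss(void)
{
	/* VMCLEAR every VMCS on this cpu's loaded-VMCS list */
}

/* module init: publish the callback for the crash path */
rcu_assign_pointer(crash_vmclear_loaded_vmcss,
		   crash_vmclear_local_loaded_vmcss);

/* module exit: unpublish, then wait out in-flight readers */
RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
synchronize_rcu();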
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)

static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
@@ -46,6 +68,11 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
#endif
	crash_save_cpu(regs, cpu);

+	/*
+	 * VMCLEAR VMCSs loaded on all cpus if needed.
+	 */
+	cpu_crash_vmclear_loaded_vmcss();
+
	/* Disable VMX or SVM if needed.
	 *
	 * We need to disable virtualization on all CPUs.
@@ -88,6 +115,11 @@ void native_machine_crash_shutdown(struct pt_regs *regs)

	kdump_nmi_shootdown_cpus();

+	/*
+	 * VMCLEAR VMCSs loaded on this cpu if needed.
+	 */
+	cpu_crash_vmclear_loaded_vmcss();
+
	/* Booting kdump kernel with VMX or SVM enabled won't work,
	 * because (among other limitations) we can't disable paging
	 * with the virt flags.

@@ -42,6 +42,7 @@
#include <asm/apic.h>
#include <asm/apicdef.h>
#include <asm/hypervisor.h>
+#include <asm/kvm_guest.h>

static int kvmapf = 1;

@@ -62,6 +63,15 @@ static int parse_no_stealacc(char *arg)

early_param("no-steal-acc", parse_no_stealacc);

+static int kvmclock_vsyscall = 1;
+static int parse_no_kvmclock_vsyscall(char *arg)
+{
+	kvmclock_vsyscall = 0;
+	return 0;
+}
+
+early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
+
static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
static int has_steal_clock = 0;
@@ -110,11 +120,6 @@ void kvm_async_pf_task_wait(u32 token)
	struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
	struct kvm_task_sleep_node n, *e;
	DEFINE_WAIT(wait);
-	int cpu, idle;
-
-	cpu = get_cpu();
-	idle = idle_cpu(cpu);
-	put_cpu();

	spin_lock(&b->lock);
	e = _find_apf_task(b, token);
@@ -128,7 +133,7 @@ void kvm_async_pf_task_wait(u32 token)

	n.token = token;
	n.cpu = smp_processor_id();
-	n.halted = idle || preempt_count() > 1;
+	n.halted = is_idle_task(current) || preempt_count() > 1;
	init_waitqueue_head(&n.wq);
	hlist_add_head(&n.link, &b->list);
	spin_unlock(&b->lock);
@@ -471,6 +476,9 @@ void __init kvm_guest_init(void)
	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
		apic_set_eoi_write(kvm_guest_apic_eoi_write);

+	if (kvmclock_vsyscall)
+		kvm_setup_vsyscall_timeinfo();
+
#ifdef CONFIG_SMP
	smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
	register_cpu_notifier(&kvm_cpu_notifier);

@@ -23,6 +23,7 @@
#include <asm/apic.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
+#include <linux/memblock.h>

#include <asm/x86_init.h>
#include <asm/reboot.h>
@@ -39,7 +40,7 @@ static int parse_no_kvmclock(char *arg)
early_param("no-kvmclock", parse_no_kvmclock);

/* The hypervisor will put information about time periodically here */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
+static struct pvclock_vsyscall_time_info *hv_clock;
static struct pvclock_wall_clock wall_clock;

/*
@@ -52,15 +53,20 @@ static unsigned long kvm_get_wallclock(void)
	struct pvclock_vcpu_time_info *vcpu_time;
	struct timespec ts;
	int low, high;
+	int cpu;

	low = (int)__pa_symbol(&wall_clock);
	high = ((u64)__pa_symbol(&wall_clock) >> 32);

	native_write_msr(msr_kvm_wall_clock, low, high);

-	vcpu_time = &get_cpu_var(hv_clock);
+	preempt_disable();
+	cpu = smp_processor_id();
+
+	vcpu_time = &hv_clock[cpu].pvti;
	pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
-	put_cpu_var(hv_clock);
+
+	preempt_enable();

	return ts.tv_sec;
}
@@ -74,9 +80,11 @@ static cycle_t kvm_clock_read(void)
{
	struct pvclock_vcpu_time_info *src;
	cycle_t ret;
+	int cpu;

	preempt_disable_notrace();
-	src = &__get_cpu_var(hv_clock);
+	cpu = smp_processor_id();
+	src = &hv_clock[cpu].pvti;
	ret = pvclock_clocksource_read(src);
	preempt_enable_notrace();
	return ret;
@@ -99,8 +107,15 @@ static cycle_t kvm_clock_get_cycles(struct clocksource *cs)
static unsigned long kvm_get_tsc_khz(void)
{
	struct pvclock_vcpu_time_info *src;
-	src = &per_cpu(hv_clock, 0);
-	return pvclock_tsc_khz(src);
+	int cpu;
+	unsigned long tsc_khz;
+
+	preempt_disable();
+	cpu = smp_processor_id();
+	src = &hv_clock[cpu].pvti;
+	tsc_khz = pvclock_tsc_khz(src);
+	preempt_enable();
+	return tsc_khz;
}

static void kvm_get_preset_lpj(void)
@@ -119,10 +134,14 @@ bool kvm_check_and_clear_guest_paused(void)
{
	bool ret = false;
	struct pvclock_vcpu_time_info *src;
+	int cpu = smp_processor_id();

-	src = &__get_cpu_var(hv_clock);
+	if (!hv_clock)
+		return ret;
+
+	src = &hv_clock[cpu].pvti;
	if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
-		__this_cpu_and(hv_clock.flags, ~PVCLOCK_GUEST_STOPPED);
+		src->flags &= ~PVCLOCK_GUEST_STOPPED;
		ret = true;
	}

@@ -141,9 +160,10 @@ int kvm_register_clock(char *txt)
{
	int cpu = smp_processor_id();
	int low, high, ret;
+	struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti;

-	low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
-	high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
+	low = (int)__pa(src) | 1;
+	high = ((u64)__pa(src) >> 32);
	ret = native_write_msr_safe(msr_kvm_system_time, low, high);
	printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
	       cpu, high, low, txt);
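The low bit of the value written to the system-time MSR is the enable flag, which is why the pvti physical address is or'ed with 1 above; writing zero disables the per-cpu clock area again. A short sketch of both directions, stated as background on the kvmclock MSR ABI rather than taken from these hunks:

	/* enable: pass the pvti physical address with bit 0 set */
	native_write_msr_safe(msr_kvm_system_time, (u32)__pa(src) | 1,
			      (u32)(__pa(src) >> 32));

	/* disable again, e.g. on shutdown or before kexec */
	native_write_msr(msr_kvm_system_time, 0, 0);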
@ -197,6 +217,8 @@ static void kvm_shutdown(void)
|
||||
|
||||
void __init kvmclock_init(void)
|
||||
{
|
||||
unsigned long mem;
|
||||
|
||||
if (!kvm_para_available())
|
||||
return;
|
||||
|
||||
@ -209,8 +231,18 @@ void __init kvmclock_init(void)
|
||||
printk(KERN_INFO "kvm-clock: Using msrs %x and %x",
|
||||
msr_kvm_system_time, msr_kvm_wall_clock);
|
||||
|
||||
if (kvm_register_clock("boot clock"))
|
||||
mem = memblock_alloc(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS,
|
||||
PAGE_SIZE);
|
||||
if (!mem)
|
||||
return;
|
||||
hv_clock = __va(mem);
|
||||
|
||||
if (kvm_register_clock("boot clock")) {
|
||||
hv_clock = NULL;
|
||||
memblock_free(mem,
|
||||
sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
|
||||
return;
|
||||
}
|
||||
pv_time_ops.sched_clock = kvm_clock_read;
|
||||
x86_platform.calibrate_tsc = kvm_get_tsc_khz;
|
||||
x86_platform.get_wallclock = kvm_get_wallclock;
|
||||
@ -233,3 +265,37 @@ void __init kvmclock_init(void)
|
||||
if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
|
||||
pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
|
||||
}
|
||||
|
||||
int __init kvm_setup_vsyscall_timeinfo(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
int cpu;
|
||||
int ret;
|
||||
u8 flags;
|
||||
struct pvclock_vcpu_time_info *vcpu_time;
|
||||
unsigned int size;
|
||||
|
||||
size = sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS;
|
||||
|
||||
preempt_disable();
|
||||
cpu = smp_processor_id();
|
||||
|
||||
vcpu_time = &hv_clock[cpu].pvti;
|
||||
flags = pvclock_read_flags(vcpu_time);
|
||||
|
||||
if (!(flags & PVCLOCK_TSC_STABLE_BIT)) {
|
||||
preempt_enable();
|
||||
return 1;
|
||||
}
|
||||
|
||||
if ((ret = pvclock_init_vsyscall(hv_clock, size))) {
|
||||
preempt_enable();
|
||||
return ret;
|
||||
}
|
||||
|
||||
preempt_enable();
|
||||
|
||||
kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
@@ -17,23 +17,13 @@
 
 #include <linux/kernel.h>
 #include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/sched.h>
+#include <linux/gfp.h>
+#include <linux/bootmem.h>
+#include <asm/fixmap.h>
 #include <asm/pvclock.h>
 
-/*
- * These are periodically updated
- *    xen: magic shared_info page
- *    kvm: gpa registered via msr
- * and then copied here.
- */
-struct pvclock_shadow_time {
-	u64 tsc_timestamp;	/* TSC at last update of time vals. */
-	u64 system_timestamp;	/* Time, in nanosecs, since boot. */
-	u32 tsc_to_nsec_mul;
-	int tsc_shift;
-	u32 version;
-	u8  flags;
-};
-
 static u8 valid_flags __read_mostly = 0;
 
 void pvclock_set_flags(u8 flags)
@@ -41,34 +31,6 @@ void pvclock_set_flags(u8 flags)
 	valid_flags = flags;
 }
 
-static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
-{
-	u64 delta = native_read_tsc() - shadow->tsc_timestamp;
-	return pvclock_scale_delta(delta, shadow->tsc_to_nsec_mul,
-				   shadow->tsc_shift);
-}
-
-/*
- * Reads a consistent set of time-base values from hypervisor,
- * into a shadow data area.
- */
-static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
-					struct pvclock_vcpu_time_info *src)
-{
-	do {
-		dst->version = src->version;
-		rmb();		/* fetch version before data */
-		dst->tsc_timestamp = src->tsc_timestamp;
-		dst->system_timestamp = src->system_time;
-		dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
-		dst->tsc_shift = src->tsc_shift;
-		dst->flags = src->flags;
-		rmb();		/* test version after fetching data */
-	} while ((src->version & 1) || (dst->version != src->version));
-
-	return dst->version;
-}
-
 unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
 {
 	u64 pv_tsc_khz = 1000000ULL << 32;
@@ -88,23 +50,32 @@ void pvclock_resume(void)
 	atomic64_set(&last_value, 0);
 }
 
-cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
+u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
 {
-	struct pvclock_shadow_time shadow;
 	unsigned version;
-	cycle_t ret, offset;
-	u64 last;
+	cycle_t ret;
+	u8 flags;
 
 	do {
-		version = pvclock_get_time_values(&shadow, src);
-		barrier();
-		offset = pvclock_get_nsec_offset(&shadow);
-		ret = shadow.system_timestamp + offset;
-		barrier();
-	} while (version != src->version);
+		version = __pvclock_read_cycles(src, &ret, &flags);
+	} while ((src->version & 1) || version != src->version);
+
+	return flags & valid_flags;
+}
+
+cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
+{
+	unsigned version;
+	cycle_t ret;
+	u64 last;
+	u8 flags;
+
+	do {
+		version = __pvclock_read_cycles(src, &ret, &flags);
+	} while ((src->version & 1) || version != src->version);
 
 	if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
-		(shadow.flags & PVCLOCK_TSC_STABLE_BIT))
+		(flags & PVCLOCK_TSC_STABLE_BIT))
 		return ret;
 
 	/*
@@ -156,3 +127,71 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
 
 	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
 }
+
+static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
+
+static struct pvclock_vsyscall_time_info *
+pvclock_get_vsyscall_user_time_info(int cpu)
+{
+	if (!pvclock_vdso_info) {
+		BUG();
+		return NULL;
+	}
+
+	return &pvclock_vdso_info[cpu];
+}
+
+struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
+{
+	return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
+}
+
+#ifdef CONFIG_X86_64
+static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
+				void *v)
+{
+	struct task_migration_notifier *mn = v;
+	struct pvclock_vsyscall_time_info *pvti;
+
+	pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);
+
+	/* this is NULL when pvclock vsyscall is not initialized */
+	if (unlikely(pvti == NULL))
+		return NOTIFY_DONE;
+
+	pvti->migrate_count++;
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block pvclock_migrate = {
+	.notifier_call = pvclock_task_migrate,
+};
+
+/*
+ * Initialize the generic pvclock vsyscall state.  This will allocate
+ * a/some page(s) for the per-vcpu pvclock information, set up a
+ * fixmap mapping for the page(s)
+ */
+
+int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
+				 int size)
+{
+	int idx;
+
+	WARN_ON(size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
+
+	pvclock_vdso_info = i;
+
+	for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
+		__set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
+			     __pa_symbol(i) + (idx*PAGE_SIZE),
+			     PAGE_KERNEL_VVAR);
+	}
+
+	register_task_migration_notifier(&pvclock_migrate);
+
+	return 0;
+}
+#endif
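
The retry loops introduced above in pvclock_read_flags() and pvclock_clocksource_read() (arch/x86/kernel/pvclock.c) follow the pvclock version protocol: the hypervisor makes `version` odd while it updates the record, so a reader retries whenever it sees an odd version or the version changed across the read. Here is a standalone, single-threaded sketch of just that control flow, with simplified stand-in types and no memory barriers (the real code needs rmb()):

#include <stdint.h>
#include <stdio.h>

struct time_info {
	volatile uint32_t version;
	volatile uint64_t system_time_ns;
};

static uint64_t read_consistent(struct time_info *ti)
{
	uint32_t v;
	uint64_t t;

	do {
		v = ti->version;		/* snapshot version first */
		t = ti->system_time_ns;		/* then the payload */
	} while ((v & 1) || v != ti->version);	/* odd or changed: retry */

	return t;
}

int main(void)
{
	struct time_info ti = { .version = 2, .system_time_ns = 123456789 };

	printf("time = %llu ns\n", (unsigned long long)read_consistent(&ti));
	return 0;
}
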
@@ -320,6 +320,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		if (index == 0) {
 			entry->ebx &= kvm_supported_word9_x86_features;
 			cpuid_mask(&entry->ebx, 9);
+			// TSC_ADJUST is emulated
+			entry->ebx |= F(TSC_ADJUST);
 		} else
 			entry->ebx = 0;
 		entry->eax = 0;
@@ -659,6 +661,7 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
 	} else
 		*eax = *ebx = *ecx = *edx = 0;
 }
+EXPORT_SYMBOL_GPL(kvm_cpuid);
 
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 {
@@ -31,6 +31,14 @@ static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
 	return best && (best->ecx & bit(X86_FEATURE_XSAVE));
 }
 
+static inline bool guest_cpuid_has_tsc_adjust(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 7, 0);
+	return best && (best->ebx & bit(X86_FEATURE_TSC_ADJUST));
+}
+
 static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
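
guest_cpuid_has_tsc_adjust() above consults the guest's CPUID leaf 7, subleaf 0; on real hardware IA32_TSC_ADJUST support is reported in EBX bit 1 of that leaf. Here is a small host-side probe of the same bit using the compiler's cpuid intrinsics (GCC/Clang on x86; __get_cpuid_count requires a reasonably recent compiler):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return 1;

	/* CPUID.(EAX=07H,ECX=0):EBX[1] = IA32_TSC_ADJUST supported */
	printf("TSC_ADJUST supported: %s\n", (ebx & (1u << 1)) ? "yes" : "no");
	return 0;
}
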
@@ -676,8 +676,9 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
 						addr.seg);
 		if (!usable)
 			goto bad;
-		/* code segment or read-only data segment */
-		if (((desc.type & 8) || !(desc.type & 2)) && write)
+		/* code segment in protected mode or read-only data segment */
+		if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
+		     || !(desc.type & 2)) && write)
 			goto bad;
 		/* unreadable code segment */
 		if (!fetch && (desc.type & 8) && !(desc.type & 2))
@@ -1011,7 +1011,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
 		local_irq_save(flags);
 
 		now = apic->lapic_timer.timer.base->get_time();
-		guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
+		guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc());
 		if (likely(tscdeadline > guest_tsc)) {
 			ns = (tscdeadline - guest_tsc) * 1000000ULL;
 			do_div(ns, this_tsc_khz);
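
The start_apic_timer() arithmetic above converts the remaining TSC-deadline delta to nanoseconds as ns = delta * 1000000 / tsc_khz (do_div performs the division in kernel code). A worked example with made-up numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t tscdeadline = 5000000, guest_tsc = 2000000;
	uint64_t this_tsc_khz = 3000000;	/* a 3 GHz TSC */

	/* 3000000 remaining cycles at 3 GHz is 1 ms = 1000000 ns */
	uint64_t ns = (tscdeadline - guest_tsc) * 1000000ULL / this_tsc_khz;

	printf("timer fires in %llu ns\n", (unsigned long long)ns);
	return 0;
}
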
@@ -2382,12 +2382,20 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		    || (!vcpu->arch.mmu.direct_map && write_fault
 			&& !is_write_protection(vcpu) && !user_fault)) {
 
+		/*
+		 * There are two cases:
+		 * - one is that another vcpu creates a new sp in the window
+		 *   between mapping_level() and acquiring mmu-lock.
+		 * - the other is that the new sp is created by the vcpu itself
+		 *   (page-fault path) when the guest uses the target gfn as
+		 *   its page table.
+		 * Both of these cases can be fixed by allowing the guest to
+		 *   retry the access; it will refault, and then we can establish
+		 *   the mapping with a small page.
+		 */
 		if (level > PT_PAGE_TABLE_LEVEL &&
-		    has_wrprotected_page(vcpu->kvm, gfn, level)) {
-			ret = 1;
-			drop_spte(vcpu->kvm, sptep);
+		    has_wrprotected_page(vcpu->kvm, gfn, level))
 			goto done;
-		}
 
 		spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE;
 
@@ -2505,6 +2513,14 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 	mmu_free_roots(vcpu);
 }
 
+static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
+{
+	int bit7;
+
+	bit7 = (gpte >> 7) & 1;
+	return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
+}
+
 static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
 				     bool no_dirty_log)
 {
@@ -2517,6 +2533,26 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
 	return gfn_to_pfn_memslot_atomic(slot, gfn);
 }
 
+static bool prefetch_invalid_gpte(struct kvm_vcpu *vcpu,
+				  struct kvm_mmu_page *sp, u64 *spte,
+				  u64 gpte)
+{
+	if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
+		goto no_present;
+
+	if (!is_present_gpte(gpte))
+		goto no_present;
+
+	if (!(gpte & PT_ACCESSED_MASK))
+		goto no_present;
+
+	return false;
+
+no_present:
+	drop_spte(vcpu->kvm, spte);
+	return true;
+}
+
 static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
 				    struct kvm_mmu_page *sp,
 				    u64 *start, u64 *end)
@@ -2671,7 +2707,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
 	 * PT_PAGE_TABLE_LEVEL and there would be no adjustment done
 	 * here.
 	 */
-	if (!is_error_pfn(pfn) && !kvm_is_mmio_pfn(pfn) &&
+	if (!is_error_noslot_pfn(pfn) && !kvm_is_mmio_pfn(pfn) &&
 	    level == PT_PAGE_TABLE_LEVEL &&
 	    PageTransCompound(pfn_to_page(pfn)) &&
 	    !has_wrprotected_page(vcpu->kvm, gfn, PT_DIRECTORY_LEVEL)) {
@@ -2699,18 +2735,13 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
 	}
 }
 
-static bool mmu_invalid_pfn(pfn_t pfn)
-{
-	return unlikely(is_invalid_pfn(pfn));
-}
-
 static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 				pfn_t pfn, unsigned access, int *ret_val)
 {
 	bool ret = true;
 
 	/* The pfn is invalid, report the error! */
-	if (unlikely(is_invalid_pfn(pfn))) {
+	if (unlikely(is_error_pfn(pfn))) {
 		*ret_val = kvm_handle_bad_page(vcpu, gfn, pfn);
 		goto exit;
 	}
@@ -2862,7 +2893,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
 		return r;
 
 	spin_lock(&vcpu->kvm->mmu_lock);
-	if (mmu_notifier_retry(vcpu, mmu_seq))
+	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
 	if (likely(!force_pt_level))
@@ -3331,7 +3362,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 		return r;
 
 	spin_lock(&vcpu->kvm->mmu_lock);
-	if (mmu_notifier_retry(vcpu, mmu_seq))
+	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
 	if (likely(!force_pt_level))
@@ -3399,14 +3430,6 @@ static void paging_free(struct kvm_vcpu *vcpu)
 	nonpaging_free(vcpu);
 }
 
-static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
-{
-	int bit7;
-
-	bit7 = (gpte >> 7) & 1;
-	return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
-}
-
 static inline void protect_clean_gpte(unsigned *access, unsigned gpte)
 {
 	unsigned mask;
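
The is_rsvd_bits_set() helper moved above uses bit 7 of the guest PTE (the large-page bit) to select which reserved-bit mask applies at a given paging level. A compilable sketch of the same indexing; the mask value below is a placeholder, not one the kernel actually computes:

#include <stdint.h>
#include <stdio.h>

static uint64_t rsvd_bits_mask[2][4];	/* [pte bit 7][level - 1], filled at setup */

static int is_rsvd_bits_set(uint64_t gpte, int level)
{
	int bit7 = (gpte >> 7) & 1;

	return (gpte & rsvd_bits_mask[bit7][level - 1]) != 0;
}

int main(void)
{
	uint64_t gpte = (1ULL << 62) | (1ULL << 7);

	rsvd_bits_mask[1][0] = 1ULL << 62;	/* hypothetical reserved bit */
	printf("reserved bits set: %d\n", is_rsvd_bits_set(gpte, 1));
	return 0;
}
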
@@ -305,51 +305,43 @@ static int FNAME(walk_addr_nested)(struct guest_walker *walker,
 					addr, access);
 }
 
-static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
-					 struct kvm_mmu_page *sp, u64 *spte,
-					 pt_element_t gpte)
+static bool
+FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+		     u64 *spte, pt_element_t gpte, bool no_dirty_log)
 {
-	if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
-		goto no_present;
+	unsigned pte_access;
+	gfn_t gfn;
+	pfn_t pfn;
 
-	if (!is_present_gpte(gpte))
-		goto no_present;
+	if (prefetch_invalid_gpte(vcpu, sp, spte, gpte))
+		return false;
 
-	if (!(gpte & PT_ACCESSED_MASK))
-		goto no_present;
+	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
 
-	return false;
+	gfn = gpte_to_gfn(gpte);
+	pte_access = sp->role.access & gpte_access(vcpu, gpte);
+	protect_clean_gpte(&pte_access, gpte);
+	pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
+			no_dirty_log && (pte_access & ACC_WRITE_MASK));
+	if (is_error_pfn(pfn))
+		return false;
+
+	/*
+	 * we call mmu_set_spte() with host_writable = true because
+	 * pte_prefetch_gfn_to_pfn always gets a writable pfn.
+	 */
+	mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
+		     NULL, PT_PAGE_TABLE_LEVEL, gfn, pfn, true, true);
 
-no_present:
-	drop_spte(vcpu->kvm, spte);
 	return true;
 }
 
 static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 			      u64 *spte, const void *pte)
 {
-	pt_element_t gpte;
-	unsigned pte_access;
-	pfn_t pfn;
+	pt_element_t gpte = *(const pt_element_t *)pte;
 
-	gpte = *(const pt_element_t *)pte;
-	if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
-		return;
-
-	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
-	pte_access = sp->role.access & gpte_access(vcpu, gpte);
-	protect_clean_gpte(&pte_access, gpte);
-	pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
-	if (mmu_invalid_pfn(pfn))
-		return;
-
-	/*
-	 * we call mmu_set_spte() with host_writable = true because that
-	 * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
-	 */
-	mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
-		     NULL, PT_PAGE_TABLE_LEVEL,
-		     gpte_to_gfn(gpte), pfn, true, true);
+	FNAME(prefetch_gpte)(vcpu, sp, spte, gpte, false);
 }
 
 static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu,
@@ -395,53 +387,34 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
 	spte = sp->spt + i;
 
 	for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) {
-		pt_element_t gpte;
-		unsigned pte_access;
-		gfn_t gfn;
-		pfn_t pfn;
-
 		if (spte == sptep)
 			continue;
 
 		if (is_shadow_present_pte(*spte))
 			continue;
 
-		gpte = gptep[i];
-
-		if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
-			continue;
-
-		pte_access = sp->role.access & gpte_access(vcpu, gpte);
-		protect_clean_gpte(&pte_access, gpte);
-		gfn = gpte_to_gfn(gpte);
-		pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
-				      pte_access & ACC_WRITE_MASK);
-		if (mmu_invalid_pfn(pfn))
+		if (!FNAME(prefetch_gpte)(vcpu, sp, spte, gptep[i], true))
 			break;
-
-		mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
-			     NULL, PT_PAGE_TABLE_LEVEL, gfn,
-			     pfn, true, true);
 	}
 }
 
 /*
  * Fetch a shadow pte for a specific level in the paging hierarchy.
+ * If the guest tries to write a write-protected page, we need to
+ * emulate this operation, return 1 to indicate this case.
  */
-static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
+static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 			 struct guest_walker *gw,
 			 int user_fault, int write_fault, int hlevel,
-			 int *emulate, pfn_t pfn, bool map_writable,
-			 bool prefault)
+			 pfn_t pfn, bool map_writable, bool prefault)
 {
-	unsigned access = gw->pt_access;
 	struct kvm_mmu_page *sp = NULL;
-	int top_level;
-	unsigned direct_access;
 	struct kvm_shadow_walk_iterator it;
+	unsigned direct_access, access = gw->pt_access;
+	int top_level, emulate = 0;
 
 	if (!is_present_gpte(gw->ptes[gw->level - 1]))
-		return NULL;
+		return 0;
 
 	direct_access = gw->pte_access;
 
@@ -505,17 +478,17 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 
 	clear_sp_write_flooding_count(it.sptep);
 	mmu_set_spte(vcpu, it.sptep, access, gw->pte_access,
-		     user_fault, write_fault, emulate, it.level,
+		     user_fault, write_fault, &emulate, it.level,
 		     gw->gfn, pfn, prefault, map_writable);
 	FNAME(pte_prefetch)(vcpu, gw, it.sptep);
 
-	return it.sptep;
+	return emulate;
 
 out_gpte_changed:
 	if (sp)
 		kvm_mmu_put_page(sp, it.sptep);
 	kvm_release_pfn_clean(pfn);
-	return NULL;
+	return 0;
 }
 
 /*
@@ -538,8 +511,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 	int write_fault = error_code & PFERR_WRITE_MASK;
 	int user_fault = error_code & PFERR_USER_MASK;
 	struct guest_walker walker;
-	u64 *sptep;
-	int emulate = 0;
 	int r;
 	pfn_t pfn;
 	int level = PT_PAGE_TABLE_LEVEL;
@@ -594,24 +565,20 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 		return r;
 
 	spin_lock(&vcpu->kvm->mmu_lock);
-	if (mmu_notifier_retry(vcpu, mmu_seq))
+	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
 		goto out_unlock;
 
 	kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
 	kvm_mmu_free_some_pages(vcpu);
 	if (!force_pt_level)
 		transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
-	sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
-			     level, &emulate, pfn, map_writable, prefault);
-	(void)sptep;
-	pgprintk("%s: shadow pte %p %llx emulate %d\n", __func__,
-		 sptep, *sptep, emulate);
-
+	r = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
+			 level, pfn, map_writable, prefault);
 	++vcpu->stat.pf_fixed;
 	kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
-	return emulate;
+	return r;
 
 out_unlock:
 	spin_unlock(&vcpu->kvm->mmu_lock);
@@ -757,7 +724,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 					  sizeof(pt_element_t)))
 			return -EINVAL;
 
-		if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
+		if (prefetch_invalid_gpte(vcpu, sp, &sp->spt[i], gpte)) {
 			vcpu->kvm->tlbs_dirty++;
 			continue;
 		}
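
The consolidated FNAME(prefetch_gpte)() above leans on gpte_to_gfn(), which strips the flag bits out of a guest PTE and shifts the physical-address field down to a frame number. A sketch under the assumption of 4 KiB pages and x86-64 style address bits 12-51:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PT64_BASE_ADDR_MASK \
	(((1ULL << 52) - 1) & ~((1ULL << PAGE_SHIFT) - 1))

static uint64_t gpte_to_gfn(uint64_t gpte)
{
	return (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
}

int main(void)
{
	uint64_t gpte = 0x12345000ULL | 0x67;	/* address field plus flag bits */

	/* prints 0x12345 */
	printf("gfn = %#llx\n", (unsigned long long)gpte_to_gfn(gpte));
	return 0;
}
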
@@ -20,6 +20,7 @@
 #include "mmu.h"
 #include "kvm_cache_regs.h"
 #include "x86.h"
+#include "cpuid.h"
 
 #include <linux/module.h>
 #include <linux/mod_devicetable.h>
@@ -630,15 +631,12 @@ static int svm_hardware_enable(void *garbage)
 		return -EBUSY;
 
 	if (!has_svm()) {
-		printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
-		       me);
+		pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me);
 		return -EINVAL;
 	}
 	sd = per_cpu(svm_data, me);
 
 	if (!sd) {
-		printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
-		       me);
+		pr_err("%s: svm_data is NULL on %d\n", __func__, me);
 		return -EINVAL;
 	}
 
@@ -1012,6 +1010,13 @@ static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
 	svm->tsc_ratio = ratio;
 }
 
+static u64 svm_read_tsc_offset(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	return svm->vmcb->control.tsc_offset;
+}
+
 static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -1189,6 +1194,8 @@ static void init_vmcb(struct vcpu_svm *svm)
 static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
+	u32 dummy;
+	u32 eax = 1;
 
 	init_vmcb(svm);
 
@@ -1197,8 +1204,9 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
 		svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
 		svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
 	}
-	vcpu->arch.regs_avail = ~0;
-	vcpu->arch.regs_dirty = ~0;
+
+	kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
+	kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
 
 	return 0;
 }
@@ -1254,11 +1262,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
 	svm->asid_generation = 0;
 	init_vmcb(svm);
-	kvm_write_tsc(&svm->vcpu, 0);
-
-	err = fx_init(&svm->vcpu);
-	if (err)
-		goto free_page4;
 
 	svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
 	if (kvm_vcpu_is_bsp(&svm->vcpu))
@@ -1268,8 +1271,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 
 	return &svm->vcpu;
 
-free_page4:
-	__free_page(hsave_page);
 free_page3:
 	__free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
 free_page2:
@@ -3008,11 +3009,11 @@ static int cr8_write_interception(struct vcpu_svm *svm)
 	return 0;
 }
 
-u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu)
+u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
 {
 	struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
 	return vmcb->control.tsc_offset +
-		svm_scale_tsc(vcpu, native_read_tsc());
+		svm_scale_tsc(vcpu, host_tsc);
 }
 
 static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
@@ -3131,13 +3132,15 @@ static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
 	return 0;
 }
 
-static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
+static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
+	u32 ecx = msr->index;
+	u64 data = msr->data;
 	switch (ecx) {
 	case MSR_IA32_TSC:
-		kvm_write_tsc(vcpu, data);
+		kvm_write_tsc(vcpu, msr);
 		break;
 	case MSR_STAR:
 		svm->vmcb->save.star = data;
@@ -3192,20 +3195,24 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
 		break;
 	default:
-		return kvm_set_msr_common(vcpu, ecx, data);
+		return kvm_set_msr_common(vcpu, msr);
 	}
 	return 0;
 }
 
 static int wrmsr_interception(struct vcpu_svm *svm)
 {
+	struct msr_data msr;
 	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
 	u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
 		| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
 
+	msr.data = data;
+	msr.index = ecx;
+	msr.host_initiated = false;
+
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
-	if (svm_set_msr(&svm->vcpu, ecx, data)) {
+	if (svm_set_msr(&svm->vcpu, &msr)) {
 		trace_kvm_msr_write_ex(ecx, data);
 		kvm_inject_gp(&svm->vcpu, 0);
 	} else {
@@ -4302,6 +4309,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.has_wbinvd_exit = svm_has_wbinvd_exit,
 
 	.set_tsc_khz = svm_set_tsc_khz,
+	.read_tsc_offset = svm_read_tsc_offset,
 	.write_tsc_offset = svm_write_tsc_offset,
 	.adjust_tsc_offset = svm_adjust_tsc_offset,
 	.compute_tsc_offset = svm_compute_tsc_offset,
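
The svm_set_msr()/wrmsr_interception() rework above is part of a series-wide refactor: instead of a bare (index, data) pair, MSR writes now carry a struct with a host_initiated flag so emulation can tell guest WRMSR apart from userspace ioctl writes. A standalone sketch of the packing; the struct is redeclared here for illustration and the register values are invented:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct msr_data {
	bool host_initiated;
	uint32_t index;
	uint64_t data;
};

int main(void)
{
	/* example register state at a WRMSR exit */
	uint32_t eax = 0xdeadbeef, edx = 0x12, ecx = 0x10;

	struct msr_data msr = {
		.host_initiated = false,		/* guest-initiated */
		.index = ecx,				/* MSR number from ECX */
		.data = ((uint64_t)edx << 32) | eax,	/* EDX:EAX -> 64 bits */
	};

	printf("wrmsr index=%#x data=%#llx host=%d\n", (unsigned)msr.index,
	       (unsigned long long)msr.data, msr.host_initiated);
	return 0;
}
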
@@ -4,6 +4,7 @@
 #include <linux/tracepoint.h>
 #include <asm/vmx.h>
 #include <asm/svm.h>
+#include <asm/clocksource.h>
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvm
@@ -754,6 +755,68 @@ TRACE_EVENT(
 		  __entry->write ? "Write" : "Read",
 		  __entry->gpa_match ? "GPA" : "GVA")
 );
+
+#ifdef CONFIG_X86_64
+
+#define host_clocks				\
+	{VCLOCK_NONE, "none"},			\
+	{VCLOCK_TSC,  "tsc"},			\
+	{VCLOCK_HPET, "hpet"}			\
+
+TRACE_EVENT(kvm_update_master_clock,
+	TP_PROTO(bool use_master_clock, unsigned int host_clock, bool offset_matched),
+	TP_ARGS(use_master_clock, host_clock, offset_matched),
+
+	TP_STRUCT__entry(
+		__field(bool, use_master_clock)
+		__field(unsigned int, host_clock)
+		__field(bool, offset_matched)
+	),
+
+	TP_fast_assign(
+		__entry->use_master_clock = use_master_clock;
+		__entry->host_clock = host_clock;
+		__entry->offset_matched = offset_matched;
+	),
+
+	TP_printk("masterclock %d hostclock %s offsetmatched %u",
+		  __entry->use_master_clock,
+		  __print_symbolic(__entry->host_clock, host_clocks),
+		  __entry->offset_matched)
+);
+
+TRACE_EVENT(kvm_track_tsc,
+	TP_PROTO(unsigned int vcpu_id, unsigned int nr_matched,
+		 unsigned int online_vcpus, bool use_master_clock,
+		 unsigned int host_clock),
+	TP_ARGS(vcpu_id, nr_matched, online_vcpus, use_master_clock,
+		host_clock),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, vcpu_id)
+		__field(unsigned int, nr_vcpus_matched_tsc)
+		__field(unsigned int, online_vcpus)
+		__field(bool, use_master_clock)
+		__field(unsigned int, host_clock)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id = vcpu_id;
+		__entry->nr_vcpus_matched_tsc = nr_matched;
+		__entry->online_vcpus = online_vcpus;
+		__entry->use_master_clock = use_master_clock;
+		__entry->host_clock = host_clock;
+	),
+
+	TP_printk("vcpu_id %u masterclock %u offsetmatched %u nr_online %u"
+		  " hostclock %s",
+		  __entry->vcpu_id, __entry->use_master_clock,
+		  __entry->nr_vcpus_matched_tsc, __entry->online_vcpus,
+		  __print_symbolic(__entry->host_clock, host_clocks))
+);
+
+#endif /* CONFIG_X86_64 */
 
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
@@ -42,6 +42,7 @@
 #include <asm/i387.h>
 #include <asm/xcr.h>
 #include <asm/perf_event.h>
+#include <asm/kexec.h>
 
 #include "trace.h"
 
@@ -802,11 +803,6 @@ static inline bool cpu_has_vmx_ept_ad_bits(void)
 	return vmx_capability.ept & VMX_EPT_AD_BIT;
 }
 
-static inline bool cpu_has_vmx_invept_individual_addr(void)
-{
-	return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT;
-}
-
 static inline bool cpu_has_vmx_invept_context(void)
 {
 	return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT;
@@ -992,6 +988,46 @@ static void vmcs_load(struct vmcs *vmcs)
 		       vmcs, phys_addr);
 }
 
+#ifdef CONFIG_KEXEC
+/*
+ * This bitmap is used to indicate whether the vmclear
+ * operation is enabled on all cpus. All disabled by
+ * default.
+ */
+static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE;
+
+static inline void crash_enable_local_vmclear(int cpu)
+{
+	cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap);
+}
+
+static inline void crash_disable_local_vmclear(int cpu)
+{
+	cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap);
+}
+
+static inline int crash_local_vmclear_enabled(int cpu)
+{
+	return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap);
+}
+
+static void crash_vmclear_local_loaded_vmcss(void)
+{
+	int cpu = raw_smp_processor_id();
+	struct loaded_vmcs *v;
+
+	if (!crash_local_vmclear_enabled(cpu))
+		return;
+
+	list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
+			    loaded_vmcss_on_cpu_link)
+		vmcs_clear(v->vmcs);
+}
+#else
+static inline void crash_enable_local_vmclear(int cpu) { }
+static inline void crash_disable_local_vmclear(int cpu) { }
+#endif /* CONFIG_KEXEC */
+
 static void __loaded_vmcs_clear(void *arg)
 {
 	struct loaded_vmcs *loaded_vmcs = arg;
@@ -1001,15 +1037,28 @@ static void __loaded_vmcs_clear(void *arg)
 		return; /* vcpu migration can race with cpu offline */
 	if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
 		per_cpu(current_vmcs, cpu) = NULL;
+	crash_disable_local_vmclear(cpu);
 	list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
+
+	/*
+	 * we should ensure updating loaded_vmcs->loaded_vmcss_on_cpu_link
+	 * is before setting loaded_vmcs->vcpu to -1 which is done in
+	 * loaded_vmcs_init. Otherwise, another cpu can see vcpu == -1 first
+	 * and then add the vmcs into the percpu list before it is deleted.
+	 */
+	smp_wmb();
+
 	loaded_vmcs_init(loaded_vmcs);
+	crash_enable_local_vmclear(cpu);
 }
 
 static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
 {
-	if (loaded_vmcs->cpu != -1)
-		smp_call_function_single(
-			loaded_vmcs->cpu, __loaded_vmcs_clear, loaded_vmcs, 1);
+	int cpu = loaded_vmcs->cpu;
+
+	if (cpu != -1)
+		smp_call_function_single(cpu,
+			 __loaded_vmcs_clear, loaded_vmcs, 1);
 }
 
 static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx)
@@ -1051,17 +1100,6 @@ static inline void ept_sync_context(u64 eptp)
 	}
 }
 
-static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
-{
-	if (enable_ept) {
-		if (cpu_has_vmx_invept_individual_addr())
-			__invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
-					eptp, gpa);
-		else
-			ept_sync_context(eptp);
-	}
-}
-
 static __always_inline unsigned long vmcs_readl(unsigned long field)
 {
 	unsigned long value;
@@ -1535,8 +1573,18 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 		local_irq_disable();
+		crash_disable_local_vmclear(cpu);
+
+		/*
+		 * The read of loaded_vmcs->cpu should happen before fetching
+		 * loaded_vmcs->loaded_vmcss_on_cpu_link.
+		 * See the comments in __loaded_vmcs_clear().
+		 */
+		smp_rmb();
+
 		list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
 			 &per_cpu(loaded_vmcss_on_cpu, cpu));
+		crash_enable_local_vmclear(cpu);
 		local_irq_enable();
 
 		/*
@@ -1839,11 +1887,10 @@ static u64 guest_read_tsc(void)
 * Like guest_read_tsc, but always returns L1's notion of the timestamp
 * counter, even if a nested guest (L2) is currently running.
 */
-u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu)
+u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
 {
-	u64 host_tsc, tsc_offset;
+	u64 tsc_offset;
 
-	rdtscll(host_tsc);
 	tsc_offset = is_guest_mode(vcpu) ?
 		to_vmx(vcpu)->nested.vmcs01_tsc_offset :
 		vmcs_read64(TSC_OFFSET);
@@ -1866,6 +1913,11 @@ static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
 		WARN(1, "user requested TSC rate below hardware speed\n");
 }
 
+static u64 vmx_read_tsc_offset(struct kvm_vcpu *vcpu)
+{
+	return vmcs_read64(TSC_OFFSET);
+}
+
 /*
 * writes 'offset' into guest's timestamp counter offset register
 */
@@ -2202,15 +2254,17 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
-static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
+static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct shared_msr_entry *msr;
 	int ret = 0;
+	u32 msr_index = msr_info->index;
+	u64 data = msr_info->data;
 
 	switch (msr_index) {
 	case MSR_EFER:
-		ret = kvm_set_msr_common(vcpu, msr_index, data);
+		ret = kvm_set_msr_common(vcpu, msr_info);
 		break;
 #ifdef CONFIG_X86_64
 	case MSR_FS_BASE:
@@ -2236,7 +2290,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vmcs_writel(GUEST_SYSENTER_ESP, data);
 		break;
 	case MSR_IA32_TSC:
-		kvm_write_tsc(vcpu, data);
+		kvm_write_tsc(vcpu, msr_info);
 		break;
 	case MSR_IA32_CR_PAT:
 		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
@@ -2244,7 +2298,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			vcpu->arch.pat = data;
 			break;
 		}
-		ret = kvm_set_msr_common(vcpu, msr_index, data);
+		ret = kvm_set_msr_common(vcpu, msr_info);
+		break;
+	case MSR_IA32_TSC_ADJUST:
+		ret = kvm_set_msr_common(vcpu, msr_info);
 		break;
 	case MSR_TSC_AUX:
 		if (!vmx->rdtscp_enabled)
@@ -2267,7 +2324,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			}
 			break;
 		}
-		ret = kvm_set_msr_common(vcpu, msr_index, data);
+		ret = kvm_set_msr_common(vcpu, msr_info);
 	}
 
 	return ret;
@@ -2341,6 +2398,18 @@ static int hardware_enable(void *garbage)
 		return -EBUSY;
 
 	INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+
+	/*
+	 * Now we can enable the vmclear operation in kdump
+	 * since the loaded_vmcss_on_cpu list on this cpu
+	 * has been initialized.
+	 *
+	 * Though the cpu is not in VMX operation now, there
+	 * is no problem in enabling the vmclear operation,
+	 * since the loaded_vmcss_on_cpu list is empty!
+	 */
+	crash_enable_local_vmclear(cpu);
+
 	rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
 
 	test_bits = FEATURE_CONTROL_LOCKED;
@@ -2697,6 +2766,7 @@ static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment
 	if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) {
 		tmp.base = vmcs_readl(sf->base);
 		tmp.selector = vmcs_read16(sf->selector);
+		tmp.dpl = tmp.selector & SELECTOR_RPL_MASK;
 		tmp.s = 1;
 	}
 	vmx_set_segment(vcpu, &tmp, seg);
@@ -3246,7 +3316,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 	 * unrestricted guest like Westmere to older host that don't have
 	 * unrestricted guest like Nehalem.
 	 */
-	if (!enable_unrestricted_guest && vmx->rmode.vm86_active) {
+	if (vmx->rmode.vm86_active) {
 		switch (seg) {
 		case VCPU_SREG_CS:
 			vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
@@ -3897,8 +3967,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
 	set_cr4_guest_host_mask(vmx);
 
-	kvm_write_tsc(&vmx->vcpu, 0);
-
 	return 0;
 }
 
@@ -3908,8 +3976,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	u64 msr;
 	int ret;
 
-	vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
-
 	vmx->rmode.vm86_active = 0;
 
 	vmx->soft_vnmi_blocked = 0;
@@ -3921,10 +3987,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 		msr |= MSR_IA32_APICBASE_BSP;
 	kvm_set_apic_base(&vmx->vcpu, msr);
 
-	ret = fx_init(&vmx->vcpu);
-	if (ret != 0)
-		goto out;
-
 	vmx_segment_cache_clear(vmx);
 
 	seg_setup(VCPU_SREG_CS);
@@ -3965,7 +4027,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 		kvm_rip_write(vcpu, 0xfff0);
 	else
 		kvm_rip_write(vcpu, 0);
-	kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
 
 	vmcs_writel(GUEST_GDTR_BASE, 0);
 	vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
@@ -4015,7 +4076,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	/* HACK: Don't enable emulation on guest boot/reset */
 	vmx->emulation_required = 0;
 
-out:
 	return ret;
 }
 
@@ -4287,16 +4347,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 	if (is_machine_check(intr_info))
 		return handle_machine_check(vcpu);
 
-	if ((vect_info & VECTORING_INFO_VALID_MASK) &&
-		!is_page_fault(intr_info)) {
-		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
-		vcpu->run->internal.ndata = 2;
-		vcpu->run->internal.data[0] = vect_info;
-		vcpu->run->internal.data[1] = intr_info;
-		return 0;
-	}
-
 	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
 		return 1;  /* already handled by vmx_vcpu_run() */
 
@@ -4315,6 +4365,22 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 	error_code = 0;
 	if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
 		error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
+
+	/*
+	 * The #PF with PFEC.RSVD = 1 indicates the guest is accessing
+	 * MMIO, so it is better to report an internal error.
+	 * See the comments in vmx_handle_exit.
+	 */
+	if ((vect_info & VECTORING_INFO_VALID_MASK) &&
+	    !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
+		vcpu->run->internal.ndata = 2;
+		vcpu->run->internal.data[0] = vect_info;
+		vcpu->run->internal.data[1] = intr_info;
+		return 0;
+	}
+
 	if (is_page_fault(intr_info)) {
 		/* EPT won't cause page fault directly */
 		BUG_ON(enable_ept);
@@ -4626,11 +4692,15 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu)
 
 static int handle_wrmsr(struct kvm_vcpu *vcpu)
 {
+	struct msr_data msr;
 	u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
 	u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
 		| ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
 
-	if (vmx_set_msr(vcpu, ecx, data) != 0) {
+	msr.data = data;
+	msr.index = ecx;
+	msr.host_initiated = false;
+	if (vmx_set_msr(vcpu, &msr) != 0) {
 		trace_kvm_msr_write_ex(ecx, data);
 		kvm_inject_gp(vcpu, 0);
 		return 1;
@@ -4827,11 +4897,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 
 	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
-	if (exit_qualification & (1 << 6)) {
-		printk(KERN_ERR "EPT: GPA exceeds GAW!\n");
-		return -EINVAL;
-	}
-
 	gla_validity = (exit_qualification >> 7) & 0x3;
 	if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) {
 		printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
@@ -5979,13 +6044,24 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 		return 0;
 	}
 
+	/*
+	 * Note:
+	 * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it is caused by
+	 * a delivery event, since it indicates the guest is accessing MMIO.
+	 * The vm-exit can be triggered again after return to the guest,
+	 * which would cause an infinite loop.
+	 */
 	if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
 			(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
 			exit_reason != EXIT_REASON_EPT_VIOLATION &&
-			exit_reason != EXIT_REASON_TASK_SWITCH))
-		printk(KERN_WARNING "%s: unexpected, valid vectoring info "
-		       "(0x%x) and exit reason is 0x%x\n",
-		       __func__, vectoring_info, exit_reason);
+			exit_reason != EXIT_REASON_TASK_SWITCH)) {
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
+		vcpu->run->internal.ndata = 2;
+		vcpu->run->internal.data[0] = vectoring_info;
+		vcpu->run->internal.data[1] = exit_reason;
+		return 0;
+	}
 
 	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
 	    !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
@@ -7309,6 +7385,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
 
 	.set_tsc_khz = vmx_set_tsc_khz,
+	.read_tsc_offset = vmx_read_tsc_offset,
 	.write_tsc_offset = vmx_write_tsc_offset,
 	.adjust_tsc_offset = vmx_adjust_tsc_offset,
 	.compute_tsc_offset = vmx_compute_tsc_offset,
@@ -7367,6 +7444,11 @@ static int __init vmx_init(void)
 	if (r)
 		goto out3;
 
+#ifdef CONFIG_KEXEC
+	rcu_assign_pointer(crash_vmclear_loaded_vmcss,
+			   crash_vmclear_local_loaded_vmcss);
+#endif
+
 	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
 	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
 	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
@@ -7404,6 +7486,11 @@ static void __exit vmx_exit(void)
 	free_page((unsigned long)vmx_io_bitmap_b);
 	free_page((unsigned long)vmx_io_bitmap_a);
 
+#ifdef CONFIG_KEXEC
+	rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL);
+	synchronize_rcu();
+#endif
+
 	kvm_exit();
 }
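
The kdump changes above gate crash_vmclear_local_loaded_vmcss() on a per-cpu enable bit: the bit is set only while that cpu's loaded_vmcss_on_cpu list is consistent, and cleared around list surgery so a crash NMI never walks a half-updated list. A toy model of that gating, with a plain 64-bit mask standing in for cpumask_t:

#include <stdint.h>
#include <stdio.h>

static uint64_t vmclear_enabled;	/* one bit per cpu, all off by default */

static void crash_enable_local_vmclear(int cpu)
{
	vmclear_enabled |= 1ULL << cpu;		/* list is consistent again */
}

static void crash_disable_local_vmclear(int cpu)
{
	vmclear_enabled &= ~(1ULL << cpu);	/* entering a list-update window */
}

static int crash_local_vmclear_enabled(int cpu)
{
	return !!(vmclear_enabled & (1ULL << cpu));
}

int main(void)
{
	int cpu = 3;

	crash_enable_local_vmclear(cpu);
	printf("cpu %d vmclear enabled: %d\n", cpu,
	       crash_local_vmclear_enabled(cpu));
	crash_disable_local_vmclear(cpu);
	printf("cpu %d vmclear enabled: %d\n", cpu,
	       crash_local_vmclear_enabled(cpu));
	return 0;
}
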
@@ -46,6 +46,8 @@
 #include <linux/uaccess.h>
 #include <linux/hash.h>
 #include <linux/pci.h>
+#include <linux/timekeeper_internal.h>
+#include <linux/pvclock_gtod.h>
 #include <trace/events/kvm.h>
 
 #define CREATE_TRACE_POINTS
@@ -158,7 +160,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 u64 __read_mostly host_xcr0;
 
-int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
+static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
+
+static int kvm_vcpu_reset(struct kvm_vcpu *vcpu);
 
 static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
 {
@@ -633,7 +637,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 	}
 
 	if (is_long_mode(vcpu)) {
-		if (kvm_read_cr4(vcpu) & X86_CR4_PCIDE) {
+		if (kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) {
 			if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
 				return 1;
 		} else
@@ -827,6 +831,7 @@ static u32 msrs_to_save[] = {
 static unsigned num_msrs_to_save;
 
 static const u32 emulated_msrs[] = {
+	MSR_IA32_TSC_ADJUST,
 	MSR_IA32_TSCDEADLINE,
 	MSR_IA32_MISC_ENABLE,
 	MSR_IA32_MCG_STATUS,
@@ -886,9 +891,9 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
-int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
+int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 {
-	return kvm_x86_ops->set_msr(vcpu, msr_index, data);
+	return kvm_x86_ops->set_msr(vcpu, msr);
 }
 
 /*
@@ -896,9 +901,63 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 */
 static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
 {
-	return kvm_set_msr(vcpu, index, *data);
+	struct msr_data msr;
+
+	msr.data = *data;
+	msr.index = index;
+	msr.host_initiated = true;
+	return kvm_set_msr(vcpu, &msr);
 }
 
+#ifdef CONFIG_X86_64
+struct pvclock_gtod_data {
+	seqcount_t seq;
+
+	struct { /* extract of a clocksource struct */
+		int vclock_mode;
+		cycle_t cycle_last;
+		cycle_t mask;
+		u32 mult;
+		u32 shift;
+	} clock;
+
+	/* open coded 'struct timespec' */
+	u64 monotonic_time_snsec;
+	time_t monotonic_time_sec;
+};
+
+static struct pvclock_gtod_data pvclock_gtod_data;
+
+static void update_pvclock_gtod(struct timekeeper *tk)
+{
+	struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
+
+	write_seqcount_begin(&vdata->seq);
+
+	/* copy pvclock gtod data */
+	vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode;
+	vdata->clock.cycle_last = tk->clock->cycle_last;
+	vdata->clock.mask = tk->clock->mask;
+	vdata->clock.mult = tk->mult;
+	vdata->clock.shift = tk->shift;
+
+	vdata->monotonic_time_sec = tk->xtime_sec
+				+ tk->wall_to_monotonic.tv_sec;
+	vdata->monotonic_time_snsec = tk->xtime_nsec
+				+ (tk->wall_to_monotonic.tv_nsec
+					<< tk->shift);
+	while (vdata->monotonic_time_snsec >=
+				(((u64)NSEC_PER_SEC) << tk->shift)) {
+		vdata->monotonic_time_snsec -=
+				((u64)NSEC_PER_SEC) << tk->shift;
+		vdata->monotonic_time_sec++;
+	}
+
+	write_seqcount_end(&vdata->seq);
+}
+#endif
+
+
 static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 {
 	int version;
@@ -995,6 +1054,10 @@ static inline u64 get_kernel_ns(void)
 	return timespec_to_ns(&ts);
 }
 
+#ifdef CONFIG_X86_64
+static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
+#endif
+
 static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
 unsigned long max_tsc_khz;
 
@@ -1046,12 +1109,47 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
 	return tsc;
 }
 
-void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
+void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_X86_64
+	bool vcpus_matched;
+	bool do_request = false;
+	struct kvm_arch *ka = &vcpu->kvm->arch;
+	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+
+	vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
+			 atomic_read(&vcpu->kvm->online_vcpus));
+
+	if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC)
+		if (!ka->use_master_clock)
+			do_request = 1;
+
+	if (!vcpus_matched && ka->use_master_clock)
+		do_request = 1;
+
+	if (do_request)
+		kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
+
+	trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
+			    atomic_read(&vcpu->kvm->online_vcpus),
+			    ka->use_master_clock, gtod->clock.vclock_mode);
+#endif
+}
+
+static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
+{
+	u64 curr_offset = kvm_x86_ops->read_tsc_offset(vcpu);
+	vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
+}
+
+void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
 {
 	struct kvm *kvm = vcpu->kvm;
 	u64 offset, ns, elapsed;
 	unsigned long flags;
 	s64 usdiff;
+	bool matched;
+	u64 data = msr->data;
 
 	raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
 	offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
@@ -1094,6 +1192,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
 			offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
 			pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
 		}
+		matched = true;
 	} else {
 		/*
 		 * We split periods of matched TSC writes into generations.
@@ -1108,6 +1207,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		kvm->arch.cur_tsc_nsec = ns;
 		kvm->arch.cur_tsc_write = data;
 		kvm->arch.cur_tsc_offset = offset;
+		matched = false;
 		pr_debug("kvm: new tsc generation %u, clock %llu\n",
 			 kvm->arch.cur_tsc_generation, data);
 	}
@@ -1129,26 +1229,195 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
 	vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
 
+	if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated)
+		update_ia32_tsc_adjust_msr(vcpu, offset);
 	kvm_x86_ops->write_tsc_offset(vcpu, offset);
 	raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
+
+	spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
+	if (matched)
+		kvm->arch.nr_vcpus_matched_tsc++;
+	else
+		kvm->arch.nr_vcpus_matched_tsc = 0;
+
+	kvm_track_tsc_matching(vcpu);
+	spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
 }
 
 EXPORT_SYMBOL_GPL(kvm_write_tsc);
 
+#ifdef CONFIG_X86_64
+
+static cycle_t read_tsc(void)
+{
+	cycle_t ret;
+	u64 last;
+
+	/*
+	 * Empirically, a fence (of type that depends on the CPU)
+	 * before rdtsc is enough to ensure that rdtsc is ordered
+	 * with respect to loads.  The various CPU manuals are unclear
+	 * as to whether rdtsc can be reordered with later loads,
+	 * but no one has ever seen it happen.
+	 */
+	rdtsc_barrier();
+	ret = (cycle_t)vget_cycles();
+
+	last = pvclock_gtod_data.clock.cycle_last;
+
+	if (likely(ret >= last))
+		return ret;
+
+	/*
+	 * GCC likes to generate cmov here, but this branch is extremely
+	 * predictable (it's just a function of time and the likely is
+	 * very likely) and there's a data dependence, so force GCC
+	 * to generate a branch instead.  I don't barrier() because
+	 * we don't actually need a barrier, and if this function
+	 * ever gets inlined it will generate worse code.
+	 */
+	asm volatile ("");
+	return last;
+}
+
+static inline u64 vgettsc(cycle_t *cycle_now)
+{
+	long v;
+	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+
+	*cycle_now = read_tsc();
+
+	v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
+	return v * gtod->clock.mult;
+}
+
+static int do_monotonic(struct timespec *ts, cycle_t *cycle_now)
+{
+	unsigned long seq;
+	u64 ns;
+	int mode;
+	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+
+	ts->tv_nsec = 0;
+	do {
+		seq = read_seqcount_begin(&gtod->seq);
+		mode = gtod->clock.vclock_mode;
+		ts->tv_sec = gtod->monotonic_time_sec;
+		ns = gtod->monotonic_time_snsec;
+		ns += vgettsc(cycle_now);
+		ns >>= gtod->clock.shift;
+	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+	timespec_add_ns(ts, ns);
+
+	return mode;
+}
+
+/* returns true if host is using tsc clocksource */
+static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
+{
+	struct timespec ts;
+
+	/* checked again under seqlock below */
+	if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
+		return false;
+
+	if (do_monotonic(&ts, cycle_now) != VCLOCK_TSC)
+		return false;
+
+	monotonic_to_bootbased(&ts);
+	*kernel_ns = timespec_to_ns(&ts);
+
+	return true;
+}
+#endif
+
+/*
+ *
+ * Assuming a stable TSC across physical CPUS, and a stable TSC
+ * across virtual CPUs, the following condition is possible.
+ * Each numbered line represents an event visible to both
+ * CPUs at the next numbered event.
+ *
+ * "timespecX" represents host monotonic time. "tscX" represents
+ * RDTSC value.
+ *
+ *		VCPU0 on CPU0		|	VCPU1 on CPU1
+ *
+ * 1.  read timespec0,tsc0
+ * 2.					| timespec1 = timespec0 + N
+ *					| tsc1 = tsc0 + M
+ * 3. transition to guest		| transition to guest
+ * 4. ret0 = timespec0 + (rdtsc - tsc0) |
+ * 5.					| ret1 = timespec1 + (rdtsc - tsc1)
+ *					| ret1 = timespec0 + N + (rdtsc - (tsc0 + M))
+ *
+ * Since ret0 update is visible to VCPU1 at time 5, to obey monotonicity:
+ *
+ *	- ret0 < ret1
+ *	- timespec0 + (rdtsc - tsc0) < timespec0 + N + (rdtsc - (tsc0 + M))
+ *		...
+ *	- 0 < N - M => M < N
+ *
+ * That is, when timespec0 != timespec1, M < N. Unfortunately that is not
+ * always the case (the difference between two distinct xtime instances
+ * might be smaller than the difference between corresponding TSC reads,
+ * when updating guest vcpus pvclock areas).
+ *
+ * To avoid that problem, do not allow visibility of distinct
+ * system_timestamp/tsc_timestamp values simultaneously: use a master
+ * copy of host monotonic time values. Update that master copy
+ * in lockstep.
+ *
+ * Rely on synchronization of host TSCs and guest TSCs for monotonicity.
+ *
+ */
+
+static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
+{
+#ifdef CONFIG_X86_64
+	struct kvm_arch *ka = &kvm->arch;
+	int vclock_mode;
+	bool host_tsc_clocksource, vcpus_matched;
+
+	vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
+			 atomic_read(&kvm->online_vcpus));
+
+	/*
+	 * If the host uses TSC clock, then passthrough TSC as stable
+	 * to the guest.
+	 */
+	host_tsc_clocksource = kvm_get_time_and_clockread(
+					&ka->master_kernel_ns,
+					&ka->master_cycle_now);
+
+	ka->use_master_clock = host_tsc_clocksource & vcpus_matched;
+
+	if (ka->use_master_clock)
+		atomic_set(&kvm_guest_has_master_clock, 1);
+
+	vclock_mode = pvclock_gtod_data.clock.vclock_mode;
+	trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
+					vcpus_matched);
+#endif
+}
+
 static int kvm_guest_time_update(struct kvm_vcpu *v)
 {
-	unsigned long flags;
+	unsigned long flags, this_tsc_khz;
 	struct kvm_vcpu_arch *vcpu = &v->arch;
+	struct kvm_arch *ka = &v->kvm->arch;
 	void *shared_kaddr;
-	unsigned long this_tsc_khz;
 	s64 kernel_ns, max_kernel_ns;
-	u64 tsc_timestamp;
+	u64 tsc_timestamp, host_tsc;
+	struct pvclock_vcpu_time_info *guest_hv_clock;
+	u8 pvclock_flags;
+	bool use_master_clock;
+
+	kernel_ns = 0;
+	host_tsc = 0;
 
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
-	tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
-	kernel_ns = get_kernel_ns();
 	this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
 	if (unlikely(this_tsc_khz == 0)) {
 		local_irq_restore(flags);
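
read_tsc() above clamps the raw TSC value to the clocksource's cycle_last so a slightly lagging cpu can never make time appear to run backwards, which is the monotonicity requirement the long comment derives (M < N). A sketch of just the clamp; rdtsc_stub() fakes a lagging TSC read, and the real code pairs the read with rdtsc_barrier():

#include <stdint.h>
#include <stdio.h>

static uint64_t cycle_last = 1000;	/* last cycle count recorded by timekeeping */

static uint64_t rdtsc_stub(void)
{
	return 990;	/* pretend this cpu's TSC lags slightly */
}

static uint64_t read_tsc_clamped(void)
{
	uint64_t ret = rdtsc_stub();

	/* never return a value below cycle_last */
	return ret >= cycle_last ? ret : cycle_last;
}

int main(void)
{
	printf("clamped tsc = %llu\n", (unsigned long long)read_tsc_clamped());
	return 0;
}
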
@ -1156,6 +1425,24 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the host uses TSC clock, then passthrough TSC as stable
|
||||
* to the guest.
|
||||
*/
|
||||
spin_lock(&ka->pvclock_gtod_sync_lock);
|
||||
use_master_clock = ka->use_master_clock;
|
||||
if (use_master_clock) {
|
||||
host_tsc = ka->master_cycle_now;
|
||||
kernel_ns = ka->master_kernel_ns;
|
||||
}
|
||||
spin_unlock(&ka->pvclock_gtod_sync_lock);
|
||||
if (!use_master_clock) {
|
||||
host_tsc = native_read_tsc();
|
||||
kernel_ns = get_kernel_ns();
|
||||
}
|
||||
|
||||
tsc_timestamp = kvm_x86_ops->read_l1_tsc(v, host_tsc);
|
||||
|
||||
/*
|
||||
* We may have to catch up the TSC to match elapsed wall clock
|
||||
* time for two reasons, even if kvmclock is used.
|
||||
@ -1217,23 +1504,20 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
||||
vcpu->hw_tsc_khz = this_tsc_khz;
|
||||
}
|
||||
|
||||
if (max_kernel_ns > kernel_ns)
|
||||
kernel_ns = max_kernel_ns;
|
||||
|
||||
/* with a master <monotonic time, tsc value> tuple,
|
||||
* pvclock clock reads always increase at the (scaled) rate
|
||||
* of guest TSC - no need to deal with sampling errors.
|
||||
*/
|
||||
if (!use_master_clock) {
|
||||
if (max_kernel_ns > kernel_ns)
|
||||
kernel_ns = max_kernel_ns;
|
||||
}
|
||||
/* With all the info we got, fill in the values */
|
||||
vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
|
||||
vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
|
||||
vcpu->last_kernel_ns = kernel_ns;
|
||||
vcpu->last_guest_tsc = tsc_timestamp;
|
||||
|
||||
pvclock_flags = 0;
|
||||
if (vcpu->pvclock_set_guest_stopped_request) {
|
||||
pvclock_flags |= PVCLOCK_GUEST_STOPPED;
|
||||
vcpu->pvclock_set_guest_stopped_request = false;
|
||||
}
|
||||
|
||||
vcpu->hv_clock.flags = pvclock_flags;
|
||||
|
||||
/*
|
||||
* The interface expects us to write an even number signaling that the
|
||||
* update is finished. Since the guest won't see the intermediate
|
||||
@ -1243,6 +1527,22 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
||||
|
||||
shared_kaddr = kmap_atomic(vcpu->time_page);
|
||||
|
||||
guest_hv_clock = shared_kaddr + vcpu->time_offset;
|
||||
|
||||
/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
|
||||
pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
|
||||
|
||||
if (vcpu->pvclock_set_guest_stopped_request) {
|
||||
pvclock_flags |= PVCLOCK_GUEST_STOPPED;
|
||||
vcpu->pvclock_set_guest_stopped_request = false;
|
||||
}
|
||||
|
||||
/* If the host uses TSC clocksource, then it is stable */
|
||||
if (use_master_clock)
|
||||
pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
|
||||
|
||||
vcpu->hv_clock.flags = pvclock_flags;
|
||||
|
||||
memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
|
||||
sizeof(vcpu->hv_clock));
|
||||
|
||||
@@ -1572,9 +1872,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
 }
 
-int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	bool pr = false;
+	u32 msr = msr_info->index;
+	u64 data = msr_info->data;
 
 	switch (msr) {
 	case MSR_EFER:
@@ -1625,6 +1927,15 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	case MSR_IA32_TSCDEADLINE:
 		kvm_set_lapic_tscdeadline_msr(vcpu, data);
 		break;
+	case MSR_IA32_TSC_ADJUST:
+		if (guest_cpuid_has_tsc_adjust(vcpu)) {
+			if (!msr_info->host_initiated) {
+				u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
+				kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true);
+			}
+			vcpu->arch.ia32_tsc_adjust_msr = data;
+		}
+		break;
 	case MSR_IA32_MISC_ENABLE:
 		vcpu->arch.ia32_misc_enable_msr = data;
 		break;
@@ -1984,6 +2295,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_IA32_TSCDEADLINE:
 		data = kvm_get_lapic_tscdeadline_msr(vcpu);
 		break;
+	case MSR_IA32_TSC_ADJUST:
+		data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
+		break;
 	case MSR_IA32_MISC_ENABLE:
 		data = vcpu->arch.ia32_misc_enable_msr;
 		break;
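The IA32_TSC_ADJUST emulation above distinguishes guest writes from host-initiated restores: only a guest write moves the hardware TSC offset, by the delta between the new and old MSR value, while a userspace restore (migration) just reloads the MSR. A small standalone model of that invariant follows; struct tsc_adjust_model and set_tsc_adjust are hypothetical names used for illustration only.

    #include <stdint.h>
    #include <stdbool.h>

    /* Model of the architectural rule TSC == unadjusted TSC + TSC_ADJUST:
     * a guest write must shift the vcpu's TSC offset by the delta, while
     * a host-initiated write must not perturb the clock. */
    struct tsc_adjust_model {
        int64_t  tsc_offset;      /* applied to every guest TSC read */
        uint64_t tsc_adjust_msr;  /* last value written to the MSR   */
    };

    static void set_tsc_adjust(struct tsc_adjust_model *v, uint64_t data,
                               bool host_initiated)
    {
        if (!host_initiated)
            v->tsc_offset += (int64_t)(data - v->tsc_adjust_msr);
        v->tsc_adjust_msr = data;
    }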
@@ -2342,7 +2656,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 			kvm_x86_ops->write_tsc_offset(vcpu, offset);
 		vcpu->arch.tsc_catchup = 1;
 	}
-	kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+	/*
+	 * On a host with synchronized TSC, there is no need to update
+	 * kvmclock on vcpu->cpu migration
+	 */
+	if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
+		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 	if (vcpu->cpu != cpu)
 		kvm_migrate_timers(vcpu);
 	vcpu->cpu = cpu;
@@ -2691,15 +3010,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		if (!vcpu->arch.apic)
 			goto out;
 		u.lapic = memdup_user(argp, sizeof(*u.lapic));
-		if (IS_ERR(u.lapic)) {
-			r = PTR_ERR(u.lapic);
-			goto out;
-		}
+		if (IS_ERR(u.lapic))
+			return PTR_ERR(u.lapic);
 
 		r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
-		if (r)
-			goto out;
-		r = 0;
 		break;
 	}
 	case KVM_INTERRUPT: {
@@ -2709,16 +3023,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		if (copy_from_user(&irq, argp, sizeof irq))
 			goto out;
 		r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
-		if (r)
-			goto out;
-		r = 0;
 		break;
 	}
 	case KVM_NMI: {
 		r = kvm_vcpu_ioctl_nmi(vcpu);
-		if (r)
-			goto out;
-		r = 0;
 		break;
 	}
 	case KVM_SET_CPUID: {
@@ -2729,8 +3037,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
 			goto out;
 		r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
-		if (r)
-			goto out;
 		break;
 	}
 	case KVM_SET_CPUID2: {
@@ -2742,8 +3048,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 			goto out;
 		r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
 					      cpuid_arg->entries);
-		if (r)
-			goto out;
 		break;
 	}
 	case KVM_GET_CPUID2: {
@@ -2875,10 +3179,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	}
 	case KVM_SET_XSAVE: {
 		u.xsave = memdup_user(argp, sizeof(*u.xsave));
-		if (IS_ERR(u.xsave)) {
-			r = PTR_ERR(u.xsave);
-			goto out;
-		}
+		if (IS_ERR(u.xsave))
+			return PTR_ERR(u.xsave);
 
 		r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
 		break;
@@ -2900,10 +3202,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	}
 	case KVM_SET_XCRS: {
 		u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
-		if (IS_ERR(u.xcrs)) {
-			r = PTR_ERR(u.xcrs);
-			goto out;
-		}
+		if (IS_ERR(u.xcrs))
+			return PTR_ERR(u.xcrs);
 
 		r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
 		break;
@@ -2951,7 +3251,7 @@ static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
 	int ret;
 
 	if (addr > (unsigned int)(-3 * PAGE_SIZE))
-		return -1;
+		return -EINVAL;
 	ret = kvm_x86_ops->set_tss_addr(kvm, addr);
 	return ret;
 }
@@ -3212,8 +3512,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	switch (ioctl) {
 	case KVM_SET_TSS_ADDR:
 		r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
-		if (r < 0)
-			goto out;
 		break;
 	case KVM_SET_IDENTITY_MAP_ADDR: {
 		u64 ident_addr;
@@ -3222,14 +3520,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
 			goto out;
 		r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
-		if (r < 0)
-			goto out;
 		break;
 	}
 	case KVM_SET_NR_MMU_PAGES:
 		r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
-		if (r)
-			goto out;
 		break;
 	case KVM_GET_NR_MMU_PAGES:
 		r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
@@ -3320,8 +3614,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 	get_irqchip_out:
 		kfree(chip);
-		if (r)
-			goto out;
 		break;
 	}
 	case KVM_SET_IRQCHIP: {
@@ -3343,8 +3635,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 	set_irqchip_out:
 		kfree(chip);
-		if (r)
-			goto out;
 		break;
 	}
 	case KVM_GET_PIT: {
@@ -3371,9 +3661,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (!kvm->arch.vpit)
 			goto out;
 		r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
-		if (r)
-			goto out;
-		r = 0;
 		break;
 	}
 	case KVM_GET_PIT2: {
@@ -3397,9 +3684,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (!kvm->arch.vpit)
 			goto out;
 		r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
-		if (r)
-			goto out;
-		r = 0;
 		break;
 	}
 	case KVM_REINJECT_CONTROL: {
@@ -3408,9 +3692,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (copy_from_user(&control, argp, sizeof(control)))
 			goto out;
 		r = kvm_vm_ioctl_reinject(kvm, &control);
-		if (r)
-			goto out;
-		r = 0;
 		break;
 	}
 	case KVM_XEN_HVM_CONFIG: {
@@ -4273,7 +4554,12 @@ static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
 static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
 			    u32 msr_index, u64 data)
 {
-	return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
+	struct msr_data msr;
+
+	msr.data = data;
+	msr.index = msr_index;
+	msr.host_initiated = false;
+	return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
 }
 
 static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
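The KVM_SET_XSAVE and KVM_SET_XCRS cases above settle on the same memdup_user() idiom, which is why the error paths no longer need a kfree() before bailing out. A minimal sketch of the idiom follows; example_ioctl_set is a hypothetical handler, the rest is the kernel API as used in this diff.

    #include <linux/err.h>
    #include <linux/slab.h>
    #include <linux/string.h>

    /* Sketch: memdup_user() fuses kmalloc() + copy_from_user() and returns
     * an ERR_PTR on failure, so there is nothing to free on the error path. */
    static long example_ioctl_set(void __user *argp, size_t len)
    {
        void *buf = memdup_user(argp, len);

        if (IS_ERR(buf))
            return PTR_ERR(buf);

        /* ... consume buf ... */
        kfree(buf);
        return 0;
    }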
@@ -4495,7 +4781,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
 	 * instruction -> ...
 	 */
 	pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
-	if (!is_error_pfn(pfn)) {
+	if (!is_error_noslot_pfn(pfn)) {
 		kvm_release_pfn_clean(pfn);
 		return true;
 	}
@@ -4881,6 +5167,50 @@ static void kvm_set_mmio_spte_mask(void)
 	kvm_mmu_set_mmio_spte_mask(mask);
 }
 
+#ifdef CONFIG_X86_64
+static void pvclock_gtod_update_fn(struct work_struct *work)
+{
+	struct kvm *kvm;
+
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	raw_spin_lock(&kvm_lock);
+	list_for_each_entry(kvm, &vm_list, vm_list)
+		kvm_for_each_vcpu(i, vcpu, kvm)
+			set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
+	atomic_set(&kvm_guest_has_master_clock, 0);
+	raw_spin_unlock(&kvm_lock);
+}
+
+static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
+
+/*
+ * Notification about pvclock gtod data update.
+ */
+static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
+			       void *priv)
+{
+	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+	struct timekeeper *tk = priv;
+
+	update_pvclock_gtod(tk);
+
+	/* disable master clock if host does not trust, or does not
+	 * use, TSC clocksource
+	 */
+	if (gtod->clock.vclock_mode != VCLOCK_TSC &&
+	    atomic_read(&kvm_guest_has_master_clock) != 0)
+		queue_work(system_long_wq, &pvclock_gtod_work);
+
+	return 0;
+}
+
+static struct notifier_block pvclock_gtod_notifier = {
+	.notifier_call = pvclock_gtod_notify,
+};
+#endif
+
 int kvm_arch_init(void *opaque)
 {
 	int r;
@@ -4922,6 +5252,10 @@ int kvm_arch_init(void *opaque)
 	host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
 
 	kvm_lapic_init();
+#ifdef CONFIG_X86_64
+	pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
+#endif
+
 	return 0;
 
 out:
@@ -4936,6 +5270,9 @@ void kvm_arch_exit(void)
 	cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
 				    CPUFREQ_TRANSITION_NOTIFIER);
 	unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
+#ifdef CONFIG_X86_64
+	pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
+#endif
 	kvm_x86_ops = NULL;
 	kvm_mmu_module_exit();
 }
@@ -5059,7 +5396,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
 
-int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
+static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
 {
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	char instruction[3];
@@ -5235,6 +5572,29 @@ static void process_nmi(struct kvm_vcpu *vcpu)
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 }
 
+static void kvm_gen_update_masterclock(struct kvm *kvm)
+{
+#ifdef CONFIG_X86_64
+	int i;
+	struct kvm_vcpu *vcpu;
+	struct kvm_arch *ka = &kvm->arch;
+
+	spin_lock(&ka->pvclock_gtod_sync_lock);
+	kvm_make_mclock_inprogress_request(kvm);
+	/* no guest entries from this point */
+	pvclock_update_vm_gtod_copy(kvm);
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+
+	/* guest entries allowed */
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests);
+
+	spin_unlock(&ka->pvclock_gtod_sync_lock);
+#endif
+}
+
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
 	int r;
@@ -5247,6 +5607,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_mmu_unload(vcpu);
 		if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
 			__kvm_migrate_timers(vcpu);
+		if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
+			kvm_gen_update_masterclock(vcpu->kvm);
 		if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
 			r = kvm_guest_time_update(vcpu);
 			if (unlikely(r))
@@ -5362,7 +5724,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	if (hw_breakpoint_active())
 		hw_breakpoint_restore();
 
-	vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
+	vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu,
+							   native_read_tsc());
 
 	vcpu->mode = OUTSIDE_GUEST_MODE;
 	smp_wmb();
@@ -5419,7 +5782,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 			pr_debug("vcpu %d received sipi with vector # %x\n",
 				 vcpu->vcpu_id, vcpu->arch.sipi_vector);
 			kvm_lapic_reset(vcpu);
-			r = kvm_arch_vcpu_reset(vcpu);
+			r = kvm_vcpu_reset(vcpu);
 			if (r)
 				return r;
 			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -6047,7 +6410,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	r = vcpu_load(vcpu);
 	if (r)
 		return r;
-	r = kvm_arch_vcpu_reset(vcpu);
+	r = kvm_vcpu_reset(vcpu);
 	if (r == 0)
 		r = kvm_mmu_setup(vcpu);
 	vcpu_put(vcpu);
@@ -6055,6 +6418,23 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	return r;
 }
 
+int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+	int r;
+	struct msr_data msr;
+
+	r = vcpu_load(vcpu);
+	if (r)
+		return r;
+	msr.data = 0x0;
+	msr.index = MSR_IA32_TSC;
+	msr.host_initiated = true;
+	kvm_write_tsc(vcpu, &msr);
+	vcpu_put(vcpu);
+
+	return r;
+}
+
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
 	int r;
@@ -6069,7 +6449,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->vcpu_free(vcpu);
 }
 
-int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
+static int kvm_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	atomic_set(&vcpu->arch.nmi_queued, 0);
 	vcpu->arch.nmi_pending = 0;
@@ -6092,6 +6472,10 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 
 	kvm_pmu_reset(vcpu);
 
+	memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
+	vcpu->arch.regs_avail = ~0;
+	vcpu->arch.regs_dirty = ~0;
+
 	return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
@@ -6168,6 +6552,8 @@ int kvm_arch_hardware_enable(void *garbage)
 		kvm_for_each_vcpu(i, vcpu, kvm) {
 			vcpu->arch.tsc_offset_adjustment += delta_cyc;
 			vcpu->arch.last_host_tsc = local_tsc;
+			set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
+				&vcpu->requests);
 		}
 
 		/*
@@ -6258,10 +6644,17 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
 		goto fail_free_mce_banks;
 
+	r = fx_init(vcpu);
+	if (r)
+		goto fail_free_wbinvd_dirty_mask;
+
+	vcpu->arch.ia32_tsc_adjust_msr = 0x0;
 	kvm_async_pf_hash_reset(vcpu);
 	kvm_pmu_init(vcpu);
 
 	return 0;
+fail_free_wbinvd_dirty_mask:
+	free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
 fail_free_mce_banks:
 	kfree(vcpu->arch.mce_banks);
 fail_free_lapic:
@@ -6305,6 +6698,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	raw_spin_lock_init(&kvm->arch.tsc_write_lock);
 	mutex_init(&kvm->arch.apic_map_lock);
+	spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
+
+	pvclock_update_vm_gtod_copy(kvm);
 
 	return 0;
 }
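The master-clock hunks above coordinate entirely through per-VCPU request bits: kvm_gen_update_masterclock() raises KVM_REQ_CLOCK_UPDATE under KVM_REQ_MCLOCK_INPROGRESS, and vcpu_enter_guest() drains the bits before the next guest entry. The following is a stripped-down userspace model of that set-then-consume pattern, assuming nothing beyond C11 atomics; in the kernel the same roles are played by kvm_make_request() and kvm_check_request(), and struct vcpu_req_model is an invented name.

    #include <stdatomic.h>
    #include <stdbool.h>

    /* Toy model of the KVM request-bit handshake used in the hunks above. */
    struct vcpu_req_model { _Atomic unsigned long requests; };

    static void make_request(struct vcpu_req_model *v, int req)
    {
        /* producer: publish work for the vcpu thread */
        atomic_fetch_or(&v->requests, 1UL << req);
    }

    static bool check_request(struct vcpu_req_model *v, int req)
    {
        unsigned long bit = 1UL << req;

        /* consumer: atomically test-and-clear before guest entry */
        return (atomic_fetch_and(&v->requests, ~bit) & bit) != 0;
    }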
@@ -112,7 +112,7 @@ void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
 void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
 int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
 
-void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data);
+void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr);
 
 int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
 	gva_t addr, void *val, unsigned int bytes,
@@ -22,6 +22,7 @@
 #include <asm/hpet.h>
 #include <asm/unistd.h>
 #include <asm/io.h>
+#include <asm/pvclock.h>
 
 #define gtod (&VVAR(vsyscall_gtod_data))
 
@@ -62,6 +63,76 @@ static notrace cycle_t vread_hpet(void)
 	return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
 }
 
+#ifdef CONFIG_PARAVIRT_CLOCK
+
+static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
+{
+	const struct pvclock_vsyscall_time_info *pvti_base;
+	int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
+	int offset = cpu % (PAGE_SIZE/PVTI_SIZE);
+
+	BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);
+
+	pvti_base = (struct pvclock_vsyscall_time_info *)
+		    __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);
+
+	return &pvti_base[offset];
+}
+
+static notrace cycle_t vread_pvclock(int *mode)
+{
+	const struct pvclock_vsyscall_time_info *pvti;
+	cycle_t ret;
+	u64 last;
+	u32 version;
+	u32 migrate_count;
+	u8 flags;
+	unsigned cpu, cpu1;
+
+
+	/*
+	 * When looping to get a consistent (time-info, tsc) pair, we
+	 * also need to deal with the possibility we can switch vcpus,
+	 * so make sure we always re-fetch time-info for the current vcpu.
+	 */
+	do {
+		cpu = __getcpu() & VGETCPU_CPU_MASK;
+		/* TODO: We can put vcpu id into higher bits of pvti.version.
+		 * This will save a couple of cycles by getting rid of
+		 * __getcpu() calls (Gleb).
+		 */
+
+		pvti = get_pvti(cpu);
+
+		migrate_count = pvti->migrate_count;
+
+		version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
+
+		/*
+		 * Test we're still on the cpu as well as the version.
+		 * We could have been migrated just after the first
+		 * vgetcpu but before fetching the version, so we
+		 * wouldn't notice a version change.
+		 */
+		cpu1 = __getcpu() & VGETCPU_CPU_MASK;
+	} while (unlikely(cpu != cpu1 ||
+			  (pvti->pvti.version & 1) ||
+			  pvti->pvti.version != version ||
+			  pvti->migrate_count != migrate_count));
+
+	if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
+		*mode = VCLOCK_NONE;
+
+	/* refer to tsc.c read_tsc() comment for rationale */
+	last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+
+	if (likely(ret >= last))
+		return ret;
+
+	return last;
+}
+#endif
+
 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
 {
 	long ret;
@@ -80,7 +151,7 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
 }
 
 
-notrace static inline u64 vgetsns(void)
+notrace static inline u64 vgetsns(int *mode)
 {
 	long v;
 	cycles_t cycles;
@@ -88,6 +159,10 @@ notrace static inline u64 vgetsns(void)
 		cycles = vread_tsc();
 	else if (gtod->clock.vclock_mode == VCLOCK_HPET)
 		cycles = vread_hpet();
+#ifdef CONFIG_PARAVIRT_CLOCK
+	else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK)
+		cycles = vread_pvclock(mode);
+#endif
 	else
 		return 0;
 	v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
@@ -107,7 +182,7 @@ notrace static int __always_inline do_realtime(struct timespec *ts)
 		mode = gtod->clock.vclock_mode;
 		ts->tv_sec = gtod->wall_time_sec;
 		ns = gtod->wall_time_snsec;
-		ns += vgetsns();
+		ns += vgetsns(&mode);
 		ns >>= gtod->clock.shift;
 	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
 
@@ -127,7 +202,7 @@ notrace static int do_monotonic(struct timespec *ts)
 		mode = gtod->clock.vclock_mode;
 		ts->tv_sec = gtod->monotonic_time_sec;
 		ns = gtod->monotonic_time_snsec;
-		ns += vgetsns();
+		ns += vgetsns(&mode);
 		ns >>= gtod->clock.shift;
 	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
 	timespec_add_ns(ts, ns);
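The practical payoff of vread_pvclock() is that an unmodified program in a KVM guest keeps the vDSO fast path for clock reads when the host marks the TSC stable; only when PVCLOCK_TSC_STABLE_BIT is absent does the read fall back to a real syscall. Ordinary POSIX usage suffices, as in this small standalone example:

    #include <stdio.h>
    #include <time.h>

    /* Nothing KVM-specific here: with VCLOCK_PVCLOCK active this call is
     * serviced entirely in userspace by the vDSO code added above. */
    int main(void)
    {
        struct timespec ts;

        if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0)
            return 1;
        printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
        return 0;
    }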
@@ -17,15 +17,10 @@ __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
 {
 	unsigned int p;
 
-	if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
-		/* Load per CPU data from RDTSCP */
-		native_read_tscp(&p);
-	} else {
-		/* Load per CPU data from GDT */
-		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
-	}
+	p = __getcpu();
 
 	if (cpu)
-		*cpu = p & 0xfff;
+		*cpu = p & VGETCPU_CPU_MASK;
 	if (node)
 		*node = p >> 12;
 	return 0;
@@ -357,6 +357,7 @@ config TRACE_SINK
 config PPC_EPAPR_HV_BYTECHAN
 	tristate "ePAPR hypervisor byte channel driver"
 	depends on PPC
+	select EPAPR_PARAVIRT
 	help
 	  This driver creates /dev entries for each ePAPR hypervisor byte
 	  channel, thereby allowing applications to communicate with byte
@@ -15,6 +15,7 @@ if VIRT_DRIVERS
 config FSL_HV_MANAGER
 	tristate "Freescale hypervisor management driver"
 	depends on FSL_SOC
+	select EPAPR_PARAVIRT
 	help
 	  The Freescale hypervisor management driver provides several services
 	  to drivers and applications related to the Freescale hypervisor:
@@ -47,28 +47,40 @@
 
 /*
  * For the normal pfn, the highest 12 bits should be zero,
- * so we can mask these bits to indicate the error.
+ * so we can mask bit 62 ~ bit 52 to indicate the error pfn,
+ * mask bit 63 to indicate the noslot pfn.
  */
-#define KVM_PFN_ERR_MASK	(0xfffULL << 52)
+#define KVM_PFN_ERR_MASK	(0x7ffULL << 52)
+#define KVM_PFN_ERR_NOSLOT_MASK	(0xfffULL << 52)
+#define KVM_PFN_NOSLOT		(0x1ULL << 63)
 
 #define KVM_PFN_ERR_FAULT	(KVM_PFN_ERR_MASK)
 #define KVM_PFN_ERR_HWPOISON	(KVM_PFN_ERR_MASK + 1)
-#define KVM_PFN_ERR_BAD		(KVM_PFN_ERR_MASK + 2)
-#define KVM_PFN_ERR_RO_FAULT	(KVM_PFN_ERR_MASK + 3)
+#define KVM_PFN_ERR_RO_FAULT	(KVM_PFN_ERR_MASK + 2)
 
+/*
+ * error pfns indicate that the gfn is in slot but faild to
+ * translate it to pfn on host.
+ */
 static inline bool is_error_pfn(pfn_t pfn)
 {
 	return !!(pfn & KVM_PFN_ERR_MASK);
 }
 
-static inline bool is_noslot_pfn(pfn_t pfn)
+/*
+ * error_noslot pfns indicate that the gfn can not be
+ * translated to pfn - it is not in slot or failed to
+ * translate it to pfn.
+ */
+static inline bool is_error_noslot_pfn(pfn_t pfn)
 {
-	return pfn == KVM_PFN_ERR_BAD;
+	return !!(pfn & KVM_PFN_ERR_NOSLOT_MASK);
 }
 
-static inline bool is_invalid_pfn(pfn_t pfn)
+/* noslot pfn indicates that the gfn is not in slot. */
+static inline bool is_noslot_pfn(pfn_t pfn)
 {
-	return !is_noslot_pfn(pfn) && is_error_pfn(pfn);
+	return pfn == KVM_PFN_NOSLOT;
 }
 
 #define KVM_HVA_ERR_BAD	(PAGE_OFFSET)
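The split works because ordinary pfns never have the top 12 bits set: bits 52 to 62 flag "in slot but translation failed", while bit 63 alone flags "no memslot". This standalone self-check, a sketch reusing only the constants from the hunk above, makes the relationships between the three predicates explicit:

    #include <assert.h>
    #include <stdint.h>

    typedef uint64_t pfn_t;

    #define KVM_PFN_ERR_MASK        (0x7ffULL << 52)
    #define KVM_PFN_ERR_NOSLOT_MASK (0xfffULL << 52)
    #define KVM_PFN_NOSLOT          (0x1ULL << 63)
    #define KVM_PFN_ERR_FAULT       (KVM_PFN_ERR_MASK)

    int main(void)
    {
        /* a noslot pfn is "error_noslot" but not a translation error */
        assert(KVM_PFN_NOSLOT & KVM_PFN_ERR_NOSLOT_MASK);
        assert(!(KVM_PFN_NOSLOT & KVM_PFN_ERR_MASK));

        /* a fault pfn trips both error predicates */
        assert(KVM_PFN_ERR_FAULT & KVM_PFN_ERR_MASK);
        assert(KVM_PFN_ERR_FAULT & KVM_PFN_ERR_NOSLOT_MASK);

        /* an ordinary pfn (high 12 bits clear) is neither */
        assert(!((pfn_t)0x12345 & KVM_PFN_ERR_NOSLOT_MASK));
        return 0;
    }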
@@ -107,6 +119,9 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_IMMEDIATE_EXIT    15
 #define KVM_REQ_PMU               16
 #define KVM_REQ_PMI               17
+#define KVM_REQ_WATCHDOG          18
+#define KVM_REQ_MASTERCLOCK_UPDATE 19
+#define KVM_REQ_MCLOCK_INPROGRESS 20
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
@@ -516,6 +531,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
 
 void kvm_flush_remote_tlbs(struct kvm *kvm);
 void kvm_reload_remote_mmus(struct kvm *kvm);
+void kvm_make_mclock_inprogress_request(struct kvm *kvm);
 
 long kvm_arch_dev_ioctl(struct file *filp,
 			unsigned int ioctl, unsigned long arg);
@@ -569,9 +585,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id);
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
+int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
 
-int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu);
 int kvm_arch_hardware_enable(void *garbage);
 void kvm_arch_hardware_disable(void *garbage);
 int kvm_arch_hardware_setup(void);
@@ -666,6 +682,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
 		unsigned long *deliver_bitmask);
 #endif
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
+int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
 		int irq_source_id, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
@@ -838,9 +855,9 @@ extern struct kvm_stats_debugfs_item debugfs_entries[];
 extern struct dentry *kvm_debugfs_dir;
 
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
-static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_seq)
+static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
 {
-	if (unlikely(vcpu->kvm->mmu_notifier_count))
+	if (unlikely(kvm->mmu_notifier_count))
 		return 1;
 	/*
 	 * Ensure the read of mmu_notifier_count happens before the read
@@ -853,7 +870,7 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se
 	 * can't rely on kvm->mmu_lock to keep things ordered.
 	 */
 	smp_rmb();
-	if (vcpu->kvm->mmu_notifier_seq != mmu_seq)
+	if (kvm->mmu_notifier_seq != mmu_seq)
 		return 1;
 	return 0;
 }
@@ -881,10 +898,20 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
 #ifdef CONFIG_HAVE_KVM_EVENTFD
 
 void kvm_eventfd_init(struct kvm *kvm);
+int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
+
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
 int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
 void kvm_irqfd_release(struct kvm *kvm);
 void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
-int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
+#else
+static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
+{
+	return -EINVAL;
+}
+
+static inline void kvm_irqfd_release(struct kvm *kvm) {}
+#endif
 
 #else
 
include/linux/pvclock_gtod.h (new file)
@@ -0,0 +1,9 @@
+#ifndef _PVCLOCK_GTOD_H
+#define _PVCLOCK_GTOD_H
+
+#include <linux/notifier.h>
+
+extern int pvclock_gtod_register_notifier(struct notifier_block *nb);
+extern int pvclock_gtod_unregister_notifier(struct notifier_block *nb);
+
+#endif /* _PVCLOCK_GTOD_H */
@@ -107,6 +107,14 @@ extern unsigned long this_cpu_load(void);
 
 extern void calc_global_load(unsigned long ticks);
 extern void update_cpu_load_nohz(void);
 
+/* Notifier for when a task gets migrated to a new CPU */
+struct task_migration_notifier {
+	struct task_struct *task;
+	int from_cpu;
+	int to_cpu;
+};
+extern void register_task_migration_notifier(struct notifier_block *n);
+
 extern unsigned long get_parent_ip(unsigned long addr);
 
 extern void dump_cpu_task(int cpu);
@@ -167,10 +167,15 @@ struct kvm_pit_config {
 #define KVM_EXIT_OSI              18
 #define KVM_EXIT_PAPR_HCALL       19
 #define KVM_EXIT_S390_UCONTROL    20
+#define KVM_EXIT_WATCHDOG         21
 
 /* For KVM_EXIT_INTERNAL_ERROR */
-#define KVM_INTERNAL_ERROR_EMULATION 1
-#define KVM_INTERNAL_ERROR_SIMUL_EX 2
+/* Emulate instruction failed. */
+#define KVM_INTERNAL_ERROR_EMULATION	1
+/* Encounter unexpected simultaneous exceptions. */
+#define KVM_INTERNAL_ERROR_SIMUL_EX	2
+/* Encounter unexpected vm-exit due to delivery event. */
+#define KVM_INTERNAL_ERROR_DELIVERY_EV	3
 
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
@@ -477,6 +482,8 @@ struct kvm_ppc_smmu_info {
 	struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
 };
 
+#define KVM_PPC_PVINFO_FLAGS_EV_IDLE	(1<<0)
+
 #define KVMIO 0xAE
 
 /* machine type bits, to be used as argument to KVM_CREATE_VM */
@@ -626,6 +633,8 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_READONLY_MEM 81
 #endif
 #define KVM_CAP_IRQFD_RESAMPLE 82
+#define KVM_CAP_PPC_BOOKE_WATCHDOG 83
+#define KVM_CAP_PPC_HTAB_FD 84
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -848,6 +857,11 @@ struct kvm_s390_ucas_mapping {
 #define KVM_PPC_GET_SMMU_INFO	  _IOR(KVMIO, 0xa6, struct kvm_ppc_smmu_info)
 /* Available with KVM_CAP_PPC_ALLOC_HTAB */
 #define KVM_PPC_ALLOCATE_HTAB	  _IOWR(KVMIO, 0xa7, __u32)
+#define KVM_CREATE_SPAPR_TCE	  _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce)
+/* Available with KVM_CAP_RMA */
+#define KVM_ALLOCATE_RMA	  _IOR(KVMIO, 0xa9, struct kvm_allocate_rma)
+/* Available with KVM_CAP_PPC_HTAB_FD */
+#define KVM_PPC_GET_HTAB_FD	  _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd)
 
 /*
  * ioctls for vcpu fds
@@ -911,9 +925,6 @@ struct kvm_s390_ucas_mapping {
 /* Available with KVM_CAP_XCRS */
 #define KVM_GET_XCRS		  _IOR(KVMIO, 0xa6, struct kvm_xcrs)
 #define KVM_SET_XCRS		  _IOW(KVMIO, 0xa7, struct kvm_xcrs)
-#define KVM_CREATE_SPAPR_TCE	  _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce)
-/* Available with KVM_CAP_RMA */
-#define KVM_ALLOCATE_RMA	  _IOR(KVMIO, 0xa9, struct kvm_allocate_rma)
 /* Available with KVM_CAP_SW_TLB */
 #define KVM_DIRTY_TLB		  _IOW(KVMIO, 0xaa, struct kvm_dirty_tlb)
 /* Available with KVM_CAP_ONE_REG */
@@ -923,6 +923,13 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 		rq->skip_clock_update = 1;
 }
 
+static ATOMIC_NOTIFIER_HEAD(task_migration_notifier);
+
+void register_task_migration_notifier(struct notifier_block *n)
+{
+	atomic_notifier_chain_register(&task_migration_notifier, n);
+}
+
 #ifdef CONFIG_SMP
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
@@ -953,10 +960,18 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 	trace_sched_migrate_task(p, new_cpu);
 
 	if (task_cpu(p) != new_cpu) {
+		struct task_migration_notifier tmn;
+
 		if (p->sched_class->migrate_task_rq)
 			p->sched_class->migrate_task_rq(p, new_cpu);
 		p->se.nr_migrations++;
 		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0);
+
+		tmn.task = p;
+		tmn.from_cpu = task_cpu(p);
+		tmn.to_cpu = new_cpu;
+
+		atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn);
 	}
 
 	__set_task_cpu(p, new_cpu);
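Only the producer side of the migration hook lives in the scheduler; per the merge message, the consumer is kvmclock's cross-CPU migration handling. A hypothetical minimal consumer would look like the module sketch below (tm_notify, tm_nb, and tm_init are invented names). Note the hunks above add only a register function, so there is no symmetric unregister to call on unload.

    #include <linux/module.h>
    #include <linux/notifier.h>
    #include <linux/sched.h>

    /* Sketch: log task migrations via the new notifier chain. */
    static int tm_notify(struct notifier_block *nb, unsigned long unused,
                         void *data)
    {
        struct task_migration_notifier *tmn = data;

        pr_debug("task %d moved cpu %d -> %d\n",
                 tmn->task->pid, tmn->from_cpu, tmn->to_cpu);
        return NOTIFY_OK;
    }

    static struct notifier_block tm_nb = { .notifier_call = tm_notify };

    static int __init tm_init(void)
    {
        register_task_migration_notifier(&tm_nb);
        return 0;
    }
    module_init(tm_init);
    MODULE_LICENSE("GPL");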
@@ -21,6 +21,7 @@
 #include <linux/time.h>
 #include <linux/tick.h>
 #include <linux/stop_machine.h>
+#include <linux/pvclock_gtod.h>
 
 
 static struct timekeeper timekeeper;
@@ -174,6 +175,54 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
 	return nsec + arch_gettimeoffset();
 }
 
+static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);
+
+static void update_pvclock_gtod(struct timekeeper *tk)
+{
+	raw_notifier_call_chain(&pvclock_gtod_chain, 0, tk);
+}
+
+/**
+ * pvclock_gtod_register_notifier - register a pvclock timedata update listener
+ *
+ * Must hold write on timekeeper.lock
+ */
+int pvclock_gtod_register_notifier(struct notifier_block *nb)
+{
+	struct timekeeper *tk = &timekeeper;
+	unsigned long flags;
+	int ret;
+
+	write_seqlock_irqsave(&tk->lock, flags);
+	ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
+	/* update timekeeping data */
+	update_pvclock_gtod(tk);
+	write_sequnlock_irqrestore(&tk->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier);
+
+/**
+ * pvclock_gtod_unregister_notifier - unregister a pvclock
+ * timedata update listener
+ *
+ * Must hold write on timekeeper.lock
+ */
+int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
+{
+	struct timekeeper *tk = &timekeeper;
+	unsigned long flags;
+	int ret;
+
+	write_seqlock_irqsave(&tk->lock, flags);
+	ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
+	write_sequnlock_irqrestore(&tk->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
+
 /* must hold write on timekeeper.lock */
 static void timekeeping_update(struct timekeeper *tk, bool clearntp)
 {
@@ -182,6 +231,7 @@ static void timekeeping_update(struct timekeeper *tk, bool clearntp)
 		ntp_clear();
 	}
 	update_vsyscall(tk);
+	update_pvclock_gtod(tk);
 }
 
 /**
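KVM's x86 code earlier in this diff is the first consumer of this chain. A stripped-down consumer, mirroring the shape of pvclock_gtod_notify() above but with invented names (my_gtod_notify, my_gtod_nb), would look like:

    #include <linux/module.h>
    #include <linux/notifier.h>
    #include <linux/pvclock_gtod.h>

    /* Sketch: react to every timekeeper update pushed down the chain. */
    static int my_gtod_notify(struct notifier_block *nb, unsigned long unused,
                              void *priv)
    {
        /* priv is the struct timekeeper that was just updated */
        pr_debug("pvclock gtod data updated\n");
        return 0;
    }

    static struct notifier_block my_gtod_nb = {
        .notifier_call = my_gtod_notify,
    };

    static int __init my_init(void)
    {
        return pvclock_gtod_register_notifier(&my_gtod_nb);
    }

    static void __exit my_exit(void)
    {
        pvclock_gtod_unregister_notifier(&my_gtod_nb);
    }
    module_init(my_init);
    module_exit(my_exit);
    MODULE_LICENSE("GPL");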
@@ -105,6 +105,15 @@ static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
 }
 
 #ifdef __KVM_HAVE_MSI
+static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
+{
+	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+	int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
+				       assigned_dev->irq_source_id,
+				       assigned_dev->guest_irq, 1);
+	return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
+}
+
 static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
 {
 	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
@@ -117,6 +126,23 @@ static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
 #endif
 
 #ifdef __KVM_HAVE_MSIX
+static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
+{
+	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+	int index = find_index_from_host_irq(assigned_dev, irq);
+	u32 vector;
+	int ret = 0;
+
+	if (index >= 0) {
+		vector = assigned_dev->guest_msix_entries[index].vector;
+		ret = kvm_set_irq_inatomic(assigned_dev->kvm,
+					   assigned_dev->irq_source_id,
+					   vector, 1);
+	}
+
+	return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
+}
+
 static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
 {
 	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
@@ -334,11 +360,6 @@ static int assigned_device_enable_host_intx(struct kvm *kvm,
 }
 
 #ifdef __KVM_HAVE_MSI
-static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
-{
-	return IRQ_WAKE_THREAD;
-}
-
 static int assigned_device_enable_host_msi(struct kvm *kvm,
 					   struct kvm_assigned_dev_kernel *dev)
 {
@@ -363,11 +384,6 @@ static int assigned_device_enable_host_msi(struct kvm *kvm,
 #endif
 
 #ifdef __KVM_HAVE_MSIX
-static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
-{
-	return IRQ_WAKE_THREAD;
-}
-
 static int assigned_device_enable_host_msix(struct kvm *kvm,
 					    struct kvm_assigned_dev_kernel *dev)
 {
@@ -35,6 +35,7 @@
 
 #include "iodev.h"
 
+#ifdef __KVM_HAVE_IOAPIC
 /*
  * --------------------------------------------------------------------
  * irqfd: Allows an fd to be used to inject an interrupt to the guest
@@ -332,7 +333,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 	mutex_lock(&kvm->irqfds.resampler_lock);
 
 	list_for_each_entry(resampler,
-			    &kvm->irqfds.resampler_list, list) {
+			    &kvm->irqfds.resampler_list, link) {
 		if (resampler->notifier.gsi == irqfd->gsi) {
 			irqfd->resampler = resampler;
 			break;
@@ -425,17 +426,21 @@ fail:
 	kfree(irqfd);
 	return ret;
 }
+#endif
 
 void
 kvm_eventfd_init(struct kvm *kvm)
 {
+#ifdef __KVM_HAVE_IOAPIC
 	spin_lock_init(&kvm->irqfds.lock);
 	INIT_LIST_HEAD(&kvm->irqfds.items);
 	INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
 	mutex_init(&kvm->irqfds.resampler_lock);
+#endif
 	INIT_LIST_HEAD(&kvm->ioeventfds);
 }
 
+#ifdef __KVM_HAVE_IOAPIC
 /*
  * shutdown any irqfd's that match fd+gsi
  */
@@ -555,6 +560,7 @@ static void __exit irqfd_module_exit(void)
 
 module_init(irqfd_module_init);
 module_exit(irqfd_module_exit);
+#endif
 
 /*
  * --------------------------------------------------------------------
@@ -52,7 +52,7 @@ static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
 	end_gfn = gfn + (size >> PAGE_SHIFT);
 	gfn    += 1;
 
-	if (is_error_pfn(pfn))
+	if (is_error_noslot_pfn(pfn))
 		return pfn;
 
 	while (gfn < end_gfn)
@@ -106,7 +106,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 		 * important because we unmap and unpin in 4kb steps later.
 		 */
 		pfn = kvm_pin_pages(slot, gfn, page_size);
-		if (is_error_pfn(pfn)) {
+		if (is_error_noslot_pfn(pfn)) {
 			gfn += 1;
 			continue;
 		}
@@ -168,11 +168,7 @@ int kvm_assign_device(struct kvm *kvm,
 
 	r = iommu_attach_device(domain, &pdev->dev);
 	if (r) {
-		printk(KERN_ERR "assign device %x:%x:%x.%x failed",
-			pci_domain_nr(pdev->bus),
-			pdev->bus->number,
-			PCI_SLOT(pdev->devfn),
-			PCI_FUNC(pdev->devfn));
+		dev_err(&pdev->dev, "kvm assign device failed ret %d", r);
 		return r;
 	}
 
@@ -102,6 +102,23 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 	return r;
 }
 
+static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+				   struct kvm_lapic_irq *irq)
+{
+	trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
+
+	irq->dest_id = (e->msi.address_lo &
+			MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+	irq->vector = (e->msi.data &
+		       MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
+	irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
+	irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
+	irq->delivery_mode = e->msi.data & 0x700;
+	irq->level = 1;
+	irq->shorthand = 0;
+	/* TODO Deal with RH bit of MSI message address */
+}
+
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 		struct kvm *kvm, int irq_source_id, int level)
 {
@@ -110,22 +127,26 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 	if (!level)
 		return -1;
 
-	trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
+	kvm_set_msi_irq(e, &irq);
 
-	irq.dest_id = (e->msi.address_lo &
-			MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
-	irq.vector = (e->msi.data &
-			MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
-	irq.dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
-	irq.trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
-	irq.delivery_mode = e->msi.data & 0x700;
-	irq.level = 1;
-	irq.shorthand = 0;
-
-	/* TODO Deal with RH bit of MSI message address */
 	return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
 }
 
+
+static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
+				struct kvm *kvm)
+{
+	struct kvm_lapic_irq irq;
+	int r;
+
+	kvm_set_msi_irq(e, &irq);
+
+	if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r))
+		return r;
+	else
+		return -EWOULDBLOCK;
+}
+
 int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
 {
 	struct kvm_kernel_irq_routing_entry route;
@@ -178,6 +199,44 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	return ret;
 }
 
+/*
+ * Deliver an IRQ in an atomic context if we can, or return a failure,
+ * user can retry in a process context.
+ * Return value:
+ *  -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
+ *  Other values - No need to retry.
+ */
+int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
+{
+	struct kvm_kernel_irq_routing_entry *e;
+	int ret = -EINVAL;
+	struct kvm_irq_routing_table *irq_rt;
+	struct hlist_node *n;
+
+	trace_kvm_set_irq(irq, level, irq_source_id);
+
+	/*
+	 * Injection into either PIC or IOAPIC might need to scan all CPUs,
+	 * which would need to be retried from thread context; when same GSI
+	 * is connected to both PIC and IOAPIC, we'd have to report a
+	 * partial failure here.
+	 * Since there's no easy way to do this, we only support injecting MSI
+	 * which is limited to 1:1 GSI mapping.
+	 */
+	rcu_read_lock();
+	irq_rt = rcu_dereference(kvm->irq_routing);
+	if (irq < irq_rt->nr_rt_entries)
+		hlist_for_each_entry(e, n, &irq_rt->map[irq], link) {
+			if (likely(e->type == KVM_IRQ_ROUTING_MSI))
+				ret = kvm_set_msi_inatomic(e, kvm);
+			else
+				ret = -EWOULDBLOCK;
+			break;
+		}
+	rcu_read_unlock();
+	return ret;
+}
+
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
 	struct kvm_irq_ack_notifier *kian;
Some files were not shown because too many files have changed in this diff.