2008-12-16 18:43:58 +08:00
|
|
|
/*
|
|
|
|
* PowerPC implementation of KVM hooks
|
|
|
|
*
|
|
|
|
* Copyright IBM Corp. 2007
|
2011-04-30 06:10:23 +08:00
|
|
|
* Copyright (C) 2011 Freescale Semiconductor, Inc.
|
2008-12-16 18:43:58 +08:00
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* Jerone Young <jyoung5@us.ibm.com>
|
|
|
|
* Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
|
|
|
|
* Hollis Blanchard <hollisb@us.ibm.com>
|
|
|
|
*
|
|
|
|
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
|
|
|
* See the COPYING file in the top-level directory.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2011-07-21 08:29:15 +08:00
|
|
|
#include <dirent.h>
|
2008-12-16 18:43:58 +08:00
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/ioctl.h>
|
|
|
|
#include <sys/mman.h>
|
ppc64: Rudimentary Support for extra page sizes on server CPUs
More recent Power server chips (i.e. based on the 64 bit hash MMU)
support more than just the traditional 4k and 16M page sizes. This
can get quite complicated, because which page sizes are supported,
which combinations are supported within an MMU segment and how these
page sizes are encoded both in the SLB entry and the hash PTE can vary
depending on the CPU model (they are not specified by the
architecture). In addition the firmware or hypervisor may not permit
use of certain page sizes, for various reasons. Whether various page
sizes are supported on KVM, for example, depends on whether the PR or
HV variant of KVM is in use, and on the page size of the memory
backing the guest's RAM.
This patch adds information to the CPUState and cpu defs to describe
the supported page sizes and encodings. Since TCG does not yet
support any extended page sizes, we just set this to NULL in the
static CPU definitions, expanding this to the default 4k and 16M page
sizes when we initialize the cpu state. When using KVM, however, we
instead determine available page sizes using the new
KVM_PPC_GET_SMMU_INFO call. For old kernels without that call, we use
some defaults, with some guesswork which should do the right thing for
existing HV and PR implementations. The fallback might not be correct
for future versions, but that's ok, because they'll have
KVM_PPC_GET_SMMU_INFO.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2012-06-19 03:56:25 +08:00
|
|
|
#include <sys/vfs.h>
|
2008-12-16 18:43:58 +08:00
|
|
|
|
|
|
|
#include <linux/kvm.h>
|
|
|
|
|
|
|
|
#include "qemu-common.h"
|
|
|
|
#include "qemu-timer.h"
|
|
|
|
#include "sysemu.h"
|
|
|
|
#include "kvm.h"
|
|
|
|
#include "kvm_ppc.h"
|
|
|
|
#include "cpu.h"
|
2012-04-04 13:02:05 +08:00
|
|
|
#include "cpus.h"
|
2008-12-16 18:43:58 +08:00
|
|
|
#include "device_tree.h"
|
2011-09-30 05:39:12 +08:00
|
|
|
#include "hw/sysbus.h"
|
2011-09-30 05:39:10 +08:00
|
|
|
#include "hw/spapr.h"
|
2008-12-16 18:43:58 +08:00
|
|
|
|
2011-08-09 23:57:37 +08:00
|
|
|
#include "hw/sysbus.h"
|
|
|
|
#include "hw/spapr.h"
|
|
|
|
#include "hw/spapr_vio.h"
|
|
|
|
|
2008-12-16 18:43:58 +08:00
|
|
|
/* Uncomment the next line to have dprintf() emit its messages on stderr. */
//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
/* Tracing disabled: dprintf() expands to an empty statement. */
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

/* Path of the "cpus" directory in the host's /proc device-tree view. */
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
|
|
|
|
|
2011-01-22 04:48:17 +08:00
|
|
|
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
|
|
|
|
KVM_CAP_LAST_INFO
|
|
|
|
};
|
|
|
|
|
2010-08-30 19:49:15 +08:00
|
|
|
static int cap_interrupt_unset = false;
|
|
|
|
static int cap_interrupt_level = false;
|
2011-04-30 06:10:23 +08:00
|
|
|
static int cap_segstate;
|
|
|
|
static int cap_booke_sregs;
|
2011-09-30 05:39:10 +08:00
|
|
|
static int cap_ppc_smt;
|
2011-09-30 05:39:11 +08:00
|
|
|
static int cap_ppc_rma;
|
2011-09-30 05:39:12 +08:00
|
|
|
static int cap_spapr_tce;
|
2012-09-13 00:57:09 +08:00
|
|
|
static int cap_hior;
|
2010-08-30 19:49:15 +08:00
|
|
|
|
2010-04-19 05:10:17 +08:00
|
|
|
/* XXX We have a race condition where we actually have a level triggered
|
|
|
|
* interrupt, but the infrastructure can't expose that yet, so the guest
|
|
|
|
* takes but ignores it, goes to sleep and never gets notified that there's
|
|
|
|
* still an interrupt pending.
|
2010-02-10 00:37:10 +08:00
|
|
|
*
|
2010-04-19 05:10:17 +08:00
|
|
|
* As a quick workaround, let's just wake up again 20 ms after we injected
|
|
|
|
* an interrupt. That way we can assure that we're always reinjecting
|
|
|
|
* interrupts in case the guest swallowed them.
|
2010-02-10 00:37:10 +08:00
|
|
|
*/
|
|
|
|
static QEMUTimer *idle_timer;
|
|
|
|
|
2012-05-03 10:02:03 +08:00
|
|
|
static void kvm_kick_cpu(void *opaque)
|
2010-02-10 00:37:10 +08:00
|
|
|
{
|
2012-05-03 10:02:03 +08:00
|
|
|
PowerPCCPU *cpu = opaque;
|
|
|
|
|
2012-05-03 10:34:15 +08:00
|
|
|
qemu_cpu_kick(CPU(cpu));
|
2010-02-10 00:37:10 +08:00
|
|
|
}
|
|
|
|
|
2011-01-22 04:48:16 +08:00
|
|
|
/*
 * Probe the PowerPC-specific KVM capabilities once and cache the answers in
 * the file-scope cap_* flags.
 *
 * Prints a warning on stderr when the level-irq capability is missing.
 * Always returns 0.
 */
int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate        = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs     = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt         = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma         = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce       = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_hior            = kvm_check_extension(s, KVM_CAP_PPC_HIOR);

    if (cap_interrupt_level) {
        return 0;
    }

    fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                    "VM to stall at times!\n");
    return 0;
}
|
|
|
|
|
2012-03-14 08:38:22 +08:00
|
|
|
/*
 * Push QEMU's PVR into the vCPU's special registers.
 *
 * Returns 0 on success (or when nothing is done for BookE), -ENOSYS when the
 * segstate capability is missing, or the error from the GET/SET_SREGS ioctl.
 */
static int kvm_arch_sync_sregs(CPUPPCState *cenv)
{
    struct kvm_sregs sregs;
    int rc;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users that they can run
           BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    }

    if (!cap_segstate) {
        fprintf(stderr, "kvm error: missing PVR setting capability\n");
        return -ENOSYS;
    }

    /* Read-modify-write: only the PVR field is changed. */
    rc = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
    if (rc) {
        return rc;
    }

    sregs.pvr = cenv->spr[SPR_PVR];

    return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
}
|
|
|
|
|
2011-08-31 19:26:56 +08:00
|
|
|
/* Set up a shared TLB array with KVM */
|
2012-03-14 08:38:22 +08:00
|
|
|
static int kvm_booke206_tlb_init(CPUPPCState *env)
|
2011-08-31 19:26:56 +08:00
|
|
|
{
|
|
|
|
struct kvm_book3e_206_tlb_params params = {};
|
|
|
|
struct kvm_config_tlb cfg = {};
|
|
|
|
struct kvm_enable_cap encap = {};
|
|
|
|
unsigned int entries = 0;
|
|
|
|
int ret, i;
|
|
|
|
|
|
|
|
if (!kvm_enabled() ||
|
|
|
|
!kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
|
|
|
|
|
|
|
|
for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
|
|
|
|
params.tlb_sizes[i] = booke206_tlb_size(env, i);
|
|
|
|
params.tlb_ways[i] = booke206_tlb_ways(env, i);
|
|
|
|
entries += params.tlb_sizes[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(entries == env->nb_tlb);
|
|
|
|
assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
|
|
|
|
|
|
|
|
env->tlb_dirty = true;
|
|
|
|
|
|
|
|
cfg.array = (uintptr_t)env->tlb.tlbm;
|
|
|
|
cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
|
|
|
|
cfg.params = (uintptr_t)¶ms;
|
|
|
|
cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
|
|
|
|
|
|
|
|
encap.cap = KVM_CAP_SW_TLB;
|
|
|
|
encap.args[0] = (uintptr_t)&cfg;
|
|
|
|
|
|
|
|
ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &encap);
|
|
|
|
if (ret < 0) {
|
|
|
|
fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
|
|
|
|
__func__, strerror(-ret));
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
env->kvm_sw_tlb = true;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
ppc64: Rudimentary Support for extra page sizes on server CPUs
More recent Power server chips (i.e. based on the 64 bit hash MMU)
support more than just the traditional 4k and 16M page sizes. This
can get quite complicated, because which page sizes are supported,
which combinations are supported within an MMU segment and how these
page sizes are encoded both in the SLB entry and the hash PTE can vary
depending on the CPU model (they are not specified by the
architecture). In addition the firmware or hypervisor may not permit
use of certain page sizes, for various reasons. Whether various page
sizes are supported on KVM, for example, depends on whether the PR or
HV variant of KVM is in use, and on the page size of the memory
backing the guest's RAM.
This patch adds information to the CPUState and cpu defs to describe
the supported page sizes and encodings. Since TCG does not yet
support any extended page sizes, we just set this to NULL in the
static CPU definitions, expanding this to the default 4k and 16M page
sizes when we initialize the cpu state. When using KVM, however, we
instead determine available page sizes using the new
KVM_PPC_GET_SMMU_INFO call. For old kernels without that call, we use
some defaults, with some guesswork which should do the right thing for
existing HV and PR implementations. The fallback might not be correct
for future versions, but that's ok, because they'll have
KVM_PPC_GET_SMMU_INFO.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2012-06-19 03:56:25 +08:00
|
|
|
|
|
|
|
#if defined(TARGET_PPC64)
|
|
|
|
/*
 * Fill @info with guessed MMU parameters for kernels that lack the
 * KVM_PPC_GET_SMMU_INFO ioctl.
 *
 * The guess rests on two assumptions:
 *
 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR" KVM, which
 *   only supports 4K and 16M pages but supports them regardless of the
 *   backing store characteristics.  1T segments are not supported either.
 *
 *   This is safe: if HV KVM ever supports that capability, or PR KVM grows
 *   support for more page/segment sizes, those versions will implement
 *   KVM_CAP_PPC_GET_SMMU_INFO and never reach this fallback.
 *
 * - Otherwise we are running HV KVM, so only page sizes that fit in the
 *   backing store are supported.  64K pages are advertised only when the
 *   processor is ARCH 2.06, and P7 encodings are assumed for the SLB and
 *   hash table.  Again, any newer processor implies a kernel that implements
 *   KVM_CAP_PPC_GET_SMMU_INFO and so does not hit this fallback.
 */
static void kvm_get_fallback_smmu_info(CPUPPCState *env,
                                       struct kvm_ppc_smmu_info *info)
{
    struct kvm_ppc_one_seg_page_size *seg = info->sps;
    bool is_2_06;

    memset(info, 0, sizeof(*info));

    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR KVM: no flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        seg[0].page_shift = 12;
        seg[0].slb_enc = 0;
        seg[0].enc[0].page_shift = 12;
        seg[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        seg[1].page_shift = 24;
        seg[1].slb_enc = SLB_VSID_L;
        seg[1].enc[0].page_shift = 24;
        seg[1].enc[0].pte_enc = 0;
        return;
    }

    is_2_06 = (env->mmu_model == POWERPC_MMU_2_06);

    /* HV KVM has backing store size restrictions */
    info->flags = KVM_PPC_PAGE_SIZES_REAL;
    if (env->mmu_model & POWERPC_MMU_1TSEG) {
        info->flags |= KVM_PPC_1T_SEGMENTS;
    }

    info->slb_size = is_2_06 ? 32 : 64;

    /* Standard 4k base page size segment */
    seg->page_shift = 12;
    seg->slb_enc = 0;
    seg->enc[0].page_shift = 12;
    seg->enc[0].pte_enc = 0;
    seg++;

    /* 64K on MMU 2.06 */
    if (is_2_06) {
        seg->page_shift = 16;
        seg->slb_enc = 0x110;
        seg->enc[0].page_shift = 16;
        seg->enc[0].pte_enc = 1;
        seg++;
    }

    /* Standard 16M large page size segment */
    seg->page_shift = 24;
    seg->slb_enc = SLB_VSID_L;
    seg->enc[0].page_shift = 24;
    seg->enc[0].pte_enc = 0;
}
|
|
|
|
|
|
|
|
/*
 * Fill @info with the server MMU description: ask the kernel through
 * KVM_PPC_GET_SMMU_INFO when the capability is advertised and the ioctl
 * succeeds, otherwise use the built-in fallback guess.
 */
static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
{
    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO) &&
        kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info) == 0) {
        return;
    }

    kvm_get_fallback_smmu_info(env, info);
}
|
|
|
|
|
|
|
|
static long getrampagesize(void)
|
|
|
|
{
|
|
|
|
struct statfs fs;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!mem_path) {
|
|
|
|
/* guest RAM is backed by normal anonymous pages */
|
|
|
|
return getpagesize();
|
|
|
|
}
|
|
|
|
|
|
|
|
do {
|
|
|
|
ret = statfs(mem_path, &fs);
|
|
|
|
} while (ret != 0 && errno == EINTR);
|
|
|
|
|
|
|
|
if (ret != 0) {
|
|
|
|
fprintf(stderr, "Couldn't statfs() memory path: %s\n",
|
|
|
|
strerror(errno));
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
#define HUGETLBFS_MAGIC 0x958458f6
|
|
|
|
|
|
|
|
if (fs.f_type != HUGETLBFS_MAGIC) {
|
|
|
|
/* Explicit mempath, but it's ordinary pages */
|
|
|
|
return getpagesize();
|
|
|
|
}
|
|
|
|
|
|
|
|
/* It's hugepage, return the huge page size */
|
|
|
|
return fs.f_bsize;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
|
|
|
|
{
|
|
|
|
if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return (1ul << shift) <= rampgsize;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Replace the page-size table, SLB size and 1T-segment flag in @env with
 * what KVM reports (or the fallback guess), dropping every segment and
 * encoding whose page size kvm_valid_page_size() rejects for the RAM
 * backing page size.
 *
 * The MMU info is fetched once and cached in a function-local static.
 * Does nothing unless env->mmu_model has POWERPC_MMU_64 set.
 */
static void kvm_fixup_page_sizes(CPUPPCState *env)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    long rampagesize;
    uint32_t flags;
    int k_seg, q_seg, k_enc, q_enc;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(env, &smmu_info);
        has_smmu_info = true;
    }
    flags = smmu_info.flags;

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    q_seg = 0;
    for (k_seg = 0; k_seg < KVM_PPC_PAGE_SIZES_MAX_SZ; k_seg++) {
        struct kvm_ppc_one_seg_page_size *src = &smmu_info.sps[k_seg];
        struct ppc_one_seg_page_size *dst = &env->sps.sps[q_seg];

        if (!kvm_valid_page_size(flags, rampagesize, src->page_shift)) {
            continue;
        }

        dst->page_shift = src->page_shift;
        dst->slb_enc = src->slb_enc;

        /* Copy the usable encodings, packing them at the front of dst. */
        q_enc = 0;
        for (k_enc = 0; k_enc < KVM_PPC_PAGE_SIZES_MAX_SZ; k_enc++) {
            if (!kvm_valid_page_size(flags, rampagesize,
                                     src->enc[k_enc].page_shift)) {
                continue;
            }
            dst->enc[q_enc].page_shift = src->enc[k_enc].page_shift;
            dst->enc[q_enc].pte_enc = src->enc[k_enc].pte_enc;
            q_enc++;
            if (q_enc >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }

        q_seg++;
        if (q_seg >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }

    env->slb_nr = smmu_info.slb_size;

    if (flags & KVM_PPC_1T_SEGMENTS) {
        env->mmu_model |= POWERPC_MMU_1TSEG;
    } else {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
|
|
|
|
#else /* defined (TARGET_PPC64) */
|
|
|
|
|
|
|
|
/* Variant for builds without TARGET_PPC64: intentionally does nothing. */
static inline void kvm_fixup_page_sizes(CPUPPCState *env)
{
    /* empty */
}
|
|
|
|
|
|
|
|
#endif /* !defined (TARGET_PPC64) */
|
|
|
|
|
2012-03-14 08:38:22 +08:00
|
|
|
/*
 * Per-vCPU KVM setup: fix up the page-size information, sync the sregs,
 * create the idle kick timer and, for BookE 2.06 MMUs, set up the shared
 * TLB array.
 *
 * Returns 0 on success or the error from the sreg sync / TLB setup.
 */
int kvm_arch_init_vcpu(CPUPPCState *cenv)
{
    PowerPCCPU *cpu = ppc_env_get_cpu(cenv);
    int err;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cenv);

    /* Synchronize sregs with kvm */
    err = kvm_arch_sync_sregs(cenv);
    if (err) {
        return err;
    }

    idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    if (cenv->mmu_model == POWERPC_MMU_BOOKE206) {
        return kvm_booke206_tlb_init(cenv);
    }

    /* err is 0 here: the sreg sync succeeded. */
    return err;
}
|
|
|
|
|
2012-03-14 08:38:22 +08:00
|
|
|
/* vCPU reset hook: nothing is done here on PowerPC. */
void kvm_arch_reset_vcpu(CPUPPCState *env)
{
    /* empty */
}
|
|
|
|
|
2012-03-14 08:38:22 +08:00
|
|
|
/*
 * Tell KVM that every entry of the shared software TLB is dirty.
 *
 * Does nothing unless the shared TLB was enabled (env->kvm_sw_tlb).
 * An ioctl failure is reported on stderr but not propagated.
 */
static void kvm_sw_tlb_put(CPUPPCState *env)
{
    struct kvm_dirty_tlb dirty;
    unsigned char *all_dirty;
    size_t nbytes;
    int rc;

    if (!env->kvm_sw_tlb) {
        return;
    }

    /* One bit per TLB entry, rounded up to whole bytes, all bits set. */
    nbytes = (env->nb_tlb + 7) / 8;
    all_dirty = g_malloc(nbytes);
    memset(all_dirty, 0xFF, nbytes);

    dirty.bitmap = (uintptr_t)all_dirty;
    dirty.num_dirty = env->nb_tlb;

    rc = kvm_vcpu_ioctl(env, KVM_DIRTY_TLB, &dirty);
    if (rc) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-rc));
    }

    g_free(all_dirty);
}
|
|
|
|
|
2012-03-14 08:38:22 +08:00
|
|
|
/*
 * Write QEMU's view of the vCPU registers into KVM.
 *
 * The general register block is always written.  The shared software TLB is
 * flushed when env->tlb_dirty is set.  Segment state (PVR, SDR1, SLB, SRs,
 * BATs) and HIOR are only written when the matching capability is present
 * and @level is at least KVM_PUT_RESET_STATE.
 *
 * Returns a negative error from the first failing ioctl, otherwise the
 * result of the last ioctl issued.
 */
int kvm_arch_put_registers(CPUPPCState *env, int level)
{
    struct kvm_regs regs;
    int ret;
    int i;

    /* Start from the kernel's copy so fields not set below are preserved. */
    ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    /*
     * Pack the eight 4-bit CR fields back into CR; this is the inverse of
     * the unpacking done in kvm_arch_get_registers() (crf[0] ends up in the
     * most significant nibble of the low 32 bits).
     */
    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (uint64_t)(env->crf[i] & 15) << (4 * (7 - i));
    }

    regs.ctr = env->ctr;
    regs.lr  = env->lr;
    regs.xer = env->xer;
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(env);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        struct kvm_sregs sregs;

        /* Never hand uninitialised stack contents to the kernel. */
        memset(&sregs, 0, sizeof(sregs));

        sregs.pvr = env->spr[SPR_PVR];

        sregs.u.s.sdr1 = env->spr[SPR_SDR1];

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            sregs.u.s.ppc32.sr[i] = env->sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            /* Beware. We have to swap upper and lower bits here */
            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
                | env->DBAT[1][i];
            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
                | env->IBAT[1][i];
        }

        ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
        if (ret) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        uint64_t hior = env->spr[SPR_HIOR];
        struct kvm_one_reg reg = {
            .id = KVM_REG_PPC_HIOR,
            .addr = (uintptr_t) &hior,
        };

        ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    return ret;
}
|
|
|
|
|
2012-03-14 08:38:22 +08:00
|
|
|
/*
 * Read the vCPU registers from KVM into @env.
 *
 * The general register block is always read.  BookE special registers are
 * read when cap_booke_sregs is set, each group only if the kernel flags it
 * in sregs.u.e.features.  Segment state (SDR1, SLB, SRs, BATs) is read when
 * cap_segstate is set.
 *
 * Returns 0 on success or the negative error of the failing ioctl.
 */
int kvm_arch_get_registers(CPUPPCState *env)
{
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    /* Unpack CR into eight 4-bit fields; crf[7] takes the lowest nibble. */
    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    env->xer = regs.xer;
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            /* The 64-bit time base is split into its low and high words. */
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            /* The high IVORs are only read when IVOR itself is flagged. */
            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            /* mas7_3 carries MAS3 in its low word and MAS7 in its high word. */
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        ppc_store_sdr1(env, sregs.u.s.sdr1);

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
                               sregs.u.s.ppc64.slb[i].slbv);
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    return 0;
}
|
|
|
|
|
2012-03-14 08:38:22 +08:00
|
|
|
/*
 * Raise or lower the external interrupt line of a vCPU inside KVM.
 *
 * Only PPC_INTERRUPT_EXT is handled, and only when KVM is enabled and both
 * the unset-irq and level-irq capabilities were found; every other case is
 * silently ignored.  The ioctl result is not checked.  Always returns 0.
 */
int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq == PPC_INTERRUPT_EXT &&
        kvm_enabled() && cap_interrupt_unset && cap_interrupt_level) {
        kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);
    }

    return 0;
}
|
|
|
|
|
2009-07-17 19:51:46 +08:00
|
|
|
/* Pick the interrupt input pin constant that matches the build target. */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
|
|
|
|
|
2012-03-14 08:38:22 +08:00
|
|
|
/*
 * Hook run before entering the guest.
 *
 * When the kernel lacks level-irq support, it injects a pending external
 * interrupt by hand and re-arms idle_timer so the vCPU is kicked again
 * shortly afterwards.
 */
void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
{
    unsigned irq;
    int rc;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (cap_interrupt_level ||
        !run->ready_for_interrupt_injection ||
        !(env->interrupt_request & CPU_INTERRUPT_HARD) ||
        !(env->irq_input_state & (1<<PPC_INPUT_INT))) {
        /* We don't know if there are more interrupts pending after this.
         * However, the guest will return to userspace in the course of
         * handling this one anyways, so we will get a chance to deliver
         * the rest. */
        return;
    }

    /* For now KVM disregards the 'irq' argument. However, in the
     * future KVM could cache it in-kernel to avoid a heavyweight exit
     * when reading the UIC.
     */
    irq = KVM_INTERRUPT_SET;

    dprintf("injected interrupt %d\n", irq);
    rc = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
    if (rc < 0) {
        printf("cpu %d fail inject %x\n", env->cpu_index, irq);
    }

    /* Always wake up soon in case the interrupt was level based */
    qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
                   (get_ticks_per_sec() / 50));
}
|
|
|
|
|
2012-03-14 08:38:22 +08:00
|
|
|
/* Post-run hook; nothing is done here. */
void kvm_arch_post_run(CPUPPCState *env, struct kvm_run *run)
{
    /* intentionally empty */
}
|
|
|
|
|
2012-03-14 08:38:22 +08:00
|
|
|
/* Report the vCPU's halted flag (env->halted) to the caller. */
int kvm_arch_process_async_events(CPUPPCState *env)
{
    int halted = env->halted;

    return halted;
}
|
|
|
|
|
2012-03-14 08:38:22 +08:00
|
|
|
/*
 * Handle a halt exit: mark the vCPU halted (EXCP_HLT) unless a hard
 * interrupt is already pending or msr_ee is clear.  Always returns 0.
 */
static int kvmppc_handle_halt(CPUPPCState *env)
{
    if (env->interrupt_request & CPU_INTERRUPT_HARD) {
        return 0;
    }
    if (!(msr_ee)) {
        return 0;
    }

    env->halted = 1;
    env->exception_index = EXCP_HLT;

    return 0;
}
|
|
|
|
|
|
|
|
/* map dcr access to existing qemu dcr emulation */
|
2012-03-14 08:38:22 +08:00
|
|
|
/*
 * Forward a DCR read to ppc_dcr_read().  A negative result from it is only
 * reported on stderr; the function always returns 0.
 */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    int rc = ppc_dcr_read(env->dcr_env, dcrn, data);

    if (rc < 0) {
        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}
|
|
|
|
|
2012-03-14 08:38:22 +08:00
|
|
|
/*
 * Forward a DCR write to ppc_dcr_write().  A negative result from it is only
 * reported on stderr; the function always returns 0.
 */
static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    int rc = ppc_dcr_write(env->dcr_env, dcrn, data);

    if (rc < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}
|
|
|
|
|
2012-03-14 08:38:22 +08:00
|
|
|
/*
 * Dispatch on run->exit_reason: DCR accesses, halt and (when built with
 * CONFIG_PSERIES) PAPR hypercalls are handled; any other reason is
 * reported on stderr and yields -1.  Otherwise the handler's result is
 * returned (0 for a PAPR hypercall).
 */
int kvm_arch_handle_exit(CPUPPCState *env, struct kvm_run *run)
{
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (!run->dcr.is_write) {
            dprintf("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        } else {
            dprintf("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        }
        break;

    case KVM_EXIT_HLT:
        dprintf("handle halt\n");
        ret = kvmppc_handle_halt(env);
        break;

#ifdef CONFIG_PSERIES
    case KVM_EXIT_PAPR_HCALL:
        dprintf("handle PAPR hypercall\n");
        /* The hypercall's own result goes back through the run structure */
        run->papr_hcall.ret = spapr_hypercall(ppc_env_get_cpu(env),
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}
|
|
|
|
|
2010-02-10 00:37:05 +08:00
|
|
|
/*
 * Find the first line of /proc/cpuinfo that begins with 'field' and copy
 * it, via pstrcpy(), into 'value' (a buffer of 'len' bytes).
 *
 * Returns 0 when a matching line was found, -1 when the file could not be
 * opened or no line matched.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int ret = -1;
    FILE *f = fopen("/proc/cpuinfo", "r");

    if (!f) {
        return -1;
    }

    while (fgets(line, sizeof(line), f)) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
        /* Stop scanning on an empty buffer */
        if (*line == '\0') {
            break;
        }
    }

    fclose(f);

    return ret;
}
|
|
|
|
|
|
|
|
/*
 * Return the timebase frequency taken from the "timebase" line of
 * /proc/cpuinfo.
 *
 * Falls back to get_ticks_per_sec() when the line is missing, has no ':'
 * separator, or the text after the ':' is not a decimal number that fits
 * in 32 bits.
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    char *end;
    unsigned long freq;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    ns = strchr(line, ':');
    if (!ns) {
        return retval;
    }
    ns++;

    /* strtoul() instead of atoi(): parse failures and out-of-range values
     * are detected, so garbage keeps the default rather than becoming 0. */
    errno = 0;
    freq = strtoul(ns, &end, 10);
    if (end == ns || errno == ERANGE || freq > UINT32_MAX) {
        return retval;
    }

    return freq;
}
|
2010-05-10 16:21:34 +08:00
|
|
|
|
2011-07-21 08:29:15 +08:00
|
|
|
/* Try to find a device tree node for a CPU with clock-frequency property */
|
|
|
|
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
|
|
|
|
{
|
|
|
|
struct dirent *dirp;
|
|
|
|
DIR *dp;
|
|
|
|
|
|
|
|
if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
|
|
|
|
printf("Can't open directory " PROC_DEVTREE_CPU "\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
buf[0] = '\0';
|
|
|
|
while ((dirp = readdir(dp)) != NULL) {
|
|
|
|
FILE *f;
|
|
|
|
snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
|
|
|
|
dirp->d_name);
|
|
|
|
f = fopen(buf, "r");
|
|
|
|
if (f) {
|
|
|
|
snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
|
|
|
|
fclose(f);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
buf[0] = '\0';
|
|
|
|
}
|
|
|
|
closedir(dp);
|
|
|
|
if (buf[0] == '\0') {
|
|
|
|
printf("Unknown host!\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-10-11 02:31:00 +08:00
|
|
|
/* Read a CPU node property from the host device tree that's a single
|
|
|
|
* integer (32-bit or 64-bit). Returns 0 if anything goes wrong
|
|
|
|
* (can't find or open the property, or doesn't understand the
|
|
|
|
* format) */
|
|
|
|
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
|
2011-07-21 08:29:15 +08:00
|
|
|
{
|
2011-10-11 02:31:00 +08:00
|
|
|
char buf[PATH_MAX];
|
|
|
|
union {
|
|
|
|
uint32_t v32;
|
|
|
|
uint64_t v64;
|
|
|
|
} u;
|
2011-07-21 08:29:15 +08:00
|
|
|
FILE *f;
|
|
|
|
int len;
|
|
|
|
|
|
|
|
if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
|
2011-10-11 02:31:00 +08:00
|
|
|
return -1;
|
2011-07-21 08:29:15 +08:00
|
|
|
}
|
|
|
|
|
2011-10-11 02:31:00 +08:00
|
|
|
strncat(buf, "/", sizeof(buf) - strlen(buf));
|
|
|
|
strncat(buf, propname, sizeof(buf) - strlen(buf));
|
2011-07-21 08:29:15 +08:00
|
|
|
|
|
|
|
f = fopen(buf, "rb");
|
|
|
|
if (!f) {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2011-10-11 02:31:00 +08:00
|
|
|
len = fread(&u, 1, sizeof(u), f);
|
2011-07-21 08:29:15 +08:00
|
|
|
fclose(f);
|
|
|
|
switch (len) {
|
2011-10-11 02:31:00 +08:00
|
|
|
case 4:
|
|
|
|
/* property is a 32-bit quantity */
|
|
|
|
return be32_to_cpu(u.v32);
|
|
|
|
case 8:
|
|
|
|
return be64_to_cpu(u.v64);
|
2011-07-21 08:29:15 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-10-11 02:31:00 +08:00
|
|
|
/* Return the host CPU node's "clock-frequency" device tree property, as
 * produced by kvmppc_read_int_cpu_dt(). */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
|
|
|
|
|
pseries: Add device tree properties for VMX/VSX and DFP under kvm
Sufficiently recent PAPR specifications define properties "ibm,vmx"
and "ibm,dfp" on the CPU node which advertise whether the VMX vector
extensions (or the later VSX version) and/or the Decimal Floating
Point operations from IBM's recent POWER CPUs are available.
Currently we do not put these in the guest device tree and the guest
kernel will consequently assume they are not available. This is good,
because they are not supported under TCG. VMX is similar enough to
Altivec that it might be trivial to support, but VSX and DFP would
both require significant work to support in TCG.
However, when running under kvm on a host which supports these
instructions, there's no reason not to let the guest use them. This
patch, therefore, checks for the relevant support on the host CPU
and, if present, advertises them to the guest as well.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-10-11 02:31:01 +08:00
|
|
|
/* Return the host CPU node's "ibm,vmx" device tree property, as produced
 * by kvmppc_read_int_cpu_dt() and narrowed to 32 bits. */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,vmx");

    return (uint32_t)prop;
}
|
|
|
|
|
|
|
|
/* Return the host CPU node's "ibm,dfp" device tree property, as produced
 * by kvmppc_read_int_cpu_dt() and narrowed to 32 bits. */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,dfp");

    return (uint32_t)prop;
}
|
|
|
|
|
2012-03-14 08:38:22 +08:00
|
|
|
/*
 * Fill 'buf' with the instruction sequence used to make a hypercall.
 *
 * When KVM_CAP_PPC_GET_PVINFO is available and the KVM_PPC_GET_PVINFO
 * ioctl succeeds, 'buf_len' bytes are copied from the kernel-provided
 * sequence.  Otherwise four fallback instruction words are written.
 * Always returns 0.
 */
int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    struct kvm_ppc_pvinfo pvinfo;
    uint32_t *insn = (uint32_t*)buf;
    int i;

    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo) == 0) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls:
     *
     *     li r3, -1
     *     nop
     *     nop
     *     nop
     */
    insn[0] = 0x3860ffff;
    for (i = 1; i < 4; i++) {
        insn[i] = 0x60000000;
    }

    return 0;
}
|
|
|
|
|
2012-03-14 08:38:22 +08:00
|
|
|
/*
 * Enable KVM_CAP_PPC_PAPR on the vCPU through KVM_ENABLE_CAP.  If the
 * ioctl reports any non-zero result, cpu_abort() is called.
 */
void kvmppc_set_papr(CPUPPCState *env)
{
    /* All members other than .cap start out zeroed */
    struct kvm_enable_cap cap = {
        .cap = KVM_CAP_PPC_PAPR,
    };

    if (kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap) != 0) {
        cpu_abort(env, "This KVM version does not support PAPR\n");
    }
}
|
|
|
|
|
2011-09-30 05:39:10 +08:00
|
|
|
int kvmppc_smt_threads(void)
|
|
|
|
{
|
|
|
|
return cap_ppc_smt ? cap_ppc_smt : 1;
|
|
|
|
}
|
|
|
|
|
2012-09-13 00:57:12 +08:00
|
|
|
#ifdef TARGET_PPC64
|
2011-09-30 05:39:11 +08:00
|
|
|
/*
 * Allocate an RMA region from KVM and map it at offset 0 of 'sysmem'.
 *
 * Returns 0 when cap_ppc_rma < 2 (nothing is allocated), -1 when the
 * KVM_ALLOCATE_RMA ioctl or the subsequent mmap() fails, and otherwise
 * the size in bytes of the region that was mapped and registered under
 * 'name'.
 */
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    /* Never map more than 256MB, whatever the kernel handed out */
    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        /* Report first so close() cannot clobber errno, then release the
         * descriptor, which nothing else refers to. */
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        close(fd);
        return -1;
    }

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, name, size, rma);
    vmstate_register_ram_global(rma_region);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}
|
|
|
|
|
2012-09-13 00:57:12 +08:00
|
|
|
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
|
|
|
|
{
|
|
|
|
if (cap_ppc_rma >= 2) {
|
|
|
|
return current_size;
|
|
|
|
}
|
|
|
|
return MIN(current_size,
|
|
|
|
getrampagesize() << (hash_shift - 7));
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2011-09-30 05:39:12 +08:00
|
|
|
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
|
|
|
|
{
|
|
|
|
struct kvm_create_spapr_tce args = {
|
|
|
|
.liobn = liobn,
|
|
|
|
.window_size = window_size,
|
|
|
|
};
|
|
|
|
long len;
|
|
|
|
int fd;
|
|
|
|
void *table;
|
|
|
|
|
pseries: Don't try to munmap() a malloc()ed TCE table
For the pseries machine, TCE (IOMMU) tables can either be directly
malloc()ed in qemu or, when running on a KVM which supports it, mmap()ed
from a KVM ioctl. The latter option is used when available, because it
allows the (frequent bottlenext) H_PUT_TCE hypercall to be KVM accelerated.
However, even when KVM is persent, TCE acceleration is not always possible.
Only KVM HV supports this ioctl(), not KVM PR, or the kernel could run out
of contiguous memory to allocate the new table. In this case we need to
fall back on the malloc()ed table.
When a device is removed, and we need to remove the TCE table, we need to
either munmap() or free() the table as appropriate for how it was
allocated. The code is supposed to do that, but we buggily fail to
initialize the tcet->fd variable in the malloc() case, which is used as a
flag to determine which is the right choice.
This patch fixes the bug, and cleans up error messages relating to this
path while we're at it.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2012-02-28 01:18:07 +08:00
|
|
|
/* Must set fd to -1 so we don't try to munmap when called for
|
|
|
|
* destroying the table, which the upper layers -will- do
|
|
|
|
*/
|
|
|
|
*pfd = -1;
|
2011-09-30 05:39:12 +08:00
|
|
|
if (!cap_spapr_tce) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
|
|
|
|
if (fd < 0) {
|
pseries: Don't try to munmap() a malloc()ed TCE table
For the pseries machine, TCE (IOMMU) tables can either be directly
malloc()ed in qemu or, when running on a KVM which supports it, mmap()ed
from a KVM ioctl. The latter option is used when available, because it
allows the (frequent bottlenext) H_PUT_TCE hypercall to be KVM accelerated.
However, even when KVM is persent, TCE acceleration is not always possible.
Only KVM HV supports this ioctl(), not KVM PR, or the kernel could run out
of contiguous memory to allocate the new table. In this case we need to
fall back on the malloc()ed table.
When a device is removed, and we need to remove the TCE table, we need to
either munmap() or free() the table as appropriate for how it was
allocated. The code is supposed to do that, but we buggily fail to
initialize the tcet->fd variable in the malloc() case, which is used as a
flag to determine which is the right choice.
This patch fixes the bug, and cleans up error messages relating to this
path while we're at it.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2012-02-28 01:18:07 +08:00
|
|
|
fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
|
|
|
|
liobn);
|
2011-09-30 05:39:12 +08:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2012-06-27 12:50:44 +08:00
|
|
|
len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
|
2011-09-30 05:39:12 +08:00
|
|
|
/* FIXME: round this up to page size */
|
|
|
|
|
2011-10-27 23:56:31 +08:00
|
|
|
table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
|
2011-09-30 05:39:12 +08:00
|
|
|
if (table == MAP_FAILED) {
|
pseries: Don't try to munmap() a malloc()ed TCE table
For the pseries machine, TCE (IOMMU) tables can either be directly
malloc()ed in qemu or, when running on a KVM which supports it, mmap()ed
from a KVM ioctl. The latter option is used when available, because it
allows the (frequent bottlenext) H_PUT_TCE hypercall to be KVM accelerated.
However, even when KVM is persent, TCE acceleration is not always possible.
Only KVM HV supports this ioctl(), not KVM PR, or the kernel could run out
of contiguous memory to allocate the new table. In this case we need to
fall back on the malloc()ed table.
When a device is removed, and we need to remove the TCE table, we need to
either munmap() or free() the table as appropriate for how it was
allocated. The code is supposed to do that, but we buggily fail to
initialize the tcet->fd variable in the malloc() case, which is used as a
flag to determine which is the right choice.
This patch fixes the bug, and cleans up error messages relating to this
path while we're at it.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2012-02-28 01:18:07 +08:00
|
|
|
fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
|
|
|
|
liobn);
|
2011-09-30 05:39:12 +08:00
|
|
|
close(fd);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
*pfd = fd;
|
|
|
|
return table;
|
|
|
|
}
|
|
|
|
|
|
|
|
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
|
|
|
|
{
|
|
|
|
long len;
|
|
|
|
|
|
|
|
if (fd < 0) {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2012-06-27 12:50:44 +08:00
|
|
|
len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
|
2011-09-30 05:39:12 +08:00
|
|
|
if ((munmap(table, len) < 0) ||
|
|
|
|
(close(fd) < 0)) {
|
pseries: Don't try to munmap() a malloc()ed TCE table
For the pseries machine, TCE (IOMMU) tables can either be directly
malloc()ed in qemu or, when running on a KVM which supports it, mmap()ed
from a KVM ioctl. The latter option is used when available, because it
allows the (frequent bottlenext) H_PUT_TCE hypercall to be KVM accelerated.
However, even when KVM is persent, TCE acceleration is not always possible.
Only KVM HV supports this ioctl(), not KVM PR, or the kernel could run out
of contiguous memory to allocate the new table. In this case we need to
fall back on the malloc()ed table.
When a device is removed, and we need to remove the TCE table, we need to
either munmap() or free() the table as appropriate for how it was
allocated. The code is supposed to do that, but we buggily fail to
initialize the tcet->fd variable in the malloc() case, which is used as a
flag to determine which is the right choice.
This patch fixes the bug, and cleans up error messages relating to this
path while we're at it.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2012-02-28 01:18:07 +08:00
|
|
|
fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
|
|
|
|
strerror(errno));
|
2011-09-30 05:39:12 +08:00
|
|
|
/* Leak the table */
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-09-13 00:57:12 +08:00
|
|
|
int kvmppc_reset_htab(int shift_hint)
|
|
|
|
{
|
|
|
|
uint32_t shift = shift_hint;
|
|
|
|
|
2012-09-20 05:08:42 +08:00
|
|
|
if (!kvm_enabled()) {
|
|
|
|
/* Full emulation, tell caller to allocate htab itself */
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
|
2012-09-13 00:57:12 +08:00
|
|
|
int ret;
|
|
|
|
ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
|
2012-09-20 05:08:42 +08:00
|
|
|
if (ret == -ENOTTY) {
|
|
|
|
/* At least some versions of PR KVM advertise the
|
|
|
|
* capability, but don't implement the ioctl(). Oops.
|
|
|
|
* Return 0 so that we allocate the htab in qemu, as is
|
|
|
|
* correct for PR. */
|
|
|
|
return 0;
|
|
|
|
} else if (ret < 0) {
|
2012-09-13 00:57:12 +08:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
return shift;
|
|
|
|
}
|
|
|
|
|
2012-09-20 05:08:42 +08:00
|
|
|
/* We have a kernel that predates the htab reset calls. For PR
|
|
|
|
* KVM, we need to allocate the htab ourselves, for an HV KVM of
|
|
|
|
* this era, it has allocated a 16MB fixed size hash table
|
|
|
|
* already. Kernels of this era have the GET_PVINFO capability
|
|
|
|
* only on PR, so we use this hack to determine the right
|
|
|
|
* answer */
|
|
|
|
if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
|
|
|
|
/* PR - tell caller to allocate htab */
|
|
|
|
return 0;
|
|
|
|
} else {
|
|
|
|
/* HV - assume 16MB kernel allocated htab */
|
|
|
|
return 24;
|
|
|
|
}
|
2012-09-13 00:57:12 +08:00
|
|
|
}
|
|
|
|
|
2011-10-13 06:40:32 +08:00
|
|
|
static inline uint32_t mfpvr(void)
|
|
|
|
{
|
|
|
|
uint32_t pvr;
|
|
|
|
|
|
|
|
asm ("mfpvr %0"
|
|
|
|
: "=r"(pvr));
|
|
|
|
return pvr;
|
|
|
|
}
|
|
|
|
|
2011-10-18 02:15:41 +08:00
|
|
|
/*
 * Set (on == true) or clear (on == false) every bit of 'flags' in *word;
 * all other bits of *word are left as they were.
 */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
|
|
|
|
|
2011-10-13 06:40:32 +08:00
|
|
|
const ppc_def_t *kvmppc_host_cpu_def(void)
|
|
|
|
{
|
|
|
|
uint32_t host_pvr = mfpvr();
|
|
|
|
const ppc_def_t *base_spec;
|
2011-10-18 02:15:41 +08:00
|
|
|
ppc_def_t *spec;
|
|
|
|
uint32_t vmx = kvmppc_get_vmx();
|
|
|
|
uint32_t dfp = kvmppc_get_dfp();
|
2011-10-13 06:40:32 +08:00
|
|
|
|
|
|
|
base_spec = ppc_find_by_pvr(host_pvr);
|
|
|
|
|
2011-10-18 02:15:41 +08:00
|
|
|
spec = g_malloc0(sizeof(*spec));
|
|
|
|
memcpy(spec, base_spec, sizeof(*spec));
|
|
|
|
|
|
|
|
/* Now fix up the spec with information we can query from the host */
|
|
|
|
|
2011-10-25 02:43:22 +08:00
|
|
|
if (vmx != -1) {
|
|
|
|
/* Only override when we know what the host supports */
|
|
|
|
alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
|
|
|
|
alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
|
|
|
|
}
|
|
|
|
if (dfp != -1) {
|
|
|
|
/* Only override when we know what the host supports */
|
|
|
|
alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
|
|
|
|
}
|
2011-10-18 02:15:41 +08:00
|
|
|
|
|
|
|
return spec;
|
2011-10-13 06:40:32 +08:00
|
|
|
}
|
|
|
|
|
2012-04-04 13:02:05 +08:00
|
|
|
/*
 * Renumber env->cpu_index for SMT: the quotient of the index by
 * smp_threads is scaled by kvmppc_smt_threads() and the remainder is
 * added back.  Always returns 0.
 */
int kvmppc_fixup_cpu(CPUPPCState *env)
{
    int smt = kvmppc_smt_threads();

    /* Adjust cpu index for SMT */
    env->cpu_index = (env->cpu_index % smp_threads)
        + (env->cpu_index / smp_threads) * smt;

    return 0;
}
|
|
|
|
|
|
|
|
|
2012-03-14 08:38:22 +08:00
|
|
|
/* Always returns true, regardless of 'env'. */
bool kvm_arch_stop_on_emulation_error(CPUPPCState *env)
{
    (void)env;

    return true;
}
|
2011-02-02 05:15:51 +08:00
|
|
|
|
2012-03-14 08:38:22 +08:00
|
|
|
/* Always returns 1; none of the arguments is examined. */
int kvm_arch_on_sigbus_vcpu(CPUPPCState *env, int code, void *addr)
{
    (void)env;
    (void)code;
    (void)addr;

    return 1;
}
|
|
|
|
|
|
|
|
/* Always returns 1; neither argument is examined. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    (void)code;
    (void)addr;

    return 1;
}
|