diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1d1fcc290fca..085fb8ecbf68 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2697,8 +2697,10 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX; if (cpu_has_feature(CPU_FTR_HVMODE)) { vcpu->arch.hfscr &= mfspr(SPRN_HFSCR); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) vcpu->arch.hfscr |= HFSCR_TM; +#endif } if (cpu_has_feature(CPU_FTR_TM_COMP)) vcpu->arch.hfscr |= HFSCR_TM; diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 8543ad538b0c..898f942eb198 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -302,6 +302,9 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) if (vcpu->kvm->arch.l1_ptcr == 0) return H_NOT_AVAILABLE; + if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr)) + return H_BAD_MODE; + /* copy parameters in */ hv_ptr = kvmppc_get_gpr(vcpu, 4); regs_ptr = kvmppc_get_gpr(vcpu, 5); @@ -322,6 +325,23 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) if (l2_hv.vcpu_token >= NR_CPUS) return H_PARAMETER; + /* + * L1 must have set up a suspended state to enter the L2 in a + * transactional state, and only in that case. These have to be + * filtered out here to prevent causing a TM Bad Thing in the + * host HRFID. We could synthesize a TM Bad Thing back to the L1 + * here but there doesn't seem like much point. + */ + if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) { + if (!MSR_TM_ACTIVE(l2_regs.msr)) + return H_BAD_MODE; + } else { + if (l2_regs.msr & MSR_TS_MASK) + return H_BAD_MODE; + if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_TS_MASK)) + return H_BAD_MODE; + } + /* translate lpid */ l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true); if (!l2) diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c index 83f592eadcd2..961b3d70483c 100644 --- a/arch/powerpc/kvm/book3s_hv_p9_entry.c +++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c @@ -317,6 +317,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc */ mtspr(SPRN_HDEC, hdec); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +tm_return_to_guest: +#endif mtspr(SPRN_DAR, vcpu->arch.shregs.dar); mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr); mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0); @@ -415,11 +418,23 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc * is in real suspend mode and is trying to transition to * transactional mode. */ - if (local_paca->kvm_hstate.fake_suspend && + if (!local_paca->kvm_hstate.fake_suspend && (vcpu->arch.shregs.msr & MSR_TS_S)) { if (kvmhv_p9_tm_emulation_early(vcpu)) { - /* Prevent it being handled again. */ - trap = 0; + /* + * Go straight back into the guest with the + * new NIP/MSR as set by TM emulation. + */ + mtspr(SPRN_HSRR0, vcpu->arch.regs.nip); + mtspr(SPRN_HSRR1, vcpu->arch.shregs.msr); + + /* + * tm_return_to_guest re-loads SRR0/1, DAR, + * DSISR after RI is cleared, in case they had + * been clobbered by a MCE. + */ + __mtmsrd(0, 1); /* clear RI */ + goto tm_return_to_guest; } } #endif @@ -499,6 +514,10 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc * If we are in real mode, only switch MMU on after the MMU is * switched to host, to avoid the P9_RADIX_PREFETCH_BUG. */ + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && + vcpu->arch.shregs.msr & MSR_TS_MASK) + msr |= MSR_TS_S; + __mtmsrd(msr, 0); end_timing(vcpu); diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index c5e677508d3b..0f847f1e5ddd 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -242,6 +242,17 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) * value so we can restore it on the way out. */ orig_rets = args.rets; + if (be32_to_cpu(args.nargs) >= ARRAY_SIZE(args.args)) { + /* + * Don't overflow our args array: ensure there is room for + * at least rets[0] (even if the call specifies 0 nret). + * + * Each handler must then check for the correct nargs and nret + * values, but they may always return failure in rets[0]. + */ + rc = -EINVAL; + goto fail; + } args.rets = &args.args[be32_to_cpu(args.nargs)]; mutex_lock(&vcpu->kvm->arch.rtas_token_lock); @@ -269,9 +280,17 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) fail: /* * We only get here if the guest has called RTAS with a bogus - * args pointer. That means we can't get to the args, and so we - * can't fail the RTAS call. So fail right out to userspace, - * which should kill the guest. + * args pointer or nargs/nret values that would overflow the + * array. That means we can't get to the args, and so we can't + * fail the RTAS call. So fail right out to userspace, which + * should kill the guest. + * + * SLOF should actually pass the hcall return value from the + * rtas handler call in r3, so enter_rtas could be modified to + * return a failure indication in r3 and we could return such + * errors to the guest rather than failing to host userspace. + * However old guests that don't test for failure could then + * continue silently after errors, so for now we won't do this. */ return rc; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index be33b5321a76..b4e6f70b97b9 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -2048,9 +2048,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, { struct kvm_enable_cap cap; r = -EFAULT; - vcpu_load(vcpu); if (copy_from_user(&cap, argp, sizeof(cap))) goto out; + vcpu_load(vcpu); r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); vcpu_put(vcpu); break; @@ -2074,9 +2074,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_DIRTY_TLB: { struct kvm_dirty_tlb dirty; r = -EFAULT; - vcpu_load(vcpu); if (copy_from_user(&dirty, argp, sizeof(dirty))) goto out; + vcpu_load(vcpu); r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty); vcpu_put(vcpu); break;