Merge branch 'topic/ppc-kvm' into next

Merge our ppc-kvm topic branch to bring in the Ultravisor support
patches.
Michael Ellerman 2019-08-30 09:52:57 +10:00
commit 9044adca78
29 changed files with 1564 additions and 141 deletions

View File

@ -0,0 +1,42 @@
==========================
ELF Note PowerPC Namespace
==========================
The PowerPC namespace in an ELF Note of the kernel binary is used to store
capabilities and information which can be used by a bootloader or userland.
Types and Descriptors
---------------------
The types to be used with the "PowerPC" namespace are defined in
include/uapi/asm/elfnote.h.
1) PPC_ELFNOTE_CAPABILITIES
Defines the capabilities supported/required by the kernel. This type uses a
bitmap as its "descriptor" field. Each bit is described below:
- Ultravisor-capable bit (PowerNV only).
#define PPCCAP_ULTRAVISOR_BIT (1 << 0)
Indicates that the powerpc kernel binary knows how to run in an
ultravisor-enabled system.
In an ultravisor-enabled system, some machine resources are now controlled
by the ultravisor. If the kernel is not ultravisor-capable, but it ends up
being run on a machine with an ultravisor, the kernel will probably crash
trying to access ultravisor resources. For instance, it may crash in early
boot trying to set the partition table entry 0.
In an ultravisor-enabled system, a bootloader could warn the user or prevent
the kernel from being run if the PowerPC ultravisor capability doesn't exist
or the Ultravisor-capable bit is not set.
References
----------
arch/powerpc/include/asm/elfnote.h
arch/powerpc/kernel/note.S
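
An aside for context, not part of the patch: the note described above is cheap
for a loader to consume. Below is a minimal sketch of how a bootloader holding
the kernel ELF image in memory might test the Ultravisor-capable bit. It walks
the section headers (a real loader might prefer the PT_NOTE program header),
assumes a 64-bit image whose endianness matches the loader's, and elides error
checking.

    #include <elf.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <string.h>

    #define PPC_ELFNOTE_CAPABILITIES 1
    #define PPCCAP_ULTRAVISOR_BIT    (1 << 0)

    static bool kernel_is_ultravisor_capable(const unsigned char *image)
    {
    	const Elf64_Ehdr *ehdr = (const void *)image;
    	const Elf64_Shdr *shdr = (const void *)(image + ehdr->e_shoff);
    	int i;

    	for (i = 0; i < ehdr->e_shnum; i++) {
    		const unsigned char *p = image + shdr[i].sh_offset;
    		const unsigned char *end = p + shdr[i].sh_size;

    		if (shdr[i].sh_type != SHT_NOTE)
    			continue;

    		while (p + sizeof(Elf64_Nhdr) <= end) {
    			const Elf64_Nhdr *nh = (const void *)p;
    			const char *name = (const char *)(nh + 1);
    			/* name and desc are each padded to 4 bytes */
    			const uint32_t *desc = (const uint32_t *)
    				(name + ((nh->n_namesz + 3) & ~3));

    			if (nh->n_type == PPC_ELFNOTE_CAPABILITIES &&
    			    nh->n_namesz == sizeof("PowerPC") &&
    			    !strcmp(name, "PowerPC"))
    				return *desc & PPCCAP_ULTRAVISOR_BIT;

    			p = (const unsigned char *)desc +
    				((nh->n_descsz + 3) & ~3);
    		}
    	}
    	return false;
    }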

File diff suppressed because it is too large

View File

@ -15,6 +15,7 @@
#include <asm/epapr_hcalls.h>
#include <asm/dcr.h>
#include <asm/mmu_context.h>
#include <asm/ultravisor-api.h>
#include <uapi/asm/ucontext.h>
@ -34,6 +35,16 @@ extern struct static_key hcall_tracepoint_key;
void __trace_hcall_entry(unsigned long opcode, unsigned long *args);
void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf);
/* Ultravisor */
#ifdef CONFIG_PPC_POWERNV
long ucall_norets(unsigned long opcode, ...);
#else
static inline long ucall_norets(unsigned long opcode, ...)
{
return U_NOT_AVAILABLE;
}
#endif
/* OPAL */
int64_t __opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
int64_t a4, int64_t a5, int64_t a6, int64_t a7,

View File

@ -0,0 +1,24 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* PowerPC ELF notes.
*
* Copyright 2019, IBM Corporation
*/
#ifndef __ASM_POWERPC_ELFNOTE_H__
#define __ASM_POWERPC_ELFNOTE_H__
/*
* These note types should live in a SHT_NOTE segment and have
* "PowerPC" in the name field.
*/
/*
* The capabilities supported/required by this kernel (bitmap).
*
* This type uses a bitmap as "desc" field. Each bit is described
* in arch/powerpc/kernel/note.S
*/
#define PPC_ELFNOTE_CAPABILITIES 1
#endif /* __ASM_POWERPC_ELFNOTE_H__ */

View File

@ -50,6 +50,7 @@
#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000800000000)
#define FW_FEATURE_BLOCK_REMOVE ASM_CONST(0x0000001000000000)
#define FW_FEATURE_PAPR_SCM ASM_CONST(0x0000002000000000)
#define FW_FEATURE_ULTRAVISOR ASM_CONST(0x0000004000000000)
#ifndef __ASSEMBLY__
@ -68,9 +69,9 @@ enum {
FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE |
FW_FEATURE_PAPR_SCM,
FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR,
FW_FEATURE_PSERIES_ALWAYS = 0,
FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR,
FW_FEATURE_POWERNV_ALWAYS = 0,
FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,

View File

@ -48,15 +48,16 @@ struct iommu_table_ops {
* returns old TCE and DMA direction mask.
* @tce is a physical address.
*/
int (*exchange)(struct iommu_table *tbl,
int (*xchg_no_kill)(struct iommu_table *tbl,
long index,
unsigned long *hpa,
enum dma_data_direction *direction);
/* Real mode */
int (*exchange_rm)(struct iommu_table *tbl,
long index,
unsigned long *hpa,
enum dma_data_direction *direction);
enum dma_data_direction *direction,
bool realmode);
void (*tce_kill)(struct iommu_table *tbl,
unsigned long index,
unsigned long pages,
bool realmode);
__be64 *(*useraddrptr)(struct iommu_table *tbl, long index, bool alloc);
#endif
@ -209,6 +210,12 @@ extern void iommu_del_device(struct device *dev);
extern long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction);
extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction);
extern void iommu_tce_kill(struct iommu_table *tbl,
unsigned long entry, unsigned long pages);
#else
static inline void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number,
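
To make the split above concrete: the old exchange()/exchange_rm() hooks both
updated a TCE and invalidated the TCE cache on every call, whereas
xchg_no_kill() defers invalidation to an explicit tce_kill()/iommu_tce_kill(),
so callers can batch one invalidation over a range of entries. A hypothetical
in-kernel caller (illustrative only; releasing the old page behind hpa is
elided) might look like:

    #include <linux/dma-direction.h>
    #include <asm/iommu.h>

    /* Clear a run of TCEs, then invalidate the whole range once. */
    static long example_clear_tces(struct mm_struct *mm, struct iommu_table *tbl,
    			       unsigned long entry, unsigned long npages)
    {
    	long ret = 0;
    	unsigned long i;

    	for (i = 0; i < npages && !ret; i++) {
    		unsigned long hpa = 0;
    		enum dma_data_direction dir = DMA_NONE;

    		ret = iommu_tce_xchg_no_kill(mm, tbl, entry + i, &hpa, &dir);
    		/* a real caller would release the old page via hpa here */
    	}
    	iommu_tce_kill(tbl, entry, npages);	/* one invalidation */
    	return ret;
    }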

View File

@ -283,6 +283,7 @@ struct kvm_arch {
cpumask_t cpu_in_guest;
u8 radix;
u8 fwnmi_enabled;
u8 secure_guest;
bool threads_indep;
bool nested_enable;
pgd_t *pgtable;

View File

@ -0,0 +1,29 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Ultravisor API.
*
* Copyright 2019, IBM Corporation.
*
*/
#ifndef _ASM_POWERPC_ULTRAVISOR_API_H
#define _ASM_POWERPC_ULTRAVISOR_API_H
#include <asm/hvcall.h>
/* Return codes */
#define U_BUSY H_BUSY
#define U_FUNCTION H_FUNCTION
#define U_NOT_AVAILABLE H_NOT_AVAILABLE
#define U_P2 H_P2
#define U_P3 H_P3
#define U_P4 H_P4
#define U_P5 H_P5
#define U_PARAMETER H_PARAMETER
#define U_PERMISSION H_PERMISSION
#define U_SUCCESS H_SUCCESS
/* opcodes */
#define UV_WRITE_PATE 0xF104
#define UV_RETURN 0xF11C
#endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
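
Since each U_* code is aliased to its H_* counterpart, ucall error handling
reads exactly like hcall error handling. A small illustrative wrapper, not
from the patch and assuming kernel context:

    #include <linux/errno.h>
    #include <linux/printk.h>
    #include <linux/types.h>
    #include <asm/ultravisor-api.h>
    #include <asm/asm-prototypes.h>	/* ucall_norets() */

    static int example_uv_write_pate(u64 lpid, u64 dw0, u64 dw1)
    {
    	long rc = ucall_norets(UV_WRITE_PATE, lpid, dw0, dw1);

    	if (rc != U_SUCCESS) {
    		pr_err("UV_WRITE_PATE failed: %ld\n", rc);
    		return -ENODEV;	/* pick an errno suited to the caller */
    	}
    	return 0;
    }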

View File

@ -0,0 +1,34 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Ultravisor definitions
*
* Copyright 2019, IBM Corporation.
*
*/
#ifndef _ASM_POWERPC_ULTRAVISOR_H
#define _ASM_POWERPC_ULTRAVISOR_H
#include <asm/asm-prototypes.h>
#include <asm/ultravisor-api.h>
#include <asm/firmware.h>
int early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
int depth, void *data);
/*
* In ultravisor enabled systems, PTCR becomes ultravisor privileged only for
* writing and an attempt to write to it will cause a Hypervisor Emulation
* Assistance interrupt.
*/
static inline void set_ptcr_when_no_uv(u64 val)
{
if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
mtspr(SPRN_PTCR, val);
}
static inline int uv_register_pate(u64 lpid, u64 dw0, u64 dw1)
{
return ucall_norets(UV_WRITE_PATE, lpid, dw0, dw1);
}
#endif /* _ASM_POWERPC_ULTRAVISOR_H */

View File

@ -53,7 +53,7 @@ obj-y := cputable.o ptrace.o syscalls.o \
dma-common.o
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
paca.o nvram_64.o firmware.o
paca.o nvram_64.o firmware.o note.o
obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
@ -156,6 +156,7 @@ endif
obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
obj-$(CONFIG_PPC_POWERNV) += ucall.o
# Disable GCOV, KCOV & sanitizers in odd or sensitive code
GCOV_PROFILE_prom_init.o := n

View File

@ -506,6 +506,7 @@ int main(void)
OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v);
OFFSET(KVM_RADIX, kvm, arch.radix);
OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled);
OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest);
OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr);
OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar);
OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);

View File

@ -1013,29 +1013,32 @@ int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
}
EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);
long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction)
{
long ret;
unsigned long size = 0;
ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction, false);
if (!ret && ((*direction == DMA_FROM_DEVICE) ||
(*direction == DMA_BIDIRECTIONAL)) &&
!mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift,
&size))
SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
/* if (unlikely(ret))
pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
__func__, hwaddr, entry << tbl->it_page_shift,
hwaddr, ret); */
return ret;
}
EXPORT_SYMBOL_GPL(iommu_tce_xchg);
EXPORT_SYMBOL_GPL(iommu_tce_xchg_no_kill);
void iommu_tce_kill(struct iommu_table *tbl,
unsigned long entry, unsigned long pages)
{
if (tbl->it_ops->tce_kill)
tbl->it_ops->tce_kill(tbl, entry, pages, false);
}
EXPORT_SYMBOL_GPL(iommu_tce_kill);
int iommu_take_ownership(struct iommu_table *tbl)
{
@ -1049,7 +1052,7 @@ int iommu_take_ownership(struct iommu_table *tbl)
* requires exchange() callback defined so if it is not
* implemented, we disallow taking ownership over the table.
*/
if (!tbl->it_ops->exchange)
if (!tbl->it_ops->xchg_no_kill)
return -EINVAL;
spin_lock_irqsave(&tbl->large_pool.lock, flags);

View File

@ -0,0 +1,40 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* PowerPC ELF notes.
*
* Copyright 2019, IBM Corporation
*/
#include <linux/elfnote.h>
#include <asm/elfnote.h>
/*
* Ultravisor-capable bit (PowerNV only).
*
* Bit 0 indicates that the powerpc kernel binary knows how to run in an
* ultravisor-enabled system.
*
* In an ultravisor-enabled system, some machine resources are now controlled
* by the ultravisor. If the kernel is not ultravisor-capable, but it ends up
* being run on a machine with an ultravisor, the kernel will probably crash
* trying to access ultravisor resources. For instance, it may crash in early
* boot trying to set the partition table entry 0.
*
* In an ultravisor-enabled system, a bootloader could warn the user or prevent
* the kernel from being run if the PowerPC ultravisor capability doesn't exist
* or the Ultravisor-capable bit is not set.
*/
#ifdef CONFIG_PPC_POWERNV
#define PPCCAP_ULTRAVISOR_BIT (1 << 0)
#else
#define PPCCAP_ULTRAVISOR_BIT 0
#endif
/*
* Add the PowerPC Capabilities in the binary ELF note. It is a bitmap that
* can be used to advertise kernel capabilities to userland.
*/
#define PPC_CAPABILITIES_BITMAP (PPCCAP_ULTRAVISOR_BIT)
ELFNOTE(PowerPC, PPC_ELFNOTE_CAPABILITIES,
.long PPC_CAPABILITIES_BITMAP)
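
Once note.S is linked in, the result is easy to eyeball on a built kernel:
"readelf -n vmlinux" (stock binutils) should list a note whose owner is
"PowerPC" with type 0x1 and a 4-byte descriptor, bit 0 of which follows
CONFIG_PPC_POWERNV.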

View File

@ -55,6 +55,7 @@
#include <asm/firmware.h>
#include <asm/dt_cpu_ftrs.h>
#include <asm/drmem.h>
#include <asm/ultravisor.h>
#include <mm/mmu_decl.h>
@ -702,6 +703,9 @@ void __init early_init_devtree(void *params)
#ifdef CONFIG_PPC_POWERNV
/* Some machines might need OPAL info for debugging, grab it now. */
of_scan_flat_dt(early_init_dt_scan_opal, NULL);
/* Scan tree for ultravisor feature */
of_scan_flat_dt(early_init_dt_scan_ultravisor, NULL);
#endif
#ifdef CONFIG_FA_DUMP

View File

@ -0,0 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Generic code to perform an ultravisor call.
*
* Copyright 2019, IBM Corporation.
*
*/
#include <asm/ppc_asm.h>
#include <asm/export.h>
_GLOBAL(ucall_norets)
EXPORT_SYMBOL_GPL(ucall_norets)
sc 2 /* Invoke the ultravisor */
blr /* Return r3 = status */
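
A note on the calling convention: "sc 1" is the existing hypercall path into
the hypervisor, while "sc 2" traps one level further up, into the ultravisor.
Arguments are passed in r3 and up and the status comes back in r3, mirroring
hcalls, which is why ucall_norets() needs nothing beyond the sc and the blr.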

View File

@ -416,7 +416,7 @@ static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long hpa = 0;
enum dma_data_direction dir = DMA_NONE;
iommu_tce_xchg(mm, tbl, entry, &hpa, &dir);
iommu_tce_xchg_no_kill(mm, tbl, entry, &hpa, &dir);
}
static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
@ -447,7 +447,8 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
unsigned long hpa = 0;
long ret;
if (WARN_ON_ONCE(iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir)))
if (WARN_ON_ONCE(iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa,
&dir)))
return H_TOO_HARD;
if (dir == DMA_NONE)
@ -455,7 +456,7 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
if (ret != H_SUCCESS)
iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir);
iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa, &dir);
return ret;
}
@ -501,7 +502,7 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
if (mm_iommu_mapped_inc(mem))
return H_TOO_HARD;
ret = iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir);
ret = iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa, &dir);
if (WARN_ON_ONCE(ret)) {
mm_iommu_mapped_dec(mem);
return H_TOO_HARD;
@ -579,6 +580,8 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl,
entry, ua, dir);
iommu_tce_kill(stit->tbl, entry, 1);
if (ret != H_SUCCESS) {
kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
goto unlock_exit;
@ -656,12 +659,14 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
*/
if (get_user(tce, tces + i)) {
ret = H_TOO_HARD;
goto unlock_exit;
goto invalidate_exit;
}
tce = be64_to_cpu(tce);
if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua))
return H_PARAMETER;
if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
ret = H_PARAMETER;
goto invalidate_exit;
}
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
ret = kvmppc_tce_iommu_map(vcpu->kvm, stt,
@ -671,13 +676,17 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
if (ret != H_SUCCESS) {
kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl,
entry);
goto unlock_exit;
goto invalidate_exit;
}
}
kvmppc_tce_put(stt, entry + i, tce);
}
invalidate_exit:
list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
iommu_tce_kill(stit->tbl, entry, npages);
unlock_exit:
srcu_read_unlock(&vcpu->kvm->srcu, idx);
@ -716,7 +725,7 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
continue;
if (ret == H_TOO_HARD)
return ret;
goto invalidate_exit;
WARN_ON_ONCE(1);
kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
@ -726,6 +735,10 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
return H_SUCCESS;
invalidate_exit:
list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
iommu_tce_kill(stit->tbl, ioba >> stt->page_shift, npages);
return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);

View File

@ -218,13 +218,14 @@ static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt,
return H_SUCCESS;
}
static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
static long iommu_tce_xchg_no_kill_rm(struct mm_struct *mm,
struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction)
{
long ret;
ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction, true);
if (!ret && ((*direction == DMA_FROM_DEVICE) ||
(*direction == DMA_BIDIRECTIONAL))) {
@ -240,13 +241,20 @@ static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
return ret;
}
extern void iommu_tce_kill_rm(struct iommu_table *tbl,
unsigned long entry, unsigned long pages)
{
if (tbl->it_ops->tce_kill)
tbl->it_ops->tce_kill(tbl, entry, pages, true);
}
static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl,
unsigned long entry)
{
unsigned long hpa = 0;
enum dma_data_direction dir = DMA_NONE;
iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir);
}
static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
@ -278,7 +286,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
unsigned long hpa = 0;
long ret;
if (iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir))
if (iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir))
/*
* real mode xchg can fail if struct page crosses
* a page boundary
@ -290,7 +298,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
if (ret)
iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir);
return ret;
}
@ -336,7 +344,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
return H_TOO_HARD;
ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
ret = iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir);
if (ret) {
mm_iommu_mapped_dec(mem);
/*
@ -417,6 +425,8 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt,
stit->tbl, entry, ua, dir);
iommu_tce_kill_rm(stit->tbl, entry, 1);
if (ret != H_SUCCESS) {
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
return ret;
@ -556,8 +566,10 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
ua = 0;
if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
return H_PARAMETER;
if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) {
ret = H_PARAMETER;
goto invalidate_exit;
}
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt,
@ -567,13 +579,17 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
if (ret != H_SUCCESS) {
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl,
entry);
goto unlock_exit;
goto invalidate_exit;
}
}
kvmppc_rm_tce_put(stt, entry + i, tce);
}
invalidate_exit:
list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
iommu_tce_kill_rm(stit->tbl, entry, npages);
unlock_exit:
if (rmap)
unlock_rmap(rmap);
@ -616,7 +632,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
continue;
if (ret == H_TOO_HARD)
return ret;
goto invalidate_exit;
WARN_ON_ONCE_RM(1);
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
@ -626,7 +642,11 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value);
return H_SUCCESS;
invalidate_exit:
list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
iommu_tce_kill_rm(stit->tbl, ioba >> stt->page_shift, npages);
return ret;
}
/* This can be called in either virtual mode or real mode */

View File

@ -29,6 +29,7 @@
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
#include <asm/cpuidle.h>
#include <asm/ultravisor-api.h>
/* Sign-extend HDEC if not on POWER9 */
#define EXTEND_HDEC(reg) \
@ -1085,16 +1086,10 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r5, VCPU_LR(r4)
ld r6, VCPU_CR(r4)
mtlr r5
mtcr r6
ld r1, VCPU_GPR(R1)(r4)
ld r2, VCPU_GPR(R2)(r4)
ld r3, VCPU_GPR(R3)(r4)
ld r5, VCPU_GPR(R5)(r4)
ld r6, VCPU_GPR(R6)(r4)
ld r7, VCPU_GPR(R7)(r4)
ld r8, VCPU_GPR(R8)(r4)
ld r9, VCPU_GPR(R9)(r4)
ld r10, VCPU_GPR(R10)(r4)
@ -1112,10 +1107,42 @@ BEGIN_FTR_SECTION
mtspr SPRN_HDSISR, r0
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r6, VCPU_KVM(r4)
lbz r7, KVM_SECURE_GUEST(r6)
cmpdi r7, 0
ld r6, VCPU_GPR(R6)(r4)
ld r7, VCPU_GPR(R7)(r4)
bne ret_to_ultra
lwz r0, VCPU_CR(r4)
mtcr r0
ld r0, VCPU_GPR(R0)(r4)
ld r2, VCPU_GPR(R2)(r4)
ld r3, VCPU_GPR(R3)(r4)
ld r4, VCPU_GPR(R4)(r4)
HRFI_TO_GUEST
b .
/*
* Use UV_RETURN ultracall to return control back to the Ultravisor after
* processing a hypercall or interrupt that was forwarded (a.k.a. reflected)
* to the Hypervisor.
*
* All registers have already been loaded, except:
* R0 = hcall result
* R2 = SRR1, so UV can detect a synthesized interrupt (if any)
* R3 = UV_RETURN
*/
ret_to_ultra:
lwz r0, VCPU_CR(r4)
mtcr r0
ld r0, VCPU_GPR(R3)(r4)
mfspr r2, SPRN_SRR1
li r3, 0
ori r3, r3, UV_RETURN
ld r4, VCPU_GPR(R4)(r4)
sc 2
/*
* Enter the guest on a P9 or later system where we have exactly

View File

@ -62,6 +62,7 @@
#include <asm/ps3.h>
#include <asm/pte-walk.h>
#include <asm/asm-prototypes.h>
#include <asm/ultravisor.h>
#include <mm/mmu_decl.h>
@ -1076,8 +1077,8 @@ void hash__early_init_mmu_secondary(void)
if (!cpu_has_feature(CPU_FTR_ARCH_300))
mtspr(SPRN_SDR1, _SDR1);
else
mtspr(SPRN_PTCR,
__pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
set_ptcr_when_no_uv(__pa(partition_tb) |
(PATB_SIZE_SHIFT - 12));
}
/* Initialize SLB */
slb_initialize();

View File

@ -12,6 +12,8 @@
#include <asm/tlb.h>
#include <asm/trace.h>
#include <asm/powernv.h>
#include <asm/firmware.h>
#include <asm/ultravisor.h>
#include <mm/mmu_decl.h>
#include <trace/events/thp.h>
@ -205,25 +207,14 @@ void __init mmu_partition_table_init(void)
* 64 K size.
*/
ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
mtspr(SPRN_PTCR, ptcr);
set_ptcr_when_no_uv(ptcr);
powernv_set_nmmu_ptcr(ptcr);
}
void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
unsigned long dw1)
static void flush_partition(unsigned int lpid, bool radix)
{
unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);
partition_tb[lpid].patb0 = cpu_to_be64(dw0);
partition_tb[lpid].patb1 = cpu_to_be64(dw1);
/*
* Global flush of TLBs and partition table caches for this lpid.
* The type of flush (hash or radix) depends on what the previous
* use of this partition ID was, not the new use.
*/
asm volatile("ptesync" : : : "memory");
if (old & PATB_HR) {
if (radix) {
asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
@ -237,6 +228,39 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
/* do we need fixup here ?*/
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}
void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
unsigned long dw1)
{
unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);
/*
* When ultravisor is enabled, the partition table is stored in secure
* memory and can only be accessed via an ultravisor call. However, we
* maintain a copy of the partition table in normal memory to allow Nest
* MMU translations to occur (for normal VMs).
*
* Therefore, here we always update partition_tb, regardless of whether
* we are running under an ultravisor or not.
*/
partition_tb[lpid].patb0 = cpu_to_be64(dw0);
partition_tb[lpid].patb1 = cpu_to_be64(dw1);
/*
* If ultravisor is enabled, we do an ultravisor call to register the
* partition table entry (PATE), which also does a global flush of TLBs
* and partition table caches for the lpid. Otherwise, just do the
* flush. The type of flush (hash or radix) depends on what the previous
* use of the partition ID was, not the new use.
*/
if (firmware_has_feature(FW_FEATURE_ULTRAVISOR)) {
uv_register_pate(lpid, dw0, dw1);
pr_info("PATE registered by ultravisor: dw0 = 0x%lx, dw1 = 0x%lx\n",
dw0, dw1);
} else {
flush_partition(lpid, (old & PATB_HR));
}
}
EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
static pmd_t *get_pmd_from_cache(struct mm_struct *mm)

View File

@ -27,6 +27,7 @@
#include <asm/sections.h>
#include <asm/trace.h>
#include <asm/uaccess.h>
#include <asm/ultravisor.h>
#include <trace/events/thp.h>
@ -650,8 +651,9 @@ void radix__early_init_mmu_secondary(void)
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
mtspr(SPRN_PTCR,
__pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
set_ptcr_when_no_uv(__pa(partition_tb) |
(PATB_SIZE_SHIFT - 12));
radix_init_amor();
}
@ -667,7 +669,7 @@ void radix__mmu_cleanup_all(void)
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
mtspr(SPRN_PTCR, 0);
set_ptcr_when_no_uv(0);
powernv_set_nmmu_ptcr(0);
radix__flush_tlb_all();
}

View File

@ -4,6 +4,7 @@ obj-y += idle.o opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o
obj-y += ultravisor.o
obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o

View File

@ -675,7 +675,8 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
sprs.ptcr = mfspr(SPRN_PTCR);
sprs.rpr = mfspr(SPRN_RPR);
sprs.tscr = mfspr(SPRN_TSCR);
sprs.ldbar = mfspr(SPRN_LDBAR);
if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
sprs.ldbar = mfspr(SPRN_LDBAR);
sprs_saved = true;
@ -789,7 +790,8 @@ core_woken:
mtspr(SPRN_MMCR0, sprs.mmcr0);
mtspr(SPRN_MMCR1, sprs.mmcr1);
mtspr(SPRN_MMCR2, sprs.mmcr2);
mtspr(SPRN_LDBAR, sprs.ldbar);
if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
mtspr(SPRN_LDBAR, sprs.ldbar);
mtspr(SPRN_SPRG3, local_paca->sprg_vdso);
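
(Context for the LDBAR guard above: with an ultravisor present, LDBAR is
ultravisor-privileged, so the hypervisor must not touch it; the stop entry and
exit paths therefore skip saving and restoring it when the ultravisor firmware
feature is set.)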

View File

@ -29,23 +29,23 @@ struct memcons {
static struct memcons *opal_memcons = NULL;
ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count)
{
const char *conbuf;
ssize_t ret;
size_t first_read = 0;
uint32_t out_pos, avail;
if (!opal_memcons)
if (!mc)
return -ENODEV;
out_pos = be32_to_cpu(READ_ONCE(opal_memcons->out_pos));
out_pos = be32_to_cpu(READ_ONCE(mc->out_pos));
/* Now we've read out_pos, put a barrier in before reading the new
* data it points to in conbuf. */
smp_rmb();
conbuf = phys_to_virt(be64_to_cpu(opal_memcons->obuf_phys));
conbuf = phys_to_virt(be64_to_cpu(mc->obuf_phys));
/* When the buffer has wrapped, read from the out_pos marker to the end
* of the buffer, and then read the remaining data as in the un-wrapped
@ -53,7 +53,7 @@ ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
if (out_pos & MEMCONS_OUT_POS_WRAP) {
out_pos &= MEMCONS_OUT_POS_MASK;
avail = be32_to_cpu(opal_memcons->obuf_size) - out_pos;
avail = be32_to_cpu(mc->obuf_size) - out_pos;
ret = memory_read_from_buffer(to, count, &pos,
conbuf + out_pos, avail);
@ -71,7 +71,7 @@ ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
}
/* Sanity check. The firmware should not do this to us. */
if (out_pos > be32_to_cpu(opal_memcons->obuf_size)) {
if (out_pos > be32_to_cpu(mc->obuf_size)) {
pr_err("OPAL: memory console corruption. Aborting read.\n");
return -EINVAL;
}
@ -86,6 +86,11 @@ out:
return ret;
}
ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
{
return memcons_copy(opal_memcons, to, pos, count);
}
static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj,
struct bin_attribute *bin_attr, char *to,
loff_t pos, size_t count)
@ -98,32 +103,48 @@ static struct bin_attribute opal_msglog_attr = {
.read = opal_msglog_read
};
void __init opal_msglog_init(void)
struct memcons *memcons_init(struct device_node *node, const char *mc_prop_name)
{
u64 mcaddr;
struct memcons *mc;
if (of_property_read_u64(opal_node, "ibm,opal-memcons", &mcaddr)) {
pr_warn("OPAL: Property ibm,opal-memcons not found, no message log\n");
return;
if (of_property_read_u64(node, mc_prop_name, &mcaddr)) {
pr_warn("%s property not found, no message log\n",
mc_prop_name);
goto out_err;
}
mc = phys_to_virt(mcaddr);
if (!mc) {
pr_warn("OPAL: memory console address is invalid\n");
return;
pr_warn("memory console address is invalid\n");
goto out_err;
}
if (be64_to_cpu(mc->magic) != MEMCONS_MAGIC) {
pr_warn("OPAL: memory console version is invalid\n");
pr_warn("memory console version is invalid\n");
goto out_err;
}
return mc;
out_err:
return NULL;
}
u32 memcons_get_size(struct memcons *mc)
{
return be32_to_cpu(mc->ibuf_size) + be32_to_cpu(mc->obuf_size);
}
void __init opal_msglog_init(void)
{
opal_memcons = memcons_init(opal_node, "ibm,opal-memcons");
if (!opal_memcons) {
pr_warn("OPAL: memcons failed to load from ibm,opal-memcons\n");
return;
}
/* Report maximum size */
opal_msglog_attr.size = be32_to_cpu(mc->ibuf_size) +
be32_to_cpu(mc->obuf_size);
opal_memcons = mc;
opal_msglog_attr.size = memcons_get_size(opal_memcons);
}
void __init opal_msglog_sysfs_init(void)

View File

@ -1939,26 +1939,12 @@ static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index,
}
#ifdef CONFIG_IOMMU_API
static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index,
unsigned long *hpa, enum dma_data_direction *direction)
/* Common for IODA1 and IODA2 */
static int pnv_ioda_tce_xchg_no_kill(struct iommu_table *tbl, long index,
unsigned long *hpa, enum dma_data_direction *direction,
bool realmode)
{
long ret = pnv_tce_xchg(tbl, index, hpa, direction, true);
if (!ret)
pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, false);
return ret;
}
static int pnv_ioda1_tce_xchg_rm(struct iommu_table *tbl, long index,
unsigned long *hpa, enum dma_data_direction *direction)
{
long ret = pnv_tce_xchg(tbl, index, hpa, direction, false);
if (!ret)
pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, true);
return ret;
return pnv_tce_xchg(tbl, index, hpa, direction, !realmode);
}
#endif
@ -1973,8 +1959,8 @@ static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index,
static struct iommu_table_ops pnv_ioda1_iommu_ops = {
.set = pnv_ioda1_tce_build,
#ifdef CONFIG_IOMMU_API
.exchange = pnv_ioda1_tce_xchg,
.exchange_rm = pnv_ioda1_tce_xchg_rm,
.xchg_no_kill = pnv_ioda_tce_xchg_no_kill,
.tce_kill = pnv_pci_p7ioc_tce_invalidate,
.useraddrptr = pnv_tce_useraddrptr,
#endif
.clear = pnv_ioda1_tce_free,
@ -2103,30 +2089,6 @@ static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
return ret;
}
#ifdef CONFIG_IOMMU_API
static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index,
unsigned long *hpa, enum dma_data_direction *direction)
{
long ret = pnv_tce_xchg(tbl, index, hpa, direction, true);
if (!ret)
pnv_pci_ioda2_tce_invalidate(tbl, index, 1, false);
return ret;
}
static int pnv_ioda2_tce_xchg_rm(struct iommu_table *tbl, long index,
unsigned long *hpa, enum dma_data_direction *direction)
{
long ret = pnv_tce_xchg(tbl, index, hpa, direction, false);
if (!ret)
pnv_pci_ioda2_tce_invalidate(tbl, index, 1, true);
return ret;
}
#endif
static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
long npages)
{
@ -2138,8 +2100,8 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
static struct iommu_table_ops pnv_ioda2_iommu_ops = {
.set = pnv_ioda2_tce_build,
#ifdef CONFIG_IOMMU_API
.exchange = pnv_ioda2_tce_xchg,
.exchange_rm = pnv_ioda2_tce_xchg_rm,
.xchg_no_kill = pnv_ioda_tce_xchg_no_kill,
.tce_kill = pnv_pci_ioda2_tce_invalidate,
.useraddrptr = pnv_tce_useraddrptr,
#endif
.clear = pnv_ioda2_tce_free,

View File

@ -30,4 +30,9 @@ extern void opal_event_shutdown(void);
bool cpu_core_split_required(void);
struct memcons;
ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count);
u32 memcons_get_size(struct memcons *mc);
struct memcons *memcons_init(struct device_node *node, const char *mc_prop_name);
#endif /* _POWERNV_H */

View File

@ -0,0 +1,69 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Ultravisor high level interfaces
*
* Copyright 2019, IBM Corporation.
*
*/
#include <linux/init.h>
#include <linux/printk.h>
#include <linux/of_fdt.h>
#include <linux/of.h>
#include <asm/ultravisor.h>
#include <asm/firmware.h>
#include <asm/machdep.h>
#include "powernv.h"
static struct kobject *ultravisor_kobj;
int __init early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
int depth, void *data)
{
if (!of_flat_dt_is_compatible(node, "ibm,ultravisor"))
return 0;
powerpc_firmware_features |= FW_FEATURE_ULTRAVISOR;
pr_debug("Ultravisor detected!\n");
return 1;
}
static struct memcons *uv_memcons;
static ssize_t uv_msglog_read(struct file *file, struct kobject *kobj,
struct bin_attribute *bin_attr, char *to,
loff_t pos, size_t count)
{
return memcons_copy(uv_memcons, to, pos, count);
}
static struct bin_attribute uv_msglog_attr = {
.attr = {.name = "msglog", .mode = 0400},
.read = uv_msglog_read
};
static int __init uv_init(void)
{
struct device_node *node;
if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
return 0;
node = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware");
if (!node)
return -ENODEV;
uv_memcons = memcons_init(node, "memcons");
if (!uv_memcons)
return -ENOENT;
uv_msglog_attr.size = memcons_get_size(uv_memcons);
ultravisor_kobj = kobject_create_and_add("ultravisor", firmware_kobj);
if (!ultravisor_kobj)
return -ENOMEM;
return sysfs_create_bin_file(ultravisor_kobj, &uv_msglog_attr);
}
machine_subsys_initcall(powernv, uv_init);
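
Net effect of uv_init(): on an ultravisor machine whose device tree carries an
ibm,uv-firmware node with a "memcons" property, the ultravisor's in-memory
console log becomes readable from userspace, presumably as
/sys/firmware/ultravisor/msglog (firmware_kobj is the /sys/firmware root),
mirroring the existing OPAL msglog.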

View File

@ -621,7 +621,8 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
#ifdef CONFIG_IOMMU_API
static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
long *tce, enum dma_data_direction *direction)
long *tce, enum dma_data_direction *direction,
bool realmode)
{
long rc;
unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
@ -649,7 +650,7 @@ static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
struct iommu_table_ops iommu_table_lpar_multi_ops = {
.set = tce_buildmulti_pSeriesLP,
#ifdef CONFIG_IOMMU_API
.exchange = tce_exchange_pseries,
.xchg_no_kill = tce_exchange_pseries,
#endif
.clear = tce_freemulti_pSeriesLP,
.get = tce_get_pSeriesLP

View File

@ -435,7 +435,7 @@ static int tce_iommu_clear(struct tce_container *container,
unsigned long oldhpa;
long ret;
enum dma_data_direction direction;
unsigned long lastentry = entry + pages;
unsigned long lastentry = entry + pages, firstentry = entry;
for ( ; entry < lastentry; ++entry) {
if (tbl->it_indirect_levels && tbl->it_userspace) {
@ -460,7 +460,7 @@ static int tce_iommu_clear(struct tce_container *container,
direction = DMA_NONE;
oldhpa = 0;
ret = iommu_tce_xchg(container->mm, tbl, entry, &oldhpa,
ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry, &oldhpa,
&direction);
if (ret)
continue;
@ -476,6 +476,8 @@ static int tce_iommu_clear(struct tce_container *container,
tce_iommu_unuse_page(container, oldhpa);
}
iommu_tce_kill(tbl, firstentry, pages);
return 0;
}
@ -518,8 +520,8 @@ static long tce_iommu_build(struct tce_container *container,
hpa |= offset;
dirtmp = direction;
ret = iommu_tce_xchg(container->mm, tbl, entry + i, &hpa,
&dirtmp);
ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry + i,
&hpa, &dirtmp);
if (ret) {
tce_iommu_unuse_page(container, hpa);
pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
@ -536,6 +538,8 @@ static long tce_iommu_build(struct tce_container *container,
if (ret)
tce_iommu_clear(container, tbl, entry, i);
else
iommu_tce_kill(tbl, entry, pages);
return ret;
}
@ -572,8 +576,8 @@ static long tce_iommu_build_v2(struct tce_container *container,
if (mm_iommu_mapped_inc(mem))
break;
ret = iommu_tce_xchg(container->mm, tbl, entry + i, &hpa,
&dirtmp);
ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry + i,
&hpa, &dirtmp);
if (ret) {
/* dirtmp cannot be DMA_NONE here */
tce_iommu_unuse_page_v2(container, tbl, entry + i);
@ -593,6 +597,8 @@ static long tce_iommu_build_v2(struct tce_container *container,
if (ret)
tce_iommu_clear(container, tbl, entry, i);
else
iommu_tce_kill(tbl, entry, pages);
return ret;
}