KVM/arm64 fixes for 6.2, take #1

- Fix the PMCR_EL0 reset value after the PMU rework
 
 - Correctly handle S2 fault triggered by a S1 page table walk
   by not always classifying it as a write, as this breaks on
   R/O memslots
 
 - Document why we cannot exit with KVM_EXIT_MMIO when taking
   a write fault from a S1 PTW on a R/O memslot
 
 - Put the Apple M2 on the naughty step for not being able to
   correctly implement the vgic SEIS feature, just like the M1
   before it
 
 - Reviewer updates: Alex is stepping down, replaced by Zenghui
 -----BEGIN PGP SIGNATURE-----
 
 iQJDBAABCgAtFiEEn9UcU+C1Yxj9lZw9I9DQutE9ekMFAmO27gQPHG1hekBrZXJu
 ZWwub3JnAAoJECPQ0LrRPXpDwioP/A0UE7ujSxv3dlBstBhmtzOoX64pRufX01Kr
 1oF24M1VuTVLwl3pp1nWH10SVWv5kukYZJAJ/3tDJOaMt/Q9c0exPCPc95i2p/r7
 OC9j8rZVZnjGN6sAP5zazIT67tSanyLDeCC+j4J1pw20r2tB67LKSOoozEb5How7
 CX+Oa2OiEiI34jp33v3mFQ3VxY3714QUMBUK7n+L29IFXGmQp6dfbhn2iY3uNpoU
 YYrkPzBLUC1H//oCx0qoDDCXXeOKMGuWP1At5GIDz6ZSCBVpKdVbftCC59Dk7dDz
 7BdQ5JoEc15RTZajdopOog4RV4YHP8VszaClhCA1ML0Pd2Mf4UVLlPnn7F+3yR3r
 pMgjlOAlLJwHiwggJZ0EQ0wFdx9LuGeu3OwckGE/JxeEwaMdzGAEfcFoAGZV0ExZ
 7riiKS+NmtrkuE9wJfWOrpDiseymmUbuhHq+F/HDq/SP6UdezAylkcxZRuN/ZCRc
 9XVhTcWu/UPxoaSSd/sB4l9X8Ey/cZe28+kV7eE/m2g79bZKxHd4UUOUymb/aJxj
 og10A6i0B1DOWMtKJ9hEsB6wI6Hllrqcbo8ewX1znKoKbfHZDeU/N5D4ZvTz85sf
 zyqbsSZPDxMOwBPYTqZqG65tEWWw68HIJ9cqQzKDehN1Xm1coNIWSPrUnBMpSsWJ
 qDQNmIzf
 =XBtQ
 -----END PGP SIGNATURE-----

Merge tag 'kvmarm-fixes-6.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into kvm-master

KVM/arm64 fixes for 6.2, take #1

- Fix the PMCR_EL0 reset value after the PMU rework

- Correctly handle S2 fault triggered by a S1 page table walk
  by not always classifying it as a write, as this breaks on
  R/O memslots

- Document why we cannot exit with KVM_EXIT_MMIO when taking
  a write fault from a S1 PTW on a R/O memslot

- Put the Apple M2 on the naughty step for not being able to
  correctly implement the vgic SEIS feature, just like the M1
  before it

- Reviewer updates: Alex is stepping down, replaced by Zenghui
commit 71d0393576
Paolo Bonzini, 2023-01-11 13:31:53 -05:00

11 changed files with 69 additions and 40 deletions


@@ -1354,6 +1354,14 @@ the memory region are automatically reflected into the guest. For example, an
 mmap() that affects the region will be made visible immediately. Another
 example is madvise(MADV_DROP).
 
+Note: On arm64, a write generated by the page-table walker (to update
+the Access and Dirty flags, for example) never results in a
+KVM_EXIT_MMIO exit when the slot has the KVM_MEM_READONLY flag. This
+is because KVM cannot provide the data that would be written by the
+page-table walker, making it impossible to emulate the access.
+Instead, an abort (data abort if the cause of the page-table update
+was a load or a store, instruction abort if it was an instruction
+fetch) is injected in the guest.
+
 4.36 KVM_SET_TSS_ADDR
 ---------------------
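
As a concrete illustration of the documented behaviour, here is a minimal userspace sketch of registering a read-only memslot (the helper name, slot number and addresses are made up for the example; the ioctl, struct and flag are the standard KVM userspace API):

#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

/* Register 'mem' as a read-only guest memslot. Guest data writes to
 * such a slot normally come back to userspace as KVM_EXIT_MMIO; per
 * the note above, arm64 writes generated by the stage-1 page-table
 * walker (e.g. Access/Dirty flag updates) cannot be emulated, so an
 * abort is injected into the guest instead. */
static int set_ro_memslot(int vm_fd, uint32_t slot, uint64_t gpa,
			  void *mem, uint64_t size)
{
	struct kvm_userspace_memory_region region = {
		.slot = slot,
		.flags = KVM_MEM_READONLY,
		.guest_phys_addr = gpa,
		.memory_size = size,
		.userspace_addr = (uint64_t)mem,
	};

	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}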


@@ -11356,9 +11356,9 @@ F: virt/kvm/*
 KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
 M: Marc Zyngier <maz@kernel.org>
 R: James Morse <james.morse@arm.com>
-R: Alexandru Elisei <alexandru.elisei@arm.com>
 R: Suzuki K Poulose <suzuki.poulose@arm.com>
 R: Oliver Upton <oliver.upton@linux.dev>
+R: Zenghui Yu <yuzenghui@huawei.com>
 L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L: kvmarm@lists.linux.dev
 L: kvmarm@lists.cs.columbia.edu (deprecated, moderated for non-subscribers)


@@ -124,6 +124,8 @@
 #define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025
 #define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028
 #define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029
+#define APPLE_CPU_PART_M2_BLIZZARD 0x032
+#define APPLE_CPU_PART_M2_AVALANCHE 0x033
 
 #define AMPERE_CPU_PART_AMPERE1 0xAC3
@@ -177,6 +179,8 @@
 #define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO)
 #define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX)
 #define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX)
+#define MIDR_APPLE_M2_BLIZZARD MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD)
+#define MIDR_APPLE_M2_AVALANCHE MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE)
 #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
 
 /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */


@@ -114,6 +114,15 @@
 #define ESR_ELx_FSC_ACCESS (0x08)
 #define ESR_ELx_FSC_FAULT (0x04)
 #define ESR_ELx_FSC_PERM (0x0C)
+#define ESR_ELx_FSC_SEA_TTW0 (0x14)
+#define ESR_ELx_FSC_SEA_TTW1 (0x15)
+#define ESR_ELx_FSC_SEA_TTW2 (0x16)
+#define ESR_ELx_FSC_SEA_TTW3 (0x17)
+#define ESR_ELx_FSC_SECC (0x18)
+#define ESR_ELx_FSC_SECC_TTW0 (0x1c)
+#define ESR_ELx_FSC_SECC_TTW1 (0x1d)
+#define ESR_ELx_FSC_SECC_TTW2 (0x1e)
+#define ESR_ELx_FSC_SECC_TTW3 (0x1f)
 
 /* ISS field definitions for Data Aborts */
 #define ESR_ELx_ISV_SHIFT (24)
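
The new SEA_TTW0..TTW3 and SECC_TTW0..TTW3 encodings are contiguous, which keeps range checks over the fault status code cheap. A minimal sketch, assuming the values above (the helper name is illustrative; esr.h spells the 0x3f field mask ESR_ELx_FSC):

#include <stdbool.h>

#define ESR_ELx_FSC		(0x3f)	/* FSC field within ESR_ELx */
#define ESR_ELx_FSC_SEA_TTW0	(0x14)
#define ESR_ELx_FSC_SEA_TTW3	(0x17)

/* True if the syndrome encodes a synchronous external abort taken on
 * a translation table walk, at any of the four walk levels. */
static bool fsc_is_sea_on_ttw(unsigned long esr)
{
	unsigned long fsc = esr & ESR_ELx_FSC;

	return fsc >= ESR_ELx_FSC_SEA_TTW0 && fsc <= ESR_ELx_FSC_SEA_TTW3;
}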


@@ -319,21 +319,6 @@
 			 BIT(18) | \
 			 GENMASK(16, 15))
 
-/* For compatibility with fault code shared with 32-bit */
-#define FSC_FAULT ESR_ELx_FSC_FAULT
-#define FSC_ACCESS ESR_ELx_FSC_ACCESS
-#define FSC_PERM ESR_ELx_FSC_PERM
-#define FSC_SEA ESR_ELx_FSC_EXTABT
-#define FSC_SEA_TTW0 (0x14)
-#define FSC_SEA_TTW1 (0x15)
-#define FSC_SEA_TTW2 (0x16)
-#define FSC_SEA_TTW3 (0x17)
-#define FSC_SECC (0x18)
-#define FSC_SECC_TTW0 (0x1c)
-#define FSC_SECC_TTW1 (0x1d)
-#define FSC_SECC_TTW2 (0x1e)
-#define FSC_SECC_TTW3 (0x1f)
-
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
 #define HPFAR_MASK (~UL(0xf))
 
 /*


@@ -349,16 +349,16 @@ static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *vcpu)
 static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
 {
 	switch (kvm_vcpu_trap_get_fault(vcpu)) {
-	case FSC_SEA:
-	case FSC_SEA_TTW0:
-	case FSC_SEA_TTW1:
-	case FSC_SEA_TTW2:
-	case FSC_SEA_TTW3:
-	case FSC_SECC:
-	case FSC_SECC_TTW0:
-	case FSC_SECC_TTW1:
-	case FSC_SECC_TTW2:
-	case FSC_SECC_TTW3:
+	case ESR_ELx_FSC_EXTABT:
+	case ESR_ELx_FSC_SEA_TTW0:
+	case ESR_ELx_FSC_SEA_TTW1:
+	case ESR_ELx_FSC_SEA_TTW2:
+	case ESR_ELx_FSC_SEA_TTW3:
+	case ESR_ELx_FSC_SECC:
+	case ESR_ELx_FSC_SECC_TTW0:
+	case ESR_ELx_FSC_SECC_TTW1:
+	case ESR_ELx_FSC_SECC_TTW2:
+	case ESR_ELx_FSC_SECC_TTW3:
 		return true;
 	default:
 		return false;
@@ -373,8 +373,26 @@ static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
 static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
 {
-	if (kvm_vcpu_abt_iss1tw(vcpu))
-		return true;
+	if (kvm_vcpu_abt_iss1tw(vcpu)) {
+		/*
+		 * Only a permission fault on a S1PTW should be
+		 * considered as a write. Otherwise, page tables baked
+		 * in a read-only memslot will result in an exception
+		 * being delivered in the guest.
+		 *
+		 * The drawback is that we end-up faulting twice if the
+		 * guest is using any of HW AF/DB: a translation fault
+		 * to map the page containing the PT (read only at
+		 * first), then a permission fault to allow the flags
+		 * to be set.
+		 */
+		switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
+		case ESR_ELx_FSC_PERM:
+			return true;
+		default:
+			return false;
+		}
+	}
 
 	if (kvm_vcpu_trap_is_iabt(vcpu))
 		return false;
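
The comment spells out the trade-off: with hardware Access/Dirty management, a page-table walk that touches an unmapped page now faults twice, first a translation fault (page mapped read-only), then a permission fault (write access granted). Distilled to a standalone predicate, with hypothetical parameters standing in for the vcpu accessors:

#include <stdbool.h>
#include <stdint.h>

#define ESR_ELx_FSC_PERM (0x0C)

/* s1ptw: the abort came from a stage-1 page-table walk;
 * wnr: the Write-not-Read syndrome bit of an ordinary data abort. */
static bool is_write_fault(bool s1ptw, uint8_t fault_type, bool wnr)
{
	if (s1ptw)
		/* Only an AF/DB update hitting an already-mapped page
		 * (a permission fault) is a real write; classifying
		 * everything else as a write would break page tables
		 * placed in a read-only memslot. */
		return fault_type == ESR_ELx_FSC_PERM;

	return wnr;
}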


@@ -60,7 +60,7 @@ static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
 	 */
 	if (!(esr & ESR_ELx_S1PTW) &&
 	    (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
-	     (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
+	     (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM)) {
 		if (!__translate_far_to_hpfar(far, &hpfar))
 			return false;
 	} else {


@@ -367,7 +367,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
 	if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
 		bool valid;
 
-		valid = kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
+		valid = kvm_vcpu_trap_get_fault_type(vcpu) == ESR_ELx_FSC_FAULT &&
 			kvm_vcpu_dabt_isvalid(vcpu) &&
 			!kvm_vcpu_abt_issea(vcpu) &&
 			!kvm_vcpu_abt_iss1tw(vcpu);


@@ -1212,7 +1212,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
 	VM_BUG_ON(write_fault && exec_fault);
 
-	if (fault_status == FSC_PERM && !write_fault && !exec_fault) {
+	if (fault_status == ESR_ELx_FSC_PERM && !write_fault && !exec_fault) {
 		kvm_err("Unexpected L2 read permission error\n");
 		return -EFAULT;
 	}
@@ -1277,7 +1277,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 * only exception to this is when dirty logging is enabled at runtime
 	 * and a write fault needs to collapse a block entry into a table.
 	 */
-	if (fault_status != FSC_PERM || (logging_active && write_fault)) {
+	if (fault_status != ESR_ELx_FSC_PERM ||
+	    (logging_active && write_fault)) {
 		ret = kvm_mmu_topup_memory_cache(memcache,
 						 kvm_mmu_cache_min_pages(kvm));
 		if (ret)
@@ -1342,7 +1343,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 * backed by a THP and thus use block mapping if possible.
 	 */
 	if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
-		if (fault_status == FSC_PERM && fault_granule > PAGE_SIZE)
+		if (fault_status == ESR_ELx_FSC_PERM &&
+		    fault_granule > PAGE_SIZE)
 			vma_pagesize = fault_granule;
 		else
 			vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
@@ -1350,7 +1352,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 							   &fault_ipa);
 	}
 
-	if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
+	if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) {
 		/* Check the VMM hasn't introduced a new disallowed VMA */
 		if (kvm_vma_mte_allowed(vma)) {
 			sanitise_mte_tags(kvm, pfn, vma_pagesize);
@@ -1376,7 +1378,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 * permissions only if vma_pagesize equals fault_granule. Otherwise,
 	 * kvm_pgtable_stage2_map() should be called to change block size.
 	 */
-	if (fault_status == FSC_PERM && vma_pagesize == fault_granule)
+	if (fault_status == ESR_ELx_FSC_PERM && vma_pagesize == fault_granule)
 		ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
 	else
 		ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
@@ -1441,7 +1443,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
 	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
 
-	if (fault_status == FSC_FAULT) {
+	if (fault_status == ESR_ELx_FSC_FAULT) {
 		/* Beyond sanitised PARange (which is the IPA limit) */
 		if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
 			kvm_inject_size_fault(vcpu);
@@ -1476,8 +1478,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 				kvm_vcpu_get_hfar(vcpu), fault_ipa);
 
 	/* Check the stage-2 fault is trans. fault or write fault */
-	if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
-	    fault_status != FSC_ACCESS) {
+	if (fault_status != ESR_ELx_FSC_FAULT &&
+	    fault_status != ESR_ELx_FSC_PERM &&
+	    fault_status != ESR_ELx_FSC_ACCESS) {
 		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
 			kvm_vcpu_trap_get_class(vcpu),
 			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
@@ -1539,7 +1542,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 	/* Userspace should not be able to register out-of-bounds IPAs */
 	VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm));
 
-	if (fault_status == FSC_ACCESS) {
+	if (fault_status == ESR_ELx_FSC_ACCESS) {
 		handle_access_fault(vcpu, fault_ipa);
 		ret = 1;
 		goto out_unlock;


@@ -646,7 +646,7 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 		return;
 
 	/* Only preserve PMCR_EL0.N, and reset the rest to 0 */
-	pmcr = read_sysreg(pmcr_el0) & ARMV8_PMU_PMCR_N_MASK;
+	pmcr = read_sysreg(pmcr_el0) & (ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT);
 	if (!kvm_supports_32bit_el0())
 		pmcr |= ARMV8_PMU_PMCR_LC;
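
The one-liner above is a pure masking fix: PMCR_EL0.N lives in bits [15:11], while ARMV8_PMU_PMCR_N_MASK (0x1f) is an unshifted field mask, so it has to be moved into place before being applied. A small standalone sketch, assuming those two macro values:

#include <stdint.h>
#include <stdio.h>

#define ARMV8_PMU_PMCR_N_SHIFT	11
#define ARMV8_PMU_PMCR_N_MASK	0x1f

int main(void)
{
	uint64_t hw_pmcr = 6ULL << ARMV8_PMU_PMCR_N_SHIFT; /* N = 6 counters */

	/* The unshifted mask selects PMCR_EL0[4:0] and drops N... */
	uint64_t before = hw_pmcr & ARMV8_PMU_PMCR_N_MASK;
	/* ...while shifting it first preserves the field in place. */
	uint64_t after = hw_pmcr &
			 ((uint64_t)ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT);

	printf("before: %#llx, after: %#llx\n",
	       (unsigned long long)before, (unsigned long long)after);
	return 0;
}

Run it and 'before' comes out 0 (N lost, hence the wrong reset value), while 'after' keeps 0x3000.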


@@ -616,6 +616,8 @@ static const struct midr_range broken_seis[] = {
 	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
 	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
 	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
+	MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
+	MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
 	{},
 };
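
For reference, MIDR_ALL_VERSIONS matches every revision of a part, so one entry per core type is enough. The table is then checked against the running CPU's MIDR; elsewhere in vgic-v3.c the gate looks roughly like this (a sketch from memory, not a verbatim quote of the file):

static bool vgic_v3_broken_seis(void)
{
	/* SEIS is only treated as broken if the CPU both advertises
	 * it in ICH_VTR_EL2 and appears in the deny list above. */
	return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) &&
		is_midr_in_range_list(read_cpuid_id(), broken_seis));
}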