mirror of
https://github.com/edk2-porting/linux-next.git
synced 2024-12-17 09:43:59 +08:00
Merge branch 'topic/ppc-kvm' into next
Merge the topic branch we're sharing with the kvm-ppc tree.
This commit is contained in:
commit
da0e7e6276
@ -3201,6 +3201,71 @@ struct kvm_reinject_control {
|
||||
pit_reinject = 0 (!reinject mode) is recommended, unless running an old
|
||||
operating system that uses the PIT for timing (e.g. Linux 2.4.x).
|
||||
|
||||
4.99 KVM_PPC_CONFIGURE_V3_MMU
|
||||
|
||||
Capability: KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3
|
||||
Architectures: ppc
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_ppc_mmuv3_cfg (in)
|
||||
Returns: 0 on success,
|
||||
-EFAULT if struct kvm_ppc_mmuv3_cfg cannot be read,
|
||||
-EINVAL if the configuration is invalid
|
||||
|
||||
This ioctl controls whether the guest will use radix or HPT (hashed
|
||||
page table) translation, and sets the pointer to the process table for
|
||||
the guest.
|
||||
|
||||
struct kvm_ppc_mmuv3_cfg {
|
||||
__u64 flags;
|
||||
__u64 process_table;
|
||||
};
|
||||
|
||||
There are two bits that can be set in flags: KVM_PPC_MMUV3_RADIX and
|
||||
KVM_PPC_MMUV3_GTSE. KVM_PPC_MMUV3_RADIX, if set, configures the guest
|
||||
to use radix tree translation, and if clear, to use HPT translation.
|
||||
KVM_PPC_MMUV3_GTSE, if set and if KVM permits it, configures the guest
|
||||
to be able to use the global TLB and SLB invalidation instructions;
|
||||
if clear, the guest may not use these instructions.
|
||||
|
||||
The process_table field specifies the address and size of the guest
|
||||
process table, which is in the guest's space. This field is formatted
|
||||
as the second doubleword of the partition table entry, as defined in
|
||||
the Power ISA V3.00, Book III section 5.7.6.1.
|
||||
|
||||
4.100 KVM_PPC_GET_RMMU_INFO
|
||||
|
||||
Capability: KVM_CAP_PPC_RADIX_MMU
|
||||
Architectures: ppc
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_ppc_rmmu_info (out)
|
||||
Returns: 0 on success,
|
||||
-EFAULT if struct kvm_ppc_rmmu_info cannot be written,
|
||||
-EINVAL if no useful information can be returned
|
||||
|
||||
This ioctl returns a structure containing two things: (a) a list
|
||||
containing supported radix tree geometries, and (b) a list that maps
|
||||
page sizes to the values to put in the "AP" (actual page size) field for the tlbie
|
||||
(TLB invalidate entry) instruction.
|
||||
|
||||
struct kvm_ppc_rmmu_info {
|
||||
struct kvm_ppc_radix_geom {
|
||||
__u8 page_shift;
|
||||
__u8 level_bits[4];
|
||||
__u8 pad[3];
|
||||
} geometries[8];
|
||||
__u32 ap_encodings[8];
|
||||
};
|
||||
|
||||
The geometries[] field gives up to 8 supported geometries for the
|
||||
radix page table, in terms of the log base 2 of the smallest page
|
||||
size, and the number of bits indexed at each level of the tree, from
|
||||
the PTE level up to the PGD level in that order. Any unused entries
|
||||
will have 0 in the page_shift field.
|
||||
|
||||
The ap_encodings[] field gives the supported page sizes and their AP field
|
||||
encodings, encoded with the AP value in the top 3 bits and the log
|
||||
base 2 of the page size in the bottom 6 bits.
|
||||
|
||||
5. The kvm_run structure
|
||||
------------------------
|
||||
|
||||
@ -3942,3 +4007,21 @@ In order to use SynIC, it has to be activated by setting this
|
||||
capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this
|
||||
will disable the use of APIC hardware virtualization even if supported
|
||||
by the CPU, as it's incompatible with SynIC auto-EOI behavior.
|
||||
|
||||
8.3 KVM_CAP_PPC_RADIX_MMU
|
||||
|
||||
Architectures: ppc
|
||||
|
||||
This capability, if KVM_CHECK_EXTENSION indicates that it is
|
||||
available, means that the kernel can support guests using the
|
||||
radix MMU defined in Power ISA V3.00 (as implemented in the POWER9
|
||||
processor).
|
||||
|
||||
8.4 KVM_CAP_PPC_HASH_MMU_V3
|
||||
|
||||
Architectures: ppc
|
||||
|
||||
This capability, if KVM_CHECK_EXTENSION indicates that it is
|
||||
available, means that the kernel can support guests using the
|
||||
hashed page table MMU defined in Power ISA V3.00 (as implemented in
|
||||
the POWER9 processor), including in-memory segment tables.
|
||||
|
@ -44,10 +44,20 @@ struct patb_entry {
|
||||
};
|
||||
extern struct patb_entry *partition_tb;
|
||||
|
||||
/* Bits in patb0 field */
|
||||
#define PATB_HR (1UL << 63)
|
||||
#define PATB_GR (1UL << 63)
|
||||
#define RPDB_MASK 0x0ffffffffffff00fUL
|
||||
#define RPDB_SHIFT (1UL << 8)
|
||||
#define RTS1_SHIFT 61 /* top 2 bits of radix tree size */
|
||||
#define RTS1_MASK (3UL << RTS1_SHIFT)
|
||||
#define RTS2_SHIFT 5 /* bottom 3 bits of radix tree size */
|
||||
#define RTS2_MASK (7UL << RTS2_SHIFT)
|
||||
#define RPDS_MASK 0x1f /* root page dir. size field */
|
||||
|
||||
/* Bits in patb1 field */
|
||||
#define PATB_GR (1UL << 63) /* guest uses radix; must match HR */
|
||||
#define PRTS_MASK 0x1f /* process table size field */
|
||||
|
||||
/*
|
||||
* Limit process table to PAGE_SIZE table. This
|
||||
* also limits the max pid we can support.
|
||||
@ -138,5 +148,11 @@ static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
|
||||
extern int (*register_process_table)(unsigned long base, unsigned long page_size,
|
||||
unsigned long tbl_size);
|
||||
|
||||
#ifdef CONFIG_PPC_PSERIES
|
||||
extern void radix_init_pseries(void);
|
||||
#else
|
||||
static inline void radix_init_pseries(void) { };
|
||||
#endif
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
#endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
|
||||
|
@ -97,6 +97,15 @@
|
||||
ld reg,PACAKBASE(r13); \
|
||||
ori reg,reg,(ABS_ADDR(label))@l;
|
||||
|
||||
/*
|
||||
* Branches from unrelocated code (e.g., interrupts) to labels outside
|
||||
* head-y require >64K offsets.
|
||||
*/
|
||||
#define __LOAD_FAR_HANDLER(reg, label) \
|
||||
ld reg,PACAKBASE(r13); \
|
||||
ori reg,reg,(ABS_ADDR(label))@l; \
|
||||
addis reg,reg,(ABS_ADDR(label))@h;
|
||||
|
||||
/* Exception register prefixes */
|
||||
#define EXC_HV H
|
||||
#define EXC_STD
|
||||
@ -227,13 +236,49 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
|
||||
mtctr reg; \
|
||||
bctr
|
||||
|
||||
#define BRANCH_LINK_TO_FAR(reg, label) \
|
||||
__LOAD_FAR_HANDLER(reg, label); \
|
||||
mtctr reg; \
|
||||
bctrl
|
||||
|
||||
/*
|
||||
* KVM requires __LOAD_FAR_HANDLER.
|
||||
*
|
||||
* __BRANCH_TO_KVM_EXIT branches are also a special case because they
|
||||
* explicitly use r9 then reload it from PACA before branching. Hence
|
||||
* the double-underscore.
|
||||
*/
|
||||
#define __BRANCH_TO_KVM_EXIT(area, label) \
|
||||
mfctr r9; \
|
||||
std r9,HSTATE_SCRATCH1(r13); \
|
||||
__LOAD_FAR_HANDLER(r9, label); \
|
||||
mtctr r9; \
|
||||
ld r9,area+EX_R9(r13); \
|
||||
bctr
|
||||
|
||||
#define BRANCH_TO_KVM(reg, label) \
|
||||
__LOAD_FAR_HANDLER(reg, label); \
|
||||
mtctr reg; \
|
||||
bctr
|
||||
|
||||
#else
|
||||
#define BRANCH_TO_COMMON(reg, label) \
|
||||
b label
|
||||
|
||||
#define BRANCH_LINK_TO_FAR(reg, label) \
|
||||
bl label
|
||||
|
||||
#define BRANCH_TO_KVM(reg, label) \
|
||||
b label
|
||||
|
||||
#define __BRANCH_TO_KVM_EXIT(area, label) \
|
||||
ld r9,area+EX_R9(r13); \
|
||||
b label
|
||||
|
||||
#endif
|
||||
|
||||
#define __KVM_HANDLER_PROLOG(area, n) \
|
||||
|
||||
#define __KVM_HANDLER(area, h, n) \
|
||||
BEGIN_FTR_SECTION_NESTED(947) \
|
||||
ld r10,area+EX_CFAR(r13); \
|
||||
std r10,HSTATE_CFAR(r13); \
|
||||
@ -243,30 +288,28 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
|
||||
std r10,HSTATE_PPR(r13); \
|
||||
END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \
|
||||
ld r10,area+EX_R10(r13); \
|
||||
stw r9,HSTATE_SCRATCH1(r13); \
|
||||
ld r9,area+EX_R9(r13); \
|
||||
std r12,HSTATE_SCRATCH0(r13); \
|
||||
|
||||
#define __KVM_HANDLER(area, h, n) \
|
||||
__KVM_HANDLER_PROLOG(area, n) \
|
||||
li r12,n; \
|
||||
b kvmppc_interrupt
|
||||
sldi r12,r9,32; \
|
||||
ori r12,r12,(n); \
|
||||
/* This reloads r9 before branching to kvmppc_interrupt */ \
|
||||
__BRANCH_TO_KVM_EXIT(area, kvmppc_interrupt)
|
||||
|
||||
#define __KVM_HANDLER_SKIP(area, h, n) \
|
||||
cmpwi r10,KVM_GUEST_MODE_SKIP; \
|
||||
ld r10,area+EX_R10(r13); \
|
||||
beq 89f; \
|
||||
stw r9,HSTATE_SCRATCH1(r13); \
|
||||
BEGIN_FTR_SECTION_NESTED(948) \
|
||||
ld r9,area+EX_PPR(r13); \
|
||||
std r9,HSTATE_PPR(r13); \
|
||||
ld r10,area+EX_PPR(r13); \
|
||||
std r10,HSTATE_PPR(r13); \
|
||||
END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \
|
||||
ld r9,area+EX_R9(r13); \
|
||||
ld r10,area+EX_R10(r13); \
|
||||
std r12,HSTATE_SCRATCH0(r13); \
|
||||
li r12,n; \
|
||||
b kvmppc_interrupt; \
|
||||
sldi r12,r9,32; \
|
||||
ori r12,r12,(n); \
|
||||
/* This reloads r9 before branching to kvmppc_interrupt */ \
|
||||
__BRANCH_TO_KVM_EXIT(area, kvmppc_interrupt); \
|
||||
89: mtocrf 0x80,r9; \
|
||||
ld r9,area+EX_R9(r13); \
|
||||
ld r10,area+EX_R10(r13); \
|
||||
b kvmppc_skip_##h##interrupt
|
||||
|
||||
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
|
||||
@ -393,12 +436,12 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
|
||||
EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_STD)
|
||||
|
||||
#define STD_RELON_EXCEPTION_HV(loc, vec, label) \
|
||||
/* No guest interrupts come through here */ \
|
||||
SET_SCRATCH0(r13); /* save r13 */ \
|
||||
EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, EXC_HV, NOTEST, vec);
|
||||
EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, \
|
||||
EXC_HV, KVMTEST_HV, vec);
|
||||
|
||||
#define STD_RELON_EXCEPTION_HV_OOL(vec, label) \
|
||||
EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \
|
||||
EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, vec); \
|
||||
EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
|
||||
|
||||
/* This associates vector numbers with bits in paca->irq_happened */
|
||||
@ -475,10 +518,10 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
|
||||
|
||||
#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label) \
|
||||
_MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \
|
||||
EXC_HV, SOFTEN_NOTEST_HV)
|
||||
EXC_HV, SOFTEN_TEST_HV)
|
||||
|
||||
#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label) \
|
||||
EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec); \
|
||||
EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \
|
||||
EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV)
|
||||
|
||||
/*
|
||||
|
@ -224,7 +224,7 @@ name:
|
||||
|
||||
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
|
||||
#define TRAMP_KVM_BEGIN(name) \
|
||||
TRAMP_REAL_BEGIN(name)
|
||||
TRAMP_VIRT_BEGIN(name)
|
||||
#else
|
||||
#define TRAMP_KVM_BEGIN(name)
|
||||
#endif
|
||||
|
@ -278,6 +278,7 @@
|
||||
#define H_CLEAR_HPT 0x358
|
||||
#define H_RESIZE_HPT_PREPARE 0x36C
|
||||
#define H_RESIZE_HPT_COMMIT 0x370
|
||||
#define H_REGISTER_PROC_TBL 0x37C
|
||||
#define H_SIGNAL_SYS_RESET 0x380
|
||||
#define MAX_HCALL_OPCODE H_SIGNAL_SYS_RESET
|
||||
|
||||
@ -315,6 +316,16 @@
|
||||
#define H_SIGNAL_SYS_RESET_ALL_OTHERS -2
|
||||
/* >= 0 values are CPU number */
|
||||
|
||||
/* Flag values used in H_REGISTER_PROC_TBL hcall */
|
||||
#define PROC_TABLE_OP_MASK 0x18
|
||||
#define PROC_TABLE_DEREG 0x10
|
||||
#define PROC_TABLE_NEW 0x18
|
||||
#define PROC_TABLE_TYPE_MASK 0x06
|
||||
#define PROC_TABLE_HPT_SLB 0x00
|
||||
#define PROC_TABLE_HPT_PT 0x02
|
||||
#define PROC_TABLE_RADIX 0x04
|
||||
#define PROC_TABLE_GTSE 0x01
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
/**
|
||||
|
@ -170,6 +170,8 @@ extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run,
|
||||
unsigned long status);
|
||||
extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr,
|
||||
unsigned long slb_v, unsigned long valid);
|
||||
extern int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
unsigned long gpa, gva_t ea, int is_store);
|
||||
|
||||
extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
|
||||
extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu);
|
||||
@ -182,6 +184,25 @@ extern void kvmppc_mmu_hpte_sysexit(void);
|
||||
extern int kvmppc_mmu_hv_init(void);
|
||||
extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc);
|
||||
|
||||
extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run,
|
||||
struct kvm_vcpu *vcpu,
|
||||
unsigned long ea, unsigned long dsisr);
|
||||
extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
struct kvmppc_pte *gpte, bool data, bool iswrite);
|
||||
extern int kvmppc_init_vm_radix(struct kvm *kvm);
|
||||
extern void kvmppc_free_radix(struct kvm *kvm);
|
||||
extern int kvmppc_radix_init(void);
|
||||
extern void kvmppc_radix_exit(void);
|
||||
extern int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn);
|
||||
extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn);
|
||||
extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn);
|
||||
extern long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot, unsigned long *map);
|
||||
extern int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
|
||||
|
||||
/* XXX remove this export when load_last_inst() is generic */
|
||||
extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
|
||||
extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
|
||||
@ -211,8 +232,11 @@ extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
|
||||
extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
|
||||
unsigned long pte_index, unsigned long avpn,
|
||||
unsigned long *hpret);
|
||||
extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
|
||||
extern long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot, unsigned long *map);
|
||||
extern void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
|
||||
struct kvm_memory_slot *memslot,
|
||||
unsigned long *map);
|
||||
extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
|
||||
unsigned long mask);
|
||||
extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr);
|
||||
|
@ -36,6 +36,12 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
||||
|
||||
/*
 * Return the VM's arch.radix flag as a bool.
 * NOTE(review): presumably non-zero means the guest uses radix rather than
 * HPT translation -- confirm against the code that sets kvm->arch.radix.
 */
static inline bool kvm_is_radix(struct kvm *kvm)
|
||||
{
|
||||
return kvm->arch.radix;
|
||||
}
|
||||
|
||||
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
|
||||
#endif
|
||||
|
||||
|
@ -263,7 +263,11 @@ struct kvm_arch {
|
||||
unsigned long hpt_mask;
|
||||
atomic_t hpte_mod_interest;
|
||||
cpumask_t need_tlb_flush;
|
||||
cpumask_t cpu_in_guest;
|
||||
int hpt_cma_alloc;
|
||||
u8 radix;
|
||||
pgd_t *pgtable;
|
||||
u64 process_table;
|
||||
struct dentry *debugfs_dir;
|
||||
struct dentry *htab_dentry;
|
||||
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
|
||||
@ -603,6 +607,7 @@ struct kvm_vcpu_arch {
|
||||
ulong fault_dar;
|
||||
u32 fault_dsisr;
|
||||
unsigned long intr_msr;
|
||||
ulong fault_gpa; /* guest real address of page fault (POWER9) */
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_BOOKE
|
||||
@ -657,6 +662,7 @@ struct kvm_vcpu_arch {
|
||||
int state;
|
||||
int ptid;
|
||||
int thread_cpu;
|
||||
int prev_cpu;
|
||||
bool timer_running;
|
||||
wait_queue_head_t cpu_run;
|
||||
|
||||
|
@ -291,6 +291,8 @@ struct kvmppc_ops {
|
||||
struct irq_bypass_producer *);
|
||||
void (*irq_bypass_del_producer)(struct irq_bypass_consumer *,
|
||||
struct irq_bypass_producer *);
|
||||
int (*configure_mmu)(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg);
|
||||
int (*get_rmmu_info)(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
|
||||
};
|
||||
|
||||
extern struct kvmppc_ops *kvmppc_hv_ops;
|
||||
|
@ -67,7 +67,6 @@ int64_t opal_pci_config_write_half_word(uint64_t phb_id, uint64_t bus_dev_func,
|
||||
int64_t opal_pci_config_write_word(uint64_t phb_id, uint64_t bus_dev_func,
|
||||
uint64_t offset, uint32_t data);
|
||||
int64_t opal_set_xive(uint32_t isn, uint16_t server, uint8_t priority);
|
||||
int64_t opal_rm_set_xive(uint32_t isn, uint16_t server, uint8_t priority);
|
||||
int64_t opal_get_xive(uint32_t isn, __be16 *server, uint8_t *priority);
|
||||
int64_t opal_register_exception_handler(uint64_t opal_exception,
|
||||
uint64_t handler_address,
|
||||
@ -220,18 +219,12 @@ int64_t opal_pci_set_power_state(uint64_t async_token, uint64_t id,
|
||||
int64_t opal_pci_poll2(uint64_t id, uint64_t data);
|
||||
|
||||
int64_t opal_int_get_xirr(uint32_t *out_xirr, bool just_poll);
|
||||
int64_t opal_rm_int_get_xirr(__be32 *out_xirr, bool just_poll);
|
||||
int64_t opal_int_set_cppr(uint8_t cppr);
|
||||
int64_t opal_int_eoi(uint32_t xirr);
|
||||
int64_t opal_rm_int_eoi(uint32_t xirr);
|
||||
int64_t opal_int_set_mfrr(uint32_t cpu, uint8_t mfrr);
|
||||
int64_t opal_rm_int_set_mfrr(uint32_t cpu, uint8_t mfrr);
|
||||
int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type,
|
||||
uint32_t pe_num, uint32_t tce_size,
|
||||
uint64_t dma_addr, uint32_t npages);
|
||||
int64_t opal_rm_pci_tce_kill(uint64_t phb_id, uint32_t kill_type,
|
||||
uint32_t pe_num, uint32_t tce_size,
|
||||
uint64_t dma_addr, uint32_t npages);
|
||||
int64_t opal_nmmu_set_ptcr(uint64_t chip_id, uint64_t ptcr);
|
||||
|
||||
/* Internal functions */
|
||||
|
@ -121,6 +121,8 @@ struct of_drconf_cell {
|
||||
#define OV1_PPC_2_06 0x02 /* set if we support PowerPC 2.06 */
|
||||
#define OV1_PPC_2_07 0x01 /* set if we support PowerPC 2.07 */
|
||||
|
||||
#define OV1_PPC_3_00 0x80 /* set if we support PowerPC 3.00 */
|
||||
|
||||
/* Option vector 2: Open Firmware options supported */
|
||||
#define OV2_REAL_MODE 0x20 /* set if we want OF in real mode */
|
||||
|
||||
@ -152,10 +154,17 @@ struct of_drconf_cell {
|
||||
#define OV5_TYPE1_AFFINITY 0x0580 /* Type 1 NUMA affinity */
|
||||
#define OV5_PRRN 0x0540 /* Platform Resource Reassignment */
|
||||
#define OV5_RESIZE_HPT 0x0601 /* Hash Page Table resizing */
|
||||
#define OV5_PFO_HW_RNG 0x0E80 /* PFO Random Number Generator */
|
||||
#define OV5_PFO_HW_842 0x0E40 /* PFO Compression Accelerator */
|
||||
#define OV5_PFO_HW_ENCR 0x0E20 /* PFO Encryption Accelerator */
|
||||
#define OV5_SUB_PROCESSORS 0x0F01 /* 1,2,or 4 Sub-Processors supported */
|
||||
#define OV5_PFO_HW_RNG 0x1180 /* PFO Random Number Generator */
|
||||
#define OV5_PFO_HW_842 0x1140 /* PFO Compression Accelerator */
|
||||
#define OV5_PFO_HW_ENCR 0x1120 /* PFO Encryption Accelerator */
|
||||
#define OV5_SUB_PROCESSORS 0x1501 /* 1,2,or 4 Sub-Processors supported */
|
||||
#define OV5_XIVE_EXPLOIT 0x1701 /* XIVE exploitation supported */
|
||||
#define OV5_MMU_RADIX_300 0x1880 /* ISA v3.00 radix MMU supported */
|
||||
#define OV5_MMU_HASH_300 0x1840 /* ISA v3.00 hash MMU supported */
|
||||
#define OV5_MMU_SEGM_RADIX 0x1820 /* radix mode (no segmentation) */
|
||||
#define OV5_MMU_PROC_TBL 0x1810 /* hcall selects SLB or proc table */
|
||||
#define OV5_MMU_SLB 0x1800 /* always use SLB */
|
||||
#define OV5_MMU_GTSE 0x1808 /* Guest translation shootdown */
|
||||
|
||||
/* Option Vector 6: IBM PAPR hints */
|
||||
#define OV6_LINUX 0x02 /* Linux is our OS */
|
||||
|
@ -274,10 +274,14 @@
|
||||
#define SPRN_DSISR 0x012 /* Data Storage Interrupt Status Register */
|
||||
#define DSISR_NOHPTE 0x40000000 /* no translation found */
|
||||
#define DSISR_PROTFAULT 0x08000000 /* protection fault */
|
||||
#define DSISR_BADACCESS 0x04000000 /* bad access to CI or G */
|
||||
#define DSISR_ISSTORE 0x02000000 /* access was a store */
|
||||
#define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */
|
||||
#define DSISR_NOSEGMENT 0x00200000 /* SLB miss */
|
||||
#define DSISR_KEYFAULT 0x00200000 /* Key fault */
|
||||
#define DSISR_UNSUPP_MMU 0x00080000 /* Unsupported MMU config */
|
||||
#define DSISR_SET_RC 0x00040000 /* Failed setting of R/C bits */
|
||||
#define DSISR_PGDIRFAULT 0x00020000 /* Fault on page directory */
|
||||
#define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */
|
||||
#define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */
|
||||
#define SPRN_CIR 0x11B /* Chip Information Register (hyper, R/0) */
|
||||
|
@ -413,6 +413,26 @@ struct kvm_get_htab_header {
|
||||
__u16 n_invalid;
|
||||
};
|
||||
|
||||
/* For KVM_PPC_CONFIGURE_V3_MMU */
|
||||
struct kvm_ppc_mmuv3_cfg {
|
||||
__u64 flags; /* KVM_PPC_MMUV3_RADIX and/or KVM_PPC_MMUV3_GTSE */
|
||||
__u64 process_table; /* second doubleword of partition table entry */
|
||||
};
|
||||
|
||||
/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */
|
||||
#define KVM_PPC_MMUV3_RADIX 1 /* 1 = radix mode, 0 = HPT */
|
||||
#define KVM_PPC_MMUV3_GTSE 2 /* global translation shootdown enb. */
|
||||
|
||||
/* For KVM_PPC_GET_RMMU_INFO */
|
||||
struct kvm_ppc_rmmu_info {
|
||||
struct kvm_ppc_radix_geom {
|
||||
__u8 page_shift; /* log base 2 of the smallest page size; 0 = unused entry */
|
||||
__u8 level_bits[4]; /* bits indexed at each tree level, PTE level first, PGD level last */
|
||||
__u8 pad[3];
|
||||
} geometries[8]; /* up to 8 supported radix tree geometries */
|
||||
__u32 ap_encodings[8]; /* AP value in the top 3 bits, log base 2 of page size in the bottom 6 bits */
|
||||
};
|
||||
|
||||
/* Per-vcpu XICS interrupt controller state */
|
||||
#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
|
||||
|
||||
|
@ -498,6 +498,7 @@ int main(void)
|
||||
DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
|
||||
DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls));
|
||||
DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
|
||||
DEFINE(KVM_RADIX, offsetof(struct kvm, arch.radix));
|
||||
DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
|
||||
DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
|
||||
DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
|
||||
@ -537,6 +538,7 @@ int main(void)
|
||||
DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
|
||||
DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
|
||||
DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
|
||||
DEFINE(VCPU_FAULT_GPA, offsetof(struct kvm_vcpu, arch.fault_gpa));
|
||||
DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr));
|
||||
DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
|
||||
DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
|
||||
|
@ -142,7 +142,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
|
||||
lbz r0,HSTATE_HWTHREAD_REQ(r13)
|
||||
cmpwi r0,0
|
||||
beq 1f
|
||||
b kvm_start_guest
|
||||
BRANCH_TO_KVM(r10, kvm_start_guest)
|
||||
1:
|
||||
#endif
|
||||
|
||||
@ -717,13 +717,9 @@ hardware_interrupt_hv:
|
||||
BEGIN_FTR_SECTION
|
||||
_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
|
||||
EXC_HV, SOFTEN_TEST_HV)
|
||||
do_kvm_H0x500:
|
||||
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502)
|
||||
FTR_SECTION_ELSE
|
||||
_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
|
||||
EXC_STD, SOFTEN_TEST_PR)
|
||||
do_kvm_0x500:
|
||||
KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500)
|
||||
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
|
||||
EXC_REAL_END(hardware_interrupt, 0x500, 0x100)
|
||||
|
||||
@ -737,6 +733,8 @@ hardware_interrupt_relon_hv:
|
||||
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
|
||||
EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
|
||||
|
||||
TRAMP_KVM(PACA_EXGEN, 0x500)
|
||||
TRAMP_KVM_HV(PACA_EXGEN, 0x500)
|
||||
EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ)
|
||||
|
||||
|
||||
@ -832,6 +830,31 @@ EXC_VIRT(trap_0b, 0x4b00, 0x100, 0xb00)
|
||||
TRAMP_KVM(PACA_EXGEN, 0xb00)
|
||||
EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
|
||||
|
||||
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
|
||||
/*
|
||||
* If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems
|
||||
* that support it) before changing to HMT_MEDIUM. That allows the KVM
|
||||
* code to save that value into the guest state (it is the guest's PPR
|
||||
* value). Otherwise just change to HMT_MEDIUM as userspace has
|
||||
* already saved the PPR.
|
||||
*/
|
||||
#define SYSCALL_KVMTEST \
|
||||
SET_SCRATCH0(r13); \
|
||||
GET_PACA(r13); \
|
||||
std r9,PACA_EXGEN+EX_R9(r13); \
|
||||
OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); \
|
||||
HMT_MEDIUM; \
|
||||
std r10,PACA_EXGEN+EX_R10(r13); \
|
||||
OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR); \
|
||||
mfcr r9; \
|
||||
KVMTEST_PR(0xc00); \
|
||||
GET_SCRATCH0(r13)
|
||||
|
||||
#else
|
||||
#define SYSCALL_KVMTEST \
|
||||
HMT_MEDIUM
|
||||
#endif
|
||||
|
||||
#define LOAD_SYSCALL_HANDLER(reg) \
|
||||
__LOAD_HANDLER(reg, system_call_common)
|
||||
|
||||
@ -885,34 +908,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
|
||||
#endif
|
||||
|
||||
EXC_REAL_BEGIN(system_call, 0xc00, 0x100)
|
||||
/*
|
||||
* If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems
|
||||
* that support it) before changing to HMT_MEDIUM. That allows the KVM
|
||||
* code to save that value into the guest state (it is the guest's PPR
|
||||
* value). Otherwise just change to HMT_MEDIUM as userspace has
|
||||
* already saved the PPR.
|
||||
*/
|
||||
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
|
||||
SET_SCRATCH0(r13)
|
||||
GET_PACA(r13)
|
||||
std r9,PACA_EXGEN+EX_R9(r13)
|
||||
OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR);
|
||||
HMT_MEDIUM;
|
||||
std r10,PACA_EXGEN+EX_R10(r13)
|
||||
OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR);
|
||||
mfcr r9
|
||||
KVMTEST_PR(0xc00)
|
||||
GET_SCRATCH0(r13)
|
||||
#else
|
||||
HMT_MEDIUM;
|
||||
#endif
|
||||
SYSCALL_KVMTEST
|
||||
SYSCALL_PSERIES_1
|
||||
SYSCALL_PSERIES_2_RFID
|
||||
SYSCALL_PSERIES_3
|
||||
EXC_REAL_END(system_call, 0xc00, 0x100)
|
||||
|
||||
EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
|
||||
HMT_MEDIUM
|
||||
SYSCALL_KVMTEST
|
||||
SYSCALL_PSERIES_1
|
||||
SYSCALL_PSERIES_2_DIRECT
|
||||
SYSCALL_PSERIES_3
|
||||
@ -927,7 +930,7 @@ TRAMP_KVM(PACA_EXGEN, 0xd00)
|
||||
EXC_COMMON(single_step_common, 0xd00, single_step_exception)
|
||||
|
||||
EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0x20)
|
||||
EXC_VIRT_NONE(0x4e00, 0x20)
|
||||
EXC_VIRT_OOL_HV(h_data_storage, 0x4e00, 0x20, 0xe00)
|
||||
TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0xe00)
|
||||
EXC_COMMON_BEGIN(h_data_storage_common)
|
||||
mfspr r10,SPRN_HDAR
|
||||
@ -943,7 +946,7 @@ EXC_COMMON_BEGIN(h_data_storage_common)
|
||||
|
||||
|
||||
EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0x20)
|
||||
EXC_VIRT_NONE(0x4e20, 0x20)
|
||||
EXC_VIRT_OOL_HV(h_instr_storage, 0x4e20, 0x20, 0xe20)
|
||||
TRAMP_KVM_HV(PACA_EXGEN, 0xe20)
|
||||
EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception)
|
||||
|
||||
@ -979,7 +982,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
|
||||
EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
|
||||
EXCEPTION_PROLOG_COMMON_3(0xe60)
|
||||
addi r3,r1,STACK_FRAME_OVERHEAD
|
||||
bl hmi_exception_realmode
|
||||
BRANCH_LINK_TO_FAR(r4, hmi_exception_realmode)
|
||||
/* Windup the stack. */
|
||||
/* Move original HSRR0 and HSRR1 into the respective regs */
|
||||
ld r9,_MSR(r1)
|
||||
|
@ -248,7 +248,7 @@ fastsleep_workaround_at_entry:
|
||||
/* Fast sleep workaround */
|
||||
li r3,1
|
||||
li r4,1
|
||||
bl opal_rm_config_cpu_idle_state
|
||||
bl opal_config_cpu_idle_state
|
||||
|
||||
/* Clear Lock bit */
|
||||
li r0,0
|
||||
@ -552,7 +552,7 @@ timebase_resync:
|
||||
*/
|
||||
ble cr3,clear_lock
|
||||
/* Time base re-sync */
|
||||
bl opal_rm_resync_timebase;
|
||||
bl opal_resync_timebase;
|
||||
/*
|
||||
* If waking up from sleep, per core state is not lost, skip to
|
||||
* clear_lock.
|
||||
@ -641,7 +641,7 @@ hypervisor_state_restored:
|
||||
fastsleep_workaround_at_exit:
|
||||
li r3,1
|
||||
li r4,0
|
||||
bl opal_rm_config_cpu_idle_state
|
||||
bl opal_config_cpu_idle_state
|
||||
b timebase_resync
|
||||
|
||||
/*
|
||||
|
@ -649,6 +649,7 @@ static void __init early_cmdline_parse(void)
|
||||
struct option_vector1 {
|
||||
u8 byte1;
|
||||
u8 arch_versions;
|
||||
u8 arch_versions3;
|
||||
} __packed;
|
||||
|
||||
struct option_vector2 {
|
||||
@ -691,6 +692,9 @@ struct option_vector5 {
|
||||
u8 reserved2;
|
||||
__be16 reserved3;
|
||||
u8 subprocessors;
|
||||
u8 byte22;
|
||||
u8 intarch;
|
||||
u8 mmu;
|
||||
} __packed;
|
||||
|
||||
struct option_vector6 {
|
||||
@ -700,7 +704,7 @@ struct option_vector6 {
|
||||
} __packed;
|
||||
|
||||
struct ibm_arch_vec {
|
||||
struct { u32 mask, val; } pvrs[10];
|
||||
struct { u32 mask, val; } pvrs[12];
|
||||
|
||||
u8 num_vectors;
|
||||
|
||||
@ -749,6 +753,14 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
|
||||
.mask = cpu_to_be32(0xffff0000), /* POWER8 */
|
||||
.val = cpu_to_be32(0x004d0000),
|
||||
},
|
||||
{
|
||||
.mask = cpu_to_be32(0xffff0000), /* POWER9 */
|
||||
.val = cpu_to_be32(0x004e0000),
|
||||
},
|
||||
{
|
||||
.mask = cpu_to_be32(0xffffffff), /* all 3.00-compliant */
|
||||
.val = cpu_to_be32(0x0f000005),
|
||||
},
|
||||
{
|
||||
.mask = cpu_to_be32(0xffffffff), /* all 2.07-compliant */
|
||||
.val = cpu_to_be32(0x0f000004),
|
||||
@ -774,6 +786,7 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
|
||||
.byte1 = 0,
|
||||
.arch_versions = OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 |
|
||||
OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07,
|
||||
.arch_versions3 = OV1_PPC_3_00,
|
||||
},
|
||||
|
||||
.vec2_len = VECTOR_LENGTH(sizeof(struct option_vector2)),
|
||||
@ -836,6 +849,9 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
|
||||
.reserved2 = 0,
|
||||
.reserved3 = 0,
|
||||
.subprocessors = 1,
|
||||
.intarch = 0,
|
||||
.mmu = OV5_FEAT(OV5_MMU_RADIX_300) | OV5_FEAT(OV5_MMU_HASH_300) |
|
||||
OV5_FEAT(OV5_MMU_PROC_TBL) | OV5_FEAT(OV5_MMU_GTSE),
|
||||
},
|
||||
|
||||
/* option vector 6: IBM PAPR hints */
|
||||
|
@ -70,7 +70,8 @@ endif
|
||||
kvm-hv-y += \
|
||||
book3s_hv.o \
|
||||
book3s_hv_interrupts.o \
|
||||
book3s_64_mmu_hv.o
|
||||
book3s_64_mmu_hv.o \
|
||||
book3s_64_mmu_radix.o
|
||||
|
||||
kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
|
||||
book3s_hv_rm_xics.o
|
||||
|
@ -239,6 +239,7 @@ void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar,
|
||||
kvmppc_set_dsisr(vcpu, flags);
|
||||
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvmppc_core_queue_data_storage); /* used by kvm_hv */
|
||||
|
||||
void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags)
|
||||
{
|
||||
|
@ -119,6 +119,9 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
|
||||
long err = -EBUSY;
|
||||
long order;
|
||||
|
||||
if (kvm_is_radix(kvm))
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
if (kvm->arch.hpte_setup_done) {
|
||||
kvm->arch.hpte_setup_done = 0;
|
||||
@ -152,12 +155,11 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
|
||||
|
||||
void kvmppc_free_hpt(struct kvm *kvm)
|
||||
{
|
||||
kvmppc_free_lpid(kvm->arch.lpid);
|
||||
vfree(kvm->arch.revmap);
|
||||
if (kvm->arch.hpt_cma_alloc)
|
||||
kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt),
|
||||
1 << (kvm->arch.hpt_order - PAGE_SHIFT));
|
||||
else
|
||||
else if (kvm->arch.hpt_virt)
|
||||
free_pages(kvm->arch.hpt_virt,
|
||||
kvm->arch.hpt_order - PAGE_SHIFT);
|
||||
}
|
||||
@ -392,8 +394,8 @@ static int instruction_is_store(unsigned int instr)
|
||||
return (instr & mask) != 0;
|
||||
}
|
||||
|
||||
static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
unsigned long gpa, gva_t ea, int is_store)
|
||||
int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
unsigned long gpa, gva_t ea, int is_store)
|
||||
{
|
||||
u32 last_inst;
|
||||
|
||||
@ -458,6 +460,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
unsigned long rcbits;
|
||||
long mmio_update;
|
||||
|
||||
if (kvm_is_radix(kvm))
|
||||
return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr);
|
||||
|
||||
/*
|
||||
* Real-mode code has already searched the HPT and found the
|
||||
* entry we're interested in. Lock the entry and check that
|
||||
@ -695,12 +700,13 @@ static void kvmppc_rmap_reset(struct kvm *kvm)
|
||||
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
||||
}
|
||||
|
||||
typedef int (*hva_handler_fn)(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn);
|
||||
|
||||
static int kvm_handle_hva_range(struct kvm *kvm,
|
||||
unsigned long start,
|
||||
unsigned long end,
|
||||
int (*handler)(struct kvm *kvm,
|
||||
unsigned long *rmapp,
|
||||
unsigned long gfn))
|
||||
hva_handler_fn handler)
|
||||
{
|
||||
int ret;
|
||||
int retval = 0;
|
||||
@ -725,9 +731,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
|
||||
gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
|
||||
|
||||
for (; gfn < gfn_end; ++gfn) {
|
||||
gfn_t gfn_offset = gfn - memslot->base_gfn;
|
||||
|
||||
ret = handler(kvm, &memslot->arch.rmap[gfn_offset], gfn);
|
||||
ret = handler(kvm, memslot, gfn);
|
||||
retval |= ret;
|
||||
}
|
||||
}
|
||||
@ -736,20 +740,21 @@ static int kvm_handle_hva_range(struct kvm *kvm,
|
||||
}
|
||||
|
||||
static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
|
||||
int (*handler)(struct kvm *kvm, unsigned long *rmapp,
|
||||
unsigned long gfn))
|
||||
hva_handler_fn handler)
|
||||
{
|
||||
return kvm_handle_hva_range(kvm, hva, hva + 1, handler);
|
||||
}
|
||||
|
||||
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
||||
static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn)
|
||||
{
|
||||
struct revmap_entry *rev = kvm->arch.revmap;
|
||||
unsigned long h, i, j;
|
||||
__be64 *hptep;
|
||||
unsigned long ptel, psize, rcbits;
|
||||
unsigned long *rmapp;
|
||||
|
||||
rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
|
||||
for (;;) {
|
||||
lock_rmap(rmapp);
|
||||
if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
|
||||
@ -810,26 +815,36 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
||||
|
||||
int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
|
||||
{
|
||||
kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
|
||||
hva_handler_fn handler;
|
||||
|
||||
handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
|
||||
kvm_handle_hva(kvm, hva, handler);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
|
||||
{
|
||||
kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
|
||||
hva_handler_fn handler;
|
||||
|
||||
handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
|
||||
kvm_handle_hva_range(kvm, start, end, handler);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot)
|
||||
{
|
||||
unsigned long *rmapp;
|
||||
unsigned long gfn;
|
||||
unsigned long n;
|
||||
unsigned long *rmapp;
|
||||
|
||||
rmapp = memslot->arch.rmap;
|
||||
gfn = memslot->base_gfn;
|
||||
for (n = memslot->npages; n; --n) {
|
||||
rmapp = memslot->arch.rmap;
|
||||
for (n = memslot->npages; n; --n, ++gfn) {
|
||||
if (kvm_is_radix(kvm)) {
|
||||
kvm_unmap_radix(kvm, memslot, gfn);
|
||||
continue;
|
||||
}
|
||||
/*
|
||||
* Testing the present bit without locking is OK because
|
||||
* the memslot has been marked invalid already, and hence
|
||||
@ -837,20 +852,21 @@ void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
|
||||
* thus the present bit can't go from 0 to 1.
|
||||
*/
|
||||
if (*rmapp & KVMPPC_RMAP_PRESENT)
|
||||
kvm_unmap_rmapp(kvm, rmapp, gfn);
|
||||
kvm_unmap_rmapp(kvm, memslot, gfn);
|
||||
++rmapp;
|
||||
++gfn;
|
||||
}
|
||||
}
|
||||
|
||||
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
||||
static int kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn)
|
||||
{
|
||||
struct revmap_entry *rev = kvm->arch.revmap;
|
||||
unsigned long head, i, j;
|
||||
__be64 *hptep;
|
||||
int ret = 0;
|
||||
unsigned long *rmapp;
|
||||
|
||||
rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
|
||||
retry:
|
||||
lock_rmap(rmapp);
|
||||
if (*rmapp & KVMPPC_RMAP_REFERENCED) {
|
||||
@ -898,17 +914,22 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
||||
|
||||
int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
|
||||
{
|
||||
return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp);
|
||||
hva_handler_fn handler;
|
||||
|
||||
handler = kvm_is_radix(kvm) ? kvm_age_radix : kvm_age_rmapp;
|
||||
return kvm_handle_hva_range(kvm, start, end, handler);
|
||||
}
|
||||
|
||||
static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
||||
static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn)
|
||||
{
|
||||
struct revmap_entry *rev = kvm->arch.revmap;
|
||||
unsigned long head, i, j;
|
||||
unsigned long *hp;
|
||||
int ret = 1;
|
||||
unsigned long *rmapp;
|
||||
|
||||
rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
|
||||
if (*rmapp & KVMPPC_RMAP_REFERENCED)
|
||||
return 1;
|
||||
|
||||
@ -934,12 +955,18 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
||||
|
||||
int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva)
|
||||
{
|
||||
return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
|
||||
hva_handler_fn handler;
|
||||
|
||||
handler = kvm_is_radix(kvm) ? kvm_test_age_radix : kvm_test_age_rmapp;
|
||||
return kvm_handle_hva(kvm, hva, handler);
|
||||
}
|
||||
|
||||
void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)
|
||||
{
|
||||
kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
|
||||
hva_handler_fn handler;
|
||||
|
||||
handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
|
||||
kvm_handle_hva(kvm, hva, handler);
|
||||
}
|
||||
|
||||
static int vcpus_running(struct kvm *kvm)
|
||||
@ -1040,7 +1067,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
|
||||
return npages_dirty;
|
||||
}
|
||||
|
||||
static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
|
||||
void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
|
||||
struct kvm_memory_slot *memslot,
|
||||
unsigned long *map)
|
||||
{
|
||||
@ -1058,12 +1085,11 @@ static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
|
||||
__set_bit_le(gfn - memslot->base_gfn, map);
|
||||
}
|
||||
|
||||
long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long *map)
|
||||
long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot, unsigned long *map)
|
||||
{
|
||||
unsigned long i, j;
|
||||
unsigned long *rmapp;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
preempt_disable();
|
||||
rmapp = memslot->arch.rmap;
|
||||
@ -1079,15 +1105,6 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
__set_bit_le(j, map);
|
||||
++rmapp;
|
||||
}
|
||||
|
||||
/* Harvest dirty bits from VPA and DTL updates */
|
||||
/* Note: we never modify the SLB shadow buffer areas */
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
spin_lock(&vcpu->arch.vpa_update_lock);
|
||||
harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map);
|
||||
harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map);
|
||||
spin_unlock(&vcpu->arch.vpa_update_lock);
|
||||
}
|
||||
preempt_enable();
|
||||
return 0;
|
||||
}
|
||||
@ -1142,10 +1159,14 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
|
||||
srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
memslot = gfn_to_memslot(kvm, gfn);
|
||||
if (memslot) {
|
||||
rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
|
||||
lock_rmap(rmap);
|
||||
*rmap |= KVMPPC_RMAP_CHANGED;
|
||||
unlock_rmap(rmap);
|
||||
if (!kvm_is_radix(kvm)) {
|
||||
rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
|
||||
lock_rmap(rmap);
|
||||
*rmap |= KVMPPC_RMAP_CHANGED;
|
||||
unlock_rmap(rmap);
|
||||
} else if (memslot->dirty_bitmap) {
|
||||
mark_page_dirty(kvm, gfn);
|
||||
}
|
||||
}
|
||||
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
||||
}
|
||||
@ -1675,7 +1696,10 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
|
||||
|
||||
vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */
|
||||
|
||||
mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
|
||||
if (kvm_is_radix(vcpu->kvm))
|
||||
mmu->xlate = kvmppc_mmu_radix_xlate;
|
||||
else
|
||||
mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
|
||||
mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
|
||||
|
||||
vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
|
||||
|
716
arch/powerpc/kvm/book3s_64_mmu_radix.c
Normal file
716
arch/powerpc/kvm/book3s_64_mmu_radix.c
Normal file
@ -0,0 +1,716 @@
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License, version 2, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* Copyright 2016 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
|
||||
*/
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/kvm.h>
|
||||
#include <linux/kvm_host.h>
|
||||
|
||||
#include <asm/kvm_ppc.h>
|
||||
#include <asm/kvm_book3s.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/pgalloc.h>
|
||||
|
||||
/*
|
||||
* Supported radix tree geometry.
|
||||
* Like p9, we support either 5 or 9 bits at the first (lowest) level,
|
||||
* for a page size of 64k or 4k.
|
||||
*/
|
||||
static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 };
|
||||
|
||||
/*
 * Software walk of the guest's process-scoped radix tree.
 *
 * Translates guest effective address @eaddr for @vcpu and fills in @gpte
 * (eaddr, raddr, page_size and the may_read/may_write/may_execute bits).
 * @data and @iswrite are accepted for interface compatibility but are not
 * consulted here.
 *
 * Returns 0 on success, -EINVAL if the process table or tree geometry is
 * not one we support, -ENOENT if a non-present entry is encountered, or
 * the error from kvm_read_guest() if guest memory cannot be read.
 */
int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
			   struct kvmppc_pte *gpte, bool data, bool iswrite)
{
	struct kvm *kvm = vcpu->kvm;
	u32 pid;
	int ret, level, ps;
	__be64 prte, rpte;
	unsigned long root, pte, index;
	unsigned long rts, bits, offset;
	unsigned long gpa;
	unsigned long proc_tbl_size;
	unsigned long prte_offset, ptbl;

	/* Work out effective PID */
	switch (eaddr >> 62) {
	case 0:
		pid = vcpu->arch.pid;
		break;
	case 3:
		pid = 0;
		break;
	default:
		return -EINVAL;
	}

	/*
	 * The size field can be up to 31, so the shift count can reach 43:
	 * the shift must be done in unsigned long, not int.  Likewise the
	 * byte offset of the entry is computed in unsigned long so that a
	 * large PID cannot wrap in 32 bits and slip past the bounds check.
	 */
	proc_tbl_size = 1UL << ((kvm->arch.process_table & PRTS_MASK) + 12);
	prte_offset = (unsigned long)pid * 16;
	if (prte_offset >= proc_tbl_size)
		return -EINVAL;

	/*
	 * Read process table to find root of tree for effective PID.
	 * process_table also carries the size field and the PATB_GR flag,
	 * so only the base-address field may be used as a guest address.
	 */
	ptbl = (kvm->arch.process_table & PRTB_MASK) + prte_offset;
	ret = kvm_read_guest(kvm, ptbl, &prte, sizeof(prte));
	if (ret)
		return ret;

	root = be64_to_cpu(prte);
	rts = ((root & RTS1_MASK) >> (RTS1_SHIFT - 3)) |
		((root & RTS2_MASK) >> RTS2_SHIFT);
	bits = root & RPDS_MASK;
	root = root & RPDB_MASK;

	/* P9 DD1 interprets RTS (radix tree size) differently */
	offset = rts + 31;
	if (cpu_has_feature(CPU_FTR_POWER9_DD1))
		offset -= 3;

	/* current implementations only support 52-bit space */
	if (offset != 52)
		return -EINVAL;

	for (level = 3; level >= 0; --level) {
		if (level && bits != p9_supported_radix_bits[level])
			return -EINVAL;
		if (level == 0 && !(bits == 5 || bits == 9))
			return -EINVAL;
		offset -= bits;
		index = (eaddr >> offset) & ((1UL << bits) - 1);
		/* check that low bits of page table base are zero */
		if (root & ((1UL << (bits + 3)) - 1))
			return -EINVAL;
		ret = kvm_read_guest(kvm, root + index * 8,
				     &rpte, sizeof(rpte));
		if (ret)
			return ret;
		pte = __be64_to_cpu(rpte);
		if (!(pte & _PAGE_PRESENT))
			return -ENOENT;
		if (pte & _PAGE_PTE)
			break;
		/* Directory entry: pick up next level's size and base */
		bits = pte & 0x1f;
		root = pte & 0x0fffffffffffff00ul;
	}
	/* need a leaf at lowest level; 512GB pages not supported */
	if (level < 0 || level == 3)
		return -EINVAL;

	/* offset is now log base 2 of the page size */
	gpa = pte & 0x01fffffffffff000ul;
	if (gpa & ((1ul << offset) - 1))
		return -EINVAL;
	gpa += eaddr & ((1ul << offset) - 1);

	/*
	 * Map the page shift to an MMU_PAGE_* index.
	 * NOTE(review): if no entry matches, ps is left at MMU_PAGE_COUNT;
	 * confirm that callers tolerate that value.
	 */
	for (ps = MMU_PAGE_4K; ps < MMU_PAGE_COUNT; ++ps)
		if (offset == mmu_psize_defs[ps].shift)
			break;
	gpte->page_size = ps;

	gpte->eaddr = eaddr;
	gpte->raddr = gpa;

	/* Work out permissions */
	gpte->may_read = !!(pte & _PAGE_READ);
	gpte->may_write = !!(pte & _PAGE_WRITE);
	gpte->may_execute = !!(pte & _PAGE_EXEC);
	if (kvmppc_get_msr(vcpu) & MSR_PR) {
		/* Problem state may not touch privileged pages at all */
		if (pte & _PAGE_PRIVILEGED) {
			gpte->may_read = 0;
			gpte->may_write = 0;
			gpte->may_execute = 0;
		}
	} else {
		if (!(pte & _PAGE_PRIVILEGED)) {
			/* Check AMR/IAMR to see if strict mode is in force */
			if (vcpu->arch.amr & (1ul << 62))
				gpte->may_read = 0;
			if (vcpu->arch.amr & (1ul << 63))
				gpte->may_write = 0;
			if (vcpu->arch.iamr & (1ul << 62))
				gpte->may_execute = 0;
		}
	}

	return 0;
}
|
||||
|
||||
#ifdef CONFIG_PPC_64K_PAGES
|
||||
#define MMU_BASE_PSIZE MMU_PAGE_64K
|
||||
#else
|
||||
#define MMU_BASE_PSIZE MMU_PAGE_4K
|
||||
#endif
|
||||
|
||||
static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
|
||||
unsigned int pshift)
|
||||
{
|
||||
int psize = MMU_BASE_PSIZE;
|
||||
|
||||
if (pshift >= PMD_SHIFT)
|
||||
psize = MMU_PAGE_2M;
|
||||
addr &= ~0xfffUL;
|
||||
addr |= mmu_psize_defs[psize].ap << 5;
|
||||
asm volatile("ptesync": : :"memory");
|
||||
asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1)
|
||||
: : "r" (addr), "r" (kvm->arch.lpid) : "memory");
|
||||
asm volatile("ptesync": : :"memory");
|
||||
}
|
||||
|
||||
/*
 * Clear the @clr bits and set the @set bits in *@ptep, returning the old
 * PTE value as reported by __radix_pte_update().
 *
 * On POWER9 DD1, a present PTE that is to stay present is first made
 * non-present and flushed from the TLB before the new bits are written;
 * _PAGE_PRESENT is then folded back into both the new value and the
 * returned old value.
 */
unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
				      unsigned long clr, unsigned long set,
				      unsigned long addr, unsigned int shift)
{
	unsigned long was_present = 0;
	bool invalidate_first;

	invalidate_first = !(clr & _PAGE_PRESENT) &&
			   cpu_has_feature(CPU_FTR_POWER9_DD1) &&
			   pte_present(*ptep);
	if (invalidate_first) {
		was_present = __radix_pte_update(ptep, _PAGE_PRESENT, 0);
		was_present &= _PAGE_PRESENT;
		kvmppc_radix_tlbie_page(kvm, addr, shift);
		set |= _PAGE_PRESENT;
	}

	return was_present | __radix_pte_update(ptep, clr, set);
}
|
||||
|
||||
/* Store @pte at @ptep via radix__set_pte_at(), using the VM's kvm->mm. */
void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr,
			     pte_t *ptep, pte_t pte)
{
	struct mm_struct *mm = kvm->mm;

	radix__set_pte_at(mm, addr, ptep, pte, 0);
}
|
||||
|
||||
static struct kmem_cache *kvm_pte_cache;
|
||||
|
||||
static pte_t *kvmppc_pte_alloc(void)
|
||||
{
|
||||
return kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL);
|
||||
}
|
||||
|
||||
/* Give a PTE page obtained from kvmppc_pte_alloc() back to kvm_pte_cache. */
static void kvmppc_pte_free(pte_t *ptep)
{
	kmem_cache_free(kvm_pte_cache, ptep);
}
|
||||
|
||||
/*
 * Insert @pte for guest real address @gpa into the VM's radix tree
 * (kvm->arch.pgtable).  @level is 0 for a normal page and 1 for a large
 * page installed at the PMD level.
 *
 * Any missing intermediate tables are allocated up front, outside
 * kvm->mmu_lock; whatever turns out not to be needed is freed on exit.
 *
 * Returns 0 on success, -EAGAIN if an MMU-notifier invalidation raced
 * with us or a large page already sits at this PMD, -EBUSY if a page
 * table page is in the way of a requested large page, and -ENOMEM if a
 * required table could not be allocated.
 */
static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
			     unsigned int level, unsigned long mmu_seq)
{
	pgd_t *pgd = kvm->arch.pgtable + pgd_index(gpa);
	pud_t *pud = NULL, *spare_pud = NULL;
	pmd_t *pmd = NULL, *spare_pmd = NULL;
	pte_t *ptep, *spare_pte = NULL;
	unsigned long old;
	int ret;

	/* Unlocked look at the tree to pre-allocate the levels we may need */
	if (!pgd_present(*pgd))
		spare_pud = pud_alloc_one(kvm->mm, gpa);
	else
		pud = pud_offset(pgd, gpa);

	if (!pud || !pud_present(*pud))
		spare_pmd = pmd_alloc_one(kvm->mm, gpa);
	else
		pmd = pmd_offset(pud, gpa);

	if (level == 0 && (!pmd || !pmd_present(*pmd)))
		spare_pte = kvmppc_pte_alloc();

	spin_lock(&kvm->mmu_lock);

	/* Check if we might have been invalidated; let the guest retry if so */
	if (mmu_notifier_retry(kvm, mmu_seq)) {
		ret = -EAGAIN;
		goto out_unlock;
	}

	/* Now traverse again under the lock and change the tree */
	ret = -ENOMEM;
	if (pgd_none(*pgd)) {
		if (!spare_pud)
			goto out_unlock;
		pgd_populate(kvm->mm, pgd, spare_pud);
		spare_pud = NULL;
	}

	pud = pud_offset(pgd, gpa);
	if (pud_none(*pud)) {
		if (!spare_pmd)
			goto out_unlock;
		pud_populate(kvm->mm, pud, spare_pmd);
		spare_pmd = NULL;
	}

	pmd = pmd_offset(pud, gpa);
	if (pmd_large(*pmd)) {
		/* Someone else has instantiated a large page here; retry */
		ret = -EAGAIN;
		goto out_unlock;
	}

	if (level == 1 && !pmd_none(*pmd)) {
		/*
		 * A page table page already hangs off this PMD, so a large
		 * page cannot go here.  Report it so that the caller may
		 * fall back to a normal page.
		 */
		ret = -EBUSY;
		goto out_unlock;
	}

	if (level != 0) {
		/* Large page: the PMD entry itself is the leaf */
		kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
		ret = 0;
		goto out_unlock;
	}

	if (pmd_none(*pmd)) {
		if (!spare_pte)
			goto out_unlock;
		pmd_populate(kvm->mm, pmd, spare_pte);
		spare_pte = NULL;
	}

	ptep = pte_offset_kernel(pmd, gpa);
	if (pte_present(*ptep)) {
		/* PTE was previously valid, so invalidate it */
		old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
					      0, gpa, 0);
		kvmppc_radix_tlbie_page(kvm, gpa, 0);
		if (old & _PAGE_DIRTY)
			mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
	}
	kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
	ret = 0;

 out_unlock:
	spin_unlock(&kvm->mmu_lock);

	/* Release any pre-allocated tables that were not consumed */
	if (spare_pud)
		pud_free(kvm->mm, spare_pud);
	if (spare_pmd)
		pmd_free(kvm->mm, spare_pmd);
	if (spare_pte)
		kvmppc_pte_free(spare_pte);
	return ret;
}
|
||||
|
||||
int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
unsigned long ea, unsigned long dsisr)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
unsigned long mmu_seq, pte_size;
|
||||
unsigned long gpa, gfn, hva, pfn;
|
||||
struct kvm_memory_slot *memslot;
|
||||
struct page *page = NULL, *pages[1];
|
||||
long ret, npages, ok;
|
||||
unsigned int writing;
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long flags;
|
||||
pte_t pte, *ptep;
|
||||
unsigned long pgflags;
|
||||
unsigned int shift, level;
|
||||
|
||||
/* Check for unusual errors */
|
||||
if (dsisr & DSISR_UNSUPP_MMU) {
|
||||
pr_err("KVM: Got unsupported MMU fault\n");
|
||||
return -EFAULT;
|
||||
}
|
||||
if (dsisr & DSISR_BADACCESS) {
|
||||
/* Reflect to the guest as DSI */
|
||||
pr_err("KVM: Got radix HV page fault with DSISR=%lx\n", dsisr);
|
||||
kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
|
||||
return RESUME_GUEST;
|
||||
}
|
||||
|
||||
/* Translate the logical address and get the page */
|
||||
gpa = vcpu->arch.fault_gpa & ~0xfffUL;
|
||||
gpa &= ~0xF000000000000000ul;
|
||||
gfn = gpa >> PAGE_SHIFT;
|
||||
if (!(dsisr & DSISR_PGDIRFAULT))
|
||||
gpa |= ea & 0xfff;
|
||||
memslot = gfn_to_memslot(kvm, gfn);
|
||||
|
||||
/* No memslot means it's an emulated MMIO region */
|
||||
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
|
||||
if (dsisr & (DSISR_PGDIRFAULT | DSISR_BADACCESS |
|
||||
DSISR_SET_RC)) {
|
||||
/*
|
||||
* Bad address in guest page table tree, or other
|
||||
* unusual error - reflect it to the guest as DSI.
|
||||
*/
|
||||
kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
|
||||
return RESUME_GUEST;
|
||||
}
|
||||
return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
|
||||
dsisr & DSISR_ISSTORE);
|
||||
}
|
||||
|
||||
/* used to check for invalidations in progress */
|
||||
mmu_seq = kvm->mmu_notifier_seq;
|
||||
smp_rmb();
|
||||
|
||||
writing = (dsisr & DSISR_ISSTORE) != 0;
|
||||
hva = gfn_to_hva_memslot(memslot, gfn);
|
||||
if (dsisr & DSISR_SET_RC) {
|
||||
/*
|
||||
* Need to set an R or C bit in the 2nd-level tables;
|
||||
* if the relevant bits aren't already set in the linux
|
||||
* page tables, fall through to do the gup_fast to
|
||||
* set them in the linux page tables too.
|
||||
*/
|
||||
ok = 0;
|
||||
pgflags = _PAGE_ACCESSED;
|
||||
if (writing)
|
||||
pgflags |= _PAGE_DIRTY;
|
||||
local_irq_save(flags);
|
||||
ptep = __find_linux_pte_or_hugepte(current->mm->pgd, hva,
|
||||
NULL, NULL);
|
||||
if (ptep) {
|
||||
pte = READ_ONCE(*ptep);
|
||||
if (pte_present(pte) &&
|
||||
(pte_val(pte) & pgflags) == pgflags)
|
||||
ok = 1;
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
if (ok) {
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
return RESUME_GUEST;
|
||||
}
|
||||
ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable,
|
||||
gpa, NULL, &shift);
|
||||
if (ptep && pte_present(*ptep)) {
|
||||
kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
|
||||
gpa, shift);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
return RESUME_GUEST;
|
||||
}
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
}
|
||||
|
||||
ret = -EFAULT;
|
||||
pfn = 0;
|
||||
pte_size = PAGE_SIZE;
|
||||
pgflags = _PAGE_READ | _PAGE_EXEC;
|
||||
level = 0;
|
||||
npages = get_user_pages_fast(hva, 1, writing, pages);
|
||||
if (npages < 1) {
|
||||
/* Check if it's an I/O mapping */
|
||||
down_read(¤t->mm->mmap_sem);
|
||||
vma = find_vma(current->mm, hva);
|
||||
if (vma && vma->vm_start <= hva && hva < vma->vm_end &&
|
||||
(vma->vm_flags & VM_PFNMAP)) {
|
||||
pfn = vma->vm_pgoff +
|
||||
((hva - vma->vm_start) >> PAGE_SHIFT);
|
||||
pgflags = pgprot_val(vma->vm_page_prot);
|
||||
}
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
if (!pfn)
|
||||
return -EFAULT;
|
||||
} else {
|
||||
page = pages[0];
|
||||
pfn = page_to_pfn(page);
|
||||
if (PageHuge(page)) {
|
||||
page = compound_head(page);
|
||||
pte_size <<= compound_order(page);
|
||||
/* See if we can insert a 2MB large-page PTE here */
|
||||
if (pte_size >= PMD_SIZE &&
|
||||
(gpa & PMD_MASK & PAGE_MASK) ==
|
||||
(hva & PMD_MASK & PAGE_MASK)) {
|
||||
level = 1;
|
||||
pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
|
||||
}
|
||||
}
|
||||
/* See if we can provide write access */
|
||||
if (writing) {
|
||||
/*
|
||||
* We assume gup_fast has set dirty on the host PTE.
|
||||
*/
|
||||
pgflags |= _PAGE_WRITE;
|
||||
} else {
|
||||
local_irq_save(flags);
|
||||
ptep = __find_linux_pte_or_hugepte(current->mm->pgd,
|
||||
hva, NULL, NULL);
|
||||
if (ptep && pte_write(*ptep) && pte_dirty(*ptep))
|
||||
pgflags |= _PAGE_WRITE;
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute the PTE value that we need to insert.
|
||||
*/
|
||||
pgflags |= _PAGE_PRESENT | _PAGE_PTE | _PAGE_ACCESSED;
|
||||
if (pgflags & _PAGE_WRITE)
|
||||
pgflags |= _PAGE_DIRTY;
|
||||
pte = pfn_pte(pfn, __pgprot(pgflags));
|
||||
|
||||
/* Allocate space in the tree and write the PTE */
|
||||
ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
|
||||
if (ret == -EBUSY) {
|
||||
/*
|
||||
* There's already a PMD where wanted to install a large page;
|
||||
* for now, fall back to installing a small page.
|
||||
*/
|
||||
level = 0;
|
||||
pfn |= gfn & ((PMD_SIZE >> PAGE_SHIFT) - 1);
|
||||
pte = pfn_pte(pfn, __pgprot(pgflags));
|
||||
ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
|
||||
}
|
||||
if (ret == 0 || ret == -EAGAIN)
|
||||
ret = RESUME_GUEST;
|
||||
|
||||
if (page) {
|
||||
/*
|
||||
* We drop pages[0] here, not page because page might
|
||||
* have been set to the head page of a compound, but
|
||||
* we have to drop the reference on the correct tail
|
||||
* page to match the get inside gup()
|
||||
*/
|
||||
put_page(pages[0]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void mark_pages_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn, unsigned int order)
|
||||
{
|
||||
unsigned long i, limit;
|
||||
unsigned long *dp;
|
||||
|
||||
if (!memslot->dirty_bitmap)
|
||||
return;
|
||||
limit = 1ul << order;
|
||||
if (limit < BITS_PER_LONG) {
|
||||
for (i = 0; i < limit; ++i)
|
||||
mark_page_dirty(kvm, gfn + i);
|
||||
return;
|
||||
}
|
||||
dp = memslot->dirty_bitmap + (gfn - memslot->base_gfn);
|
||||
limit /= BITS_PER_LONG;
|
||||
for (i = 0; i < limit; ++i)
|
||||
*dp++ = ~0ul;
|
||||
}
|
||||
|
||||
/* Called with kvm->lock held */
|
||||
int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn)
|
||||
{
|
||||
pte_t *ptep;
|
||||
unsigned long gpa = gfn << PAGE_SHIFT;
|
||||
unsigned int shift;
|
||||
unsigned long old;
|
||||
|
||||
ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
|
||||
NULL, &shift);
|
||||
if (ptep && pte_present(*ptep)) {
|
||||
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
|
||||
gpa, shift);
|
||||
kvmppc_radix_tlbie_page(kvm, gpa, shift);
|
||||
if (old & _PAGE_DIRTY) {
|
||||
if (!shift)
|
||||
mark_page_dirty(kvm, gfn);
|
||||
else
|
||||
mark_pages_dirty(kvm, memslot,
|
||||
gfn, shift - PAGE_SHIFT);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Called with kvm->lock held */
|
||||
int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn)
|
||||
{
|
||||
pte_t *ptep;
|
||||
unsigned long gpa = gfn << PAGE_SHIFT;
|
||||
unsigned int shift;
|
||||
int ref = 0;
|
||||
|
||||
ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
|
||||
NULL, &shift);
|
||||
if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
|
||||
kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
|
||||
gpa, shift);
|
||||
/* XXX need to flush tlb here? */
|
||||
ref = 1;
|
||||
}
|
||||
return ref;
|
||||
}
|
||||
|
||||
/* Called with kvm->lock held */
|
||||
int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn)
|
||||
{
|
||||
pte_t *ptep;
|
||||
unsigned long gpa = gfn << PAGE_SHIFT;
|
||||
unsigned int shift;
|
||||
int ref = 0;
|
||||
|
||||
ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
|
||||
NULL, &shift);
|
||||
if (ptep && pte_present(*ptep) && pte_young(*ptep))
|
||||
ref = 1;
|
||||
return ref;
|
||||
}
|
||||
|
||||
/* Returns the number of PAGE_SIZE pages that are dirty */
|
||||
static int kvm_radix_test_clear_dirty(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot, int pagenum)
|
||||
{
|
||||
unsigned long gfn = memslot->base_gfn + pagenum;
|
||||
unsigned long gpa = gfn << PAGE_SHIFT;
|
||||
pte_t *ptep;
|
||||
unsigned int shift;
|
||||
int ret = 0;
|
||||
|
||||
ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
|
||||
NULL, &shift);
|
||||
if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) {
|
||||
ret = 1;
|
||||
if (shift)
|
||||
ret = 1 << (shift - PAGE_SHIFT);
|
||||
kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0,
|
||||
gpa, shift);
|
||||
kvmppc_radix_tlbie_page(kvm, gpa, shift);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot, unsigned long *map)
|
||||
{
|
||||
unsigned long i, j;
|
||||
unsigned long n, *p;
|
||||
int npages;
|
||||
|
||||
/*
|
||||
* Radix accumulates dirty bits in the first half of the
|
||||
* memslot's dirty_bitmap area, for when pages are paged
|
||||
* out or modified by the host directly. Pick up these
|
||||
* bits and add them to the map.
|
||||
*/
|
||||
n = kvm_dirty_bitmap_bytes(memslot) / sizeof(long);
|
||||
p = memslot->dirty_bitmap;
|
||||
for (i = 0; i < n; ++i)
|
||||
map[i] |= xchg(&p[i], 0);
|
||||
|
||||
for (i = 0; i < memslot->npages; i = j) {
|
||||
npages = kvm_radix_test_clear_dirty(kvm, memslot, i);
|
||||
|
||||
/*
|
||||
* Note that if npages > 0 then i must be a multiple of npages,
|
||||
* since huge pages are only used to back the guest at guest
|
||||
* real addresses that are a multiple of their size.
|
||||
* Since we have at most one PTE covering any given guest
|
||||
* real address, if npages > 1 we can skip to i + npages.
|
||||
*/
|
||||
j = i + 1;
|
||||
if (npages)
|
||||
for (j = i; npages; ++j, --npages)
|
||||
__set_bit_le(j, map);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void add_rmmu_ap_encoding(struct kvm_ppc_rmmu_info *info,
|
||||
int psize, int *indexp)
|
||||
{
|
||||
if (!mmu_psize_defs[psize].shift)
|
||||
return;
|
||||
info->ap_encodings[*indexp] = mmu_psize_defs[psize].shift |
|
||||
(mmu_psize_defs[psize].ap << 29);
|
||||
++(*indexp);
|
||||
}
|
||||
|
||||
int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!radix_enabled())
|
||||
return -EINVAL;
|
||||
memset(info, 0, sizeof(*info));
|
||||
|
||||
/* 4k page size */
|
||||
info->geometries[0].page_shift = 12;
|
||||
info->geometries[0].level_bits[0] = 9;
|
||||
for (i = 1; i < 4; ++i)
|
||||
info->geometries[0].level_bits[i] = p9_supported_radix_bits[i];
|
||||
/* 64k page size */
|
||||
info->geometries[1].page_shift = 16;
|
||||
for (i = 0; i < 4; ++i)
|
||||
info->geometries[1].level_bits[i] = p9_supported_radix_bits[i];
|
||||
|
||||
i = 0;
|
||||
add_rmmu_ap_encoding(info, MMU_PAGE_4K, &i);
|
||||
add_rmmu_ap_encoding(info, MMU_PAGE_64K, &i);
|
||||
add_rmmu_ap_encoding(info, MMU_PAGE_2M, &i);
|
||||
add_rmmu_ap_encoding(info, MMU_PAGE_1G, &i);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvmppc_init_vm_radix(struct kvm *kvm)
|
||||
{
|
||||
kvm->arch.pgtable = pgd_alloc(kvm->mm);
|
||||
if (!kvm->arch.pgtable)
|
||||
return -ENOMEM;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvmppc_free_radix(struct kvm *kvm)
|
||||
{
|
||||
unsigned long ig, iu, im;
|
||||
pte_t *pte;
|
||||
pmd_t *pmd;
|
||||
pud_t *pud;
|
||||
pgd_t *pgd;
|
||||
|
||||
if (!kvm->arch.pgtable)
|
||||
return;
|
||||
pgd = kvm->arch.pgtable;
|
||||
for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
|
||||
if (!pgd_present(*pgd))
|
||||
continue;
|
||||
pud = pud_offset(pgd, 0);
|
||||
for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++pud) {
|
||||
if (!pud_present(*pud))
|
||||
continue;
|
||||
pmd = pmd_offset(pud, 0);
|
||||
for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
|
||||
if (pmd_huge(*pmd)) {
|
||||
pmd_clear(pmd);
|
||||
continue;
|
||||
}
|
||||
if (!pmd_present(*pmd))
|
||||
continue;
|
||||
pte = pte_offset_map(pmd, 0);
|
||||
memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE);
|
||||
kvmppc_pte_free(pte);
|
||||
pmd_clear(pmd);
|
||||
}
|
||||
pmd_free(kvm->mm, pmd_offset(pud, 0));
|
||||
pud_clear(pud);
|
||||
}
|
||||
pud_free(kvm->mm, pud_offset(pgd, 0));
|
||||
pgd_clear(pgd);
|
||||
}
|
||||
pgd_free(kvm->mm, kvm->arch.pgtable);
|
||||
}
|
||||
|
||||
static void pte_ctor(void *addr)
|
||||
{
|
||||
memset(addr, 0, PTE_TABLE_SIZE);
|
||||
}
|
||||
|
||||
int kvmppc_radix_init(void)
|
||||
{
|
||||
unsigned long size = sizeof(void *) << PTE_INDEX_SIZE;
|
||||
|
||||
kvm_pte_cache = kmem_cache_create("kvm-pte", size, size, 0, pte_ctor);
|
||||
if (!kvm_pte_cache)
|
||||
return -ENOMEM;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvmppc_radix_exit(void)
|
||||
{
|
||||
kmem_cache_destroy(kvm_pte_cache);
|
||||
}
|
@ -1135,7 +1135,7 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
|
||||
/*
|
||||
* Userspace can only modify DPFD (default prefetch depth),
|
||||
* ILE (interrupt little-endian) and TC (translation control).
|
||||
* On POWER8 userspace can also modify AIL (alt. interrupt loc.)
|
||||
* On POWER8 and POWER9 userspace can also modify AIL (alt. interrupt loc.).
|
||||
*/
|
||||
mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_207S))
|
||||
@ -1821,6 +1821,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
|
||||
vcpu->arch.vcore = vcore;
|
||||
vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;
|
||||
vcpu->arch.thread_cpu = -1;
|
||||
vcpu->arch.prev_cpu = -1;
|
||||
|
||||
vcpu->arch.cpu_type = KVM_CPU_3S_64;
|
||||
kvmppc_sanity_check(vcpu);
|
||||
@ -1950,11 +1951,33 @@ static void kvmppc_release_hwthread(int cpu)
|
||||
tpaca->kvm_hstate.kvm_split_mode = NULL;
|
||||
}
|
||||
|
||||
/*
 * Deliberately empty callback for smp_call_function_single(); the argument
 * is ignored.
 */
static void do_nothing(void *x)
{
}
|
||||
|
||||
/*
 * Mark the core containing @cpu as needing a TLB flush for @kvm and
 * send a synchronous cross-call to each of its threads that is
 * currently flagged in kvm->arch.cpu_in_guest.
 *
 * @kvm:  VM whose need_tlb_flush / cpu_in_guest masks are used
 * @cpu:  any thread of the target core; it is rounded down to the
 *        core's first thread sibling before use
 * @vcpu: not referenced in this function
 */
static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int i;
|
||||
|
||||
cpu = cpu_first_thread_sibling(cpu);
|
||||
cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush);
|
||||
/*
|
||||
* Make sure setting of bit in need_tlb_flush precedes
|
||||
* testing of cpu_in_guest bits. The matching barrier on
|
||||
* the other side is the first smp_mb() in kvmppc_run_core().
|
||||
*/
|
||||
smp_mb();
|
||||
/* Last argument 1: wait for do_nothing to complete on each target. */
for (i = 0; i < threads_per_core; ++i)
|
||||
if (cpumask_test_cpu(cpu + i, &kvm->arch.cpu_in_guest))
|
||||
smp_call_function_single(cpu + i, do_nothing, NULL, 1);
|
||||
}
|
||||
|
||||
static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
|
||||
{
|
||||
int cpu;
|
||||
struct paca_struct *tpaca;
|
||||
struct kvmppc_vcore *mvc = vc->master_vcore;
|
||||
struct kvm *kvm = vc->kvm;
|
||||
|
||||
cpu = vc->pcpu;
|
||||
if (vcpu) {
|
||||
@ -1965,6 +1988,27 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
|
||||
cpu += vcpu->arch.ptid;
|
||||
vcpu->cpu = mvc->pcpu;
|
||||
vcpu->arch.thread_cpu = cpu;
|
||||
|
||||
/*
|
||||
* With radix, the guest can do TLB invalidations itself,
|
||||
* and it could choose to use the local form (tlbiel) if
|
||||
* it is invalidating a translation that has only ever been
|
||||
* used on one vcpu. However, that doesn't mean it has
|
||||
* only ever been used on one physical cpu, since vcpus
|
||||
* can move around between pcpus. To cope with this, when
|
||||
* a vcpu moves from one pcpu to another, we need to tell
|
||||
* any vcpus running on the core where this vcpu previously
|
||||
* ran to flush the TLB. The TLB is shared between threads,
|
||||
* so we use a single bit in .need_tlb_flush for all 4 threads.
|
||||
*/
|
||||
if (kvm_is_radix(kvm) && vcpu->arch.prev_cpu != cpu) {
|
||||
if (vcpu->arch.prev_cpu >= 0 &&
|
||||
cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
|
||||
cpu_first_thread_sibling(cpu))
|
||||
radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
|
||||
vcpu->arch.prev_cpu = cpu;
|
||||
}
|
||||
cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
|
||||
}
|
||||
tpaca = &paca[cpu];
|
||||
tpaca->kvm_hstate.kvm_vcpu = vcpu;
|
||||
@ -2552,6 +2596,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
|
||||
kvmppc_release_hwthread(pcpu + i);
|
||||
if (sip && sip->napped[i])
|
||||
kvmppc_ipi_thread(pcpu + i);
|
||||
cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest);
|
||||
}
|
||||
|
||||
kvmppc_set_host_core(pcpu);
|
||||
@ -2877,7 +2922,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
|
||||
smp_mb();
|
||||
|
||||
/* On the first time here, set up HTAB and VRMA */
|
||||
if (!vcpu->kvm->arch.hpte_setup_done) {
|
||||
if (!kvm_is_radix(vcpu->kvm) && !vcpu->kvm->arch.hpte_setup_done) {
|
||||
r = kvmppc_hv_setup_htab_rma(vcpu);
|
||||
if (r)
|
||||
goto out;
|
||||
@ -2939,6 +2984,13 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
|
||||
{
|
||||
struct kvm_ppc_one_seg_page_size *sps;
|
||||
|
||||
/*
|
||||
* Since we don't yet support HPT guests on a radix host,
|
||||
* return an error if the host uses radix.
|
||||
*/
|
||||
if (radix_enabled())
|
||||
return -EINVAL;
|
||||
|
||||
info->flags = KVM_PPC_PAGE_SIZES_REAL;
|
||||
if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
|
||||
info->flags |= KVM_PPC_1T_SEGMENTS;
|
||||
@ -2961,8 +3013,10 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
|
||||
{
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *memslot;
|
||||
int r;
|
||||
int i, r;
|
||||
unsigned long n;
|
||||
unsigned long *buf;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
|
||||
@ -2976,15 +3030,32 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
|
||||
if (!memslot->dirty_bitmap)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Use second half of bitmap area because radix accumulates
|
||||
* bits in the first half.
|
||||
*/
|
||||
n = kvm_dirty_bitmap_bytes(memslot);
|
||||
memset(memslot->dirty_bitmap, 0, n);
|
||||
buf = memslot->dirty_bitmap + n / sizeof(long);
|
||||
memset(buf, 0, n);
|
||||
|
||||
r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap);
|
||||
if (kvm_is_radix(kvm))
|
||||
r = kvmppc_hv_get_dirty_log_radix(kvm, memslot, buf);
|
||||
else
|
||||
r = kvmppc_hv_get_dirty_log_hpt(kvm, memslot, buf);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
/* Harvest dirty bits from VPA and DTL updates */
|
||||
/* Note: we never modify the SLB shadow buffer areas */
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
spin_lock(&vcpu->arch.vpa_update_lock);
|
||||
kvmppc_harvest_vpa_dirty(&vcpu->arch.vpa, memslot, buf);
|
||||
kvmppc_harvest_vpa_dirty(&vcpu->arch.dtl, memslot, buf);
|
||||
spin_unlock(&vcpu->arch.vpa_update_lock);
|
||||
}
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
|
||||
if (copy_to_user(log->dirty_bitmap, buf, n))
|
||||
goto out;
|
||||
|
||||
r = 0;
|
||||
@ -3005,6 +3076,15 @@ static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
|
||||
static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
|
||||
unsigned long npages)
|
||||
{
|
||||
/*
|
||||
* For now, if radix_enabled() then we only support radix guests,
|
||||
* and in that case we don't need the rmap array.
|
||||
*/
|
||||
if (radix_enabled()) {
|
||||
slot->arch.rmap = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
|
||||
if (!slot->arch.rmap)
|
||||
return -ENOMEM;
|
||||
@ -3037,7 +3117,7 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
|
||||
if (npages)
|
||||
atomic64_inc(&kvm->arch.mmio_update);
|
||||
|
||||
if (npages && old->npages) {
|
||||
if (npages && old->npages && !kvm_is_radix(kvm)) {
|
||||
/*
|
||||
* If modifying a memslot, reset all the rmap dirty bits.
|
||||
* If this is a new memslot, we don't need to do anything
|
||||
@ -3046,7 +3126,7 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
|
||||
*/
|
||||
slots = kvm_memslots(kvm);
|
||||
memslot = id_to_memslot(slots, mem->slot);
|
||||
kvmppc_hv_get_dirty_log(kvm, memslot, NULL);
|
||||
kvmppc_hv_get_dirty_log_hpt(kvm, memslot, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3085,14 +3165,20 @@ static void kvmppc_setup_partition_table(struct kvm *kvm)
|
||||
{
|
||||
unsigned long dw0, dw1;
|
||||
|
||||
/* PS field - page size for VRMA */
|
||||
dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) |
|
||||
((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1);
|
||||
/* HTABSIZE and HTABORG fields */
|
||||
dw0 |= kvm->arch.sdr1;
|
||||
if (!kvm_is_radix(kvm)) {
|
||||
/* PS field - page size for VRMA */
|
||||
dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) |
|
||||
((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1);
|
||||
/* HTABSIZE and HTABORG fields */
|
||||
dw0 |= kvm->arch.sdr1;
|
||||
|
||||
/* Second dword has GR=0; other fields are unused since UPRT=0 */
|
||||
dw1 = 0;
|
||||
/* Second dword as set by userspace */
|
||||
dw1 = kvm->arch.process_table;
|
||||
} else {
|
||||
dw0 = PATB_HR | radix__get_tree_size() |
|
||||
__pa(kvm->arch.pgtable) | RADIX_PGD_INDEX_SIZE;
|
||||
dw1 = PATB_GR | kvm->arch.process_table;
|
||||
}
|
||||
|
||||
mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1);
|
||||
}
|
||||
@ -3262,6 +3348,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
|
||||
{
|
||||
unsigned long lpcr, lpid;
|
||||
char buf[32];
|
||||
int ret;
|
||||
|
||||
/* Allocate the guest's logical partition ID */
|
||||
|
||||
@ -3309,13 +3396,30 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
|
||||
lpcr |= LPCR_HVICE;
|
||||
}
|
||||
|
||||
/*
|
||||
* For now, if the host uses radix, the guest must be radix.
|
||||
*/
|
||||
if (radix_enabled()) {
|
||||
kvm->arch.radix = 1;
|
||||
lpcr &= ~LPCR_VPM1;
|
||||
lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR;
|
||||
ret = kvmppc_init_vm_radix(kvm);
|
||||
if (ret) {
|
||||
kvmppc_free_lpid(kvm->arch.lpid);
|
||||
return ret;
|
||||
}
|
||||
kvmppc_setup_partition_table(kvm);
|
||||
}
|
||||
|
||||
kvm->arch.lpcr = lpcr;
|
||||
|
||||
/*
|
||||
* Work out how many sets the TLB has, for the use of
|
||||
* the TLB invalidation loop in book3s_hv_rmhandlers.S.
|
||||
*/
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_300))
|
||||
if (kvm_is_radix(kvm))
|
||||
kvm->arch.tlb_sets = POWER9_TLB_SETS_RADIX; /* 128 */
|
||||
else if (cpu_has_feature(CPU_FTR_ARCH_300))
|
||||
kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH; /* 256 */
|
||||
else if (cpu_has_feature(CPU_FTR_ARCH_207S))
|
||||
kvm->arch.tlb_sets = POWER8_TLB_SETS; /* 512 */
|
||||
@ -3325,8 +3429,11 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
|
||||
/*
|
||||
* Track that we now have a HV mode VM active. This blocks secondary
|
||||
* CPU threads from coming online.
|
||||
* On POWER9, we only need to do this for HPT guests on a radix
|
||||
* host, which is not yet supported.
|
||||
*/
|
||||
kvm_hv_vm_activated();
|
||||
if (!cpu_has_feature(CPU_FTR_ARCH_300))
|
||||
kvm_hv_vm_activated();
|
||||
|
||||
/*
|
||||
* Create a debugfs directory for the VM
|
||||
@ -3352,11 +3459,17 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
|
||||
{
|
||||
debugfs_remove_recursive(kvm->arch.debugfs_dir);
|
||||
|
||||
kvm_hv_vm_deactivated();
|
||||
if (!cpu_has_feature(CPU_FTR_ARCH_300))
|
||||
kvm_hv_vm_deactivated();
|
||||
|
||||
kvmppc_free_vcores(kvm);
|
||||
|
||||
kvmppc_free_hpt(kvm);
|
||||
kvmppc_free_lpid(kvm->arch.lpid);
|
||||
|
||||
if (kvm_is_radix(kvm))
|
||||
kvmppc_free_radix(kvm);
|
||||
else
|
||||
kvmppc_free_hpt(kvm);
|
||||
|
||||
kvmppc_free_pimap(kvm);
|
||||
}
|
||||
@ -3385,11 +3498,6 @@ static int kvmppc_core_check_processor_compat_hv(void)
|
||||
if (!cpu_has_feature(CPU_FTR_HVMODE) ||
|
||||
!cpu_has_feature(CPU_FTR_ARCH_206))
|
||||
return -EIO;
|
||||
/*
|
||||
* Disable KVM for Power9 in radix mode.
|
||||
*/
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled())
|
||||
return -EIO;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -3657,6 +3765,41 @@ static void init_default_hcalls(void)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Apply a userspace-supplied MMU v3 configuration to @kvm; installed as
 * the .configure_mmu hook in kvm_ops_hv.
 *
 * @kvm: VM being configured
 * @cfg: requested flags and process table value
 *
 * After validation, stores cfg->process_table in
 * kvm->arch.process_table, rewrites the partition table entry via
 * kvmppc_setup_partition_table(), and updates the LPCR_GTSE bit through
 * kvmppc_update_lpcr() to match KVM_PPC_MMUV3_GTSE.
 *
 * Returns 0 on success, -ENODEV if the CPU lacks CPU_FTR_ARCH_300, or
 * -EINVAL if: flags other than KVM_PPC_MMUV3_RADIX/KVM_PPC_MMUV3_GTSE
 * are set; the requested radix mode differs from kvm_is_radix(kvm);
 * the PATB_GR bit of process_table disagrees with the radix flag; or
 * the PRTS_MASK field of process_table exceeds 24.
 */
static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
|
||||
{
|
||||
unsigned long lpcr;
|
||||
int radix;
|
||||
|
||||
/* If not on a POWER9, reject it */
|
||||
if (!cpu_has_feature(CPU_FTR_ARCH_300))
|
||||
return -ENODEV;
|
||||
|
||||
/* If any unknown flags set, reject it */
|
||||
if (cfg->flags & ~(KVM_PPC_MMUV3_RADIX | KVM_PPC_MMUV3_GTSE))
|
||||
return -EINVAL;
|
||||
|
||||
/* We can't change a guest to/from radix yet */
|
||||
radix = !!(cfg->flags & KVM_PPC_MMUV3_RADIX);
|
||||
if (radix != kvm_is_radix(kvm))
|
||||
return -EINVAL;
|
||||
|
||||
/* GR (guest radix) bit in process_table field must match */
|
||||
if (!!(cfg->process_table & PATB_GR) != radix)
|
||||
return -EINVAL;
|
||||
|
||||
/* Process table size field must be reasonable, i.e. <= 24 */
|
||||
if ((cfg->process_table & PRTS_MASK) > 24)
|
||||
return -EINVAL;
|
||||
|
||||
kvm->arch.process_table = cfg->process_table;
|
||||
kvmppc_setup_partition_table(kvm);
|
||||
|
||||
/* Only the GTSE bit of the LPCR is changed here (mask = LPCR_GTSE) */
lpcr = (cfg->flags & KVM_PPC_MMUV3_GTSE) ? LPCR_GTSE : 0;
|
||||
kvmppc_update_lpcr(kvm, lpcr, LPCR_GTSE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct kvmppc_ops kvm_ops_hv = {
|
||||
.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
|
||||
.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
|
||||
@ -3694,6 +3837,8 @@ static struct kvmppc_ops kvm_ops_hv = {
|
||||
.irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv,
|
||||
.irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv,
|
||||
#endif
|
||||
.configure_mmu = kvmhv_configure_mmu,
|
||||
.get_rmmu_info = kvmhv_get_rmmu_info,
|
||||
};
|
||||
|
||||
static int kvm_init_subcore_bitmap(void)
|
||||
@ -3728,6 +3873,11 @@ static int kvm_init_subcore_bitmap(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Returns nonzero when the CPU has the CPU_FTR_ARCH_300 feature and
 * radix_enabled() is true, and 0 otherwise.  Used to gate
 * kvmppc_radix_init() and kvmppc_radix_exit() at module init/exit.
 */
static int kvmppc_radix_possible(void)
|
||||
{
|
||||
return cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled();
|
||||
}
|
||||
|
||||
static int kvmppc_book3s_init_hv(void)
|
||||
{
|
||||
int r;
|
||||
@ -3767,12 +3917,19 @@ static int kvmppc_book3s_init_hv(void)
|
||||
init_vcore_lists();
|
||||
|
||||
r = kvmppc_mmu_hv_init();
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (kvmppc_radix_possible())
|
||||
r = kvmppc_radix_init();
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
 * Teardown for the HV backend: frees the host real-mode ops, calls
 * kvmppc_radix_exit() under the same kvmppc_radix_possible() condition
 * that guards kvmppc_radix_init(), and clears the kvmppc_hv_ops pointer.
 */
static void kvmppc_book3s_exit_hv(void)
|
||||
{
|
||||
kvmppc_free_host_rm_ops();
|
||||
if (kvmppc_radix_possible())
|
||||
kvmppc_radix_exit();
|
||||
kvmppc_hv_ops = NULL;
|
||||
}
|
||||
|
||||
|
@ -200,7 +200,6 @@ static inline void rm_writeb(unsigned long paddr, u8 val)
|
||||
|
||||
/*
|
||||
* Send an interrupt or message to another CPU.
|
||||
* This can only be called in real mode.
|
||||
* The caller needs to include any barrier needed to order writes
|
||||
* to memory vs. the IPI/message.
|
||||
*/
|
||||
@ -229,8 +228,7 @@ void kvmhv_rm_send_ipi(int cpu)
|
||||
if (xics_phys)
|
||||
rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
|
||||
else
|
||||
opal_rm_int_set_mfrr(get_hard_smp_processor_id(cpu),
|
||||
IPI_PRIORITY);
|
||||
opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -412,14 +410,13 @@ static long kvmppc_read_one_intr(bool *again)
|
||||
|
||||
/* Now read the interrupt from the ICP */
|
||||
xics_phys = local_paca->kvm_hstate.xics_phys;
|
||||
if (!xics_phys) {
|
||||
/* Use OPAL to read the XIRR */
|
||||
rc = opal_rm_int_get_xirr(&xirr, false);
|
||||
if (rc < 0)
|
||||
return 1;
|
||||
} else {
|
||||
rc = 0;
|
||||
if (!xics_phys)
|
||||
rc = opal_int_get_xirr(&xirr, false);
|
||||
else
|
||||
xirr = _lwzcix(xics_phys + XICS_XIRR);
|
||||
}
|
||||
if (rc < 0)
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* Save XIRR for later. Since we get control in reverse endian
|
||||
@ -445,15 +442,16 @@ static long kvmppc_read_one_intr(bool *again)
|
||||
* If it is an IPI, clear the MFRR and EOI it.
|
||||
*/
|
||||
if (xisr == XICS_IPI) {
|
||||
rc = 0;
|
||||
if (xics_phys) {
|
||||
_stbcix(xics_phys + XICS_MFRR, 0xff);
|
||||
_stwcix(xics_phys + XICS_XIRR, xirr);
|
||||
} else {
|
||||
opal_rm_int_set_mfrr(hard_smp_processor_id(), 0xff);
|
||||
rc = opal_rm_int_eoi(h_xirr);
|
||||
/* If rc > 0, there is another interrupt pending */
|
||||
*again = rc > 0;
|
||||
opal_int_set_mfrr(hard_smp_processor_id(), 0xff);
|
||||
rc = opal_int_eoi(h_xirr);
|
||||
}
|
||||
/* If rc > 0, there is another interrupt pending */
|
||||
*again = rc > 0;
|
||||
|
||||
/*
|
||||
* Need to ensure side effects of above stores
|
||||
@ -474,8 +472,8 @@ static long kvmppc_read_one_intr(bool *again)
|
||||
if (xics_phys)
|
||||
_stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY);
|
||||
else
|
||||
opal_rm_int_set_mfrr(hard_smp_processor_id(),
|
||||
IPI_PRIORITY);
|
||||
opal_int_set_mfrr(hard_smp_processor_id(),
|
||||
IPI_PRIORITY);
|
||||
/* Let side effects complete */
|
||||
smp_mb();
|
||||
return 1;
|
||||
|
@ -43,6 +43,7 @@ static void *real_vmalloc_addr(void *x)
|
||||
static int global_invalidates(struct kvm *kvm, unsigned long flags)
|
||||
{
|
||||
int global;
|
||||
int cpu;
|
||||
|
||||
/*
|
||||
* If there is only one vcore, and it's currently running,
|
||||
@ -60,8 +61,14 @@ static int global_invalidates(struct kvm *kvm, unsigned long flags)
|
||||
/* any other core might now have stale TLB entries... */
|
||||
smp_wmb();
|
||||
cpumask_setall(&kvm->arch.need_tlb_flush);
|
||||
cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu,
|
||||
&kvm->arch.need_tlb_flush);
|
||||
cpu = local_paca->kvm_hstate.kvm_vcore->pcpu;
|
||||
/*
|
||||
* On POWER9, threads are independent but the TLB is shared,
|
||||
* so use the bit for the first thread to represent the core.
|
||||
*/
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_300))
|
||||
cpu = cpu_first_thread_sibling(cpu);
|
||||
cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush);
|
||||
}
|
||||
|
||||
return global;
|
||||
@ -182,6 +189,8 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
|
||||
unsigned long mmu_seq;
|
||||
unsigned long rcbits, irq_flags = 0;
|
||||
|
||||
if (kvm_is_radix(kvm))
|
||||
return H_FUNCTION;
|
||||
psize = hpte_page_size(pteh, ptel);
|
||||
if (!psize)
|
||||
return H_PARAMETER;
|
||||
@ -458,6 +467,8 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
|
||||
struct revmap_entry *rev;
|
||||
u64 pte, orig_pte, pte_r;
|
||||
|
||||
if (kvm_is_radix(kvm))
|
||||
return H_FUNCTION;
|
||||
if (pte_index >= kvm->arch.hpt_npte)
|
||||
return H_PARAMETER;
|
||||
hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
|
||||
@ -529,6 +540,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
|
||||
struct revmap_entry *rev, *revs[4];
|
||||
u64 hp0, hp1;
|
||||
|
||||
if (kvm_is_radix(kvm))
|
||||
return H_FUNCTION;
|
||||
global = global_invalidates(kvm, 0);
|
||||
for (i = 0; i < 4 && ret == H_SUCCESS; ) {
|
||||
n = 0;
|
||||
@ -642,6 +655,8 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
|
||||
unsigned long v, r, rb, mask, bits;
|
||||
u64 pte_v, pte_r;
|
||||
|
||||
if (kvm_is_radix(kvm))
|
||||
return H_FUNCTION;
|
||||
if (pte_index >= kvm->arch.hpt_npte)
|
||||
return H_PARAMETER;
|
||||
|
||||
@ -711,6 +726,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
|
||||
int i, n = 1;
|
||||
struct revmap_entry *rev = NULL;
|
||||
|
||||
if (kvm_is_radix(kvm))
|
||||
return H_FUNCTION;
|
||||
if (pte_index >= kvm->arch.hpt_npte)
|
||||
return H_PARAMETER;
|
||||
if (flags & H_READ_4) {
|
||||
@ -750,6 +767,8 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
|
||||
unsigned long *rmap;
|
||||
long ret = H_NOT_FOUND;
|
||||
|
||||
if (kvm_is_radix(kvm))
|
||||
return H_FUNCTION;
|
||||
if (pte_index >= kvm->arch.hpt_npte)
|
||||
return H_PARAMETER;
|
||||
|
||||
@ -796,6 +815,8 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
|
||||
unsigned long *rmap;
|
||||
long ret = H_NOT_FOUND;
|
||||
|
||||
if (kvm_is_radix(kvm))
|
||||
return H_FUNCTION;
|
||||
if (pte_index >= kvm->arch.hpt_npte)
|
||||
return H_PARAMETER;
|
||||
|
||||
|
@ -36,7 +36,7 @@ EXPORT_SYMBOL(kvm_irq_bypass);
|
||||
|
||||
static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
|
||||
u32 new_irq);
|
||||
static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu);
|
||||
static int xics_opal_set_server(unsigned int hw_irq, int server_cpu);
|
||||
|
||||
/* -- ICS routines -- */
|
||||
static void ics_rm_check_resend(struct kvmppc_xics *xics,
|
||||
@ -70,11 +70,9 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
|
||||
hcpu = hcore << threads_shift;
|
||||
kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
|
||||
smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
|
||||
if (paca[hcpu].kvm_hstate.xics_phys)
|
||||
icp_native_cause_ipi_rm(hcpu);
|
||||
else
|
||||
opal_rm_int_set_mfrr(get_hard_smp_processor_id(hcpu),
|
||||
IPI_PRIORITY);
|
||||
kvmppc_set_host_ipi(hcpu, 1);
|
||||
smp_mb();
|
||||
kvmhv_rm_send_ipi(hcpu);
|
||||
}
|
||||
#else
|
||||
static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { }
|
||||
@ -730,7 +728,7 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
|
||||
++vcpu->stat.pthru_host;
|
||||
if (state->intr_cpu != pcpu) {
|
||||
++vcpu->stat.pthru_bad_aff;
|
||||
xics_opal_rm_set_server(state->host_irq, pcpu);
|
||||
xics_opal_set_server(state->host_irq, pcpu);
|
||||
}
|
||||
state->intr_cpu = -1;
|
||||
}
|
||||
@ -758,16 +756,16 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
|
||||
if (xics_phys) {
|
||||
_stwcix(xics_phys + XICS_XIRR, xirr);
|
||||
} else {
|
||||
rc = opal_rm_int_eoi(be32_to_cpu(xirr));
|
||||
rc = opal_int_eoi(be32_to_cpu(xirr));
|
||||
*again = rc > 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu)
|
||||
static int xics_opal_set_server(unsigned int hw_irq, int server_cpu)
|
||||
{
|
||||
unsigned int mangle_cpu = get_hard_smp_processor_id(server_cpu) << 2;
|
||||
|
||||
return opal_rm_set_xive(hw_irq, mangle_cpu, DEFAULT_PRIORITY);
|
||||
return opal_set_xive(hw_irq, mangle_cpu, DEFAULT_PRIORITY);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -148,6 +148,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
||||
addi r1, r1, 112
|
||||
ld r7, HSTATE_HOST_MSR(r13)
|
||||
|
||||
/*
|
||||
* If we came back from the guest via a relocation-on interrupt,
|
||||
* we will be in virtual mode at this point, which makes it a
|
||||
* little easier to get back to the caller.
|
||||
*/
|
||||
mfmsr r0
|
||||
andi. r0, r0, MSR_IR /* in real mode? */
|
||||
bne .Lvirt_return
|
||||
|
||||
cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
|
||||
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
|
||||
beq 11f
|
||||
@ -181,6 +190,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
||||
mtspr SPRN_HSRR1, r7
|
||||
ba 0xe80
|
||||
|
||||
/* Virtual-mode return - can't get here for HMI or machine check */
|
||||
.Lvirt_return:
|
||||
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
|
||||
beq 16f
|
||||
cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
|
||||
beq 17f
|
||||
andi. r0, r7, MSR_EE /* were interrupts hard-enabled? */
|
||||
beq 18f
|
||||
mtmsrd r7, 1 /* if so then re-enable them */
|
||||
18: mtlr r8
|
||||
blr
|
||||
|
||||
16: mtspr SPRN_HSRR0, r8 /* jump to reloc-on external vector */
|
||||
mtspr SPRN_HSRR1, r7
|
||||
b exc_virt_0x4500_hardware_interrupt
|
||||
|
||||
17: mtspr SPRN_HSRR0, r8
|
||||
mtspr SPRN_HSRR1, r7
|
||||
b exc_virt_0x4e80_h_doorbell
|
||||
|
||||
kvmppc_primary_no_guest:
|
||||
/* We handle this much like a ceded vcpu */
|
||||
/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
|
||||
@ -518,6 +547,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
||||
/* Stack frame offsets */
|
||||
#define STACK_SLOT_TID (112-16)
|
||||
#define STACK_SLOT_PSSCR (112-24)
|
||||
#define STACK_SLOT_PID (112-32)
|
||||
|
||||
.global kvmppc_hv_entry
|
||||
kvmppc_hv_entry:
|
||||
@ -530,6 +560,7 @@ kvmppc_hv_entry:
|
||||
* R1 = host R1
|
||||
* R2 = TOC
|
||||
* all other volatile GPRS = free
|
||||
* Does not preserve non-volatile GPRs or CR fields
|
||||
*/
|
||||
mflr r0
|
||||
std r0, PPC_LR_STKOFF(r1)
|
||||
@ -549,32 +580,38 @@ kvmppc_hv_entry:
|
||||
bl kvmhv_start_timing
|
||||
1:
|
||||
#endif
|
||||
/* Clear out SLB */
|
||||
|
||||
/* Use cr7 as an indication of radix mode */
|
||||
ld r5, HSTATE_KVM_VCORE(r13)
|
||||
ld r9, VCORE_KVM(r5) /* pointer to struct kvm */
|
||||
lbz r0, KVM_RADIX(r9)
|
||||
cmpwi cr7, r0, 0
|
||||
|
||||
/* Clear out SLB if hash */
|
||||
bne cr7, 2f
|
||||
li r6,0
|
||||
slbmte r6,r6
|
||||
slbia
|
||||
ptesync
|
||||
|
||||
2:
|
||||
/*
|
||||
* POWER7/POWER8 host -> guest partition switch code.
|
||||
* We don't have to lock against concurrent tlbies,
|
||||
* but we do have to coordinate across hardware threads.
|
||||
*/
|
||||
/* Set bit in entry map iff exit map is zero. */
|
||||
ld r5, HSTATE_KVM_VCORE(r13)
|
||||
li r7, 1
|
||||
lbz r6, HSTATE_PTID(r13)
|
||||
sld r7, r7, r6
|
||||
addi r9, r5, VCORE_ENTRY_EXIT
|
||||
21: lwarx r3, 0, r9
|
||||
addi r8, r5, VCORE_ENTRY_EXIT
|
||||
21: lwarx r3, 0, r8
|
||||
cmpwi r3, 0x100 /* any threads starting to exit? */
|
||||
bge secondary_too_late /* if so we're too late to the party */
|
||||
or r3, r3, r7
|
||||
stwcx. r3, 0, r9
|
||||
stwcx. r3, 0, r8
|
||||
bne 21b
|
||||
|
||||
/* Primary thread switches to guest partition. */
|
||||
ld r9,VCORE_KVM(r5) /* pointer to struct kvm */
|
||||
cmpwi r6,0
|
||||
bne 10f
|
||||
lwz r7,KVM_LPID(r9)
|
||||
@ -590,30 +627,44 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
|
||||
|
||||
/* See if we need to flush the TLB */
|
||||
lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */
|
||||
BEGIN_FTR_SECTION
|
||||
/*
|
||||
* On POWER9, individual threads can come in here, but the
|
||||
* TLB is shared between the 4 threads in a core, hence
|
||||
* invalidating on one thread invalidates for all.
|
||||
* Thus we make all 4 threads use the same bit here.
|
||||
*/
|
||||
clrrdi r6,r6,2
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||
clrldi r7,r6,64-6 /* extract bit number (6 bits) */
|
||||
srdi r6,r6,6 /* doubleword number */
|
||||
sldi r6,r6,3 /* address offset */
|
||||
add r6,r6,r9
|
||||
addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */
|
||||
li r0,1
|
||||
sld r0,r0,r7
|
||||
li r8,1
|
||||
sld r8,r8,r7
|
||||
ld r7,0(r6)
|
||||
and. r7,r7,r0
|
||||
and. r7,r7,r8
|
||||
beq 22f
|
||||
23: ldarx r7,0,r6 /* if set, clear the bit */
|
||||
andc r7,r7,r0
|
||||
stdcx. r7,0,r6
|
||||
bne 23b
|
||||
/* Flush the TLB of any entries for this LPID */
|
||||
lwz r6,KVM_TLB_SETS(r9)
|
||||
li r0,0 /* RS for P9 version of tlbiel */
|
||||
mtctr r6
|
||||
lwz r0,KVM_TLB_SETS(r9)
|
||||
mtctr r0
|
||||
li r7,0x800 /* IS field = 0b10 */
|
||||
ptesync
|
||||
28: tlbiel r7
|
||||
li r0,0 /* RS for P9 version of tlbiel */
|
||||
bne cr7, 29f
|
||||
28: tlbiel r7 /* On P9, rs=0, RIC=0, PRS=0, R=0 */
|
||||
addi r7,r7,0x1000
|
||||
bdnz 28b
|
||||
ptesync
|
||||
b 30f
|
||||
29: PPC_TLBIEL(7,0,2,1,1) /* for radix, RIC=2, PRS=1, R=1 */
|
||||
addi r7,r7,0x1000
|
||||
bdnz 29b
|
||||
30: ptesync
|
||||
23: ldarx r7,0,r6 /* clear the bit after TLB flushed */
|
||||
andc r7,r7,r8
|
||||
stdcx. r7,0,r6
|
||||
bne 23b
|
||||
|
||||
/* Add timebase offset onto timebase */
|
||||
22: ld r8,VCORE_TB_OFFSET(r5)
|
||||
@ -658,7 +709,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
||||
beq kvmppc_primary_no_guest
|
||||
kvmppc_got_guest:
|
||||
|
||||
/* Load up guest SLB entries */
|
||||
/* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
|
||||
lwz r5,VCPU_SLB_MAX(r4)
|
||||
cmpwi r5,0
|
||||
beq 9f
|
||||
@ -696,8 +747,10 @@ kvmppc_got_guest:
|
||||
BEGIN_FTR_SECTION
|
||||
mfspr r5, SPRN_TIDR
|
||||
mfspr r6, SPRN_PSSCR
|
||||
mfspr r7, SPRN_PID
|
||||
std r5, STACK_SLOT_TID(r1)
|
||||
std r6, STACK_SLOT_PSSCR(r1)
|
||||
std r7, STACK_SLOT_PID(r1)
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||
|
||||
BEGIN_FTR_SECTION
|
||||
@ -823,6 +876,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
|
||||
mtspr SPRN_BESCR, r6
|
||||
mtspr SPRN_PID, r7
|
||||
mtspr SPRN_WORT, r8
|
||||
BEGIN_FTR_SECTION
|
||||
PPC_INVALIDATE_ERAT
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
|
||||
BEGIN_FTR_SECTION
|
||||
/* POWER8-only registers */
|
||||
ld r5, VCPU_TCSCR(r4)
|
||||
@ -1057,13 +1113,13 @@ hdec_soon:
|
||||
kvmppc_interrupt_hv:
|
||||
/*
|
||||
* Register contents:
|
||||
* R12 = interrupt vector
|
||||
* R12 = (guest CR << 32) | interrupt vector
|
||||
* R13 = PACA
|
||||
* guest CR, R12 saved in shadow VCPU SCRATCH1/0
|
||||
* guest R12 saved in shadow VCPU SCRATCH0
|
||||
* guest CTR saved in shadow VCPU SCRATCH1 if RELOCATABLE
|
||||
* guest R13 saved in SPRN_SCRATCH0
|
||||
*/
|
||||
std r9, HSTATE_SCRATCH2(r13)
|
||||
|
||||
lbz r9, HSTATE_IN_GUEST(r13)
|
||||
cmpwi r9, KVM_GUEST_MODE_HOST_HV
|
||||
beq kvmppc_bad_host_intr
|
||||
@ -1094,8 +1150,9 @@ kvmppc_interrupt_hv:
|
||||
std r10, VCPU_GPR(R10)(r9)
|
||||
std r11, VCPU_GPR(R11)(r9)
|
||||
ld r3, HSTATE_SCRATCH0(r13)
|
||||
lwz r4, HSTATE_SCRATCH1(r13)
|
||||
std r3, VCPU_GPR(R12)(r9)
|
||||
/* CR is in the high half of r12 */
|
||||
srdi r4, r12, 32
|
||||
stw r4, VCPU_CR(r9)
|
||||
BEGIN_FTR_SECTION
|
||||
ld r3, HSTATE_CFAR(r13)
|
||||
@ -1114,6 +1171,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
|
||||
mfspr r11, SPRN_SRR1
|
||||
std r10, VCPU_SRR0(r9)
|
||||
std r11, VCPU_SRR1(r9)
|
||||
/* trap is in the low half of r12, clear CR from the high half */
|
||||
clrldi r12, r12, 32
|
||||
andi. r0, r12, 2 /* need to read HSRR0/1? */
|
||||
beq 1f
|
||||
mfspr r10, SPRN_HSRR0
|
||||
@ -1149,7 +1208,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
|
||||
11: stw r3,VCPU_HEIR(r9)
|
||||
|
||||
/* these are volatile across C function calls */
|
||||
#ifdef CONFIG_RELOCATABLE
|
||||
ld r3, HSTATE_SCRATCH1(r13)
|
||||
mtctr r3
|
||||
#else
|
||||
mfctr r3
|
||||
#endif
|
||||
mfxer r4
|
||||
std r3, VCPU_CTR(r9)
|
||||
std r4, VCPU_XER(r9)
|
||||
@ -1285,11 +1349,15 @@ mc_cont:
|
||||
mtspr SPRN_CTRLT,r6
|
||||
4:
|
||||
/* Read the guest SLB and save it away */
|
||||
ld r5, VCPU_KVM(r9)
|
||||
lbz r0, KVM_RADIX(r5)
|
||||
cmpwi r0, 0
|
||||
li r5, 0
|
||||
bne 3f /* for radix, save 0 entries */
|
||||
lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
|
||||
mtctr r0
|
||||
li r6,0
|
||||
addi r7,r9,VCPU_SLB
|
||||
li r5,0
|
||||
1: slbmfee r8,r6
|
||||
andis. r0,r8,SLB_ESID_V@h
|
||||
beq 2f
|
||||
@ -1301,7 +1369,7 @@ mc_cont:
|
||||
addi r5,r5,1
|
||||
2: addi r6,r6,1
|
||||
bdnz 1b
|
||||
stw r5,VCPU_SLB_MAX(r9)
|
||||
3: stw r5,VCPU_SLB_MAX(r9)
|
||||
|
||||
/*
|
||||
* Save the guest PURR/SPURR
|
||||
@ -1550,9 +1618,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
||||
BEGIN_FTR_SECTION
|
||||
ld r5, STACK_SLOT_TID(r1)
|
||||
ld r6, STACK_SLOT_PSSCR(r1)
|
||||
ld r7, STACK_SLOT_PID(r1)
|
||||
mtspr SPRN_TIDR, r5
|
||||
mtspr SPRN_PSSCR, r6
|
||||
mtspr SPRN_PID, r7
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||
BEGIN_FTR_SECTION
|
||||
PPC_INVALIDATE_ERAT
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
|
||||
|
||||
/*
|
||||
* POWER7/POWER8 guest -> host partition switch code.
|
||||
@ -1663,6 +1736,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
||||
isync
|
||||
|
||||
/* load host SLB entries */
|
||||
BEGIN_MMU_FTR_SECTION
|
||||
b 0f
|
||||
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
|
||||
ld r8,PACA_SLBSHADOWPTR(r13)
|
||||
|
||||
.rept SLB_NUM_BOLTED
|
||||
@ -1675,7 +1751,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
||||
slbmte r6,r5
|
||||
1: addi r8,r8,16
|
||||
.endr
|
||||
|
||||
0:
|
||||
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
|
||||
/* Finish timing, if we have a vcpu */
|
||||
ld r4, HSTATE_KVM_VCPU(r13)
|
||||
@ -1702,11 +1778,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
||||
* reflect the HDSI to the guest as a DSI.
|
||||
*/
|
||||
kvmppc_hdsi:
|
||||
ld r3, VCPU_KVM(r9)
|
||||
lbz r0, KVM_RADIX(r3)
|
||||
cmpwi r0, 0
|
||||
mfspr r4, SPRN_HDAR
|
||||
mfspr r6, SPRN_HDSISR
|
||||
bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */
|
||||
/* HPTE not found fault or protection fault? */
|
||||
andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
|
||||
beq 1f /* if not, send it to the guest */
|
||||
BEGIN_FTR_SECTION
|
||||
mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */
|
||||
b 4f
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||
andi. r0, r11, MSR_DR /* data relocation enabled? */
|
||||
beq 3f
|
||||
clrrdi r0, r4, 28
|
||||
@ -1776,13 +1860,29 @@ fast_interrupt_c_return:
|
||||
stb r0, HSTATE_IN_GUEST(r13)
|
||||
b guest_exit_cont
|
||||
|
||||
.Lradix_hdsi:
|
||||
std r4, VCPU_FAULT_DAR(r9)
|
||||
stw r6, VCPU_FAULT_DSISR(r9)
|
||||
.Lradix_hisi:
|
||||
mfspr r5, SPRN_ASDR
|
||||
std r5, VCPU_FAULT_GPA(r9)
|
||||
b guest_exit_cont
|
||||
|
||||
/*
|
||||
* Similarly for an HISI, reflect it to the guest as an ISI unless
|
||||
* it is an HPTE not found fault for a page that we have paged out.
|
||||
*/
|
||||
kvmppc_hisi:
|
||||
ld r3, VCPU_KVM(r9)
|
||||
lbz r0, KVM_RADIX(r3)
|
||||
cmpwi r0, 0
|
||||
bne .Lradix_hisi /* for radix, just save ASDR */
|
||||
andis. r0, r11, SRR1_ISI_NOPT@h
|
||||
beq 1f
|
||||
BEGIN_FTR_SECTION
|
||||
mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */
|
||||
b 4f
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||
andi. r0, r11, MSR_IR /* instruction relocation enabled? */
|
||||
beq 3f
|
||||
clrrdi r0, r10, 28
|
||||
|
@ -167,20 +167,38 @@ kvmppc_handler_trampoline_enter_end:
|
||||
* *
|
||||
*****************************************************************************/
|
||||
|
||||
.global kvmppc_handler_trampoline_exit
|
||||
kvmppc_handler_trampoline_exit:
|
||||
|
||||
.global kvmppc_interrupt_pr
|
||||
kvmppc_interrupt_pr:
|
||||
/* 64-bit entry. Register usage at this point:
|
||||
*
|
||||
* SPRG_SCRATCH0 = guest R13
|
||||
* R12 = (guest CR << 32) | exit handler id
|
||||
* R13 = PACA
|
||||
* HSTATE.SCRATCH0 = guest R12
|
||||
* HSTATE.SCRATCH1 = guest CTR if RELOCATABLE
|
||||
*/
|
||||
#ifdef CONFIG_PPC64
|
||||
/* Match 32-bit entry */
|
||||
#ifdef CONFIG_RELOCATABLE
|
||||
std r9, HSTATE_SCRATCH2(r13)
|
||||
ld r9, HSTATE_SCRATCH1(r13)
|
||||
mtctr r9
|
||||
ld r9, HSTATE_SCRATCH2(r13)
|
||||
#endif
|
||||
rotldi r12, r12, 32 /* Flip R12 halves for stw */
|
||||
stw r12, HSTATE_SCRATCH1(r13) /* CR is now in the low half */
|
||||
srdi r12, r12, 32 /* shift trap into low half */
|
||||
#endif
|
||||
|
||||
.global kvmppc_handler_trampoline_exit
|
||||
kvmppc_handler_trampoline_exit:
|
||||
/* Register usage at this point:
|
||||
*
|
||||
* SPRG_SCRATCH0 = guest R13
|
||||
* R12 = exit handler id
|
||||
* R13 = shadow vcpu (32-bit) or PACA (64-bit)
|
||||
* SPRG_SCRATCH0 = guest R13
|
||||
* R12 = exit handler id
|
||||
* R13 = shadow vcpu (32-bit) or PACA (64-bit)
|
||||
* HSTATE.SCRATCH0 = guest R12
|
||||
* HSTATE.SCRATCH1 = guest CR
|
||||
*
|
||||
*/
|
||||
|
||||
/* Save registers */
|
||||
|
@ -565,6 +565,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_PPC_HWRNG:
|
||||
r = kvmppc_hwrng_present();
|
||||
break;
|
||||
case KVM_CAP_PPC_MMU_RADIX:
|
||||
r = !!(hv_enabled && radix_enabled());
|
||||
break;
|
||||
case KVM_CAP_PPC_MMU_HASH_V3:
|
||||
r = !!(hv_enabled && !radix_enabled() &&
|
||||
cpu_has_feature(CPU_FTR_ARCH_300));
|
||||
break;
|
||||
#endif
|
||||
case KVM_CAP_SYNC_MMU:
|
||||
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
||||
@ -1468,6 +1475,31 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
|
||||
break;
|
||||
}
|
||||
case KVM_PPC_CONFIGURE_V3_MMU: {
|
||||
struct kvm *kvm = filp->private_data;
|
||||
struct kvm_ppc_mmuv3_cfg cfg;
|
||||
|
||||
r = -EINVAL;
|
||||
if (!kvm->arch.kvm_ops->configure_mmu)
|
||||
goto out;
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&cfg, argp, sizeof(cfg)))
|
||||
goto out;
|
||||
r = kvm->arch.kvm_ops->configure_mmu(kvm, &cfg);
|
||||
break;
|
||||
}
|
||||
case KVM_PPC_GET_RMMU_INFO: {
|
||||
struct kvm *kvm = filp->private_data;
|
||||
struct kvm_ppc_rmmu_info info;
|
||||
|
||||
r = -EINVAL;
|
||||
if (!kvm->arch.kvm_ops->get_rmmu_info)
|
||||
goto out;
|
||||
r = kvm->arch.kvm_ops->get_rmmu_info(kvm, &info);
|
||||
if (r >= 0 && copy_to_user(argp, &info, sizeof(info)))
|
||||
r = -EFAULT;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
struct kvm *kvm = filp->private_data;
|
||||
r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg);
|
||||
|
@ -41,6 +41,7 @@ static void pmd_ctor(void *addr)
|
||||
}
|
||||
|
||||
struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
|
||||
EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv module */
|
||||
|
||||
/*
|
||||
* Create a kmem_cache() for pagetables. This is not used for PTE
|
||||
@ -86,7 +87,7 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
|
||||
|
||||
pr_debug("Allocated pgtable cache for order %d\n", shift);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(pgtable_cache_add); /* used by kvm_hv module */
|
||||
|
||||
void pgtable_cache_init(void)
|
||||
{
|
||||
|
@ -42,6 +42,8 @@
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/of_fdt.h>
|
||||
#include <linux/libfdt.h>
|
||||
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/page.h>
|
||||
@ -344,12 +346,45 @@ static int __init parse_disable_radix(char *p)
|
||||
}
|
||||
early_param("disable_radix", parse_disable_radix);
|
||||
|
||||
/*
|
||||
* If we're running under a hypervisor, we need to check the contents of
|
||||
* /chosen/ibm,architecture-vec-5 to see if the hypervisor is willing to do
|
||||
* radix. If not, we clear the radix feature bit so we fall back to hash.
|
||||
*/
|
||||
static void early_check_vec5(void)
|
||||
{
|
||||
unsigned long root, chosen;
|
||||
int size;
|
||||
const u8 *vec5;
|
||||
|
||||
root = of_get_flat_dt_root();
|
||||
chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
|
||||
if (chosen == -FDT_ERR_NOTFOUND)
|
||||
return;
|
||||
vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size);
|
||||
if (!vec5)
|
||||
return;
|
||||
if (size <= OV5_INDX(OV5_MMU_RADIX_300) ||
|
||||
!(vec5[OV5_INDX(OV5_MMU_RADIX_300)] & OV5_FEAT(OV5_MMU_RADIX_300)))
|
||||
/* Hypervisor doesn't support radix */
|
||||
cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
|
||||
}
|
||||
|
||||
void __init mmu_early_init_devtree(void)
|
||||
{
|
||||
/* Disable radix mode based on kernel command line. */
|
||||
if (disable_radix)
|
||||
cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
|
||||
|
||||
/*
|
||||
* Check /chosen/ibm,architecture-vec-5 if running as a guest.
|
||||
* When running bare-metal, we can use radix if we like
|
||||
* even though the ibm,architecture-vec-5 property created by
|
||||
* skiboot doesn't have the necessary bits set.
|
||||
*/
|
||||
if (early_radix_enabled() && !(mfmsr() & MSR_HV))
|
||||
early_check_vec5();
|
||||
|
||||
if (early_radix_enabled())
|
||||
radix__early_init_devtree();
|
||||
else
|
||||
|
@ -414,6 +414,8 @@ void __init radix__early_init_mmu(void)
|
||||
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
|
||||
radix_init_partition_table();
|
||||
radix_init_amor();
|
||||
} else {
|
||||
radix_init_pseries();
|
||||
}
|
||||
|
||||
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
|
||||
|
@ -458,13 +458,23 @@ void __init mmu_partition_table_init(void)
|
||||
void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
|
||||
unsigned long dw1)
|
||||
{
|
||||
unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);
|
||||
|
||||
partition_tb[lpid].patb0 = cpu_to_be64(dw0);
|
||||
partition_tb[lpid].patb1 = cpu_to_be64(dw1);
|
||||
|
||||
/* Global flush of TLBs and partition table caches for this lpid */
|
||||
/*
|
||||
* Global flush of TLBs and partition table caches for this lpid.
|
||||
* The type of flush (hash or radix) depends on what the previous
|
||||
* use of this partition ID was, not the new use.
|
||||
*/
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
|
||||
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
|
||||
if (old & PATB_HR)
|
||||
asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
|
||||
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
|
||||
else
|
||||
asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
|
||||
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
|
||||
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
|
||||
|
@ -58,14 +58,16 @@ END_FTR_SECTION(0, 1); \
|
||||
|
||||
#define OPAL_CALL(name, token) \
|
||||
_GLOBAL_TOC(name); \
|
||||
mfmsr r12; \
|
||||
mflr r0; \
|
||||
andi. r11,r12,MSR_IR|MSR_DR; \
|
||||
std r0,PPC_LR_STKOFF(r1); \
|
||||
li r0,token; \
|
||||
beq opal_real_call; \
|
||||
OPAL_BRANCH(opal_tracepoint_entry) \
|
||||
mfcr r12; \
|
||||
stw r12,8(r1); \
|
||||
mfcr r11; \
|
||||
stw r11,8(r1); \
|
||||
li r11,0; \
|
||||
mfmsr r12; \
|
||||
ori r11,r11,MSR_EE; \
|
||||
std r12,PACASAVEDMSR(r13); \
|
||||
andc r12,r12,r11; \
|
||||
@ -98,6 +100,30 @@ opal_return:
|
||||
mtcr r4;
|
||||
rfid
|
||||
|
||||
/*
 * Real-mode OPAL call path. Reached from OPAL_CALL through
 * "beq opal_real_call" when the MSR read into r12 has neither MSR_IR nor
 * MSR_DR set. At that point OPAL_CALL has already stored LR at
 * PPC_LR_STKOFF(r1) and loaded the call token into r0; r12 still holds
 * the MSR value.
 */
opal_real_call:
|
||||
/* Save CR at 8(r1); opal_return_realmode reloads it from there. */
mfcr r11
|
||||
stw r11,8(r1)
|
||||
/* Set opal return address */
|
||||
LOAD_REG_ADDR(r11, opal_return_realmode)
|
||||
mtlr r11
|
||||
/* HSRR1 = caller's MSR with MSR_LE cleared */
li r11,MSR_LE
|
||||
andc r12,r12,r11
|
||||
mtspr SPRN_HSRR1,r12
|
||||
/*
 * Load two doublewords from the "opal" object: offset 8 goes to HSRR0,
 * offset 0 goes to r2.
 * NOTE(review): presumably the OPAL entry point and OPAL base/TOC -
 * confirm against the definition of "opal".
 */
LOAD_REG_ADDR(r11,opal)
|
||||
ld r12,8(r11)
|
||||
ld r2,0(r11)
|
||||
mtspr SPRN_HSRR0,r12
|
||||
/* Jump using the HSRR0/HSRR1 values set up above. */
hrfid
|
||||
|
||||
/*
 * Return point installed in LR by opal_real_call. Restores r2, CR and LR
 * from the locations used on the way in, then returns to the original
 * caller with blr.
 */
opal_return_realmode:
|
||||
/* NOTE(review): FIXUP_ENDIAN is defined elsewhere; presumably it switches
 * back to the kernel's endianness after the call - confirm. */
FIXUP_ENDIAN
|
||||
/* Reload r2 from PACATOC(r13) (r2 was overwritten before the call). */
ld r2,PACATOC(r13);
|
||||
/* CR was saved at 8(r1), LR at PPC_LR_STKOFF(r1). */
lwz r11,8(r1);
|
||||
ld r12,PPC_LR_STKOFF(r1)
|
||||
mtcr r11;
|
||||
mtlr r12
|
||||
blr
|
||||
|
||||
#ifdef CONFIG_TRACEPOINTS
|
||||
opal_tracepoint_entry:
|
||||
stdu r1,-STACKFRAMESIZE(r1)
|
||||
@ -155,36 +181,6 @@ opal_tracepoint_return:
|
||||
blr
|
||||
#endif
|
||||
|
||||
#define OPAL_CALL_REAL(name, token) \
|
||||
_GLOBAL_TOC(name); \
|
||||
mflr r0; \
|
||||
std r0,PPC_LR_STKOFF(r1); \
|
||||
li r0,token; \
|
||||
mfcr r12; \
|
||||
stw r12,8(r1); \
|
||||
\
|
||||
/* Set opal return address */ \
|
||||
LOAD_REG_ADDR(r11, opal_return_realmode); \
|
||||
mtlr r11; \
|
||||
mfmsr r12; \
|
||||
li r11,MSR_LE; \
|
||||
andc r12,r12,r11; \
|
||||
mtspr SPRN_HSRR1,r12; \
|
||||
LOAD_REG_ADDR(r11,opal); \
|
||||
ld r12,8(r11); \
|
||||
ld r2,0(r11); \
|
||||
mtspr SPRN_HSRR0,r12; \
|
||||
hrfid
|
||||
|
||||
opal_return_realmode:
|
||||
FIXUP_ENDIAN
|
||||
ld r2,PACATOC(r13);
|
||||
lwz r11,8(r1);
|
||||
ld r12,PPC_LR_STKOFF(r1)
|
||||
mtcr r11;
|
||||
mtlr r12
|
||||
blr
|
||||
|
||||
|
||||
OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL);
|
||||
OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE);
|
||||
@ -208,7 +204,6 @@ OPAL_CALL(opal_pci_config_write_byte, OPAL_PCI_CONFIG_WRITE_BYTE);
|
||||
OPAL_CALL(opal_pci_config_write_half_word, OPAL_PCI_CONFIG_WRITE_HALF_WORD);
|
||||
OPAL_CALL(opal_pci_config_write_word, OPAL_PCI_CONFIG_WRITE_WORD);
|
||||
OPAL_CALL(opal_set_xive, OPAL_SET_XIVE);
|
||||
OPAL_CALL_REAL(opal_rm_set_xive, OPAL_SET_XIVE);
|
||||
OPAL_CALL(opal_get_xive, OPAL_GET_XIVE);
|
||||
OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER);
|
||||
OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS);
|
||||
@ -264,7 +259,6 @@ OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE);
|
||||
OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE);
|
||||
OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE);
|
||||
OPAL_CALL(opal_resync_timebase, OPAL_RESYNC_TIMEBASE);
|
||||
OPAL_CALL_REAL(opal_rm_resync_timebase, OPAL_RESYNC_TIMEBASE);
|
||||
OPAL_CALL(opal_check_token, OPAL_CHECK_TOKEN);
|
||||
OPAL_CALL(opal_dump_init, OPAL_DUMP_INIT);
|
||||
OPAL_CALL(opal_dump_info, OPAL_DUMP_INFO);
|
||||
@ -280,9 +274,7 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
|
||||
OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
|
||||
OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
|
||||
OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
|
||||
OPAL_CALL_REAL(opal_rm_handle_hmi, OPAL_HANDLE_HMI);
|
||||
OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE);
|
||||
OPAL_CALL_REAL(opal_rm_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE);
|
||||
OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG);
|
||||
OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
|
||||
OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION);
|
||||
@ -304,12 +296,8 @@ OPAL_CALL(opal_pci_get_presence_state, OPAL_PCI_GET_PRESENCE_STATE);
|
||||
OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE);
|
||||
OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE);
|
||||
OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR);
|
||||
OPAL_CALL_REAL(opal_rm_int_get_xirr, OPAL_INT_GET_XIRR);
|
||||
OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR);
|
||||
OPAL_CALL(opal_int_eoi, OPAL_INT_EOI);
|
||||
OPAL_CALL_REAL(opal_rm_int_eoi, OPAL_INT_EOI);
|
||||
OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR);
|
||||
OPAL_CALL_REAL(opal_rm_int_set_mfrr, OPAL_INT_SET_MFRR);
|
||||
OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL);
|
||||
OPAL_CALL(opal_nmmu_set_ptcr, OPAL_NMMU_SET_PTCR);
|
||||
OPAL_CALL_REAL(opal_rm_pci_tce_kill, OPAL_PCI_TCE_KILL);
|
||||
|
@ -1970,11 +1970,6 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
|
||||
if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs)
|
||||
pnv_pci_phb3_tce_invalidate(pe, rm, shift,
|
||||
index, npages);
|
||||
else if (rm)
|
||||
opal_rm_pci_tce_kill(phb->opal_id,
|
||||
OPAL_PCI_TCE_KILL_PAGES,
|
||||
pe->pe_number, 1u << shift,
|
||||
index << shift, npages);
|
||||
else
|
||||
opal_pci_tce_kill(phb->opal_id,
|
||||
OPAL_PCI_TCE_KILL_PAGES,
|
||||
|
@ -127,7 +127,7 @@ static void __init fw_vec5_feature_init(const char *vec5, unsigned long len)
|
||||
index = OV5_INDX(vec5_fw_features_table[i].feature);
|
||||
feat = OV5_FEAT(vec5_fw_features_table[i].feature);
|
||||
|
||||
if (vec5[index] & feat)
|
||||
if (index < len && (vec5[index] & feat))
|
||||
powerpc_firmware_features |=
|
||||
vec5_fw_features_table[i].val;
|
||||
}
|
||||
|
@ -717,6 +717,29 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Actually only used for radix, so far */
|
||||
static int pseries_lpar_register_process_table(unsigned long base,
|
||||
unsigned long page_size, unsigned long table_size)
|
||||
{
|
||||
long rc;
|
||||
unsigned long flags = PROC_TABLE_NEW;
|
||||
|
||||
if (radix_enabled())
|
||||
flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE;
|
||||
for (;;) {
|
||||
rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
|
||||
page_size, table_size);
|
||||
if (!H_IS_LONG_BUSY(rc))
|
||||
break;
|
||||
mdelay(get_longbusy_msecs(rc));
|
||||
}
|
||||
if (rc != H_SUCCESS) {
|
||||
pr_err("Failed to register process table (rc=%ld)\n", rc);
|
||||
BUG();
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
void __init hpte_init_pseries(void)
|
||||
{
|
||||
mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate;
|
||||
@ -731,6 +754,12 @@ void __init hpte_init_pseries(void)
|
||||
mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
|
||||
}
|
||||
|
||||
void radix_init_pseries(void)
|
||||
{
|
||||
pr_info("Using radix MMU under hypervisor\n");
|
||||
register_process_table = pseries_lpar_register_process_table;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PPC_SMLPAR
|
||||
#define CMO_FREE_HINT_DEFAULT 1
|
||||
static int cmo_free_hint_flag = CMO_FREE_HINT_DEFAULT;
|
||||
|
@ -871,6 +871,8 @@ struct kvm_ppc_smmu_info {
|
||||
#define KVM_CAP_S390_USER_INSTR0 130
|
||||
#define KVM_CAP_MSI_DEVID 131
|
||||
#define KVM_CAP_PPC_HTM 132
|
||||
#define KVM_CAP_PPC_MMU_RADIX 134
|
||||
#define KVM_CAP_PPC_MMU_HASH_V3 135
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
@ -1187,6 +1189,10 @@ struct kvm_s390_ucas_mapping {
|
||||
#define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr)
|
||||
/* Available with KVM_CAP_PPC_RTAS */
|
||||
#define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args)
|
||||
/* Available with KVM_CAP_PPC_MMU_RADIX or KVM_CAP_PPC_MMU_HASH_V3 */
|
||||
#define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg)
|
||||
/* Available with KVM_CAP_PPC_MMU_RADIX */
|
||||
#define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info)
|
||||
|
||||
/* ioctl for vm fd */
|
||||
#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)
|
||||
|
Loading…
Reference in New Issue
Block a user