xen: features and fixes for 4.3-rc0
- Convert xen-blkfront to the multiqueue API
- [arm] Support binding event channels to different VCPUs.
- [x86] Support > 512 GiB in PV guests (off by default as such a guest
  cannot be migrated with the current toolstack).
- [x86] PMU support for PV dom0 (limited support for using perf with
  Xen and other guests).

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1

iQEcBAABAgAGBQJV7wIdAAoJEFxbo/MsZsTR0hEH/04HTKLKGnSJpZ5WbMPxqZxE
UqGlvhvVWNAmFocZmbPcEi9T1qtcFrX5pM55JQr6UmAp3ovYsT2q1Q1kKaOaawks
pSfc/YEH3oQW5VUQ9Lm9Ru5Z8Btox0WrzRREO92OF36UOgUOBOLkGsUfOwDinNIM
lSk2djbYwDYAsoeC3PHB32wwMI//Lz6B/9ZVXcyL6ULynt1ULdspETjGnptRPZa7
JTB5L4/soioKOn18HDwwOhKmvaFUPQv9Odnv7dc85XwZreajhM/KMu3qFbMDaF/d
WVB1NMeCBdQYgjOrUjrmpyr5uTMySiQEG54cplrEKinfeZgKlEyjKvjcAfJfiac=
=Ktjl
-----END PGP SIGNATURE-----

Merge tag 'for-linus-4.3-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen updates from David Vrabel:
 "Xen features and fixes for 4.3:

  - Convert xen-blkfront to the multiqueue API

  - [arm] Support binding event channels to different VCPUs.

  - [x86] Support > 512 GiB in PV guests (off by default as such a
    guest cannot be migrated with the current toolstack).

  - [x86] PMU support for PV dom0 (limited support for using perf with
    Xen and other guests)"

* tag 'for-linus-4.3-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (33 commits)
  xen: switch extra memory accounting to use pfns
  xen: limit memory to architectural maximum
  xen: avoid another early crash of memory limited dom0
  xen: avoid early crash of memory limited dom0
  arm/xen: Remove helpers which are PV specific
  xen/x86: Don't try to set PCE bit in CR4
  xen/PMU: PMU emulation code
  xen/PMU: Intercept PMU-related MSR and APIC accesses
  xen/PMU: Describe vendor-specific PMU registers
  xen/PMU: Initialization code for Xen PMU
  xen/PMU: Sysfs interface for setting Xen PMU mode
  xen: xensyms support
  xen: remove no longer needed p2m.h
  xen: allow more than 512 GB of RAM for 64 bit pv-domains
  xen: move p2m list if conflicting with e820 map
  xen: add explicit memblock_reserve() calls for special pages
  mm: provide early_memremap_ro to establish read-only mapping
  xen: check for initrd conflicting with e820 map
  xen: check pre-allocated page tables for conflict with memory map
  xen: check for kernel memory conflicting with memory layout
  ...
commit 752240e74d
Documentation/ABI/testing/sysfs-hypervisor-pmu (new file, 23 lines)
@@ -0,0 +1,23 @@
+What:		/sys/hypervisor/pmu/pmu_mode
+Date:		August 2015
+KernelVersion:	4.3
+Contact:	Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Description:
+		Describes mode that Xen's performance-monitoring unit (PMU)
+		uses. Accepted values are
+			"off"  -- PMU is disabled
+			"self" -- The guest can profile itself
+			"hv"   -- The guest can profile itself and, if it is
+				  privileged (e.g. dom0), the hypervisor
+			"all"  -- The guest can profile itself, the hypervisor
+				  and all other guests. Only available to
+				  privileged guests.
+
+What:		/sys/hypervisor/pmu/pmu_features
+Date:		August 2015
+KernelVersion:	4.3
+Contact:	Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Description:
+		Describes Xen PMU features (as an integer). A set bit indicates
+		that the corresponding feature is enabled. See
+		include/xen/interface/xenpmu.h for available features
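Both attributes are plain text files under sysfs, so they can be read (and, for privileged guests, written) with ordinary file I/O. A minimal userspace sketch, not part of this merge; only the path and the documented value strings come from the ABI text above:

/* Query the current Xen PMU mode via the new sysfs attribute. */
#include <stdio.h>

int main(void)
{
        char mode[16] = "";
        FILE *f = fopen("/sys/hypervisor/pmu/pmu_mode", "r");

        if (!f) {
                perror("pmu_mode");
                return 1;
        }
        if (fscanf(f, "%15s", mode) == 1)
                printf("Xen PMU mode: %s\n", mode);
        fclose(f);
        return 0;
}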
Documentation/kernel-parameters.txt
@@ -4106,6 +4106,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			plus one apbt timer for broadcast timer.
 			x86_intel_mid_timer=apbt_only | lapic_and_apbt
 
+	xen_512gb_limit		[KNL,X86-64,XEN]
+			Restricts the kernel running paravirtualized under Xen
+			to use only up to 512 GB of RAM. The reason to do so is
+			crash analysis tools and Xen tools for doing domain
+			save/restore/migration must be enabled to handle larger
+			domains.
+
 	xen_emul_unplug=		[HW,X86,XEN]
 			Unplug Xen emulated devices
 			Format: [unplug0,][unplug1]
arch/arm/include/asm/xen/events.h
@@ -20,4 +20,10 @@ static inline int xen_irqs_disabled(struct pt_regs *regs)
 			    atomic64_t,		\
 			    counter), (val))
 
+/* Rebind event channel is supported by default */
+static inline bool xen_support_evtchn_rebind(void)
+{
+	return true;
+}
+
 #endif /* _ASM_ARM_XEN_EVENTS_H */
arch/arm/include/asm/xen/page.h
@@ -54,26 +54,14 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
 
 #define mfn_to_local_pfn(mfn) mfn_to_pfn(mfn)
 
-static inline xmaddr_t phys_to_machine(xpaddr_t phys)
-{
-	unsigned offset = phys.paddr & ~PAGE_MASK;
-	return XMADDR(PFN_PHYS(pfn_to_mfn(PFN_DOWN(phys.paddr))) | offset);
-}
-
-static inline xpaddr_t machine_to_phys(xmaddr_t machine)
-{
-	unsigned offset = machine.maddr & ~PAGE_MASK;
-	return XPADDR(PFN_PHYS(mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset);
-}
 /* VIRT <-> MACHINE conversion */
-#define virt_to_machine(v)	(phys_to_machine(XPADDR(__pa(v))))
 #define virt_to_mfn(v)		(pfn_to_mfn(virt_to_pfn(v)))
 #define mfn_to_virt(m)		(__va(mfn_to_pfn(m) << PAGE_SHIFT))
 
+/* Only used in PV code. But ARM guests are always HVM. */
 static inline xmaddr_t arbitrary_virt_to_machine(void *vaddr)
 {
-	/* TODO: assuming it is mapped in the kernel 1:1 */
-	return virt_to_machine(vaddr);
+	BUG();
 }
 
 /* TODO: this shouldn't be here but it is because the frontend drivers
arch/arm/xen/enlighten.c
@@ -45,13 +45,6 @@ static struct vcpu_info __percpu *xen_vcpu_info;
 unsigned long xen_released_pages;
 struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
 
-/* TODO: to be removed */
-__read_mostly int xen_have_vector_callback;
-EXPORT_SYMBOL_GPL(xen_have_vector_callback);
-
-int xen_platform_pci_unplug = XEN_UNPLUG_ALL;
-EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
-
 static __read_mostly unsigned int xen_events_irq;
 
 static __initdata struct device_node *xen_node;
arch/arm64/include/asm/xen/events.h
@@ -18,4 +18,10 @@ static inline int xen_irqs_disabled(struct pt_regs *regs)
 
 #define xchg_xen_ulong(ptr, val) xchg((ptr), (val))
 
+/* Rebind event channel is supported by default */
+static inline bool xen_support_evtchn_rebind(void)
+{
+	return true;
+}
+
 #endif /* _ASM_ARM64_XEN_EVENTS_H */
arch/x86/include/asm/xen/events.h
@@ -20,4 +20,15 @@ static inline int xen_irqs_disabled(struct pt_regs *regs)
 /* No need for a barrier -- XCHG is a barrier on x86. */
 #define xchg_xen_ulong(ptr, val) xchg((ptr), (val))
 
+extern int xen_have_vector_callback;
+
+/*
+ * Events delivered via platform PCI interrupts are always
+ * routed to vcpu 0 and hence cannot be rebound.
+ */
+static inline bool xen_support_evtchn_rebind(void)
+{
+	return (!xen_hvm_domain() || xen_have_vector_callback);
+}
+
 #endif /* _ASM_X86_XEN_EVENTS_H */
arch/x86/include/asm/xen/hypercall.h
@@ -465,6 +465,12 @@ HYPERVISOR_tmem_op(
 	return _hypercall1(int, tmem_op, op);
 }
 
+static inline int
+HYPERVISOR_xenpmu_op(unsigned int op, void *arg)
+{
+	return _hypercall2(int, xenpmu_op, op, arg);
+}
+
 static inline void
 MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
 {
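A hedged sketch of how the new wrapper is meant to be driven. The op code XENPMU_mode_set, the XENPMU_VER_* constants and struct xen_pmu_params are assumed to come from include/xen/interface/xenpmu.h, which this series introduces but which is not shown on this page:

/* Illustrative only: ask Xen to switch the PMU mode for this guest. */
static int example_set_pmu_mode(uint64_t mode)
{
        struct xen_pmu_params xp = {
                .val = mode,                    /* e.g. XENPMU_MODE_SELF */
                .version.maj = XENPMU_VER_MAJ,  /* assumed interface version */
                .version.min = XENPMU_VER_MIN,
        };

        /* Returns 0 on success or a negative Xen error code. */
        return HYPERVISOR_xenpmu_op(XENPMU_mode_set, &xp);
}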
arch/x86/include/asm/xen/interface.h
@@ -3,12 +3,38 @@
  *
  * Guest OS interface to x86 Xen.
  *
- * Copyright (c) 2004, K A Fraser
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
  */
 
 #ifndef _ASM_X86_XEN_INTERFACE_H
 #define _ASM_X86_XEN_INTERFACE_H
 
+/*
+ * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field
+ * in a struct in memory.
+ * XEN_GUEST_HANDLE_PARAM represent a guest pointer, when passed as an
+ * hypercall argument.
+ * XEN_GUEST_HANDLE_PARAM and XEN_GUEST_HANDLE are the same on X86 but
+ * they might not be on other architectures.
+ */
 #ifdef __XEN__
 #define __DEFINE_GUEST_HANDLE(name, type) \
 	typedef struct { type *p; } __guest_handle_ ## name
@@ -88,13 +114,16 @@ DEFINE_GUEST_HANDLE(xen_ulong_t);
  * start of the GDT because some stupid OSes export hard-coded selector values
  * in their ABI. These hard-coded values are always near the start of the GDT,
  * so Xen places itself out of the way, at the far end of the GDT.
+ *
+ * NB The LDT is set using the MMUEXT_SET_LDT op of HYPERVISOR_mmuext_op
  */
 #define FIRST_RESERVED_GDT_PAGE  14
 #define FIRST_RESERVED_GDT_BYTE  (FIRST_RESERVED_GDT_PAGE * 4096)
 #define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8)
 
 /*
- * Send an array of these to HYPERVISOR_set_trap_table()
+ * Send an array of these to HYPERVISOR_set_trap_table().
+ * Terminate the array with a sentinel entry, with traps[].address==0.
  * The privilege level specifies which modes may enter a trap via a software
  * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate
  * privilege levels as follows:
@@ -118,10 +147,41 @@ struct trap_info {
 DEFINE_GUEST_HANDLE_STRUCT(trap_info);
 
 struct arch_shared_info {
-	unsigned long max_pfn;			/* max pfn that appears in table */
-	/* Frame containing list of mfns containing list of mfns containing p2m. */
-	unsigned long pfn_to_mfn_frame_list_list;
+	/*
+	 * Number of valid entries in the p2m table(s) anchored at
+	 * pfn_to_mfn_frame_list_list and/or p2m_vaddr.
+	 */
+	unsigned long max_pfn;
+	/*
+	 * Frame containing list of mfns containing list of mfns containing p2m.
+	 * A value of 0 indicates it has not yet been set up, ~0 indicates it
+	 * has been set to invalid e.g. due to the p2m being too large for the
+	 * 3-level p2m tree. In this case the linear mapper p2m list anchored
+	 * at p2m_vaddr is to be used.
+	 */
+	xen_pfn_t pfn_to_mfn_frame_list_list;
 	unsigned long nmi_reason;
+	/*
+	 * Following three fields are valid if p2m_cr3 contains a value
+	 * different from 0.
+	 * p2m_cr3 is the root of the address space where p2m_vaddr is valid.
+	 * p2m_cr3 is in the same format as a cr3 value in the vcpu register
+	 * state and holds the folded machine frame number (via xen_pfn_to_cr3)
+	 * of a L3 or L4 page table.
+	 * p2m_vaddr holds the virtual address of the linear p2m list. All
+	 * entries in the range [0...max_pfn[ are accessible via this pointer.
+	 * p2m_generation will be incremented by the guest before and after each
+	 * change of the mappings of the p2m list. p2m_generation starts at 0
+	 * and a value with the least significant bit set indicates that a
+	 * mapping update is in progress. This allows guest external software
+	 * (e.g. in Dom0) to verify that read mappings are consistent and
+	 * whether they have changed since the last check.
+	 * Modifying a p2m element in the linear p2m list is allowed via an
+	 * atomic write only.
+	 */
+	unsigned long p2m_cr3;		/* cr3 value of the p2m address space */
+	unsigned long p2m_vaddr;	/* virtual address of the p2m list */
+	unsigned long p2m_generation;	/* generation count of p2m mapping */
 };
 #endif /* !__ASSEMBLY__ */
 
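The generation counter described above behaves like a seqcount: an odd value means an update is in progress, and any change of value means the mapping may have moved. A minimal sketch of the read side as external software (e.g. a dom0 tool that has mapped the guest's p2m list and shared info) might implement it; the two pointers and the omitted read barriers are assumptions, not part of this interface definition:

/* Re-read the slot until a stable, even generation brackets the read. */
static unsigned long read_p2m_entry_stable(const volatile unsigned long *p2m_generation,
                                           const volatile unsigned long *p2m_list,
                                           unsigned long pfn)
{
        unsigned long gen, entry;

        do {
                gen = *p2m_generation;          /* snapshot generation count   */
                entry = p2m_list[pfn];          /* slot is written atomically  */
        } while ((gen & 1) ||                   /* update was in progress, or  */
                 gen != *p2m_generation);       /* one completed meanwhile     */

        return entry;
}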
arch/x86/include/asm/xen/interface.h (continued)
@@ -137,13 +197,31 @@ struct arch_shared_info {
 /*
  * The following is all CPU context. Note that the fpu_ctxt block is filled
  * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
+ *
+ * Also note that when calling DOMCTL_setvcpucontext and VCPU_initialise
+ * for HVM and PVH guests, not all information in this structure is updated:
+ *
+ * - For HVM guests, the structures read include: fpu_ctxt (if
+ * VGCT_I387_VALID is set), flags, user_regs, debugreg[*]
+ *
+ * - PVH guests are the same as HVM guests, but additionally use ctrlreg[3] to
+ * set cr3. All other fields not used should be set to 0.
  */
 struct vcpu_guest_context {
     /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
     struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers */
 #define VGCF_I387_VALID (1<<0)
-#define VGCF_HVM_GUEST  (1<<1)
 #define VGCF_IN_KERNEL  (1<<2)
+#define _VGCF_i387_valid               0
+#define VGCF_i387_valid                (1<<_VGCF_i387_valid)
+#define _VGCF_in_kernel                2
+#define VGCF_in_kernel                 (1<<_VGCF_in_kernel)
+#define _VGCF_failsafe_disables_events 3
+#define VGCF_failsafe_disables_events  (1<<_VGCF_failsafe_disables_events)
+#define _VGCF_syscall_disables_events  4
+#define VGCF_syscall_disables_events   (1<<_VGCF_syscall_disables_events)
+#define _VGCF_online                   5
+#define VGCF_online                    (1<<_VGCF_online)
     unsigned long flags;                    /* VGCF_* flags                 */
     struct cpu_user_regs user_regs;         /* User-level CPU registers     */
     struct trap_info trap_ctxt[256];        /* Virtual IDT                  */
@@ -172,6 +250,129 @@ struct vcpu_guest_context {
 #endif
 };
 DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context);
+
+/* AMD PMU registers and structures */
+struct xen_pmu_amd_ctxt {
+	/*
+	 * Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd).
+	 * For PV(H) guests these fields are RO.
+	 */
+	uint32_t counters;
+	uint32_t ctrls;
+
+	/* Counter MSRs */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+	uint64_t regs[];
+#elif defined(__GNUC__)
+	uint64_t regs[0];
+#endif
+};
+
+/* Intel PMU registers and structures */
+struct xen_pmu_cntr_pair {
+	uint64_t counter;
+	uint64_t control;
+};
+
+struct xen_pmu_intel_ctxt {
+	/*
+	 * Offsets to fixed and architectural counter MSRs (relative to
+	 * xen_pmu_arch.c.intel).
+	 * For PV(H) guests these fields are RO.
+	 */
+	uint32_t fixed_counters;
+	uint32_t arch_counters;
+
+	/* PMU registers */
+	uint64_t global_ctrl;
+	uint64_t global_ovf_ctrl;
+	uint64_t global_status;
+	uint64_t fixed_ctrl;
+	uint64_t ds_area;
+	uint64_t pebs_enable;
+	uint64_t debugctl;
+
+	/* Fixed and architectural counter MSRs */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+	uint64_t regs[];
+#elif defined(__GNUC__)
+	uint64_t regs[0];
+#endif
+};
+
+/* Sampled domain's registers */
+struct xen_pmu_regs {
+	uint64_t ip;
+	uint64_t sp;
+	uint64_t flags;
+	uint16_t cs;
+	uint16_t ss;
+	uint8_t cpl;
+	uint8_t pad[3];
+};
+
+/* PMU flags */
+#define PMU_CACHED	   (1<<0) /* PMU MSRs are cached in the context */
+#define PMU_SAMPLE_USER	   (1<<1) /* Sample is from user or kernel mode */
+#define PMU_SAMPLE_REAL	   (1<<2) /* Sample is from realmode */
+#define PMU_SAMPLE_PV	   (1<<3) /* Sample from a PV guest */
+
+/*
+ * Architecture-specific information describing state of the processor at
+ * the time of PMU interrupt.
+ * Fields of this structure marked as RW for guest should only be written by
+ * the guest when PMU_CACHED bit in pmu_flags is set (which is done by the
+ * hypervisor during PMU interrupt). Hypervisor will read updated data in
+ * XENPMU_flush hypercall and clear PMU_CACHED bit.
+ */
+struct xen_pmu_arch {
+	union {
+		/*
+		 * Processor's registers at the time of interrupt.
+		 * WO for hypervisor, RO for guests.
+		 */
+		struct xen_pmu_regs regs;
+		/*
+		 * Padding for adding new registers to xen_pmu_regs in
+		 * the future
+		 */
+#define XENPMU_REGS_PAD_SZ  64
+		uint8_t pad[XENPMU_REGS_PAD_SZ];
+	} r;
+
+	/* WO for hypervisor, RO for guest */
+	uint64_t pmu_flags;
+
+	/*
+	 * APIC LVTPC register.
+	 * RW for both hypervisor and guest.
+	 * Only APIC_LVT_MASKED bit is loaded by the hypervisor into hardware
+	 * during XENPMU_flush or XENPMU_lvtpc_set.
+	 */
+	union {
+		uint32_t lapic_lvtpc;
+		uint64_t pad;
+	} l;
+
+	/*
+	 * Vendor-specific PMU registers.
+	 * RW for both hypervisor and guest (see exceptions above).
+	 * Guest's updates to this field are verified and then loaded by the
+	 * hypervisor into hardware during XENPMU_flush
+	 */
+	union {
+		struct xen_pmu_amd_ctxt amd;
+		struct xen_pmu_intel_ctxt intel;
+
+		/*
+		 * Padding for contexts (fixed parts only, does not include
+		 * MSR banks that are specified by offsets)
+		 */
+#define XENPMU_CTXT_PAD_SZ  128
+		uint8_t pad[XENPMU_CTXT_PAD_SZ];
+	} c;
+};
+
 #endif /* !__ASSEMBLY__ */
 
 /*
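Note that the counters/ctrls fields above are offsets rather than pointers, documented as relative to xen_pmu_arch.c.amd (and likewise for the Intel variant). A hypothetical helper, not part of this diff, showing how such an offset resolves to the shared MSR bank that follows the fixed part of the context:

/* Assumes the xen_pmu_arch / xen_pmu_amd_ctxt layouts shown above. */
static inline uint64_t *xen_amd_counter_bank(struct xen_pmu_arch *pmu)
{
        /* The bank lives 'counters' bytes past the start of c.amd. */
        return (uint64_t *)((char *)&pmu->c.amd + pmu->c.amd.counters);
}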
arch/x86/include/asm/xen/page.h
@@ -35,9 +35,7 @@ typedef struct xpaddr {
 #define FOREIGN_FRAME(m)	((m) | FOREIGN_FRAME_BIT)
 #define IDENTITY_FRAME(m)	((m) | IDENTITY_FRAME_BIT)
 
-/* Maximum amount of memory we can handle in a domain in pages */
-#define MAX_DOMAIN_PAGES						\
-    ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
+#define P2M_PER_PAGE		(PAGE_SIZE / sizeof(unsigned long))
 
 extern unsigned long *machine_to_phys_mapping;
 extern unsigned long machine_to_phys_nr;
@@ -48,7 +46,7 @@ extern unsigned long xen_max_p2m_pfn;
 extern unsigned long get_phys_to_machine(unsigned long pfn);
 extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
 extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
-extern unsigned long set_phys_range_identity(unsigned long pfn_s,
+extern unsigned long __init set_phys_range_identity(unsigned long pfn_s,
 					     unsigned long pfn_e);
 
 extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
arch/x86/xen/Kconfig
@@ -7,6 +7,7 @@ config XEN
 	depends on PARAVIRT
 	select PARAVIRT_CLOCK
 	select XEN_HAVE_PVMMU
+	select XEN_HAVE_VPMU
 	depends on X86_64 || (X86_32 && X86_PAE)
 	depends on X86_LOCAL_APIC && X86_TSC
 	help
@@ -23,14 +24,18 @@ config XEN_PVHVM
 	def_bool y
 	depends on XEN && PCI && X86_LOCAL_APIC
 
-config XEN_MAX_DOMAIN_MEMORY
-	int
-	default 500 if X86_64
-	default 64 if X86_32
-	depends on XEN
+config XEN_512GB
+	bool "Limit Xen pv-domain memory to 512GB"
+	depends on XEN && X86_64
+	default y
 	help
-	  This only affects the sizing of some bss arrays, the unused
-	  portions of which are freed.
+	  Limit paravirtualized user domains to 512GB of RAM.
+
+	  The Xen tools and crash dump analysis tools might not support
+	  pv-domains with more than 512 GB of RAM. This option controls the
+	  default setting of the kernel to use only up to 512 GB or more.
+	  It is always possible to change the default via specifying the
+	  boot parameter "xen_512gb_limit".
 
 config XEN_SAVE_RESTORE
 	bool
|
|||||||
obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
|
obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
|
||||||
time.o xen-asm.o xen-asm_$(BITS).o \
|
time.o xen-asm.o xen-asm_$(BITS).o \
|
||||||
grant-table.o suspend.o platform-pci-unplug.o \
|
grant-table.o suspend.o platform-pci-unplug.o \
|
||||||
p2m.o apic.o
|
p2m.o apic.o pmu.o
|
||||||
|
|
||||||
obj-$(CONFIG_EVENT_TRACING) += trace.o
|
obj-$(CONFIG_EVENT_TRACING) += trace.o
|
||||||
|
|
||||||
|
arch/x86/xen/apic.c
@@ -7,6 +7,7 @@
 #include <xen/xen.h>
 #include <xen/interface/physdev.h>
 #include "xen-ops.h"
+#include "pmu.h"
 #include "smp.h"
 
 static unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
@@ -72,6 +73,11 @@ static u32 xen_apic_read(u32 reg)
 
 static void xen_apic_write(u32 reg, u32 val)
 {
+	if (reg == APIC_LVTPC) {
+		(void)pmu_apic_update(reg);
+		return;
+	}
+
 	/* Warn to see if there's any stray references */
 	WARN(1,"register: %x, value: %x\n", reg, val);
 }
arch/x86/xen/enlighten.c
@@ -84,6 +84,7 @@
 #include "mmu.h"
 #include "smp.h"
 #include "multicalls.h"
+#include "pmu.h"
 
 EXPORT_SYMBOL_GPL(hypercall_page);
 
@@ -1010,8 +1011,7 @@ static void xen_write_cr0(unsigned long cr0)
 
 static void xen_write_cr4(unsigned long cr4)
 {
-	cr4 &= ~X86_CR4_PGE;
-	cr4 &= ~X86_CR4_PSE;
+	cr4 &= ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PCE);
 
 	native_write_cr4(cr4);
 }
@@ -1030,6 +1030,9 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
 {
 	u64 val;
 
+	if (pmu_msr_read(msr, &val, err))
+		return val;
+
 	val = native_read_msr_safe(msr, err);
 	switch (msr) {
 	case MSR_IA32_APICBASE:
@@ -1076,6 +1079,7 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 		   Xen console noise. */
 
 	default:
-		ret = native_write_msr_safe(msr, low, high);
+		if (!pmu_msr_write(msr, low, high, &ret))
+			ret = native_write_msr_safe(msr, low, high);
 	}
 
@@ -1215,7 +1219,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.read_msr = xen_read_msr_safe,
 	.write_msr = xen_write_msr_safe,
 
-	.read_pmc = native_read_pmc,
+	.read_pmc = xen_read_pmc,
 
 	.iret = xen_iret,
 #ifdef CONFIG_X86_64
@@ -1264,6 +1268,10 @@ static const struct pv_apic_ops xen_apic_ops __initconst = {
 static void xen_reboot(int reason)
 {
 	struct sched_shutdown r = { .reason = reason };
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		xen_pmu_finish(cpu);
 
 	if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
 		BUG();
@@ -1607,7 +1615,9 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	early_boot_irqs_disabled = true;
 
 	xen_raw_console_write("mapping kernel into physical memory\n");
-	xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages);
+	xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base,
+				   xen_start_info->nr_pages);
+	xen_reserve_special_pages();
 
 	/*
 	 * Modify the cache mode translation tables to match Xen's PAT
arch/x86/xen/mmu.c
@@ -116,6 +116,7 @@ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
 DEFINE_PER_CPU(unsigned long, xen_cr3);	 /* cr3 stored as physaddr */
 DEFINE_PER_CPU(unsigned long, xen_current_cr3);	 /* actual vcpu cr3 */
 
+static phys_addr_t xen_pt_base, xen_pt_size __initdata;
 
 /*
  * Just beyond the highest usermode address. STACK_TOP_MAX has a
@@ -1093,6 +1094,16 @@ static void xen_exit_mmap(struct mm_struct *mm)
 
 static void xen_post_allocator_init(void);
 
+static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
+{
+	struct mmuext_op op;
+
+	op.cmd = cmd;
+	op.arg1.mfn = pfn_to_mfn(pfn);
+	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
+		BUG();
+}
+
 #ifdef CONFIG_X86_64
 static void __init xen_cleanhighmap(unsigned long vaddr,
 				    unsigned long vaddr_end)
@@ -1114,6 +1125,83 @@ static void __init xen_cleanhighmap(unsigned long vaddr,
 	xen_mc_flush();
 }
 
+/*
+ * Make a page range writeable and free it.
+ */
+static void __init xen_free_ro_pages(unsigned long paddr, unsigned long size)
+{
+	void *vaddr = __va(paddr);
+	void *vaddr_end = vaddr + size;
+
+	for (; vaddr < vaddr_end; vaddr += PAGE_SIZE)
+		make_lowmem_page_readwrite(vaddr);
+
+	memblock_free(paddr, size);
+}
+
+static void __init xen_cleanmfnmap_free_pgtbl(void *pgtbl, bool unpin)
+{
+	unsigned long pa = __pa(pgtbl) & PHYSICAL_PAGE_MASK;
+
+	if (unpin)
+		pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(pa));
+	ClearPagePinned(virt_to_page(__va(pa)));
+	xen_free_ro_pages(pa, PAGE_SIZE);
+}
+
+/*
+ * Since it is well isolated we can (and since it is perhaps large we should)
+ * also free the page tables mapping the initial P->M table.
+ */
+static void __init xen_cleanmfnmap(unsigned long vaddr)
+{
+	unsigned long va = vaddr & PMD_MASK;
+	unsigned long pa;
+	pgd_t *pgd = pgd_offset_k(va);
+	pud_t *pud_page = pud_offset(pgd, 0);
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	unsigned int i;
+	bool unpin;
+
+	unpin = (vaddr == 2 * PGDIR_SIZE);
+	set_pgd(pgd, __pgd(0));
+	do {
+		pud = pud_page + pud_index(va);
+		if (pud_none(*pud)) {
+			va += PUD_SIZE;
+		} else if (pud_large(*pud)) {
+			pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
+			xen_free_ro_pages(pa, PUD_SIZE);
+			va += PUD_SIZE;
+		} else {
+			pmd = pmd_offset(pud, va);
+			if (pmd_large(*pmd)) {
+				pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
+				xen_free_ro_pages(pa, PMD_SIZE);
+			} else if (!pmd_none(*pmd)) {
+				pte = pte_offset_kernel(pmd, va);
+				set_pmd(pmd, __pmd(0));
+				for (i = 0; i < PTRS_PER_PTE; ++i) {
+					if (pte_none(pte[i]))
+						break;
+					pa = pte_pfn(pte[i]) << PAGE_SHIFT;
+					xen_free_ro_pages(pa, PAGE_SIZE);
+				}
+				xen_cleanmfnmap_free_pgtbl(pte, unpin);
+			}
+			va += PMD_SIZE;
+			if (pmd_index(va))
+				continue;
+			set_pud(pud, __pud(0));
+			xen_cleanmfnmap_free_pgtbl(pmd, unpin);
+		}
+
+	} while (pud_index(va) || pmd_index(va));
+	xen_cleanmfnmap_free_pgtbl(pud_page, unpin);
+}
+
 static void __init xen_pagetable_p2m_free(void)
 {
 	unsigned long size;
@@ -1128,18 +1216,31 @@ static void __init xen_pagetable_p2m_free(void)
 	/* using __ka address and sticking INVALID_P2M_ENTRY! */
 	memset((void *)xen_start_info->mfn_list, 0xff, size);
 
-	/* We should be in __ka space. */
-	BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map);
 	addr = xen_start_info->mfn_list;
-	/* We roundup to the PMD, which means that if anybody at this stage is
-	 * using the __ka address of xen_start_info or xen_start_info->shared_info
-	 * they are in going to crash. Fortunatly we have already revectored
-	 * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */
+	/*
+	 * We could be in __ka space.
+	 * We roundup to the PMD, which means that if anybody at this stage is
+	 * using the __ka address of xen_start_info or
+	 * xen_start_info->shared_info they are in going to crash. Fortunatly
+	 * we have already revectored in xen_setup_kernel_pagetable and in
+	 * xen_setup_shared_info.
	 */
 	size = roundup(size, PMD_SIZE);
-	xen_cleanhighmap(addr, addr + size);
 
-	size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
-	memblock_free(__pa(xen_start_info->mfn_list), size);
+	if (addr >= __START_KERNEL_map) {
+		xen_cleanhighmap(addr, addr + size);
+		size = PAGE_ALIGN(xen_start_info->nr_pages *
+				  sizeof(unsigned long));
+		memblock_free(__pa(addr), size);
+	} else {
+		xen_cleanmfnmap(addr);
+	}
+}
+
+static void __init xen_pagetable_cleanhighmap(void)
+{
+	unsigned long size;
+	unsigned long addr;
 
 	/* At this stage, cleanup_highmap has already cleaned __ka space
 	 * from _brk_limit way up to the max_pfn_mapped (which is the end of
@@ -1172,6 +1273,8 @@ static void __init xen_pagetable_p2m_setup(void)
 
 #ifdef CONFIG_X86_64
 	xen_pagetable_p2m_free();
+
+	xen_pagetable_cleanhighmap();
 #endif
 	/* And revector! Bye bye old array */
 	xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
@@ -1461,6 +1564,24 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
 #else /* CONFIG_X86_64 */
 static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
 {
+	unsigned long pfn;
+
+	if (xen_feature(XENFEAT_writable_page_tables) ||
+	    xen_feature(XENFEAT_auto_translated_physmap) ||
+	    xen_start_info->mfn_list >= __START_KERNEL_map)
+		return pte;
+
+	/*
+	 * Pages belonging to the initial p2m list mapped outside the default
+	 * address range must be mapped read-only. This region contains the
+	 * page tables for mapping the p2m list, too, and page tables MUST be
+	 * mapped read-only.
+	 */
+	pfn = pte_pfn(pte);
+	if (pfn >= xen_start_info->first_p2m_pfn &&
+	    pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames)
+		pte = __pte_ma(pte_val_ma(pte) & ~_PAGE_RW);
+
 	return pte;
 }
 #endif /* CONFIG_X86_64 */
@@ -1489,15 +1610,6 @@ static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
 	native_set_pte(ptep, pte);
 }
 
-static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
-{
-	struct mmuext_op op;
-	op.cmd = cmd;
-	op.arg1.mfn = pfn_to_mfn(pfn);
-	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
-		BUG();
-}
-
 /* Early in boot, while setting up the initial pagetable, assume
    everything is pinned. */
 static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
@@ -1815,6 +1927,9 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	 * mappings. Considering that on Xen after the kernel mappings we
 	 * have the mappings of some pages that don't exist in pfn space, we
 	 * set max_pfn_mapped to the last real pfn mapped. */
-	max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
+	if (xen_start_info->mfn_list < __START_KERNEL_map)
+		max_pfn_mapped = xen_start_info->first_p2m_pfn;
+	else
+		max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
 
 	pt_base = PFN_DOWN(__pa(xen_start_info->pt_base));
@@ -1855,6 +1970,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	/* Graft it onto L4[511][510] */
 	copy_page(level2_kernel_pgt, l2);
 
+	/* Copy the initial P->M table mappings if necessary. */
+	i = pgd_index(xen_start_info->mfn_list);
+	if (i && i < pgd_index(__START_KERNEL_map))
+		init_level4_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
+
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
 		/* Make pagetable pieces RO */
 		set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
@@ -1894,10 +2014,192 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 		check_pt_base(&pt_base, &pt_end, addr[i]);
 
 	/* Our (by three pages) smaller Xen pagetable that we are using */
-	memblock_reserve(PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE);
+	xen_pt_base = PFN_PHYS(pt_base);
+	xen_pt_size = (pt_end - pt_base) * PAGE_SIZE;
+	memblock_reserve(xen_pt_base, xen_pt_size);
+
 	/* Revector the xen_start_info */
 	xen_start_info = (struct start_info *)__va(__pa(xen_start_info));
 }
+
+/*
+ * Read a value from a physical address.
+ */
+static unsigned long __init xen_read_phys_ulong(phys_addr_t addr)
+{
+	unsigned long *vaddr;
+	unsigned long val;
+
+	vaddr = early_memremap_ro(addr, sizeof(val));
+	val = *vaddr;
+	early_memunmap(vaddr, sizeof(val));
+	return val;
+}
+
+/*
+ * Translate a virtual address to a physical one without relying on mapped
+ * page tables.
+ */
+static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
+{
+	phys_addr_t pa;
+	pgd_t pgd;
+	pud_t pud;
+	pmd_t pmd;
+	pte_t pte;
+
+	pa = read_cr3();
+	pgd = native_make_pgd(xen_read_phys_ulong(pa + pgd_index(vaddr) *
+						       sizeof(pgd)));
+	if (!pgd_present(pgd))
+		return 0;
+
+	pa = pgd_val(pgd) & PTE_PFN_MASK;
+	pud = native_make_pud(xen_read_phys_ulong(pa + pud_index(vaddr) *
+						       sizeof(pud)));
+	if (!pud_present(pud))
+		return 0;
+	pa = pud_pfn(pud) << PAGE_SHIFT;
+	if (pud_large(pud))
+		return pa + (vaddr & ~PUD_MASK);
+
+	pmd = native_make_pmd(xen_read_phys_ulong(pa + pmd_index(vaddr) *
+						       sizeof(pmd)));
+	if (!pmd_present(pmd))
+		return 0;
+	pa = pmd_pfn(pmd) << PAGE_SHIFT;
+	if (pmd_large(pmd))
+		return pa + (vaddr & ~PMD_MASK);
+
+	pte = native_make_pte(xen_read_phys_ulong(pa + pte_index(vaddr) *
+						       sizeof(pte)));
+	if (!pte_present(pte))
+		return 0;
+	pa = pte_pfn(pte) << PAGE_SHIFT;
+
+	return pa | (vaddr & ~PAGE_MASK);
+}
+
+/*
+ * Find a new area for the hypervisor supplied p2m list and relocate the p2m to
+ * this area.
+ */
+void __init xen_relocate_p2m(void)
+{
+	phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
+	unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
+	int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
+	pte_t *pt;
+	pmd_t *pmd;
+	pud_t *pud;
+	pgd_t *pgd;
+	unsigned long *new_p2m;
+
+	size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
+	n_pte = roundup(size, PAGE_SIZE) >> PAGE_SHIFT;
+	n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
+	n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
+	n_pud = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
+	n_frames = n_pte + n_pt + n_pmd + n_pud;
+
+	new_area = xen_find_free_area(PFN_PHYS(n_frames));
+	if (!new_area) {
+		xen_raw_console_write("Can't find new memory area for p2m needed due to E820 map conflict\n");
+		BUG();
+	}
+
+	/*
+	 * Setup the page tables for addressing the new p2m list.
+	 * We have asked the hypervisor to map the p2m list at the user address
+	 * PUD_SIZE. It may have done so, or it may have used a kernel space
+	 * address depending on the Xen version.
+	 * To avoid any possible virtual address collision, just use
+	 * 2 * PUD_SIZE for the new area.
+	 */
+	pud_phys = new_area;
+	pmd_phys = pud_phys + PFN_PHYS(n_pud);
+	pt_phys = pmd_phys + PFN_PHYS(n_pmd);
+	p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
+
+	pgd = __va(read_cr3());
+	new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
+	for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
+		pud = early_memremap(pud_phys, PAGE_SIZE);
+		clear_page(pud);
+		for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
+		     idx_pmd++) {
+			pmd = early_memremap(pmd_phys, PAGE_SIZE);
+			clear_page(pmd);
+			for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
+			     idx_pt++) {
+				pt = early_memremap(pt_phys, PAGE_SIZE);
+				clear_page(pt);
+				for (idx_pte = 0;
+				     idx_pte < min(n_pte, PTRS_PER_PTE);
+				     idx_pte++) {
+					set_pte(pt + idx_pte,
+						pfn_pte(p2m_pfn, PAGE_KERNEL));
+					p2m_pfn++;
+				}
+				n_pte -= PTRS_PER_PTE;
+				early_memunmap(pt, PAGE_SIZE);
+				make_lowmem_page_readonly(__va(pt_phys));
+				pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
+						  PFN_DOWN(pt_phys));
+				set_pmd(pmd + idx_pt,
+					__pmd(_PAGE_TABLE | pt_phys));
+				pt_phys += PAGE_SIZE;
+			}
+			n_pt -= PTRS_PER_PMD;
+			early_memunmap(pmd, PAGE_SIZE);
+			make_lowmem_page_readonly(__va(pmd_phys));
+			pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
+					  PFN_DOWN(pmd_phys));
+			set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
+			pmd_phys += PAGE_SIZE;
+		}
+		n_pmd -= PTRS_PER_PUD;
+		early_memunmap(pud, PAGE_SIZE);
+		make_lowmem_page_readonly(__va(pud_phys));
+		pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
+		set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
+		pud_phys += PAGE_SIZE;
+	}
+
+	/* Now copy the old p2m info to the new area. */
+	memcpy(new_p2m, xen_p2m_addr, size);
+	xen_p2m_addr = new_p2m;
+
+	/* Release the old p2m list and set new list info. */
+	p2m_pfn = PFN_DOWN(xen_early_virt_to_phys(xen_start_info->mfn_list));
+	BUG_ON(!p2m_pfn);
+	p2m_pfn_end = p2m_pfn + PFN_DOWN(size);
+
+	if (xen_start_info->mfn_list < __START_KERNEL_map) {
+		pfn = xen_start_info->first_p2m_pfn;
+		pfn_end = xen_start_info->first_p2m_pfn +
+			  xen_start_info->nr_p2m_frames;
+		set_pgd(pgd + 1, __pgd(0));
+	} else {
+		pfn = p2m_pfn;
+		pfn_end = p2m_pfn_end;
+	}
+
+	memblock_free(PFN_PHYS(pfn), PAGE_SIZE * (pfn_end - pfn));
+	while (pfn < pfn_end) {
+		if (pfn == p2m_pfn) {
+			pfn = p2m_pfn_end;
+			continue;
+		}
+		make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
+		pfn++;
+	}
+
+	xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
+	xen_start_info->first_p2m_pfn = PFN_DOWN(new_area);
+	xen_start_info->nr_p2m_frames = n_frames;
+}
+
 #else /* !CONFIG_X86_64 */
 static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
 static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
@@ -1938,18 +2240,41 @@ static void __init xen_write_cr3_init(unsigned long cr3)
 	pv_mmu_ops.write_cr3 = &xen_write_cr3;
 }
 
+/*
+ * For 32 bit domains xen_start_info->pt_base is the pgd address which might be
+ * not the first page table in the page table pool.
+ * Iterate through the initial page tables to find the real page table base.
+ */
+static phys_addr_t xen_find_pt_base(pmd_t *pmd)
+{
+	phys_addr_t pt_base, paddr;
+	unsigned pmdidx;
+
+	pt_base = min(__pa(xen_start_info->pt_base), __pa(pmd));
+
+	for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++)
+		if (pmd_present(pmd[pmdidx]) && !pmd_large(pmd[pmdidx])) {
+			paddr = m2p(pmd[pmdidx].pmd);
+			pt_base = min(pt_base, paddr);
+		}
+
+	return pt_base;
+}
+
 void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 {
 	pmd_t *kernel_pmd;
 
+	kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
+
+	xen_pt_base = xen_find_pt_base(kernel_pmd);
+	xen_pt_size = xen_start_info->nr_pt_frames * PAGE_SIZE;
+
 	initial_kernel_pmd =
 		extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
 
-	max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
-				  xen_start_info->nr_pt_frames * PAGE_SIZE +
-				  512*1024);
+	max_pfn_mapped = PFN_DOWN(xen_pt_base + xen_pt_size + 512 * 1024);
 
-	kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
 	copy_page(initial_kernel_pmd, kernel_pmd);
 
 	xen_map_identity_early(initial_kernel_pmd, max_pfn);
@@ -1968,11 +2293,33 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 			  PFN_DOWN(__pa(initial_page_table)));
 	xen_write_cr3(__pa(initial_page_table));
 
-	memblock_reserve(__pa(xen_start_info->pt_base),
-			 xen_start_info->nr_pt_frames * PAGE_SIZE);
+	memblock_reserve(xen_pt_base, xen_pt_size);
 }
 #endif /* CONFIG_X86_64 */
 
+void __init xen_reserve_special_pages(void)
+{
+	phys_addr_t paddr;
+
+	memblock_reserve(__pa(xen_start_info), PAGE_SIZE);
+	if (xen_start_info->store_mfn) {
+		paddr = PFN_PHYS(mfn_to_pfn(xen_start_info->store_mfn));
+		memblock_reserve(paddr, PAGE_SIZE);
+	}
+	if (!xen_initial_domain()) {
+		paddr = PFN_PHYS(mfn_to_pfn(xen_start_info->console.domU.mfn));
+		memblock_reserve(paddr, PAGE_SIZE);
+	}
+}
+
+void __init xen_pt_check_e820(void)
+{
+	if (xen_is_e820_reserved(xen_pt_base, xen_pt_size)) {
+		xen_raw_console_write("Xen hypervisor allocated page table memory conflicts with E820 map\n");
+		BUG();
+	}
+}
+
 static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss;
 
 static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
arch/x86/xen/p2m.c
@@ -79,10 +79,14 @@
 #include <xen/balloon.h>
 #include <xen/grant_table.h>
 
-#include "p2m.h"
 #include "multicalls.h"
 #include "xen-ops.h"
 
+#define P2M_MID_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long *))
+#define P2M_TOP_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long **))
+
+#define MAX_P2M_PFN	(P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
+
 #define PMDS_PER_MID_PAGE	(P2M_MID_PER_PAGE / PTRS_PER_PTE)
 
 unsigned long *xen_p2m_addr __read_mostly;
@@ -199,7 +203,8 @@ void __ref xen_build_mfn_list_list(void)
 	unsigned int level, topidx, mididx;
 	unsigned long *mid_mfn_p;
 
-	if (xen_feature(XENFEAT_auto_translated_physmap))
+	if (xen_feature(XENFEAT_auto_translated_physmap) ||
+	    xen_start_info->flags & SIF_VIRT_P2M_4TOOLS)
 		return;
 
 	/* Pre-initialize p2m_top_mfn to be completely missing */
@@ -260,9 +265,16 @@ void xen_setup_mfn_list_list(void)
 
 	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
 
-	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
-		virt_to_mfn(p2m_top_mfn);
+	if (xen_start_info->flags & SIF_VIRT_P2M_4TOOLS)
+		HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = ~0UL;
+	else
+		HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+			virt_to_mfn(p2m_top_mfn);
 	HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn;
+	HYPERVISOR_shared_info->arch.p2m_generation = 0;
+	HYPERVISOR_shared_info->arch.p2m_vaddr = (unsigned long)xen_p2m_addr;
+	HYPERVISOR_shared_info->arch.p2m_cr3 =
+		xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
 }
 
 /* Set up p2m_top to point to the domain-builder provided p2m pages */
@@ -478,8 +490,12 @@ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg)
 
 		ptechk = lookup_address(vaddr, &level);
 		if (ptechk == pte_pg) {
+			HYPERVISOR_shared_info->arch.p2m_generation++;
+			wmb(); /* Tools are synchronizing via p2m_generation. */
 			set_pmd(pmdp,
 				__pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE));
+			wmb(); /* Tools are synchronizing via p2m_generation. */
+			HYPERVISOR_shared_info->arch.p2m_generation++;
 			pte_newpg[i] = NULL;
 		}
 
@@ -505,7 +521,7 @@ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg)
  */
 static bool alloc_p2m(unsigned long pfn)
 {
-	unsigned topidx, mididx;
+	unsigned topidx;
 	unsigned long *top_mfn_p, *mid_mfn;
 	pte_t *ptep, *pte_pg;
 	unsigned int level;
@@ -513,9 +529,6 @@ static bool alloc_p2m(unsigned long pfn)
 	unsigned long addr = (unsigned long)(xen_p2m_addr + pfn);
 	unsigned long p2m_pfn;
 
-	topidx = p2m_top_index(pfn);
-	mididx = p2m_mid_index(pfn);
-
 	ptep = lookup_address(addr, &level);
 	BUG_ON(!ptep || level != PG_LEVEL_4K);
 	pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));
@@ -527,7 +540,8 @@ static bool alloc_p2m(unsigned long pfn)
 		return false;
 	}
 
-	if (p2m_top_mfn) {
+	if (p2m_top_mfn && pfn < MAX_P2M_PFN) {
+		topidx = p2m_top_index(pfn);
 		top_mfn_p = &p2m_top_mfn[topidx];
 		mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]);
 
@@ -577,10 +591,14 @@ static bool alloc_p2m(unsigned long pfn)
 		spin_lock_irqsave(&p2m_update_lock, flags);
 
 		if (pte_pfn(*ptep) == p2m_pfn) {
+			HYPERVISOR_shared_info->arch.p2m_generation++;
+			wmb(); /* Tools are synchronizing via p2m_generation. */
 			set_pte(ptep,
 				pfn_pte(PFN_DOWN(__pa(p2m)), PAGE_KERNEL));
+			wmb(); /* Tools are synchronizing via p2m_generation. */
+			HYPERVISOR_shared_info->arch.p2m_generation++;
 			if (mid_mfn)
-				mid_mfn[mididx] = virt_to_mfn(p2m);
+				mid_mfn[p2m_mid_index(pfn)] = virt_to_mfn(p2m);
 			p2m = NULL;
 		}
 
@@ -630,6 +648,11 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 		return true;
 	}
 
+	/*
+	 * The interface requires atomic updates on p2m elements.
+	 * xen_safe_write_ulong() is using __put_user which does an atomic
+	 * store via asm().
+	 */
 	if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn)))
 		return true;
 
@@ -1,15 +0,0 @@
-#ifndef _XEN_P2M_H
-#define _XEN_P2M_H
-
-#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
-#define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *))
-#define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **))
-
-#define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
-
-#define MAX_REMAP_RANGES 10
-
-extern unsigned long __init set_phys_range_identity(unsigned long pfn_s,
-			unsigned long pfn_e);
-
-#endif /* _XEN_P2M_H */
@@ -68,7 +68,7 @@ static int check_platform_magic(void)
 	return 0;
 }
 
-bool xen_has_pv_devices()
+bool xen_has_pv_devices(void)
 {
 	if (!xen_domain())
 		return false;
570
arch/x86/xen/pmu.c
Normal file
@@ -0,0 +1,570 @@
|
|||||||
|
#include <linux/types.h>
|
||||||
|
#include <linux/interrupt.h>
|
||||||
|
|
||||||
|
#include <asm/xen/hypercall.h>
|
||||||
|
#include <xen/page.h>
|
||||||
|
#include <xen/interface/xen.h>
|
||||||
|
#include <xen/interface/vcpu.h>
|
||||||
|
#include <xen/interface/xenpmu.h>
|
||||||
|
|
||||||
|
#include "xen-ops.h"
|
||||||
|
#include "pmu.h"
|
||||||
|
|
||||||
|
/* x86_pmu.handle_irq definition */
|
||||||
|
#include "../kernel/cpu/perf_event.h"
|
||||||
|
|
||||||
|
#define XENPMU_IRQ_PROCESSING 1
|
||||||
|
struct xenpmu {
|
||||||
|
/* Shared page between hypervisor and domain */
|
||||||
|
struct xen_pmu_data *xenpmu_data;
|
||||||
|
|
||||||
|
uint8_t flags;
|
||||||
|
};
|
||||||
|
static DEFINE_PER_CPU(struct xenpmu, xenpmu_shared);
|
||||||
|
#define get_xenpmu_data() (this_cpu_ptr(&xenpmu_shared)->xenpmu_data)
|
||||||
|
#define get_xenpmu_flags() (this_cpu_ptr(&xenpmu_shared)->flags)
|
||||||
|
|
||||||
|
/* Macro for computing address of a PMU MSR bank */
|
||||||
|
#define field_offset(ctxt, field) ((void *)((uintptr_t)ctxt + \
|
||||||
|
(uintptr_t)ctxt->field))
|
||||||
|
|
||||||
|
/* AMD PMU */
|
||||||
|
#define F15H_NUM_COUNTERS 6
|
||||||
|
#define F10H_NUM_COUNTERS 4
|
||||||
|
|
||||||
|
static __read_mostly uint32_t amd_counters_base;
|
||||||
|
static __read_mostly uint32_t amd_ctrls_base;
|
||||||
|
static __read_mostly int amd_msr_step;
|
||||||
|
static __read_mostly int k7_counters_mirrored;
|
||||||
|
static __read_mostly int amd_num_counters;
|
||||||
|
|
||||||
|
/* Intel PMU */
|
||||||
|
#define MSR_TYPE_COUNTER 0
|
||||||
|
#define MSR_TYPE_CTRL 1
|
||||||
|
#define MSR_TYPE_GLOBAL 2
|
||||||
|
#define MSR_TYPE_ARCH_COUNTER 3
|
||||||
|
#define MSR_TYPE_ARCH_CTRL 4
|
||||||
|
|
||||||
|
/* Number of general pmu registers (CPUID.EAX[0xa].EAX[8..15]) */
|
||||||
|
#define PMU_GENERAL_NR_SHIFT 8
|
||||||
|
#define PMU_GENERAL_NR_BITS 8
|
||||||
|
#define PMU_GENERAL_NR_MASK (((1 << PMU_GENERAL_NR_BITS) - 1) \
|
||||||
|
<< PMU_GENERAL_NR_SHIFT)
|
||||||
|
|
||||||
|
/* Number of fixed pmu registers (CPUID.EDX[0xa].EDX[0..4]) */
|
||||||
|
#define PMU_FIXED_NR_SHIFT 0
|
||||||
|
#define PMU_FIXED_NR_BITS 5
|
||||||
|
#define PMU_FIXED_NR_MASK (((1 << PMU_FIXED_NR_BITS) - 1) \
|
||||||
|
<< PMU_FIXED_NR_SHIFT)
|
||||||
|
|
||||||
|
/* Alias registers (0x4c1) for full-width writes to PMCs */
|
||||||
|
#define MSR_PMC_ALIAS_MASK (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_PMC0))
|
||||||
|
|
||||||
|
#define INTEL_PMC_TYPE_SHIFT 30
|
||||||
|
|
||||||
|
static __read_mostly int intel_num_arch_counters, intel_num_fixed_counters;
|
||||||
|
|
||||||
|
|
||||||
|
static void xen_pmu_arch_init(void)
|
||||||
|
{
|
||||||
|
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
|
||||||
|
|
||||||
|
switch (boot_cpu_data.x86) {
|
||||||
|
case 0x15:
|
||||||
|
amd_num_counters = F15H_NUM_COUNTERS;
|
||||||
|
amd_counters_base = MSR_F15H_PERF_CTR;
|
||||||
|
amd_ctrls_base = MSR_F15H_PERF_CTL;
|
||||||
|
amd_msr_step = 2;
|
||||||
|
k7_counters_mirrored = 1;
|
||||||
|
break;
|
||||||
|
case 0x10:
|
||||||
|
case 0x12:
|
||||||
|
case 0x14:
|
||||||
|
case 0x16:
|
||||||
|
default:
|
||||||
|
amd_num_counters = F10H_NUM_COUNTERS;
|
||||||
|
amd_counters_base = MSR_K7_PERFCTR0;
|
||||||
|
amd_ctrls_base = MSR_K7_EVNTSEL0;
|
||||||
|
amd_msr_step = 1;
|
||||||
|
k7_counters_mirrored = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
uint32_t eax, ebx, ecx, edx;
|
||||||
|
|
||||||
|
cpuid(0xa, &eax, &ebx, &ecx, &edx);
|
||||||
|
|
||||||
|
intel_num_arch_counters = (eax & PMU_GENERAL_NR_MASK) >>
|
||||||
|
PMU_GENERAL_NR_SHIFT;
|
||||||
|
intel_num_fixed_counters = (edx & PMU_FIXED_NR_MASK) >>
|
||||||
|
PMU_FIXED_NR_SHIFT;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uint32_t get_fam15h_addr(u32 addr)
|
||||||
|
{
|
||||||
|
switch (addr) {
|
||||||
|
case MSR_K7_PERFCTR0:
|
||||||
|
case MSR_K7_PERFCTR1:
|
||||||
|
case MSR_K7_PERFCTR2:
|
||||||
|
case MSR_K7_PERFCTR3:
|
||||||
|
return MSR_F15H_PERF_CTR + (addr - MSR_K7_PERFCTR0);
|
||||||
|
case MSR_K7_EVNTSEL0:
|
||||||
|
case MSR_K7_EVNTSEL1:
|
||||||
|
case MSR_K7_EVNTSEL2:
|
||||||
|
case MSR_K7_EVNTSEL3:
|
||||||
|
return MSR_F15H_PERF_CTL + (addr - MSR_K7_EVNTSEL0);
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return addr;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool is_amd_pmu_msr(unsigned int msr)
|
||||||
|
{
|
||||||
|
if ((msr >= MSR_F15H_PERF_CTL &&
|
||||||
|
msr < MSR_F15H_PERF_CTR + (amd_num_counters * 2)) ||
|
||||||
|
(msr >= MSR_K7_EVNTSEL0 &&
|
||||||
|
msr < MSR_K7_PERFCTR0 + amd_num_counters))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int is_intel_pmu_msr(u32 msr_index, int *type, int *index)
|
||||||
|
{
|
||||||
|
u32 msr_index_pmc;
|
||||||
|
|
||||||
|
switch (msr_index) {
|
||||||
|
case MSR_CORE_PERF_FIXED_CTR_CTRL:
|
||||||
|
case MSR_IA32_DS_AREA:
|
||||||
|
case MSR_IA32_PEBS_ENABLE:
|
||||||
|
*type = MSR_TYPE_CTRL;
|
||||||
|
return true;
|
||||||
|
|
||||||
|
case MSR_CORE_PERF_GLOBAL_CTRL:
|
||||||
|
case MSR_CORE_PERF_GLOBAL_STATUS:
|
||||||
|
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
|
||||||
|
*type = MSR_TYPE_GLOBAL;
|
||||||
|
return true;
|
||||||
|
|
||||||
|
default:
|
||||||
|
|
||||||
|
if ((msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
|
||||||
|
(msr_index < MSR_CORE_PERF_FIXED_CTR0 +
|
||||||
|
intel_num_fixed_counters)) {
|
||||||
|
*index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
|
||||||
|
*type = MSR_TYPE_COUNTER;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((msr_index >= MSR_P6_EVNTSEL0) &&
|
||||||
|
(msr_index < MSR_P6_EVNTSEL0 + intel_num_arch_counters)) {
|
||||||
|
*index = msr_index - MSR_P6_EVNTSEL0;
|
||||||
|
*type = MSR_TYPE_ARCH_CTRL;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
|
||||||
|
if ((msr_index_pmc >= MSR_IA32_PERFCTR0) &&
|
||||||
|
(msr_index_pmc < MSR_IA32_PERFCTR0 +
|
||||||
|
intel_num_arch_counters)) {
|
||||||
|
*type = MSR_TYPE_ARCH_COUNTER;
|
||||||
|
*index = msr_index_pmc - MSR_IA32_PERFCTR0;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool xen_intel_pmu_emulate(unsigned int msr, u64 *val, int type,
|
||||||
|
int index, bool is_read)
|
||||||
|
{
|
||||||
|
uint64_t *reg = NULL;
|
||||||
|
struct xen_pmu_intel_ctxt *ctxt;
|
||||||
|
uint64_t *fix_counters;
|
||||||
|
struct xen_pmu_cntr_pair *arch_cntr_pair;
|
||||||
|
struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
|
||||||
|
uint8_t xenpmu_flags = get_xenpmu_flags();
|
||||||
|
|
||||||
|
|
||||||
|
if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
ctxt = &xenpmu_data->pmu.c.intel;
|
||||||
|
|
||||||
|
switch (msr) {
|
||||||
|
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
|
||||||
|
reg = &ctxt->global_ovf_ctrl;
|
||||||
|
break;
|
||||||
|
case MSR_CORE_PERF_GLOBAL_STATUS:
|
||||||
|
reg = &ctxt->global_status;
|
||||||
|
break;
|
||||||
|
case MSR_CORE_PERF_GLOBAL_CTRL:
|
||||||
|
reg = &ctxt->global_ctrl;
|
||||||
|
break;
|
||||||
|
case MSR_CORE_PERF_FIXED_CTR_CTRL:
|
||||||
|
reg = &ctxt->fixed_ctrl;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
switch (type) {
|
||||||
|
case MSR_TYPE_COUNTER:
|
||||||
|
fix_counters = field_offset(ctxt, fixed_counters);
|
||||||
|
reg = &fix_counters[index];
|
||||||
|
break;
|
||||||
|
case MSR_TYPE_ARCH_COUNTER:
|
||||||
|
arch_cntr_pair = field_offset(ctxt, arch_counters);
|
||||||
|
reg = &arch_cntr_pair[index].counter;
|
||||||
|
break;
|
||||||
|
case MSR_TYPE_ARCH_CTRL:
|
||||||
|
arch_cntr_pair = field_offset(ctxt, arch_counters);
|
||||||
|
reg = &arch_cntr_pair[index].control;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (reg) {
|
||||||
|
if (is_read)
|
||||||
|
*val = *reg;
|
||||||
|
else {
|
||||||
|
*reg = *val;
|
||||||
|
|
||||||
|
if (msr == MSR_CORE_PERF_GLOBAL_OVF_CTRL)
|
||||||
|
ctxt->global_status &= (~(*val));
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read)
|
||||||
|
{
|
||||||
|
uint64_t *reg = NULL;
|
||||||
|
int i, off = 0;
|
||||||
|
struct xen_pmu_amd_ctxt *ctxt;
|
||||||
|
uint64_t *counter_regs, *ctrl_regs;
|
||||||
|
struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
|
||||||
|
uint8_t xenpmu_flags = get_xenpmu_flags();
|
||||||
|
|
||||||
|
if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (k7_counters_mirrored &&
|
||||||
|
((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)))
|
||||||
|
msr = get_fam15h_addr(msr);
|
||||||
|
|
||||||
|
ctxt = &xenpmu_data->pmu.c.amd;
|
||||||
|
for (i = 0; i < amd_num_counters; i++) {
|
||||||
|
if (msr == amd_ctrls_base + off) {
|
||||||
|
ctrl_regs = field_offset(ctxt, ctrls);
|
||||||
|
reg = &ctrl_regs[i];
|
||||||
|
break;
|
||||||
|
} else if (msr == amd_counters_base + off) {
|
||||||
|
counter_regs = field_offset(ctxt, counters);
|
||||||
|
reg = &counter_regs[i];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
off += amd_msr_step;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (reg) {
|
||||||
|
if (is_read)
|
||||||
|
*val = *reg;
|
||||||
|
else
|
||||||
|
*reg = *val;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
|
||||||
|
{
|
||||||
|
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
|
||||||
|
if (is_amd_pmu_msr(msr)) {
|
||||||
|
if (!xen_amd_pmu_emulate(msr, val, 1))
|
||||||
|
*val = native_read_msr_safe(msr, err);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
int type, index;
|
||||||
|
|
||||||
|
if (is_intel_pmu_msr(msr, &type, &index)) {
|
||||||
|
if (!xen_intel_pmu_emulate(msr, val, type, index, 1))
|
||||||
|
*val = native_read_msr_safe(msr, err);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)
|
||||||
|
{
|
||||||
|
uint64_t val = ((uint64_t)high << 32) | low;
|
||||||
|
|
||||||
|
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
|
||||||
|
if (is_amd_pmu_msr(msr)) {
|
||||||
|
if (!xen_amd_pmu_emulate(msr, &val, 0))
|
||||||
|
*err = native_write_msr_safe(msr, low, high);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
int type, index;
|
||||||
|
|
||||||
|
if (is_intel_pmu_msr(msr, &type, &index)) {
|
||||||
|
if (!xen_intel_pmu_emulate(msr, &val, type, index, 0))
|
||||||
|
*err = native_write_msr_safe(msr, low, high);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned long long xen_amd_read_pmc(int counter)
|
||||||
|
{
|
||||||
|
struct xen_pmu_amd_ctxt *ctxt;
|
||||||
|
uint64_t *counter_regs;
|
||||||
|
struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
|
||||||
|
uint8_t xenpmu_flags = get_xenpmu_flags();
|
||||||
|
|
||||||
|
if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
|
||||||
|
uint32_t msr;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
msr = amd_counters_base + (counter * amd_msr_step);
|
||||||
|
return native_read_msr_safe(msr, &err);
|
||||||
|
}
|
||||||
|
|
||||||
|
ctxt = &xenpmu_data->pmu.c.amd;
|
||||||
|
counter_regs = field_offset(ctxt, counters);
|
||||||
|
return counter_regs[counter];
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned long long xen_intel_read_pmc(int counter)
|
||||||
|
{
|
||||||
|
struct xen_pmu_intel_ctxt *ctxt;
|
||||||
|
uint64_t *fixed_counters;
|
||||||
|
struct xen_pmu_cntr_pair *arch_cntr_pair;
|
||||||
|
struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
|
||||||
|
uint8_t xenpmu_flags = get_xenpmu_flags();
|
||||||
|
|
||||||
|
if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
|
||||||
|
uint32_t msr;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
if (counter & (1 << INTEL_PMC_TYPE_SHIFT))
|
||||||
|
msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff);
|
||||||
|
else
|
||||||
|
msr = MSR_IA32_PERFCTR0 + counter;
|
||||||
|
|
||||||
|
return native_read_msr_safe(msr, &err);
|
||||||
|
}
|
||||||
|
|
||||||
|
ctxt = &xenpmu_data->pmu.c.intel;
|
||||||
|
if (counter & (1 << INTEL_PMC_TYPE_SHIFT)) {
|
||||||
|
fixed_counters = field_offset(ctxt, fixed_counters);
|
||||||
|
return fixed_counters[counter & 0xffff];
|
||||||
|
}
|
||||||
|
|
||||||
|
arch_cntr_pair = field_offset(ctxt, arch_counters);
|
||||||
|
return arch_cntr_pair[counter].counter;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned long long xen_read_pmc(int counter)
|
||||||
|
{
|
||||||
|
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
|
||||||
|
return xen_amd_read_pmc(counter);
|
||||||
|
else
|
||||||
|
return xen_intel_read_pmc(counter);
|
||||||
|
}
|
||||||
|
|
||||||
|
int pmu_apic_update(uint32_t val)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
|
||||||
|
|
||||||
|
if (!xenpmu_data) {
|
||||||
|
pr_warn_once("%s: pmudata not initialized\n", __func__);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
xenpmu_data->pmu.l.lapic_lvtpc = val;
|
||||||
|
|
||||||
|
if (get_xenpmu_flags() & XENPMU_IRQ_PROCESSING)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
ret = HYPERVISOR_xenpmu_op(XENPMU_lvtpc_set, NULL);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* perf callbacks */
|
||||||
|
static int xen_is_in_guest(void)
|
||||||
|
{
|
||||||
|
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
|
||||||
|
|
||||||
|
if (!xenpmu_data) {
|
||||||
|
pr_warn_once("%s: pmudata not initialized\n", __func__);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int xen_is_user_mode(void)
|
||||||
|
{
|
||||||
|
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
|
||||||
|
|
||||||
|
if (!xenpmu_data) {
|
||||||
|
pr_warn_once("%s: pmudata not initialized\n", __func__);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV)
|
||||||
|
return (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER);
|
||||||
|
else
|
||||||
|
return !!(xenpmu_data->pmu.r.regs.cpl & 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned long xen_get_guest_ip(void)
|
||||||
|
{
|
||||||
|
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
|
||||||
|
|
||||||
|
if (!xenpmu_data) {
|
||||||
|
pr_warn_once("%s: pmudata not initialized\n", __func__);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return xenpmu_data->pmu.r.regs.ip;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct perf_guest_info_callbacks xen_guest_cbs = {
|
||||||
|
.is_in_guest = xen_is_in_guest,
|
||||||
|
.is_user_mode = xen_is_user_mode,
|
||||||
|
.get_guest_ip = xen_get_guest_ip,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Convert registers from Xen's format to Linux' */
|
||||||
|
static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
|
||||||
|
struct pt_regs *regs, uint64_t pmu_flags)
|
||||||
|
{
|
||||||
|
regs->ip = xen_regs->ip;
|
||||||
|
regs->cs = xen_regs->cs;
|
||||||
|
regs->sp = xen_regs->sp;
|
||||||
|
|
||||||
|
if (pmu_flags & PMU_SAMPLE_PV) {
|
||||||
|
if (pmu_flags & PMU_SAMPLE_USER)
|
||||||
|
regs->cs |= 3;
|
||||||
|
else
|
||||||
|
regs->cs &= ~3;
|
||||||
|
} else {
|
||||||
|
if (xen_regs->cpl)
|
||||||
|
regs->cs |= 3;
|
||||||
|
else
|
||||||
|
regs->cs &= ~3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
|
||||||
|
{
|
||||||
|
int err, ret = IRQ_NONE;
|
||||||
|
struct pt_regs regs;
|
||||||
|
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
|
||||||
|
uint8_t xenpmu_flags = get_xenpmu_flags();
|
||||||
|
|
||||||
|
if (!xenpmu_data) {
|
||||||
|
pr_warn_once("%s: pmudata not initialized\n", __func__);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
this_cpu_ptr(&xenpmu_shared)->flags =
|
||||||
|
xenpmu_flags | XENPMU_IRQ_PROCESSING;
|
||||||
|
xen_convert_regs(&xenpmu_data->pmu.r.regs, ®s,
|
||||||
|
xenpmu_data->pmu.pmu_flags);
|
||||||
|
if (x86_pmu.handle_irq(®s))
|
||||||
|
ret = IRQ_HANDLED;
|
||||||
|
|
||||||
|
/* Write out cached context to HW */
|
||||||
|
err = HYPERVISOR_xenpmu_op(XENPMU_flush, NULL);
|
||||||
|
this_cpu_ptr(&xenpmu_shared)->flags = xenpmu_flags;
|
||||||
|
if (err) {
|
||||||
|
pr_warn_once("%s: failed hypercall, err: %d\n", __func__, err);
|
||||||
|
return IRQ_NONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_xen_pmu(int cpu)
|
||||||
|
{
|
||||||
|
return (get_xenpmu_data() != NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
void xen_pmu_init(int cpu)
|
||||||
|
{
|
||||||
|
int err;
|
||||||
|
struct xen_pmu_params xp;
|
||||||
|
unsigned long pfn;
|
||||||
|
struct xen_pmu_data *xenpmu_data;
|
||||||
|
|
||||||
|
BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);
|
||||||
|
|
||||||
|
if (xen_hvm_domain())
|
||||||
|
return;
|
||||||
|
|
||||||
|
xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL);
|
||||||
|
if (!xenpmu_data) {
|
||||||
|
pr_err("VPMU init: No memory\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
pfn = virt_to_pfn(xenpmu_data);
|
||||||
|
|
||||||
|
xp.val = pfn_to_mfn(pfn);
|
||||||
|
xp.vcpu = cpu;
|
||||||
|
xp.version.maj = XENPMU_VER_MAJ;
|
||||||
|
xp.version.min = XENPMU_VER_MIN;
|
||||||
|
err = HYPERVISOR_xenpmu_op(XENPMU_init, &xp);
|
||||||
|
if (err)
|
||||||
|
goto fail;
|
||||||
|
|
||||||
|
per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data;
|
||||||
|
per_cpu(xenpmu_shared, cpu).flags = 0;
|
||||||
|
|
||||||
|
if (cpu == 0) {
|
||||||
|
perf_register_guest_info_callbacks(&xen_guest_cbs);
|
||||||
|
xen_pmu_arch_init();
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
|
||||||
|
fail:
|
||||||
|
pr_warn_once("Could not initialize VPMU for cpu %d, error %d\n",
|
||||||
|
cpu, err);
|
||||||
|
free_pages((unsigned long)xenpmu_data, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void xen_pmu_finish(int cpu)
|
||||||
|
{
|
||||||
|
struct xen_pmu_params xp;
|
||||||
|
|
||||||
|
if (xen_hvm_domain())
|
||||||
|
return;
|
||||||
|
|
||||||
|
xp.vcpu = cpu;
|
||||||
|
xp.version.maj = XENPMU_VER_MAJ;
|
||||||
|
xp.version.min = XENPMU_VER_MIN;
|
||||||
|
|
||||||
|
(void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);
|
||||||
|
|
||||||
|
free_pages((unsigned long)per_cpu(xenpmu_shared, cpu).xenpmu_data, 0);
|
||||||
|
per_cpu(xenpmu_shared, cpu).xenpmu_data = NULL;
|
||||||
|
}
|
15
arch/x86/xen/pmu.h
Normal file
@@ -0,0 +1,15 @@
+#ifndef __XEN_PMU_H
+#define __XEN_PMU_H
+
+#include <xen/interface/xenpmu.h>
+
+irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id);
+void xen_pmu_init(int cpu);
+void xen_pmu_finish(int cpu);
+bool is_xen_pmu(int cpu);
+bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err);
+bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err);
+int pmu_apic_update(uint32_t reg);
+unsigned long long xen_read_pmc(int counter);
+
+#endif /* __XEN_PMU_H */
@@ -27,17 +27,23 @@
 #include <xen/interface/memory.h>
 #include <xen/interface/physdev.h>
 #include <xen/features.h>
+#include <xen/hvc-console.h>
 #include "xen-ops.h"
 #include "vdso.h"
-#include "p2m.h"
 #include "mmu.h"
 
+#define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024)
+
 /* Amount of extra memory space we add to the e820 ranges */
 struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
 
 /* Number of pages released from the initial allocation. */
 unsigned long xen_released_pages;
 
+/* E820 map used during setting up memory. */
+static struct e820entry xen_e820_map[E820MAX] __initdata;
+static u32 xen_e820_map_entries __initdata;
+
 /*
  * Buffer used to remap identity mapped pages. We only need the virtual space.
  * The physical page behind this address is remapped as needed to different
@ -64,62 +70,89 @@ static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;
|
|||||||
*/
|
*/
|
||||||
#define EXTRA_MEM_RATIO (10)
|
#define EXTRA_MEM_RATIO (10)
|
||||||
|
|
||||||
static void __init xen_add_extra_mem(phys_addr_t start, phys_addr_t size)
|
static bool xen_512gb_limit __initdata = IS_ENABLED(CONFIG_XEN_512GB);
|
||||||
|
|
||||||
|
static void __init xen_parse_512gb(void)
|
||||||
|
{
|
||||||
|
bool val = false;
|
||||||
|
char *arg;
|
||||||
|
|
||||||
|
arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit");
|
||||||
|
if (!arg)
|
||||||
|
return;
|
||||||
|
|
||||||
|
arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit=");
|
||||||
|
if (!arg)
|
||||||
|
val = true;
|
||||||
|
else if (strtobool(arg + strlen("xen_512gb_limit="), &val))
|
||||||
|
return;
|
||||||
|
|
||||||
|
xen_512gb_limit = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __init xen_add_extra_mem(unsigned long start_pfn,
|
||||||
|
unsigned long n_pfns)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* No need to check for zero size, should happen rarely and will only
|
||||||
|
* write a new entry regarded to be unused due to zero size.
|
||||||
|
*/
|
||||||
for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
|
for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
|
||||||
/* Add new region. */
|
/* Add new region. */
|
||||||
if (xen_extra_mem[i].size == 0) {
|
if (xen_extra_mem[i].n_pfns == 0) {
|
||||||
xen_extra_mem[i].start = start;
|
xen_extra_mem[i].start_pfn = start_pfn;
|
||||||
xen_extra_mem[i].size = size;
|
xen_extra_mem[i].n_pfns = n_pfns;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
/* Append to existing region. */
|
/* Append to existing region. */
|
||||||
if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) {
|
if (xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns ==
|
||||||
xen_extra_mem[i].size += size;
|
start_pfn) {
|
||||||
|
xen_extra_mem[i].n_pfns += n_pfns;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (i == XEN_EXTRA_MEM_MAX_REGIONS)
|
if (i == XEN_EXTRA_MEM_MAX_REGIONS)
|
||||||
printk(KERN_WARNING "Warning: not enough extra memory regions\n");
|
printk(KERN_WARNING "Warning: not enough extra memory regions\n");
|
||||||
|
|
||||||
memblock_reserve(start, size);
|
memblock_reserve(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init xen_del_extra_mem(phys_addr_t start, phys_addr_t size)
|
static void __init xen_del_extra_mem(unsigned long start_pfn,
|
||||||
|
unsigned long n_pfns)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
phys_addr_t start_r, size_r;
|
unsigned long start_r, size_r;
|
||||||
|
|
||||||
for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
|
for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
|
||||||
start_r = xen_extra_mem[i].start;
|
start_r = xen_extra_mem[i].start_pfn;
|
||||||
size_r = xen_extra_mem[i].size;
|
size_r = xen_extra_mem[i].n_pfns;
|
||||||
|
|
||||||
/* Start of region. */
|
/* Start of region. */
|
||||||
if (start_r == start) {
|
if (start_r == start_pfn) {
|
||||||
BUG_ON(size > size_r);
|
BUG_ON(n_pfns > size_r);
|
||||||
xen_extra_mem[i].start += size;
|
xen_extra_mem[i].start_pfn += n_pfns;
|
||||||
xen_extra_mem[i].size -= size;
|
xen_extra_mem[i].n_pfns -= n_pfns;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
/* End of region. */
|
/* End of region. */
|
||||||
if (start_r + size_r == start + size) {
|
if (start_r + size_r == start_pfn + n_pfns) {
|
||||||
BUG_ON(size > size_r);
|
BUG_ON(n_pfns > size_r);
|
||||||
xen_extra_mem[i].size -= size;
|
xen_extra_mem[i].n_pfns -= n_pfns;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
/* Mid of region. */
|
/* Mid of region. */
|
||||||
if (start > start_r && start < start_r + size_r) {
|
if (start_pfn > start_r && start_pfn < start_r + size_r) {
|
||||||
BUG_ON(start + size > start_r + size_r);
|
BUG_ON(start_pfn + n_pfns > start_r + size_r);
|
||||||
xen_extra_mem[i].size = start - start_r;
|
xen_extra_mem[i].n_pfns = start_pfn - start_r;
|
||||||
/* Calling memblock_reserve() again is okay. */
|
/* Calling memblock_reserve() again is okay. */
|
||||||
xen_add_extra_mem(start + size, start_r + size_r -
|
xen_add_extra_mem(start_pfn + n_pfns, start_r + size_r -
|
||||||
(start + size));
|
(start_pfn + n_pfns));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
memblock_free(start, size);
|
memblock_free(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -130,11 +163,10 @@ static void __init xen_del_extra_mem(phys_addr_t start, phys_addr_t size)
|
|||||||
unsigned long __ref xen_chk_extra_mem(unsigned long pfn)
|
unsigned long __ref xen_chk_extra_mem(unsigned long pfn)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
phys_addr_t addr = PFN_PHYS(pfn);
|
|
||||||
|
|
||||||
for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
|
for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
|
||||||
if (addr >= xen_extra_mem[i].start &&
|
if (pfn >= xen_extra_mem[i].start_pfn &&
|
||||||
addr < xen_extra_mem[i].start + xen_extra_mem[i].size)
|
pfn < xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns)
|
||||||
return INVALID_P2M_ENTRY;
|
return INVALID_P2M_ENTRY;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -150,10 +182,10 @@ void __init xen_inv_extra_mem(void)
|
|||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
|
for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
|
||||||
if (!xen_extra_mem[i].size)
|
if (!xen_extra_mem[i].n_pfns)
|
||||||
continue;
|
continue;
|
||||||
pfn_s = PFN_DOWN(xen_extra_mem[i].start);
|
pfn_s = xen_extra_mem[i].start_pfn;
|
||||||
pfn_e = PFN_UP(xen_extra_mem[i].start + xen_extra_mem[i].size);
|
pfn_e = pfn_s + xen_extra_mem[i].n_pfns;
|
||||||
for (pfn = pfn_s; pfn < pfn_e; pfn++)
|
for (pfn = pfn_s; pfn < pfn_e; pfn++)
|
||||||
set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
|
set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
|
||||||
}
|
}
|
||||||
@ -164,15 +196,13 @@ void __init xen_inv_extra_mem(void)
|
|||||||
* This function updates min_pfn with the pfn found and returns
|
* This function updates min_pfn with the pfn found and returns
|
||||||
* the size of that range or zero if not found.
|
* the size of that range or zero if not found.
|
||||||
*/
|
*/
|
||||||
static unsigned long __init xen_find_pfn_range(
|
static unsigned long __init xen_find_pfn_range(unsigned long *min_pfn)
|
||||||
const struct e820entry *list, size_t map_size,
|
|
||||||
unsigned long *min_pfn)
|
|
||||||
{
|
{
|
||||||
const struct e820entry *entry;
|
const struct e820entry *entry = xen_e820_map;
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
unsigned long done = 0;
|
unsigned long done = 0;
|
||||||
|
|
||||||
for (i = 0, entry = list; i < map_size; i++, entry++) {
|
for (i = 0; i < xen_e820_map_entries; i++, entry++) {
|
||||||
unsigned long s_pfn;
|
unsigned long s_pfn;
|
||||||
unsigned long e_pfn;
|
unsigned long e_pfn;
|
||||||
|
|
||||||
@ -221,7 +251,7 @@ static int __init xen_free_mfn(unsigned long mfn)
|
|||||||
* as a fallback if the remapping fails.
|
* as a fallback if the remapping fails.
|
||||||
*/
|
*/
|
||||||
static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
|
static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
|
||||||
unsigned long end_pfn, unsigned long nr_pages, unsigned long *released)
|
unsigned long end_pfn, unsigned long nr_pages)
|
||||||
{
|
{
|
||||||
unsigned long pfn, end;
|
unsigned long pfn, end;
|
||||||
int ret;
|
int ret;
|
||||||
@ -241,7 +271,7 @@ static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
|
|||||||
WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);
|
WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);
|
||||||
|
|
||||||
if (ret == 1) {
|
if (ret == 1) {
|
||||||
(*released)++;
|
xen_released_pages++;
|
||||||
if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY))
|
if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY))
|
||||||
break;
|
break;
|
||||||
} else
|
} else
|
||||||
@ -356,9 +386,8 @@ static void __init xen_do_set_identity_and_remap_chunk(
|
|||||||
* to Xen and not remapped.
|
* to Xen and not remapped.
|
||||||
*/
|
*/
|
||||||
static unsigned long __init xen_set_identity_and_remap_chunk(
|
static unsigned long __init xen_set_identity_and_remap_chunk(
|
||||||
const struct e820entry *list, size_t map_size, unsigned long start_pfn,
|
unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
|
||||||
unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn,
|
unsigned long remap_pfn)
|
||||||
unsigned long *released, unsigned long *remapped)
|
|
||||||
{
|
{
|
||||||
unsigned long pfn;
|
unsigned long pfn;
|
||||||
unsigned long i = 0;
|
unsigned long i = 0;
|
||||||
@ -379,12 +408,11 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
|
|||||||
if (cur_pfn + size > nr_pages)
|
if (cur_pfn + size > nr_pages)
|
||||||
size = nr_pages - cur_pfn;
|
size = nr_pages - cur_pfn;
|
||||||
|
|
||||||
remap_range_size = xen_find_pfn_range(list, map_size,
|
remap_range_size = xen_find_pfn_range(&remap_pfn);
|
||||||
&remap_pfn);
|
|
||||||
if (!remap_range_size) {
|
if (!remap_range_size) {
|
||||||
pr_warning("Unable to find available pfn range, not remapping identity pages\n");
|
pr_warning("Unable to find available pfn range, not remapping identity pages\n");
|
||||||
xen_set_identity_and_release_chunk(cur_pfn,
|
xen_set_identity_and_release_chunk(cur_pfn,
|
||||||
cur_pfn + left, nr_pages, released);
|
cur_pfn + left, nr_pages);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
/* Adjust size to fit in current e820 RAM region */
|
/* Adjust size to fit in current e820 RAM region */
|
||||||
@ -396,7 +424,6 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
|
|||||||
/* Update variables to reflect new mappings. */
|
/* Update variables to reflect new mappings. */
|
||||||
i += size;
|
i += size;
|
||||||
remap_pfn += size;
|
remap_pfn += size;
|
||||||
*remapped += size;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -411,15 +438,11 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
|
|||||||
return remap_pfn;
|
return remap_pfn;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init xen_set_identity_and_remap(
|
static void __init xen_set_identity_and_remap(unsigned long nr_pages)
|
||||||
const struct e820entry *list, size_t map_size, unsigned long nr_pages,
|
|
||||||
unsigned long *released, unsigned long *remapped)
|
|
||||||
{
|
{
|
||||||
phys_addr_t start = 0;
|
phys_addr_t start = 0;
|
||||||
unsigned long last_pfn = nr_pages;
|
unsigned long last_pfn = nr_pages;
|
||||||
const struct e820entry *entry;
|
const struct e820entry *entry = xen_e820_map;
|
||||||
unsigned long num_released = 0;
|
|
||||||
unsigned long num_remapped = 0;
|
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -433,9 +456,9 @@ static void __init xen_set_identity_and_remap(
|
|||||||
* example) the DMI tables in a reserved region that begins on
|
* example) the DMI tables in a reserved region that begins on
|
||||||
* a non-page boundary.
|
* a non-page boundary.
|
||||||
*/
|
*/
|
||||||
for (i = 0, entry = list; i < map_size; i++, entry++) {
|
for (i = 0; i < xen_e820_map_entries; i++, entry++) {
|
||||||
phys_addr_t end = entry->addr + entry->size;
|
phys_addr_t end = entry->addr + entry->size;
|
||||||
if (entry->type == E820_RAM || i == map_size - 1) {
|
if (entry->type == E820_RAM || i == xen_e820_map_entries - 1) {
|
||||||
unsigned long start_pfn = PFN_DOWN(start);
|
unsigned long start_pfn = PFN_DOWN(start);
|
||||||
unsigned long end_pfn = PFN_UP(end);
|
unsigned long end_pfn = PFN_UP(end);
|
||||||
|
|
||||||
@ -444,17 +467,13 @@ static void __init xen_set_identity_and_remap(
|
|||||||
|
|
||||||
if (start_pfn < end_pfn)
|
if (start_pfn < end_pfn)
|
||||||
last_pfn = xen_set_identity_and_remap_chunk(
|
last_pfn = xen_set_identity_and_remap_chunk(
|
||||||
list, map_size, start_pfn,
|
start_pfn, end_pfn, nr_pages,
|
||||||
end_pfn, nr_pages, last_pfn,
|
last_pfn);
|
||||||
&num_released, &num_remapped);
|
|
||||||
start = end;
|
start = end;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
*released = num_released;
|
pr_info("Released %ld page(s)\n", xen_released_pages);
|
||||||
*remapped = num_remapped;
|
|
||||||
|
|
||||||
pr_info("Released %ld page(s)\n", num_released);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -494,7 +513,7 @@ void __init xen_remap_memory(void)
|
|||||||
} else if (pfn_s + len == xen_remap_buf.target_pfn) {
|
} else if (pfn_s + len == xen_remap_buf.target_pfn) {
|
||||||
len += xen_remap_buf.size;
|
len += xen_remap_buf.size;
|
||||||
} else {
|
} else {
|
||||||
xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len));
|
xen_del_extra_mem(pfn_s, len);
|
||||||
pfn_s = xen_remap_buf.target_pfn;
|
pfn_s = xen_remap_buf.target_pfn;
|
||||||
len = xen_remap_buf.size;
|
len = xen_remap_buf.size;
|
||||||
}
|
}
|
||||||
@ -504,19 +523,36 @@ void __init xen_remap_memory(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (pfn_s != ~0UL && len)
|
if (pfn_s != ~0UL && len)
|
||||||
xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len));
|
xen_del_extra_mem(pfn_s, len);
|
||||||
|
|
||||||
set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
|
set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
|
||||||
|
|
||||||
pr_info("Remapped %ld page(s)\n", remapped);
|
pr_info("Remapped %ld page(s)\n", remapped);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static unsigned long __init xen_get_pages_limit(void)
|
||||||
|
{
|
||||||
|
unsigned long limit;
|
||||||
|
|
||||||
|
#ifdef CONFIG_X86_32
|
||||||
|
limit = GB(64) / PAGE_SIZE;
|
||||||
|
#else
|
||||||
|
limit = MAXMEM / PAGE_SIZE;
|
||||||
|
if (!xen_initial_domain() && xen_512gb_limit)
|
||||||
|
limit = GB(512) / PAGE_SIZE;
|
||||||
|
#endif
|
||||||
|
return limit;
|
||||||
|
}
|
||||||
|
|
||||||
static unsigned long __init xen_get_max_pages(void)
|
static unsigned long __init xen_get_max_pages(void)
|
||||||
{
|
{
|
||||||
unsigned long max_pages = MAX_DOMAIN_PAGES;
|
unsigned long max_pages, limit;
|
||||||
domid_t domid = DOMID_SELF;
|
domid_t domid = DOMID_SELF;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
limit = xen_get_pages_limit();
|
||||||
|
max_pages = limit;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For the initial domain we use the maximum reservation as
|
* For the initial domain we use the maximum reservation as
|
||||||
* the maximum page.
|
* the maximum page.
|
||||||
@ -532,7 +568,7 @@ static unsigned long __init xen_get_max_pages(void)
|
|||||||
max_pages = ret;
|
max_pages = ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
return min(max_pages, MAX_DOMAIN_PAGES);
|
return min(max_pages, limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init xen_align_and_add_e820_region(phys_addr_t start,
|
static void __init xen_align_and_add_e820_region(phys_addr_t start,
|
||||||
@ -549,39 +585,188 @@ static void __init xen_align_and_add_e820_region(phys_addr_t start,
|
|||||||
e820_add_region(start, end - start, type);
|
e820_add_region(start, end - start, type);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init xen_ignore_unusable(struct e820entry *list, size_t map_size)
|
static void __init xen_ignore_unusable(void)
|
||||||
{
|
{
|
||||||
struct e820entry *entry;
|
struct e820entry *entry = xen_e820_map;
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
|
|
||||||
for (i = 0, entry = list; i < map_size; i++, entry++) {
|
for (i = 0; i < xen_e820_map_entries; i++, entry++) {
|
||||||
if (entry->type == E820_UNUSABLE)
|
if (entry->type == E820_UNUSABLE)
|
||||||
entry->type = E820_RAM;
|
entry->type = E820_RAM;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static unsigned long __init xen_count_remap_pages(unsigned long max_pfn)
|
||||||
|
{
|
||||||
|
unsigned long extra = 0;
|
||||||
|
unsigned long start_pfn, end_pfn;
|
||||||
|
const struct e820entry *entry = xen_e820_map;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
end_pfn = 0;
|
||||||
|
for (i = 0; i < xen_e820_map_entries; i++, entry++) {
|
||||||
|
start_pfn = PFN_DOWN(entry->addr);
|
||||||
|
/* Adjacent regions on non-page boundaries handling! */
|
||||||
|
end_pfn = min(end_pfn, start_pfn);
|
||||||
|
|
||||||
|
if (start_pfn >= max_pfn)
|
||||||
|
return extra + max_pfn - end_pfn;
|
||||||
|
|
||||||
|
/* Add any holes in map to result. */
|
||||||
|
extra += start_pfn - end_pfn;
|
||||||
|
|
||||||
|
end_pfn = PFN_UP(entry->addr + entry->size);
|
||||||
|
end_pfn = min(end_pfn, max_pfn);
|
||||||
|
|
||||||
|
if (entry->type != E820_RAM)
|
||||||
|
extra += end_pfn - start_pfn;
|
||||||
|
}
|
||||||
|
|
||||||
|
return extra;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool __init xen_is_e820_reserved(phys_addr_t start, phys_addr_t size)
|
||||||
|
{
|
||||||
|
struct e820entry *entry;
|
||||||
|
unsigned mapcnt;
|
||||||
|
phys_addr_t end;
|
||||||
|
|
||||||
|
if (!size)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
end = start + size;
|
||||||
|
entry = xen_e820_map;
|
||||||
|
|
||||||
|
for (mapcnt = 0; mapcnt < xen_e820_map_entries; mapcnt++) {
|
||||||
|
if (entry->type == E820_RAM && entry->addr <= start &&
|
||||||
|
(entry->addr + entry->size) >= end)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
entry++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Find a free area in physical memory not yet reserved and compliant with
|
||||||
|
* E820 map.
|
||||||
|
* Used to relocate pre-allocated areas like initrd or p2m list which are in
|
||||||
|
* conflict with the to be used E820 map.
|
||||||
|
* In case no area is found, return 0. Otherwise return the physical address
|
||||||
|
* of the area which is already reserved for convenience.
|
||||||
|
*/
|
||||||
|
phys_addr_t __init xen_find_free_area(phys_addr_t size)
|
||||||
|
{
|
||||||
|
unsigned mapcnt;
|
||||||
|
phys_addr_t addr, start;
|
||||||
|
struct e820entry *entry = xen_e820_map;
|
||||||
|
|
||||||
|
for (mapcnt = 0; mapcnt < xen_e820_map_entries; mapcnt++, entry++) {
|
||||||
|
if (entry->type != E820_RAM || entry->size < size)
|
||||||
|
continue;
|
||||||
|
start = entry->addr;
|
||||||
|
for (addr = start; addr < start + size; addr += PAGE_SIZE) {
|
||||||
|
if (!memblock_is_reserved(addr))
|
||||||
|
continue;
|
||||||
|
start = addr + PAGE_SIZE;
|
||||||
|
if (start + size > entry->addr + entry->size)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (addr >= start + size) {
|
||||||
|
memblock_reserve(start, size);
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Like memcpy, but with physical addresses for dest and src.
|
||||||
|
*/
|
||||||
|
static void __init xen_phys_memcpy(phys_addr_t dest, phys_addr_t src,
|
||||||
|
phys_addr_t n)
|
||||||
|
{
|
||||||
|
phys_addr_t dest_off, src_off, dest_len, src_len, len;
|
||||||
|
void *from, *to;
|
||||||
|
|
||||||
|
while (n) {
|
||||||
|
dest_off = dest & ~PAGE_MASK;
|
||||||
|
src_off = src & ~PAGE_MASK;
|
||||||
|
dest_len = n;
|
||||||
|
if (dest_len > (NR_FIX_BTMAPS << PAGE_SHIFT) - dest_off)
|
||||||
|
dest_len = (NR_FIX_BTMAPS << PAGE_SHIFT) - dest_off;
|
||||||
|
src_len = n;
|
||||||
|
if (src_len > (NR_FIX_BTMAPS << PAGE_SHIFT) - src_off)
|
||||||
|
src_len = (NR_FIX_BTMAPS << PAGE_SHIFT) - src_off;
|
||||||
|
len = min(dest_len, src_len);
|
||||||
|
to = early_memremap(dest - dest_off, dest_len + dest_off);
|
||||||
|
from = early_memremap(src - src_off, src_len + src_off);
|
||||||
|
memcpy(to, from, len);
|
||||||
|
early_memunmap(to, dest_len + dest_off);
|
||||||
|
early_memunmap(from, src_len + src_off);
|
||||||
|
n -= len;
|
||||||
|
dest += len;
|
||||||
|
src += len;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Reserve Xen mfn_list.
|
||||||
|
*/
|
||||||
|
static void __init xen_reserve_xen_mfnlist(void)
|
||||||
|
{
|
||||||
|
phys_addr_t start, size;
|
||||||
|
|
||||||
|
if (xen_start_info->mfn_list >= __START_KERNEL_map) {
|
||||||
|
start = __pa(xen_start_info->mfn_list);
|
||||||
|
size = PFN_ALIGN(xen_start_info->nr_pages *
|
||||||
|
sizeof(unsigned long));
|
||||||
|
} else {
|
||||||
|
start = PFN_PHYS(xen_start_info->first_p2m_pfn);
|
||||||
|
size = PFN_PHYS(xen_start_info->nr_p2m_frames);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!xen_is_e820_reserved(start, size)) {
|
||||||
|
memblock_reserve(start, size);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_X86_32
|
||||||
|
/*
|
||||||
|
* Relocating the p2m on 32 bit system to an arbitrary virtual address
|
||||||
|
* is not supported, so just give up.
|
||||||
|
*/
|
||||||
|
xen_raw_console_write("Xen hypervisor allocated p2m list conflicts with E820 map\n");
|
||||||
|
BUG();
|
||||||
|
#else
|
||||||
|
xen_relocate_p2m();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* machine_specific_memory_setup - Hook for machine specific memory setup.
|
* machine_specific_memory_setup - Hook for machine specific memory setup.
|
||||||
**/
|
**/
|
||||||
char * __init xen_memory_setup(void)
|
char * __init xen_memory_setup(void)
|
||||||
{
|
{
|
||||||
static struct e820entry map[E820MAX] __initdata;
|
unsigned long max_pfn, pfn_s, n_pfns;
|
||||||
|
phys_addr_t mem_end, addr, size, chunk_size;
|
||||||
unsigned long max_pfn = xen_start_info->nr_pages;
|
u32 type;
|
||||||
phys_addr_t mem_end;
|
|
||||||
int rc;
|
int rc;
|
||||||
struct xen_memory_map memmap;
|
struct xen_memory_map memmap;
|
||||||
unsigned long max_pages;
|
unsigned long max_pages;
|
||||||
unsigned long extra_pages = 0;
|
unsigned long extra_pages = 0;
|
||||||
unsigned long remapped_pages;
|
|
||||||
int i;
|
int i;
|
||||||
int op;
|
int op;
|
||||||
|
|
||||||
max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
|
xen_parse_512gb();
|
||||||
|
max_pfn = xen_get_pages_limit();
|
||||||
|
max_pfn = min(max_pfn, xen_start_info->nr_pages);
|
||||||
mem_end = PFN_PHYS(max_pfn);
|
mem_end = PFN_PHYS(max_pfn);
|
||||||
|
|
||||||
memmap.nr_entries = E820MAX;
|
memmap.nr_entries = E820MAX;
|
||||||
set_xen_guest_handle(memmap.buffer, map);
|
set_xen_guest_handle(memmap.buffer, xen_e820_map);
|
||||||
|
|
||||||
op = xen_initial_domain() ?
|
op = xen_initial_domain() ?
|
||||||
XENMEM_machine_memory_map :
|
XENMEM_machine_memory_map :
|
||||||
@ -590,15 +775,16 @@ char * __init xen_memory_setup(void)
|
|||||||
if (rc == -ENOSYS) {
|
if (rc == -ENOSYS) {
|
||||||
BUG_ON(xen_initial_domain());
|
BUG_ON(xen_initial_domain());
|
||||||
memmap.nr_entries = 1;
|
memmap.nr_entries = 1;
|
||||||
map[0].addr = 0ULL;
|
xen_e820_map[0].addr = 0ULL;
|
||||||
map[0].size = mem_end;
|
xen_e820_map[0].size = mem_end;
|
||||||
/* 8MB slack (to balance backend allocations). */
|
/* 8MB slack (to balance backend allocations). */
|
||||||
map[0].size += 8ULL << 20;
|
xen_e820_map[0].size += 8ULL << 20;
|
||||||
map[0].type = E820_RAM;
|
xen_e820_map[0].type = E820_RAM;
|
||||||
rc = 0;
|
rc = 0;
|
||||||
}
|
}
|
||||||
BUG_ON(rc);
|
BUG_ON(rc);
|
||||||
BUG_ON(memmap.nr_entries == 0);
|
BUG_ON(memmap.nr_entries == 0);
|
||||||
|
xen_e820_map_entries = memmap.nr_entries;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Xen won't allow a 1:1 mapping to be created to UNUSABLE
|
* Xen won't allow a 1:1 mapping to be created to UNUSABLE
|
||||||
@ -609,25 +795,20 @@ char * __init xen_memory_setup(void)
|
|||||||
* a patch in the future.
|
* a patch in the future.
|
||||||
*/
|
*/
|
||||||
if (xen_initial_domain())
|
if (xen_initial_domain())
|
||||||
xen_ignore_unusable(map, memmap.nr_entries);
|
xen_ignore_unusable();
|
||||||
|
|
||||||
/* Make sure the Xen-supplied memory map is well-ordered. */
|
/* Make sure the Xen-supplied memory map is well-ordered. */
|
||||||
sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);
|
sanitize_e820_map(xen_e820_map, xen_e820_map_entries,
|
||||||
|
&xen_e820_map_entries);
|
||||||
|
|
||||||
max_pages = xen_get_max_pages();
|
max_pages = xen_get_max_pages();
|
||||||
|
|
||||||
|
/* How many extra pages do we need due to remapping? */
|
||||||
|
max_pages += xen_count_remap_pages(max_pfn);
|
||||||
|
|
||||||
if (max_pages > max_pfn)
|
if (max_pages > max_pfn)
|
||||||
extra_pages += max_pages - max_pfn;
|
extra_pages += max_pages - max_pfn;
|
||||||
|
|
||||||
/*
|
|
||||||
* Set identity map on non-RAM pages and prepare remapping the
|
|
||||||
* underlying RAM.
|
|
||||||
*/
|
|
||||||
xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
|
|
||||||
&xen_released_pages, &remapped_pages);
|
|
||||||
|
|
||||||
extra_pages += xen_released_pages;
|
|
||||||
extra_pages += remapped_pages;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Clamp the amount of extra memory to a EXTRA_MEM_RATIO
|
* Clamp the amount of extra memory to a EXTRA_MEM_RATIO
|
||||||
* factor the base size. On non-highmem systems, the base
|
* factor the base size. On non-highmem systems, the base
|
||||||
@ -635,46 +816,54 @@ char * __init xen_memory_setup(void)
|
|||||||
* is limited to the max size of lowmem, so that it doesn't
|
* is limited to the max size of lowmem, so that it doesn't
|
||||||
* get completely filled.
|
* get completely filled.
|
||||||
*
|
*
|
||||||
|
* Make sure we have no memory above max_pages, as this area
|
||||||
|
* isn't handled by the p2m management.
|
||||||
|
*
|
||||||
* In principle there could be a problem in lowmem systems if
|
* In principle there could be a problem in lowmem systems if
|
||||||
* the initial memory is also very large with respect to
|
* the initial memory is also very large with respect to
|
||||||
* lowmem, but we won't try to deal with that here.
|
* lowmem, but we won't try to deal with that here.
|
||||||
*/
|
*/
|
||||||
extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
|
extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
|
||||||
extra_pages);
|
extra_pages, max_pages - max_pfn);
|
||||||
i = 0;
|
i = 0;
|
||||||
while (i < memmap.nr_entries) {
|
addr = xen_e820_map[0].addr;
|
||||||
phys_addr_t addr = map[i].addr;
|
size = xen_e820_map[0].size;
|
||||||
phys_addr_t size = map[i].size;
|
while (i < xen_e820_map_entries) {
|
||||||
u32 type = map[i].type;
|
chunk_size = size;
|
||||||
|
type = xen_e820_map[i].type;
|
||||||
|
|
||||||
if (type == E820_RAM) {
|
if (type == E820_RAM) {
|
||||||
if (addr < mem_end) {
|
if (addr < mem_end) {
|
||||||
size = min(size, mem_end - addr);
|
chunk_size = min(size, mem_end - addr);
|
||||||
} else if (extra_pages) {
|
} else if (extra_pages) {
|
||||||
size = min(size, PFN_PHYS(extra_pages));
|
chunk_size = min(size, PFN_PHYS(extra_pages));
|
||||||
extra_pages -= PFN_DOWN(size);
|
pfn_s = PFN_UP(addr);
|
||||||
xen_add_extra_mem(addr, size);
|
n_pfns = PFN_DOWN(addr + chunk_size) - pfn_s;
|
||||||
xen_max_p2m_pfn = PFN_DOWN(addr + size);
|
extra_pages -= n_pfns;
|
||||||
|
xen_add_extra_mem(pfn_s, n_pfns);
|
||||||
|
xen_max_p2m_pfn = pfn_s + n_pfns;
|
||||||
} else
|
} else
|
||||||
type = E820_UNUSABLE;
|
type = E820_UNUSABLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
xen_align_and_add_e820_region(addr, size, type);
|
xen_align_and_add_e820_region(addr, chunk_size, type);
|
||||||
|
|
||||||
map[i].addr += size;
|
addr += chunk_size;
|
||||||
map[i].size -= size;
|
size -= chunk_size;
|
||||||
if (map[i].size == 0)
|
if (size == 0) {
|
||||||
i++;
|
i++;
|
||||||
|
if (i < xen_e820_map_entries) {
|
||||||
|
addr = xen_e820_map[i].addr;
|
||||||
|
size = xen_e820_map[i].size;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Set the rest as identity mapped, in case PCI BARs are
|
* Set the rest as identity mapped, in case PCI BARs are
|
||||||
* located here.
|
* located here.
|
||||||
*
|
|
||||||
* PFNs above MAX_P2M_PFN are considered identity mapped as
|
|
||||||
* well.
|
|
||||||
*/
|
*/
|
||||||
set_phys_range_identity(map[i-1].addr / PAGE_SIZE, ~0ul);
|
set_phys_range_identity(addr / PAGE_SIZE, ~0ul);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* In domU, the ISA region is normal, usable memory, but we
|
* In domU, the ISA region is normal, usable memory, but we
|
||||||
@ -684,35 +873,54 @@ char * __init xen_memory_setup(void)
|
|||||||
e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
|
e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
|
||||||
E820_RESERVED);
|
E820_RESERVED);
|
||||||
|
|
||||||
/*
|
|
||||||
* Reserve Xen bits:
|
|
||||||
* - mfn_list
|
|
||||||
* - xen_start_info
|
|
||||||
* See comment above "struct start_info" in <xen/interface/xen.h>
|
|
||||||
* We tried to make the the memblock_reserve more selective so
|
|
||||||
* that it would be clear what region is reserved. Sadly we ran
|
|
||||||
* in the problem wherein on a 64-bit hypervisor with a 32-bit
|
|
||||||
* initial domain, the pt_base has the cr3 value which is not
|
|
||||||
* neccessarily where the pagetable starts! As Jan put it: "
|
|
||||||
* Actually, the adjustment turns out to be correct: The page
|
|
||||||
* tables for a 32-on-64 dom0 get allocated in the order "first L1",
|
|
||||||
* "first L2", "first L3", so the offset to the page table base is
|
|
||||||
* indeed 2. When reading xen/include/public/xen.h's comment
|
|
||||||
* very strictly, this is not a violation (since there nothing is said
|
|
||||||
* that the first thing in the page table space is pointed to by
|
|
||||||
* pt_base; I admit that this seems to be implied though, namely
|
|
||||||
* do I think that it is implied that the page table space is the
|
|
||||||
* range [pt_base, pt_base + nt_pt_frames), whereas that
|
|
||||||
* range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames),
|
|
||||||
* which - without a priori knowledge - the kernel would have
|
|
||||||
* difficulty to figure out)." - so lets just fall back to the
|
|
||||||
* easy way and reserve the whole region.
|
|
||||||
*/
|
|
||||||
memblock_reserve(__pa(xen_start_info->mfn_list),
|
|
||||||
xen_start_info->pt_base - xen_start_info->mfn_list);
|
|
||||||
|
|
||||||
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
|
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check whether the kernel itself conflicts with the target E820 map.
|
||||||
|
* Failing now is better than running into weird problems later due
|
||||||
|
* to relocating (and even reusing) pages with kernel text or data.
|
||||||
|
*/
|
||||||
|
if (xen_is_e820_reserved(__pa_symbol(_text),
|
||||||
|
__pa_symbol(__bss_stop) - __pa_symbol(_text))) {
|
||||||
|
xen_raw_console_write("Xen hypervisor allocated kernel memory conflicts with E820 map\n");
|
||||||
|
BUG();
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check for a conflict of the hypervisor supplied page tables with
|
||||||
|
* the target E820 map.
|
||||||
|
*/
|
||||||
|
xen_pt_check_e820();
|
||||||
|
|
||||||
|
xen_reserve_xen_mfnlist();
|
||||||
|
|
||||||
|
/* Check for a conflict of the initrd with the target E820 map. */
|
||||||
|
if (xen_is_e820_reserved(boot_params.hdr.ramdisk_image,
|
||||||
|
boot_params.hdr.ramdisk_size)) {
|
||||||
|
phys_addr_t new_area, start, size;
|
||||||
|
|
||||||
|
new_area = xen_find_free_area(boot_params.hdr.ramdisk_size);
|
||||||
|
if (!new_area) {
|
||||||
|
xen_raw_console_write("Can't find new memory area for initrd needed due to E820 map conflict\n");
|
||||||
|
BUG();
|
||||||
|
}
|
||||||
|
|
||||||
|
start = boot_params.hdr.ramdisk_image;
|
||||||
|
size = boot_params.hdr.ramdisk_size;
|
||||||
|
xen_phys_memcpy(new_area, start, size);
|
||||||
|
pr_info("initrd moved from [mem %#010llx-%#010llx] to [mem %#010llx-%#010llx]\n",
|
||||||
|
start, start + size, new_area, new_area + size);
|
||||||
|
memblock_free(start, size);
|
||||||
|
boot_params.hdr.ramdisk_image = new_area;
|
||||||
|
boot_params.ext_ramdisk_image = new_area >> 32;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set identity map on non-RAM pages and prepare remapping the
|
||||||
|
* underlying RAM.
|
||||||
|
*/
|
||||||
|
xen_set_identity_and_remap(max_pfn);
|
||||||
|
|
||||||
return "Xen";
|
return "Xen";
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -721,26 +929,30 @@ char * __init xen_memory_setup(void)
  */
 char * __init xen_auto_xlated_memory_setup(void)
 {
-	static struct e820entry map[E820MAX] __initdata;
-
 	struct xen_memory_map memmap;
 	int i;
 	int rc;
 
 	memmap.nr_entries = E820MAX;
-	set_xen_guest_handle(memmap.buffer, map);
+	set_xen_guest_handle(memmap.buffer, xen_e820_map);
 
 	rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
 	if (rc < 0)
 		panic("No memory map (%d)\n", rc);
 
-	sanitize_e820_map(map, ARRAY_SIZE(map), &memmap.nr_entries);
+	xen_e820_map_entries = memmap.nr_entries;
 
-	for (i = 0; i < memmap.nr_entries; i++)
-		e820_add_region(map[i].addr, map[i].size, map[i].type);
+	sanitize_e820_map(xen_e820_map, ARRAY_SIZE(xen_e820_map),
+			  &xen_e820_map_entries);
 
-	memblock_reserve(__pa(xen_start_info->mfn_list),
-			 xen_start_info->pt_base - xen_start_info->mfn_list);
+	for (i = 0; i < xen_e820_map_entries; i++)
+		e820_add_region(xen_e820_map[i].addr, xen_e820_map[i].size,
+				xen_e820_map[i].type);
+
+	/* Remove p2m info, it is not needed. */
+	xen_start_info->mfn_list = 0;
+	xen_start_info->first_p2m_pfn = 0;
+	xen_start_info->nr_p2m_frames = 0;
 
 	return "Xen";
 }
||||||
|
@ -26,6 +26,7 @@
|
|||||||
|
|
||||||
#include <xen/interface/xen.h>
|
#include <xen/interface/xen.h>
|
||||||
#include <xen/interface/vcpu.h>
|
#include <xen/interface/vcpu.h>
|
||||||
|
#include <xen/interface/xenpmu.h>
|
||||||
|
|
||||||
#include <asm/xen/interface.h>
|
#include <asm/xen/interface.h>
|
||||||
#include <asm/xen/hypercall.h>
|
#include <asm/xen/hypercall.h>
|
||||||
@ -38,6 +39,7 @@
|
|||||||
#include "xen-ops.h"
|
#include "xen-ops.h"
|
||||||
#include "mmu.h"
|
#include "mmu.h"
|
||||||
#include "smp.h"
|
#include "smp.h"
|
||||||
|
#include "pmu.h"
|
||||||
|
|
||||||
cpumask_var_t xen_cpu_initialized_map;
|
cpumask_var_t xen_cpu_initialized_map;
|
||||||
|
|
||||||
@ -50,6 +52,7 @@ static DEFINE_PER_CPU(struct xen_common_irq, xen_callfunc_irq) = { .irq = -1 };
|
|||||||
static DEFINE_PER_CPU(struct xen_common_irq, xen_callfuncsingle_irq) = { .irq = -1 };
|
static DEFINE_PER_CPU(struct xen_common_irq, xen_callfuncsingle_irq) = { .irq = -1 };
|
||||||
static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 };
|
static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 };
|
||||||
static DEFINE_PER_CPU(struct xen_common_irq, xen_debug_irq) = { .irq = -1 };
|
static DEFINE_PER_CPU(struct xen_common_irq, xen_debug_irq) = { .irq = -1 };
|
||||||
|
static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 };
|
||||||
|
|
||||||
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
|
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
|
||||||
static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
|
static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
|
||||||
@ -148,11 +151,18 @@ static void xen_smp_intr_free(unsigned int cpu)
|
|||||||
kfree(per_cpu(xen_irq_work, cpu).name);
|
kfree(per_cpu(xen_irq_work, cpu).name);
|
||||||
per_cpu(xen_irq_work, cpu).name = NULL;
|
per_cpu(xen_irq_work, cpu).name = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (per_cpu(xen_pmu_irq, cpu).irq >= 0) {
|
||||||
|
unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL);
|
||||||
|
per_cpu(xen_pmu_irq, cpu).irq = -1;
|
||||||
|
kfree(per_cpu(xen_pmu_irq, cpu).name);
|
||||||
|
per_cpu(xen_pmu_irq, cpu).name = NULL;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
static int xen_smp_intr_init(unsigned int cpu)
|
static int xen_smp_intr_init(unsigned int cpu)
|
||||||
{
|
{
|
||||||
int rc;
|
int rc;
|
||||||
char *resched_name, *callfunc_name, *debug_name;
|
char *resched_name, *callfunc_name, *debug_name, *pmu_name;
|
||||||
|
|
||||||
resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
|
resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
|
||||||
rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
|
rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
|
||||||
@ -218,6 +228,18 @@ static int xen_smp_intr_init(unsigned int cpu)
|
|||||||
per_cpu(xen_irq_work, cpu).irq = rc;
|
per_cpu(xen_irq_work, cpu).irq = rc;
|
||||||
per_cpu(xen_irq_work, cpu).name = callfunc_name;
|
per_cpu(xen_irq_work, cpu).name = callfunc_name;
|
||||||
|
|
||||||
|
if (is_xen_pmu(cpu)) {
|
||||||
|
pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu);
|
||||||
|
rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu,
|
||||||
|
xen_pmu_irq_handler,
|
||||||
|
IRQF_PERCPU|IRQF_NOBALANCING,
|
||||||
|
pmu_name, NULL);
|
||||||
|
if (rc < 0)
|
||||||
|
goto fail;
|
||||||
|
per_cpu(xen_pmu_irq, cpu).irq = rc;
|
||||||
|
per_cpu(xen_pmu_irq, cpu).name = pmu_name;
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
fail:
|
fail:
|
||||||
@ -335,6 +357,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
|
|||||||
}
|
}
|
||||||
set_cpu_sibling_map(0);
|
set_cpu_sibling_map(0);
|
||||||
|
|
||||||
|
xen_pmu_init(0);
|
||||||
|
|
||||||
if (xen_smp_intr_init(0))
|
if (xen_smp_intr_init(0))
|
||||||
BUG();
|
BUG();
|
||||||
|
|
||||||
@ -462,6 +486,8 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
|
|||||||
if (rc)
|
if (rc)
|
||||||
return rc;
|
return rc;
|
||||||
|
|
||||||
|
xen_pmu_init(cpu);
|
||||||
|
|
||||||
rc = xen_smp_intr_init(cpu);
|
rc = xen_smp_intr_init(cpu);
|
||||||
if (rc)
|
if (rc)
|
||||||
return rc;
|
return rc;
|
||||||
@ -503,6 +529,7 @@ static void xen_cpu_die(unsigned int cpu)
|
|||||||
xen_smp_intr_free(cpu);
|
xen_smp_intr_free(cpu);
|
||||||
xen_uninit_lock_cpu(cpu);
|
xen_uninit_lock_cpu(cpu);
|
||||||
xen_teardown_timer(cpu);
|
xen_teardown_timer(cpu);
|
||||||
|
xen_pmu_finish(cpu);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
|
|
||||||
#include "xen-ops.h"
|
#include "xen-ops.h"
|
||||||
#include "mmu.h"
|
#include "mmu.h"
|
||||||
|
#include "pmu.h"
|
||||||
|
|
||||||
static void xen_pv_pre_suspend(void)
|
static void xen_pv_pre_suspend(void)
|
||||||
{
|
{
|
||||||
@ -67,16 +68,26 @@ static void xen_pv_post_suspend(int suspend_cancelled)
|
|||||||
|
|
||||||
void xen_arch_pre_suspend(void)
|
void xen_arch_pre_suspend(void)
|
||||||
{
|
{
|
||||||
|
int cpu;
|
||||||
|
|
||||||
|
for_each_online_cpu(cpu)
|
||||||
|
xen_pmu_finish(cpu);
|
||||||
|
|
||||||
if (xen_pv_domain())
|
if (xen_pv_domain())
|
||||||
xen_pv_pre_suspend();
|
xen_pv_pre_suspend();
|
||||||
}
|
}
|
||||||
|
|
||||||
void xen_arch_post_suspend(int cancelled)
|
void xen_arch_post_suspend(int cancelled)
|
||||||
{
|
{
|
||||||
|
int cpu;
|
||||||
|
|
||||||
if (xen_pv_domain())
|
if (xen_pv_domain())
|
||||||
xen_pv_post_suspend(cancelled);
|
xen_pv_post_suspend(cancelled);
|
||||||
else
|
else
|
||||||
xen_hvm_post_suspend(cancelled);
|
xen_hvm_post_suspend(cancelled);
|
||||||
|
|
||||||
|
for_each_online_cpu(cpu)
|
||||||
|
xen_pmu_init(cpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void xen_vcpu_notify_restore(void *data)
|
static void xen_vcpu_notify_restore(void *data)
|
||||||
|
@ -104,6 +104,8 @@ ENTRY(hypercall_page)
|
|||||||
ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __PAGE_OFFSET)
|
ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __PAGE_OFFSET)
|
||||||
#else
|
#else
|
||||||
ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __START_KERNEL_map)
|
ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __START_KERNEL_map)
|
||||||
|
/* Map the p2m table to a 512GB-aligned user address. */
|
||||||
|
ELFNOTE(Xen, XEN_ELFNOTE_INIT_P2M, .quad PGDIR_SIZE)
|
||||||
#endif
|
#endif
|
||||||
ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen)
|
ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen)
|
||||||
ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
|
ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
|
||||||
|
@ -35,13 +35,20 @@ void xen_build_mfn_list_list(void);
|
|||||||
void xen_setup_machphys_mapping(void);
|
void xen_setup_machphys_mapping(void);
|
||||||
void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
|
void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
|
||||||
void xen_reserve_top(void);
|
void xen_reserve_top(void);
|
||||||
|
void __init xen_reserve_special_pages(void);
|
||||||
|
void __init xen_pt_check_e820(void);
|
||||||
|
|
||||||
void xen_mm_pin_all(void);
|
void xen_mm_pin_all(void);
|
||||||
void xen_mm_unpin_all(void);
|
void xen_mm_unpin_all(void);
|
||||||
|
#ifdef CONFIG_X86_64
|
||||||
|
void __init xen_relocate_p2m(void);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
bool __init xen_is_e820_reserved(phys_addr_t start, phys_addr_t size);
|
||||||
unsigned long __ref xen_chk_extra_mem(unsigned long pfn);
|
unsigned long __ref xen_chk_extra_mem(unsigned long pfn);
|
||||||
void __init xen_inv_extra_mem(void);
|
void __init xen_inv_extra_mem(void);
|
||||||
void __init xen_remap_memory(void);
|
void __init xen_remap_memory(void);
|
||||||
|
phys_addr_t __init xen_find_free_area(phys_addr_t size);
|
||||||
char * __init xen_memory_setup(void);
|
char * __init xen_memory_setup(void);
|
||||||
char * xen_auto_xlated_memory_setup(void);
|
char * xen_auto_xlated_memory_setup(void);
|
||||||
void __init xen_arch_setup(void);
|
void __init xen_arch_setup(void);
|
||||||
|
@@ -37,6 +37,7 @@
 #include <linux/interrupt.h>
 #include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 #include <linux/hdreg.h>
 #include <linux/cdrom.h>
 #include <linux/module.h>
@@ -147,6 +148,7 @@ struct blkfront_info
 	unsigned int feature_persistent:1;
 	unsigned int max_indirect_segments;
 	int is_ready;
+	struct blk_mq_tag_set tag_set;
 };
 
 static unsigned int nr_minors;
||||||
@ -616,54 +618,41 @@ static inline bool blkif_request_flush_invalid(struct request *req,
|
|||||||
!(info->feature_flush & REQ_FUA)));
|
!(info->feature_flush & REQ_FUA)));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||||
* do_blkif_request
|
const struct blk_mq_queue_data *qd)
|
||||||
* read a block; request is in a request queue
|
|
||||||
*/
|
|
||||||
static void do_blkif_request(struct request_queue *rq)
|
|
||||||
{
|
{
|
||||||
struct blkfront_info *info = NULL;
|
struct blkfront_info *info = qd->rq->rq_disk->private_data;
|
||||||
struct request *req;
|
|
||||||
int queued;
|
|
||||||
|
|
||||||
pr_debug("Entered do_blkif_request\n");
|
|
||||||
|
|
||||||
queued = 0;
|
|
||||||
|
|
||||||
while ((req = blk_peek_request(rq)) != NULL) {
|
|
||||||
info = req->rq_disk->private_data;
|
|
||||||
|
|
||||||
|
blk_mq_start_request(qd->rq);
|
||||||
|
spin_lock_irq(&info->io_lock);
|
||||||
if (RING_FULL(&info->ring))
|
if (RING_FULL(&info->ring))
|
||||||
goto wait;
|
goto out_busy;
|
||||||
|
|
||||||
blk_start_request(req);
|
if (blkif_request_flush_invalid(qd->rq, info))
|
||||||
|
goto out_err;
|
||||||
|
|
||||||
if (blkif_request_flush_invalid(req, info)) {
|
if (blkif_queue_request(qd->rq))
|
||||||
__blk_end_request_all(req, -EOPNOTSUPP);
|
goto out_busy;
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
pr_debug("do_blk_req %p: cmd %p, sec %lx, "
|
|
||||||
"(%u/%u) [%s]\n",
|
|
||||||
req, req->cmd, (unsigned long)blk_rq_pos(req),
|
|
||||||
blk_rq_cur_sectors(req), blk_rq_sectors(req),
|
|
||||||
rq_data_dir(req) ? "write" : "read");
|
|
||||||
|
|
||||||
if (blkif_queue_request(req)) {
|
|
||||||
blk_requeue_request(rq, req);
|
|
||||||
wait:
|
|
||||||
/* Avoid pointless unplugs. */
|
|
||||||
blk_stop_queue(rq);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
queued++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (queued != 0)
|
|
||||||
flush_requests(info);
|
flush_requests(info);
|
||||||
|
spin_unlock_irq(&info->io_lock);
|
||||||
|
return BLK_MQ_RQ_QUEUE_OK;
|
||||||
|
|
||||||
|
out_err:
|
||||||
|
spin_unlock_irq(&info->io_lock);
|
||||||
|
return BLK_MQ_RQ_QUEUE_ERROR;
|
||||||
|
|
||||||
|
out_busy:
|
||||||
|
spin_unlock_irq(&info->io_lock);
|
||||||
|
blk_mq_stop_hw_queue(hctx);
|
||||||
|
return BLK_MQ_RQ_QUEUE_BUSY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct blk_mq_ops blkfront_mq_ops = {
|
||||||
|
.queue_rq = blkif_queue_rq,
|
||||||
|
.map_queue = blk_mq_map_queue,
|
||||||
|
};
|
||||||
|
|
||||||
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
|
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
|
||||||
unsigned int physical_sector_size,
|
unsigned int physical_sector_size,
|
||||||
unsigned int segments)
|
unsigned int segments)
|
||||||
@@ -671,9 +660,22 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
 	struct request_queue *rq;
 	struct blkfront_info *info = gd->private_data;
 
-	rq = blk_init_queue(do_blkif_request, &info->io_lock);
-	if (rq == NULL)
+	memset(&info->tag_set, 0, sizeof(info->tag_set));
+	info->tag_set.ops = &blkfront_mq_ops;
+	info->tag_set.nr_hw_queues = 1;
+	info->tag_set.queue_depth = BLK_RING_SIZE(info);
+	info->tag_set.numa_node = NUMA_NO_NODE;
+	info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+	info->tag_set.cmd_size = 0;
+	info->tag_set.driver_data = info;
+
+	if (blk_mq_alloc_tag_set(&info->tag_set))
 		return -1;
+	rq = blk_mq_init_queue(&info->tag_set);
+	if (IS_ERR(rq)) {
+		blk_mq_free_tag_set(&info->tag_set);
+		return -1;
+	}
 
 	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
||||||
@ -901,19 +903,15 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
|
|||||||
static void xlvbd_release_gendisk(struct blkfront_info *info)
|
static void xlvbd_release_gendisk(struct blkfront_info *info)
|
||||||
{
|
{
|
||||||
unsigned int minor, nr_minors;
|
unsigned int minor, nr_minors;
|
||||||
unsigned long flags;
|
|
||||||
|
|
||||||
if (info->rq == NULL)
|
if (info->rq == NULL)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
spin_lock_irqsave(&info->io_lock, flags);
|
|
||||||
|
|
||||||
/* No more blkif_request(). */
|
/* No more blkif_request(). */
|
||||||
blk_stop_queue(info->rq);
|
blk_mq_stop_hw_queues(info->rq);
|
||||||
|
|
||||||
/* No more gnttab callback work. */
|
/* No more gnttab callback work. */
|
||||||
gnttab_cancel_free_callback(&info->callback);
|
gnttab_cancel_free_callback(&info->callback);
|
||||||
spin_unlock_irqrestore(&info->io_lock, flags);
|
|
||||||
|
|
||||||
/* Flush gnttab callback work. Must be done with no locks held. */
|
/* Flush gnttab callback work. Must be done with no locks held. */
|
||||||
flush_work(&info->work);
|
flush_work(&info->work);
|
||||||
@ -925,20 +923,18 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
|
|||||||
xlbd_release_minors(minor, nr_minors);
|
xlbd_release_minors(minor, nr_minors);
|
||||||
|
|
||||||
blk_cleanup_queue(info->rq);
|
blk_cleanup_queue(info->rq);
|
||||||
|
blk_mq_free_tag_set(&info->tag_set);
|
||||||
info->rq = NULL;
|
info->rq = NULL;
|
||||||
|
|
||||||
put_disk(info->gd);
|
put_disk(info->gd);
|
||||||
info->gd = NULL;
|
info->gd = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Must be called with io_lock holded */
|
||||||
static void kick_pending_request_queues(struct blkfront_info *info)
|
static void kick_pending_request_queues(struct blkfront_info *info)
|
||||||
{
|
{
|
||||||
if (!RING_FULL(&info->ring)) {
|
if (!RING_FULL(&info->ring))
|
||||||
/* Re-enable calldowns. */
|
blk_mq_start_stopped_hw_queues(info->rq, true);
|
||||||
blk_start_queue(info->rq);
|
|
||||||
/* Kick things off immediately. */
|
|
||||||
do_blkif_request(info->rq);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void blkif_restart_queue(struct work_struct *work)
|
static void blkif_restart_queue(struct work_struct *work)
|
||||||
@ -963,7 +959,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
|
|||||||
BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
|
BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
|
||||||
/* No more blkif_request(). */
|
/* No more blkif_request(). */
|
||||||
if (info->rq)
|
if (info->rq)
|
||||||
blk_stop_queue(info->rq);
|
blk_mq_stop_hw_queues(info->rq);
|
||||||
|
|
||||||
/* Remove all persistent grants */
|
/* Remove all persistent grants */
|
||||||
if (!list_empty(&info->grants)) {
|
if (!list_empty(&info->grants)) {
|
||||||
@ -1146,7 +1142,6 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
|
|||||||
RING_IDX i, rp;
|
RING_IDX i, rp;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
struct blkfront_info *info = (struct blkfront_info *)dev_id;
|
struct blkfront_info *info = (struct blkfront_info *)dev_id;
|
||||||
int error;
|
|
||||||
|
|
||||||
spin_lock_irqsave(&info->io_lock, flags);
|
spin_lock_irqsave(&info->io_lock, flags);
|
||||||
|
|
||||||
@ -1187,37 +1182,37 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
|
req->errors = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
|
||||||
switch (bret->operation) {
|
switch (bret->operation) {
|
||||||
case BLKIF_OP_DISCARD:
|
case BLKIF_OP_DISCARD:
|
||||||
if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
|
if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
|
||||||
struct request_queue *rq = info->rq;
|
struct request_queue *rq = info->rq;
|
||||||
printk(KERN_WARNING "blkfront: %s: %s op failed\n",
|
printk(KERN_WARNING "blkfront: %s: %s op failed\n",
|
||||||
info->gd->disk_name, op_name(bret->operation));
|
info->gd->disk_name, op_name(bret->operation));
|
||||||
error = -EOPNOTSUPP;
|
req->errors = -EOPNOTSUPP;
|
||||||
info->feature_discard = 0;
|
info->feature_discard = 0;
|
||||||
info->feature_secdiscard = 0;
|
info->feature_secdiscard = 0;
|
||||||
queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
|
queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
|
||||||
queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq);
|
queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq);
|
||||||
}
|
}
|
||||||
__blk_end_request_all(req, error);
|
blk_mq_complete_request(req);
|
||||||
break;
|
break;
|
||||||
case BLKIF_OP_FLUSH_DISKCACHE:
|
case BLKIF_OP_FLUSH_DISKCACHE:
|
||||||
case BLKIF_OP_WRITE_BARRIER:
|
case BLKIF_OP_WRITE_BARRIER:
|
||||||
if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
|
if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
|
||||||
printk(KERN_WARNING "blkfront: %s: %s op failed\n",
|
printk(KERN_WARNING "blkfront: %s: %s op failed\n",
|
||||||
info->gd->disk_name, op_name(bret->operation));
|
info->gd->disk_name, op_name(bret->operation));
|
||||||
error = -EOPNOTSUPP;
|
req->errors = -EOPNOTSUPP;
|
||||||
}
|
}
|
||||||
if (unlikely(bret->status == BLKIF_RSP_ERROR &&
|
if (unlikely(bret->status == BLKIF_RSP_ERROR &&
|
||||||
info->shadow[id].req.u.rw.nr_segments == 0)) {
|
info->shadow[id].req.u.rw.nr_segments == 0)) {
|
||||||
printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
|
printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
|
||||||
info->gd->disk_name, op_name(bret->operation));
|
info->gd->disk_name, op_name(bret->operation));
|
||||||
error = -EOPNOTSUPP;
|
req->errors = -EOPNOTSUPP;
|
||||||
}
|
}
|
||||||
if (unlikely(error)) {
|
if (unlikely(req->errors)) {
|
||||||
if (error == -EOPNOTSUPP)
|
if (req->errors == -EOPNOTSUPP)
|
||||||
error = 0;
|
req->errors = 0;
|
||||||
info->feature_flush = 0;
|
info->feature_flush = 0;
|
||||||
xlvbd_flush(info);
|
xlvbd_flush(info);
|
||||||
}
|
}
|
||||||
@ -1228,7 +1223,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
|
|||||||
dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
|
dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
|
||||||
"request: %x\n", bret->status);
|
"request: %x\n", bret->status);
|
||||||
|
|
||||||
__blk_end_request_all(req, error);
|
blk_mq_complete_request(req);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
BUG();
|
BUG();
|
||||||
@ -1555,28 +1550,6 @@ static int blkif_recover(struct blkfront_info *info)
|
|||||||
|
|
||||||
kfree(copy);
|
kfree(copy);
|
||||||
|
|
||||||
/*
|
|
||||||
* Empty the queue, this is important because we might have
|
|
||||||
* requests in the queue with more segments than what we
|
|
||||||
* can handle now.
|
|
||||||
*/
|
|
||||||
spin_lock_irq(&info->io_lock);
|
|
||||||
while ((req = blk_fetch_request(info->rq)) != NULL) {
|
|
||||||
if (req->cmd_flags &
|
|
||||||
(REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
|
|
||||||
list_add(&req->queuelist, &requests);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
merge_bio.head = req->bio;
|
|
||||||
merge_bio.tail = req->biotail;
|
|
||||||
bio_list_merge(&bio_list, &merge_bio);
|
|
||||||
req->bio = NULL;
|
|
||||||
if (req->cmd_flags & (REQ_FLUSH | REQ_FUA))
|
|
||||||
pr_alert("diskcache flush request found!\n");
|
|
||||||
__blk_end_request_all(req, 0);
|
|
||||||
}
|
|
||||||
spin_unlock_irq(&info->io_lock);
|
|
||||||
|
|
||||||
xenbus_switch_state(info->xbdev, XenbusStateConnected);
|
xenbus_switch_state(info->xbdev, XenbusStateConnected);
|
||||||
|
|
||||||
spin_lock_irq(&info->io_lock);
|
spin_lock_irq(&info->io_lock);
|
||||||
@ -1591,9 +1564,10 @@ static int blkif_recover(struct blkfront_info *info)
|
|||||||
/* Requeue pending requests (flush or discard) */
|
/* Requeue pending requests (flush or discard) */
|
||||||
list_del_init(&req->queuelist);
|
list_del_init(&req->queuelist);
|
||||||
BUG_ON(req->nr_phys_segments > segs);
|
BUG_ON(req->nr_phys_segments > segs);
|
||||||
blk_requeue_request(info->rq, req);
|
blk_mq_requeue_request(req);
|
||||||
}
|
}
|
||||||
spin_unlock_irq(&info->io_lock);
|
spin_unlock_irq(&info->io_lock);
|
||||||
|
blk_mq_kick_requeue_list(info->rq);
|
||||||
|
|
||||||
while ((bio = bio_list_pop(&bio_list)) != NULL) {
|
while ((bio = bio_list_pop(&bio_list)) != NULL) {
|
||||||
/* Traverse the list of pending bios and re-queue them */
|
/* Traverse the list of pending bios and re-queue them */
|
||||||
|
@@ -280,4 +280,15 @@ config XEN_ACPI
 	def_bool y
 	depends on X86 && ACPI
 
+config XEN_SYMS
+	bool "Xen symbols"
+	depends on X86 && XEN_DOM0 && XENFS
+	default y if KALLSYMS
+	help
+	   Exports hypervisor symbols (along with their types and addresses) via
+	   /proc/xen/xensyms file, similar to /proc/kallsyms
+
+config XEN_HAVE_VPMU
+	bool
+
 endmenu
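Purely as an illustration from the editor (not part of this series): with CONFIG_XEN_SYMS enabled and xenfs mounted in the usual place, the new node can be consumed much like /proc/kallsyms. A minimal user-space reader, assuming the conventional /proc/xen mount point, could look like this:

```c
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/xen/xensyms", "r");

	if (!f) {
		perror("fopen /proc/xen/xensyms");
		return EXIT_FAILURE;
	}

	/* Each record is "<address> <type> <name>", see xensyms_show() later in this diff. */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);

	fclose(f);
	return EXIT_SUCCESS;
}
```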
||||||
|
@@ -638,9 +638,9 @@ static int __init balloon_init(void)
 	 * regions (see arch/x86/xen/setup.c).
 	 */
 	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++)
-		if (xen_extra_mem[i].size)
-			balloon_add_region(PFN_UP(xen_extra_mem[i].start),
-					   PFN_DOWN(xen_extra_mem[i].size));
+		if (xen_extra_mem[i].n_pfns)
+			balloon_add_region(xen_extra_mem[i].start_pfn,
+					   xen_extra_mem[i].n_pfns);
 
 	return 0;
 }
||||||
|
@@ -1301,11 +1301,7 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
 	if (!VALID_EVTCHN(evtchn))
 		return -1;
 
-	/*
-	 * Events delivered via platform PCI interrupts are always
-	 * routed to vcpu 0 and hence cannot be rebound.
-	 */
-	if (xen_hvm_domain() && !xen_have_vector_callback)
+	if (!xen_support_evtchn_rebind())
 		return -1;
 
 	/* Send future instances of this interrupt to other vcpu. */
|
||||||
|
@ -20,6 +20,9 @@
|
|||||||
#include <xen/xenbus.h>
|
#include <xen/xenbus.h>
|
||||||
#include <xen/interface/xen.h>
|
#include <xen/interface/xen.h>
|
||||||
#include <xen/interface/version.h>
|
#include <xen/interface/version.h>
|
||||||
|
#ifdef CONFIG_XEN_HAVE_VPMU
|
||||||
|
#include <xen/interface/xenpmu.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#define HYPERVISOR_ATTR_RO(_name) \
|
#define HYPERVISOR_ATTR_RO(_name) \
|
||||||
static struct hyp_sysfs_attr _name##_attr = __ATTR_RO(_name)
|
static struct hyp_sysfs_attr _name##_attr = __ATTR_RO(_name)
|
||||||
@ -368,6 +371,126 @@ static void xen_properties_destroy(void)
|
|||||||
sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
|
sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_XEN_HAVE_VPMU
|
||||||
|
struct pmu_mode {
|
||||||
|
const char *name;
|
||||||
|
uint32_t mode;
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct pmu_mode pmu_modes[] = {
|
||||||
|
{"off", XENPMU_MODE_OFF},
|
||||||
|
{"self", XENPMU_MODE_SELF},
|
||||||
|
{"hv", XENPMU_MODE_HV},
|
||||||
|
{"all", XENPMU_MODE_ALL}
|
||||||
|
};
|
||||||
|
|
||||||
|
static ssize_t pmu_mode_store(struct hyp_sysfs_attr *attr,
|
||||||
|
const char *buffer, size_t len)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
struct xen_pmu_params xp;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < ARRAY_SIZE(pmu_modes); i++) {
|
||||||
|
if (strncmp(buffer, pmu_modes[i].name, len - 1) == 0) {
|
||||||
|
xp.val = pmu_modes[i].mode;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i == ARRAY_SIZE(pmu_modes))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
xp.version.maj = XENPMU_VER_MAJ;
|
||||||
|
xp.version.min = XENPMU_VER_MIN;
|
||||||
|
ret = HYPERVISOR_xenpmu_op(XENPMU_mode_set, &xp);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t pmu_mode_show(struct hyp_sysfs_attr *attr, char *buffer)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
struct xen_pmu_params xp;
|
||||||
|
int i;
|
||||||
|
uint32_t mode;
|
||||||
|
|
||||||
|
xp.version.maj = XENPMU_VER_MAJ;
|
||||||
|
xp.version.min = XENPMU_VER_MIN;
|
||||||
|
ret = HYPERVISOR_xenpmu_op(XENPMU_mode_get, &xp);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
mode = (uint32_t)xp.val;
|
||||||
|
for (i = 0; i < ARRAY_SIZE(pmu_modes); i++) {
|
||||||
|
if (mode == pmu_modes[i].mode)
|
||||||
|
return sprintf(buffer, "%s\n", pmu_modes[i].name);
|
||||||
|
}
|
||||||
|
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
HYPERVISOR_ATTR_RW(pmu_mode);
|
||||||
|
|
||||||
|
static ssize_t pmu_features_store(struct hyp_sysfs_attr *attr,
|
||||||
|
const char *buffer, size_t len)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
uint32_t features;
|
||||||
|
struct xen_pmu_params xp;
|
||||||
|
|
||||||
|
ret = kstrtou32(buffer, 0, &features);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
xp.val = features;
|
||||||
|
xp.version.maj = XENPMU_VER_MAJ;
|
||||||
|
xp.version.min = XENPMU_VER_MIN;
|
||||||
|
ret = HYPERVISOR_xenpmu_op(XENPMU_feature_set, &xp);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t pmu_features_show(struct hyp_sysfs_attr *attr, char *buffer)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
struct xen_pmu_params xp;
|
||||||
|
|
||||||
|
xp.version.maj = XENPMU_VER_MAJ;
|
||||||
|
xp.version.min = XENPMU_VER_MIN;
|
||||||
|
ret = HYPERVISOR_xenpmu_op(XENPMU_feature_get, &xp);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
return sprintf(buffer, "0x%x\n", (uint32_t)xp.val);
|
||||||
|
}
|
||||||
|
HYPERVISOR_ATTR_RW(pmu_features);
|
||||||
|
|
||||||
|
static struct attribute *xen_pmu_attrs[] = {
|
||||||
|
&pmu_mode_attr.attr,
|
||||||
|
&pmu_features_attr.attr,
|
||||||
|
NULL
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct attribute_group xen_pmu_group = {
|
||||||
|
.name = "pmu",
|
||||||
|
.attrs = xen_pmu_attrs,
|
||||||
|
};
|
||||||
|
|
||||||
|
static int __init xen_pmu_init(void)
|
||||||
|
{
|
||||||
|
return sysfs_create_group(hypervisor_kobj, &xen_pmu_group);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void xen_pmu_destroy(void)
|
||||||
|
{
|
||||||
|
sysfs_remove_group(hypervisor_kobj, &xen_pmu_group);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static int __init hyper_sysfs_init(void)
|
static int __init hyper_sysfs_init(void)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
@ -390,7 +513,15 @@ static int __init hyper_sysfs_init(void)
|
|||||||
ret = xen_properties_init();
|
ret = xen_properties_init();
|
||||||
if (ret)
|
if (ret)
|
||||||
goto prop_out;
|
goto prop_out;
|
||||||
|
#ifdef CONFIG_XEN_HAVE_VPMU
|
||||||
|
if (xen_initial_domain()) {
|
||||||
|
ret = xen_pmu_init();
|
||||||
|
if (ret) {
|
||||||
|
xen_properties_destroy();
|
||||||
|
goto prop_out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
prop_out:
|
prop_out:
|
||||||
@ -407,6 +538,9 @@ out:
|
|||||||
|
|
||||||
static void __exit hyper_sysfs_exit(void)
|
static void __exit hyper_sysfs_exit(void)
|
||||||
{
|
{
|
||||||
|
#ifdef CONFIG_XEN_HAVE_VPMU
|
||||||
|
xen_pmu_destroy();
|
||||||
|
#endif
|
||||||
xen_properties_destroy();
|
xen_properties_destroy();
|
||||||
xen_compilation_destroy();
|
xen_compilation_destroy();
|
||||||
xen_sysfs_uuid_destroy();
|
xen_sysfs_uuid_destroy();
|
||||||
|
@ -2,3 +2,4 @@ obj-$(CONFIG_XENFS) += xenfs.o
|
|||||||
|
|
||||||
xenfs-y = super.o
|
xenfs-y = super.o
|
||||||
xenfs-$(CONFIG_XEN_DOM0) += xenstored.o
|
xenfs-$(CONFIG_XEN_DOM0) += xenstored.o
|
||||||
|
xenfs-$(CONFIG_XEN_SYMS) += xensyms.o
|
||||||
|
@ -57,6 +57,9 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
|
|||||||
{ "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR },
|
{ "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR },
|
||||||
{ "xsd_kva", &xsd_kva_file_ops, S_IRUSR|S_IWUSR},
|
{ "xsd_kva", &xsd_kva_file_ops, S_IRUSR|S_IWUSR},
|
||||||
{ "xsd_port", &xsd_port_file_ops, S_IRUSR|S_IWUSR},
|
{ "xsd_port", &xsd_port_file_ops, S_IRUSR|S_IWUSR},
|
||||||
|
#ifdef CONFIG_XEN_SYMS
|
||||||
|
{ "xensyms", &xensyms_ops, S_IRUSR},
|
||||||
|
#endif
|
||||||
{""},
|
{""},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -3,5 +3,6 @@
|
|||||||
|
|
||||||
extern const struct file_operations xsd_kva_file_ops;
|
extern const struct file_operations xsd_kva_file_ops;
|
||||||
extern const struct file_operations xsd_port_file_ops;
|
extern const struct file_operations xsd_port_file_ops;
|
||||||
|
extern const struct file_operations xensyms_ops;
|
||||||
|
|
||||||
#endif /* _XENFS_XENBUS_H */
|
#endif /* _XENFS_XENBUS_H */
|
||||||
|
152
drivers/xen/xenfs/xensyms.c
Normal file
152
drivers/xen/xenfs/xensyms.c
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
#include <linux/module.h>
|
||||||
|
#include <linux/init.h>
|
||||||
|
#include <linux/seq_file.h>
|
||||||
|
#include <linux/fs.h>
|
||||||
|
#include <linux/mm.h>
|
||||||
|
#include <linux/proc_fs.h>
|
||||||
|
#include <linux/slab.h>
|
||||||
|
#include <xen/interface/platform.h>
|
||||||
|
#include <asm/xen/hypercall.h>
|
||||||
|
#include <xen/xen-ops.h>
|
||||||
|
#include "xenfs.h"
|
||||||
|
|
||||||
|
|
||||||
|
#define XEN_KSYM_NAME_LEN 127 /* Hypervisor may have different name length */
|
||||||
|
|
||||||
|
struct xensyms {
|
||||||
|
struct xen_platform_op op;
|
||||||
|
char *name;
|
||||||
|
uint32_t namelen;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Grab next output page from the hypervisor */
|
||||||
|
static int xensyms_next_sym(struct xensyms *xs)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
struct xenpf_symdata *symdata = &xs->op.u.symdata;
|
||||||
|
uint64_t symnum;
|
||||||
|
|
||||||
|
memset(xs->name, 0, xs->namelen);
|
||||||
|
symdata->namelen = xs->namelen;
|
||||||
|
|
||||||
|
symnum = symdata->symnum;
|
||||||
|
|
||||||
|
ret = HYPERVISOR_dom0_op(&xs->op);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If hypervisor's symbol didn't fit into the buffer then allocate
|
||||||
|
* a larger buffer and try again.
|
||||||
|
*/
|
||||||
|
if (unlikely(symdata->namelen > xs->namelen)) {
|
||||||
|
kfree(xs->name);
|
||||||
|
|
||||||
|
xs->namelen = symdata->namelen;
|
||||||
|
xs->name = kzalloc(xs->namelen, GFP_KERNEL);
|
||||||
|
if (!xs->name)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
set_xen_guest_handle(symdata->name, xs->name);
|
||||||
|
symdata->symnum--; /* Rewind */
|
||||||
|
|
||||||
|
ret = HYPERVISOR_dom0_op(&xs->op);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (symdata->symnum == symnum)
|
||||||
|
/* End of symbols */
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void *xensyms_start(struct seq_file *m, loff_t *pos)
|
||||||
|
{
|
||||||
|
struct xensyms *xs = (struct xensyms *)m->private;
|
||||||
|
|
||||||
|
xs->op.u.symdata.symnum = *pos;
|
||||||
|
|
||||||
|
if (xensyms_next_sym(xs))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
return m->private;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void *xensyms_next(struct seq_file *m, void *p, loff_t *pos)
|
||||||
|
{
|
||||||
|
struct xensyms *xs = (struct xensyms *)m->private;
|
||||||
|
|
||||||
|
xs->op.u.symdata.symnum = ++(*pos);
|
||||||
|
|
||||||
|
if (xensyms_next_sym(xs))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int xensyms_show(struct seq_file *m, void *p)
|
||||||
|
{
|
||||||
|
struct xensyms *xs = (struct xensyms *)m->private;
|
||||||
|
struct xenpf_symdata *symdata = &xs->op.u.symdata;
|
||||||
|
|
||||||
|
seq_printf(m, "%016llx %c %s\n", symdata->address,
|
||||||
|
symdata->type, xs->name);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void xensyms_stop(struct seq_file *m, void *p)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct seq_operations xensyms_seq_ops = {
|
||||||
|
.start = xensyms_start,
|
||||||
|
.next = xensyms_next,
|
||||||
|
.show = xensyms_show,
|
||||||
|
.stop = xensyms_stop,
|
||||||
|
};
|
||||||
|
|
||||||
|
static int xensyms_open(struct inode *inode, struct file *file)
|
||||||
|
{
|
||||||
|
struct seq_file *m;
|
||||||
|
struct xensyms *xs;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = seq_open_private(file, &xensyms_seq_ops,
|
||||||
|
sizeof(struct xensyms));
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
m = file->private_data;
|
||||||
|
xs = (struct xensyms *)m->private;
|
||||||
|
|
||||||
|
xs->namelen = XEN_KSYM_NAME_LEN + 1;
|
||||||
|
xs->name = kzalloc(xs->namelen, GFP_KERNEL);
|
||||||
|
if (!xs->name) {
|
||||||
|
seq_release_private(inode, file);
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
set_xen_guest_handle(xs->op.u.symdata.name, xs->name);
|
||||||
|
xs->op.cmd = XENPF_get_symbol;
|
||||||
|
xs->op.u.symdata.namelen = xs->namelen;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int xensyms_release(struct inode *inode, struct file *file)
|
||||||
|
{
|
||||||
|
struct seq_file *m = file->private_data;
|
||||||
|
struct xensyms *xs = (struct xensyms *)m->private;
|
||||||
|
|
||||||
|
kfree(xs->name);
|
||||||
|
return seq_release_private(inode, file);
|
||||||
|
}
|
||||||
|
|
||||||
|
const struct file_operations xensyms_ops = {
|
||||||
|
.open = xensyms_open,
|
||||||
|
.read = seq_read,
|
||||||
|
.llseek = seq_lseek,
|
||||||
|
.release = xensyms_release
|
||||||
|
};
|
@ -11,6 +11,8 @@ extern void __iomem *early_ioremap(resource_size_t phys_addr,
|
|||||||
unsigned long size);
|
unsigned long size);
|
||||||
extern void *early_memremap(resource_size_t phys_addr,
|
extern void *early_memremap(resource_size_t phys_addr,
|
||||||
unsigned long size);
|
unsigned long size);
|
||||||
|
extern void *early_memremap_ro(resource_size_t phys_addr,
|
||||||
|
unsigned long size);
|
||||||
extern void early_iounmap(void __iomem *addr, unsigned long size);
|
extern void early_iounmap(void __iomem *addr, unsigned long size);
|
||||||
extern void early_memunmap(void *addr, unsigned long size);
|
extern void early_memunmap(void *addr, unsigned long size);
|
||||||
|
|
||||||
|
@ -46,6 +46,9 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr)
|
|||||||
#ifndef FIXMAP_PAGE_NORMAL
|
#ifndef FIXMAP_PAGE_NORMAL
|
||||||
#define FIXMAP_PAGE_NORMAL PAGE_KERNEL
|
#define FIXMAP_PAGE_NORMAL PAGE_KERNEL
|
||||||
#endif
|
#endif
|
||||||
|
#if !defined(FIXMAP_PAGE_RO) && defined(PAGE_KERNEL_RO)
|
||||||
|
#define FIXMAP_PAGE_RO PAGE_KERNEL_RO
|
||||||
|
#endif
|
||||||
#ifndef FIXMAP_PAGE_NOCACHE
|
#ifndef FIXMAP_PAGE_NOCACHE
|
||||||
#define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_NOCACHE
|
#define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_NOCACHE
|
||||||
#endif
|
#endif
|
||||||
|
@ -92,7 +92,6 @@ void xen_hvm_callback_vector(void);
|
|||||||
#ifdef CONFIG_TRACING
|
#ifdef CONFIG_TRACING
|
||||||
#define trace_xen_hvm_callback_vector xen_hvm_callback_vector
|
#define trace_xen_hvm_callback_vector xen_hvm_callback_vector
|
||||||
#endif
|
#endif
|
||||||
extern int xen_have_vector_callback;
|
|
||||||
int xen_set_callback_via(uint64_t via);
|
int xen_set_callback_via(uint64_t via);
|
||||||
void xen_evtchn_do_upcall(struct pt_regs *regs);
|
void xen_evtchn_do_upcall(struct pt_regs *regs);
|
||||||
void xen_hvm_evtchn_do_upcall(void);
|
void xen_hvm_evtchn_do_upcall(void);
|
||||||
|
@ -474,6 +474,23 @@ struct xenpf_core_parking {
|
|||||||
};
|
};
|
||||||
DEFINE_GUEST_HANDLE_STRUCT(xenpf_core_parking);
|
DEFINE_GUEST_HANDLE_STRUCT(xenpf_core_parking);
|
||||||
|
|
||||||
|
#define XENPF_get_symbol 63
|
||||||
|
struct xenpf_symdata {
|
||||||
|
/* IN/OUT variables */
|
||||||
|
uint32_t namelen; /* size of 'name' buffer */
|
||||||
|
|
||||||
|
/* IN/OUT variables */
|
||||||
|
uint32_t symnum; /* IN: Symbol to read */
|
||||||
|
/* OUT: Next available symbol. If same as IN */
|
||||||
|
/* then we reached the end */
|
||||||
|
|
||||||
|
/* OUT variables */
|
||||||
|
GUEST_HANDLE(char) name;
|
||||||
|
uint64_t address;
|
||||||
|
char type;
|
||||||
|
};
|
||||||
|
DEFINE_GUEST_HANDLE_STRUCT(xenpf_symdata);
|
||||||
|
|
||||||
struct xen_platform_op {
|
struct xen_platform_op {
|
||||||
uint32_t cmd;
|
uint32_t cmd;
|
||||||
uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
|
uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
|
||||||
@ -495,6 +512,7 @@ struct xen_platform_op {
|
|||||||
struct xenpf_cpu_hotadd cpu_add;
|
struct xenpf_cpu_hotadd cpu_add;
|
||||||
struct xenpf_mem_hotadd mem_add;
|
struct xenpf_mem_hotadd mem_add;
|
||||||
struct xenpf_core_parking core_parking;
|
struct xenpf_core_parking core_parking;
|
||||||
|
struct xenpf_symdata symdata;
|
||||||
uint8_t pad[128];
|
uint8_t pad[128];
|
||||||
} u;
|
} u;
|
||||||
};
|
};
|
||||||
|
@ -80,6 +80,7 @@
|
|||||||
#define __HYPERVISOR_kexec_op 37
|
#define __HYPERVISOR_kexec_op 37
|
||||||
#define __HYPERVISOR_tmem_op 38
|
#define __HYPERVISOR_tmem_op 38
|
||||||
#define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */
|
#define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */
|
||||||
|
#define __HYPERVISOR_xenpmu_op 40
|
||||||
|
|
||||||
/* Architecture-specific hypercall definitions. */
|
/* Architecture-specific hypercall definitions. */
|
||||||
#define __HYPERVISOR_arch_0 48
|
#define __HYPERVISOR_arch_0 48
|
||||||
@ -112,6 +113,7 @@
|
|||||||
#define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occured */
|
#define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occured */
|
||||||
#define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */
|
#define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */
|
||||||
#define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */
|
#define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */
|
||||||
|
#define VIRQ_XENPMU 13 /* PMC interrupt */
|
||||||
|
|
||||||
/* Architecture-specific VIRQ definitions. */
|
/* Architecture-specific VIRQ definitions. */
|
||||||
#define VIRQ_ARCH_0 16
|
#define VIRQ_ARCH_0 16
|
||||||
@@ -585,26 +587,29 @@ struct shared_info {
 };
 
 /*
- * Start-of-day memory layout for the initial domain (DOM0):
+ * Start-of-day memory layout
+ *
  * 1. The domain is started within contiguous virtual-memory region.
  * 2. The contiguous region begins and ends on an aligned 4MB boundary.
- * 3. The region start corresponds to the load address of the OS image.
- *    If the load address is not 4MB aligned then the address is rounded down.
- * 4. This the order of bootstrap elements in the initial virtual region:
+ * 3. This the order of bootstrap elements in the initial virtual region:
  *      a. relocated kernel image
  *      b. initial ram disk              [mod_start, mod_len]
+ *         (may be omitted)
  *      c. list of allocated page frames [mfn_list, nr_pages]
+ *         (unless relocated due to XEN_ELFNOTE_INIT_P2M)
  *      d. start_info_t structure        [register ESI (x86)]
- *      e. bootstrap page tables         [pt_base, CR3 (x86)]
- *      f. bootstrap stack               [register ESP (x86)]
- * 5. Bootstrap elements are packed together, but each is 4kB-aligned.
- * 6. The initial ram disk may be omitted.
- * 7. The list of page frames forms a contiguous 'pseudo-physical' memory
+ *         in case of dom0 this page contains the console info, too
+ *      e. unless dom0: xenstore ring page
+ *      f. unless dom0: console ring page
+ *      g. bootstrap page tables         [pt_base, CR3 (x86)]
+ *      h. bootstrap stack               [register ESP (x86)]
+ * 4. Bootstrap elements are packed together, but each is 4kB-aligned.
+ * 5. The list of page frames forms a contiguous 'pseudo-physical' memory
  *    layout for the domain. In particular, the bootstrap virtual-memory
  *    region is a 1:1 mapping to the first section of the pseudo-physical map.
- * 8. All bootstrap elements are mapped read-writable for the guest OS. The
+ * 6. All bootstrap elements are mapped read-writable for the guest OS. The
  *    only exception is the bootstrap page table, which is mapped read-only.
- * 9. There is guaranteed to be at least 512kB padding after the final
+ * 7. There is guaranteed to be at least 512kB padding after the final
  *    bootstrap element. If necessary, the bootstrap virtual region is
  *    extended by an extra 4MB to ensure this.
  */
|
||||||
@@ -645,6 +650,8 @@ struct start_info {
 #define SIF_INITDOMAIN    (1<<1)  /* Is this the initial control domain? */
 #define SIF_MULTIBOOT_MOD (1<<2)  /* Is mod_start a multiboot module? */
 #define SIF_MOD_START_PFN (1<<3)  /* Is mod_start a PFN? */
+#define SIF_VIRT_P2M_4TOOLS (1<<4) /* Do Xen tools understand a virt. mapped */
+					/* P->M making the 3 level tree obsolete? */
 #define SIF_PM_MASK       (0xFF<<8) /* reserve 1 byte for xen-pm options */
 
 /*
|
||||||
|
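A small, hypothetical usage sketch from the editor (the helper name is invented; only the flag and the start_info layout come from this patch): a PV guest could key its p2m handling off the new bit like so.

```c
#include <linux/types.h>
#include <xen/interface/xen.h>

/* Hypothetical helper: does the toolstack understand a virtually
 * mapped p2m list (SIF_VIRT_P2M_4TOOLS set by the domain builder)? */
static bool example_tools_grok_virt_p2m(const struct start_info *si)
{
	return !!(si->flags & SIF_VIRT_P2M_4TOOLS);
}
```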
94
include/xen/interface/xenpmu.h
Normal file
@@ -0,0 +1,94 @@
#ifndef __XEN_PUBLIC_XENPMU_H__
#define __XEN_PUBLIC_XENPMU_H__

#include "xen.h"

#define XENPMU_VER_MAJ    0
#define XENPMU_VER_MIN    1

/*
 * ` enum neg_errnoval
 * ` HYPERVISOR_xenpmu_op(enum xenpmu_op cmd, struct xenpmu_params *args);
 *
 * @cmd  == XENPMU_* (PMU operation)
 * @args == struct xenpmu_params
 */
/* ` enum xenpmu_op { */
#define XENPMU_mode_get        0 /* Also used for getting PMU version */
#define XENPMU_mode_set        1
#define XENPMU_feature_get     2
#define XENPMU_feature_set     3
#define XENPMU_init            4
#define XENPMU_finish          5
#define XENPMU_lvtpc_set       6
#define XENPMU_flush           7

/* ` } */

/* Parameters structure for HYPERVISOR_xenpmu_op call */
struct xen_pmu_params {
	/* IN/OUT parameters */
	struct {
		uint32_t maj;
		uint32_t min;
	} version;
	uint64_t val;

	/* IN parameters */
	uint32_t vcpu;
	uint32_t pad;
};

/* PMU modes:
 * - XENPMU_MODE_OFF:   No PMU virtualization
 * - XENPMU_MODE_SELF:  Guests can profile themselves
 * - XENPMU_MODE_HV:    Guests can profile themselves, dom0 profiles
 *                      itself and Xen
 * - XENPMU_MODE_ALL:   Only dom0 has access to VPMU and it profiles
 *                      everyone: itself, the hypervisor and the guests.
 */
#define XENPMU_MODE_OFF           0
#define XENPMU_MODE_SELF          (1<<0)
#define XENPMU_MODE_HV            (1<<1)
#define XENPMU_MODE_ALL           (1<<2)

/*
 * PMU features:
 * - XENPMU_FEATURE_INTEL_BTS: Intel BTS support (ignored on AMD)
 */
#define XENPMU_FEATURE_INTEL_BTS  1

/*
 * Shared PMU data between hypervisor and PV(H) domains.
 *
 * The hypervisor fills out this structure during PMU interrupt and sends an
 * interrupt to appropriate VCPU.
 * Architecture-independent fields of xen_pmu_data are WO for the hypervisor
 * and RO for the guest but some fields in xen_pmu_arch can be writable
 * by both the hypervisor and the guest (see arch-$arch/pmu.h).
 */
struct xen_pmu_data {
	/* Interrupted VCPU */
	uint32_t vcpu_id;

	/*
	 * Physical processor on which the interrupt occurred. On non-privileged
	 * guests set to vcpu_id;
	 */
	uint32_t pcpu_id;

	/*
	 * Domain that was interrupted. On non-privileged guests set to
	 * DOMID_SELF.
	 * On privileged guests can be DOMID_SELF, DOMID_XEN, or, when in
	 * XENPMU_MODE_ALL mode, domain ID of another domain.
	 */
	domid_t domain_id;

	uint8_t pad[6];

	/* Architecture-specific information */
	struct xen_pmu_arch pmu;
};

#endif /* __XEN_PUBLIC_XENPMU_H__ */
@ -9,8 +9,8 @@ static inline unsigned long page_to_mfn(struct page *page)
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct xen_memory_region {
|
struct xen_memory_region {
|
||||||
phys_addr_t start;
|
unsigned long start_pfn;
|
||||||
phys_addr_t size;
|
unsigned long n_pfns;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define XEN_EXTRA_MEM_MAX_REGIONS 128 /* == E820MAX */
|
#define XEN_EXTRA_MEM_MAX_REGIONS 128 /* == E820MAX */
|
||||||
|
@@ -217,6 +217,13 @@ early_memremap(resource_size_t phys_addr, unsigned long size)
 	return (__force void *)__early_ioremap(phys_addr, size,
 					       FIXMAP_PAGE_NORMAL);
 }
+#ifdef FIXMAP_PAGE_RO
+void __init *
+early_memremap_ro(resource_size_t phys_addr, unsigned long size)
+{
+	return (__force void *)__early_ioremap(phys_addr, size, FIXMAP_PAGE_RO);
+}
+#endif
 #else /* CONFIG_MMU */
 
 void __init __iomem *
@@ -231,6 +238,11 @@ early_memremap(resource_size_t phys_addr, unsigned long size)
 {
 	return (void *)phys_addr;
 }
+void __init *
+early_memremap_ro(resource_size_t phys_addr, unsigned long size)
+{
+	return (void *)phys_addr;
+}
 
 void __init early_iounmap(void __iomem *addr, unsigned long size)
 {
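Finally, an editor's usage sketch for the new read-only early-mapping helper (the table address and the function are placeholders, not something added by this series):

```c
#include <linux/init.h>
#include <linux/types.h>
#include <asm/io.h>	/* assumed to pull in the early_ioremap declarations touched above */

/* Illustration only: peek at a page of boot data without a writable mapping. */
static void __init example_peek_boot_table(phys_addr_t table_phys)
{
	void *tbl = early_memremap_ro(table_phys, PAGE_SIZE);

	if (!tbl)
		return;

	/* ... read-only accesses to the table go here ... */

	early_memunmap(tbl, PAGE_SIZE);
}
```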