xen: branch for v5.9-rc1b

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQRTLbB6QfY48x44uB6AXGG7T9hjvgUCXzaSXAAKCRCAXGG7T9hj
 vuSEAP4qOIv7Hr1wMJfTsN7ZoNNr/K6ph8ADcjFm9RGikn8MawD8CU/OfcFKJFwl
 UVwM1HPnRG6pvCI9bmHS4WYrIBYBVw0=
 =Bi6R
 -----END PGP SIGNATURE-----

Merge tag 'for-linus-5.9-rc1b-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull more xen updates from Juergen Gross:

 - Remove support for running as 32-bit Xen PV-guest.

   32-bit PV guests are rarely used, are lacking security fixes for
   Meltdown, and can be easily replaced by PVH mode. Another series for
   doing more cleanup will follow soon (removal of 32-bit-only pvops
   functionality).

 - Fixes and additional features for the Xen display frontend driver.

* tag 'for-linus-5.9-rc1b-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  drm/xen-front: Pass dumb buffer data offset to the backend
  xen: Sync up with the canonical protocol definition in Xen
  drm/xen-front: Add YUYV to supported formats
  drm/xen-front: Fix misused IS_ERR_OR_NULL checks
  xen/gntdev: Fix dmabuf import with non-zero sgt offset
  x86/xen: drop tests for highmem in pv code
  x86/xen: eliminate xen-asm_64.S
  x86/xen: remove 32-bit Xen PV guest support
This commit is contained in:
Linus Torvalds 2020-08-14 13:34:37 -07:00
commit 0520058d05
27 changed files with 396 additions and 1142 deletions

View File

@ -449,8 +449,6 @@
.macro SWITCH_TO_KERNEL_STACK
ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
BUG_IF_WRONG_CR3
SWITCH_TO_KERNEL_CR3 scratch_reg=%eax
@ -599,8 +597,6 @@
*/
.macro SWITCH_TO_ENTRY_STACK
ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
/* Bytes to copy */
movl $PTREGS_SIZE, %ecx
@ -872,17 +868,6 @@ SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
* will ignore all of the single-step traps generated in this range.
*/
#ifdef CONFIG_XEN_PV
/*
* Xen doesn't set %esp to be precisely what the normal SYSENTER
* entry point expects, so fix it up before using the normal path.
*/
SYM_CODE_START(xen_sysenter_target)
addl $5*4, %esp /* remove xen-provided frame */
jmp .Lsysenter_past_esp
SYM_CODE_END(xen_sysenter_target)
#endif
/*
* 32-bit SYSENTER entry.
*
@ -965,9 +950,8 @@ SYM_FUNC_START(entry_SYSENTER_32)
movl %esp, %eax
call do_SYSENTER_32
/* XEN PV guests always use IRET path */
ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
"jmp .Lsyscall_32_done", X86_FEATURE_XENPV
testl %eax, %eax
jz .Lsyscall_32_done
STACKLEAK_ERASE
@ -1165,95 +1149,6 @@ SYM_FUNC_END(entry_INT80_32)
#endif
.endm
#ifdef CONFIG_PARAVIRT
SYM_CODE_START(native_iret)
iret
_ASM_EXTABLE(native_iret, asm_iret_error)
SYM_CODE_END(native_iret)
#endif
#ifdef CONFIG_XEN_PV
/*
* See comment in entry_64.S for further explanation
*
* Note: This is not an actual IDT entry point. It's a XEN specific entry
* point and therefore named to match the 64-bit trampoline counterpart.
*/
SYM_FUNC_START(xen_asm_exc_xen_hypervisor_callback)
/*
* Check to see if we got the event in the critical
* region in xen_iret_direct, after we've reenabled
* events and checked for pending events. This simulates
* iret instruction's behaviour where it delivers a
* pending interrupt when enabling interrupts:
*/
cmpl $xen_iret_start_crit, (%esp)
jb 1f
cmpl $xen_iret_end_crit, (%esp)
jae 1f
call xen_iret_crit_fixup
1:
pushl $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
ENCODE_FRAME_POINTER
mov %esp, %eax
call xen_pv_evtchn_do_upcall
jmp handle_exception_return
SYM_FUNC_END(xen_asm_exc_xen_hypervisor_callback)
/*
* Hypervisor uses this for application faults while it executes.
* We get here for two reasons:
* 1. Fault while reloading DS, ES, FS or GS
* 2. Fault while executing IRET
* Category 1 we fix up by reattempting the load, and zeroing the segment
* register if the load fails.
* Category 2 we fix up by jumping to do_iret_error. We cannot use the
* normal Linux return path in this case because if we use the IRET hypercall
* to pop the stack frame we end up in an infinite loop of failsafe callbacks.
* We distinguish between categories by maintaining a status value in EAX.
*/
SYM_FUNC_START(xen_failsafe_callback)
pushl %eax
movl $1, %eax
1: mov 4(%esp), %ds
2: mov 8(%esp), %es
3: mov 12(%esp), %fs
4: mov 16(%esp), %gs
/* EAX == 0 => Category 1 (Bad segment)
EAX != 0 => Category 2 (Bad IRET) */
testl %eax, %eax
popl %eax
lea 16(%esp), %esp
jz 5f
jmp asm_iret_error
5: pushl $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
ENCODE_FRAME_POINTER
jmp handle_exception_return
.section .fixup, "ax"
6: xorl %eax, %eax
movl %eax, 4(%esp)
jmp 1b
7: xorl %eax, %eax
movl %eax, 8(%esp)
jmp 2b
8: xorl %eax, %eax
movl %eax, 12(%esp)
jmp 3b
9: xorl %eax, %eax
movl %eax, 16(%esp)
jmp 4b
.previous
_ASM_EXTABLE(1b, 6b)
_ASM_EXTABLE(2b, 7b)
_ASM_EXTABLE(3b, 8b)
_ASM_EXTABLE(4b, 9b)
SYM_FUNC_END(xen_failsafe_callback)
#endif /* CONFIG_XEN_PV */
SYM_CODE_START_LOCAL_NOALIGN(handle_exception)
/* the function address is in %gs's slot on the stack */
SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1

View File

@ -16,33 +16,3 @@ ELFNOTE_START(Linux, 0, "a")
ELFNOTE_END
BUILD_SALT
#ifdef CONFIG_XEN
/*
* Add a special note telling glibc's dynamic linker a fake hardware
* flavor that it will use to choose the search path for libraries in the
* same way it uses real hardware capabilities like "mmx".
* We supply "nosegneg" as the fake capability, to indicate that we
* do not like negative offsets in instructions using segment overrides,
* since we implement those inefficiently. This makes it possible to
* install libraries optimized to avoid those access patterns in someplace
* like /lib/i686/tls/nosegneg. Note that an /etc/ld.so.conf.d/file
* corresponding to the bits here is needed to make ldconfig work right.
* It should contain:
* hwcap 1 nosegneg
* to match the mapping of bit to name that we give here.
*
* At runtime, the fake hardware feature will be considered to be present
* if its bit is set in the mask word. So, we start with the mask 0, and
* at boot time we set VDSO_NOTE_NONEGSEG_BIT if running under Xen.
*/
#include "../../xen/vdso.h" /* Defines VDSO_NOTE_NONEGSEG_BIT. */
ELFNOTE_START(GNU, 2, "a")
.long 1 /* ncaps */
VDSO32_NOTE_MASK: /* Symbol used by arch/x86/xen/setup.c */
.long 0 /* mask */
.byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg" /* bit, name */
ELFNOTE_END
#endif

View File

@ -25,7 +25,7 @@ void entry_SYSENTER_compat(void);
void __end_entry_SYSENTER_compat(void);
void entry_SYSCALL_compat(void);
void entry_INT80_compat(void);
#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
#ifdef CONFIG_XEN_PV
void xen_entry_INT80_compat(void);
#endif
#endif

View File

@ -301,7 +301,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
extern void early_ignore_irq(void);
#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
#ifdef CONFIG_XEN_PV
extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE];
#endif

View File

@ -134,38 +134,7 @@ SYM_CODE_START(startup_32)
movl %eax,pa(initial_page_table+0xffc)
#endif
#ifdef CONFIG_PARAVIRT
/* This can only trip for a broken bootloader... */
cmpw $0x207, pa(boot_params + BP_version)
jb .Ldefault_entry
/* Paravirt-compatible boot parameters. Look to see what architecture
we're booting under. */
movl pa(boot_params + BP_hardware_subarch), %eax
cmpl $num_subarch_entries, %eax
jae .Lbad_subarch
movl pa(subarch_entries)(,%eax,4), %eax
subl $__PAGE_OFFSET, %eax
jmp *%eax
.Lbad_subarch:
SYM_INNER_LABEL_ALIGN(xen_entry, SYM_L_WEAK)
/* Unknown implementation; there's really
nothing we can do at this point. */
ud2a
__INITDATA
subarch_entries:
.long .Ldefault_entry /* normal x86/PC */
.long xen_entry /* Xen hypervisor */
.long .Ldefault_entry /* Moorestown MID */
num_subarch_entries = (. - subarch_entries) / 4
.previous
#else
jmp .Ldefault_entry
#endif /* CONFIG_PARAVIRT */
SYM_CODE_END(startup_32)
#ifdef CONFIG_HOTPLUG_CPU

View File

@ -19,6 +19,7 @@ config XEN_PV
bool "Xen PV guest support"
default y
depends on XEN
depends on X86_64
select PARAVIRT_XXL
select XEN_HAVE_PVMMU
select XEN_HAVE_VPMU
@ -50,7 +51,7 @@ config XEN_PVHVM_SMP
config XEN_512GB
bool "Limit Xen pv-domain memory to 512GB"
depends on XEN_PV && X86_64
depends on XEN_PV
default y
help
Limit paravirtualized user domains to 512GB of RAM.

View File

@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_xen-asm.o := y
ifdef CONFIG_FUNCTION_TRACER
# Do not profile debug and lowlevel utilities
@ -33,7 +33,6 @@ obj-$(CONFIG_XEN_PV) += mmu_pv.o
obj-$(CONFIG_XEN_PV) += irq.o
obj-$(CONFIG_XEN_PV) += multicalls.o
obj-$(CONFIG_XEN_PV) += xen-asm.o
obj-$(CONFIG_XEN_PV) += xen-asm_$(BITS).o
obj-$(CONFIG_XEN_PVH) += enlighten_pvh.o

View File

@ -60,10 +60,6 @@ static u32 xen_apic_read(u32 reg)
if (reg == APIC_LVR)
return 0x14;
#ifdef CONFIG_X86_32
if (reg == APIC_LDR)
return SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
#endif
if (reg != APIC_ID)
return 0;
@ -129,14 +125,6 @@ static int xen_phys_pkg_id(int initial_apic_id, int index_msb)
return initial_apic_id >> index_msb;
}
#ifdef CONFIG_X86_32
static int xen_x86_32_early_logical_apicid(int cpu)
{
/* Match with APIC_LDR read. Otherwise setup_local_APIC complains. */
return 1 << cpu;
}
#endif
static void xen_noop(void)
{
}
@ -199,11 +187,6 @@ static struct apic xen_pv_apic = {
.icr_write = xen_apic_icr_write,
.wait_icr_idle = xen_noop,
.safe_wait_icr_idle = xen_safe_apic_wait_icr_idle,
#ifdef CONFIG_X86_32
/* generic_processor_info and setup_local_APIC. */
.x86_32_early_logical_apicid = xen_x86_32_early_logical_apicid,
#endif
};
static void __init xen_apic_check(void)

View File

@ -119,14 +119,6 @@ static void __init xen_banner(void)
printk(KERN_INFO "Xen version: %d.%d%s%s\n",
version >> 16, version & 0xffff, extra.extraversion,
xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
#ifdef CONFIG_X86_32
pr_warn("WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n"
"Support for running as 32-bit PV-guest under Xen will soon be removed\n"
"from the Linux kernel!\n"
"Please use either a 64-bit kernel or switch to HVM or PVH mode!\n"
"WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n");
#endif
}
static void __init xen_pv_init_platform(void)
@ -353,15 +345,13 @@ static void set_aliased_prot(void *v, pgprot_t prot)
pte_t *ptep;
pte_t pte;
unsigned long pfn;
struct page *page;
unsigned char dummy;
void *va;
ptep = lookup_address((unsigned long)v, &level);
BUG_ON(ptep == NULL);
pfn = pte_pfn(*ptep);
page = pfn_to_page(pfn);
pte = pfn_pte(pfn, prot);
/*
@ -391,14 +381,10 @@ static void set_aliased_prot(void *v, pgprot_t prot)
if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
BUG();
if (!PageHighMem(page)) {
void *av = __va(PFN_PHYS(pfn));
va = __va(PFN_PHYS(pfn));
if (av != v)
if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0))
BUG();
} else
kmap_flush_unused();
if (va != v && HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
BUG();
preempt_enable();
}
@ -538,30 +524,12 @@ static void load_TLS_descriptor(struct thread_struct *t,
static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
{
/*
* XXX sleazy hack: If we're being called in a lazy-cpu zone
* and lazy gs handling is enabled, it means we're in a
* context switch, and %gs has just been saved. This means we
* can zero it out to prevent faults on exit from the
* hypervisor if the next process has no %gs. Either way, it
* has been saved, and the new value will get loaded properly.
* This will go away as soon as Xen has been modified to not
* save/restore %gs for normal hypercalls.
*
* On x86_64, this hack is not used for %gs, because gs points
* to KERNEL_GS_BASE (and uses it for PDA references), so we
* must not zero %gs on x86_64
*
* For x86_64, we need to zero %fs, otherwise we may get an
* In lazy mode we need to zero %fs, otherwise we may get an
* exception between the new %fs descriptor being loaded and
* %fs being effectively cleared at __switch_to().
*/
if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
#ifdef CONFIG_X86_32
lazy_load_gs(0);
#else
if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)
loadsegment(fs, 0);
#endif
}
xen_mc_batch();
@ -572,13 +540,11 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
xen_mc_issue(PARAVIRT_LAZY_CPU);
}
#ifdef CONFIG_X86_64
static void xen_load_gs_index(unsigned int idx)
{
if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx))
BUG();
}
#endif
static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
const void *ptr)
@ -597,7 +563,6 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
preempt_enable();
}
#ifdef CONFIG_X86_64
void noist_exc_debug(struct pt_regs *regs);
DEFINE_IDTENTRY_RAW(xenpv_exc_nmi)
@ -697,7 +662,6 @@ static bool __ref get_trap_addr(void **addr, unsigned int ist)
return true;
}
#endif
static int cvt_gate_to_trap(int vector, const gate_desc *val,
struct trap_info *info)
@ -710,10 +674,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
info->vector = vector;
addr = gate_offset(val);
#ifdef CONFIG_X86_64
if (!get_trap_addr((void **)&addr, val->bits.ist))
return 0;
#endif /* CONFIG_X86_64 */
info->address = addr;
info->cs = gate_segment(val);
@ -958,15 +920,12 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
{
int ret;
#ifdef CONFIG_X86_64
unsigned int which;
u64 base;
#endif
ret = 0;
switch (msr) {
#ifdef CONFIG_X86_64
case MSR_FS_BASE: which = SEGBASE_FS; goto set;
case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set;
case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set;
@ -976,7 +935,6 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
if (HYPERVISOR_set_segment_base(which, base) != 0)
ret = -EIO;
break;
#endif
case MSR_STAR:
case MSR_CSTAR:
@ -1058,9 +1016,7 @@ void __init xen_setup_vcpu_info_placement(void)
static const struct pv_info xen_info __initconst = {
.shared_kernel_pmd = 0,
#ifdef CONFIG_X86_64
.extra_user_64bit_cs = FLAT_USER_CS64,
#endif
.name = "Xen",
};
@ -1086,18 +1042,14 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.read_pmc = xen_read_pmc,
.iret = xen_iret,
#ifdef CONFIG_X86_64
.usergs_sysret64 = xen_sysret64,
#endif
.load_tr_desc = paravirt_nop,
.set_ldt = xen_set_ldt,
.load_gdt = xen_load_gdt,
.load_idt = xen_load_idt,
.load_tls = xen_load_tls,
#ifdef CONFIG_X86_64
.load_gs_index = xen_load_gs_index,
#endif
.alloc_ldt = xen_alloc_ldt,
.free_ldt = xen_free_ldt,
@ -1364,15 +1316,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
/* keep using Xen gdt for now; no urgent need to change it */
#ifdef CONFIG_X86_32
pv_info.kernel_rpl = 1;
if (xen_feature(XENFEAT_supervisor_mode_kernel))
pv_info.kernel_rpl = 0;
#else
pv_info.kernel_rpl = 0;
#endif
/* set the limit of our address space */
xen_reserve_top();
/*
* We used to do this in xen_arch_setup, but that is too late
@ -1384,12 +1328,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
if (rc != 0)
xen_raw_printk("physdev_op failed %d\n", rc);
#ifdef CONFIG_X86_32
/* set up basic CPUID stuff */
cpu_detect(&new_cpu_data);
set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1);
#endif
if (xen_start_info->mod_start) {
if (xen_start_info->flags & SIF_MOD_START_PFN)
@ -1458,12 +1396,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_efi_init(&boot_params);
/* Start the world */
#ifdef CONFIG_X86_32
i386_start_kernel();
#else
cr4_init_shadow(); /* 32b kernel does this in i386_start_kernel() */
x86_64_start_reservations((char *)__pa_symbol(&boot_params));
#endif
}
static int xen_cpu_up_prepare_pv(unsigned int cpu)

View File

@ -86,19 +86,8 @@
#include "mmu.h"
#include "debugfs.h"
#ifdef CONFIG_X86_32
/*
* Identity map, in addition to plain kernel map. This needs to be
* large enough to allocate page table pages to allocate the rest.
* Each page can map 2MB.
*/
#define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4)
static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
#endif
#ifdef CONFIG_X86_64
/* l3 pud for userspace vsyscall mapping */
static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
#endif /* CONFIG_X86_64 */
/*
* Protects atomic reservation decrease/increase against concurrent increases.
@ -280,10 +269,7 @@ static inline void __xen_set_pte(pte_t *ptep, pte_t pteval)
if (!xen_batched_set_pte(ptep, pteval)) {
/*
* Could call native_set_pte() here and trap and
* emulate the PTE write but with 32-bit guests this
* needs two traps (one for each of the two 32-bit
* words in the PTE) so do one hypercall directly
* instead.
* emulate the PTE write, but a hypercall is much cheaper.
*/
struct mmu_update u;
@ -439,26 +425,6 @@ static void xen_set_pud(pud_t *ptr, pud_t val)
xen_set_pud_hyper(ptr, val);
}
#ifdef CONFIG_X86_PAE
static void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
{
trace_xen_mmu_set_pte_atomic(ptep, pte);
__xen_set_pte(ptep, pte);
}
static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
trace_xen_mmu_pte_clear(mm, addr, ptep);
__xen_set_pte(ptep, native_make_pte(0));
}
static void xen_pmd_clear(pmd_t *pmdp)
{
trace_xen_mmu_pmd_clear(pmdp);
set_pmd(pmdp, __pmd(0));
}
#endif /* CONFIG_X86_PAE */
__visible pmd_t xen_make_pmd(pmdval_t pmd)
{
pmd = pte_pfn_to_mfn(pmd);
@ -466,7 +432,6 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
#ifdef CONFIG_X86_64
__visible pudval_t xen_pud_val(pud_t pud)
{
return pte_mfn_to_pfn(pud.pud);
@ -571,27 +536,27 @@ __visible p4d_t xen_make_p4d(p4dval_t p4d)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d);
#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
#endif /* CONFIG_X86_64 */
static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
bool last, unsigned long limit)
static void xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
void (*func)(struct mm_struct *mm, struct page *,
enum pt_level),
bool last, unsigned long limit)
{
int i, nr, flush = 0;
int i, nr;
nr = last ? pmd_index(limit) + 1 : PTRS_PER_PMD;
for (i = 0; i < nr; i++) {
if (!pmd_none(pmd[i]))
flush |= (*func)(mm, pmd_page(pmd[i]), PT_PTE);
(*func)(mm, pmd_page(pmd[i]), PT_PTE);
}
return flush;
}
static int xen_pud_walk(struct mm_struct *mm, pud_t *pud,
int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
bool last, unsigned long limit)
static void xen_pud_walk(struct mm_struct *mm, pud_t *pud,
void (*func)(struct mm_struct *mm, struct page *,
enum pt_level),
bool last, unsigned long limit)
{
int i, nr, flush = 0;
int i, nr;
nr = last ? pud_index(limit) + 1 : PTRS_PER_PUD;
for (i = 0; i < nr; i++) {
@ -602,29 +567,26 @@ static int xen_pud_walk(struct mm_struct *mm, pud_t *pud,
pmd = pmd_offset(&pud[i], 0);
if (PTRS_PER_PMD > 1)
flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
flush |= xen_pmd_walk(mm, pmd, func,
last && i == nr - 1, limit);
(*func)(mm, virt_to_page(pmd), PT_PMD);
xen_pmd_walk(mm, pmd, func, last && i == nr - 1, limit);
}
return flush;
}
static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
bool last, unsigned long limit)
static void xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
void (*func)(struct mm_struct *mm, struct page *,
enum pt_level),
bool last, unsigned long limit)
{
int flush = 0;
pud_t *pud;
if (p4d_none(*p4d))
return flush;
return;
pud = pud_offset(p4d, 0);
if (PTRS_PER_PUD > 1)
flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
flush |= xen_pud_walk(mm, pud, func, last, limit);
return flush;
(*func)(mm, virt_to_page(pud), PT_PUD);
xen_pud_walk(mm, pud, func, last, limit);
}
/*
@ -636,32 +598,27 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
* will be STACK_TOP_MAX, but at boot we need to pin up to
* FIXADDR_TOP.
*
* For 32-bit the important bit is that we don't pin beyond there,
* because then we start getting into Xen's ptes.
*
* For 64-bit, we must skip the Xen hole in the middle of the address
* space, just after the big x86-64 virtual hole.
* We must skip the Xen hole in the middle of the address space, just after
* the big x86-64 virtual hole.
*/
static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
int (*func)(struct mm_struct *mm, struct page *,
enum pt_level),
unsigned long limit)
static void __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
void (*func)(struct mm_struct *mm, struct page *,
enum pt_level),
unsigned long limit)
{
int i, nr, flush = 0;
int i, nr;
unsigned hole_low = 0, hole_high = 0;
/* The limit is the last byte to be touched */
limit--;
BUG_ON(limit >= FIXADDR_TOP);
#ifdef CONFIG_X86_64
/*
* 64-bit has a great big hole in the middle of the address
* space, which contains the Xen mappings.
*/
hole_low = pgd_index(GUARD_HOLE_BASE_ADDR);
hole_high = pgd_index(GUARD_HOLE_END_ADDR);
#endif
nr = pgd_index(limit) + 1;
for (i = 0; i < nr; i++) {
@ -674,22 +631,20 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
continue;
p4d = p4d_offset(&pgd[i], 0);
flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
}
/* Do the top level last, so that the callbacks can use it as
a cue to do final things like tlb flushes. */
flush |= (*func)(mm, virt_to_page(pgd), PT_PGD);
return flush;
(*func)(mm, virt_to_page(pgd), PT_PGD);
}
static int xen_pgd_walk(struct mm_struct *mm,
int (*func)(struct mm_struct *mm, struct page *,
enum pt_level),
unsigned long limit)
static void xen_pgd_walk(struct mm_struct *mm,
void (*func)(struct mm_struct *mm, struct page *,
enum pt_level),
unsigned long limit)
{
return __xen_pgd_walk(mm, mm->pgd, func, limit);
__xen_pgd_walk(mm, mm->pgd, func, limit);
}
/* If we're using split pte locks, then take the page's lock and
@ -722,26 +677,17 @@ static void xen_do_pin(unsigned level, unsigned long pfn)
xen_extend_mmuext_op(&op);
}
static int xen_pin_page(struct mm_struct *mm, struct page *page,
enum pt_level level)
static void xen_pin_page(struct mm_struct *mm, struct page *page,
enum pt_level level)
{
unsigned pgfl = TestSetPagePinned(page);
int flush;
if (pgfl)
flush = 0; /* already pinned */
else if (PageHighMem(page))
/* kmaps need flushing if we found an unpinned
highpage */
flush = 1;
else {
if (!pgfl) {
void *pt = lowmem_page_address(page);
unsigned long pfn = page_to_pfn(page);
struct multicall_space mcs = __xen_mc_entry(0);
spinlock_t *ptl;
flush = 0;
/*
* We need to hold the pagetable lock between the time
* we make the pagetable RO and when we actually pin
@ -778,8 +724,6 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
xen_mc_callback(xen_pte_unlock, ptl);
}
}
return flush;
}
/* This is called just after a mm has been created, but it has not
@ -787,39 +731,22 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
read-only, and can be pinned. */
static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
{
pgd_t *user_pgd = xen_get_user_pgd(pgd);
trace_xen_mmu_pgd_pin(mm, pgd);
xen_mc_batch();
if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
/* re-enable interrupts for flushing */
xen_mc_issue(0);
__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT);
kmap_flush_unused();
xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
xen_mc_batch();
if (user_pgd) {
xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
xen_do_pin(MMUEXT_PIN_L4_TABLE,
PFN_DOWN(__pa(user_pgd)));
}
#ifdef CONFIG_X86_64
{
pgd_t *user_pgd = xen_get_user_pgd(pgd);
xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
if (user_pgd) {
xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
xen_do_pin(MMUEXT_PIN_L4_TABLE,
PFN_DOWN(__pa(user_pgd)));
}
}
#else /* CONFIG_X86_32 */
#ifdef CONFIG_X86_PAE
/* Need to make sure unshared kernel PMD is pinnable */
xen_pin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
PT_PMD);
#endif
xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
#endif /* CONFIG_X86_64 */
xen_mc_issue(0);
}
@ -854,11 +781,10 @@ void xen_mm_pin_all(void)
spin_unlock(&pgd_lock);
}
static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
enum pt_level level)
static void __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
enum pt_level level)
{
SetPagePinned(page);
return 0;
}
/*
@ -870,18 +796,16 @@ static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
static void __init xen_after_bootmem(void)
{
static_branch_enable(&xen_struct_pages_ready);
#ifdef CONFIG_X86_64
SetPagePinned(virt_to_page(level3_user_vsyscall));
#endif
xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
}
static int xen_unpin_page(struct mm_struct *mm, struct page *page,
enum pt_level level)
static void xen_unpin_page(struct mm_struct *mm, struct page *page,
enum pt_level level)
{
unsigned pgfl = TestClearPagePinned(page);
if (pgfl && !PageHighMem(page)) {
if (pgfl) {
void *pt = lowmem_page_address(page);
unsigned long pfn = page_to_pfn(page);
spinlock_t *ptl = NULL;
@ -912,36 +836,24 @@ static int xen_unpin_page(struct mm_struct *mm, struct page *page,
xen_mc_callback(xen_pte_unlock, ptl);
}
}
return 0; /* never need to flush on unpin */
}
/* Release a pagetable's pages back as normal RW */
static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
{
pgd_t *user_pgd = xen_get_user_pgd(pgd);
trace_xen_mmu_pgd_unpin(mm, pgd);
xen_mc_batch();
xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
#ifdef CONFIG_X86_64
{
pgd_t *user_pgd = xen_get_user_pgd(pgd);
if (user_pgd) {
xen_do_pin(MMUEXT_UNPIN_TABLE,
PFN_DOWN(__pa(user_pgd)));
xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD);
}
if (user_pgd) {
xen_do_pin(MMUEXT_UNPIN_TABLE,
PFN_DOWN(__pa(user_pgd)));
xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD);
}
#endif
#ifdef CONFIG_X86_PAE
/* Need to make sure unshared kernel PMD is unpinned */
xen_unpin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
PT_PMD);
#endif
__xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT);
@ -1089,7 +1001,6 @@ static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
BUG();
}
#ifdef CONFIG_X86_64
static void __init xen_cleanhighmap(unsigned long vaddr,
unsigned long vaddr_end)
{
@ -1273,17 +1184,15 @@ static void __init xen_pagetable_cleanhighmap(void)
xen_cleanhighmap(addr, roundup(addr + size, PMD_SIZE * 2));
xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
}
#endif
static void __init xen_pagetable_p2m_setup(void)
{
xen_vmalloc_p2m_tree();
#ifdef CONFIG_X86_64
xen_pagetable_p2m_free();
xen_pagetable_cleanhighmap();
#endif
/* And revector! Bye bye old array */
xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
}
@ -1420,6 +1329,8 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)
}
static void xen_write_cr3(unsigned long cr3)
{
pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
BUG_ON(preemptible());
xen_mc_batch(); /* disables interrupts */
@ -1430,20 +1341,14 @@ static void xen_write_cr3(unsigned long cr3)
__xen_write_cr3(true, cr3);
#ifdef CONFIG_X86_64
{
pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
if (user_pgd)
__xen_write_cr3(false, __pa(user_pgd));
else
__xen_write_cr3(false, 0);
}
#endif
if (user_pgd)
__xen_write_cr3(false, __pa(user_pgd));
else
__xen_write_cr3(false, 0);
xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
}
#ifdef CONFIG_X86_64
/*
* At the start of the day - when Xen launches a guest, it has already
* built pagetables for the guest. We diligently look over them
@ -1478,49 +1383,39 @@ static void __init xen_write_cr3_init(unsigned long cr3)
xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
}
#endif
static int xen_pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd = mm->pgd;
int ret = 0;
struct page *page = virt_to_page(pgd);
pgd_t *user_pgd;
int ret = -ENOMEM;
BUG_ON(PagePinned(virt_to_page(pgd)));
BUG_ON(page->private != 0);
#ifdef CONFIG_X86_64
{
struct page *page = virt_to_page(pgd);
pgd_t *user_pgd;
user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
page->private = (unsigned long)user_pgd;
BUG_ON(page->private != 0);
ret = -ENOMEM;
user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
page->private = (unsigned long)user_pgd;
if (user_pgd != NULL) {
if (user_pgd != NULL) {
#ifdef CONFIG_X86_VSYSCALL_EMULATION
user_pgd[pgd_index(VSYSCALL_ADDR)] =
__pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
user_pgd[pgd_index(VSYSCALL_ADDR)] =
__pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
#endif
ret = 0;
}
BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
ret = 0;
}
#endif
BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
return ret;
}
static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
#ifdef CONFIG_X86_64
pgd_t *user_pgd = xen_get_user_pgd(pgd);
if (user_pgd)
free_page((unsigned long)user_pgd);
#endif
}
/*
@ -1539,7 +1434,6 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
*/
__visible pte_t xen_make_pte_init(pteval_t pte)
{
#ifdef CONFIG_X86_64
unsigned long pfn;
/*
@ -1553,7 +1447,7 @@ __visible pte_t xen_make_pte_init(pteval_t pte)
pfn >= xen_start_info->first_p2m_pfn &&
pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames)
pte &= ~_PAGE_RW;
#endif
pte = pte_pfn_to_mfn(pte);
return native_make_pte(pte);
}
@ -1561,13 +1455,6 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init);
static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
{
#ifdef CONFIG_X86_32
/* If there's an existing pte, then don't allow _PAGE_RW to be set */
if (pte_mfn(pte) != INVALID_P2M_ENTRY
&& pte_val_ma(*ptep) & _PAGE_PRESENT)
pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
pte_val_ma(pte));
#endif
__xen_set_pte(ptep, pte);
}
@ -1642,20 +1529,14 @@ static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
if (static_branch_likely(&xen_struct_pages_ready))
SetPagePinned(page);
if (!PageHighMem(page)) {
xen_mc_batch();
xen_mc_batch();
__set_pfn_prot(pfn, PAGE_KERNEL_RO);
__set_pfn_prot(pfn, PAGE_KERNEL_RO);
if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
__pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
__pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
xen_mc_issue(PARAVIRT_LAZY_MMU);
} else {
/* make sure there are no stray mappings of
this page */
kmap_flush_unused();
}
xen_mc_issue(PARAVIRT_LAZY_MMU);
}
}
@ -1678,16 +1559,15 @@ static inline void xen_release_ptpage(unsigned long pfn, unsigned level)
trace_xen_mmu_release_ptpage(pfn, level, pinned);
if (pinned) {
if (!PageHighMem(page)) {
xen_mc_batch();
xen_mc_batch();
if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
__pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
__pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
__set_pfn_prot(pfn, PAGE_KERNEL);
__set_pfn_prot(pfn, PAGE_KERNEL);
xen_mc_issue(PARAVIRT_LAZY_MMU);
xen_mc_issue(PARAVIRT_LAZY_MMU);
}
ClearPagePinned(page);
}
}
@ -1702,7 +1582,6 @@ static void xen_release_pmd(unsigned long pfn)
xen_release_ptpage(pfn, PT_PMD);
}
#ifdef CONFIG_X86_64
static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
{
xen_alloc_ptpage(mm, pfn, PT_PUD);
@ -1712,20 +1591,6 @@ static void xen_release_pud(unsigned long pfn)
{
xen_release_ptpage(pfn, PT_PUD);
}
#endif
void __init xen_reserve_top(void)
{
#ifdef CONFIG_X86_32
unsigned long top = HYPERVISOR_VIRT_START;
struct xen_platform_parameters pp;
if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
top = pp.virt_start;
reserve_top_address(-top);
#endif /* CONFIG_X86_32 */
}
/*
* Like __va(), but returns address in the kernel mapping (which is
@ -1733,11 +1598,7 @@ void __init xen_reserve_top(void)
*/
static void * __init __ka(phys_addr_t paddr)
{
#ifdef CONFIG_X86_64
return (void *)(paddr + __START_KERNEL_map);
#else
return __va(paddr);
#endif
}
/* Convert a machine address to physical address */
@ -1771,56 +1632,7 @@ static void __init set_page_prot(void *addr, pgprot_t prot)
{
return set_page_prot_flags(addr, prot, UVMF_NONE);
}
#ifdef CONFIG_X86_32
/*
* Populate the page tables below @pmd so that pfn N is mapped by the N-th
* pte, using PAGE_KERNEL_EXEC, for pfns below @max_pfn.
*
* pte pages that are already present in @pmd are reused and their
* non-empty entries are left untouched. Missing pte pages are taken from
* level1_ident_pgt, which is allocated here from the brk area; the walk
* stops early once all LEVEL1_IDENT_ENTRIES of it have been handed out.
*
* The inner loop always walks a whole pte page, so the last page visited
* may map pfns beyond @max_pfn. max_pfn_mapped is raised to the highest
* pfn visited.
*
* On exit the pte pages taken from level1_ident_pgt and the @pmd page
* itself are made read-only.
*/
static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
{
unsigned pmdidx, pteidx;
unsigned ident_pte;
unsigned long pfn;
level1_ident_pgt = extend_brk(sizeof(pte_t) * LEVEL1_IDENT_ENTRIES,
PAGE_SIZE);
/* ident_pte counts the level1_ident_pgt entries handed out so far. */
ident_pte = 0;
pfn = 0;
for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
pte_t *pte_page;
/* Reuse or allocate a page of ptes */
if (pmd_present(pmd[pmdidx]))
pte_page = m2v(pmd[pmdidx].pmd);
else {
/* Check for free pte pages */
if (ident_pte == LEVEL1_IDENT_ENTRIES)
break;
pte_page = &level1_ident_pgt[ident_pte];
ident_pte += PTRS_PER_PTE;
pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
}
/* Install mappings */
for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
pte_t pte;
if (pfn > max_pfn_mapped)
max_pfn_mapped = pfn;
/* Keep whatever mapping is already there. */
if (!pte_none(pte_page[pteidx]))
continue;
pte = pfn_pte(pfn, PAGE_KERNEL_EXEC);
pte_page[pteidx] = pte;
}
}
/* Write-protect every pte page that was taken from level1_ident_pgt. */
for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
set_page_prot(pmd, PAGE_KERNEL_RO);
}
#endif
void __init xen_setup_machphys_mapping(void)
{
struct xen_machphys_mapping mapping;
@ -1831,13 +1643,8 @@ void __init xen_setup_machphys_mapping(void)
} else {
machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
}
#ifdef CONFIG_X86_32
WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1))
< machine_to_phys_mapping);
#endif
}
#ifdef CONFIG_X86_64
static void __init convert_pfn_mfn(void *v)
{
pte_t *pte = v;
@ -2168,105 +1975,6 @@ void __init xen_relocate_p2m(void)
xen_start_info->nr_p2m_frames = n_frames;
}
#else /* !CONFIG_X86_64 */
static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
RESERVE_BRK(fixup_kernel_pmd, PAGE_SIZE);
RESERVE_BRK(fixup_kernel_pte, PAGE_SIZE);
/*
* Perform the first switch from initial_page_table to swapper_pg_dir.
*
* BUG()s unless the current CR3 is initial_page_table and @cr3 is the
* physical address of swapper_pg_dir. Once the switch is done,
* pv_ops.mmu.write_cr3 is pointed at xen_write_cr3.
*/
static void __init xen_write_cr3_init(unsigned long cr3)
{
unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
BUG_ON(read_cr3_pa() != __pa(initial_page_table));
BUG_ON(cr3 != __pa(swapper_pg_dir));
/*
* We are switching to swapper_pg_dir for the first time (from
* initial_page_table) and therefore need to mark that page
* read-only and then pin it.
*
* Xen disallows sharing of kernel PMDs for PAE
* guests. Therefore we must copy the kernel PMD from
* initial_page_table into a new kernel PMD to be used in
* swapper_pg_dir.
*/
swapper_kernel_pmd =
extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
copy_page(swapper_kernel_pmd, initial_kernel_pmd);
swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
__pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
/* Write-protect the new pmd and pgd pages before loading and pinning. */
set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
xen_write_cr3(cr3);
pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn);
/* Unpin the old top-level table and make its pages writable again. */
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
PFN_DOWN(__pa(initial_page_table)));
set_page_prot(initial_page_table, PAGE_KERNEL);
set_page_prot(initial_kernel_pmd, PAGE_KERNEL);
/* Later CR3 writes go directly to xen_write_cr3. */
pv_ops.mmu.write_cr3 = &xen_write_cr3;
}
/*
 * On 32-bit domains xen_start_info->pt_base is the address of the pgd,
 * which is not necessarily the first page of the page table pool.
 * Walk the initial page tables to find the real base: the lowest physical
 * address among pt_base, the pmd page itself and every pte page referenced
 * by a present, non-large entry of @pmd.
 */
static phys_addr_t __init xen_find_pt_base(pmd_t *pmd)
{
	phys_addr_t lowest = min(__pa(xen_start_info->pt_base), __pa(pmd));
	unsigned idx;

	for (idx = 0; idx < PTRS_PER_PMD; idx++) {
		phys_addr_t table;

		/* Only entries that point at a pte page are of interest. */
		if (!pmd_present(pmd[idx]) || pmd_large(pmd[idx]))
			continue;

		table = m2p(pmd[idx].pmd);
		lowest = min(lowest, table);
	}

	return lowest;
}
/*
* Replace the page table passed in @pgd with the kernel's own
* initial_page_table.
*
* The kernel pmd referenced by pgd[KERNEL_PGD_BOUNDARY] is copied into a
* brk-allocated initial_kernel_pmd and extended by
* xen_map_identity_early() up to @max_pfn. @pgd is copied into
* initial_page_table, whose kernel slot is pointed at the new pmd. The
* new tables are write-protected, @pgd is unpinned, initial_page_table is
* pinned and loaded, and the page table area
* [xen_pt_base, xen_pt_base + xen_pt_size) is reserved in memblock.
*/
void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
{
pmd_t *kernel_pmd;
kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
xen_pt_base = xen_find_pt_base(kernel_pmd);
xen_pt_size = xen_start_info->nr_pt_frames * PAGE_SIZE;
initial_kernel_pmd =
extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
/* NOTE(review): the extra 512 KiB past the page tables is not explained here - confirm its origin. */
max_pfn_mapped = PFN_DOWN(xen_pt_base + xen_pt_size + 512 * 1024);
copy_page(initial_kernel_pmd, kernel_pmd);
xen_map_identity_early(initial_kernel_pmd, max_pfn);
copy_page(initial_page_table, pgd);
initial_page_table[KERNEL_PGD_BOUNDARY] =
__pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
/* Write-protect the new tables (and empty_zero_page) before pinning. */
set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO);
set_page_prot(initial_page_table, PAGE_KERNEL_RO);
set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
/* Swap the pin from the supplied pgd to initial_page_table, then load it. */
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
PFN_DOWN(__pa(initial_page_table)));
xen_write_cr3(__pa(initial_page_table));
memblock_reserve(xen_pt_base, xen_pt_size);
}
#endif /* CONFIG_X86_64 */
void __init xen_reserve_special_pages(void)
{
phys_addr_t paddr;
@ -2300,12 +2008,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
switch (idx) {
case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
#ifdef CONFIG_X86_32
case FIX_WP_TEST:
# ifdef CONFIG_HIGHMEM
case FIX_KMAP_BEGIN ... FIX_KMAP_END:
# endif
#elif defined(CONFIG_X86_VSYSCALL_EMULATION)
#ifdef CONFIG_X86_VSYSCALL_EMULATION
case VSYSCALL_PAGE:
#endif
/* All local page mappings */
@ -2357,9 +2060,7 @@ static void __init xen_post_allocator_init(void)
pv_ops.mmu.set_pte = xen_set_pte;
pv_ops.mmu.set_pmd = xen_set_pmd;
pv_ops.mmu.set_pud = xen_set_pud;
#ifdef CONFIG_X86_64
pv_ops.mmu.set_p4d = xen_set_p4d;
#endif
/* This will work as long as patching hasn't happened yet
(which it hasn't) */
@ -2367,15 +2068,11 @@ static void __init xen_post_allocator_init(void)
pv_ops.mmu.alloc_pmd = xen_alloc_pmd;
pv_ops.mmu.release_pte = xen_release_pte;
pv_ops.mmu.release_pmd = xen_release_pmd;
#ifdef CONFIG_X86_64
pv_ops.mmu.alloc_pud = xen_alloc_pud;
pv_ops.mmu.release_pud = xen_release_pud;
#endif
pv_ops.mmu.make_pte = PV_CALLEE_SAVE(xen_make_pte);
#ifdef CONFIG_X86_64
pv_ops.mmu.write_cr3 = &xen_write_cr3;
#endif
}
static void xen_leave_lazy_mmu(void)
@ -2420,17 +2117,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.make_pte = PV_CALLEE_SAVE(xen_make_pte_init),
.make_pgd = PV_CALLEE_SAVE(xen_make_pgd),
#ifdef CONFIG_X86_PAE
.set_pte_atomic = xen_set_pte_atomic,
.pte_clear = xen_pte_clear,
.pmd_clear = xen_pmd_clear,
#endif /* CONFIG_X86_PAE */
.set_pud = xen_set_pud_hyper,
.make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
.pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
#ifdef CONFIG_X86_64
.pud_val = PV_CALLEE_SAVE(xen_pud_val),
.make_pud = PV_CALLEE_SAVE(xen_make_pud),
.set_p4d = xen_set_p4d_hyper,
@ -2442,7 +2133,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.p4d_val = PV_CALLEE_SAVE(xen_p4d_val),
.make_p4d = PV_CALLEE_SAVE(xen_make_p4d),
#endif
#endif /* CONFIG_X86_64 */
.activate_mm = xen_activate_mm,
.dup_mmap = xen_dup_mmap,

View File

@ -379,12 +379,8 @@ static void __init xen_rebuild_p2m_list(unsigned long *p2m)
if (type == P2M_TYPE_PFN || i < chunk) {
/* Use initial p2m page contents. */
#ifdef CONFIG_X86_64
mfns = alloc_p2m_page();
copy_page(mfns, xen_p2m_addr + pfn);
#else
mfns = xen_p2m_addr + pfn;
#endif
ptep = populate_extra_pte((unsigned long)(p2m + pfn));
set_pte(ptep,
pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL));
@ -467,7 +463,7 @@ EXPORT_SYMBOL_GPL(get_phys_to_machine);
* Allocate new pmd(s). It is checked whether the old pmd is still in place.
* If not, nothing is changed. This is okay as the only reason for allocating
* a new pmd is to replace p2m_missing_pte or p2m_identity_pte by an individual
* pmd. In case of PAE/x86-32 there are multiple pmds to allocate!
* pmd.
*/
static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg)
{

View File

@ -32,7 +32,6 @@
#include <xen/features.h>
#include <xen/hvc-console.h>
#include "xen-ops.h"
#include "vdso.h"
#include "mmu.h"
#define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024)
@ -545,13 +544,10 @@ static unsigned long __init xen_get_pages_limit(void)
{
unsigned long limit;
#ifdef CONFIG_X86_32
limit = GB(64) / PAGE_SIZE;
#else
limit = MAXMEM / PAGE_SIZE;
if (!xen_initial_domain() && xen_512gb_limit)
limit = GB(512) / PAGE_SIZE;
#endif
return limit;
}
@ -722,17 +718,8 @@ static void __init xen_reserve_xen_mfnlist(void)
if (!xen_is_e820_reserved(start, size))
return;
#ifdef CONFIG_X86_32
/*
* Relocating the p2m on 32 bit system to an arbitrary virtual address
* is not supported, so just give up.
*/
xen_raw_console_write("Xen hypervisor allocated p2m list conflicts with E820 map\n");
BUG();
#else
xen_relocate_p2m();
memblock_free(start, size);
#endif
}
/**
@ -921,20 +908,6 @@ char * __init xen_memory_setup(void)
return "Xen";
}
/*
 * Turn on the note-mask bit that selects the "nosegneg" library variants.
 * This is only needed in pure 32-bit mode; compat 32-bit processes
 * can have un-truncated segments, so wrapping around is allowed there.
 */
static void __init fiddle_vdso(void)
{
#ifdef CONFIG_X86_32
	u32 *note_mask;

	note_mask = vdso_image_32.data + vdso_image_32.sym_VDSO32_NOTE_MASK;
	*note_mask = *note_mask | (1 << VDSO_NOTE_NONEGSEG_BIT);
#endif
}
static int register_callback(unsigned type, const void *func)
{
struct callback_register callback = {
@ -951,11 +924,7 @@ void xen_enable_sysenter(void)
int ret;
unsigned sysenter_feature;
#ifdef CONFIG_X86_32
sysenter_feature = X86_FEATURE_SEP;
#else
sysenter_feature = X86_FEATURE_SYSENTER32;
#endif
if (!boot_cpu_has(sysenter_feature))
return;
@ -967,7 +936,6 @@ void xen_enable_sysenter(void)
void xen_enable_syscall(void)
{
#ifdef CONFIG_X86_64
int ret;
ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
@ -983,7 +951,6 @@ void xen_enable_syscall(void)
if (ret != 0)
setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
}
#endif /* CONFIG_X86_64 */
}
static void __init xen_pvmmu_arch_setup(void)
@ -1024,7 +991,6 @@ void __init xen_arch_setup(void)
disable_cpuidle();
disable_cpufreq();
WARN_ON(xen_set_default_idle());
fiddle_vdso();
#ifdef CONFIG_NUMA
numa_off = 1;
#endif

View File

@ -211,15 +211,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void)
* sure the old memory can be recycled. */
make_lowmem_page_readwrite(xen_initial_gdt);
#ifdef CONFIG_X86_32
/*
* Xen starts us with XEN_FLAT_RING1_DS, but linux code
* expects __USER_DS
*/
loadsegment(ds, __USER_DS);
loadsegment(es, __USER_DS);
#endif
xen_filter_cpu_maps();
xen_setup_vcpu_info_placement();
@ -300,10 +291,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
gdt = get_cpu_gdt_rw(cpu);
#ifdef CONFIG_X86_32
ctxt->user_regs.fs = __KERNEL_PERCPU;
ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
#endif
memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
/*
@ -341,12 +328,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
ctxt->kernel_ss = __KERNEL_DS;
ctxt->kernel_sp = task_top_of_stack(idle);
#ifdef CONFIG_X86_32
ctxt->event_callback_cs = __KERNEL_CS;
ctxt->failsafe_callback_cs = __KERNEL_CS;
#else
ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
ctxt->event_callback_eip =
(unsigned long)xen_asm_exc_xen_hypervisor_callback;
ctxt->failsafe_callback_eip =

View File

@ -1,6 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Bit used for the pseudo-hwcap for non-negative segments. We use
bit 1 to avoid bugs in some versions of glibc when bit 0 is
used; the choice is otherwise arbitrary. */
#define VDSO_NOTE_NONEGSEG_BIT 1

View File

@ -6,12 +6,18 @@
* operations here; the indirect forms are better handled in C.
*/
#include <asm/errno.h>
#include <asm/asm-offsets.h>
#include <asm/percpu.h>
#include <asm/processor-flags.h>
#include <asm/frame.h>
#include <asm/segment.h>
#include <asm/thread_info.h>
#include <asm/asm.h>
#include <asm/frame.h>
#include <xen/interface/xen.h>
#include <linux/init.h>
#include <linux/linkage.h>
/*
@ -76,11 +82,7 @@ SYM_FUNC_END(xen_save_fl_direct)
*/
SYM_FUNC_START(xen_restore_fl_direct)
FRAME_BEGIN
#ifdef CONFIG_X86_64
testw $X86_EFLAGS_IF, %di
#else
testb $X86_EFLAGS_IF>>8, %ah
#endif
setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
/*
* Preempt here doesn't matter because that will deal with any
@ -104,15 +106,6 @@ SYM_FUNC_END(xen_restore_fl_direct)
*/
SYM_FUNC_START(check_events)
FRAME_BEGIN
#ifdef CONFIG_X86_32
push %eax
push %ecx
push %edx
call xen_force_evtchn_callback
pop %edx
pop %ecx
pop %eax
#else
push %rax
push %rcx
push %rdx
@ -132,7 +125,6 @@ SYM_FUNC_START(check_events)
pop %rdx
pop %rcx
pop %rax
#endif
FRAME_END
ret
SYM_FUNC_END(check_events)
@ -151,3 +143,175 @@ SYM_FUNC_START(xen_read_cr2_direct)
FRAME_END
ret
SYM_FUNC_END(xen_read_cr2_direct);
/*
* xen_pv_trap name: emit an entry stub called xen_<name> that pops %rcx
* and %r11 off the stack and then jumps to <name>. The stub is marked
* with _ASM_NOKPROBE.
* NOTE(review): presumably Xen delivers exceptions with %rcx and %r11
* pushed on top of the iret-like frame, as the syscall callback comment
* in this file describes - confirm.
*/
.macro xen_pv_trap name
SYM_CODE_START(xen_\name)
pop %rcx
pop %r11
jmp \name
SYM_CODE_END(xen_\name)
_ASM_NOKPROBE(xen_\name)
.endm
xen_pv_trap asm_exc_divide_error
xen_pv_trap asm_xenpv_exc_debug
xen_pv_trap asm_exc_int3
xen_pv_trap asm_xenpv_exc_nmi
xen_pv_trap asm_exc_overflow
xen_pv_trap asm_exc_bounds
xen_pv_trap asm_exc_invalid_op
xen_pv_trap asm_exc_device_not_available
xen_pv_trap asm_exc_double_fault
xen_pv_trap asm_exc_coproc_segment_overrun
xen_pv_trap asm_exc_invalid_tss
xen_pv_trap asm_exc_segment_not_present
xen_pv_trap asm_exc_stack_segment
xen_pv_trap asm_exc_general_protection
xen_pv_trap asm_exc_page_fault
xen_pv_trap asm_exc_spurious_interrupt_bug
xen_pv_trap asm_exc_coprocessor_error
xen_pv_trap asm_exc_alignment_check
#ifdef CONFIG_X86_MCE
xen_pv_trap asm_exc_machine_check
#endif /* CONFIG_X86_MCE */
xen_pv_trap asm_exc_simd_coprocessor_error
#ifdef CONFIG_IA32_EMULATION
xen_pv_trap entry_INT80_compat
#endif
xen_pv_trap asm_exc_xen_hypervisor_callback
/*
* One stub per exception vector (NUM_EXCEPTION_VECTORS in total). Stub i
* pops %rcx and %r11 and jumps to entry i of early_idt_handler_array.
* The .fill pads every stub with 0xcc bytes so that stub i starts exactly
* at xen_early_idt_handler_array + i * XEN_EARLY_IDT_HANDLER_SIZE.
*/
__INIT
SYM_CODE_START(xen_early_idt_handler_array)
i = 0
.rept NUM_EXCEPTION_VECTORS
pop %rcx
pop %r11
jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE
i = i + 1
.fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
.endr
SYM_CODE_END(xen_early_idt_handler_array)
__FINIT
hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
/*
* Xen64 iret frame:
*
* ss
* rsp
* rflags
* cs
* rip <-- standard iret frame
*
* flags
*
* rcx }
* r11 }<-- pushed by hypercall page
* rsp->rax }
*/
/*
* Return via the Xen iret hypercall: push a zero "flags" word on top of
* the iret frame to complete the Xen64 iret frame, then jump to the iret
* entry of the hypercall page.
*/
SYM_CODE_START(xen_iret)
pushq $0
jmp hypercall_iret
SYM_CODE_END(xen_iret)
SYM_CODE_START(xen_sysret64)
/*
* We're already on the usermode stack at this point, but
* still with the kernel gs, so we can easily switch back.
*
* tss.sp2 is scratch space.
*/
/* Park the user stack pointer in tss.sp2 and switch to the kernel stack. */
movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
/*
* Build the Xen64 iret frame from the top down: ss, rsp (the user
* stack pointer parked in tss.sp2), rflags (%r11), cs, rip (%rcx) and
* finally the flags word, here VGCF_in_syscall.
*/
pushq $__USER_DS
pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
pushq %r11
pushq $__USER_CS
pushq %rcx
pushq $VGCF_in_syscall
jmp hypercall_iret
SYM_CODE_END(xen_sysret64)
/*
* Xen handles syscall callbacks much like ordinary exceptions, which
* means we have:
* - kernel gs
* - kernel rsp
* - an iret-like stack frame on the stack (including rcx and r11):
* ss
* rsp
* rflags
* cs
* rip
* r11
* rsp->rcx
*/
/* Normal 64-bit system call target */
SYM_FUNC_START(xen_syscall_target)
/* Drop the %rcx/%r11 words so that %rsp points at the rip slot. */
popq %rcx
popq %r11
/*
* Neither Xen nor the kernel really knows what the old SS and
* CS were. The kernel expects __USER_DS and __USER_CS, so
* report those values even though Xen will guess its own values.
*/
/* With rip at 0(%rsp), the ss slot is at 4*8 and the cs slot at 1*8. */
movq $__USER_DS, 4*8(%rsp)
movq $__USER_CS, 1*8(%rsp)
jmp entry_SYSCALL_64_after_hwframe
SYM_FUNC_END(xen_syscall_target)
#ifdef CONFIG_IA32_EMULATION
/* 32-bit compat syscall target */
SYM_FUNC_START(xen_syscall32_target)
/* Drop the %rcx/%r11 words so that %rsp points at the rip slot. */
popq %rcx
popq %r11
/*
* Neither Xen nor the kernel really knows what the old SS and
* CS were. The kernel expects __USER32_DS and __USER32_CS, so
* report those values even though Xen will guess its own values.
*/
/* With rip at 0(%rsp), the ss slot is at 4*8 and the cs slot at 1*8. */
movq $__USER32_DS, 4*8(%rsp)
movq $__USER32_CS, 1*8(%rsp)
jmp entry_SYSCALL_compat_after_hwframe
SYM_FUNC_END(xen_syscall32_target)
/* 32-bit compat sysenter target */
SYM_FUNC_START(xen_sysenter_target)
/*
* NB: Xen is polite and clears TF from EFLAGS for us. This means
* that we don't need to guard against single step exceptions here.
*/
/* Drop the %rcx/%r11 words so that %rsp points at the rip slot. */
popq %rcx
popq %r11
/*
* Neither Xen nor the kernel really knows what the old SS and
* CS were. The kernel expects __USER32_DS and __USER32_CS, so
* report those values even though Xen will guess its own values.
*/
/* With rip at 0(%rsp), the ss slot is at 4*8 and the cs slot at 1*8. */
movq $__USER32_DS, 4*8(%rsp)
movq $__USER32_CS, 1*8(%rsp)
jmp entry_SYSENTER_compat_after_hwframe
SYM_FUNC_END(xen_sysenter_target)
#else /* !CONFIG_IA32_EMULATION */
/*
* Without CONFIG_IA32_EMULATION both 32-bit entry points share this stub:
* it discards the two words on top of the stack, loads -ENOSYS into %rax
* and returns through the Xen iret hypercall with a zero flags word.
*/
SYM_FUNC_START_ALIAS(xen_syscall32_target)
SYM_FUNC_START(xen_sysenter_target)
lea 16(%rsp), %rsp /* strip %rcx, %r11 */
mov $-ENOSYS, %rax
pushq $0
jmp hypercall_iret
SYM_FUNC_END(xen_sysenter_target)
SYM_FUNC_END_ALIAS(xen_syscall32_target)
#endif /* CONFIG_IA32_EMULATION */

View File

@ -1,185 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Asm versions of Xen pv-ops, suitable for direct use.
*
* We only bother with direct forms (ie, vcpu in pda) of the
* operations here; the indirect forms are better handled in C.
*/
#include <asm/thread_info.h>
#include <asm/processor-flags.h>
#include <asm/segment.h>
#include <asm/asm.h>
#include <xen/interface/xen.h>
#include <linux/linkage.h>
/* Pseudo-flag used for virtual NMI, which we don't implement yet */
#define XEN_EFLAGS_NMI 0x80000000
/*
* This is run where a normal iret would be run, with the same stack setup:
* 8: eflags
* 4: cs
* esp-> 0: eip
*
* This attempts to make sure that any pending events are dealt with
* on return to usermode, but there is a small window in which an
* event can happen just before entering usermode. If the nested
* interrupt ends up setting one of the TIF_WORK_MASK pending work
* flags, they will not be tested again before returning to
* usermode. This means that a process can end up with pending work,
* which will be unprocessed until the process enters and leaves the
* kernel again, which could be an unbounded amount of time. This
* means that a pending signal or reschedule event could be
* indefinitely delayed.
*
* The fix is to notice a nested interrupt in the critical window, and
* if one occurs, then fold the nested interrupt into the current
* interrupt stack frame, and re-process it iteratively rather than
* recursively. This means that it will exit via the normal path, and
* all pending work will be dealt with appropriately.
*
* Because the nested interrupt handler needs to deal with the current
* stack state in whatever form it's in, we keep things simple by only
* using a single register which is pushed/popped on the stack.
*/
/*
* POP_FS: pop %fs from the stack. The pop at label 1 has an exception
* table entry pointing at label 2; that fixup overwrites the word on top
* of the stack with 0 and jumps back to retry the pop.
*/
.macro POP_FS
1:
popw %fs
.pushsection .fixup, "ax"
2: movw $0, (%esp)
jmp 1b
.popsection
_ASM_EXTABLE(1b,2b)
.endm
/*
* 32-bit iret replacement. Frames whose eflags carry X86_EFLAGS_VM or
* XEN_EFLAGS_NMI are handed to the iret hypercall. Otherwise the event
* mask in vcpu_info is set from the IF bit being restored, pending events
* are checked, and the code either jumps to
* xen_asm_exc_xen_hypervisor_callback or executes a plain iret.
*/
SYM_CODE_START(xen_iret)
/* test eflags for special cases */
testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
jnz hyper_iret
push %eax
ESP_OFFSET=4 # bytes pushed onto stack
/* Store vcpu_info pointer for easy access */
#ifdef CONFIG_SMP
pushw %fs
movl $(__KERNEL_PERCPU), %eax
movl %eax, %fs
movl %fs:xen_vcpu, %eax
POP_FS
#else
movl %ss:xen_vcpu, %eax
#endif
/* check IF state we're restoring */
testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
/*
* Maybe enable events. Once this happens we could get a
* recursive event, so the critical region starts immediately
* afterwards. However, if that happens we don't end up
* resuming the code, so we don't have to be worried about
* being preempted to another CPU.
*/
setz %ss:XEN_vcpu_info_mask(%eax)
xen_iret_start_crit:
/* check for unmasked and pending */
cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax)
/*
* If there's something pending, mask events again so we can
* jump back into exc_xen_hypervisor_callback. Otherwise do not
* touch XEN_vcpu_info_mask.
*/
jne 1f
movb $1, %ss:XEN_vcpu_info_mask(%eax)
1: popl %eax
/*
* From this point on the registers are restored and the stack
* updated, so we don't need to worry about it if we're
* preempted
*/
iret_restore_end:
/*
* Jump to hypervisor_callback after fixing up the stack.
* Events are masked, so jumping out of the critical region is
* OK.
*/
je xen_asm_exc_xen_hypervisor_callback
1: iret
xen_iret_end_crit:
/* A fault on the iret above is redirected to asm_iret_error. */
_ASM_EXTABLE(1b, asm_iret_error)
hyper_iret:
/* put this out of line since it's very rarely used */
jmp hypercall_page + __HYPERVISOR_iret * 32
SYM_CODE_END(xen_iret)
.globl xen_iret_start_crit, xen_iret_end_crit
/*
* This is called by xen_asm_exc_xen_hypervisor_callback in entry_32.S when it sees
* that the EIP at the time of interrupt was between
* xen_iret_start_crit and xen_iret_end_crit.
*
* The stack format at this point is:
* ----------------
* ss : (ss/esp may be present if we came from usermode)
* esp :
* eflags } outer exception info
* cs }
* eip }
* ----------------
* eax : outer eax if it hasn't been restored
* ----------------
* eflags }
* cs } nested exception info
* eip }
* return address : (into xen_asm_exc_xen_hypervisor_callback)
*
* In order to deliver the nested exception properly, we need to discard the
* nested exception frame such that when we handle the exception, we do it
* in the context of the outer exception rather than starting a new one.
*
* The only caveat is that if the outer eax hasn't been restored yet (i.e.
* it's still on stack), we need to restore its value here.
*/
/*
* Three exits, selected by the nested frame:
*  - nested CS has bit 1 set: plain ret, stack left untouched (label 2);
*  - nested EIP below iret_restore_end: reload the outer EAX and drop
*    four words;
*  - otherwise: drop the three words of the nested frame (label 1).
*/
.pushsection .noinstr.text, "ax"
SYM_CODE_START(xen_iret_crit_fixup)
/*
* Paranoia: Make sure we're really coming from kernel space.
* One could imagine a case where userspace jumps into the
* critical range address, but just before the CPU delivers a
* PF, it decides to deliver an interrupt instead. Unlikely?
* Definitely. Easy to avoid? Yes.
*/
testb $2, 2*4(%esp) /* nested CS */
jnz 2f
/*
* If eip is before iret_restore_end then stack
* hasn't been restored yet.
*/
cmpl $iret_restore_end, 1*4(%esp)
jae 1f
movl 4*4(%esp), %eax /* load outer EAX */
ret $4*4 /* discard nested EIP, CS, and EFLAGS as
* well as the just restored EAX */
1:
ret $3*4 /* discard nested EIP, CS, and EFLAGS */
2:
ret
SYM_CODE_END(xen_iret_crit_fixup)
.popsection

View File

@ -1,192 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Asm versions of Xen pv-ops, suitable for direct use.
*
* We only bother with direct forms (ie, vcpu in pda) of the
* operations here; the indirect forms are better handled in C.
*/
#include <asm/errno.h>
#include <asm/percpu.h>
#include <asm/processor-flags.h>
#include <asm/segment.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/asm.h>
#include <xen/interface/xen.h>
#include <linux/init.h>
#include <linux/linkage.h>
/*
* xen_pv_trap name: emit an entry stub called xen_<name> that pops %rcx
* and %r11 off the stack and then jumps to <name>. The stub is marked
* with _ASM_NOKPROBE.
* NOTE(review): presumably Xen delivers exceptions with %rcx and %r11
* pushed on top of the iret-like frame, as the syscall callback comment
* in this file describes - confirm.
*/
.macro xen_pv_trap name
SYM_CODE_START(xen_\name)
pop %rcx
pop %r11
jmp \name
SYM_CODE_END(xen_\name)
_ASM_NOKPROBE(xen_\name)
.endm
xen_pv_trap asm_exc_divide_error
xen_pv_trap asm_xenpv_exc_debug
xen_pv_trap asm_exc_int3
xen_pv_trap asm_xenpv_exc_nmi
xen_pv_trap asm_exc_overflow
xen_pv_trap asm_exc_bounds
xen_pv_trap asm_exc_invalid_op
xen_pv_trap asm_exc_device_not_available
xen_pv_trap asm_exc_double_fault
xen_pv_trap asm_exc_coproc_segment_overrun
xen_pv_trap asm_exc_invalid_tss
xen_pv_trap asm_exc_segment_not_present
xen_pv_trap asm_exc_stack_segment
xen_pv_trap asm_exc_general_protection
xen_pv_trap asm_exc_page_fault
xen_pv_trap asm_exc_spurious_interrupt_bug
xen_pv_trap asm_exc_coprocessor_error
xen_pv_trap asm_exc_alignment_check
#ifdef CONFIG_X86_MCE
xen_pv_trap asm_exc_machine_check
#endif /* CONFIG_X86_MCE */
xen_pv_trap asm_exc_simd_coprocessor_error
#ifdef CONFIG_IA32_EMULATION
xen_pv_trap entry_INT80_compat
#endif
xen_pv_trap asm_exc_xen_hypervisor_callback
/*
* One stub per exception vector (NUM_EXCEPTION_VECTORS in total). Stub i
* pops %rcx and %r11 and jumps to entry i of early_idt_handler_array.
* The .fill pads every stub with 0xcc bytes so that stub i starts exactly
* at xen_early_idt_handler_array + i * XEN_EARLY_IDT_HANDLER_SIZE.
*/
__INIT
SYM_CODE_START(xen_early_idt_handler_array)
i = 0
.rept NUM_EXCEPTION_VECTORS
pop %rcx
pop %r11
jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE
i = i + 1
.fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
.endr
SYM_CODE_END(xen_early_idt_handler_array)
__FINIT
hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
/*
* Xen64 iret frame:
*
* ss
* rsp
* rflags
* cs
* rip <-- standard iret frame
*
* flags
*
* rcx }
* r11 }<-- pushed by hypercall page
* rsp->rax }
*/
/*
* Return via the Xen iret hypercall: push a zero "flags" word on top of
* the iret frame to complete the Xen64 iret frame, then jump to the iret
* entry of the hypercall page.
*/
SYM_CODE_START(xen_iret)
pushq $0
jmp hypercall_iret
SYM_CODE_END(xen_iret)
SYM_CODE_START(xen_sysret64)
/*
* We're already on the usermode stack at this point, but
* still with the kernel gs, so we can easily switch back.
*
* tss.sp2 is scratch space.
*/
/* Park the user stack pointer in tss.sp2 and switch to the kernel stack. */
movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
/*
* Build the Xen64 iret frame from the top down: ss, rsp (the user
* stack pointer parked in tss.sp2), rflags (%r11), cs, rip (%rcx) and
* finally the flags word, here VGCF_in_syscall.
*/
pushq $__USER_DS
pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
pushq %r11
pushq $__USER_CS
pushq %rcx
pushq $VGCF_in_syscall
jmp hypercall_iret
SYM_CODE_END(xen_sysret64)
/*
* Xen handles syscall callbacks much like ordinary exceptions, which
* means we have:
* - kernel gs
* - kernel rsp
* - an iret-like stack frame on the stack (including rcx and r11):
* ss
* rsp
* rflags
* cs
* rip
* r11
* rsp->rcx
*/
/* Normal 64-bit system call target */
SYM_FUNC_START(xen_syscall_target)
/* Drop the %rcx/%r11 words so that %rsp points at the rip slot. */
popq %rcx
popq %r11
/*
* Neither Xen nor the kernel really knows what the old SS and
* CS were. The kernel expects __USER_DS and __USER_CS, so
* report those values even though Xen will guess its own values.
*/
/* With rip at 0(%rsp), the ss slot is at 4*8 and the cs slot at 1*8. */
movq $__USER_DS, 4*8(%rsp)
movq $__USER_CS, 1*8(%rsp)
jmp entry_SYSCALL_64_after_hwframe
SYM_FUNC_END(xen_syscall_target)
#ifdef CONFIG_IA32_EMULATION
/* 32-bit compat syscall target */
SYM_FUNC_START(xen_syscall32_target)
/* Drop the %rcx/%r11 words so that %rsp points at the rip slot. */
popq %rcx
popq %r11
/*
* Neither Xen nor the kernel really knows what the old SS and
* CS were. The kernel expects __USER32_DS and __USER32_CS, so
* report those values even though Xen will guess its own values.
*/
/* With rip at 0(%rsp), the ss slot is at 4*8 and the cs slot at 1*8. */
movq $__USER32_DS, 4*8(%rsp)
movq $__USER32_CS, 1*8(%rsp)
jmp entry_SYSCALL_compat_after_hwframe
SYM_FUNC_END(xen_syscall32_target)
/* 32-bit compat sysenter target */
SYM_FUNC_START(xen_sysenter_target)
/*
* NB: Xen is polite and clears TF from EFLAGS for us. This means
* that we don't need to guard against single step exceptions here.
*/
/* Drop the %rcx/%r11 words so that %rsp points at the rip slot. */
popq %rcx
popq %r11
/*
* Neither Xen nor the kernel really knows what the old SS and
* CS were. The kernel expects __USER32_DS and __USER32_CS, so
* report those values even though Xen will guess its own values.
*/
/* With rip at 0(%rsp), the ss slot is at 4*8 and the cs slot at 1*8. */
movq $__USER32_DS, 4*8(%rsp)
movq $__USER32_CS, 1*8(%rsp)
jmp entry_SYSENTER_compat_after_hwframe
SYM_FUNC_END(xen_sysenter_target)
#else /* !CONFIG_IA32_EMULATION */
/*
* Without CONFIG_IA32_EMULATION both 32-bit entry points share this stub:
* it discards the two words on top of the stack, loads -ENOSYS into %rax
* and returns through the Xen iret hypercall with a zero flags word.
*/
SYM_FUNC_START_ALIAS(xen_syscall32_target)
SYM_FUNC_START(xen_sysenter_target)
lea 16(%rsp), %rsp /* strip %rcx, %r11 */
mov $-ENOSYS, %rax
pushq $0
jmp hypercall_iret
SYM_FUNC_END(xen_sysenter_target)
SYM_FUNC_END_ALIAS(xen_syscall32_target)
#endif /* CONFIG_IA32_EMULATION */

View File

@ -35,13 +35,8 @@ SYM_CODE_START(startup_xen)
rep __ASM_SIZE(stos)
mov %_ASM_SI, xen_start_info
#ifdef CONFIG_X86_64
mov initial_stack(%rip), %rsp
#else
mov initial_stack, %esp
#endif
#ifdef CONFIG_X86_64
/* Set up %gs.
*
* The base of %gs always points to fixed_percpu_data. If the
@ -53,7 +48,6 @@ SYM_CODE_START(startup_xen)
movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax
cdq
wrmsr
#endif
call xen_start_kernel
SYM_CODE_END(startup_xen)

View File

@ -33,7 +33,6 @@ void xen_setup_mfn_list_list(void);
void xen_build_mfn_list_list(void);
void xen_setup_machphys_mapping(void);
void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
void xen_reserve_top(void);
void __init xen_reserve_special_pages(void);
void __init xen_pt_check_e820(void);

View File

@ -157,7 +157,8 @@ int xen_drm_front_mode_set(struct xen_drm_front_drm_pipeline *pipeline,
int xen_drm_front_dbuf_create(struct xen_drm_front_info *front_info,
u64 dbuf_cookie, u32 width, u32 height,
u32 bpp, u64 size, struct page **pages)
u32 bpp, u64 size, u32 offset,
struct page **pages)
{
struct xen_drm_front_evtchnl *evtchnl;
struct xen_drm_front_dbuf *dbuf;
@ -194,6 +195,7 @@ int xen_drm_front_dbuf_create(struct xen_drm_front_info *front_info,
req->op.dbuf_create.gref_directory =
xen_front_pgdir_shbuf_get_dir_start(&dbuf->shbuf);
req->op.dbuf_create.buffer_sz = size;
req->op.dbuf_create.data_ofs = offset;
req->op.dbuf_create.dbuf_cookie = dbuf_cookie;
req->op.dbuf_create.width = width;
req->op.dbuf_create.height = height;
@ -400,15 +402,15 @@ static int xen_drm_drv_dumb_create(struct drm_file *filp,
args->size = args->pitch * args->height;
obj = xen_drm_front_gem_create(dev, args->size);
if (IS_ERR_OR_NULL(obj)) {
ret = PTR_ERR_OR_ZERO(obj);
if (IS_ERR(obj)) {
ret = PTR_ERR(obj);
goto fail;
}
ret = xen_drm_front_dbuf_create(drm_info->front_info,
xen_drm_front_dbuf_to_cookie(obj),
args->width, args->height, args->bpp,
args->size,
args->size, 0,
xen_drm_front_gem_get_pages(obj));
if (ret)
goto fail_backend;

View File

@ -145,7 +145,7 @@ int xen_drm_front_mode_set(struct xen_drm_front_drm_pipeline *pipeline,
int xen_drm_front_dbuf_create(struct xen_drm_front_info *front_info,
u64 dbuf_cookie, u32 width, u32 height,
u32 bpp, u64 size, struct page **pages);
u32 bpp, u64 size, u32 offset, struct page **pages);
int xen_drm_front_fb_attach(struct xen_drm_front_info *front_info,
u64 dbuf_cookie, u64 fb_cookie, u32 width,

View File

@ -33,6 +33,7 @@ static const u32 plane_formats[] = {
DRM_FORMAT_ARGB4444,
DRM_FORMAT_XRGB1555,
DRM_FORMAT_ARGB1555,
DRM_FORMAT_YUYV,
};
const u32 *xen_drm_front_conn_get_formats(int *format_count)

View File

@ -83,7 +83,7 @@ static struct xen_gem_object *gem_create(struct drm_device *dev, size_t size)
size = round_up(size, PAGE_SIZE);
xen_obj = gem_create_obj(dev, size);
if (IS_ERR_OR_NULL(xen_obj))
if (IS_ERR(xen_obj))
return xen_obj;
if (drm_info->front_info->cfg.be_alloc) {
@ -117,7 +117,7 @@ static struct xen_gem_object *gem_create(struct drm_device *dev, size_t size)
*/
xen_obj->num_pages = DIV_ROUND_UP(size, PAGE_SIZE);
xen_obj->pages = drm_gem_get_pages(&xen_obj->base);
if (IS_ERR_OR_NULL(xen_obj->pages)) {
if (IS_ERR(xen_obj->pages)) {
ret = PTR_ERR(xen_obj->pages);
xen_obj->pages = NULL;
goto fail;
@ -136,7 +136,7 @@ struct drm_gem_object *xen_drm_front_gem_create(struct drm_device *dev,
struct xen_gem_object *xen_obj;
xen_obj = gem_create(dev, size);
if (IS_ERR_OR_NULL(xen_obj))
if (IS_ERR(xen_obj))
return ERR_CAST(xen_obj);
return &xen_obj->base;
@ -194,7 +194,7 @@ xen_drm_front_gem_import_sg_table(struct drm_device *dev,
size = attach->dmabuf->size;
xen_obj = gem_create_obj(dev, size);
if (IS_ERR_OR_NULL(xen_obj))
if (IS_ERR(xen_obj))
return ERR_CAST(xen_obj);
ret = gem_alloc_pages_array(xen_obj, size);
@ -210,7 +210,8 @@ xen_drm_front_gem_import_sg_table(struct drm_device *dev,
ret = xen_drm_front_dbuf_create(drm_info->front_info,
xen_drm_front_dbuf_to_cookie(&xen_obj->base),
0, 0, 0, size, xen_obj->pages);
0, 0, 0, size, sgt->sgl->offset,
xen_obj->pages);
if (ret < 0)
return ERR_PTR(ret);

View File

@ -60,7 +60,7 @@ fb_create(struct drm_device *dev, struct drm_file *filp,
int ret;
fb = drm_gem_fb_create_with_funcs(dev, filp, mode_cmd, &fb_funcs);
if (IS_ERR_OR_NULL(fb))
if (IS_ERR(fb))
return fb;
gem_obj = fb->obj[0];

View File

@ -52,9 +52,7 @@ config XEN_BALLOON_MEMORY_HOTPLUG
config XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
int "Hotplugged memory limit (in GiB) for a PV guest"
default 512 if X86_64
default 4 if X86_32
range 0 64 if X86_32
default 512
depends on XEN_HAVE_PVMMU
depends on XEN_BALLOON_MEMORY_HOTPLUG
help

View File

@ -613,6 +613,14 @@ dmabuf_imp_to_refs(struct gntdev_dmabuf_priv *priv, struct device *dev,
goto fail_detach;
}
/* Check that we have zero offset. */
if (sgt->sgl->offset) {
ret = ERR_PTR(-EINVAL);
pr_debug("DMA buffer has %d bytes offset, user-space expects 0\n",
sgt->sgl->offset);
goto fail_unmap;
}
/* Check number of pages that imported buffer has. */
if (attach->dmabuf->size != gntdev_dmabuf->nr_pages << PAGE_SHIFT) {
ret = ERR_PTR(-EINVAL);

View File

@ -38,7 +38,8 @@
* Protocol version
******************************************************************************
*/
#define XENDISPL_PROTOCOL_VERSION "1"
#define XENDISPL_PROTOCOL_VERSION "2"
#define XENDISPL_PROTOCOL_VERSION_INT 2
/*
******************************************************************************
@ -202,6 +203,9 @@
* Width and height of the connector in pixels separated by
* XENDISPL_RESOLUTION_SEPARATOR. This defines visible area of the
* display.
* If backend provides extended display identification data (EDID) with
* XENDISPL_OP_GET_EDID request then EDID values must take precedence
* over the resolutions defined here.
*
*------------------ Connector Request Transport Parameters -------------------
*
@ -349,6 +353,8 @@
#define XENDISPL_OP_FB_DETACH 0x13
#define XENDISPL_OP_SET_CONFIG 0x14
#define XENDISPL_OP_PG_FLIP 0x15
/* The below command is available in protocol version 2 and above. */
#define XENDISPL_OP_GET_EDID 0x16
/*
******************************************************************************
@ -377,6 +383,10 @@
#define XENDISPL_FIELD_BE_ALLOC "be-alloc"
#define XENDISPL_FIELD_UNIQUE_ID "unique-id"
/*
* EDID buffer sizing, in octets: the maximum EDID size is the size of one
* EDID block multiplied by the maximum number of blocks
* (128 * 256 = 32768 octets).
*/
#define XENDISPL_EDID_BLOCK_SIZE 128
#define XENDISPL_EDID_BLOCK_COUNT 256
#define XENDISPL_EDID_MAX_SIZE (XENDISPL_EDID_BLOCK_SIZE * XENDISPL_EDID_BLOCK_COUNT)
/*
******************************************************************************
* STATUS RETURN CODES
@ -451,7 +461,9 @@
* +----------------+----------------+----------------+----------------+
* | gref_directory | 40
* +----------------+----------------+----------------+----------------+
* | reserved | 44
* | data_ofs | 44
* +----------------+----------------+----------------+----------------+
* | reserved | 48
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
@ -494,6 +506,7 @@
* buffer size (buffer_sz) exceeds what can be addressed by this single page,
* then reference to the next page must be supplied (see gref_dir_next_page
* below)
* data_ofs - uint32_t, offset of the data in the buffer, octets
*/
#define XENDISPL_DBUF_FLG_REQ_ALLOC (1 << 0)
@ -506,6 +519,7 @@ struct xendispl_dbuf_create_req {
uint32_t buffer_sz;
uint32_t flags;
grant_ref_t gref_directory;
uint32_t data_ofs;
};
/*
@ -731,6 +745,44 @@ struct xendispl_page_flip_req {
uint64_t fb_cookie;
};
/*
* Request EDID - request EDID describing current connector:
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | id | _OP_GET_EDID | reserved | 4
* +----------------+----------------+----------------+----------------+
* | buffer_sz | 8
* +----------------+----------------+----------------+----------------+
* | gref_directory | 12
* +----------------+----------------+----------------+----------------+
* | reserved | 16
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 64
* +----------------+----------------+----------------+----------------+
*
* Notes:
* - This command is not available in protocol version 1 and should be
* ignored.
* - This request is optional and if not supported then visible area
* is defined by the relevant XenStore's "resolution" property.
* - Shared buffer, allocated for EDID storage, must not be less than
* XENDISPL_EDID_MAX_SIZE octets.
*
* buffer_sz - uint32_t, buffer size to be allocated, octets
* gref_directory - grant_ref_t, a reference to the first shared page
* describing EDID buffer references. See XENDISPL_OP_DBUF_CREATE for
* grant page directory structure (struct xendispl_page_directory).
*
* See response format for this request.
*/
/* Operation-specific payload of the XENDISPL_OP_GET_EDID request. */
struct xendispl_get_edid_req {
/* Size of the buffer to be allocated for EDID storage, in octets. */
uint32_t buffer_sz;
/*
* Grant reference to the first shared page of the page directory
* describing the EDID buffer references.
*/
grant_ref_t gref_directory;
};
/*
*---------------------------------- Responses --------------------------------
*
@ -753,6 +805,35 @@ struct xendispl_page_flip_req {
* id - uint16_t, private guest value, echoed from request
* status - int32_t, response status, zero on success and -XEN_EXX on failure
*
*
* Get EDID response - response for XENDISPL_OP_GET_EDID:
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | id | operation | reserved | 4
* +----------------+----------------+----------------+----------------+
* | status | 8
* +----------------+----------------+----------------+----------------+
* | edid_sz | 12
* +----------------+----------------+----------------+----------------+
* | reserved | 16
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 64
* +----------------+----------------+----------------+----------------+
*
* Notes:
* - This response is not available in protocol version 1 and should be
* ignored.
*
* edid_sz - uint32_t, size of the EDID, octets
*/
/* Operation-specific payload of the response to XENDISPL_OP_GET_EDID. */
struct xendispl_get_edid_resp {
/* Size of the EDID, in octets. */
uint32_t edid_sz;
};
/*
*----------------------------------- Events ----------------------------------
*
* Events are sent via a shared page allocated by the front and propagated by
@ -804,6 +885,7 @@ struct xendispl_req {
struct xendispl_fb_detach_req fb_detach;
struct xendispl_set_config_req set_config;
struct xendispl_page_flip_req pg_flip;
struct xendispl_get_edid_req get_edid;
uint8_t reserved[56];
} op;
};
@ -813,7 +895,10 @@ struct xendispl_resp {
uint8_t operation;
uint8_t reserved;
int32_t status;
uint8_t reserved1[56];
union {
struct xendispl_get_edid_resp get_edid;
uint8_t reserved1[56];
} op;
};
struct xendispl_evt {