2007-07-19 16:49:23 +08:00
|
|
|
#ifndef _LGUEST_H
|
|
|
|
#define _LGUEST_H
|
|
|
|
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/stringify.h>
|
|
|
|
#include <linux/lguest.h>
|
|
|
|
#include <linux/lguest_launcher.h>
|
|
|
|
#include <linux/wait.h>
|
2008-01-19 09:59:07 +08:00
|
|
|
#include <linux/hrtimer.h>
|
2007-07-19 16:49:23 +08:00
|
|
|
#include <linux/err.h>
|
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities to include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the following.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and tries to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
widely available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build tests were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
|
|
|
#include <linux/slab.h>
|
2007-07-19 16:49:23 +08:00
|
|
|
|
2007-10-22 09:03:28 +08:00
|
|
|
#include <asm/lguest.h>
|
2007-07-19 16:49:23 +08:00
|
|
|
|
2009-07-31 06:03:46 +08:00
|
|
|
struct pgdir {
|
2007-10-22 09:03:34 +08:00
|
|
|
unsigned long gpgdir;
|
2013-04-22 12:40:41 +08:00
|
|
|
bool switcher_mapped;
|
2013-04-22 12:40:41 +08:00
|
|
|
int last_host_cpu;
|
2007-10-22 09:03:33 +08:00
|
|
|
pgd_t *pgdir;
|
2007-07-19 16:49:23 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/* We have two pages shared with guests, per cpu. */
|
2009-07-31 06:03:46 +08:00
|
|
|
struct lguest_pages {
|
2007-07-19 16:49:23 +08:00
|
|
|
/* This is the stack page mapped rw in guest */
|
|
|
|
char spare[PAGE_SIZE - sizeof(struct lguest_regs)];
|
|
|
|
struct lguest_regs regs;
|
|
|
|
|
|
|
|
/* This is the host state & guest descriptor page, ro in guest */
|
|
|
|
struct lguest_ro_state state;
|
|
|
|
} __attribute__((aligned(PAGE_SIZE)));
|
|
|
|
|
|
|
|
/*
 * Bit values for the "changed" bitmap in struct lg_cpu.
 *
 * CHANGED_ALL deliberately leaves out CHANGED_GDT_TLS: the TLS entries are a
 * subset of the GDT, so CHANGED_GDT already covers them.
 */
#define CHANGED_IDT		1
#define CHANGED_GDT		2
#define CHANGED_GDT_TLS		4 /* Actually a subset of CHANGED_GDT */
#define CHANGED_ALL		(CHANGED_IDT | CHANGED_GDT)
|
|
|
|
|
2008-01-07 21:05:22 +08:00
|
|
|
struct lg_cpu {
|
|
|
|
unsigned int id;
|
|
|
|
struct lguest *lg;
|
2008-01-07 21:05:34 +08:00
|
|
|
struct task_struct *tsk;
|
|
|
|
struct mm_struct *mm; /* == tsk->mm, but that becomes NULL on exit */
|
2008-01-07 21:05:27 +08:00
|
|
|
|
2008-01-07 21:05:35 +08:00
|
|
|
u32 cr2;
|
|
|
|
int ts;
|
|
|
|
u32 esp1;
|
2009-06-13 12:27:04 +08:00
|
|
|
u16 ss1;
|
2008-01-07 21:05:35 +08:00
|
|
|
|
2008-01-18 05:14:46 +08:00
|
|
|
/* Bitmap of what has changed: see CHANGED_* above. */
|
|
|
|
int changed;
|
|
|
|
|
2015-02-11 12:45:09 +08:00
|
|
|
/* Pending operation. */
|
|
|
|
struct lguest_pending pending;
|
2008-01-07 21:05:36 +08:00
|
|
|
|
2015-02-11 12:45:09 +08:00
|
|
|
unsigned long *reg_read; /* register from LHREQ_GETREG */
|
|
|
|
|
2009-07-31 06:03:45 +08:00
|
|
|
/* At end of a page shared mapped over lguest_pages in guest. */
|
2008-01-07 21:05:32 +08:00
|
|
|
unsigned long regs_page;
|
|
|
|
struct lguest_regs *regs;
|
|
|
|
|
2008-01-18 05:13:26 +08:00
|
|
|
struct lguest_pages *last_pages;
|
|
|
|
|
2011-07-22 13:09:48 +08:00
|
|
|
/* Initialization mode: linear map everything. */
|
|
|
|
bool linear_pages;
|
2009-07-31 06:03:45 +08:00
|
|
|
int cpu_pgd; /* Which pgd this cpu is currently using */
|
2008-01-07 21:05:37 +08:00
|
|
|
|
2008-01-07 21:05:27 +08:00
|
|
|
/* If a hypercall was asked for, this points to the arguments. */
|
|
|
|
struct hcall_args *hcall;
|
|
|
|
u32 next_hcall;
|
2008-01-07 21:05:28 +08:00
|
|
|
|
|
|
|
/* Virtual clock device */
|
|
|
|
struct hrtimer hrt;
|
2008-01-07 21:05:29 +08:00
|
|
|
|
2009-06-13 12:27:10 +08:00
|
|
|
/* Did the Guest tell us to halt? */
|
2008-01-07 21:05:34 +08:00
|
|
|
int halted;
|
|
|
|
|
2008-01-07 21:05:29 +08:00
|
|
|
/* Pending virtual interrupts */
|
|
|
|
DECLARE_BITMAP(irqs_pending, LGUEST_IRQS);
|
2008-01-07 21:05:33 +08:00
|
|
|
|
|
|
|
struct lg_cpu_arch arch;
|
2008-01-07 21:05:22 +08:00
|
|
|
};
|
|
|
|
|
2007-07-19 16:49:23 +08:00
|
|
|
/* The private info the thread maintains about the guest. */
|
2009-07-31 06:03:46 +08:00
|
|
|
struct lguest {
|
2007-07-19 16:49:23 +08:00
|
|
|
struct lguest_data __user *lguest_data;
|
2008-01-07 21:05:22 +08:00
|
|
|
struct lg_cpu cpus[NR_CPUS];
|
|
|
|
unsigned int nr_cpus;
|
|
|
|
|
2015-02-11 12:45:10 +08:00
|
|
|
/* Valid guest memory pages must be < this. */
|
2007-07-19 16:49:23 +08:00
|
|
|
u32 pfn_limit;
|
2009-07-31 06:03:45 +08:00
|
|
|
|
2015-02-11 12:45:10 +08:00
|
|
|
/* Device memory is >= pfn_limit and < device_limit. */
|
|
|
|
u32 device_limit;
|
|
|
|
|
2009-07-31 06:03:45 +08:00
|
|
|
/*
|
|
|
|
* This provides the offset to the base of guest-physical memory in the
|
|
|
|
* Launcher.
|
|
|
|
*/
|
2007-10-22 09:03:26 +08:00
|
|
|
void __user *mem_base;
|
2007-10-22 09:03:36 +08:00
|
|
|
unsigned long kernel_address;
|
2007-07-19 16:49:23 +08:00
|
|
|
|
|
|
|
struct pgdir pgdirs[4];
|
|
|
|
|
2015-03-24 09:21:39 +08:00
|
|
|
unsigned long noirq_iret;
|
2007-07-19 16:49:23 +08:00
|
|
|
|
|
|
|
unsigned int stack_pages;
|
|
|
|
u32 tsc_khz;
|
|
|
|
|
|
|
|
/* Dead? */
|
|
|
|
const char *dead;
|
|
|
|
};
|
|
|
|
|
|
|
|
extern struct mutex lguest_lock;
|
|
|
|
|
|
|
|
/* core.c: */
|
2009-03-19 00:38:35 +08:00
|
|
|
bool lguest_address_ok(const struct lguest *lg,
|
|
|
|
unsigned long addr, unsigned long len);
|
2008-01-18 05:19:42 +08:00
|
|
|
void __lgread(struct lg_cpu *, void *, unsigned long, unsigned);
|
|
|
|
void __lgwrite(struct lg_cpu *, unsigned long, const void *, unsigned);
|
2013-04-22 12:40:40 +08:00
|
|
|
extern struct page **lg_switcher_pages;
|
2007-10-22 09:24:24 +08:00
|
|
|
|
2009-07-31 06:03:45 +08:00
|
|
|
/*H:035
 * Using memory-copy operations like that is usually inconvenient, so we
 * have the following helper macros which read and write a specific type (often
 * an unsigned long).
 *
 * This reads into a variable of the given type then returns that.
 *
 * @cpu:  passed straight through to __lgread().
 * @addr: address to read from, passed straight through to __lgread().
 * @type: C type of the value; sizeof(type) bytes are requested.
 *
 * The value of the statement expression is the temporary that __lgread()
 * filled in.
 */
#define lgread(cpu, addr, type)						\
	({ type _v; __lgread((cpu), &_v, (addr), sizeof(_v)); _v; })
|
2007-10-22 09:24:24 +08:00
|
|
|
|
|
|
|
/*
 * This checks that the variable is of the given type, then writes it out.
 *
 * @cpu:  passed straight through to __lgwrite().
 * @addr: address to write to, passed straight through to __lgwrite().
 * @type: the type @val must have (enforced by typecheck()).
 * @val:  an lvalue; its address and sizeof() are handed to __lgwrite().
 */
#define lgwrite(cpu, addr, type, val)				\
	do {							\
		typecheck(type, val);				\
		__lgwrite((cpu), (addr), &(val), sizeof(val));	\
	} while (0)
/* (end of memory access helper routines) :*/
|
|
|
|
|
2008-01-07 21:05:25 +08:00
|
|
|
/*
 * NOTE(review): presumably the main loop that runs the Guest on @cpu; the
 * meaning of the return value and of the __user pointer @user is not visible
 * from this header — confirm against core.c.
 */
int run_guest(struct lg_cpu *cpu, unsigned long __user *user);
|
2007-07-19 16:49:23 +08:00
|
|
|
|
2009-07-31 06:03:45 +08:00
|
|
|
/*
 * Helper macros to obtain the first 12 or the last 20 bits, this is only the
 * first step in the migration to the kernel types.  pte_pfn is already defined
 * in the kernel.
 *
 * *_flags() keeps the bits that PAGE_MASK clears (those below PAGE_SHIFT);
 * *_pfn() shifts the raw entry value right by PAGE_SHIFT.
 */
#define pgd_flags(x)	(pgd_val(x) & ~PAGE_MASK)
#define pgd_pfn(x)	(pgd_val(x) >> PAGE_SHIFT)
#define pmd_flags(x)	(pmd_val(x) & ~PAGE_MASK)
#define pmd_pfn(x)	(pmd_val(x) >> PAGE_SHIFT)
|
2007-07-19 16:49:23 +08:00
|
|
|
|
|
|
|
/* interrupts_and_traps.c: */
|
2009-06-13 12:27:02 +08:00
|
|
|
unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more);
|
|
|
|
void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more);
|
2009-06-13 12:27:08 +08:00
|
|
|
void set_interrupt(struct lg_cpu *cpu, unsigned int irq);
|
2009-03-19 00:38:35 +08:00
|
|
|
bool deliver_trap(struct lg_cpu *cpu, unsigned int num);
|
2008-01-07 21:05:33 +08:00
|
|
|
void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int i,
|
|
|
|
u32 low, u32 hi);
|
2008-01-07 21:05:35 +08:00
|
|
|
void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages);
|
|
|
|
void pin_stack_pages(struct lg_cpu *cpu);
|
2007-07-19 16:49:23 +08:00
|
|
|
void setup_default_idt_entries(struct lguest_ro_state *state,
|
|
|
|
const unsigned long *def);
|
2008-01-07 21:05:33 +08:00
|
|
|
void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt,
|
2007-07-19 16:49:23 +08:00
|
|
|
const unsigned long *def);
|
2008-01-07 21:05:28 +08:00
|
|
|
void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta);
|
2009-06-13 12:27:09 +08:00
|
|
|
bool send_notify_to_eventfd(struct lg_cpu *cpu);
|
2008-01-07 21:05:28 +08:00
|
|
|
void init_clockdev(struct lg_cpu *cpu);
|
2007-10-22 09:03:35 +08:00
|
|
|
bool check_syscall_vector(struct lguest *lg);
|
2016-04-01 09:45:46 +08:00
|
|
|
bool could_be_syscall(unsigned int num);
|
2007-10-22 09:03:35 +08:00
|
|
|
int init_interrupts(void);
|
|
|
|
void free_interrupts(void);
|
2007-07-19 16:49:23 +08:00
|
|
|
|
|
|
|
/* segments.c: */
|
|
|
|
void setup_default_gdt_entries(struct lguest_ro_state *state);
|
2008-01-07 21:05:33 +08:00
|
|
|
void setup_guest_gdt(struct lg_cpu *cpu);
|
2009-04-20 13:14:00 +08:00
|
|
|
void load_guest_gdt_entry(struct lg_cpu *cpu, unsigned int i,
|
|
|
|
u32 low, u32 hi);
|
2008-01-07 21:05:33 +08:00
|
|
|
void guest_load_tls(struct lg_cpu *cpu, unsigned long tls_array);
|
|
|
|
void copy_gdt(const struct lg_cpu *cpu, struct desc_struct *gdt);
|
|
|
|
void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt);
|
2007-07-19 16:49:23 +08:00
|
|
|
|
|
|
|
/* page_tables.c: */
|
2008-09-29 12:40:07 +08:00
|
|
|
int init_guest_pagetable(struct lguest *lg);
|
2007-07-19 16:49:23 +08:00
|
|
|
void free_guest_pagetable(struct lguest *lg);
|
2008-01-07 21:05:35 +08:00
|
|
|
void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable);
|
2009-05-31 02:48:08 +08:00
|
|
|
void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 i);
|
2009-06-13 12:27:07 +08:00
|
|
|
#ifdef CONFIG_X86_PAE
|
|
|
|
void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i);
|
|
|
|
#endif
|
2008-01-07 21:05:35 +08:00
|
|
|
void guest_pagetable_clear_all(struct lg_cpu *cpu);
|
2008-01-07 21:05:37 +08:00
|
|
|
void guest_pagetable_flush_user(struct lg_cpu *cpu);
|
2008-01-18 05:19:42 +08:00
|
|
|
void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir,
|
2007-10-22 09:03:33 +08:00
|
|
|
unsigned long vaddr, pte_t val);
|
2008-01-07 21:05:30 +08:00
|
|
|
void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages);
|
2015-02-11 12:45:10 +08:00
|
|
|
bool demand_page(struct lg_cpu *cpu, unsigned long cr2, int errcode,
|
|
|
|
unsigned long *iomem);
|
2008-01-07 21:05:37 +08:00
|
|
|
void pin_page(struct lg_cpu *cpu, unsigned long vaddr);
|
2015-02-11 12:45:09 +08:00
|
|
|
bool __guest_pa(struct lg_cpu *cpu, unsigned long vaddr, unsigned long *paddr);
|
2008-01-07 21:05:37 +08:00
|
|
|
unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr);
|
2008-01-18 05:19:42 +08:00
|
|
|
void page_table_guest_data_init(struct lg_cpu *cpu);
|
2007-07-19 16:49:23 +08:00
|
|
|
|
2007-10-22 09:03:28 +08:00
|
|
|
/*
 * <arch>/core.c:
 *
 * Architecture-specific hooks; declarations only.
 */
void lguest_arch_host_init(void);
void lguest_arch_host_fini(void);
void lguest_arch_run_guest(struct lg_cpu *cpu);
void lguest_arch_handle_trap(struct lg_cpu *cpu);
int lguest_arch_init_hypercalls(struct lg_cpu *cpu);
int lguest_arch_do_hcall(struct lg_cpu *cpu, struct hcall_args *args);
void lguest_arch_setup_regs(struct lg_cpu *cpu, unsigned long start);
unsigned long *lguest_arch_regptr(struct lg_cpu *cpu, size_t reg_off, bool any);
|
2007-10-22 09:03:28 +08:00
|
|
|
|
|
|
|
/*
 * <arch>/switcher.S:
 *
 * Symbols exported by the assembly file, declared as char arrays so that
 * C code can take their addresses.
 */
extern char start_switcher_text[], end_switcher_text[], switch_to_guest[];
|
|
|
|
|
2007-07-19 16:49:23 +08:00
|
|
|
/*
 * lguest_user.c:
 *
 * Declarations only; NOTE(review): presumably register/unregister the
 * Launcher-facing device — confirm in that file.
 */
int lguest_device_init(void);
void lguest_device_remove(void);
|
|
|
|
|
|
|
|
/*
 * hypercalls.c:
 *
 * Declarations only; see that file for what each routine does.
 */
void do_hypercalls(struct lg_cpu *cpu);
void write_timestamp(struct lg_cpu *cpu);
|
2007-07-19 16:49:23 +08:00
|
|
|
|
2007-07-27 01:41:03 +08:00
|
|
|
/*L:035
 * Let's step aside for the moment, to study one important routine that's used
 * widely in the Host code.
 *
 * There are many cases where the Guest can do something invalid, like pass crap
 * to a hypercall.  Since only the Guest kernel can make hypercalls, it's quite
 * acceptable to simply terminate the Guest and give the Launcher a nicely
 * formatted reason.  It's also simpler for the Guest itself, which doesn't
 * need to check most hypercalls for "success"; if you're still running, it
 * succeeded.
 *
 * Once this is called, the Guest will never run again, so most Host code can
 * call this then continue as if nothing had happened.  This means many
 * functions don't have to explicitly return an error code, which keeps the
 * code simple.
 *
 * It also means that this can be called more than once: only the first one is
 * remembered.  The only trick is that we still need to kill the Guest even if
 * we can't allocate memory to store the reason.  Linux has a neat way of
 * packing error codes into invalid pointers, so we use that here.
 *
 * Like any macro which uses an "if", it is safely wrapped in a run-once "do {
 * } while(0)".
 *
 * @cpu: a struct lg_cpu pointer; the reason is stored in cpu->lg->dead.
 * @...: printf-style format string followed by its arguments, handed to
 *       kasprintf(GFP_ATOMIC, ...).
 *
 * Note that @cpu is evaluated more than once, so it must not have side
 * effects.
 */
#define kill_guest(cpu, ...)						\
do {									\
	if (!(cpu)->lg->dead) {						\
		(cpu)->lg->dead = kasprintf(GFP_ATOMIC, __VA_ARGS__);	\
		if (!(cpu)->lg->dead)					\
			(cpu)->lg->dead = ERR_PTR(-ENOMEM);		\
	}								\
} while (0)
/* (End of aside) :*/
|
2007-07-19 16:49:23 +08:00
|
|
|
|
|
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
#endif /* _LGUEST_H */
|