summaryrefslogtreecommitdiff
path: root/drivers/lguest/x86
diff options
context:
space:
mode:
authorJuergen Gross <jgross@suse.com>2017-08-16 20:31:57 +0300
committerIngo Molnar <mingo@kernel.org>2017-08-24 10:57:28 +0300
commitecda85e70277ef24e44a1f6bc00243cebd19f985 (patch)
treecd094195d6ab0ed476ad093236880c8ce145e8e4 /drivers/lguest/x86
parentedcb5cf84f05e5d2e2af25422a72ccde359fcca9 (diff)
downloadlinux-ecda85e70277ef24e44a1f6bc00243cebd19f985.tar.xz
x86/lguest: Remove lguest support
Lguest seems to be rather unused these days. It has seen only patches ensuring it still builds the last two years and its official state is "Odd Fixes". Remove it in order to be able to clean up the paravirt code. Signed-off-by: Juergen Gross <jgross@suse.com> Acked-by: Rusty Russell <rusty@rustcorp.com.au> Acked-by: Thomas Gleixner <tglx@linutronix.de> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: boris.ostrovsky@oracle.com Cc: lguest@lists.ozlabs.org Cc: rusty@rustcorp.com.au Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/20170816173157.8633-3-jgross@suse.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'drivers/lguest/x86')
-rw-r--r--drivers/lguest/x86/core.c724
-rw-r--r--drivers/lguest/x86/switcher_32.S388
2 files changed, 0 insertions, 1112 deletions
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
deleted file mode 100644
index b4f79b923aea..000000000000
--- a/drivers/lguest/x86/core.c
+++ /dev/null
@@ -1,724 +0,0 @@
-/*
- * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation.
- * Copyright (C) 2007, Jes Sorensen <jes@sgi.com> SGI.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-/*P:450
- * This file contains the x86-specific lguest code. It used to be all
- * mixed in with drivers/lguest/core.c but several foolhardy code slashers
- * wrestled most of the dependencies out to here in preparation for porting
- * lguest to other architectures (see what I mean by foolhardy?).
- *
- * This also contains a couple of non-obvious setup and teardown pieces which
- * were implemented after days of debugging pain.
-:*/
-#include <linux/kernel.h>
-#include <linux/start_kernel.h>
-#include <linux/string.h>
-#include <linux/console.h>
-#include <linux/screen_info.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-#include <linux/clocksource.h>
-#include <linux/clockchips.h>
-#include <linux/cpu.h>
-#include <linux/lguest.h>
-#include <linux/lguest_launcher.h>
-#include <asm/paravirt.h>
-#include <asm/param.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-#include <asm/setup.h>
-#include <asm/lguest.h>
-#include <linux/uaccess.h>
-#include <asm/fpu/internal.h>
-#include <asm/tlbflush.h>
-#include "../lg.h"
-
-static int cpu_had_pge;
-
-static struct {
- unsigned long offset;
- unsigned short segment;
-} lguest_entry;
-
-/* Offset from where switcher.S was compiled to where we've copied it */
-static unsigned long switcher_offset(void)
-{
- return switcher_addr - (unsigned long)start_switcher_text;
-}
-
-/* This cpu's struct lguest_pages (after the Switcher text page) */
-static struct lguest_pages *lguest_pages(unsigned int cpu)
-{
- return &(((struct lguest_pages *)(switcher_addr + PAGE_SIZE))[cpu]);
-}
-
-static DEFINE_PER_CPU(struct lg_cpu *, lg_last_cpu);
-
-/*S:010
- * We approach the Switcher.
- *
- * Remember that each CPU has two pages which are visible to the Guest when it
- * runs on that CPU. This has to contain the state for that Guest: we copy the
- * state in just before we run the Guest.
- *
- * Each Guest has "changed" flags which indicate what has changed in the Guest
- * since it last ran. We saw this set in interrupts_and_traps.c and
- * segments.c.
- */
-static void copy_in_guest_info(struct lg_cpu *cpu, struct lguest_pages *pages)
-{
- /*
- * Copying all this data can be quite expensive. We usually run the
- * same Guest we ran last time (and that Guest hasn't run anywhere else
- * meanwhile). If that's not the case, we pretend everything in the
- * Guest has changed.
- */
- if (__this_cpu_read(lg_last_cpu) != cpu || cpu->last_pages != pages) {
- __this_cpu_write(lg_last_cpu, cpu);
- cpu->last_pages = pages;
- cpu->changed = CHANGED_ALL;
- }
-
- /*
- * These copies are pretty cheap, so we do them unconditionally: */
- /* Save the current Host top-level page directory.
- */
- pages->state.host_cr3 = __pa(current->mm->pgd);
- /*
- * Set up the Guest's page tables to see this CPU's pages (and no
- * other CPU's pages).
- */
- map_switcher_in_guest(cpu, pages);
- /*
- * Set up the two "TSS" members which tell the CPU what stack to use
- * for traps which do directly into the Guest (ie. traps at privilege
- * level 1).
- */
- pages->state.guest_tss.sp1 = cpu->esp1;
- pages->state.guest_tss.ss1 = cpu->ss1;
-
- /* Copy direct-to-Guest trap entries. */
- if (cpu->changed & CHANGED_IDT)
- copy_traps(cpu, pages->state.guest_idt, default_idt_entries);
-
- /* Copy all GDT entries which the Guest can change. */
- if (cpu->changed & CHANGED_GDT)
- copy_gdt(cpu, pages->state.guest_gdt);
- /* If only the TLS entries have changed, copy them. */
- else if (cpu->changed & CHANGED_GDT_TLS)
- copy_gdt_tls(cpu, pages->state.guest_gdt);
-
- /* Mark the Guest as unchanged for next time. */
- cpu->changed = 0;
-}
-
-/* Finally: the code to actually call into the Switcher to run the Guest. */
-static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages)
-{
- /* This is a dummy value we need for GCC's sake. */
- unsigned int clobber;
-
- /*
- * Copy the guest-specific information into this CPU's "struct
- * lguest_pages".
- */
- copy_in_guest_info(cpu, pages);
-
- /*
- * Set the trap number to 256 (impossible value). If we fault while
- * switching to the Guest (bad segment registers or bug), this will
- * cause us to abort the Guest.
- */
- cpu->regs->trapnum = 256;
-
- /*
- * Now: we push the "eflags" register on the stack, then do an "lcall".
- * This is how we change from using the kernel code segment to using
- * the dedicated lguest code segment, as well as jumping into the
- * Switcher.
- *
- * The lcall also pushes the old code segment (KERNEL_CS) onto the
- * stack, then the address of this call. This stack layout happens to
- * exactly match the stack layout created by an interrupt...
- */
- asm volatile("pushf; lcall *%4"
- /*
- * This is how we tell GCC that %eax ("a") and %ebx ("b")
- * are changed by this routine. The "=" means output.
- */
- : "=a"(clobber), "=b"(clobber)
- /*
- * %eax contains the pages pointer. ("0" refers to the
- * 0-th argument above, ie "a"). %ebx contains the
- * physical address of the Guest's top-level page
- * directory.
- */
- : "0"(pages),
- "1"(__pa(cpu->lg->pgdirs[cpu->cpu_pgd].pgdir)),
- "m"(lguest_entry)
- /*
- * We tell gcc that all these registers could change,
- * which means we don't have to save and restore them in
- * the Switcher.
- */
- : "memory", "%edx", "%ecx", "%edi", "%esi");
-}
-/*:*/
-
-unsigned long *lguest_arch_regptr(struct lg_cpu *cpu, size_t reg_off, bool any)
-{
- switch (reg_off) {
- case offsetof(struct pt_regs, bx):
- return &cpu->regs->ebx;
- case offsetof(struct pt_regs, cx):
- return &cpu->regs->ecx;
- case offsetof(struct pt_regs, dx):
- return &cpu->regs->edx;
- case offsetof(struct pt_regs, si):
- return &cpu->regs->esi;
- case offsetof(struct pt_regs, di):
- return &cpu->regs->edi;
- case offsetof(struct pt_regs, bp):
- return &cpu->regs->ebp;
- case offsetof(struct pt_regs, ax):
- return &cpu->regs->eax;
- case offsetof(struct pt_regs, ip):
- return &cpu->regs->eip;
- case offsetof(struct pt_regs, sp):
- return &cpu->regs->esp;
- }
-
- /* Launcher can read these, but we don't allow any setting. */
- if (any) {
- switch (reg_off) {
- case offsetof(struct pt_regs, ds):
- return &cpu->regs->ds;
- case offsetof(struct pt_regs, es):
- return &cpu->regs->es;
- case offsetof(struct pt_regs, fs):
- return &cpu->regs->fs;
- case offsetof(struct pt_regs, gs):
- return &cpu->regs->gs;
- case offsetof(struct pt_regs, cs):
- return &cpu->regs->cs;
- case offsetof(struct pt_regs, flags):
- return &cpu->regs->eflags;
- case offsetof(struct pt_regs, ss):
- return &cpu->regs->ss;
- }
- }
-
- return NULL;
-}
-
-/*M:002
- * There are hooks in the scheduler which we can register to tell when we
- * get kicked off the CPU (preempt_notifier_register()). This would allow us
- * to lazily disable SYSENTER which would regain some performance, and should
- * also simplify copy_in_guest_info(). Note that we'd still need to restore
- * things when we exit to Launcher userspace, but that's fairly easy.
- *
- * We could also try using these hooks for PGE, but that might be too expensive.
- *
- * The hooks were designed for KVM, but we can also put them to good use.
-:*/
-
-/*H:040
- * This is the i386-specific code to setup and run the Guest. Interrupts
- * are disabled: we own the CPU.
- */
-void lguest_arch_run_guest(struct lg_cpu *cpu)
-{
- /*
- * SYSENTER is an optimized way of doing system calls. We can't allow
- * it because it always jumps to privilege level 0. A normal Guest
- * won't try it because we don't advertise it in CPUID, but a malicious
- * Guest (or malicious Guest userspace program) could, so we tell the
- * CPU to disable it before running the Guest.
- */
- if (boot_cpu_has(X86_FEATURE_SEP))
- wrmsr(MSR_IA32_SYSENTER_CS, 0, 0);
-
- /*
- * Now we actually run the Guest. It will return when something
- * interesting happens, and we can examine its registers to see what it
- * was doing.
- */
- run_guest_once(cpu, lguest_pages(raw_smp_processor_id()));
-
- /*
- * Note that the "regs" structure contains two extra entries which are
- * not really registers: a trap number which says what interrupt or
- * trap made the switcher code come back, and an error code which some
- * traps set.
- */
-
- /* Restore SYSENTER if it's supposed to be on. */
- if (boot_cpu_has(X86_FEATURE_SEP))
- wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
-
- /*
- * If the Guest page faulted, then the cr2 register will tell us the
- * bad virtual address. We have to grab this now, because once we
- * re-enable interrupts an interrupt could fault and thus overwrite
- * cr2, or we could even move off to a different CPU.
- */
- if (cpu->regs->trapnum == 14)
- cpu->arch.last_pagefault = read_cr2();
- /*
- * Similarly, if we took a trap because the Guest used the FPU,
- * we have to restore the FPU it expects to see.
- * fpu__restore() may sleep and we may even move off to
- * a different CPU. So all the critical stuff should be done
- * before this.
- */
- else if (cpu->regs->trapnum == 7 && !fpregs_active())
- fpu__restore(&current->thread.fpu);
-}
-
-/*H:130
- * Now we've examined the hypercall code; our Guest can make requests.
- * Our Guest is usually so well behaved; it never tries to do things it isn't
- * allowed to, and uses hypercalls instead. Unfortunately, Linux's paravirtual
- * infrastructure isn't quite complete, because it doesn't contain replacements
- * for the Intel I/O instructions. As a result, the Guest sometimes fumbles
- * across one during the boot process as it probes for various things which are
- * usually attached to a PC.
- *
- * When the Guest uses one of these instructions, we get a trap (General
- * Protection Fault) and come here. We queue this to be sent out to the
- * Launcher to handle.
- */
-
-/*
- * The eip contains the *virtual* address of the Guest's instruction:
- * we copy the instruction here so the Launcher doesn't have to walk
- * the page tables to decode it. We handle the case (eg. in a kernel
- * module) where the instruction is over two pages, and the pages are
- * virtually but not physically contiguous.
- *
- * The longest possible x86 instruction is 15 bytes, but we don't handle
- * anything that strange.
- */
-static void copy_from_guest(struct lg_cpu *cpu,
- void *dst, unsigned long vaddr, size_t len)
-{
- size_t to_page_end = PAGE_SIZE - (vaddr % PAGE_SIZE);
- unsigned long paddr;
-
- BUG_ON(len > PAGE_SIZE);
-
- /* If it goes over a page, copy in two parts. */
- if (len > to_page_end) {
- /* But make sure the next page is mapped! */
- if (__guest_pa(cpu, vaddr + to_page_end, &paddr))
- copy_from_guest(cpu, dst + to_page_end,
- vaddr + to_page_end,
- len - to_page_end);
- else
- /* Otherwise fill with zeroes. */
- memset(dst + to_page_end, 0, len - to_page_end);
- len = to_page_end;
- }
-
- /* This will kill the guest if it isn't mapped, but that
- * shouldn't happen. */
- __lgread(cpu, dst, guest_pa(cpu, vaddr), len);
-}
-
-
-static void setup_emulate_insn(struct lg_cpu *cpu)
-{
- cpu->pending.trap = 13;
- copy_from_guest(cpu, cpu->pending.insn, cpu->regs->eip,
- sizeof(cpu->pending.insn));
-}
-
-static void setup_iomem_insn(struct lg_cpu *cpu, unsigned long iomem_addr)
-{
- cpu->pending.trap = 14;
- cpu->pending.addr = iomem_addr;
- copy_from_guest(cpu, cpu->pending.insn, cpu->regs->eip,
- sizeof(cpu->pending.insn));
-}
-
-/*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */
-void lguest_arch_handle_trap(struct lg_cpu *cpu)
-{
- unsigned long iomem_addr;
-
- switch (cpu->regs->trapnum) {
- case 13: /* We've intercepted a General Protection Fault. */
- /* Hand to Launcher to emulate those pesky IN and OUT insns */
- if (cpu->regs->errcode == 0) {
- setup_emulate_insn(cpu);
- return;
- }
- break;
- case 14: /* We've intercepted a Page Fault. */
- /*
- * The Guest accessed a virtual address that wasn't mapped.
- * This happens a lot: we don't actually set up most of the page
- * tables for the Guest at all when we start: as it runs it asks
- * for more and more, and we set them up as required. In this
- * case, we don't even tell the Guest that the fault happened.
- *
- * The errcode tells whether this was a read or a write, and
- * whether kernel or userspace code.
- */
- if (demand_page(cpu, cpu->arch.last_pagefault,
- cpu->regs->errcode, &iomem_addr))
- return;
-
- /* Was this an access to memory mapped IO? */
- if (iomem_addr) {
- /* Tell Launcher, let it handle it. */
- setup_iomem_insn(cpu, iomem_addr);
- return;
- }
-
- /*
- * OK, it's really not there (or not OK): the Guest needs to
- * know. We write out the cr2 value so it knows where the
- * fault occurred.
- *
- * Note that if the Guest were really messed up, this could
- * happen before it's done the LHCALL_LGUEST_INIT hypercall, so
- * lg->lguest_data could be NULL
- */
- if (cpu->lg->lguest_data &&
- put_user(cpu->arch.last_pagefault,
- &cpu->lg->lguest_data->cr2))
- kill_guest(cpu, "Writing cr2");
- break;
- case 7: /* We've intercepted a Device Not Available fault. */
- /* No special handling is needed here. */
- break;
- case 32 ... 255:
- /* This might be a syscall. */
- if (could_be_syscall(cpu->regs->trapnum))
- break;
-
- /*
- * Other values mean a real interrupt occurred, in which case
- * the Host handler has already been run. We just do a
- * friendly check if another process should now be run, then
- * return to run the Guest again.
- */
- cond_resched();
- return;
- case LGUEST_TRAP_ENTRY:
- /*
- * Our 'struct hcall_args' maps directly over our regs: we set
- * up the pointer now to indicate a hypercall is pending.
- */
- cpu->hcall = (struct hcall_args *)cpu->regs;
- return;
- }
-
- /* We didn't handle the trap, so it needs to go to the Guest. */
- if (!deliver_trap(cpu, cpu->regs->trapnum))
- /*
- * If the Guest doesn't have a handler (either it hasn't
- * registered any yet, or it's one of the faults we don't let
- * it handle), it dies with this cryptic error message.
- */
- kill_guest(cpu, "unhandled trap %li at %#lx (%#lx)",
- cpu->regs->trapnum, cpu->regs->eip,
- cpu->regs->trapnum == 14 ? cpu->arch.last_pagefault
- : cpu->regs->errcode);
-}
-
-/*
- * Now we can look at each of the routines this calls, in increasing order of
- * complexity: do_hypercalls(), emulate_insn(), maybe_do_interrupt(),
- * deliver_trap() and demand_page(). After all those, we'll be ready to
- * examine the Switcher, and our philosophical understanding of the Host/Guest
- * duality will be complete.
-:*/
-static void adjust_pge(void *on)
-{
- if (on)
- cr4_set_bits(X86_CR4_PGE);
- else
- cr4_clear_bits(X86_CR4_PGE);
-}
-
-/*H:020
- * Now the Switcher is mapped and every thing else is ready, we need to do
- * some more i386-specific initialization.
- */
-void __init lguest_arch_host_init(void)
-{
- int i;
-
- /*
- * Most of the x86/switcher_32.S doesn't care that it's been moved; on
- * Intel, jumps are relative, and it doesn't access any references to
- * external code or data.
- *
- * The only exception is the interrupt handlers in switcher.S: their
- * addresses are placed in a table (default_idt_entries), so we need to
- * update the table with the new addresses. switcher_offset() is a
- * convenience function which returns the distance between the
- * compiled-in switcher code and the high-mapped copy we just made.
- */
- for (i = 0; i < IDT_ENTRIES; i++)
- default_idt_entries[i] += switcher_offset();
-
- /*
- * Set up the Switcher's per-cpu areas.
- *
- * Each CPU gets two pages of its own within the high-mapped region
- * (aka. "struct lguest_pages"). Much of this can be initialized now,
- * but some depends on what Guest we are running (which is set up in
- * copy_in_guest_info()).
- */
- for_each_possible_cpu(i) {
- /* lguest_pages() returns this CPU's two pages. */
- struct lguest_pages *pages = lguest_pages(i);
- /* This is a convenience pointer to make the code neater. */
- struct lguest_ro_state *state = &pages->state;
-
- /*
- * The Global Descriptor Table: the Host has a different one
- * for each CPU. We keep a descriptor for the GDT which says
- * where it is and how big it is (the size is actually the last
- * byte, not the size, hence the "-1").
- */
- state->host_gdt_desc.size = GDT_SIZE-1;
- state->host_gdt_desc.address = (long)get_cpu_gdt_rw(i);
-
- /*
- * All CPUs on the Host use the same Interrupt Descriptor
- * Table, so we just use store_idt(), which gets this CPU's IDT
- * descriptor.
- */
- store_idt(&state->host_idt_desc);
-
- /*
- * The descriptors for the Guest's GDT and IDT can be filled
- * out now, too. We copy the GDT & IDT into ->guest_gdt and
- * ->guest_idt before actually running the Guest.
- */
- state->guest_idt_desc.size = sizeof(state->guest_idt)-1;
- state->guest_idt_desc.address = (long)&state->guest_idt;
- state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1;
- state->guest_gdt_desc.address = (long)&state->guest_gdt;
-
- /*
- * We know where we want the stack to be when the Guest enters
- * the Switcher: in pages->regs. The stack grows upwards, so
- * we start it at the end of that structure.
- */
- state->guest_tss.sp0 = (long)(&pages->regs + 1);
- /*
- * And this is the GDT entry to use for the stack: we keep a
- * couple of special LGUEST entries.
- */
- state->guest_tss.ss0 = LGUEST_DS;
-
- /*
- * x86 can have a finegrained bitmap which indicates what I/O
- * ports the process can use. We set it to the end of our
- * structure, meaning "none".
- */
- state->guest_tss.io_bitmap_base = sizeof(state->guest_tss);
-
- /*
- * Some GDT entries are the same across all Guests, so we can
- * set them up now.
- */
- setup_default_gdt_entries(state);
- /* Most IDT entries are the same for all Guests, too.*/
- setup_default_idt_entries(state, default_idt_entries);
-
- /*
- * The Host needs to be able to use the LGUEST segments on this
- * CPU, too, so put them in the Host GDT.
- */
- get_cpu_gdt_rw(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
- get_cpu_gdt_rw(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
- }
-
- /*
- * In the Switcher, we want the %cs segment register to use the
- * LGUEST_CS GDT entry: we've put that in the Host and Guest GDTs, so
- * it will be undisturbed when we switch. To change %cs and jump we
- * need this structure to feed to Intel's "lcall" instruction.
- */
- lguest_entry.offset = (long)switch_to_guest + switcher_offset();
- lguest_entry.segment = LGUEST_CS;
-
- /*
- * Finally, we need to turn off "Page Global Enable". PGE is an
- * optimization where page table entries are specially marked to show
- * they never change. The Host kernel marks all the kernel pages this
- * way because it's always present, even when userspace is running.
- *
- * Lguest breaks this: unbeknownst to the rest of the Host kernel, we
- * switch to the Guest kernel. If you don't disable this on all CPUs,
- * you'll get really weird bugs that you'll chase for two days.
- *
- * I used to turn PGE off every time we switched to the Guest and back
- * on when we return, but that slowed the Switcher down noticibly.
- */
-
- /*
- * We don't need the complexity of CPUs coming and going while we're
- * doing this.
- */
- get_online_cpus();
- if (boot_cpu_has(X86_FEATURE_PGE)) { /* We have a broader idea of "global". */
- /* Remember that this was originally set (for cleanup). */
- cpu_had_pge = 1;
- /*
- * adjust_pge is a helper function which sets or unsets the PGE
- * bit on its CPU, depending on the argument (0 == unset).
- */
- on_each_cpu(adjust_pge, (void *)0, 1);
- /* Turn off the feature in the global feature set. */
- clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE);
- }
- put_online_cpus();
-}
-/*:*/
-
-void __exit lguest_arch_host_fini(void)
-{
- /* If we had PGE before we started, turn it back on now. */
- get_online_cpus();
- if (cpu_had_pge) {
- set_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE);
- /* adjust_pge's argument "1" means set PGE. */
- on_each_cpu(adjust_pge, (void *)1, 1);
- }
- put_online_cpus();
-}
-
-
-/*H:122 The i386-specific hypercalls simply farm out to the right functions. */
-int lguest_arch_do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
-{
- switch (args->arg0) {
- case LHCALL_LOAD_GDT_ENTRY:
- load_guest_gdt_entry(cpu, args->arg1, args->arg2, args->arg3);
- break;
- case LHCALL_LOAD_IDT_ENTRY:
- load_guest_idt_entry(cpu, args->arg1, args->arg2, args->arg3);
- break;
- case LHCALL_LOAD_TLS:
- guest_load_tls(cpu, args->arg1);
- break;
- default:
- /* Bad Guest. Bad! */
- return -EIO;
- }
- return 0;
-}
-
-/*H:126 i386-specific hypercall initialization: */
-int lguest_arch_init_hypercalls(struct lg_cpu *cpu)
-{
- u32 tsc_speed;
-
- /*
- * The pointer to the Guest's "struct lguest_data" is the only argument.
- * We check that address now.
- */
- if (!lguest_address_ok(cpu->lg, cpu->hcall->arg1,
- sizeof(*cpu->lg->lguest_data)))
- return -EFAULT;
-
- /*
- * Having checked it, we simply set lg->lguest_data to point straight
- * into the Launcher's memory at the right place and then use
- * copy_to_user/from_user from now on, instead of lgread/write. I put
- * this in to show that I'm not immune to writing stupid
- * optimizations.
- */
- cpu->lg->lguest_data = cpu->lg->mem_base + cpu->hcall->arg1;
-
- /*
- * We insist that the Time Stamp Counter exist and doesn't change with
- * cpu frequency. Some devious chip manufacturers decided that TSC
- * changes could be handled in software. I decided that time going
- * backwards might be good for benchmarks, but it's bad for users.
- *
- * We also insist that the TSC be stable: the kernel detects unreliable
- * TSCs for its own purposes, and we use that here.
- */
- if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable())
- tsc_speed = tsc_khz;
- else
- tsc_speed = 0;
- if (put_user(tsc_speed, &cpu->lg->lguest_data->tsc_khz))
- return -EFAULT;
-
- /* The interrupt code might not like the system call vector. */
- if (!check_syscall_vector(cpu->lg))
- kill_guest(cpu, "bad syscall vector");
-
- return 0;
-}
-/*:*/
-
-/*L:030
- * Most of the Guest's registers are left alone: we used get_zeroed_page() to
- * allocate the structure, so they will be 0.
- */
-void lguest_arch_setup_regs(struct lg_cpu *cpu, unsigned long start)
-{
- struct lguest_regs *regs = cpu->regs;
-
- /*
- * There are four "segment" registers which the Guest needs to boot:
- * The "code segment" register (cs) refers to the kernel code segment
- * __KERNEL_CS, and the "data", "extra" and "stack" segment registers
- * refer to the kernel data segment __KERNEL_DS.
- *
- * The privilege level is packed into the lower bits. The Guest runs
- * at privilege level 1 (GUEST_PL).
- */
- regs->ds = regs->es = regs->ss = __KERNEL_DS|GUEST_PL;
- regs->cs = __KERNEL_CS|GUEST_PL;
-
- /*
- * The "eflags" register contains miscellaneous flags. Bit 1 (0x002)
- * is supposed to always be "1". Bit 9 (0x200) controls whether
- * interrupts are enabled. We always leave interrupts enabled while
- * running the Guest.
- */
- regs->eflags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
-
- /*
- * The "Extended Instruction Pointer" register says where the Guest is
- * running.
- */
- regs->eip = start;
-
- /*
- * %esi points to our boot information, at physical address 0, so don't
- * touch it.
- */
-
- /* There are a couple of GDT entries the Guest expects at boot. */
- setup_guest_gdt(cpu);
-}
diff --git a/drivers/lguest/x86/switcher_32.S b/drivers/lguest/x86/switcher_32.S
deleted file mode 100644
index 40634b0db9f7..000000000000
--- a/drivers/lguest/x86/switcher_32.S
+++ /dev/null
@@ -1,388 +0,0 @@
-/*P:900
- * This is the Switcher: code which sits at 0xFFC00000 (or 0xFFE00000) astride
- * both the Host and Guest to do the low-level Guest<->Host switch. It is as
- * simple as it can be made, but it's naturally very specific to x86.
- *
- * You have now completed Preparation. If this has whet your appetite; if you
- * are feeling invigorated and refreshed then the next, more challenging stage
- * can be found in "make Guest".
- :*/
-
-/*M:012
- * Lguest is meant to be simple: my rule of thumb is that 1% more LOC must
- * gain at least 1% more performance. Since neither LOC nor performance can be
- * measured beforehand, it generally means implementing a feature then deciding
- * if it's worth it. And once it's implemented, who can say no?
- *
- * This is why I haven't implemented this idea myself. I want to, but I
- * haven't. You could, though.
- *
- * The main place where lguest performance sucks is Guest page faulting. When
- * a Guest userspace process hits an unmapped page we switch back to the Host,
- * walk the page tables, find it's not mapped, switch back to the Guest page
- * fault handler, which calls a hypercall to set the page table entry, then
- * finally returns to userspace. That's two round-trips.
- *
- * If we had a small walker in the Switcher, we could quickly check the Guest
- * page table and if the page isn't mapped, immediately reflect the fault back
- * into the Guest. This means the Switcher would have to know the top of the
- * Guest page table and the page fault handler address.
- *
- * For simplicity, the Guest should only handle the case where the privilege
- * level of the fault is 3 and probably only not present or write faults. It
- * should also detect recursive faults, and hand the original fault to the
- * Host (which is actually really easy).
- *
- * Two questions remain. Would the performance gain outweigh the complexity?
- * And who would write the verse documenting it?
-:*/
-
-/*M:011
- * Lguest64 handles NMI. This gave me NMI envy (until I looked at their
- * code). It's worth doing though, since it would let us use oprofile in the
- * Host when a Guest is running.
-:*/
-
-/*S:100
- * Welcome to the Switcher itself!
- *
- * This file contains the low-level code which changes the CPU to run the Guest
- * code, and returns to the Host when something happens. Understand this, and
- * you understand the heart of our journey.
- *
- * Because this is in assembler rather than C, our tale switches from prose to
- * verse. First I tried limericks:
- *
- * There once was an eax reg,
- * To which our pointer was fed,
- * It needed an add,
- * Which asm-offsets.h had
- * But this limerick is hurting my head.
- *
- * Next I tried haikus, but fitting the required reference to the seasons in
- * every stanza was quickly becoming tiresome:
- *
- * The %eax reg
- * Holds "struct lguest_pages" now:
- * Cherry blossoms fall.
- *
- * Then I started with Heroic Verse, but the rhyming requirement leeched away
- * the content density and led to some uniquely awful oblique rhymes:
- *
- * These constants are coming from struct offsets
- * For use within the asm switcher text.
- *
- * Finally, I settled for something between heroic hexameter, and normal prose
- * with inappropriate linebreaks. Anyway, it aint no Shakespeare.
- */
-
-// Not all kernel headers work from assembler
-// But these ones are needed: the ENTRY() define
-// And constants extracted from struct offsets
-// To avoid magic numbers and breakage:
-// Should they change the compiler can't save us
-// Down here in the depths of assembler code.
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/page.h>
-#include <asm/segment.h>
-#include <asm/lguest.h>
-
-// We mark the start of the code to copy
-// It's placed in .text tho it's never run here
-// You'll see the trick macro at the end
-// Which interleaves data and text to effect.
-.text
-ENTRY(start_switcher_text)
-
-// When we reach switch_to_guest we have just left
-// The safe and comforting shores of C code
-// %eax has the "struct lguest_pages" to use
-// Where we save state and still see it from the Guest
-// And %ebx holds the Guest shadow pagetable:
-// Once set we have truly left Host behind.
-ENTRY(switch_to_guest)
- // We told gcc all its regs could fade,
- // Clobbered by our journey into the Guest
- // We could have saved them, if we tried
- // But time is our master and cycles count.
-
- // Segment registers must be saved for the Host
- // We push them on the Host stack for later
- pushl %es
- pushl %ds
- pushl %gs
- pushl %fs
- // But the compiler is fickle, and heeds
- // No warning of %ebp clobbers
- // When frame pointers are used. That register
- // Must be saved and restored or chaos strikes.
- pushl %ebp
- // The Host's stack is done, now save it away
- // In our "struct lguest_pages" at offset
- // Distilled into asm-offsets.h
- movl %esp, LGUEST_PAGES_host_sp(%eax)
-
- // All saved and there's now five steps before us:
- // Stack, GDT, IDT, TSS
- // Then last of all the page tables are flipped.
-
- // Yet beware that our stack pointer must be
- // Always valid lest an NMI hits
- // %edx does the duty here as we juggle
- // %eax is lguest_pages: our stack lies within.
- movl %eax, %edx
- addl $LGUEST_PAGES_regs, %edx
- movl %edx, %esp
-
- // The Guest's GDT we so carefully
- // Placed in the "struct lguest_pages" before
- lgdt LGUEST_PAGES_guest_gdt_desc(%eax)
-
- // The Guest's IDT we did partially
- // Copy to "struct lguest_pages" as well.
- lidt LGUEST_PAGES_guest_idt_desc(%eax)
-
- // The TSS entry which controls traps
- // Must be loaded up with "ltr" now:
- // The GDT entry that TSS uses
- // Changes type when we load it: damn Intel!
- // For after we switch over our page tables
- // That entry will be read-only: we'd crash.
- movl $(GDT_ENTRY_TSS*8), %edx
- ltr %dx
-
- // Look back now, before we take this last step!
- // The Host's TSS entry was also marked used;
- // Let's clear it again for our return.
- // The GDT descriptor of the Host
- // Points to the table after two "size" bytes
- movl (LGUEST_PAGES_host_gdt_desc+2)(%eax), %edx
- // Clear "used" from type field (byte 5, bit 2)
- andb $0xFD, (GDT_ENTRY_TSS*8 + 5)(%edx)
-
- // Once our page table's switched, the Guest is live!
- // The Host fades as we run this final step.
- // Our "struct lguest_pages" is now read-only.
- movl %ebx, %cr3
-
- // The page table change did one tricky thing:
- // The Guest's register page has been mapped
- // Writable under our %esp (stack) --
- // We can simply pop off all Guest regs.
- popl %eax
- popl %ebx
- popl %ecx
- popl %edx
- popl %esi
- popl %edi
- popl %ebp
- popl %gs
- popl %fs
- popl %ds
- popl %es
-
- // Near the base of the stack lurk two strange fields
- // Which we fill as we exit the Guest
- // These are the trap number and its error
- // We can simply step past them on our way.
- addl $8, %esp
-
- // The last five stack slots hold return address
- // And everything needed to switch privilege
- // From Switcher's level 0 to Guest's 1,
- // And the stack where the Guest had last left it.
- // Interrupts are turned back on: we are Guest.
- iret
-
-// We tread two paths to switch back to the Host
-// Yet both must save Guest state and restore Host
-// So we put the routine in a macro.
-#define SWITCH_TO_HOST \
- /* We save the Guest state: all registers first \
- * Laid out just as "struct lguest_regs" defines */ \
- pushl %es; \
- pushl %ds; \
- pushl %fs; \
- pushl %gs; \
- pushl %ebp; \
- pushl %edi; \
- pushl %esi; \
- pushl %edx; \
- pushl %ecx; \
- pushl %ebx; \
- pushl %eax; \
- /* Our stack and our code are using segments \
- * Set in the TSS and IDT \
- * Yet if we were to touch data we'd use \
- * Whatever data segment the Guest had. \
- * Load the lguest ds segment for now. */ \
- movl $(LGUEST_DS), %eax; \
- movl %eax, %ds; \
- /* So where are we? Which CPU, which struct? \
- * The stack is our clue: our TSS starts \
- * It at the end of "struct lguest_pages". \
- * Or we may have stumbled while restoring \
- * Our Guest segment regs while in switch_to_guest, \
- * The fault pushed atop that part-unwound stack. \
- * If we round the stack down to the page start \
- * We're at the start of "struct lguest_pages". */ \
- movl %esp, %eax; \
- andl $(~(1 << PAGE_SHIFT - 1)), %eax; \
- /* Save our trap number: the switch will obscure it \
- * (In the Host the Guest regs are not mapped here) \
- * %ebx holds it safe for deliver_to_host */ \
- movl LGUEST_PAGES_regs_trapnum(%eax), %ebx; \
- /* The Host GDT, IDT and stack! \
- * All these lie safely hidden from the Guest: \
- * We must return to the Host page tables \
- * (Hence that was saved in struct lguest_pages) */ \
- movl LGUEST_PAGES_host_cr3(%eax), %edx; \
- movl %edx, %cr3; \
- /* As before, when we looked back at the Host \
- * As we left and marked TSS unused \
- * So must we now for the Guest left behind. */ \
- andb $0xFD, (LGUEST_PAGES_guest_gdt+GDT_ENTRY_TSS*8+5)(%eax); \
- /* Switch to Host's GDT, IDT. */ \
- lgdt LGUEST_PAGES_host_gdt_desc(%eax); \
- lidt LGUEST_PAGES_host_idt_desc(%eax); \
- /* Restore the Host's stack where its saved regs lie */ \
- movl LGUEST_PAGES_host_sp(%eax), %esp; \
- /* Last the TSS: our Host is returned */ \
- movl $(GDT_ENTRY_TSS*8), %edx; \
- ltr %dx; \
- /* Restore now the regs saved right at the first. */ \
- popl %ebp; \
- popl %fs; \
- popl %gs; \
- popl %ds; \
- popl %es
-
-// The first path is trod when the Guest has trapped:
-// (Which trap it was has been pushed on the stack).
-// We need only switch back, and the Host will decode
-// Why we came home, and what needs to be done.
-return_to_host:
- SWITCH_TO_HOST
- iret
-
-// We are lead to the second path like so:
-// An interrupt, with some cause external
-// Has ajerked us rudely from the Guest's code
-// Again we must return home to the Host
-deliver_to_host:
- SWITCH_TO_HOST
- // But now we must go home via that place
- // Where that interrupt was supposed to go
- // Had we not been ensconced, running the Guest.
- // Here we see the trickness of run_guest_once():
- // The Host stack is formed like an interrupt
- // With EIP, CS and EFLAGS layered.
- // Interrupt handlers end with "iret"
- // And that will take us home at long long last.
-
- // But first we must find the handler to call!
- // The IDT descriptor for the Host
- // Has two bytes for size, and four for address:
- // %edx will hold it for us for now.
- movl (LGUEST_PAGES_host_idt_desc+2)(%eax), %edx
- // We now know the table address we need,
- // And saved the trap's number inside %ebx.
- // Yet the pointer to the handler is smeared
- // Across the bits of the table entry.
- // What oracle can tell us how to extract
- // From such a convoluted encoding?
- // I consulted gcc, and it gave
- // These instructions, which I gladly credit:
- leal (%edx,%ebx,8), %eax
- movzwl (%eax),%edx
- movl 4(%eax), %eax
- xorw %ax, %ax
- orl %eax, %edx
- // Now the address of the handler's in %edx
- // We call it now: its "iret" drops us home.
- jmp *%edx
-
-// Every interrupt can come to us here
-// But we must truly tell each apart.
-// They number two hundred and fifty six
-// And each must land in a different spot,
-// Push its number on stack, and join the stream.
-
-// And worse, a mere six of the traps stand apart
-// And push on their stack an addition:
-// An error number, thirty two bits long
-// So we punish the other two fifty
-// And make them push a zero so they match.
-
-// Yet two fifty six entries is long
-// And all will look most the same as the last
-// So we create a macro which can make
-// As many entries as we need to fill.
-
-// Note the change to .data then .text:
-// We plant the address of each entry
-// Into a (data) table for the Host
-// To know where each Guest interrupt should go.
-.macro IRQ_STUB N TARGET
- .data; .long 1f; .text; 1:
- // Trap eight, ten through fourteen and seventeen
- // Supply an error number. Else zero.
- .if (\N <> 8) && (\N < 10 || \N > 14) && (\N <> 17)
- pushl $0
- .endif
- pushl $\N
- jmp \TARGET
- ALIGN
-.endm
-
-// This macro creates numerous entries
-// Using GAS macros which out-power C's.
-.macro IRQ_STUBS FIRST LAST TARGET
- irq=\FIRST
- .rept \LAST-\FIRST+1
- IRQ_STUB irq \TARGET
- irq=irq+1
- .endr
-.endm
-
-// Here's the marker for our pointer table
-// Laid in the data section just before
-// Each macro places the address of code
-// Forming an array: each one points to text
-// Which handles interrupt in its turn.
-.data
-.global default_idt_entries
-default_idt_entries:
-.text
- // The first two traps go straight back to the Host
- IRQ_STUBS 0 1 return_to_host
- // We'll say nothing, yet, about NMI
- IRQ_STUB 2 handle_nmi
- // Other traps also return to the Host
- IRQ_STUBS 3 31 return_to_host
- // All interrupts go via their handlers
- IRQ_STUBS 32 127 deliver_to_host
- // 'Cept system calls coming from userspace
- // Are to go to the Guest, never the Host.
- IRQ_STUB 128 return_to_host
- IRQ_STUBS 129 255 deliver_to_host
-
-// The NMI, what a fabulous beast
-// Which swoops in and stops us no matter that
-// We're suspended between heaven and hell,
-// (Or more likely between the Host and Guest)
-// When in it comes! We are dazed and confused
-// So we do the simplest thing which one can.
-// Though we've pushed the trap number and zero
-// We discard them, return, and hope we live.
-handle_nmi:
- addl $8, %esp
- iret
-
-// We are done; all that's left is Mastery
-// And "make Mastery" is a journey long
-// Designed to make your fingers itch to code.
-
-// Here ends the text, the file and poem.
-ENTRY(end_switcher_text)