aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAvi Kivity <avi@qumranet.com>2007-01-11 10:14:53 +0000
committerAvi Kivity <avi@qumranet.com>2007-01-11 10:14:53 +0000
commit9ae1ddefd2d83c632ff0602a0215d41a99baf87c (patch)
treee3db98955ea5385ccd27e0dae1d63679c7474dfa
parent9253df2f6890d86d9714574a8b06213956db007b (diff)
kvm: release: merge from trunkkvm-11
........ r4266 | avi | 2007-01-07 16:42:17 +0200 (Sun, 07 Jan 2007) | 2 lines kvm: configure: support --disable-gcc-check ........ r4267 | avi | 2007-01-07 18:00:17 +0200 (Sun, 07 Jan 2007) | 4 lines kvm: make sure there is a vcpu context loaded when destroying the mmu this makes the vmwrite errors on vm shutdown go away. ........ r4268 | dor | 2007-01-08 13:25:07 +0200 (Mon, 08 Jan 2007) | 4 lines Replace the nic model with rtl8139. This doubles the guest network bandwidth (about 33Mbs). ........ r4269 | avi | 2007-01-08 15:28:51 +0200 (Mon, 08 Jan 2007) | 32 lines kvm: fix race between mmio reads and injected interrupts the kvm mmio read path looks like: 1. guest read faults 2. kvm emulates read, calls emulator_read_emulated() 3. fails as a read requires userspace help 4. exit to userspace 5. userspace emulates read, kvm sets vcpu->mmio_read_completed 6. re-enter guest, fault again 7. kvm emulates read, calls emulator_read_emulated() 8. succeeds as vcpu->mmio_read_completed is set 9. instruction completes and guest is resumed the problem surfaces if the userspace exit (step 5) also requests an interrupt injection. in that case, the guest does not re-execute the original instruction, but the interrupt handler. The next time an mmio read is executed (likely for a different address), step 3 will find vcpu->mmio_read_completed set and return the value read for the original instruction. the problem manifested itself in a few annoying ways: - little squares appear randomly on console when switching virtual terminals - ne2000 fails under nfs read load - rtl8139 complains about "pci errors" even though the device model is incapable of issuing them. fix by skipping interrupt injection if an mmio read is pending. a better fix is to avoid re-entry into the guest, and re-emulating immediately instead. however that's a bit more complex. ........ 
r4270 | avi | 2007-01-08 17:40:43 +0200 (Mon, 08 Jan 2007) | 14 lines kvm: x86 emulator: fix bit string instructions the various bit string instructions (bts, btc, etc.) fail to adjust the address correctly if the bit address is beyond BITS_PER_LONG. this bug crept in as the emulator originally relied on cr2 to contain the memory address; however we now decode it from the mod r/m bits, and must adjust the offset to account for large bit indices. the patch is rather large because it switches src and dst decoding around, so that the bit index is available when decoding the memory address. this fixes workloads like the FC5 installer. ........ r4271 | avi | 2007-01-09 11:25:27 +0200 (Tue, 09 Jan 2007) | 32 lines kvm: kernel profiling support From: Ingo Molnar <mingo@elte.hu> This adds the profile=kvm boot option, which enables KVM to profile VM exits. Use: "readprofile -m ./System.map | sort -n" to see the resulting output: [...] 18246 serial_out 148.3415 18945 native_flush_tlb 378.9000 23618 serial_in 212.7748 29279 __spin_unlock_irq 622.9574 43447 native_apic_write 2068.9048 52702 enable_8259A_irq 742.2817 54250 vgacon_scroll 89.3740 67394 ide_inb 6126.7273 79514 copy_page_range 98.1654 84868 do_wp_page 86.6000 140266 pit_read 783.6089 151436 ide_outb 25239.3333 152668 native_io_delay 21809.7143 174783 mask_and_ack_8259A 783.7803 362404 native_set_pte_at 36240.4000 1688747 total 0.5009 Signed-off-by: Ingo Molnar <mingo@elte.hu> Acked-by: Avi Kivity <avi@qumranet.com> Signed-off-by: Andrew Morton <akpm@osdl.org> ........ r4273 | avi | 2007-01-10 15:18:38 +0200 (Wed, 10 Jan 2007) | 2 lines kvm: test: add printf implementation for testing ........ r4274 | avi | 2007-01-10 16:57:01 +0200 (Wed, 10 Jan 2007) | 4 lines kvm: test: enter 64-bit mode with ss == 0 needed for iretq to execute ........ 
r4275 | avi | 2007-01-10 19:36:04 +0200 (Wed, 10 Jan 2007) | 8 lines kvm: testsuite: add mmu tests access.c checks every combination of the pte flags, cpl, cr0.wp, and access type for the correct response. in this initial version only the pte.present bit is tested, and only read access is checked. ........ r4276 | dor | 2007-01-11 10:26:57 +0200 (Thu, 11 Jan 2007) | 2 lines Fix opensuse usbdevice_fs compilation issue in the same way xen has fixed it. ........ r4277 | uri | 2007-01-11 10:29:04 +0200 (Thu, 11 Jan 2007) | 4 lines kvm script: use commands instead of popen4, aka let lsmod rest in peace Before lsmod was a zombie as long as the kvm script was running. ........ r4278 | avi | 2007-01-11 10:45:56 +0200 (Thu, 11 Jan 2007) | 9 lines kvm: fix asm constraints with CONFIG_FRAME_POINTER=n a "g" constraint may place a local variable in an %rsp-relative memory operand. but if your assembly changes %rsp, the operand points to the wrong location. an "r" constraint fixes that. thanks to Ingo Molnar for neatly bisecting the problem. ........ r4279 | avi | 2007-01-11 11:34:35 +0200 (Thu, 11 Jan 2007) | 16 lines kvm: fix bogus pagefault on writable pages if a page is marked as dirty in the guest pte, set_pte_common() can set the writable bit on newly-instantiated shadow pte. this optimization avoids a write fault after the initial read fault. however, if a write fault instantiates the pte, fix_write_pf() incorrectly reports the fault as a guest page fault, and the guest oopses on what appears to be a correctly-mapped page. fix is to detect the condition and only report a guest page fault on a user access to a kernel page. with the fix, a kvm guest can survive a whole night of running the kernel hacker's screensaver (make -j9 in a loop). ........
-rwxr-xr-xkvm/configure7
-rw-r--r--kvm/kernel/external-module-compat.h12
-rw-r--r--kvm/kernel/kvm_main.c2
-rw-r--r--kvm/kernel/paging_tmpl.h2
-rw-r--r--kvm/kernel/svm.c11
-rw-r--r--kvm/kernel/vmx.c12
-rw-r--r--kvm/kernel/x86_emulate.c98
-rwxr-xr-xkvm/kvm5
-rw-r--r--kvm/user/Makefile7
-rw-r--r--kvm/user/flat.lds2
-rw-r--r--kvm/user/test/access.c273
-rw-r--r--kvm/user/test/cstart64.S2
-rw-r--r--kvm/user/test/printf.c158
-rw-r--r--kvm/user/test/printf.h2
-rw-r--r--usb-linux.c3
15 files changed, 542 insertions, 54 deletions
diff --git a/kvm/configure b/kvm/configure
index a819a4431..80117c8da 100755
--- a/kvm/configure
+++ b/kvm/configure
@@ -4,6 +4,7 @@ prefix=/usr/local
kerneldir=/lib/modules/$(uname -r)/build
want_module=1
qemu_cc=$(ls /usr/bin/gcc3* /usr/bin/gcc-3* 2>/dev/null | tail -n1)
+disable_gcc_check=
usage() {
cat <<-EOF
@@ -15,6 +16,8 @@ usage() {
--with-patched-kernel don't use external module
--kerneldir=DIR kernel build directory ($kerneldir)
--qemu-cc="$qemu_cc" compiler for qemu (needs gcc3.x) ($qemu_cc)
+ --disable-gcc-check don't insist on gcc-3.x
+ - this will break running without kvm
EOF
exit 1
}
@@ -39,6 +42,9 @@ while [[ "$1" = -* ]]; do
--qemu-cc)
qemu_cc="$arg"
;;
+ --disable-gcc-check)
+ disable_gcc_check=1
+ ;;
--help)
usage
;;
@@ -72,6 +78,7 @@ target_cpu() {
--extra-ldflags="-L $PWD/../user" \
--enable-kvm --kernel-path="$libkvm_kerneldir" \
--enable-alsa \
+ ${disable_gcc_check:+"--disable-gcc-check"} \
--prefix="$prefix"
)
diff --git a/kvm/kernel/external-module-compat.h b/kvm/kernel/external-module-compat.h
index 3c445124d..8c50aa85b 100644
--- a/kvm/kernel/external-module-compat.h
+++ b/kvm/kernel/external-module-compat.h
@@ -19,3 +19,15 @@
#ifndef GFP_NOWAIT
#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH)
#endif
+
+
+/*
+ * kvm profiling support needs 2.6.20
+ */
+#include <linux/profile.h>
+
+#ifndef KVM_PROFILING
+#define KVM_PROFILING 1234
+#define prof_on 4321
+#endif
+
diff --git a/kvm/kernel/kvm_main.c b/kvm/kernel/kvm_main.c
index 67c115496..be4651abe 100644
--- a/kvm/kernel/kvm_main.c
+++ b/kvm/kernel/kvm_main.c
@@ -272,7 +272,9 @@ static void kvm_free_physmem(struct kvm *kvm)
static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
{
+ vcpu_load(vcpu->kvm, vcpu_slot(vcpu));
kvm_mmu_destroy(vcpu);
+ vcpu_put(vcpu);
kvm_arch_ops->vcpu_free(vcpu);
}
diff --git a/kvm/kernel/paging_tmpl.h b/kvm/kernel/paging_tmpl.h
index 2dbf4307e..6bc41950f 100644
--- a/kvm/kernel/paging_tmpl.h
+++ b/kvm/kernel/paging_tmpl.h
@@ -274,7 +274,7 @@ static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *page;
if (is_writeble_pte(*shadow_ent))
- return 0;
+ return !user || (*shadow_ent & PT_USER_MASK);
writable_shadow = *shadow_ent & PT_SHADOW_WRITABLE_MASK;
if (user) {
diff --git a/kvm/kernel/svm.c b/kvm/kernel/svm.c
index ccc06b1b9..7397bfbbc 100644
--- a/kvm/kernel/svm.c
+++ b/kvm/kernel/svm.c
@@ -17,6 +17,7 @@
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
+#include <linux/profile.h>
#include <asm/desc.h>
#include "kvm_svm.h"
@@ -1406,7 +1407,8 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
int r;
again:
- do_interrupt_requests(vcpu, kvm_run);
+ if (!vcpu->mmio_read_completed)
+ do_interrupt_requests(vcpu, kvm_run);
clgi();
@@ -1558,6 +1560,13 @@ again:
reload_tss(vcpu);
+ /*
+ * Profile KVM exit RIPs:
+ */
+ if (unlikely(prof_on == KVM_PROFILING))
+ profile_hit(KVM_PROFILING,
+ (void *)(unsigned long)vcpu->svm->vmcb->save.rip);
+
stgi();
kvm_reput_irq(vcpu);
diff --git a/kvm/kernel/vmx.c b/kvm/kernel/vmx.c
index d4701cb4c..ed1a1460d 100644
--- a/kvm/kernel/vmx.c
+++ b/kvm/kernel/vmx.c
@@ -21,6 +21,7 @@
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/highmem.h>
+#include <linux/profile.h>
#include <asm/io.h>
#include <asm/desc.h>
@@ -1716,7 +1717,8 @@ again:
vmcs_writel(HOST_GS_BASE, segment_base(gs_sel));
#endif
- do_interrupt_requests(vcpu, kvm_run);
+ if (!vcpu->mmio_read_completed)
+ do_interrupt_requests(vcpu, kvm_run);
if (vcpu->guest_debug.enabled)
kvm_guest_debug_pre(vcpu);
@@ -1823,7 +1825,7 @@ again:
#endif
"setbe %0 \n\t"
"popf \n\t"
- : "=g" (fail)
+ : "=r" (fail)
: "r"(vcpu->launched), "d"((unsigned long)HOST_RSP),
"c"(vcpu),
[rax]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RAX])),
@@ -1859,6 +1861,12 @@ again:
asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
#endif
+ /*
+ * Profile KVM exit RIPs:
+ */
+ if (unlikely(prof_on == KVM_PROFILING))
+ profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP));
+
kvm_run->exit_type = 0;
if (fail) {
kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY;
diff --git a/kvm/kernel/x86_emulate.c b/kvm/kernel/x86_emulate.c
index be70795b4..7513cddb9 100644
--- a/kvm/kernel/x86_emulate.c
+++ b/kvm/kernel/x86_emulate.c
@@ -61,6 +61,7 @@
#define ModRM (1<<6)
/* Destination is only written; never read. */
#define Mov (1<<7)
+#define BitOp (1<<8)
static u8 opcode_table[256] = {
/* 0x00 - 0x07 */
@@ -148,7 +149,7 @@ static u8 opcode_table[256] = {
0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM
};
-static u8 twobyte_table[256] = {
+static u16 twobyte_table[256] = {
/* 0x00 - 0x0F */
0, SrcMem | ModRM | DstReg, 0, 0, 0, 0, ImplicitOps, 0,
0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
@@ -180,16 +181,16 @@ static u8 twobyte_table[256] = {
/* 0x90 - 0x9F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xA0 - 0xA7 */
- 0, 0, 0, DstMem | SrcReg | ModRM, 0, 0, 0, 0,
+ 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0,
/* 0xA8 - 0xAF */
- 0, 0, 0, DstMem | SrcReg | ModRM, 0, 0, 0, 0,
+ 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0,
/* 0xB0 - 0xB7 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0,
- DstMem | SrcReg | ModRM,
+ DstMem | SrcReg | ModRM | BitOp,
0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
DstReg | SrcMem16 | ModRM | Mov,
/* 0xB8 - 0xBF */
- 0, 0, DstMem | SrcImmByte | ModRM, DstMem | SrcReg | ModRM,
+ 0, 0, DstMem | SrcImmByte | ModRM, DstMem | SrcReg | ModRM | BitOp,
0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
DstReg | SrcMem16 | ModRM | Mov,
/* 0xC0 - 0xCF */
@@ -469,7 +470,8 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
int
x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
- u8 b, d, sib, twobyte = 0, rex_prefix = 0;
+ unsigned d;
+ u8 b, sib, twobyte = 0, rex_prefix = 0;
u8 modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
unsigned long *override_base = NULL;
unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i;
@@ -726,46 +728,6 @@ done_prefixes:
;
}
- /* Decode and fetch the destination operand: register or memory. */
- switch (d & DstMask) {
- case ImplicitOps:
- /* Special instructions do their own operand decoding. */
- goto special_insn;
- case DstReg:
- dst.type = OP_REG;
- if ((d & ByteOp)
- && !(twobyte_table && (b == 0xb6 || b == 0xb7))) {
- dst.ptr = decode_register(modrm_reg, _regs,
- (rex_prefix == 0));
- dst.val = *(u8 *) dst.ptr;
- dst.bytes = 1;
- } else {
- dst.ptr = decode_register(modrm_reg, _regs, 0);
- switch ((dst.bytes = op_bytes)) {
- case 2:
- dst.val = *(u16 *)dst.ptr;
- break;
- case 4:
- dst.val = *(u32 *)dst.ptr;
- break;
- case 8:
- dst.val = *(u64 *)dst.ptr;
- break;
- }
- }
- break;
- case DstMem:
- dst.type = OP_MEM;
- dst.ptr = (unsigned long *)cr2;
- dst.bytes = (d & ByteOp) ? 1 : op_bytes;
- if (!(d & Mov) && /* optimisation - avoid slow emulated read */
- ((rc = ops->read_emulated((unsigned long)dst.ptr,
- &dst.val, dst.bytes, ctxt)) != 0))
- goto done;
- break;
- }
- dst.orig_val = dst.val;
-
/*
* Decode and fetch the source operand: register, memory
* or immediate.
@@ -838,6 +800,50 @@ done_prefixes:
break;
}
+ /* Decode and fetch the destination operand: register or memory. */
+ switch (d & DstMask) {
+ case ImplicitOps:
+ /* Special instructions do their own operand decoding. */
+ goto special_insn;
+ case DstReg:
+ dst.type = OP_REG;
+ if ((d & ByteOp)
+ && !(twobyte_table && (b == 0xb6 || b == 0xb7))) {
+ dst.ptr = decode_register(modrm_reg, _regs,
+ (rex_prefix == 0));
+ dst.val = *(u8 *) dst.ptr;
+ dst.bytes = 1;
+ } else {
+ dst.ptr = decode_register(modrm_reg, _regs, 0);
+ switch ((dst.bytes = op_bytes)) {
+ case 2:
+ dst.val = *(u16 *)dst.ptr;
+ break;
+ case 4:
+ dst.val = *(u32 *)dst.ptr;
+ break;
+ case 8:
+ dst.val = *(u64 *)dst.ptr;
+ break;
+ }
+ }
+ break;
+ case DstMem:
+ dst.type = OP_MEM;
+ dst.ptr = (unsigned long *)cr2;
+ dst.bytes = (d & ByteOp) ? 1 : op_bytes;
+ if (d & BitOp) {
+ dst.ptr += src.val / BITS_PER_LONG;
+ dst.bytes = sizeof(long);
+ }
+ if (!(d & Mov) && /* optimisation - avoid slow emulated read */
+ ((rc = ops->read_emulated((unsigned long)dst.ptr,
+ &dst.val, dst.bytes, ctxt)) != 0))
+ goto done;
+ break;
+ }
+ dst.orig_val = dst.val;
+
if (twobyte)
goto twobyte_insn;
diff --git a/kvm/kvm b/kvm/kvm
index f9a0b3371..bb354e4ee 100755
--- a/kvm/kvm
+++ b/kvm/kvm
@@ -89,7 +89,8 @@ if len(args) > 1:
def remove_module(module):
module = module.replace('-', '_')
- for x in os.popen4('/sbin/lsmod')[1].readlines():
+ lines = commands.getoutput('/sbin/lsmod').split('\n')
+ for x in lines:
if x.startswith(module + ' '):
if os.spawnl(os.P_WAIT, '/sbin/rmmod', 'rmmod', module) != 0:
raise Exception('failed to remove %s module' % (module,))
@@ -177,7 +178,7 @@ if not options.notap:
mac_components[0] = 'a0'
mac = ':'.join(mac_components)
- qemu_args += ('-net', 'nic,macaddr=%s' % (mac,),
+ qemu_args += ('-net', 'nic,macaddr=%s,model=rtl8139' % (mac,),
'-net', 'tap,script=/etc/kvm/qemu-ifup',)
if options.vnc:
diff --git a/kvm/user/Makefile b/kvm/user/Makefile
index 885e0f55c..625f0b49e 100644
--- a/kvm/user/Makefile
+++ b/kvm/user/Makefile
@@ -22,7 +22,8 @@ libkvm.a: kvmctl.o
$(AR) rcs $@ $^
flatfiles: test/simple.flat test/stringio.flat test/memtest1.flat \
- test/irq.flat test/sieve.flat test/bootstrap
+ test/irq.flat test/sieve.flat test/bootstrap \
+ test/access.flat
install:
install -D kvmctl.h $(DESTDIR)/$(PREFIX)/include/kvmctl.h
@@ -41,10 +42,14 @@ test/bootstrap: test/bootstrap.o
test/irq.flat: test/print.o
+test/access.flat: test/cstart64.o test/access.o test/printf.o test/print.o
+
test/sieve.flat: test/cstart64.o test/sieve.o test/print.o test/vm.o
test/test32.flat: test/test32.o
+test/%.o: CFLAGS += -std=gnu99 -ffreestanding
+
-include .*.d
clean:
diff --git a/kvm/user/flat.lds b/kvm/user/flat.lds
index b1734632f..ff9693c92 100644
--- a/kvm/user/flat.lds
+++ b/kvm/user/flat.lds
@@ -4,7 +4,7 @@ SECTIONS
{
. = 1M;
stext = .;
- .text : { *(.init) *(.text) }
+ .text : { *(.init) *(.text) *(.text.*) }
. = ALIGN(4K);
.data : { *(.data) }
. = ALIGN(16);
diff --git a/kvm/user/test/access.c b/kvm/user/test/access.c
new file mode 100644
index 000000000..21bb0da5a
--- /dev/null
+++ b/kvm/user/test/access.c
@@ -0,0 +1,273 @@
+
+#include "printf.h"
+
+typedef unsigned long pt_element_t;
+
+#define PAGE_SIZE ((pt_element_t)4096)
+#define PAGE_MASK (~(PAGE_SIZE-1))
+
+#define PT_BASE_ADDR_MASK ((pt_element_t)((((pt_element_t)1 << 40) - 1) & PAGE_MASK))
+
+#define PT_PRESENT_MASK ((pt_element_t)1 << 0)
+#define PT_WRITABLE_MASK ((pt_element_t)1 << 1)
+#define PT_USER_MASK ((pt_element_t)1 << 2)
+
+#define CR0_WP_MASK (1UL << 16)
+
+/*
+ * page table access check tests
+ */
+
+/*
+ * Indices into ac_test_t.flags[]; each enabled name is one test dimension.
+ * Only AC_PTE_PRESENT is enabled.  The commented-out names are not part of
+ * the enum and are kept as placeholders.  NR_AC_FLAGS counts the enabled
+ * flags and sizes the flags[] array.
+ */
+enum {
+ AC_PTE_PRESENT,
+ // AC_PTE_WRITABLE,
+ // AC_PTE_USER,
+ // AC_PTE_ACCESSED,
+ // AC_PTE_DIRTY,
+ // AC_PTE_NX,
+
+ // AC_CPU_CR0_WP,
+ // AC_CPU_EFER_NX,
+
+ // AC_ACCESS_USER,
+ // AC_ACCESS_WRITE,
+ // AC_ACCESS_FETCH,
+ // AC_ACCESS_PTE,
+
+ NR_AC_FLAGS
+};
+
+/*
+ * Printable name of each AC_* flag, indexed by the flag's enum value.
+ * ac_test_exec() prints the names of the flags that are set for a test.
+ */
+const char *ac_names[] = {
+ [AC_PTE_PRESENT] = "pte.p",
+};
+
+/*
+ * Turn a physical address into a pointer the test can dereference.
+ * The conversion is a plain cast: the value of phys is used unchanged as
+ * the pointer value.
+ */
+static inline void *va(pt_element_t phys)
+{
+	void *ptr = (void *)phys;
+
+	return ptr;
+}
+
+/* Return the current contents of the CR0 control register. */
+static unsigned long read_cr0()
+{
+	unsigned long value;
+
+	asm volatile ("mov %%cr0, %0" : "=r"(value));
+	return value;
+}
+
+/* Load the value passed in cr0 into the CR0 control register. */
+static void write_cr0(unsigned long cr0)
+{
+ asm volatile ("mov %0, %%cr0" : : "r"(cr0));
+}
+
+/*
+ * One entry of the interrupt descriptor table, as filled in by
+ * set_idt_entry().  The handler address is split over offset0 (bits 0-15),
+ * offset1 (bits 16-31) and offset2 (bits 32-63).
+ * NOTE(review): the field layout presumably mirrors the x86-64 gate
+ * descriptor format; confirm against the architecture manual.
+ */
+typedef struct {
+ unsigned short offset0;
+ unsigned short selector;
+ unsigned short ist : 3;
+ unsigned short : 5;
+ unsigned short type : 4;
+ unsigned short : 1;
+ unsigned short dpl : 2;
+ unsigned short p : 1;
+ unsigned short offset1;
+ unsigned offset2;
+ unsigned reserved;
+} idt_entry_t;
+
+/*
+ * State of one access test run.
+ */
+typedef struct {
+ /* current combination of test flags, indexed by the AC_* enum */
+ unsigned flags[NR_AC_FLAGS];
+ /* virtual address the test maps and then reads from */
+ void *virt;
+ /* physical address installed in the final-level pte for virt */
+ pt_element_t phys;
+ /* physical address of the next page ac_test_alloc_pt() will hand out */
+ pt_element_t pt_pool;
+ /* interrupt descriptor table loaded by ac_test_init() */
+ idt_entry_t idt[256];
+} ac_test_t;
+
+/*
+ * Memory operand handed to the lidt instruction by lidt(): the table limit
+ * (size in bytes minus one) followed by the table's address.  Packed so
+ * that no padding is inserted between the two fields.
+ */
+typedef struct {
+ unsigned short limit;
+ unsigned long linear_addr;
+} __attribute__((packed)) descriptor_table_t;
+
+/*
+ * Point the IDT register at the table idt, which holds nentries entries.
+ * The limit is the table size in bytes minus one.
+ */
+void lidt(idt_entry_t *idt, int nentries)
+{
+	descriptor_table_t desc = {
+		.limit = nentries * sizeof(*idt) - 1,
+		.linear_addr = (unsigned long)idt,
+	};
+
+	asm volatile ("lidt %0" : : "m"(desc));
+}
+
+/*
+ * Fill the n bytes starting at a with the value v.
+ * Unlike the standard library function this returns nothing and takes the
+ * length as an int.
+ */
+void memset(void *a, unsigned char v, int n)
+{
+	unsigned char *dst;
+
+	for (dst = a; n != 0; --n)
+		*dst++ = v;
+}
+
+/*
+ * Return the current value of the CS segment register.
+ *
+ * set_idt_entry() stores the result as the selector of the gate it builds,
+ * so the value read from the register must actually be returned.
+ */
+unsigned short read_cs()
+{
+	unsigned short r;
+
+	asm volatile ("mov %%cs, %0" : "=r"(r));
+	return r;
+}
+
+/*
+ * Initialise the IDT entry e so that it dispatches to the handler at addr.
+ *
+ * The entry is cleared first, then filled with the current code segment
+ * selector, type 14, dpl 0, ist 0 and the present bit set.
+ */
+void set_idt_entry(idt_entry_t *e, void *addr)
+{
+	unsigned long handler = (unsigned long)addr;
+
+	memset(e, 0, sizeof *e);
+
+	/* The handler address is split across the three offset fields. */
+	e->offset0 = handler;
+	e->offset1 = handler >> 16;
+	e->offset2 = handler >> 32;
+
+	e->selector = read_cs();
+	e->ist = 0;
+	e->type = 14;
+	e->dpl = 0;
+	e->p = 1;
+}
+
+/*
+ * Set (wp != 0) or clear (wp == 0) the WP bit of CR0, leaving all other
+ * CR0 bits as they were.
+ */
+void set_cr0_wp(int wp)
+{
+	unsigned long value = read_cr0();
+
+	if (wp)
+		value |= CR0_WP_MASK;
+	else
+		value &= ~CR0_WP_MASK;
+	write_cr0(value);
+}
+
+/*
+ * Prepare the test state at: turn on CR0.WP, clear every test flag, pick
+ * the fixed virtual/physical addresses and the page-table pool, and load
+ * a fresh IDT whose vector 14 points at the page_fault handler.
+ */
+void ac_test_init(ac_test_t *at)
+{
+	extern char page_fault;
+	int flag;
+
+	printf("init\n");
+	set_cr0_wp(1);
+
+	flag = 0;
+	while (flag < NR_AC_FLAGS)
+		at->flags[flag++] = 0;
+
+	at->virt = (void *)0x123400000000;
+	at->phys = 32 * 1024 * 1024;
+	at->pt_pool = 33 * 1024 * 1024;
+
+	/* The IDT is loaded while still empty; the one entry is set after. */
+	memset(at->idt, 0, sizeof at->idt);
+	printf("lidt\n");
+	lidt(at->idt, 256);
+	set_idt_entry(&at->idt[14], &page_fault);
+	printf("ok\n");
+}
+
+/*
+ * Step at->flags[] to the next flag combination, treating the array as a
+ * binary counter whose lowest digit is flags[0].
+ *
+ * Returns 1 when a new combination was produced, or 0 when every flag was
+ * already set (all flags are then back at 0).
+ */
+int ac_test_bump(ac_test_t *at)
+{
+	int flag;
+
+	for (flag = 0; flag < NR_AC_FLAGS; ++flag) {
+		if (at->flags[flag]) {
+			/* carry into the next flag */
+			at->flags[flag] = 0;
+			continue;
+		}
+		at->flags[flag] = 1;
+		return 1;
+	}
+	return 0;
+}
+
+/* Return the current contents of the CR3 control register. */
+unsigned long read_cr3()
+{
+	unsigned long value;
+
+	asm volatile ("mov %%cr3, %0" : "=r"(value));
+
+	return value;
+}
+
+/*
+ * Execute the invlpg instruction on addr.  ac_test_setup_pte() calls this
+ * after rewriting the page-table entries for the test address.
+ */
+void invlpg(void *addr)
+{
+ asm volatile ("invlpg (%0)" : : "r"(addr));
+}
+
+/*
+ * Hand out the next page from the page-table pool: returns the current
+ * value of at->pt_pool and advances the pool by one page.
+ */
+pt_element_t ac_test_alloc_pt(ac_test_t *at)
+{
+	pt_element_t page = at->pt_pool;
+
+	at->pt_pool = page + PAGE_SIZE;
+	return page;
+}
+
+/*
+ * Install the mapping for at->virt according to the current test flags.
+ *
+ * Walks four table levels starting from the table CR3 points at.  At
+ * levels 4..2 a missing entry is filled with a page from the pool, and the
+ * entry is always made writable and user-accessible.  At level 1 the entry
+ * points at at->phys and is marked present only if AC_PTE_PRESENT is set.
+ * invlpg is executed on at->virt at the end.
+ */
+void ac_test_setup_pte(ac_test_t *at)
+{
+ unsigned long root = read_cr3();
+
+ printf("setting up pte\n");
+ for (int i = 4; i >= 1; --i) {
+ pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK);
+ /* 9 index bits per level, the lowest level starting at bit 12 */
+ unsigned index = ((unsigned long)at->virt >> (12 + (i-1) * 9)) & 511;
+ pt_element_t pte;
+ if (i != 1) {
+ pte = vroot[index];
+ /*
+ * NOTE(review): a page taken from the pool is not cleared here;
+ * presumably the pool memory is already zero - confirm.
+ */
+ if (!(pte & PT_PRESENT_MASK))
+ pte = ac_test_alloc_pt(at) | PT_PRESENT_MASK;
+ pte |= PT_WRITABLE_MASK | PT_USER_MASK;
+ } else {
+ pte = at->phys & PT_BASE_ADDR_MASK;
+ if (at->flags[AC_PTE_PRESENT])
+ pte |= PT_PRESENT_MASK;
+
+ }
+ vroot[index] = pte;
+ /* descend: the entry just written addresses the next-level table */
+ root = vroot[index];
+ }
+ invlpg(at->virt);
+}
+
+/*
+ * Read from at->virt and report whether the read faulted.
+ *
+ * Returns 1 if the access completed and 0 if the page_fault handler ran.
+ * *error_code receives whatever ended up in rbx; the handler pops the
+ * value on top of its stack into rbx, so the value is only meaningful
+ * when 0 is returned.
+ */
+int ac_test_do_access(ac_test_t *at, unsigned *error_code)
+{
+ static unsigned unique = 42;
+ int ret = 1;
+ unsigned e;
+
+ ++unique;
+
+ printf("attempting access\n");
+ unsigned r = unique;
+ /*
+ * The load sits at label fault1; fixed1 marks the instruction after it.
+ * ret lives in rax and e in rbx, the two registers the handler writes.
+ */
+ asm volatile ("fault1: mov (%[addr]), %[reg] \n\t"
+ "fixed1:"
+ : [reg]"+r"(r), "=m"(*error_code), "+a"(ret), "=b"(e)
+ : [addr]"r"(at->virt));
+
+ /*
+ * The handler, emitted out of line into section .text.pf: pop the top of
+ * the stack into rbx, overwrite the next stack slot with the address of
+ * fixed1, zero eax (so ret reads as 0) and iretq.
+ */
+ asm volatile (".section .text.pf \n\t"
+ "page_fault: \n\t"
+ "pop %rbx \n\t"
+ "movq $fixed1, (%rsp) \n\t"
+ "movl $0, %eax \n\t"
+ "iretq \n\t"
+ ".section .text");
+ *error_code = e;
+
+ return ret;
+}
+
+/*
+ * Run one test with the current flag combination: print the names of the
+ * flags that are set, install the page-table entry, attempt the access
+ * and print whether it succeeded or faulted (with the error code).
+ */
+void ac_test_exec(ac_test_t *at)
+{
+	unsigned error_code;
+	int flag, accessed;
+
+	printf("test");
+	for (flag = 0; flag < NR_AC_FLAGS; ++flag) {
+		if (!at->flags[flag])
+			continue;
+		printf(" %s", ac_names[flag]);
+	}
+	printf(" - ");
+
+	ac_test_setup_pte(at);
+	accessed = ac_test_do_access(at, &error_code);
+
+	if (!accessed)
+		printf("faulted %x", error_code);
+	else
+		printf("accessed");
+	printf("\n");
+}
+
+/*
+ * Initialise the test state and execute one test for every combination of
+ * flags, starting with all flags clear.
+ */
+void ac_test_run()
+{
+	static ac_test_t at;
+
+	printf("run\n");
+	ac_test_init(&at);
+	for (;;) {
+		ac_test_exec(&at);
+		if (!ac_test_bump(&at))
+			break;
+	}
+}
+
+/* Entry point of the test: print a banner, then run every access test. */
+int main()
+{
+ printf("starting test\n\n");
+ ac_test_run();
+ return 0;
+}
diff --git a/kvm/user/test/cstart64.S b/kvm/user/test/cstart64.S
index f413da2d3..f6a2b71dc 100644
--- a/kvm/user/test/cstart64.S
+++ b/kvm/user/test/cstart64.S
@@ -63,6 +63,8 @@ efer = 0xc0000080
.code64
start64:
lea stacktop, %rsp
+ mov $0, %eax
+ mov %ax, %ss
call main
1: hlt
jmp 1b
diff --git a/kvm/user/test/printf.c b/kvm/user/test/printf.c
new file mode 100644
index 000000000..7d675d568
--- /dev/null
+++ b/kvm/user/test/printf.c
@@ -0,0 +1,158 @@
+#include "printf.h"
+#include <stdarg.h>
+
+void print(const char *s);
+
+/*
+ * Output cursor used by the formatting helpers.
+ */
+typedef struct pstream {
+ /* position where the next stored character goes */
+ char *buffer;
+ /* number of characters that may still be stored */
+ int remain;
+ /* number of characters passed to addchar(), stored or not */
+ int added;
+} pstream_t;
+
+/*
+ * Append c to the stream.  The character is stored only while the stream
+ * has room left, but it is always counted in p->added.
+ */
+static void addchar(pstream_t *p, char c)
+{
+	++p->added;
+	if (!p->remain)
+		return;
+	*p->buffer++ = c;
+	--p->remain;
+}
+
+/* Append every character of the NUL-terminated string s to the stream. */
+void print_str(pstream_t *p, const char *s)
+{
+	const char *cur;
+
+	for (cur = s; *cur != '\0'; ++cur)
+		addchar(p, *cur);
+}
+
+static char digits[16] = "0123456789abcdef";
+
+/*
+ * Append the signed value n, written in the given base, to the stream.
+ * Negative values get a leading '-'.
+ *
+ * base must be at least 2 and at most 16 (digits[] has 16 entries), and
+ * large enough that the result fits in buf; the bases used in this file
+ * (10 and 16) do.
+ *
+ * The magnitude is computed in unsigned arithmetic so that the most
+ * negative long is handled; negating it as a signed value overflows.
+ */
+void print_int(pstream_t *ps, long n, int base)
+{
+	char buf[sizeof(long) * 3 + 2], *p = buf;
+	unsigned long mag = n;
+	int neg = n < 0, i;
+
+	if (neg)
+		mag = 0UL - mag;
+
+	/* Digits come out least-significant first; reversed below. */
+	while (mag) {
+		*p++ = digits[mag % base];
+		mag /= base;
+	}
+
+	if (neg)
+		*p++ = '-';
+
+	if (p == buf)
+		*p++ = '0';
+
+	for (i = 0; i < (p - buf) / 2; ++i) {
+		char tmp;
+
+		tmp = buf[i];
+		buf[i] = p[-1-i];
+		p[-1-i] = tmp;
+	}
+
+	*p = 0;
+
+	print_str(ps, buf);
+}
+
+/*
+ * Append the unsigned value n, written in the given base, to the stream.
+ *
+ * base must be at least 2 and at most 16 (digits[] has 16 entries), and
+ * large enough that the result fits in buf; the bases used in this file
+ * (10 and 16) do.
+ */
+void print_unsigned(pstream_t *ps, unsigned long n, int base)
+{
+	char buf[sizeof(long) * 3 + 1];
+	char *end = buf;
+	char *lo, *hi;
+
+	/* Produce the digits least-significant first. */
+	for (; n != 0; n /= base)
+		*end++ = digits[n % base];
+
+	if (end == buf)
+		*end++ = '0';
+	*end = 0;
+
+	/* Reverse the digits in place. */
+	for (lo = buf, hi = end - 1; lo < hi; ++lo, --hi) {
+		char tmp = *lo;
+
+		*lo = *hi;
+		*hi = tmp;
+	}
+
+	print_str(ps, buf);
+}
+
+/*
+ * Format fmt with the arguments in va into buf, storing at most size - 1
+ * characters followed by a terminating NUL.
+ *
+ * Supported conversions: %% (literal '%'), %d (int, decimal), %x
+ * (unsigned, hexadecimal), %p (pointer, "0x" + hexadecimal) and %s
+ * (string; a NULL pointer prints "(null)").  Any other character after
+ * '%' is emitted as is.  A lone '%' at the end of fmt is ignored.
+ *
+ * Returns the number of characters the complete output consists of,
+ * including the terminating NUL, even if it did not fit.  Note that this
+ * is one more than what the standard function of the same name returns.
+ *
+ * When size is 0 or negative nothing is written to buf at all.
+ */
+int vsnprintf(char *buf, int size, const char *fmt, va_list va)
+{
+	pstream_t s;
+
+	s.buffer = buf;
+	/* a non-positive size must not turn into "room left" in addchar() */
+	s.remain = size > 0 ? size - 1 : 0;
+	s.added = 0;
+	while (*fmt) {
+		char f = *fmt++;
+
+		if (f != '%') {
+			addchar(&s, f);
+			continue;
+		}
+		f = *fmt++;
+		switch (f) {
+		case '%':
+			addchar(&s, '%');
+			break;
+		case '\0':
+			/* step back so the loop sees the terminator and stops */
+			--fmt;
+			break;
+		case 'd':
+			print_int(&s, va_arg(va, int), 10);
+			break;
+		case 'x':
+			/* read as unsigned so high-bit values are not sign-extended */
+			print_unsigned(&s, va_arg(va, unsigned), 16);
+			break;
+		case 'p':
+			print_str(&s, "0x");
+			print_unsigned(&s, (unsigned long)va_arg(va, void *), 16);
+			break;
+		case 's': {
+			const char *str = va_arg(va, const char *);
+
+			print_str(&s, str ? str : "(null)");
+			break;
+		}
+		default:
+			addchar(&s, f);
+			break;
+		}
+	}
+	if (size > 0)
+		*s.buffer = 0;
+	++s.added;
+	return s.added;
+}
+
+
+/*
+ * Variadic front end for vsnprintf(): format fmt and its arguments into
+ * buf (of size bytes) and return vsnprintf()'s result.
+ */
+int snprintf(char *buf, int size, const char *fmt, ...)
+{
+	va_list args;
+	int count;
+
+	va_start(args, fmt);
+	count = vsnprintf(buf, size, fmt, args);
+	va_end(args);
+
+	return count;
+}
+
+/*
+ * Format fmt and its arguments into a 2000-byte stack buffer and pass the
+ * result to print().  Output that does not fit is truncated.  Returns
+ * vsnprintf()'s result.
+ */
+int printf(const char *fmt, ...)
+{
+	char line[2000];
+	va_list args;
+	int count;
+
+	va_start(args, fmt);
+	count = vsnprintf(line, sizeof line, fmt, args);
+	va_end(args);
+
+	print(line);
+
+	return count;
+}
diff --git a/kvm/user/test/printf.h b/kvm/user/test/printf.h
new file mode 100644
index 000000000..7d4b779ca
--- /dev/null
+++ b/kvm/user/test/printf.h
@@ -0,0 +1,2 @@
+
+int printf(const char *fmt, ...);
diff --git a/usb-linux.c b/usb-linux.c
index 6ab4782d8..ef6017970 100644
--- a/usb-linux.c
+++ b/usb-linux.c
@@ -26,6 +26,9 @@
#if defined(__linux__)
#include <dirent.h>
#include <sys/ioctl.h>
+/* Some versions of usbdevice_fs.h need __user to be defined for them. */
+/* This may (harmlessly) conflict with a definition in linux/compiler.h. */
+#define __user
#include <linux/usbdevice_fs.h>
#include <linux/version.h>