aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAvi Kivity <avi@qumranet.com>2007-02-08 08:05:19 +0000
committerAvi Kivity <avi@qumranet.com>2007-02-08 08:05:19 +0000
commitbec8a1bf38b6814601a6033e4a8acfb4f290bbc7 (patch)
tree3b35ee47ac450fd61d8eb6830122c628fe81f4e7
parent69aafe1a3bc7bb281b3abdc9aeefb47d8fe29059 (diff)
kvm: release: merge from trunkkvm-13
........ r4366 | avi | 2007-01-31 13:12:08 +0200 (Wed, 31 Jan 2007) | 10 lines kvm: mmu: add missing dirty page tracking cases we fail to mark a page dirty in three cases: - setting the accessed bit in a pte - setting the dirty bit in a pte - emulating a write into a pagetable this fix adds the missing cases. ........ r4367 | avi | 2007-01-31 14:29:17 +0200 (Wed, 31 Jan 2007) | 2 lines kvm: stats: better error reporting ........ r4368 | avi | 2007-01-31 16:57:19 +0200 (Wed, 31 Jan 2007) | 7 lines kvm: fix lockup on 32-bit intel hosts with nx disabled in the bios intel hosts, without long mode, and with nx support disabled in the bios have an efer that is readable but not writable. this causes a lockup on switch to guest mode (even though it should exit with reason 34 according to the documentation). ........ r4369 | itaish | 2007-01-31 20:03:16 +0200 (Wed, 31 Jan 2007) | 1 line Remove some printfs ........ r4370 | avi | 2007-02-01 10:31:16 +0200 (Thu, 01 Feb 2007) | 8 lines kvm: move virtualization deactivation from CPU_DEAD state to CPU_DOWN_PREPARE From: Jeremy Katz <katzj@redhat.com> this gives it more chances of surviving suspend. Signed-off-by: Jeremy Katz <katzj@redhat.com> ........ r4371 | avi | 2007-02-01 10:53:25 +0200 (Thu, 01 Feb 2007) | 2 lines kvm: fix a couple of printk()s ........ r4372 | avi | 2007-02-01 10:59:57 +0200 (Thu, 01 Feb 2007) | 2 lines kvm: cosmetics ........ r4373 | avi | 2007-02-01 14:22:49 +0200 (Thu, 01 Feb 2007) | 9 lines kvm: add MSR based hypercall API From: Ingo Molnar <mingo@elte.hu> this adds a special MSR based hypercall API to KVM. This is to be used by paravirtual kernels and virtual drivers. Signed-off-by: Ingo Molnar <mingo@elte.hu> ........ r4374 | avi | 2007-02-01 14:29:04 +0200 (Thu, 01 Feb 2007) | 4 lines kvm: add host hypercall support for vmx From: Ingo Molnar <mingo@elte.hu> ........ r4375 | avi | 2007-02-01 14:33:27 +0200 (Thu, 01 Feb 2007) | 2 lines kvm: cosmetics ........ 
r4376 | avi | 2007-02-01 14:37:51 +0200 (Thu, 01 Feb 2007) | 2 lines kvm: add hypercall host support for svm ........ r4377 | avi | 2007-02-01 15:28:48 +0200 (Thu, 01 Feb 2007) | 2 lines kvm: wire up hypercall handlers to a central arch-independent location ........ r4378 | dor | 2007-02-01 15:44:52 +0200 (Thu, 01 Feb 2007) | 6 lines Fix deassert of irq level interrupt. No one tried to deassert the pci irq. So the device kept on interrupting. Also deassert it if someone resets the device. Add debug ifdef instead of comments. ........ r4379 | dor | 2007-02-01 15:46:46 +0200 (Thu, 01 Feb 2007) | 5 lines Add interrupt support: - Add the device definitions from the hypercall device (temoprary until it shifts to a common file). - Add irq request + isr - Currently just print the content of the buffer in the isr ........ r4380 | dor | 2007-02-01 17:33:38 +0200 (Thu, 01 Feb 2007) | 2 lines Use matching uchar length for reading from io port. ........ r4381 | dor | 2007-02-01 17:44:11 +0200 (Thu, 01 Feb 2007) | 3 lines Add the tx hypercall path (guest->host). Note that the call to hypercall_tx is only a test now (otherwise "hellow host" is an interesting protocol...) ........ r4382 | avi | 2007-02-04 09:57:11 +0200 (Sun, 04 Feb 2007) | 6 lines kvm: fix 32-bit build From: Anthony Liguori <anthony@codemonkey.ws> Signed-off-by: Anthony Liguori <anthony@codemonkey.ws> ........ r4383 | avi | 2007-02-04 10:37:47 +0200 (Sun, 04 Feb 2007) | 10 lines kvm: vmx: hack set_cr0_no_modeswitch() to actually do modeswitch From: Markus Rechberger <markus.rechberger@amd.com> From: Joerg Roedel <joerg.roedel@amd.com> the whole thing is rotten, but this allows vmx to boot with the guest reboot fix. Signed-off-by: Markus Rechberger <markus.rechberger@amd.com> Signed-off-by: Joerg Roedel <joerg.roedel@amd.com> ........ 
r4384 | avi | 2007-02-04 10:38:53 +0200 (Sun, 04 Feb 2007) | 10 lines kvm: support guest reboot From: Markus Rechberger <markus.rechberger@amd.com> From: Joerg Roedel <joerg.roedel@amd.com> allow guests on amd hosts to reboot correctly. Signed-off-by: Markus Rechberger <markus.rechberger@amd.com> Signed-off-by: Joerg Roedel <joerg.roedel@amd.com> ........ r4385 | dor | 2007-02-05 17:32:55 +0200 (Mon, 05 Feb 2007) | 2 lines Add sysfs interface for debug & development. ........ r4386 | dor | 2007-02-06 10:07:08 +0200 (Tue, 06 Feb 2007) | 3 lines Fix kernel dir to include the determined by the configure script. Submited by Muli Ben-Yehuda and was also noticed by Richard Voigt. ........ r4387 | avi | 2007-02-06 12:55:17 +0200 (Tue, 06 Feb 2007) | 9 lines kvm: kvmctl: reduce pio latency for non-string out instructions From: Anthony Liguori <anthony@codemonkey.ws> Avoid making system calls for out{b,w,l} instructions since it is not necessary to sync GP registers. Signed-off-by: Anthony Liguori <anthony@codemonkey.ws> ........ r4388 | dor | 2007-02-06 18:39:33 +0200 (Tue, 06 Feb 2007) | 2 lines Add store option for sysfs interface of hypercall pci driver. ........ r4389 | dor | 2007-02-06 18:46:17 +0200 (Tue, 06 Feb 2007) | 3 lines Some more makefile fixes by Muli Ben-Yehuda. Cheers. ........ r4390 | dor | 2007-02-06 18:55:54 +0200 (Tue, 06 Feb 2007) | 6 lines Use ARRAY_SIZE macro instead of manual calculation. From: Ahmed S. Darwish <darwish.07@gmail.com> Signed-off-by: Ahmed S. Darwish <darwish.07@gmail.com> ........ r4391 | avi | 2007-02-07 10:19:08 +0200 (Wed, 07 Feb 2007) | 6 lines kvm: libkvm: replace exit()s by error returns From: Muli Ben-Yehuda <muli@il.ibm.com> Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com> ........ r4392 | avi | 2007-02-07 10:20:24 +0200 (Wed, 07 Feb 2007) | 6 lines kvm: qemu: handle error returns from libkvm From: Muli Ben-Yehuda <muli@il.ibm.com> Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com> ........ 
r4393 | avi | 2007-02-07 10:22:56 +0200 (Wed, 07 Feb 2007) | 6 lines kvm: qemu: dont exit from kvm_get_dirty_pages on failure From: Muli Ben-Yehuda <muli@il.ibm.com> Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com> ........ r4394 | avi | 2007-02-07 11:04:24 +0200 (Wed, 07 Feb 2007) | 7 lines kvm: qemu: qemu binary should depend on libkvm.a Without this patch making a change to libkvm.a will not cause the qemu binary to be re-linked. Tested on x86-64. Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com> ........ r4395 | avi | 2007-02-07 11:16:37 +0200 (Wed, 07 Feb 2007) | 11 lines kvm: qemu: fix 32 bit guest freeze on second reboot From: Joerg Roedel <joerg.roedel@amd.com> This patch fixes a bug detected with 32 bit Linux guests. There the second try to reboot hangs the guest machine. It seems to be necessary to fix all dataseg registers for 32 bit guests. This time with a patch attached. Signed-off-by: Joerg Roedel <joerg.roedel@amd.com> ........ r4396 | avi | 2007-02-07 11:28:34 +0200 (Wed, 07 Feb 2007) | 8 lines kvm: use page_private()/set_page_private() apis From: Markus Rechberger <markus.rechberger@amd.com> besides using an established api, this allows using kvm in older kernels. Signed-off-by: Markus Rechberger <markus.rechberger@amd.com> ........ r4397 | avi | 2007-02-07 11:31:07 +0200 (Wed, 07 Feb 2007) | 6 lines kvm: more older kernel compatibility From: Markus Rechberger <markus.rechberger@amd.com> Signed-off-by: Markus Rechberger <markus.rechberger@amd.com> ........ r4398 | avi | 2007-02-07 12:00:03 +0200 (Wed, 07 Feb 2007) | 9 lines kvm: libkvm: check for KVM_GET/SET_REG ioctl failures From: Muli Ben-Yehuda <muli@il.ibm.com> Check for KVM_GET/SET_REGS ioctl failures (and return -errno which the caller of kvm_run will promptely ignore). Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com> ........ r4399 | itaish | 2007-02-07 19:48:54 +0200 (Wed, 07 Feb 2007) | 1 line Hypercall registers renaming. ........
-rw-r--r--Makefile.target5
-rw-r--r--hw/hypercall.c92
-rw-r--r--hw/vga.c8
-rw-r--r--kvm/Makefile2
-rw-r--r--kvm/drivers/Makefile3
-rw-r--r--kvm/drivers/hypercall.c304
-rw-r--r--kvm/kernel/Makefile3
-rw-r--r--kvm/kernel/external-module-compat.h22
-rw-r--r--kvm/kernel/include/linux/kvm_para.h73
-rw-r--r--kvm/kernel/include/linux/mutex.h5
-rw-r--r--kvm/kernel/kvm.h10
-rw-r--r--kvm/kernel/kvm_main.c140
-rw-r--r--kvm/kernel/kvm_svm.h3
-rw-r--r--kvm/kernel/mmu.c36
-rw-r--r--kvm/kernel/paging_tmpl.h18
-rw-r--r--kvm/kernel/svm.c36
-rw-r--r--kvm/kernel/vmx.c32
-rwxr-xr-xkvm/kvm_stat10
-rw-r--r--kvm/user/kvmctl.c69
-rw-r--r--kvm/user/kvmctl.h3
-rw-r--r--qemu-kvm.c31
-rw-r--r--vl.c4
22 files changed, 770 insertions, 139 deletions
diff --git a/Makefile.target b/Makefile.target
index 3e1d07333..5c81a0b54 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -25,6 +25,8 @@ CFLAGS+=-Wall -O2 -g -fno-strict-aliasing
#CFLAGS+=-Werror
LDFLAGS+=-g
LIBS=
+# libraries we depend on
+DEPLIBS=
HELPER_CFLAGS=$(CFLAGS)
DYNGEN=../dyngen$(EXESUF)
# user emulator name
@@ -331,6 +333,7 @@ AUDIODRV+= wavcapture.o
ifdef CONFIG_KVM_KERNEL_INC
DEFINES += -I $(CONFIG_KVM_KERNEL_INC)
LIBS += -lkvm
+DEPLIBS += ../user/libkvm.a
endif
# SCSI layer
@@ -437,7 +440,7 @@ ifdef CONFIG_WIN32
SDL_LIBS := $(filter-out -mwindows, $(SDL_LIBS)) -mconsole
endif
-$(QEMU_SYSTEM): $(VL_OBJS) libqemu.a
+$(QEMU_SYSTEM): $(VL_OBJS) libqemu.a $(DEPLIBS)
$(CC) $(VL_LDFLAGS) -o $@ $^ $(LIBS) $(SDL_LIBS) $(COCOA_LIBS) $(VL_LIBS)
cocoa.o: cocoa.m
diff --git a/hw/hypercall.c b/hw/hypercall.c
index a1f629ca4..e531cb583 100644
--- a/hw/hypercall.c
+++ b/hw/hypercall.c
@@ -25,21 +25,22 @@
#include "vl.h"
#include <stddef.h>
-#define HP_CMD 0x00 // The command register WR
-#define HP_ISRSTATUS 0x04 // Interrupt status reg RD
+#define HCR_REGISTER 0x00 // Hypercall Command Register WR
+#define HSR_REGISTER 0x04 // Hypercall Status Register RD
#define HP_TXSIZE 0x08
#define HP_TXBUFF 0x0c
#define HP_RXSIZE 0x10
#define HP_RXBUFF 0x14
-// HP_CMD register commands
-#define HP_CMD_DI 1 // disable interrupts
-#define HP_CMD_EI 2 // enable interrupts
-#define HP_CMD_RESET 4 // enable interrupts
+// HCR_REGISTER commands
+#define HCR_DI 1 // disable interrupts
+#define HCR_EI 2 // enable interrupts
+#define HCR_GRS 4 // Global reset
+#define HCR_RESET (HCR_GRS|HCR_DI)
-/* Bits in HP_ISR - Interrupt status register */
-#define HPISR_RX 0x01 // Data is ready to be read
+// Bits in HSR_REGISTER
+#define HSR_VDR 0x01 // vmchannel Data is ready to be read
int use_hypercall_dev = 0;
static CharDriverState *vmchannel_hd;
@@ -47,8 +48,8 @@ static CharDriverState *vmchannel_hd;
#define HP_MEM_SIZE 0xE0
typedef struct HypercallState {
- uint32_t cmd;
- uint32_t isr;
+ uint32_t hcr;
+ uint32_t hsr;
uint32_t txsize;
uint32_t txbuff;
uint32_t rxsize;
@@ -61,32 +62,40 @@ typedef struct HypercallState {
HypercallState *pHypercallState = NULL;
+
+#define HYPERCALL_DEBUG 1
+
static void hp_reset(HypercallState *s)
{
- s->cmd = 0;
- s->isr = 0;
+ s->hcr = 0;
+ s->hsr = 0;
s->txsize = 0;
s->txbuff = 0;
s->rxsize= 0;
s->txbufferaccu_offset = 0;
}
+static void hypercall_update_irq(HypercallState *s);
+
+
static void hp_ioport_write(void *opaque, uint32_t addr, uint32_t val)
{
HypercallState *s = opaque;
- //printf("hp_ioport_write,addr=0x%x, val=0x%x\n",addr, val);
-
+#ifdef HYPERCALL_DEBUG
+ printf("%s: addr=0x%x, val=0x%x\n", __FUNCTION__, addr, val);
+#endif
addr &= 0xff;
switch(addr)
{
- case HP_CMD:
+ case HCR_REGISTER:
{
- s->cmd = val;
- if (val == HP_CMD_RESET){
+ s->hcr = val;
+ if (s->hcr & HCR_DI)
+ hypercall_update_irq(s);
+ if (val & HCR_GRS){
hp_reset(s);
- return;
}
break;
}
@@ -115,7 +124,6 @@ static void hp_ioport_write(void *opaque, uint32_t addr, uint32_t val)
s->txbufferaccu[s->txbufferaccu_offset] = val;
s->txbufferaccu_offset++;
if (s->txbufferaccu_offset >= s->txsize) {
- printf("tranmit txbuf, Len:0x%x\n", s->txbufferaccu_offset);
qemu_chr_write(vmchannel_hd, s->txbufferaccu, s->txsize);
s->txbufferaccu_offset = 0;
s->txsize = 0;
@@ -134,10 +142,9 @@ static uint32_t hp_ioport_read(void *opaque, uint32_t addr)
HypercallState *s = opaque;
int ret;
- if (addr != 0xc204) {
- //printf("hp_ioport_read addr:0x%x\n",addr);
- }
-
+#ifdef HYPERCALL_DEBUG
+ printf("%s: addr=0x%x\n", __FUNCTION__, addr);
+#endif
addr &= 0xff;
if (addr >= offsetof(HypercallState, RxBuff) )
@@ -149,13 +156,10 @@ static uint32_t hp_ioport_read(void *opaque, uint32_t addr)
switch (addr)
{
- case HP_ISRSTATUS:
- if (s->isr != 0){
- printf("hp_ioport_read s->isr=0x%x\n", s->isr);
- }
- ret = s->isr;
- if (ret & HPISR_RX) {
- s->isr &= ~HPISR_RX;
+ case HSR_REGISTER:
+ ret = s->hsr;
+ if (ret & HSR_VDR) {
+ s->hsr &= ~HSR_VDR;
}
break;
case HP_RXSIZE:
@@ -192,13 +196,8 @@ static void hp_map(PCIDevice *pci_dev, int region_num,
static void hypercall_update_irq(HypercallState *s)
{
- printf("hypercall_update_irq\n");
-
- if (s->cmd &= HP_CMD_DI) {
- return;
- }
- /* PCI irq */
- pci_set_irq(s->pci_dev, 0, 1);
+ /* PCI irq */
+ pci_set_irq(s->pci_dev, 0, !(s->hcr & HCR_DI));
}
void pci_hypercall_init(PCIBus *bus)
@@ -250,24 +249,21 @@ static int vmchannel_can_read(void *opaque)
static void vmchannel_read(void *opaque, const uint8_t *buf, int size)
{
int i;
-
- printf("vmchannel_read buf:%p, size:%d\n", buf, size);
- for(i = 0; i < size; i++) {
- printf("%x,", buf[i]);
- }
- printf("\n");
+
+#ifdef HYPERCALL_DEBUG
+ printf("vmchannel_read buf:%s, size:%d\n", buf, size);
+#endif
// if the hypercall device is in interrupts disabled state, don't accept the data
- if (pHypercallState->cmd &= HP_CMD_DI) {
+ if (pHypercallState->hcr & HCR_DI) {
return;
}
for(i = 0; i < size; i++) {
- //printf("buf[i%d]=%x\n",i, buf[i]);
pHypercallState->RxBuff[i] = buf[i];
}
pHypercallState->rxsize = size;
- pHypercallState->isr = HPISR_RX;
+ pHypercallState->hsr = HSR_VDR;
hypercall_update_irq(pHypercallState);
}
@@ -275,7 +271,9 @@ void vmchannel_init(CharDriverState *hd)
{
vmchannel_hd = hd;
- //printf("vmchannel_init\n");
+#ifdef HYPERCALL_DEBUG
+ printf("vmchannel_init\n");
+#endif
use_hypercall_dev = 1;
qemu_chr_add_read_handler(vmchannel_hd, vmchannel_can_read, vmchannel_read, &pHypercallState);
diff --git a/hw/vga.c b/hw/vga.c
index 4c1e57e4b..0e7613bcd 100644
--- a/hw/vga.c
+++ b/hw/vga.c
@@ -1396,9 +1396,13 @@ static void vga_draw_graphic(VGAState *s, int full_update)
/* HACK ALERT */
#define BITMAP_SIZE ((8*1024*1024) / 4096 / 8 / sizeof(long))
unsigned long bitmap[BITMAP_SIZE];
+ int r;
- if (kvm_allowed)
- kvm_get_dirty_pages(kvm_context, 1, &bitmap);
+ if (kvm_allowed) {
+ r = kvm_get_dirty_pages(kvm_context, 1, &bitmap);
+ if (r < 0)
+ fprintf(stderr, "kvm: get_dirty_pages returned %d\n", r);
+ }
#endif
full_update |= update_basic_params(s);
diff --git a/kvm/Makefile b/kvm/Makefile
index 48dda0264..48549299d 100644
--- a/kvm/Makefile
+++ b/kvm/Makefile
@@ -68,4 +68,4 @@ clean:
for i in $(if $(WANT_MODULE), kernel) user qemu; do \
make -C $$i clean; \
done
- rm -f config.make user/config.mak
+ rm -f config.mak user/config.mak
diff --git a/kvm/drivers/Makefile b/kvm/drivers/Makefile
index d0b681d43..56facbb0e 100644
--- a/kvm/drivers/Makefile
+++ b/kvm/drivers/Makefile
@@ -1,4 +1,5 @@
-KERNELDIR := /lib/modules/$(shell uname -r)/build
+include ../config.mak
+KERNELDIR ?= /lib/modules/$(shell uname -r)/build
KVERREL = $(patsubst /lib/modules/%/build,%,$(KERNELDIR))
DESTDIR=
diff --git a/kvm/drivers/hypercall.c b/kvm/drivers/hypercall.c
index 9c9462f66..e5f4c8b56 100644
--- a/kvm/drivers/hypercall.c
+++ b/kvm/drivers/hypercall.c
@@ -6,11 +6,12 @@
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/completion.h>
+#include <linux/interrupt.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/irq.h>
-#define HYPERCALL_DRIVER_NAME "Qumranet hypercall driver"
+#define HYPERCALL_DRIVER_NAME "Qumranet_hypercall_driver"
#define HYPERCALL_DRIVER_VERSION "1"
#define PCI_VENDOR_ID_HYPERCALL 0x5002
#define PCI_DEVICE_ID_HYPERCALL 0x2258
@@ -43,26 +44,68 @@ static struct pci_device_id hypercall_pci_tbl[] = {
};
MODULE_DEVICE_TABLE (pci, hypercall_pci_tbl);
+
+
+/****** Hypercall device definitions ***************/
+/* To be moved into a shared file with user space */
+#define HP_CMD 0x00 // The command register WR
+#define HP_ISRSTATUS 0x04 // Interrupt status reg RD
+#define HP_TXSIZE 0x08
+#define HP_TXBUFF 0x0c
+#define HP_RXSIZE 0x10
+#define HP_RXBUFF 0x14
+
+// HP_CMD register commands
+#define HP_CMD_DI 1 // disable interrupts
+#define HP_CMD_EI 2 // enable interrupts
+#define HP_CMD_INIT 4 // reset device
+#define HP_CMD_RESET (HP_CMD_INIT|HP_CMD_DI)
+
+/* Bits in HP_ISR - Interrupt status register */
+#define HPISR_RX 0x01 // Data is ready to be read
+
+#define HP_MEM_SIZE 0xE0
+/******* End of Hypercall device definitions */
+
+/* read PIO/MMIO register */
+#define HIO_READ8(reg, ioaddr) ioread8(ioaddr + (reg))
+#define HIO_READ16(reg, ioaddr) ioread16(ioaddr + (reg))
+#define HIO_READ32(reg, ioaddr) ioread32(ioaddr + (reg))
+
+/* write PIO/MMIO register */
+#define HIO_WRITE8(reg, val8, ioaddr) iowrite8((val8), ioaddr + (reg))
+#define HIO_WRITE16(reg, val16, ioaddr) iowrite16((val16), ioaddr + (reg))
+#define HIO_WRITE32(reg, val32, ioaddr) iowrite32((val32), ioaddr + (reg))
+
+
struct hypercall_dev {
struct pci_dev *pci_dev;
+ struct kobject kobject;
u32 state;
spinlock_t lock;
u8 name[128];
u16 irq;
u32 regs_len;
- void __iomem *mmio_addr;
+ void __iomem *io_addr;
unsigned long base_addr; /* device I/O address */
+ unsigned long cmd;
};
-
+static int hypercall_close(struct hypercall_dev* dev);
+static int hypercall_open(struct hypercall_dev *dev);
static void hypercall_cleanup_dev(struct hypercall_dev *dev);
+static irqreturn_t hypercall_interrupt(int irq, void *dev_instance,
+ struct pt_regs *regs);
+
+/*
+ * No __exit annotation here: hypercall_sysfs_remove() is called from
+ * hypercall_remove_one(), which is __devexit, and its definition carries
+ * no section annotation, so the prototype must not place it in the exit
+ * section either.
+ */
+static void hypercall_sysfs_remove(struct hypercall_dev *dev);
+static int hypercall_sysfs_add(struct hypercall_dev *dev);
static int __devinit hypercall_init_board(struct pci_dev *pdev,
struct hypercall_dev **dev_out)
{
- unsigned long *ioaddr;
+ unsigned long ioaddr;
struct hypercall_dev *dev;
int rc;
u32 disable_dev_on_err = 0;
@@ -101,17 +144,17 @@ static int __devinit hypercall_init_board(struct pci_dev *pdev,
if (rc)
goto err_out;
- pci_set_master (pdev);
-
#define USE_IO_OPS 1
#ifdef USE_IO_OPS
- ioaddr = pci_iomap(pdev, 0, 0);
+ ioaddr = (unsigned long)pci_iomap(pdev, 0, 0);
+ //ioaddr = ioport_map(pio_start, pio_len);
if (!ioaddr) {
printk(KERN_ERR "%s: cannot map PIO, aborting\n", pci_name(pdev));
rc = -EIO;
goto err_out;
}
- dev->base_addr = (unsigned long)ioaddr;
+ dev->base_addr = (unsigned long)pio_start;
+ dev->io_addr = (void*)ioaddr;
dev->regs_len = pio_len;
#else
ioaddr = pci_iomap(pdev, 1, 0);
@@ -121,6 +164,7 @@ static int __devinit hypercall_init_board(struct pci_dev *pdev,
goto err_out;
}
dev->base_addr = ioaddr;
+ dev->io_addr = (void*)ioaddr;
dev->regs_len = mmio_len;
#endif /* USE_IO_OPS */
@@ -161,7 +205,13 @@ static int __devinit hypercall_init_one(struct pci_dev *pdev,
spin_lock_init(&dev->lock);
pci_set_drvdata(pdev, dev);
- printk (KERN_INFO "%s: 0x%lx, IRQ %d\n", dev->name, dev->base_addr, dev->irq);
+ printk (KERN_INFO "name=%s: base_addr=0x%lx, io_addr=0x%lx, IRQ=%d\n",
+ dev->name, dev->base_addr, (unsigned long)dev->io_addr, dev->irq);
+ hypercall_open(dev);
+
+ if (hypercall_sysfs_add(dev) != 0)
+ return -1;
+
return 0;
}
@@ -171,10 +221,111 @@ static void __devexit hypercall_remove_one(struct pci_dev *pdev)
assert(dev != NULL);
+ hypercall_close(dev);
+ hypercall_sysfs_remove(dev);
hypercall_cleanup_dev(dev);
pci_disable_device(pdev);
}
+/*
+ * Transmit @len bytes from @buf to the host.  The length is written to
+ * the HP_TXSIZE register, then each byte is written in order to the
+ * HP_TXBUFF register, all under dev->lock.
+ *
+ * Returns 0 on success, or -EINVAL when @len is larger than HP_MEM_SIZE.
+ */
+static int hypercall_tx(struct hypercall_dev *dev, unsigned char *buf, size_t len)
+{
+	void __iomem *regs = (void __iomem*)dev->io_addr;
+	size_t sent = 0;
+
+	if (len > HP_MEM_SIZE)
+		return -EINVAL;
+
+	spin_lock(&dev->lock);
+	HIO_WRITE8(HP_TXSIZE, len, regs);
+	while (sent < len) {
+		HIO_WRITE8(HP_TXBUFF, buf[sent], regs);
+		sent++;
+	}
+	spin_unlock(&dev->lock);
+
+	return 0;
+}
+
+/*
+ * Interrupt handler for the hypercall device.
+ *
+ * Reads the status register; when HPISR_RX is not set the interrupt is
+ * treated as belonging to another device on the shared line and IRQ_NONE
+ * is returned.  Otherwise device interrupts are disabled, the Rx size and
+ * Rx bytes are read into a local buffer, interrupts are re-enabled and
+ * IRQ_HANDLED is returned.
+ *
+ * @irq:          interrupt number (unused here)
+ * @dev_instance: the struct hypercall_dev passed to request_irq()
+ * @regs:         saved registers (unused here)
+ */
+static irqreturn_t hypercall_interrupt(int irq, void *dev_instance,
+				       struct pt_regs *regs)
+{
+	struct hypercall_dev *dev = (struct hypercall_dev *)dev_instance;
+	void __iomem *ioaddr = (void __iomem*)dev->io_addr;
+	u32 status;
+	int irq_handled = IRQ_NONE;
+	int rx_buf_size;
+	int i;
+	/*
+	 * One extra byte: up to HP_MEM_SIZE bytes are read and then a
+	 * terminating '\0' is appended, which used to land one past the
+	 * end of the array when the device reported a full buffer.
+	 */
+	u8 buffer[HP_MEM_SIZE + 1];
+	u8 *pbuf;
+
+	DPRINTK("base addr is 0x%lx, io_addr=0x%lx\n", dev->base_addr, (long)dev->io_addr);
+
+	spin_lock(&dev->lock);
+	status = HIO_READ8(HP_ISRSTATUS, ioaddr);
+	DPRINTK("irq status is 0x%x\n", status);
+
+	/* shared irq? */
+	if (unlikely((status & HPISR_RX) == 0)) {
+		DPRINTK("not handeling irq, not ours\n");
+		goto out;
+	}
+
+	/* Disable device interrupts */
+	HIO_WRITE8(HP_CMD, HP_CMD_DI, ioaddr);
+	DPRINTK("disable device interrupts\n");
+
+	rx_buf_size = HIO_READ8(HP_RXSIZE, ioaddr);
+	DPRINTK("Rx buffer size is %d\n", rx_buf_size);
+
+	/* Never copy more than the device memory window holds */
+	if (rx_buf_size > HP_MEM_SIZE)
+		rx_buf_size = HP_MEM_SIZE;
+
+	for (i=0, pbuf=buffer; i<rx_buf_size; i++, pbuf++) {
+		*pbuf = HIO_READ8(HP_RXBUFF, ioaddr + i);
+		DPRINTK("Read 0x%x as dword %d\n", *pbuf, i);
+	}
+	/* pbuf is at most &buffer[HP_MEM_SIZE], which is inside the array */
+	*pbuf = '\0';
+	DPRINTK("Read buffer %s", (char*)buffer);
+
+	HIO_WRITE8(HP_CMD, HP_CMD_EI, ioaddr);
+	DPRINTK("Enable interrupt\n");
+	irq_handled = IRQ_HANDLED;
+ out:
+	spin_unlock(&dev->lock);
+
+
+	/*
+	 * NOTE(review): this test transmit runs on every invocation,
+	 * including the "not ours" path above - confirm that is intended.
+	 */
+	hypercall_tx(dev, "hello host", sizeof("hello host"));
+	return irq_handled;
+}
+
+
+/*
+ * Hook hypercall_interrupt() up to the device's irq line as a shared
+ * (SA_SHIRQ) handler, using dev->name as the handler name and dev as the
+ * cookie.
+ *
+ * Returns 0 on success or the error code from request_irq().
+ */
+static int hypercall_open(struct hypercall_dev *dev)
+{
+	int err = request_irq(dev->irq, &hypercall_interrupt,
+			      SA_SHIRQ, dev->name, dev);
+
+	if (!err) {
+		//hypercall_thread_start(dev);
+		return 0;
+	}
+
+	printk(KERN_ERR "%s failed to request an irq\n", __FUNCTION__);
+	return err;
+}
+
+/*
+ * Counterpart of hypercall_open(): calls synchronize_irq() on the
+ * device's irq and then releases the handler that was registered with
+ * dev as its cookie.  Always returns 0.
+ */
+static int hypercall_close(struct hypercall_dev* dev)
+{
+ //hypercall_thread_stop(dev);
+ synchronize_irq(dev->irq);
+ free_irq(dev->irq, dev);
+
+ return 0;
+}
+
#ifdef CONFIG_PM
static int hypercall_suspend(struct pci_dev *pdev, pm_message_t state)
@@ -201,7 +352,8 @@ static void hypercall_cleanup_dev(struct hypercall_dev *dev)
{
DPRINTK("cleaning up\n");
pci_release_regions(dev->pci_dev);
- pci_iounmap(dev->pci_dev, (void*)dev->base_addr);
+ pci_iounmap(dev->pci_dev, (void*)dev->io_addr);
+ pci_set_drvdata (dev->pci_dev, NULL);
kfree(dev);
}
@@ -227,5 +379,137 @@ static void __exit hypercall_cleanup_module(void)
pci_unregister_driver(&hypercall_pci_driver);
}
+/*
+ * sysfs support
+ */
+
+/*
+ * A sysfs attribute of a hypercall device: the generic attribute plus
+ * typed accessors.  Either callback may be NULL; the show/store
+ * dispatchers in this file return -EIO in that case.
+ */
+struct hypercall_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct hypercall_dev*, char *buf);
+ ssize_t (*store)(struct hypercall_dev*, unsigned long val);
+};
+
+/*
+ * sysfs ->show dispatcher: recovers the hypercall attribute and device
+ * from the generic attribute/kobject pointers and forwards to the
+ * attribute's own show callback.  Returns -EIO when the attribute has no
+ * show callback.
+ */
+static ssize_t hypercall_attribute_show(struct kobject *kobj,
+		struct attribute *attr, char *buf)
+{
+	struct hypercall_attribute *hattr =
+		container_of(attr, struct hypercall_attribute, attr);
+	struct hypercall_dev *hdev =
+		container_of(kobj, struct hypercall_dev, kobject);
+
+	if (hattr->show)
+		return hattr->show(hdev, buf);
+
+	return -EIO;
+}
+
+/*
+ * sysfs ->store dispatcher: parses @buf as an unsigned long (base
+ * auto-detected), recovers the hypercall attribute and device, and hands
+ * the value to the attribute's store callback.
+ *
+ * Returns -EIO when the attribute has no store callback, the callback's
+ * non-zero result if it reports one, and @count otherwise.
+ */
+static ssize_t hypercall_attribute_store(struct kobject *kobj,
+		struct attribute *attr, const char *buf, size_t count)
+{
+	struct hypercall_attribute *hattr =
+		container_of(attr, struct hypercall_attribute, attr);
+	struct hypercall_dev *hdev =
+		container_of(kobj, struct hypercall_dev, kobject);
+	char *endp;
+	unsigned long val = simple_strtoul(buf, &endp, 0);
+	int rc;
+
+	if (!hattr->store)
+		return -EIO;
+
+	rc = hattr->store(hdev, val);
+	if (rc)
+		return rc;
+
+	/* success: report the whole write as consumed */
+	return (int)count;
+}
+
+#define MAKE_HYPERCALL_R_ATTR(_name) \
+static ssize_t _name##_show(struct hypercall_dev *hdev, char *buf) \
+{ \
+ return sprintf(buf, "%lu\n", (unsigned long)hdev->_name); \
+} \
+struct hypercall_attribute hypercall_attr_##_name = __ATTR_RO(_name)
+
+/*
+ * Define a read/write sysfs attribute backed by hdev-><_name>.
+ *
+ * Generates <_name>_store (assigns the parsed value to the field and
+ * returns 0), <_name>_show (prints the field as an unsigned long) and the
+ * hypercall_attr_<_name> object.  Both callbacks return ssize_t so that
+ * they match the function pointer types in struct hypercall_attribute.
+ *
+ * NOTE(review): S_IWUGO makes the attribute writable by every user -
+ * confirm this is acceptable outside of debug builds.
+ */
+#define MAKE_HYPERCALL_WR_ATTR(_name) \
+static ssize_t _name##_store(struct hypercall_dev *hdev, unsigned long val) \
+{ \
+	hdev->_name = (typeof(hdev->_name))val; \
+	return 0; \
+} \
+static ssize_t _name##_show(struct hypercall_dev *hdev, char *buf) \
+{ \
+	return sprintf(buf, "%lu\n", (unsigned long)hdev->_name); \
+} \
+struct hypercall_attribute hypercall_attr_##_name = \
+	__ATTR(_name,S_IRUGO|S_IWUGO,_name##_show,_name##_store)
+
+MAKE_HYPERCALL_R_ATTR(base_addr);
+MAKE_HYPERCALL_R_ATTR(irq);
+MAKE_HYPERCALL_WR_ATTR(cmd);
+
+#define GET_HYPERCALL_ATTR(_name) (&hypercall_attr_##_name.attr)
+
+static struct attribute *hypercall_default_attrs[] = {
+ GET_HYPERCALL_ATTR(base_addr),
+ GET_HYPERCALL_ATTR(irq),
+ GET_HYPERCALL_ATTR(cmd),
+ NULL
+};
+
+static struct sysfs_ops hypercall_sysfs_ops = {
+ .show = hypercall_attribute_show,
+ .store = hypercall_attribute_store,
+};
+
+static void hypercall_sysfs_release(struct kobject *kobj)
+{
+ DPRINTK(" called for obj name %s\n", kobj->name);
+}
+
+static struct kobj_type hypercall_ktype = {
+ .release = hypercall_sysfs_release,
+ .sysfs_ops = &hypercall_sysfs_ops,
+ .default_attrs = hypercall_default_attrs
+};
+
+
+/*
+ * Register the device's embedded kobject under the name
+ * HYPERCALL_DRIVER_NAME with hypercall_ktype, then create a link of the
+ * same name from the PCI device's kobject to it.
+ *
+ * Returns 0 on success or the error code of the step that failed.  When
+ * creating the link fails, the kobject is removed again.
+ */
+static int hypercall_sysfs_add(struct hypercall_dev *dev)
+{
+	struct kobject *kobj = &dev->kobject;
+	int err;
+
+	kobject_init(kobj);
+	kobj->ktype = &hypercall_ktype;
+
+	err = kobject_set_name(kobj, "%s", HYPERCALL_DRIVER_NAME);
+	if (err) {
+		printk("%s: kobject_set_name failed, err=%d\n", __FUNCTION__, err);
+		return err;
+	}
+
+	err = kobject_add(kobj);
+	if (err) {
+		printk("%s: kobject_add failed, err=%d\n", __FUNCTION__, err);
+		return err;
+	}
+
+	err = sysfs_create_link(&dev->pci_dev->dev.kobj, kobj,
+				HYPERCALL_DRIVER_NAME);
+	if (!err)
+		return 0;
+
+	printk("%s: sysfs_create_link failed, err=%d\n", __FUNCTION__, err);
+	kobject_del(kobj);
+	return err;
+}
+
+/*
+ * Undo hypercall_sysfs_add(): remove the HYPERCALL_DRIVER_NAME link from
+ * the PCI device's kobject, then delete the device's own kobject.
+ */
+static void hypercall_sysfs_remove(struct hypercall_dev *dev)
+{
+ sysfs_remove_link(&dev->pci_dev->dev.kobj, HYPERCALL_DRIVER_NAME);
+ kobject_del(&dev->kobject);
+}
+
module_init(hypercall_init_module);
module_exit(hypercall_cleanup_module);
diff --git a/kvm/kernel/Makefile b/kvm/kernel/Makefile
index 454f5f1bd..78e31a4c7 100644
--- a/kvm/kernel/Makefile
+++ b/kvm/kernel/Makefile
@@ -1,4 +1,5 @@
-KERNELDIR := /lib/modules/$(shell uname -r)/build
+include ../config.mak
+KERNELDIR ?= /lib/modules/$(shell uname -r)/build
KVERREL = $(patsubst /lib/modules/%/build,%,$(KERNELDIR))
DESTDIR=
diff --git a/kvm/kernel/external-module-compat.h b/kvm/kernel/external-module-compat.h
index 830c46436..79608730f 100644
--- a/kvm/kernel/external-module-compat.h
+++ b/kvm/kernel/external-module-compat.h
@@ -72,6 +72,28 @@ static inline int smp_call_function_single1(int cpu, void (*func)(void *info),
* The cpu hotplug stubs are broken if !CONFIG_CPU_HOTPLUG
*/
+/*
+ * For kernels up to and including 2.6.15, map the mutex API used by the
+ * module onto the semaphore primitives.  Note that the last define
+ * replaces the bare token "mutex" with "semaphore" everywhere after this
+ * point.
+ */
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,15)
+#define DEFINE_MUTEX(a) DECLARE_MUTEX(a)
+#define mutex_lock_interruptible(a) down_interruptible(a)
+#define mutex_unlock(a) up(a)
+#define mutex_lock(a) down(a)
+#define mutex_init(a) init_MUTEX(a)
+#define mutex_trylock(a) down_trylock(a)
+#define mutex semaphore
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)
+#ifndef kzalloc
+/*
+ * Fallback kzalloc() for kernels that do not provide one: kmalloc() the
+ * requested size and zero the memory when the allocation succeeded.
+ * Evaluates to the allocated pointer, or NULL on failure.
+ *
+ * Every line of the statement expression needs a continuation backslash;
+ * without them the macro body ends early.  The size argument is captured
+ * once so that it is not evaluated twice.
+ */
+#define kzalloc(size,flags)				\
+({							\
+	size_t __kz_size = (size);			\
+	void *__ret = kmalloc(__kz_size, (flags));	\
+	if (__ret)					\
+		memset(__ret, 0, __kz_size);		\
+	__ret;						\
+})
+#endif
+#endif
+
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21)
#ifndef CONFIG_HOTPLUG_CPU
diff --git a/kvm/kernel/include/linux/kvm_para.h b/kvm/kernel/include/linux/kvm_para.h
new file mode 100644
index 000000000..3b292565a
--- /dev/null
+++ b/kvm/kernel/include/linux/kvm_para.h
@@ -0,0 +1,73 @@
+#ifndef __LINUX_KVM_PARA_H
+#define __LINUX_KVM_PARA_H
+
+/*
+ * Guest OS interface for KVM paravirtualization
+ *
+ * Note: this interface is totally experimental, and is certain to change
+ * as we make progress.
+ */
+
+/*
+ * Per-VCPU descriptor area shared between guest and host. Writable to
+ * both guest and host. Registered with the host by the guest when
+ * a guest acknowledges paravirtual mode.
+ *
+ * NOTE: all addresses are guest-physical addresses (gpa), to make it
+ * easier for the hypervisor to map between the various addresses.
+ */
+struct kvm_vcpu_para_state {
+ /*
+ * API version information for compatibility. If there's any support
+ * mismatch (too old host trying to execute too new guest) then
+ * the host will deny entry into paravirtual mode. Any other
+ * combination (new host + old guest and new host + new guest)
+ * is supposed to work - new host versions will support all old
+ * guest API versions.
+ */
+ u32 guest_version;
+ u32 host_version;
+ u32 size;
+ u32 ret;
+
+ /*
+ * The address of the vm exit instruction (VMCALL or VMMCALL),
+ * which the host will patch according to the CPU model the
+ * VM runs on:
+ */
+ u64 hypercall_gpa;
+
+} __attribute__ ((aligned(PAGE_SIZE)));
+
+#define KVM_PARA_API_VERSION 1
+
+/*
+ * This is used for an RDMSR's ECX parameter to probe for a KVM host.
+ * Hopefully no CPU vendor will use up this number. This is placed well
+ * out of way of the typical space occupied by CPU vendors' MSR indices,
+ * and we think (or at least hope) it wont be occupied in the future
+ * either.
+ */
+#define MSR_KVM_API_MAGIC 0x87655678
+
+#define KVM_EINVAL 1
+
+/*
+ * Hypercall calling convention:
+ *
+ * Each hypercall may have 0-6 parameters.
+ *
+ * 64-bit hypercall index is in RAX, goes from 0 to __NR_hypercalls-1
+ *
+ * 64-bit parameters 1-6 are in the standard gcc x86_64 calling convention
+ * order: RDI, RSI, RDX, RCX, R8, R9.
+ *
+ * 32-bit index is EBX, parameters are: EAX, ECX, EDX, ESI, EDI, EBP.
+ * (the first 3 are according to the gcc regparm calling convention)
+ *
+ * No registers are clobbered by the hypercall, except that the
+ * return value is in RAX.
+ */
+#define __NR_hypercalls 0
+
+#endif
diff --git a/kvm/kernel/include/linux/mutex.h b/kvm/kernel/include/linux/mutex.h
new file mode 100644
index 000000000..71b2ae109
--- /dev/null
+++ b/kvm/kernel/include/linux/mutex.h
@@ -0,0 +1,5 @@
+/*
+ * Empty file to satisfy #include <linux/mutex.h> for older kernels.
+ */
+
+
diff --git a/kvm/kernel/kvm.h b/kvm/kernel/kvm.h
index 04574a9d4..41cc27de4 100644
--- a/kvm/kernel/kvm.h
+++ b/kvm/kernel/kvm.h
@@ -14,6 +14,7 @@
#include "vmx.h"
#include <linux/kvm.h>
+#include <linux/kvm_para.h>
#define CR0_PE_MASK (1ULL << 0)
#define CR0_TS_MASK (1ULL << 3)
@@ -237,6 +238,9 @@ struct kvm_vcpu {
unsigned long cr0;
unsigned long cr2;
unsigned long cr3;
+ gpa_t para_state_gpa;
+ struct page *para_state_page;
+ gpa_t hypercall_gpa;
unsigned long cr4;
unsigned long cr8;
u64 pdptrs[4]; /* pae */
@@ -382,6 +386,8 @@ struct kvm_arch_ops {
int (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
int (*vcpu_setup)(struct kvm_vcpu *vcpu);
void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+ void (*patch_hypercall)(struct kvm_vcpu *vcpu,
+ unsigned char *hypercall_addr);
};
extern struct kvm_stat kvm_stat;
@@ -476,6 +482,8 @@ void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes);
int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
+int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
u32 error_code)
{
@@ -523,7 +531,7 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
{
struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
- return (struct kvm_mmu_page *)page->private;
+ return (struct kvm_mmu_page *)page_private(page);
}
static inline u16 read_fs(void)
diff --git a/kvm/kernel/kvm_main.c b/kvm/kernel/kvm_main.c
index f8b70bbce..a8b3691af 100644
--- a/kvm/kernel/kvm_main.c
+++ b/kvm/kernel/kvm_main.c
@@ -126,10 +126,8 @@ static inline int valid_vcpu(int n)
return likely(n >= 0 && n < KVM_MAX_VCPUS);
}
-int kvm_read_guest(struct kvm_vcpu *vcpu,
- gva_t addr,
- unsigned long size,
- void *dest)
+int kvm_read_guest(struct kvm_vcpu *vcpu, gva_t addr, unsigned long size,
+ void *dest)
{
unsigned char *host_buf = dest;
unsigned long req_size = size;
@@ -161,10 +159,8 @@ int kvm_read_guest(struct kvm_vcpu *vcpu,
}
EXPORT_SYMBOL_GPL(kvm_read_guest);
-int kvm_write_guest(struct kvm_vcpu *vcpu,
- gva_t addr,
- unsigned long size,
- void *data)
+int kvm_write_guest(struct kvm_vcpu *vcpu, gva_t addr, unsigned long size,
+ void *data)
{
unsigned char *host_buf = data;
unsigned long req_size = size;
@@ -457,7 +453,7 @@ EXPORT_SYMBOL_GPL(set_cr4);
void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
if (is_long_mode(vcpu)) {
- if ( cr3 & CR3_L_MODE_RESEVED_BITS) {
+ if (cr3 & CR3_L_MODE_RESEVED_BITS) {
printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
inject_gp(vcpu);
return;
@@ -674,7 +670,7 @@ raced:
| __GFP_ZERO);
if (!new.phys_mem[i])
goto out_free;
- new.phys_mem[i]->private = 0;
+ set_page_private(new.phys_mem[i],0);
}
}
@@ -774,7 +770,6 @@ static int kvm_dev_ioctl_get_dirty_log(struct kvm *kvm,
if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
goto out;
-
if (any) {
cleared = 0;
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
@@ -903,8 +898,9 @@ static int emulator_read_emulated(unsigned long addr,
return X86EMUL_CONTINUE;
else {
gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
+
if (gpa == UNMAPPED_GVA)
- return vcpu_printf(vcpu, "not present\n"), X86EMUL_PROPAGATE_FAULT;
+ return X86EMUL_PROPAGATE_FAULT;
vcpu->mmio_needed = 1;
vcpu->mmio_phys_addr = gpa;
vcpu->mmio_size = bytes;
@@ -1142,6 +1138,42 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
}
EXPORT_SYMBOL_GPL(emulate_instruction);
+/*
+ * Common entry point for guest hypercalls, called by the arch-specific
+ * exit handlers (handle_vmcall on VMX, vmmcall_interception on SVM).
+ *
+ * Reads the hypercall number and six arguments from vcpu->regs,
+ * dispatches on the number and stores the result in the guest's RAX.
+ * No hypercall numbers are handled yet, so every call currently
+ * yields -KVM_EINVAL.
+ *
+ * Always returns 1.
+ */
+int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ unsigned long nr, a0, a1, a2, a3, a4, a5, ret;
+
+ /*
+ * NOTE(review): presumably brings vcpu->regs up to date before the
+ * registers are read below - confirm against the arch implementation.
+ */
+ kvm_arch_ops->decache_regs(vcpu);
+ ret = -KVM_EINVAL;
+#ifdef CONFIG_X86_64
+ if (is_long_mode(vcpu)) {
+ /* long mode: number in RAX, arguments in RDI, RSI, RDX, RCX, R8, R9 */
+ nr = vcpu->regs[VCPU_REGS_RAX];
+ a0 = vcpu->regs[VCPU_REGS_RDI];
+ a1 = vcpu->regs[VCPU_REGS_RSI];
+ a2 = vcpu->regs[VCPU_REGS_RDX];
+ a3 = vcpu->regs[VCPU_REGS_RCX];
+ a4 = vcpu->regs[VCPU_REGS_R8];
+ a5 = vcpu->regs[VCPU_REGS_R9];
+ } else
+#endif
+ {
+ /*
+ * otherwise: number in RBX, arguments in RAX, RCX, RDX, RSI,
+ * RDI, RBP; "& -1u" keeps only the low 32 bits of each register
+ */
+ nr = vcpu->regs[VCPU_REGS_RBX] & -1u;
+ a0 = vcpu->regs[VCPU_REGS_RAX] & -1u;
+ a1 = vcpu->regs[VCPU_REGS_RCX] & -1u;
+ a2 = vcpu->regs[VCPU_REGS_RDX] & -1u;
+ a3 = vcpu->regs[VCPU_REGS_RSI] & -1u;
+ a4 = vcpu->regs[VCPU_REGS_RDI] & -1u;
+ a5 = vcpu->regs[VCPU_REGS_RBP] & -1u;
+ }
+ /* no hypercalls are implemented yet: ret keeps its -KVM_EINVAL default */
+ switch (nr) {
+ default:
+ ;
+ }
+ /* hand the result back to the guest in RAX */
+ vcpu->regs[VCPU_REGS_RAX] = ret;
+ kvm_arch_ops->cache_regs(vcpu);
+ return 1;
+}
+EXPORT_SYMBOL_GPL(kvm_hypercall);
+
static u64 mk_cr_64(u64 curr_cr, u32 new_val)
{
return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
@@ -1208,6 +1240,73 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
}
}
+/*
+ * Register the para guest with the host.
+ *
+ * Called from kvm_set_msr_common() when the guest writes MSR_KVM_API_MAGIC;
+ * para_state_gpa is the value written and must be the page-aligned
+ * guest-physical address of a struct kvm_vcpu_para_state.  The host stores
+ * its API version in that structure, validates the guest's version and
+ * hypercall address, and patches the arch-specific hypercall instruction
+ * sequence into guest memory at para_state->hypercall_gpa.
+ *
+ * Returns 1 (err_gp path) if para_state_gpa is misaligned or cannot be
+ * translated to a host page, 0 otherwise.  Failures detected once the
+ * state page is mapped are reported to the guest via para_state->ret
+ * (-KVM_EINVAL) and still return 0.
+ */
+static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa)
+{
+	/* number of bytes written by kvm_arch_ops->patch_hypercall() */
+	const unsigned long hypercall_patch_len = 4;
+	struct kvm_vcpu_para_state *para_state;
+	hpa_t para_state_hpa, hypercall_hpa;
+	struct page *para_state_page;
+	unsigned char *hypercall;
+	gpa_t hypercall_gpa;
+
+	printk(KERN_DEBUG "kvm: guest trying to enter paravirtual mode\n");
+	printk(KERN_DEBUG ".... para_state_gpa: %08Lx\n", para_state_gpa);
+
+	/*
+	 * Needs to be page aligned:
+	 */
+	if (para_state_gpa != PAGE_ALIGN(para_state_gpa))
+		goto err_gp;
+
+	para_state_hpa = gpa_to_hpa(vcpu, para_state_gpa);
+	printk(KERN_DEBUG ".... para_state_hpa: %08Lx\n", para_state_hpa);
+	if (is_error_hpa(para_state_hpa))
+		goto err_gp;
+
+	/*
+	 * The host writes host_version and ret into this guest page, so it
+	 * has to show up in the dirty log:
+	 */
+	mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT);
+	para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT);
+	para_state = kmap_atomic(para_state_page, KM_USER0);
+
+	printk(KERN_DEBUG ".... guest version: %d\n", para_state->guest_version);
+	printk(KERN_DEBUG ".... size: %d\n", para_state->size);
+
+	para_state->host_version = KVM_PARA_API_VERSION;
+	/*
+	 * We cannot support guests that try to register themselves
+	 * with a newer API version than the host supports:
+	 */
+	if (para_state->guest_version > KVM_PARA_API_VERSION) {
+		para_state->ret = -KVM_EINVAL;
+		goto err_kunmap_skip;
+	}
+
+	hypercall_gpa = para_state->hypercall_gpa;
+	hypercall_hpa = gpa_to_hpa(vcpu, hypercall_gpa);
+	printk(KERN_DEBUG ".... hypercall_hpa: %08Lx\n", hypercall_hpa);
+	if (is_error_hpa(hypercall_hpa)) {
+		para_state->ret = -KVM_EINVAL;
+		goto err_kunmap_skip;
+	}
+
+	/*
+	 * Only the single page containing hypercall_hpa gets mapped below.
+	 * Refuse a guest-chosen address whose patch would run past the end
+	 * of that page, otherwise the host would write outside the mapping:
+	 */
+	if ((hypercall_hpa & ~PAGE_MASK) > PAGE_SIZE - hypercall_patch_len) {
+		para_state->ret = -KVM_EINVAL;
+		goto err_kunmap_skip;
+	}
+
+	printk(KERN_DEBUG "kvm: para guest successfully registered.\n");
+	vcpu->para_state_page = para_state_page;
+	vcpu->para_state_gpa = para_state_gpa;
+	vcpu->hypercall_gpa = hypercall_gpa;
+
+	/*
+	 * The patched instruction bytes are a host write into guest memory
+	 * as well:
+	 */
+	mark_page_dirty(vcpu->kvm, hypercall_gpa >> PAGE_SHIFT);
+	hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT),
+				KM_USER1) + (hypercall_hpa & ~PAGE_MASK);
+	kvm_arch_ops->patch_hypercall(vcpu, hypercall);
+	kunmap_atomic(hypercall, KM_USER1);
+
+	para_state->ret = 0;
+err_kunmap_skip:
+	kunmap_atomic(para_state, KM_USER0);
+	return 0;
+err_gp:
+	return 1;
+}
+
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
u64 data;
@@ -1316,6 +1415,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
case MSR_IA32_MISC_ENABLE:
vcpu->ia32_misc_enable_msr = data;
break;
+ /*
+ * This is the 'probe whether the host is KVM' logic:
+ */
+ case MSR_KVM_API_MAGIC:
+ return vcpu_register_para(vcpu, data);
+
default:
printk(KERN_ERR "kvm: unhandled wrmsr: 0x%x\n", msr);
return 1;
@@ -1800,12 +1905,11 @@ static long kvm_dev_ioctl(struct file *filp,
case KVM_GET_API_VERSION:
r = KVM_API_VERSION;
break;
- case KVM_CREATE_VCPU: {
+ case KVM_CREATE_VCPU:
r = kvm_dev_ioctl_create_vcpu(kvm, arg);
if (r)
goto out;
break;
- }
case KVM_RUN: {
struct kvm_run kvm_run;
@@ -2079,13 +2183,17 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
int cpu = (long)v;
switch (val) {
- case CPU_DEAD:
+ case CPU_DOWN_PREPARE:
case CPU_UP_CANCELED:
+ printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
+ cpu);
decache_vcpus_on_cpu(cpu);
smp_call_function_single(cpu, kvm_arch_ops->hardware_disable,
NULL, 0, 1);
break;
- case CPU_UP_PREPARE:
+ case CPU_ONLINE:
+ printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
+ cpu);
smp_call_function_single(cpu, kvm_arch_ops->hardware_enable,
NULL, 0, 1);
break;
diff --git a/kvm/kernel/kvm_svm.h b/kvm/kernel/kvm_svm.h
index 74cc862f4..624f1ca48 100644
--- a/kvm/kernel/kvm_svm.h
+++ b/kvm/kernel/kvm_svm.h
@@ -1,6 +1,7 @@
#ifndef __KVM_SVM_H
#define __KVM_SVM_H
+#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/list.h>
#include <asm/msr.h>
@@ -18,7 +19,7 @@ static const u32 host_save_msrs[] = {
MSR_IA32_LASTBRANCHTOIP, MSR_IA32_LASTINTFROMIP,MSR_IA32_LASTINTTOIP,*/
};
-#define NR_HOST_SAVE_MSRS (sizeof(host_save_msrs) / sizeof(*host_save_msrs))
+#define NR_HOST_SAVE_MSRS ARRAY_SIZE(host_save_msrs)
#define NUM_DB_REGS 4
struct vcpu_svm {
diff --git a/kvm/kernel/mmu.c b/kvm/kernel/mmu.c
index 22c426cd8..573867a50 100644
--- a/kvm/kernel/mmu.c
+++ b/kvm/kernel/mmu.c
@@ -298,18 +298,18 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte)
if (!is_rmap_pte(*spte))
return;
page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
- if (!page->private) {
+ if (!page_private(page)) {
rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
- page->private = (unsigned long)spte;
- } else if (!(page->private & 1)) {
+ set_page_private(page,(unsigned long)spte);
+ } else if (!(page_private(page) & 1)) {
rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
desc = mmu_alloc_rmap_desc(vcpu);
- desc->shadow_ptes[0] = (u64 *)page->private;
+ desc->shadow_ptes[0] = (u64 *)page_private(page);
desc->shadow_ptes[1] = spte;
- page->private = (unsigned long)desc | 1;
+ set_page_private(page,(unsigned long)desc | 1);
} else {
rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
- desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+ desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
desc = desc->more;
if (desc->shadow_ptes[RMAP_EXT-1]) {
@@ -337,12 +337,12 @@ static void rmap_desc_remove_entry(struct kvm_vcpu *vcpu,
if (j != 0)
return;
if (!prev_desc && !desc->more)
- page->private = (unsigned long)desc->shadow_ptes[0];
+ set_page_private(page,(unsigned long)desc->shadow_ptes[0]);
else
if (prev_desc)
prev_desc->more = desc->more;
else
- page->private = (unsigned long)desc->more | 1;
+ set_page_private(page,(unsigned long)desc->more | 1);
mmu_free_rmap_desc(vcpu, desc);
}
@@ -356,20 +356,20 @@ static void rmap_remove(struct kvm_vcpu *vcpu, u64 *spte)
if (!is_rmap_pte(*spte))
return;
page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
- if (!page->private) {
+ if (!page_private(page)) {
printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
BUG();
- } else if (!(page->private & 1)) {
+ } else if (!(page_private(page) & 1)) {
rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte);
- if ((u64 *)page->private != spte) {
+ if ((u64 *)page_private(page) != spte) {
printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n",
spte, *spte);
BUG();
}
- page->private = 0;
+ set_page_private(page,0);
} else {
rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte);
- desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+ desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
prev_desc = NULL;
while (desc) {
for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
@@ -398,11 +398,11 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
BUG_ON(!slot);
page = gfn_to_page(slot, gfn);
- while (page->private) {
- if (!(page->private & 1))
- spte = (u64 *)page->private;
+ while (page_private(page)) {
+ if (!(page_private(page) & 1))
+ spte = (u64 *)page_private(page);
else {
- desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+ desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
spte = desc->shadow_ptes[0];
}
BUG_ON(!spte);
@@ -1218,7 +1218,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
INIT_LIST_HEAD(&page_header->link);
if ((page = alloc_page(GFP_KERNEL)) == NULL)
goto error_1;
- page->private = (unsigned long)page_header;
+ set_page_private(page, (unsigned long)page_header);
page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT;
memset(__va(page_header->page_hpa), 0, PAGE_SIZE);
list_add(&page_header->link, &vcpu->free_pages);
diff --git a/kvm/kernel/paging_tmpl.h b/kvm/kernel/paging_tmpl.h
index b6b90e9e1..f3bcee904 100644
--- a/kvm/kernel/paging_tmpl.h
+++ b/kvm/kernel/paging_tmpl.h
@@ -128,8 +128,10 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
goto access_error;
#endif
- if (!(*ptep & PT_ACCESSED_MASK))
- *ptep |= PT_ACCESSED_MASK; /* avoid rmw */
+ if (!(*ptep & PT_ACCESSED_MASK)) {
+ mark_page_dirty(vcpu->kvm, table_gfn);
+ *ptep |= PT_ACCESSED_MASK;
+ }
if (walker->level == PT_PAGE_TABLE_LEVEL) {
walker->gfn = (*ptep & PT_BASE_ADDR_MASK)
@@ -185,6 +187,12 @@ static void FNAME(release_walker)(struct guest_walker *walker)
kunmap_atomic(walker->table, KM_USER0);
}
+/*
+ * Mark as dirty the guest frame that the walker recorded in table_gfn[]
+ * for its current level.  fix_write_pf() calls this before it sets
+ * PT_DIRTY_MASK in the guest entry.
+ */
+static void FNAME(mark_pagetable_dirty)(struct kvm *kvm,
+					 struct guest_walker *walker)
+{
+	gfn_t table_gfn = walker->table_gfn[walker->level - 1];
+
+	mark_page_dirty(kvm, table_gfn);
+}
+
static void FNAME(set_pte)(struct kvm_vcpu *vcpu, u64 guest_pte,
u64 *shadow_pte, u64 access_bits, gfn_t gfn)
{
@@ -348,12 +356,15 @@ static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu,
} else if (kvm_mmu_lookup_page(vcpu, gfn)) {
pgprintk("%s: found shadow page for %lx, marking ro\n",
__FUNCTION__, gfn);
+ mark_page_dirty(vcpu->kvm, gfn);
+ FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
*guest_ent |= PT_DIRTY_MASK;
*write_pt = 1;
return 0;
}
mark_page_dirty(vcpu->kvm, gfn);
*shadow_ent |= PT_WRITABLE_MASK;
+ FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
*guest_ent |= PT_DIRTY_MASK;
rmap_add(vcpu, shadow_ent);
@@ -430,9 +441,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
/*
* mmio: emulate if accessible, otherwise its a guest fault.
*/
- if (is_io_pte(*shadow_pte)) {
+ if (is_io_pte(*shadow_pte))
return 1;
- }
++kvm_stat.pf_fixed;
kvm_mmu_audit(vcpu, "post page fault (fixed)");
diff --git a/kvm/kernel/svm.c b/kvm/kernel/svm.c
index cf5f4979e..9f839e263 100644
--- a/kvm/kernel/svm.c
+++ b/kvm/kernel/svm.c
@@ -15,6 +15,7 @@
*/
#include <linux/module.h>
+#include <linux/kernel.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/profile.h>
@@ -75,7 +76,7 @@ struct svm_init_data {
static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
-#define NUM_MSR_MAPS (sizeof(msrpm_ranges) / sizeof(*msrpm_ranges))
+#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
#define MSRS_RANGE_SIZE 2048
#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
@@ -1042,22 +1043,22 @@ static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
addr_mask = io_adress(vcpu, _in, &kvm_run->io.address);
if (!addr_mask) {
- printk(KERN_DEBUG "%s: get io address failed\n", __FUNCTION__);
+ printk(KERN_DEBUG "%s: get io address failed\n",
+ __FUNCTION__);
return 1;
}
if (kvm_run->io.rep) {
- kvm_run->io.count = vcpu->regs[VCPU_REGS_RCX] & addr_mask;
+ kvm_run->io.count
+ = vcpu->regs[VCPU_REGS_RCX] & addr_mask;
kvm_run->io.string_down = (vcpu->svm->vmcb->save.rflags
& X86_EFLAGS_DF) != 0;
}
- } else {
+ } else
kvm_run->io.value = vcpu->svm->vmcb->save.rax;
- }
return 0;
}
-
static int nop_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
return 1;
@@ -1075,6 +1076,12 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 0;
}
+/*
+ * SVM_EXIT_VMMCALL handler: step the guest rip over the intercepted
+ * VMMCALL and let the common hypercall code service the request.
+ */
+static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	/* VMMCALL is the three-byte sequence 0f 01 d9 */
+	const unsigned int vmmcall_len = 3;
+
+	vcpu->svm->vmcb->save.rip += vmmcall_len;
+	return kvm_hypercall(vcpu, kvm_run);
+}
+
static int invalid_op_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
inject_ud(vcpu);
@@ -1275,7 +1282,7 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu,
[SVM_EXIT_TASK_SWITCH] = task_switch_interception,
[SVM_EXIT_SHUTDOWN] = shutdown_interception,
[SVM_EXIT_VMRUN] = invalid_op_interception,
- [SVM_EXIT_VMMCALL] = invalid_op_interception,
+ [SVM_EXIT_VMMCALL] = vmmcall_interception,
[SVM_EXIT_VMLOAD] = invalid_op_interception,
[SVM_EXIT_VMSAVE] = invalid_op_interception,
[SVM_EXIT_STGI] = invalid_op_interception,
@@ -1297,7 +1304,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
__FUNCTION__, vcpu->svm->vmcb->control.exit_int_info,
exit_code);
- if (exit_code >= sizeof(svm_exit_handlers) / sizeof(*svm_exit_handlers)
+ if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
|| svm_exit_handlers[exit_code] == 0) {
kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
printk(KERN_ERR "%s: 0x%x @ 0x%llx cr0 0x%lx rflags 0x%llx\n",
@@ -1668,6 +1675,18 @@ static int is_disabled(void)
return 0;
}
+/*
+ * Write the SVM hypercall sequence into the four bytes at @hypercall,
+ * which must already be mapped and writable.  @vcpu is unused.
+ */
+static void
+svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
+{
+	/* VMMCALL (0f 01 d9) followed by the byte c3 (near ret) */
+	static const unsigned char insn[] = { 0x0f, 0x01, 0xd9, 0xc3 };
+	unsigned int i;
+
+	for (i = 0; i < sizeof(insn); ++i)
+		hypercall[i] = insn[i];
+}
+
static struct kvm_arch_ops svm_arch_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -1716,6 +1735,7 @@ static struct kvm_arch_ops svm_arch_ops = {
.run = svm_vcpu_run,
.skip_emulated_instruction = skip_emulated_instruction,
.vcpu_setup = svm_vcpu_setup,
+ .patch_hypercall = svm_patch_hypercall,
};
static int __init svm_init(void)
diff --git a/kvm/kernel/vmx.c b/kvm/kernel/vmx.c
index 1b8feea48..936aef68a 100644
--- a/kvm/kernel/vmx.c
+++ b/kvm/kernel/vmx.c
@@ -19,6 +19,7 @@
#include "vmx.h"
#include "kvm_vmx.h"
#include <linux/module.h>
+#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/profile.h>
@@ -27,7 +28,6 @@
#include "segment_descriptor.h"
-
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
@@ -76,7 +76,7 @@ static const u32 vmx_msr_index[] = {
#endif
MSR_EFER, MSR_K6_STAR,
};
-#define NR_VMX_MSR (sizeof(vmx_msr_index) / sizeof(*vmx_msr_index))
+#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
static inline int is_page_fault(u32 intr_info)
{
@@ -418,10 +418,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
case MSR_IA32_SYSENTER_ESP:
vmcs_write32(GUEST_SYSENTER_ESP, data);
break;
- case MSR_IA32_TIME_STAMP_COUNTER: {
+ case MSR_IA32_TIME_STAMP_COUNTER:
guest_write_tsc(data);
break;
- }
default:
msr = find_msr_entry(vcpu, msr_index);
if (msr) {
@@ -793,6 +792,9 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
*/
static void vmx_set_cr0_no_modeswitch(struct kvm_vcpu *vcpu, unsigned long cr0)
{
+ if (!vcpu->rmode.active && !(cr0 & CR0_PE_MASK))
+ enter_rmode(vcpu);
+
vcpu->rmode.active = ((cr0 & CR0_PE_MASK) == 0);
update_exception_bitmap(vcpu);
vmcs_writel(CR0_READ_SHADOW, cr0);
@@ -1128,6 +1130,8 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
if (rdmsr_safe(index, &data_low, &data_high) < 0)
continue;
+ if (wrmsr_safe(index, data_low, data_high) < 0)
+ continue;
data = data_low | ((u64)data_high << 32);
vcpu->host_msrs[j].index = index;
vcpu->host_msrs[j].reserved = 0;
@@ -1465,6 +1469,18 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 0;
}
+/*
+ * Write the VMX hypercall sequence into the four bytes at @hypercall,
+ * which must already be mapped and writable.  @vcpu is unused.
+ */
+static void
+vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
+{
+	/* VMCALL (0f 01 c1) followed by the byte c3 (near ret) */
+	static const unsigned char insn[] = { 0x0f, 0x01, 0xc1, 0xc3 };
+	unsigned int i;
+
+	for (i = 0; i < sizeof(insn); ++i)
+		hypercall[i] = insn[i];
+}
+
static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
u64 exit_qualification;
@@ -1641,6 +1657,12 @@ static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 0;
}
+/*
+ * EXIT_REASON_VMCALL handler: move GUEST_RIP past the VMCALL instruction
+ * and let the common hypercall code service the request.
+ */
+static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	/* VMCALL is the three-byte sequence 0f 01 c1 */
+	unsigned long next_rip = vmcs_readl(GUEST_RIP) + 3;
+
+	vmcs_writel(GUEST_RIP, next_rip);
+	return kvm_hypercall(vcpu, kvm_run);
+}
+
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -1659,6 +1681,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
[EXIT_REASON_MSR_WRITE] = handle_wrmsr,
[EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window,
[EXIT_REASON_HLT] = handle_halt,
+ [EXIT_REASON_VMCALL] = handle_vmcall,
};
static const int kvm_vmx_max_exit_handlers =
@@ -2060,6 +2083,7 @@ static struct kvm_arch_ops vmx_arch_ops = {
.run = vmx_vcpu_run,
.skip_emulated_instruction = skip_emulated_instruction,
.vcpu_setup = vmx_vcpu_setup,
+ .patch_hypercall = vmx_patch_hypercall,
};
static int __init vmx_init(void)
diff --git a/kvm/kvm_stat b/kvm/kvm_stat
index 80274ae00..ff6cf96f3 100755
--- a/kvm/kvm_stat
+++ b/kvm/kvm_stat
@@ -1,7 +1,7 @@
#!/usr/bin/python
import curses
-import os, time
+import sys, os, time
class Stats:
def __init__(self):
@@ -18,6 +18,14 @@ class Stats:
self.values[key] = (newval, newdelta)
return self.values
+if not os.access('/sys/kernel/debug', os.F_OK):
+ print 'Please enable CONFIG_DEBUGFS in your kernel'
+ sys.exit(1)
+if not os.access('/sys/kernel/debug/kvm', os.F_OK):
+ print "Please mount debugfs ('mount -t debugfs debugfs /sys/kernel/debug')"
+ print "and ensure the kvm modules are loaded"
+ sys.exit(1)
+
stats = Stats()
def main(screen, stats):
diff --git a/kvm/user/kvmctl.c b/kvm/user/kvmctl.c
index 509c12b21..533d4aa0c 100644
--- a/kvm/user/kvmctl.c
+++ b/kvm/user/kvmctl.c
@@ -205,7 +205,7 @@ void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
}
-void kvm_get_dirty_pages(kvm_context_t kvm, int slot, void *buf)
+int kvm_get_dirty_pages(kvm_context_t kvm, int slot, void *buf)
{
int r;
struct kvm_dirty_log log = {
@@ -216,7 +216,7 @@ void kvm_get_dirty_pages(kvm_context_t kvm, int slot, void *buf)
r = ioctl(kvm->fd, KVM_GET_DIRTY_LOG, &log);
if (r == -1)
- exit(1);
+ return -errno;
}
static int more_io(struct kvm_run *run, int first_time)
@@ -234,27 +234,35 @@ static int handle_io(kvm_context_t kvm, struct kvm_run *run)
int first_time = 1;
int delta;
struct translation_cache tr;
+ int _in = (run->io.direction == KVM_EXIT_IO_IN);
+ int r;
translation_cache_init(&tr);
- regs.vcpu = run->vcpu;
- ioctl(kvm->fd, KVM_GET_REGS, &regs);
+ if (run->io.string || _in) {
+ regs.vcpu = run->vcpu;
+ r = ioctl(kvm->fd, KVM_GET_REGS, &regs);
+ if (r == -1)
+ return -errno;
+ }
delta = run->io.string_down ? -run->io.size : run->io.size;
while (more_io(run, first_time)) {
void *value_addr;
- int r;
- if (!run->io.string)
- value_addr = &regs.rax;
- else {
+ if (!run->io.string) {
+ if (_in)
+ value_addr = &regs.rax;
+ else
+ value_addr = &run->io.value;
+ } else {
r = translate(kvm, run->vcpu, &tr, run->io.address,
&value_addr);
if (r) {
fprintf(stderr, "failed translating I/O address %x\n",
run->io.address);
- exit(1);
+ return r;
}
}
@@ -280,8 +288,8 @@ static int handle_io(kvm_context_t kvm, struct kvm_run *run)
break;
}
default:
- fprintf(stderr, "bad I/O size\n");
- exit(1);
+ fprintf(stderr, "bad I/O size %d\n", run->io.size);
+ return -EMSGSIZE;
}
break;
}
@@ -300,13 +308,13 @@ static int handle_io(kvm_context_t kvm, struct kvm_run *run)
*(uint32_t *)value_addr);
break;
default:
- fprintf(stderr, "bad I/O size\n");
- exit(1);
+ fprintf(stderr, "bad I/O size %d\n", run->io.size);
+ return -EMSGSIZE;
}
break;
default:
- fprintf(stderr, "bad I/O size\n");
- exit(1);
+ fprintf(stderr, "bad I/O direction %d\n", run->io.direction);
+ return -EPROTO;
}
if (run->io.string) {
run->io.address += delta;
@@ -321,12 +329,22 @@ static int handle_io(kvm_context_t kvm, struct kvm_run *run)
}
first_time = 0;
if (r) {
- ioctl(kvm->fd, KVM_SET_REGS, &regs);
- return r;
+ int savedret = r;
+ r = ioctl(kvm->fd, KVM_SET_REGS, &regs);
+ if (r == -1)
+ return -errno;
+
+ return savedret;
}
}
- ioctl(kvm->fd, KVM_SET_REGS, &regs);
+ if (run->io.string || _in) {
+ r = ioctl(kvm->fd, KVM_SET_REGS, &regs);
+ if (r == -1)
+ return -errno;
+
+ }
+
run->emulated = 1;
return 0;
}
@@ -439,7 +457,7 @@ void kvm_show_regs(kvm_context_t kvm, int vcpu)
r = ioctl(fd, KVM_GET_REGS, &regs);
if (r == -1) {
perror("KVM_GET_REGS");
- exit(1);
+ return;
}
fprintf(stderr,
"rax %016llx rbx %016llx rcx %016llx rdx %016llx\n"
@@ -522,6 +540,11 @@ static int handle_halt(kvm_context_t kvm, struct kvm_run *kvm_run)
return kvm->callbacks->halt(kvm->opaque, kvm_run->vcpu);
}
+/*
+ * Forward a KVM_EXIT_SHUTDOWN exit to the client's shutdown callback and
+ * return whatever the callback returns.
+ */
+static int handle_shutdown(kvm_context_t kvm, struct kvm_run *kvm_run)
+{
+	int vcpu = kvm_run->vcpu;
+
+	return kvm->callbacks->shutdown(kvm->opaque, vcpu);
+}
+
int try_push_interrupts(kvm_context_t kvm)
{
return kvm->callbacks->try_push_interrupts(kvm->opaque);
@@ -556,8 +579,9 @@ again:
kvm_run.emulated = 0;
kvm_run.mmio_completed = 0;
if (r == -1 && errno != EINTR) {
+ r = -errno;
printf("kvm_run: %m\n");
- exit(1);
+ return r;
}
if (r == -1) {
r = handle_io_window(kvm, &kvm_run);
@@ -567,7 +591,7 @@ again:
case KVM_EXIT_TYPE_FAIL_ENTRY:
fprintf(stderr, "kvm_run: failed entry, reason %u\n",
kvm_run.exit_reason & 0xffff);
- exit(1);
+ return -ENOEXEC;
break;
case KVM_EXIT_TYPE_VM_EXIT:
switch (kvm_run.exit_reason) {
@@ -600,6 +624,9 @@ again:
break;
case KVM_EXIT_IRQ_WINDOW_OPEN:
break;
+ case KVM_EXIT_SHUTDOWN:
+ r = handle_shutdown(kvm, &kvm_run);
+ break;
default:
fprintf(stderr, "unhandled vm exit: 0x%x\n", kvm_run.exit_reason);
kvm_show_regs(kvm, vcpu);
diff --git a/kvm/user/kvmctl.h b/kvm/user/kvmctl.h
index aacdd28c1..936c029ae 100644
--- a/kvm/user/kvmctl.h
+++ b/kvm/user/kvmctl.h
@@ -59,6 +59,7 @@ struct kvm_callbacks {
* on the host CPU.
*/
int (*halt)(void *opaque, int vcpu);
+ int (*shutdown)(void *opaque, int vcpu);
int (*io_window)(void *opaque);
int (*try_push_interrupts)(void *opaque);
void (*post_kvm_run)(void *opaque, struct kvm_run *kvm_run);
@@ -247,6 +248,6 @@ void *kvm_create_phys_mem(kvm_context_t, unsigned long phys_start,
unsigned long len, int slot, int log, int writable);
void kvm_destroy_phys_mem(kvm_context_t, unsigned long phys_start,
unsigned long len);
-void kvm_get_dirty_pages(kvm_context_t, int slot, void *buf);
+int kvm_get_dirty_pages(kvm_context_t, int slot, void *buf);
#endif
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 1a0f6e04d..401c7e12c 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -124,6 +124,15 @@ static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
| (rhs->avl * DESC_AVL_MASK);
}
+/*
+ * qemu's reset-time segment descriptor values are not compatible with SVM.
+ * Overwrite the fields of a data segment that need fixing: mark it present,
+ * set the s flag and force type 0x02.
+ */
+static void fix_realmode_dataseg(struct kvm_segment *seg)
+{
+	seg->present = 1;
+	seg->s = 1;
+	seg->type = 0x02;
+}
+
+
static void load_regs(CPUState *env)
{
struct kvm_regs regs;
@@ -182,6 +191,14 @@ static void load_regs(CPUState *env)
(sregs.cs.selector & 3);
sregs.ss.dpl = sregs.ss.selector & 3;
}
+
+ if (!(env->cr[0] & CR0_PG_MASK)) {
+ fix_realmode_dataseg(&sregs.ds);
+ fix_realmode_dataseg(&sregs.es);
+ fix_realmode_dataseg(&sregs.fs);
+ fix_realmode_dataseg(&sregs.gs);
+ fix_realmode_dataseg(&sregs.ss);
+ }
}
set_seg(&sregs.tr, &env->tr);
@@ -408,6 +425,7 @@ void kvm_save_registers(CPUState *env)
int kvm_cpu_exec(CPUState *env)
{
+ int r;
int pending = (!env->ready_for_interrupt_injection ||
((env->interrupt_request & CPU_INTERRUPT_HARD) &&
(env->eflags & IF_MASK)));
@@ -422,7 +440,11 @@ int kvm_cpu_exec(CPUState *env)
if (!saved_env[0])
saved_env[0] = env;
- kvm_run(kvm_context, 0);
+ r = kvm_run(kvm_context, 0);
+ if (r < 0) {
+ printf("kvm_run returned %d\n", r);
+ exit(1);
+ }
return 0;
}
@@ -587,6 +609,12 @@ static int kvm_halt(void *opaque, int vcpu)
return 1;
}
+
+/*
+ * shutdown callback registered in qemu_kvm_ops; kvmctl invokes it for a
+ * KVM_EXIT_SHUTDOWN exit.  Requests a qemu system reset.
+ * NOTE(review): the meaning of the return value 1 is defined by the kvmctl
+ * run loop, which is not visible here - confirm.
+ */
+static int kvm_shutdown(void *opaque, int vcpu)
+{
+ qemu_system_reset_request();
+ return 1;
+}
static struct kvm_callbacks qemu_kvm_ops = {
.cpuid = kvm_cpuid,
@@ -606,6 +634,7 @@ static struct kvm_callbacks qemu_kvm_ops = {
.writel = kvm_writel,
.writeq = kvm_writeq,
.halt = kvm_halt,
+ .shutdown = kvm_shutdown,
.io_window = kvm_io_window,
.try_push_interrupts = try_push_interrupts,
.post_kvm_run = post_kvm_run,
diff --git a/vl.c b/vl.c
index 50665a8ff..5a1f6a60d 100644
--- a/vl.c
+++ b/vl.c
@@ -5295,6 +5295,10 @@ int main_loop(void)
if (reset_requested) {
reset_requested = 0;
qemu_system_reset();
+#ifdef USE_KVM
+ if (kvm_allowed)
+ kvm_load_registers(env);
+#endif
ret = EXCP_INTERRUPT;
}
if (powerdown_requested) {