aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile.target5
-rw-r--r--hw/hypercall.c92
-rw-r--r--hw/vga.c8
-rw-r--r--kvm/Makefile2
-rw-r--r--kvm/drivers/Makefile3
-rw-r--r--kvm/drivers/hypercall.c304
-rw-r--r--kvm/kernel/Makefile3
-rw-r--r--kvm/kernel/external-module-compat.h22
-rw-r--r--kvm/kernel/include/linux/kvm_para.h73
-rw-r--r--kvm/kernel/include/linux/mutex.h5
-rw-r--r--kvm/kernel/kvm.h10
-rw-r--r--kvm/kernel/kvm_main.c140
-rw-r--r--kvm/kernel/kvm_svm.h3
-rw-r--r--kvm/kernel/mmu.c36
-rw-r--r--kvm/kernel/paging_tmpl.h18
-rw-r--r--kvm/kernel/svm.c36
-rw-r--r--kvm/kernel/vmx.c32
-rwxr-xr-xkvm/kvm_stat10
-rw-r--r--kvm/user/kvmctl.c69
-rw-r--r--kvm/user/kvmctl.h3
-rw-r--r--qemu-kvm.c31
-rw-r--r--vl.c4
22 files changed, 770 insertions, 139 deletions
diff --git a/Makefile.target b/Makefile.target
index 3e1d07333..5c81a0b54 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -25,6 +25,8 @@ CFLAGS+=-Wall -O2 -g -fno-strict-aliasing
#CFLAGS+=-Werror
LDFLAGS+=-g
LIBS=
+# libraries we depend on
+DEPLIBS=
HELPER_CFLAGS=$(CFLAGS)
DYNGEN=../dyngen$(EXESUF)
# user emulator name
@@ -331,6 +333,7 @@ AUDIODRV+= wavcapture.o
ifdef CONFIG_KVM_KERNEL_INC
DEFINES += -I $(CONFIG_KVM_KERNEL_INC)
LIBS += -lkvm
+DEPLIBS += ../user/libkvm.a
endif
# SCSI layer
@@ -437,7 +440,7 @@ ifdef CONFIG_WIN32
SDL_LIBS := $(filter-out -mwindows, $(SDL_LIBS)) -mconsole
endif
-$(QEMU_SYSTEM): $(VL_OBJS) libqemu.a
+$(QEMU_SYSTEM): $(VL_OBJS) libqemu.a $(DEPLIBS)
$(CC) $(VL_LDFLAGS) -o $@ $^ $(LIBS) $(SDL_LIBS) $(COCOA_LIBS) $(VL_LIBS)
cocoa.o: cocoa.m
diff --git a/hw/hypercall.c b/hw/hypercall.c
index a1f629ca4..e531cb583 100644
--- a/hw/hypercall.c
+++ b/hw/hypercall.c
@@ -25,21 +25,22 @@
#include "vl.h"
#include <stddef.h>
-#define HP_CMD 0x00 // The command register WR
-#define HP_ISRSTATUS 0x04 // Interrupt status reg RD
+#define HCR_REGISTER 0x00 // Hypercall Command Register WR
+#define HSR_REGISTER 0x04 // Hypercall Status Register RD
#define HP_TXSIZE 0x08
#define HP_TXBUFF 0x0c
#define HP_RXSIZE 0x10
#define HP_RXBUFF 0x14
-// HP_CMD register commands
-#define HP_CMD_DI 1 // disable interrupts
-#define HP_CMD_EI 2 // enable interrupts
-#define HP_CMD_RESET 4 // enable interrupts
+// HCR_REGISTER commands
+#define HCR_DI 1 // disable interrupts
+#define HCR_EI 2 // enable interrupts
+#define HCR_GRS 4 // Global reset
+#define HCR_RESET (HCR_GRS|HCR_DI)
-/* Bits in HP_ISR - Interrupt status register */
-#define HPISR_RX 0x01 // Data is ready to be read
+// Bits in HSR_REGISTER
+#define HSR_VDR 0x01 // vmchannel Data is ready to be read
int use_hypercall_dev = 0;
static CharDriverState *vmchannel_hd;
@@ -47,8 +48,8 @@ static CharDriverState *vmchannel_hd;
#define HP_MEM_SIZE 0xE0
typedef struct HypercallState {
- uint32_t cmd;
- uint32_t isr;
+ uint32_t hcr;
+ uint32_t hsr;
uint32_t txsize;
uint32_t txbuff;
uint32_t rxsize;
@@ -61,32 +62,40 @@ typedef struct HypercallState {
HypercallState *pHypercallState = NULL;
+
+#define HYPERCALL_DEBUG 1
+
static void hp_reset(HypercallState *s)
{
- s->cmd = 0;
- s->isr = 0;
+ s->hcr = 0;
+ s->hsr = 0;
s->txsize = 0;
s->txbuff = 0;
s->rxsize= 0;
s->txbufferaccu_offset = 0;
}
+static void hypercall_update_irq(HypercallState *s);
+
+
static void hp_ioport_write(void *opaque, uint32_t addr, uint32_t val)
{
HypercallState *s = opaque;
- //printf("hp_ioport_write,addr=0x%x, val=0x%x\n",addr, val);
-
+#ifdef HYPERCALL_DEBUG
+ printf("%s: addr=0x%x, val=0x%x\n", __FUNCTION__, addr, val);
+#endif
addr &= 0xff;
switch(addr)
{
- case HP_CMD:
+ case HCR_REGISTER:
{
- s->cmd = val;
- if (val == HP_CMD_RESET){
+ s->hcr = val;
+ if (s->hcr & HCR_DI)
+ hypercall_update_irq(s);
+ if (val & HCR_GRS){
hp_reset(s);
- return;
}
break;
}
@@ -115,7 +124,6 @@ static void hp_ioport_write(void *opaque, uint32_t addr, uint32_t val)
s->txbufferaccu[s->txbufferaccu_offset] = val;
s->txbufferaccu_offset++;
if (s->txbufferaccu_offset >= s->txsize) {
- printf("tranmit txbuf, Len:0x%x\n", s->txbufferaccu_offset);
qemu_chr_write(vmchannel_hd, s->txbufferaccu, s->txsize);
s->txbufferaccu_offset = 0;
s->txsize = 0;
@@ -134,10 +142,9 @@ static uint32_t hp_ioport_read(void *opaque, uint32_t addr)
HypercallState *s = opaque;
int ret;
- if (addr != 0xc204) {
- //printf("hp_ioport_read addr:0x%x\n",addr);
- }
-
+#ifdef HYPERCALL_DEBUG
+ printf("%s: addr=0x%x\n", __FUNCTION__, addr);
+#endif
addr &= 0xff;
if (addr >= offsetof(HypercallState, RxBuff) )
@@ -149,13 +156,10 @@ static uint32_t hp_ioport_read(void *opaque, uint32_t addr)
switch (addr)
{
- case HP_ISRSTATUS:
- if (s->isr != 0){
- printf("hp_ioport_read s->isr=0x%x\n", s->isr);
- }
- ret = s->isr;
- if (ret & HPISR_RX) {
- s->isr &= ~HPISR_RX;
+ case HSR_REGISTER:
+ ret = s->hsr;
+ if (ret & HSR_VDR) {
+ s->hsr &= ~HSR_VDR;
}
break;
case HP_RXSIZE:
@@ -192,13 +196,8 @@ static void hp_map(PCIDevice *pci_dev, int region_num,
static void hypercall_update_irq(HypercallState *s)
{
- printf("hypercall_update_irq\n");
-
- if (s->cmd &= HP_CMD_DI) {
- return;
- }
- /* PCI irq */
- pci_set_irq(s->pci_dev, 0, 1);
+ /* PCI irq */
+ pci_set_irq(s->pci_dev, 0, !(s->hcr & HCR_DI));
}
void pci_hypercall_init(PCIBus *bus)
@@ -250,24 +249,21 @@ static int vmchannel_can_read(void *opaque)
static void vmchannel_read(void *opaque, const uint8_t *buf, int size)
{
int i;
-
- printf("vmchannel_read buf:%p, size:%d\n", buf, size);
- for(i = 0; i < size; i++) {
- printf("%x,", buf[i]);
- }
- printf("\n");
+
+#ifdef HYPERCALL_DEBUG
+ /* buf is a raw byte buffer of 'size' bytes with no NUL terminator: bound the print */
+ printf("vmchannel_read buf:%.*s, size:%d\n", size, (const char *)buf, size);
+#endif
// if the hypercall device is in interrupts disabled state, don't accept the data
- if (pHypercallState->cmd &= HP_CMD_DI) {
+ if (pHypercallState->hcr & HCR_DI) {
return;
}
for(i = 0; i < size; i++) {
- //printf("buf[i%d]=%x\n",i, buf[i]);
pHypercallState->RxBuff[i] = buf[i];
}
pHypercallState->rxsize = size;
- pHypercallState->isr = HPISR_RX;
+ pHypercallState->hsr = HSR_VDR;
hypercall_update_irq(pHypercallState);
}
@@ -275,7 +271,9 @@ void vmchannel_init(CharDriverState *hd)
{
vmchannel_hd = hd;
- //printf("vmchannel_init\n");
+#ifdef HYPERCALL_DEBUG
+ printf("vmchannel_init\n");
+#endif
use_hypercall_dev = 1;
qemu_chr_add_read_handler(vmchannel_hd, vmchannel_can_read, vmchannel_read, &pHypercallState);
diff --git a/hw/vga.c b/hw/vga.c
index 4c1e57e4b..0e7613bcd 100644
--- a/hw/vga.c
+++ b/hw/vga.c
@@ -1396,9 +1396,13 @@ static void vga_draw_graphic(VGAState *s, int full_update)
/* HACK ALERT */
#define BITMAP_SIZE ((8*1024*1024) / 4096 / 8 / sizeof(long))
unsigned long bitmap[BITMAP_SIZE];
+ int r;
- if (kvm_allowed)
- kvm_get_dirty_pages(kvm_context, 1, &bitmap);
+ if (kvm_allowed) {
+ r = kvm_get_dirty_pages(kvm_context, 1, &bitmap);
+ if (r < 0)
+ fprintf(stderr, "kvm: get_dirty_pages returned %d\n", r);
+ }
#endif
full_update |= update_basic_params(s);
diff --git a/kvm/Makefile b/kvm/Makefile
index 48dda0264..48549299d 100644
--- a/kvm/Makefile
+++ b/kvm/Makefile
@@ -68,4 +68,4 @@ clean:
for i in $(if $(WANT_MODULE), kernel) user qemu; do \
make -C $$i clean; \
done
- rm -f config.make user/config.mak
+ rm -f config.mak user/config.mak
diff --git a/kvm/drivers/Makefile b/kvm/drivers/Makefile
index d0b681d43..56facbb0e 100644
--- a/kvm/drivers/Makefile
+++ b/kvm/drivers/Makefile
@@ -1,4 +1,5 @@
-KERNELDIR := /lib/modules/$(shell uname -r)/build
+include ../config.mak
+KERNELDIR ?= /lib/modules/$(shell uname -r)/build
KVERREL = $(patsubst /lib/modules/%/build,%,$(KERNELDIR))
DESTDIR=
diff --git a/kvm/drivers/hypercall.c b/kvm/drivers/hypercall.c
index 9c9462f66..e5f4c8b56 100644
--- a/kvm/drivers/hypercall.c
+++ b/kvm/drivers/hypercall.c
@@ -6,11 +6,12 @@
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/completion.h>
+#include <linux/interrupt.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/irq.h>
-#define HYPERCALL_DRIVER_NAME "Qumranet hypercall driver"
+#define HYPERCALL_DRIVER_NAME "Qumranet_hypercall_driver"
#define HYPERCALL_DRIVER_VERSION "1"
#define PCI_VENDOR_ID_HYPERCALL 0x5002
#define PCI_DEVICE_ID_HYPERCALL 0x2258
@@ -43,26 +44,68 @@ static struct pci_device_id hypercall_pci_tbl[] = {
};
MODULE_DEVICE_TABLE (pci, hypercall_pci_tbl);
+
+
+/****** Hypercall device definitions ***************/
+/* To be moved into a shared file with user space */
+#define HP_CMD 0x00 // The command register WR
+#define HP_ISRSTATUS 0x04 // Interrupt status reg RD
+#define HP_TXSIZE 0x08
+#define HP_TXBUFF 0x0c
+#define HP_RXSIZE 0x10
+#define HP_RXBUFF 0x14
+
+// HP_CMD register commands
+#define HP_CMD_DI 1 // disable interrupts
+#define HP_CMD_EI 2 // enable interrupts
+#define HP_CMD_INIT 4 // reset device
+#define HP_CMD_RESET (HP_CMD_INIT|HP_CMD_DI)
+
+/* Bits in HP_ISR - Interrupt status register */
+#define HPISR_RX 0x01 // Data is ready to be read
+
+#define HP_MEM_SIZE 0xE0
+/******* End of Hypercall device definitions */
+
+/* read PIO/MMIO register */
+#define HIO_READ8(reg, ioaddr) ioread8(ioaddr + (reg))
+#define HIO_READ16(reg, ioaddr) ioread16(ioaddr + (reg))
+#define HIO_READ32(reg, ioaddr) ioread32(ioaddr + (reg))
+
+/* write PIO/MMIO register */
+#define HIO_WRITE8(reg, val8, ioaddr) iowrite8((val8), ioaddr + (reg))
+#define HIO_WRITE16(reg, val16, ioaddr) iowrite16((val16), ioaddr + (reg))
+#define HIO_WRITE32(reg, val32, ioaddr) iowrite32((val32), ioaddr + (reg))
+
+
struct hypercall_dev {
struct pci_dev *pci_dev;
+ struct kobject kobject;
u32 state;
spinlock_t lock;
u8 name[128];
u16 irq;
u32 regs_len;
- void __iomem *mmio_addr;
+ void __iomem *io_addr;
unsigned long base_addr; /* device I/O address */
+ unsigned long cmd;
};
-
+static int hypercall_close(struct hypercall_dev* dev);
+static int hypercall_open(struct hypercall_dev *dev);
static void hypercall_cleanup_dev(struct hypercall_dev *dev);
+static irqreturn_t hypercall_interrupt(int irq, void *dev_instance,
+ struct pt_regs *regs);
+
+/*
+ * Not marked __exit: hypercall_sysfs_remove() is called from
+ * hypercall_remove_one(), which is a device-removal path rather than
+ * module-exit-only code, and its definition carries no __exit either.
+ */
+static void hypercall_sysfs_remove(struct hypercall_dev *dev);
+static int hypercall_sysfs_add(struct hypercall_dev *dev);
static int __devinit hypercall_init_board(struct pci_dev *pdev,
struct hypercall_dev **dev_out)
{
- unsigned long *ioaddr;
+ unsigned long ioaddr;
struct hypercall_dev *dev;
int rc;
u32 disable_dev_on_err = 0;
@@ -101,17 +144,17 @@ static int __devinit hypercall_init_board(struct pci_dev *pdev,
if (rc)
goto err_out;
- pci_set_master (pdev);
-
#define USE_IO_OPS 1
#ifdef USE_IO_OPS
- ioaddr = pci_iomap(pdev, 0, 0);
+ ioaddr = (unsigned long)pci_iomap(pdev, 0, 0);
+ //ioaddr = ioport_map(pio_start, pio_len);
if (!ioaddr) {
printk(KERN_ERR "%s: cannot map PIO, aborting\n", pci_name(pdev));
rc = -EIO;
goto err_out;
}
- dev->base_addr = (unsigned long)ioaddr;
+ dev->base_addr = (unsigned long)pio_start;
+ dev->io_addr = (void*)ioaddr;
dev->regs_len = pio_len;
#else
ioaddr = pci_iomap(pdev, 1, 0);
@@ -121,6 +164,7 @@ static int __devinit hypercall_init_board(struct pci_dev *pdev,
goto err_out;
}
dev->base_addr = ioaddr;
+ dev->io_addr = (void*)ioaddr;
dev->regs_len = mmio_len;
#endif /* USE_IO_OPS */
@@ -161,7 +205,13 @@ static int __devinit hypercall_init_one(struct pci_dev *pdev,
spin_lock_init(&dev->lock);
pci_set_drvdata(pdev, dev);
- printk (KERN_INFO "%s: 0x%lx, IRQ %d\n", dev->name, dev->base_addr, dev->irq);
+ printk (KERN_INFO "name=%s: base_addr=0x%lx, io_addr=0x%lx, IRQ=%d\n",
+ dev->name, dev->base_addr, (unsigned long)dev->io_addr, dev->irq);
+ hypercall_open(dev);
+
+ if (hypercall_sysfs_add(dev) != 0)
+ return -1;
+
return 0;
}
@@ -171,10 +221,111 @@ static void __devexit hypercall_remove_one(struct pci_dev *pdev)
assert(dev != NULL);
+ hypercall_close(dev);
+ hypercall_sysfs_remove(dev);
hypercall_cleanup_dev(dev);
pci_disable_device(pdev);
}
+/*
+ * Send len bytes from buf to the host through the device TX registers.
+ *
+ * The length is written to HP_TXSIZE, then every byte is written to
+ * HP_TXBUFF; dev->lock is held for the whole sequence.
+ *
+ * Returns 0 on success, or -EINVAL when len is larger than HP_MEM_SIZE.
+ */
+static int hypercall_tx(struct hypercall_dev *dev, unsigned char *buf, size_t len)
+{
+	void __iomem *ioaddr = (void __iomem*)dev->io_addr;
+	size_t i;	/* same type as len, so the loop bound compares unsigned to unsigned */
+
+	if (len > HP_MEM_SIZE)
+		return -EINVAL;
+
+	spin_lock(&dev->lock);
+	HIO_WRITE8(HP_TXSIZE, len, ioaddr);
+	for (i = 0; i < len; i++)
+		HIO_WRITE8(HP_TXBUFF, buf[i], ioaddr);
+	spin_unlock(&dev->lock);
+
+	return 0;
+}
+
+
+/*
+ * The interrupt handler does all of the rx work and cleans up
+ * after the tx.
+ *
+ * Reads the status register; if HPISR_RX is not set the interrupt is
+ * treated as belonging to another device on the shared line and
+ * IRQ_NONE is returned. Otherwise device interrupts are disabled, up to
+ * HP_MEM_SIZE bytes are copied out of the RX buffer, interrupts are
+ * re-enabled, a fixed reply is transmitted and IRQ_HANDLED is returned.
+ */
+static irqreturn_t hypercall_interrupt(int irq, void *dev_instance,
+				       struct pt_regs *regs)
+{
+	struct hypercall_dev *dev = (struct hypercall_dev *)dev_instance;
+	void __iomem *ioaddr = (void __iomem*)dev->io_addr;
+	u32 status;
+	int irq_handled = IRQ_NONE;
+	int rx_buf_size;
+	int i;
+	/* one spare byte so the NUL terminator fits after HP_MEM_SIZE data bytes */
+	u8 buffer[HP_MEM_SIZE + 1];
+	u8 *pbuf;
+
+	DPRINTK("base addr is 0x%lx, io_addr=0x%lx\n", dev->base_addr, (long)dev->io_addr);
+
+	spin_lock(&dev->lock);
+	status = HIO_READ8(HP_ISRSTATUS, ioaddr);
+	DPRINTK("irq status is 0x%x\n", status);
+
+	/* shared irq? */
+	if (unlikely((status & HPISR_RX) == 0)) {
+		DPRINTK("not handeling irq, not ours\n");
+		goto out;
+	}
+
+	/* Disable device interrupts */
+	HIO_WRITE8(HP_CMD, HP_CMD_DI, ioaddr);
+	DPRINTK("disable device interrupts\n");
+
+	rx_buf_size = HIO_READ8(HP_RXSIZE, ioaddr);
+	DPRINTK("Rx buffer size is %d\n", rx_buf_size);
+
+	/* never copy more than the local buffer's data area */
+	if (rx_buf_size > HP_MEM_SIZE)
+		rx_buf_size = HP_MEM_SIZE;
+
+	for (i=0, pbuf=buffer; i<rx_buf_size; i++, pbuf++) {
+		*pbuf = HIO_READ8(HP_RXBUFF, ioaddr + i);
+		DPRINTK("Read 0x%x as dword %d\n", *pbuf, i);
+	}
+	*pbuf = '\0';
+	DPRINTK("Read buffer %s", (char*)buffer);
+
+	HIO_WRITE8(HP_CMD, HP_CMD_EI, ioaddr);
+	DPRINTK("Enable interrupt\n");
+	irq_handled = IRQ_HANDLED;
+ out:
+	spin_unlock(&dev->lock);
+
+	/*
+	 * Reply only to interrupts that were ours. hypercall_tx() takes
+	 * dev->lock itself, so it must run after the unlock above.
+	 */
+	if (irq_handled == IRQ_HANDLED)
+		hypercall_tx(dev, "hello host", sizeof("hello host"));
+	return irq_handled;
+}
+
+
+/*
+ * Install hypercall_interrupt() as a shared (SA_SHIRQ) handler on dev->irq,
+ * using dev->name as the irq name and dev as the handler cookie.
+ *
+ * Returns 0 on success, or the error returned by request_irq().
+ */
+static int hypercall_open(struct hypercall_dev *dev)
+{
+ int rc;
+
+ rc = request_irq(dev->irq, &hypercall_interrupt,
+ SA_SHIRQ, dev->name, dev);
+ if (rc) {
+ printk(KERN_ERR "%s failed to request an irq\n", __FUNCTION__);
+ return rc;
+ }
+
+ //hypercall_thread_start(dev);
+
+ return 0;
+}
+
+/*
+ * Undo hypercall_open(): calls synchronize_irq() on dev->irq and then
+ * releases the handler registered with dev as its cookie.
+ *
+ * Always returns 0.
+ */
+static int hypercall_close(struct hypercall_dev* dev)
+{
+ //hypercall_thread_stop(dev);
+ synchronize_irq(dev->irq);
+ free_irq(dev->irq, dev);
+
+ return 0;
+}
+
#ifdef CONFIG_PM
static int hypercall_suspend(struct pci_dev *pdev, pm_message_t state)
@@ -201,7 +352,8 @@ static void hypercall_cleanup_dev(struct hypercall_dev *dev)
{
DPRINTK("cleaning up\n");
pci_release_regions(dev->pci_dev);
- pci_iounmap(dev->pci_dev, (void*)dev->base_addr);
+ pci_iounmap(dev->pci_dev, (void*)dev->io_addr);
+ pci_set_drvdata (dev->pci_dev, NULL);
kfree(dev);
}
@@ -227,5 +379,137 @@ static void __exit hypercall_cleanup_module(void)
pci_unregister_driver(&hypercall_pci_driver);
}
+/*
+ * sysfs support
+ */
+
+/*
+ * A sysfs attribute of a hypercall device.
+ * show formats the attribute into buf; store receives the value already
+ * parsed to an unsigned long. Either callback may be NULL.
+ */
+struct hypercall_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct hypercall_dev*, char *buf);
+ ssize_t (*store)(struct hypercall_dev*, unsigned long val);
+};
+
+/*
+ * sysfs_ops.show dispatcher: recovers the enclosing hypercall_attribute
+ * and hypercall_dev from attr/kobj and forwards to the attribute's show
+ * callback. Returns -EIO when the attribute has no show callback.
+ */
+static ssize_t hypercall_attribute_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct hypercall_attribute *hypercall_attr;
+ struct hypercall_dev *hdev;
+
+ hypercall_attr = container_of(attr, struct hypercall_attribute, attr);
+ hdev = container_of(kobj, struct hypercall_dev, kobject);
+
+ if (!hypercall_attr->show)
+ return -EIO;
+
+ return hypercall_attr->show(hdev, buf);
+}
+
+/*
+ * sysfs_ops.store dispatcher: parses buf as an unsigned long (base
+ * auto-detected, base argument 0) and forwards it to the attribute's
+ * store callback.
+ *
+ * Returns -EIO when the attribute has no store callback, count when the
+ * callback returns 0, and the callback's own return value otherwise.
+ */
+static ssize_t hypercall_attribute_store(struct kobject *kobj,
+ struct attribute *attr, const char *buf, size_t count)
+{
+ struct hypercall_attribute *hypercall_attr;
+ struct hypercall_dev *hdev;
+ char *endp;
+ unsigned long val;
+ int rc;
+
+ /* endp is filled in by the parser but not inspected afterwards */
+ val = simple_strtoul(buf, &endp, 0);
+
+ hypercall_attr = container_of(attr, struct hypercall_attribute, attr);
+ hdev = container_of(kobj, struct hypercall_dev, kobject);
+
+ if (!hypercall_attr->store)
+ return -EIO;
+
+ rc = hypercall_attr->store(hdev, val);
+ if (!rc)
+ rc = count;
+ return rc;
+}
+
+#define MAKE_HYPERCALL_R_ATTR(_name) \
+static ssize_t _name##_show(struct hypercall_dev *hdev, char *buf) \
+{ \
+ return sprintf(buf, "%lu\n", (unsigned long)hdev->_name); \
+} \
+struct hypercall_attribute hypercall_attr_##_name = __ATTR_RO(_name)
+
+/*
+ * Define a readable and writable sysfs attribute backed by hdev->_name.
+ * _store returns ssize_t so that it matches the store member of
+ * struct hypercall_attribute; write access is limited to the owner
+ * (S_IWUSR) instead of being world-writable.
+ */
+#define MAKE_HYPERCALL_WR_ATTR(_name) \
+static ssize_t _name##_store(struct hypercall_dev *hdev, unsigned long val) \
+{ \
+	hdev->_name = (typeof(hdev->_name))val; \
+	return 0; \
+} \
+static ssize_t _name##_show(struct hypercall_dev *hdev, char *buf) \
+{ \
+	return sprintf(buf, "%lu\n", (unsigned long)hdev->_name); \
+} \
+struct hypercall_attribute hypercall_attr_##_name = \
+	__ATTR(_name,S_IRUGO|S_IWUSR,_name##_show,_name##_store)
+
+MAKE_HYPERCALL_R_ATTR(base_addr);
+MAKE_HYPERCALL_R_ATTR(irq);
+MAKE_HYPERCALL_WR_ATTR(cmd);
+
+#define GET_HYPERCALL_ATTR(_name) (&hypercall_attr_##_name.attr)
+
+static struct attribute *hypercall_default_attrs[] = {
+ GET_HYPERCALL_ATTR(base_addr),
+ GET_HYPERCALL_ATTR(irq),
+ GET_HYPERCALL_ATTR(cmd),
+ NULL
+};
+
+static struct sysfs_ops hypercall_sysfs_ops = {
+ .show = hypercall_attribute_show,
+ .store = hypercall_attribute_store,
+};
+
+static void hypercall_sysfs_release(struct kobject *kobj)
+{
+ DPRINTK(" called for obj name %s\n", kobj->name);
+}
+
+static struct kobj_type hypercall_ktype = {
+ .release = hypercall_sysfs_release,
+ .sysfs_ops = &hypercall_sysfs_ops,
+ .default_attrs = hypercall_default_attrs
+};
+
+
+/*
+ * Publish the device in sysfs: initialise dev->kobject with
+ * hypercall_ktype, name it HYPERCALL_DRIVER_NAME, add it, and create a
+ * link with the same name under the PCI device's kobject.
+ *
+ * Returns 0 on success or the first non-zero error code encountered.
+ * If creating the link fails, the kobject added just before is deleted again.
+ */
+static int hypercall_sysfs_add(struct hypercall_dev *dev)
+{
+ int rc;
+
+ kobject_init(&dev->kobject);
+ dev->kobject.ktype = &hypercall_ktype;
+ rc = kobject_set_name(&dev->kobject, "%s", HYPERCALL_DRIVER_NAME);
+ if (rc != 0) {
+ printk("%s: kobject_set_name failed, err=%d\n", __FUNCTION__, rc);
+ return rc;
+ }
+
+ rc = kobject_add(&dev->kobject);
+ if (rc != 0) {
+ printk("%s: kobject_add failed, err=%d\n", __FUNCTION__, rc);
+ return rc;
+ }
+
+ rc = sysfs_create_link(&dev->pci_dev->dev.kobj, &dev->kobject,
+ HYPERCALL_DRIVER_NAME);
+ if (rc != 0) {
+ printk("%s: sysfs_create_link failed, err=%d\n", __FUNCTION__, rc);
+ kobject_del(&dev->kobject);
+ }
+
+ return rc;
+}
+
+/*
+ * Reverse of hypercall_sysfs_add(): remove the link from the PCI device's
+ * kobject, then delete the device kobject.
+ */
+static void hypercall_sysfs_remove(struct hypercall_dev *dev)
+{
+ sysfs_remove_link(&dev->pci_dev->dev.kobj, HYPERCALL_DRIVER_NAME);
+ kobject_del(&dev->kobject);
+}
+
module_init(hypercall_init_module);
module_exit(hypercall_cleanup_module);
diff --git a/kvm/kernel/Makefile b/kvm/kernel/Makefile
index 454f5f1bd..78e31a4c7 100644
--- a/kvm/kernel/Makefile
+++ b/kvm/kernel/Makefile
@@ -1,4 +1,5 @@
-KERNELDIR := /lib/modules/$(shell uname -r)/build
+include ../config.mak
+KERNELDIR ?= /lib/modules/$(shell uname -r)/build
KVERREL = $(patsubst /lib/modules/%/build,%,$(KERNELDIR))
DESTDIR=
diff --git a/kvm/kernel/external-module-compat.h b/kvm/kernel/external-module-compat.h
index 830c46436..79608730f 100644
--- a/kvm/kernel/external-module-compat.h
+++ b/kvm/kernel/external-module-compat.h
@@ -72,6 +72,28 @@ static inline int smp_call_function_single1(int cpu, void (*func)(void *info),
* The cpu hotplug stubs are broken if !CONFIG_CPU_HOTPLUG
*/
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,15)
+#define DEFINE_MUTEX(a) DECLARE_MUTEX(a)
+#define mutex_lock_interruptible(a) down_interruptible(a)
+#define mutex_unlock(a) up(a)
+#define mutex_lock(a) down(a)
+#define mutex_init(a) init_MUTEX(a)
+#define mutex_trylock(a) down_trylock(a)
+#define mutex semaphore
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)
+#ifndef kzalloc
+/* Compat kzalloc() for kernels older than 2.6.14: kmalloc() then zero the block. */
+#define kzalloc(size,flags) \
+({ \
+	void *__ret = kmalloc(size, flags); \
+	if (__ret) \
+		memset(__ret, 0, size); \
+	__ret; \
+})
+#endif
+#endif
+
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21)
#ifndef CONFIG_HOTPLUG_CPU
diff --git a/kvm/kernel/include/linux/kvm_para.h b/kvm/kernel/include/linux/kvm_para.h
new file mode 100644
index 000000000..3b292565a
--- /dev/null
+++ b/kvm/kernel/include/linux/kvm_para.h
@@ -0,0 +1,73 @@
+#ifndef __LINUX_KVM_PARA_H
+#define __LINUX_KVM_PARA_H
+
+/*
+ * Guest OS interface for KVM paravirtualization
+ *
+ * Note: this interface is totally experimental, and is certain to change
+ * as we make progress.
+ */
+
+/*
+ * Per-VCPU descriptor area shared between guest and host. Writable to
+ * both guest and host. Registered with the host by the guest when
+ * a guest acknowledges paravirtual mode.
+ *
+ * NOTE: all addresses are guest-physical addresses (gpa), to make it
+ * easier for the hypervisor to map between the various addresses.
+ */
+struct kvm_vcpu_para_state {
+ /*
+ * API version information for compatibility. If there's any support
+ * mismatch (too old host trying to execute too new guest) then
+ * the host will deny entry into paravirtual mode. Any other
+ * combination (new host + old guest and new host + new guest)
+ * is supposed to work - new host versions will support all old
+ * guest API versions.
+ */
+ u32 guest_version;
+ u32 host_version;
+ u32 size;
+ u32 ret;
+
+ /*
+ * The address of the vm exit instruction (VMCALL or VMMCALL),
+ * which the host will patch according to the CPU model the
+ * VM runs on:
+ */
+ u64 hypercall_gpa;
+
+} __attribute__ ((aligned(PAGE_SIZE)));
+
+#define KVM_PARA_API_VERSION 1
+
+/*
+ * This is used for an RDMSR's ECX parameter to probe for a KVM host.
+ * Hopefully no CPU vendor will use up this number. This is placed well
+ * out of way of the typical space occupied by CPU vendors' MSR indices,
+ * and we think (or at least hope) it won't be occupied in the future
+ * either.
+ */
+#define MSR_KVM_API_MAGIC 0x87655678
+
+#define KVM_EINVAL 1
+
+/*
+ * Hypercall calling convention:
+ *
+ * Each hypercall may have 0-6 parameters.
+ *
+ * 64-bit hypercall index is in RAX, goes from 0 to __NR_hypercalls-1
+ *
+ * 64-bit parameters 1-6 are in the standard gcc x86_64 calling convention
+ * order: RDI, RSI, RDX, RCX, R8, R9.
+ *
+ * 32-bit index is EBX, parameters are: EAX, ECX, EDX, ESI, EDI, EBP.
+ * (the first 3 are according to the gcc regparm calling convention)
+ *
+ * No registers are clobbered by the hypercall, except that the
+ * return value is in RAX.
+ */
+#define __NR_hypercalls 0
+
+#endif
diff --git a/kvm/kernel/include/linux/mutex.h b/kvm/kernel/include/linux/mutex.h
new file mode 100644
index 000000000..71b2ae109
--- /dev/null
+++ b/kvm/kernel/include/linux/mutex.h
@@ -0,0 +1,5 @@
+/*
+ * Empty file to satisfy #include <linux/mutex.h> for older kernels.
+ */
+
+
diff --git a/kvm/kernel/kvm.h b/kvm/kernel/kvm.h
index 04574a9d4..41cc27de4 100644
--- a/kvm/kernel/kvm.h
+++ b/kvm/kernel/kvm.h
@@ -14,6 +14,7 @@
#include "vmx.h"
#include <linux/kvm.h>
+#include <linux/kvm_para.h>
#define CR0_PE_MASK (1ULL << 0)
#define CR0_TS_MASK (1ULL << 3)
@@ -237,6 +238,9 @@ struct kvm_vcpu {
unsigned long cr0;
unsigned long cr2;
unsigned long cr3;
+ gpa_t para_state_gpa;
+ struct page *para_state_page;
+ gpa_t hypercall_gpa;
unsigned long cr4;
unsigned long cr8;
u64 pdptrs[4]; /* pae */
@@ -382,6 +386,8 @@ struct kvm_arch_ops {
int (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
int (*vcpu_setup)(struct kvm_vcpu *vcpu);
void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+ void (*patch_hypercall)(struct kvm_vcpu *vcpu,
+ unsigned char *hypercall_addr);
};
extern struct kvm_stat kvm_stat;
@@ -476,6 +482,8 @@ void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes);
int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
+int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
u32 error_code)
{
@@ -523,7 +531,7 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
{
struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
- return (struct kvm_mmu_page *)page->private;
+ return (struct kvm_mmu_page *)page_private(page);
}
static inline u16 read_fs(void)
diff --git a/kvm/kernel/kvm_main.c b/kvm/kernel/kvm_main.c
index f8b70bbce..a8b3691af 100644
--- a/kvm/kernel/kvm_main.c
+++ b/kvm/kernel/kvm_main.c
@@ -126,10 +126,8 @@ static inline int valid_vcpu(int n)
return likely(n >= 0 && n < KVM_MAX_VCPUS);
}
-int kvm_read_guest(struct kvm_vcpu *vcpu,
- gva_t addr,
- unsigned long size,
- void *dest)
+int kvm_read_guest(struct kvm_vcpu *vcpu, gva_t addr, unsigned long size,
+ void *dest)
{
unsigned char *host_buf = dest;
unsigned long req_size = size;
@@ -161,10 +159,8 @@ int kvm_read_guest(struct kvm_vcpu *vcpu,
}
EXPORT_SYMBOL_GPL(kvm_read_guest);
-int kvm_write_guest(struct kvm_vcpu *vcpu,
- gva_t addr,
- unsigned long size,
- void *data)
+int kvm_write_guest(struct kvm_vcpu *vcpu, gva_t addr, unsigned long size,
+ void *data)
{
unsigned char *host_buf = data;
unsigned long req_size = size;
@@ -457,7 +453,7 @@ EXPORT_SYMBOL_GPL(set_cr4);
void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
if (is_long_mode(vcpu)) {
- if ( cr3 & CR3_L_MODE_RESEVED_BITS) {
+ if (cr3 & CR3_L_MODE_RESEVED_BITS) {
printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
inject_gp(vcpu);
return;
@@ -674,7 +670,7 @@ raced:
| __GFP_ZERO);
if (!new.phys_mem[i])
goto out_free;
- new.phys_mem[i]->private = 0;
+ set_page_private(new.phys_mem[i],0);
}
}
@@ -774,7 +770,6 @@ static int kvm_dev_ioctl_get_dirty_log(struct kvm *kvm,
if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
goto out;
-
if (any) {
cleared = 0;
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
@@ -903,8 +898,9 @@ static int emulator_read_emulated(unsigned long addr,
return X86EMUL_CONTINUE;
else {
gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
+
if (gpa == UNMAPPED_GVA)
- return vcpu_printf(vcpu, "not present\n"), X86EMUL_PROPAGATE_FAULT;
+ return X86EMUL_PROPAGATE_FAULT;
vcpu->mmio_needed = 1;
vcpu->mmio_phys_addr = gpa;
vcpu->mmio_size = bytes;
@@ -1142,6 +1138,42 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
}
EXPORT_SYMBOL_GPL(emulate_instruction);
+/*
+ * Handle a hypercall issued by the guest.
+ *
+ * Loads the hypercall number and six arguments from the vcpu registers
+ * (RAX + RDI,RSI,RDX,RCX,R8,R9 in long mode; otherwise RBX +
+ * RAX,RCX,RDX,RSI,RDI,RBP masked to 32 bits). The dispatch switch has
+ * only an empty default case, so the result stored in the guest's RAX
+ * is always -KVM_EINVAL.
+ *
+ * Always returns 1.
+ */
+int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ unsigned long nr, a0, a1, a2, a3, a4, a5, ret;
+
+ kvm_arch_ops->decache_regs(vcpu);
+ ret = -KVM_EINVAL;
+#ifdef CONFIG_X86_64
+ if (is_long_mode(vcpu)) {
+ nr = vcpu->regs[VCPU_REGS_RAX];
+ a0 = vcpu->regs[VCPU_REGS_RDI];
+ a1 = vcpu->regs[VCPU_REGS_RSI];
+ a2 = vcpu->regs[VCPU_REGS_RDX];
+ a3 = vcpu->regs[VCPU_REGS_RCX];
+ a4 = vcpu->regs[VCPU_REGS_R8];
+ a5 = vcpu->regs[VCPU_REGS_R9];
+ } else
+#endif
+ {
+ /* "& -1u" keeps only the low 32 bits of each register */
+ nr = vcpu->regs[VCPU_REGS_RBX] & -1u;
+ a0 = vcpu->regs[VCPU_REGS_RAX] & -1u;
+ a1 = vcpu->regs[VCPU_REGS_RCX] & -1u;
+ a2 = vcpu->regs[VCPU_REGS_RDX] & -1u;
+ a3 = vcpu->regs[VCPU_REGS_RSI] & -1u;
+ a4 = vcpu->regs[VCPU_REGS_RDI] & -1u;
+ a5 = vcpu->regs[VCPU_REGS_RBP] & -1u;
+ }
+ /* no hypercall numbers are handled yet: every nr falls through to default */
+ switch (nr) {
+ default:
+ ;
+ }
+ vcpu->regs[VCPU_REGS_RAX] = ret;
+ kvm_arch_ops->cache_regs(vcpu);
+ return 1;
+}
+EXPORT_SYMBOL_GPL(kvm_hypercall);
+
static u64 mk_cr_64(u64 curr_cr, u32 new_val)
{
return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
@@ -1208,6 +1240,73 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
}
}
+/*
+ * Register the para guest with the host:
+ *
+ * para_state_gpa is the guest-physical address of the guest's
+ * struct kvm_vcpu_para_state.
+ *
+ * Returns 1 when that address is not page aligned or cannot be
+ * translated to a host-physical address. Returns 0 in every other case;
+ * the outcome is then reported to the guest through para_state->ret
+ * (0 on success, -KVM_EINVAL if the guest version is newer than
+ * KVM_PARA_API_VERSION or hypercall_gpa cannot be translated).
+ */
+static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa)
+{
+ struct kvm_vcpu_para_state *para_state;
+ hpa_t para_state_hpa, hypercall_hpa;
+ struct page *para_state_page;
+ unsigned char *hypercall;
+ gpa_t hypercall_gpa;
+
+ printk(KERN_DEBUG "kvm: guest trying to enter paravirtual mode\n");
+ printk(KERN_DEBUG ".... para_state_gpa: %08Lx\n", para_state_gpa);
+
+ /*
+ * Needs to be page aligned:
+ */
+ if (para_state_gpa != PAGE_ALIGN(para_state_gpa))
+ goto err_gp;
+
+ para_state_hpa = gpa_to_hpa(vcpu, para_state_gpa);
+ printk(KERN_DEBUG ".... para_state_hpa: %08Lx\n", para_state_hpa);
+ if (is_error_hpa(para_state_hpa))
+ goto err_gp;
+
+ para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT);
+ para_state = kmap_atomic(para_state_page, KM_USER0);
+
+ printk(KERN_DEBUG ".... guest version: %d\n", para_state->guest_version);
+ printk(KERN_DEBUG ".... size: %d\n", para_state->size);
+
+ /* the host version is written back before any check that can fail below */
+ para_state->host_version = KVM_PARA_API_VERSION;
+ /*
+ * We cannot support guests that try to register themselves
+ * with a newer API version than the host supports:
+ */
+ if (para_state->guest_version > KVM_PARA_API_VERSION) {
+ para_state->ret = -KVM_EINVAL;
+ goto err_kunmap_skip;
+ }
+
+ hypercall_gpa = para_state->hypercall_gpa;
+ hypercall_hpa = gpa_to_hpa(vcpu, hypercall_gpa);
+ printk(KERN_DEBUG ".... hypercall_hpa: %08Lx\n", hypercall_hpa);
+ if (is_error_hpa(hypercall_hpa)) {
+ para_state->ret = -KVM_EINVAL;
+ goto err_kunmap_skip;
+ }
+
+ printk(KERN_DEBUG "kvm: para guest successfully registered.\n");
+ vcpu->para_state_page = para_state_page;
+ vcpu->para_state_gpa = para_state_gpa;
+ vcpu->hypercall_gpa = hypercall_gpa;
+
+ /*
+ * Map the page holding the guest's hypercall instruction and let the
+ * arch code patch it in place.
+ * NOTE(review): only one page is mapped here; confirm patch_hypercall()
+ * cannot write past the end of that page when hypercall_gpa sits near
+ * a page boundary.
+ */
+ hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT),
+ KM_USER1) + (hypercall_hpa & ~PAGE_MASK);
+ kvm_arch_ops->patch_hypercall(vcpu, hypercall);
+ kunmap_atomic(hypercall, KM_USER1);
+
+ para_state->ret = 0;
+err_kunmap_skip:
+ kunmap_atomic(para_state, KM_USER0);
+ return 0;
+err_gp:
+ return 1;
+}
+
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
u64 data;
@@ -1316,6 +1415,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
case MSR_IA32_MISC_ENABLE:
vcpu->ia32_misc_enable_msr = data;
break;
+ /*
+ * This is the 'probe whether the host is KVM' logic:
+ */
+ case MSR_KVM_API_MAGIC:
+ return vcpu_register_para(vcpu, data);
+
default:
printk(KERN_ERR "kvm: unhandled wrmsr: 0x%x\n", msr);
return 1;
@@ -1800,12 +1905,11 @@ static long kvm_dev_ioctl(struct file *filp,
case KVM_GET_API_VERSION:
r = KVM_API_VERSION;
break;
- case KVM_CREATE_VCPU: {
+ case KVM_CREATE_VCPU:
r = kvm_dev_ioctl_create_vcpu(kvm, arg);
if (r)
goto out;
break;
- }
case KVM_RUN: {
struct kvm_run kvm_run;
@@ -2079,13 +2183,17 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
int cpu = (long)v;
switch (val) {
- case CPU_DEAD:
+ case CPU_DOWN_PREPARE:
case CPU_UP_CANCELED:
+ printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
+ cpu);
decache_vcpus_on_cpu(cpu);
smp_call_function_single(cpu, kvm_arch_ops->hardware_disable,
NULL, 0, 1);
break;
- case CPU_UP_PREPARE:
+ case CPU_ONLINE:
+ printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
+ cpu);
smp_call_function_single(cpu, kvm_arch_ops->hardware_enable,
NULL, 0, 1);
break;
diff --git a/kvm/kernel/kvm_svm.h b/kvm/kernel/kvm_svm.h
index 74cc862f4..624f1ca48 100644
--- a/kvm/kernel/kvm_svm.h
+++ b/kvm/kernel/kvm_svm.h
@@ -1,6 +1,7 @@
#ifndef __KVM_SVM_H
#define __KVM_SVM_H
+#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/list.h>
#include <asm/msr.h>
@@ -18,7 +19,7 @@ static const u32 host_save_msrs[] = {
MSR_IA32_LASTBRANCHTOIP, MSR_IA32_LASTINTFROMIP,MSR_IA32_LASTINTTOIP,*/
};
-#define NR_HOST_SAVE_MSRS (sizeof(host_save_msrs) / sizeof(*host_save_msrs))
+#define NR_HOST_SAVE_MSRS ARRAY_SIZE(host_save_msrs)
#define NUM_DB_REGS 4
struct vcpu_svm {
diff --git a/kvm/kernel/mmu.c b/kvm/kernel/mmu.c
index 22c426cd8..573867a50 100644
--- a/kvm/kernel/mmu.c
+++ b/kvm/kernel/mmu.c
@@ -298,18 +298,18 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte)
if (!is_rmap_pte(*spte))
return;
page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
- if (!page->private) {
+ if (!page_private(page)) {
rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
- page->private = (unsigned long)spte;
- } else if (!(page->private & 1)) {
+ set_page_private(page,(unsigned long)spte);
+ } else if (!(page_private(page) & 1)) {
rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
desc = mmu_alloc_rmap_desc(vcpu);
- desc->shadow_ptes[0] = (u64 *)page->private;
+ desc->shadow_ptes[0] = (u64 *)page_private(page);
desc->shadow_ptes[1] = spte;
- page->private = (unsigned long)desc | 1;
+ set_page_private(page,(unsigned long)desc | 1);
} else {
rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
- desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+ desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
desc = desc->more;
if (desc->shadow_ptes[RMAP_EXT-1]) {
@@ -337,12 +337,12 @@ static void rmap_desc_remove_entry(struct kvm_vcpu *vcpu,
if (j != 0)
return;
if (!prev_desc && !desc->more)
- page->private = (unsigned long)desc->shadow_ptes[0];
+ set_page_private(page,(unsigned long)desc->shadow_ptes[0]);
else
if (prev_desc)
prev_desc->more = desc->more;
else
- page->private = (unsigned long)desc->more | 1;
+ set_page_private(page,(unsigned long)desc->more | 1);
mmu_free_rmap_desc(vcpu, desc);
}
@@ -356,20 +356,20 @@ static void rmap_remove(struct kvm_vcpu *vcpu, u64 *spte)
if (!is_rmap_pte(*spte))
return;
page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
- if (!page->private) {
+ if (!page_private(page)) {
printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
BUG();
- } else if (!(page->private & 1)) {
+ } else if (!(page_private(page) & 1)) {
rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte);
- if ((u64 *)page->private != spte) {
+ if ((u64 *)page_private(page) != spte) {
printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n",
spte, *spte);
BUG();
}
- page->private = 0;
+ set_page_private(page,0);
} else {
rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte);
- desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+ desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
prev_desc = NULL;
while (desc) {
for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
@@ -398,11 +398,11 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
BUG_ON(!slot);
page = gfn_to_page(slot, gfn);
- while (page->private) {
- if (!(page->private & 1))
- spte = (u64 *)page->private;
+ while (page_private(page)) {
+ if (!(page_private(page) & 1))
+ spte = (u64 *)page_private(page);
else {
- desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+ desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
spte = desc->shadow_ptes[0];
}
BUG_ON(!spte);
@@ -1218,7 +1218,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
INIT_LIST_HEAD(&page_header->link);
if ((page = alloc_page(GFP_KERNEL)) == NULL)
goto error_1;
- page->private = (unsigned long)page_header;
+ set_page_private(page, (unsigned long)page_header);
page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT;
memset(__va(page_header->page_hpa), 0, PAGE_SIZE);
list_add(&page_header->link, &vcpu->free_pages);
diff --git a/kvm/kernel/paging_tmpl.h b/kvm/kernel/paging_tmpl.h
index b6b90e9e1..f3bcee904 100644
--- a/kvm/kernel/paging_tmpl.h
+++ b/kvm/kernel/paging_tmpl.h
@@ -128,8 +128,10 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
goto access_error;
#endif
- if (!(*ptep & PT_ACCESSED_MASK))
- *ptep |= PT_ACCESSED_MASK; /* avoid rmw */
+ if (!(*ptep & PT_ACCESSED_MASK)) {
+ mark_page_dirty(vcpu->kvm, table_gfn);
+ *ptep |= PT_ACCESSED_MASK;
+ }
if (walker->level == PT_PAGE_TABLE_LEVEL) {
walker->gfn = (*ptep & PT_BASE_ADDR_MASK)
@@ -185,6 +187,12 @@ static void FNAME(release_walker)(struct guest_walker *walker)
kunmap_atomic(walker->table, KM_USER0);
}
+static void FNAME(mark_pagetable_dirty)(struct kvm *kvm, /* helper: report one walker table frame via mark_page_dirty() */
+ struct guest_walker *walker)
+{
+ mark_page_dirty(kvm, walker->table_gfn[walker->level - 1]); /* table_gfn[] is indexed by level - 1, i.e. the entry for the walker's current level */
+}
+
static void FNAME(set_pte)(struct kvm_vcpu *vcpu, u64 guest_pte,
u64 *shadow_pte, u64 access_bits, gfn_t gfn)
{
@@ -348,12 +356,15 @@ static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu,
} else if (kvm_mmu_lookup_page(vcpu, gfn)) {
pgprintk("%s: found shadow page for %lx, marking ro\n",
__FUNCTION__, gfn);
+ mark_page_dirty(vcpu->kvm, gfn);
+ FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
*guest_ent |= PT_DIRTY_MASK;
*write_pt = 1;
return 0;
}
mark_page_dirty(vcpu->kvm, gfn);
*shadow_ent |= PT_WRITABLE_MASK;
+ FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
*guest_ent |= PT_DIRTY_MASK;
rmap_add(vcpu, shadow_ent);
@@ -430,9 +441,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
/*
* mmio: emulate if accessible, otherwise its a guest fault.
*/
- if (is_io_pte(*shadow_pte)) {
+ if (is_io_pte(*shadow_pte))
return 1;
- }
++kvm_stat.pf_fixed;
kvm_mmu_audit(vcpu, "post page fault (fixed)");
diff --git a/kvm/kernel/svm.c b/kvm/kernel/svm.c
index cf5f4979e..9f839e263 100644
--- a/kvm/kernel/svm.c
+++ b/kvm/kernel/svm.c
@@ -15,6 +15,7 @@
*/
#include <linux/module.h>
+#include <linux/kernel.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/profile.h>
@@ -75,7 +76,7 @@ struct svm_init_data {
static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
-#define NUM_MSR_MAPS (sizeof(msrpm_ranges) / sizeof(*msrpm_ranges))
+#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
#define MSRS_RANGE_SIZE 2048
#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
@@ -1042,22 +1043,22 @@ static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
addr_mask = io_adress(vcpu, _in, &kvm_run->io.address);
if (!addr_mask) {
- printk(KERN_DEBUG "%s: get io address failed\n", __FUNCTION__);
+ printk(KERN_DEBUG "%s: get io address failed\n",
+ __FUNCTION__);
return 1;
}
if (kvm_run->io.rep) {
- kvm_run->io.count = vcpu->regs[VCPU_REGS_RCX] & addr_mask;
+ kvm_run->io.count
+ = vcpu->regs[VCPU_REGS_RCX] & addr_mask;
kvm_run->io.string_down = (vcpu->svm->vmcb->save.rflags
& X86_EFLAGS_DF) != 0;
}
- } else {
+ } else
kvm_run->io.value = vcpu->svm->vmcb->save.rax;
- }
return 0;
}
-
static int nop_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
return 1;
@@ -1075,6 +1076,12 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 0;
}
+static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) /* handler installed for SVM_EXIT_VMMCALL */
+{
+vcpu->svm->vmcb->save.rip += 3; /* step the saved guest rip over the 3-byte VMMCALL (0f 01 d9) before dispatching */
+return kvm_hypercall(vcpu, kvm_run); /* kvm_hypercall()'s result is returned unchanged as the handler result */
+}
+
static int invalid_op_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
inject_ud(vcpu);
@@ -1275,7 +1282,7 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu,
[SVM_EXIT_TASK_SWITCH] = task_switch_interception,
[SVM_EXIT_SHUTDOWN] = shutdown_interception,
[SVM_EXIT_VMRUN] = invalid_op_interception,
- [SVM_EXIT_VMMCALL] = invalid_op_interception,
+ [SVM_EXIT_VMMCALL] = vmmcall_interception,
[SVM_EXIT_VMLOAD] = invalid_op_interception,
[SVM_EXIT_VMSAVE] = invalid_op_interception,
[SVM_EXIT_STGI] = invalid_op_interception,
@@ -1297,7 +1304,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
__FUNCTION__, vcpu->svm->vmcb->control.exit_int_info,
exit_code);
- if (exit_code >= sizeof(svm_exit_handlers) / sizeof(*svm_exit_handlers)
+ if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
|| svm_exit_handlers[exit_code] == 0) {
kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
printk(KERN_ERR "%s: 0x%x @ 0x%llx cr0 0x%lx rflags 0x%llx\n",
@@ -1668,6 +1675,18 @@ static int is_disabled(void)
return 0;
}
+static void
+svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
+{
+/*
+* Patch in the VMMCALL instruction: four bytes are written at hypercall[0..3]; vcpu is not used here.
+*/
+hypercall[0] = 0x0f;
+hypercall[1] = 0x01;
+hypercall[2] = 0xd9; /* 0f 01 d9 = VMMCALL */
+hypercall[3] = 0xc3; /* 0xc3 is the x86 near-RET opcode, placed right after the VMMCALL */
+}
+
static struct kvm_arch_ops svm_arch_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -1716,6 +1735,7 @@ static struct kvm_arch_ops svm_arch_ops = {
.run = svm_vcpu_run,
.skip_emulated_instruction = skip_emulated_instruction,
.vcpu_setup = svm_vcpu_setup,
+ .patch_hypercall = svm_patch_hypercall,
};
static int __init svm_init(void)
diff --git a/kvm/kernel/vmx.c b/kvm/kernel/vmx.c
index 1b8feea48..936aef68a 100644
--- a/kvm/kernel/vmx.c
+++ b/kvm/kernel/vmx.c
@@ -19,6 +19,7 @@
#include "vmx.h"
#include "kvm_vmx.h"
#include <linux/module.h>
+#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/profile.h>
@@ -27,7 +28,6 @@
#include "segment_descriptor.h"
-
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
@@ -76,7 +76,7 @@ static const u32 vmx_msr_index[] = {
#endif
MSR_EFER, MSR_K6_STAR,
};
-#define NR_VMX_MSR (sizeof(vmx_msr_index) / sizeof(*vmx_msr_index))
+#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
static inline int is_page_fault(u32 intr_info)
{
@@ -418,10 +418,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
case MSR_IA32_SYSENTER_ESP:
vmcs_write32(GUEST_SYSENTER_ESP, data);
break;
- case MSR_IA32_TIME_STAMP_COUNTER: {
+ case MSR_IA32_TIME_STAMP_COUNTER:
guest_write_tsc(data);
break;
- }
default:
msr = find_msr_entry(vcpu, msr_index);
if (msr) {
@@ -793,6 +792,9 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
*/
static void vmx_set_cr0_no_modeswitch(struct kvm_vcpu *vcpu, unsigned long cr0)
{
+ if (!vcpu->rmode.active && !(cr0 & CR0_PE_MASK))
+ enter_rmode(vcpu);
+
vcpu->rmode.active = ((cr0 & CR0_PE_MASK) == 0);
update_exception_bitmap(vcpu);
vmcs_writel(CR0_READ_SHADOW, cr0);
@@ -1128,6 +1130,8 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
if (rdmsr_safe(index, &data_low, &data_high) < 0)
continue;
+ if (wrmsr_safe(index, data_low, data_high) < 0)
+ continue;
data = data_low | ((u64)data_high << 32);
vcpu->host_msrs[j].index = index;
vcpu->host_msrs[j].reserved = 0;
@@ -1465,6 +1469,18 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 0;
}
+static void
+vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
+{
+/*
+* Patch in the VMCALL instruction: four bytes are written at hypercall[0..3]; vcpu is not used here.
+*/
+hypercall[0] = 0x0f;
+hypercall[1] = 0x01;
+hypercall[2] = 0xc1; /* 0f 01 c1 = VMCALL */
+hypercall[3] = 0xc3; /* 0xc3 is the x86 near-RET opcode, placed right after the VMCALL */
+}
+
static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
u64 exit_qualification;
@@ -1641,6 +1657,12 @@ static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 0;
}
+static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) /* handler installed for EXIT_REASON_VMCALL */
+{
+vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP)+3); /* read-modify-write GUEST_RIP to step over the 3-byte VMCALL (0f 01 c1) */
+return kvm_hypercall(vcpu, kvm_run); /* kvm_hypercall()'s result is returned unchanged as the handler result */
+}
+
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -1659,6 +1681,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
[EXIT_REASON_MSR_WRITE] = handle_wrmsr,
[EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window,
[EXIT_REASON_HLT] = handle_halt,
+ [EXIT_REASON_VMCALL] = handle_vmcall,
};
static const int kvm_vmx_max_exit_handlers =
@@ -2060,6 +2083,7 @@ static struct kvm_arch_ops vmx_arch_ops = {
.run = vmx_vcpu_run,
.skip_emulated_instruction = skip_emulated_instruction,
.vcpu_setup = vmx_vcpu_setup,
+ .patch_hypercall = vmx_patch_hypercall,
};
static int __init vmx_init(void)
diff --git a/kvm/kvm_stat b/kvm/kvm_stat
index 80274ae00..ff6cf96f3 100755
--- a/kvm/kvm_stat
+++ b/kvm/kvm_stat
@@ -1,7 +1,7 @@
#!/usr/bin/python
import curses
-import os, time
+import sys, os, time
class Stats:
def __init__(self):
@@ -18,6 +18,14 @@ class Stats:
self.values[key] = (newval, newdelta)
return self.values
+if not os.access('/sys/kernel/debug', os.F_OK):
+ print 'Please enable CONFIG_DEBUGFS in your kernel'
+ sys.exit(1)
+if not os.access('/sys/kernel/debug/kvm', os.F_OK):
+ print "Please mount debugfs ('mount -t debugfs debugfs /sys/kernel/debug')"
+ print "and ensure the kvm modules are loaded"
+ sys.exit(1)
+
stats = Stats()
def main(screen, stats):
diff --git a/kvm/user/kvmctl.c b/kvm/user/kvmctl.c
index 509c12b21..533d4aa0c 100644
--- a/kvm/user/kvmctl.c
+++ b/kvm/user/kvmctl.c
@@ -205,7 +205,7 @@ void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
}
-void kvm_get_dirty_pages(kvm_context_t kvm, int slot, void *buf)
+int kvm_get_dirty_pages(kvm_context_t kvm, int slot, void *buf)
{
int r;
struct kvm_dirty_log log = {
@@ -216,7 +216,7 @@ void kvm_get_dirty_pages(kvm_context_t kvm, int slot, void *buf)
r = ioctl(kvm->fd, KVM_GET_DIRTY_LOG, &log);
if (r == -1)
- exit(1);
+ return -errno;
}
static int more_io(struct kvm_run *run, int first_time)
@@ -234,27 +234,35 @@ static int handle_io(kvm_context_t kvm, struct kvm_run *run)
int first_time = 1;
int delta;
struct translation_cache tr;
+ int _in = (run->io.direction == KVM_EXIT_IO_IN);
+ int r;
translation_cache_init(&tr);
- regs.vcpu = run->vcpu;
- ioctl(kvm->fd, KVM_GET_REGS, &regs);
+ if (run->io.string || _in) {
+ regs.vcpu = run->vcpu;
+ r = ioctl(kvm->fd, KVM_GET_REGS, &regs);
+ if (r == -1)
+ return -errno;
+ }
delta = run->io.string_down ? -run->io.size : run->io.size;
while (more_io(run, first_time)) {
void *value_addr;
- int r;
- if (!run->io.string)
- value_addr = &regs.rax;
- else {
+ if (!run->io.string) {
+ if (_in)
+ value_addr = &regs.rax;
+ else
+ value_addr = &run->io.value;
+ } else {
r = translate(kvm, run->vcpu, &tr, run->io.address,
&value_addr);
if (r) {
fprintf(stderr, "failed translating I/O address %x\n",
run->io.address);
- exit(1);
+ return r;
}
}
@@ -280,8 +288,8 @@ static int handle_io(kvm_context_t kvm, struct kvm_run *run)
break;
}
default:
- fprintf(stderr, "bad I/O size\n");
- exit(1);
+ fprintf(stderr, "bad I/O size %d\n", run->io.size);
+ return -EMSGSIZE;
}
break;
}
@@ -300,13 +308,13 @@ static int handle_io(kvm_context_t kvm, struct kvm_run *run)
*(uint32_t *)value_addr);
break;
default:
- fprintf(stderr, "bad I/O size\n");
- exit(1);
+ fprintf(stderr, "bad I/O size %d\n", run->io.size);
+ return -EMSGSIZE;
}
break;
default:
- fprintf(stderr, "bad I/O size\n");
- exit(1);
+ fprintf(stderr, "bad I/O direction %d\n", run->io.direction);
+ return -EPROTO;
}
if (run->io.string) {
run->io.address += delta;
@@ -321,12 +329,22 @@ static int handle_io(kvm_context_t kvm, struct kvm_run *run)
}
first_time = 0;
if (r) {
- ioctl(kvm->fd, KVM_SET_REGS, &regs);
- return r;
+ int savedret = r;
+ r = ioctl(kvm->fd, KVM_SET_REGS, &regs);
+ if (r == -1)
+ return -errno;
+
+ return savedret;
}
}
- ioctl(kvm->fd, KVM_SET_REGS, &regs);
+ if (run->io.string || _in) {
+ r = ioctl(kvm->fd, KVM_SET_REGS, &regs);
+ if (r == -1)
+ return -errno;
+
+ }
+
run->emulated = 1;
return 0;
}
@@ -439,7 +457,7 @@ void kvm_show_regs(kvm_context_t kvm, int vcpu)
r = ioctl(fd, KVM_GET_REGS, &regs);
if (r == -1) {
perror("KVM_GET_REGS");
- exit(1);
+ return;
}
fprintf(stderr,
"rax %016llx rbx %016llx rcx %016llx rdx %016llx\n"
@@ -522,6 +540,11 @@ static int handle_halt(kvm_context_t kvm, struct kvm_run *kvm_run)
return kvm->callbacks->halt(kvm->opaque, kvm_run->vcpu);
}
+static int handle_shutdown(kvm_context_t kvm, struct kvm_run *kvm_run) /* called from the exit switch for KVM_EXIT_SHUTDOWN */
+{
+return kvm->callbacks->shutdown(kvm->opaque, kvm_run->vcpu); /* forwards to the client's shutdown callback; the pointer is not checked for NULL here */
+}
+
int try_push_interrupts(kvm_context_t kvm)
{
return kvm->callbacks->try_push_interrupts(kvm->opaque);
@@ -556,8 +579,9 @@ again:
kvm_run.emulated = 0;
kvm_run.mmio_completed = 0;
if (r == -1 && errno != EINTR) {
+ r = -errno;
printf("kvm_run: %m\n");
- exit(1);
+ return r;
}
if (r == -1) {
r = handle_io_window(kvm, &kvm_run);
@@ -567,7 +591,7 @@ again:
case KVM_EXIT_TYPE_FAIL_ENTRY:
fprintf(stderr, "kvm_run: failed entry, reason %u\n",
kvm_run.exit_reason & 0xffff);
- exit(1);
+ return -ENOEXEC;
break;
case KVM_EXIT_TYPE_VM_EXIT:
switch (kvm_run.exit_reason) {
@@ -600,6 +624,9 @@ again:
break;
case KVM_EXIT_IRQ_WINDOW_OPEN:
break;
+ case KVM_EXIT_SHUTDOWN:
+ r = handle_shutdown(kvm, &kvm_run);
+ break;
default:
fprintf(stderr, "unhandled vm exit: 0x%x\n", kvm_run.exit_reason);
kvm_show_regs(kvm, vcpu);
diff --git a/kvm/user/kvmctl.h b/kvm/user/kvmctl.h
index aacdd28c1..936c029ae 100644
--- a/kvm/user/kvmctl.h
+++ b/kvm/user/kvmctl.h
@@ -59,6 +59,7 @@ struct kvm_callbacks {
* on the host CPU.
*/
int (*halt)(void *opaque, int vcpu);
+ int (*shutdown)(void *opaque, int vcpu);
int (*io_window)(void *opaque);
int (*try_push_interrupts)(void *opaque);
void (*post_kvm_run)(void *opaque, struct kvm_run *kvm_run);
@@ -247,6 +248,6 @@ void *kvm_create_phys_mem(kvm_context_t, unsigned long phys_start,
unsigned long len, int slot, int log, int writable);
void kvm_destroy_phys_mem(kvm_context_t, unsigned long phys_start,
unsigned long len);
-void kvm_get_dirty_pages(kvm_context_t, int slot, void *buf);
+int kvm_get_dirty_pages(kvm_context_t, int slot, void *buf);
#endif
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 1a0f6e04d..401c7e12c 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -124,6 +124,15 @@ static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
| (rhs->avl * DESC_AVL_MASK);
}
+/* The segment reset values used by qemu are not compatible with SVM;
+ * this function fixes up the segment descriptor values accordingly. */
+static void fix_realmode_dataseg(struct kvm_segment *seg)
+{
+seg->type = 0x02; /* x86 descriptor type 2: read/write data segment */
+seg->present = 1; /* mark the segment as present */
+seg->s = 1; /* S bit set: code/data descriptor rather than a system descriptor */
+}
+
static void load_regs(CPUState *env)
{
struct kvm_regs regs;
@@ -182,6 +191,14 @@ static void load_regs(CPUState *env)
(sregs.cs.selector & 3);
sregs.ss.dpl = sregs.ss.selector & 3;
}
+
+ if (!(env->cr[0] & CR0_PG_MASK)) {
+ fix_realmode_dataseg(&sregs.ds);
+ fix_realmode_dataseg(&sregs.es);
+ fix_realmode_dataseg(&sregs.fs);
+ fix_realmode_dataseg(&sregs.gs);
+ fix_realmode_dataseg(&sregs.ss);
+ }
}
set_seg(&sregs.tr, &env->tr);
@@ -408,6 +425,7 @@ void kvm_save_registers(CPUState *env)
int kvm_cpu_exec(CPUState *env)
{
+ int r;
int pending = (!env->ready_for_interrupt_injection ||
((env->interrupt_request & CPU_INTERRUPT_HARD) &&
(env->eflags & IF_MASK)));
@@ -422,7 +440,11 @@ int kvm_cpu_exec(CPUState *env)
if (!saved_env[0])
saved_env[0] = env;
- kvm_run(kvm_context, 0);
+ r = kvm_run(kvm_context, 0);
+ if (r < 0) {
+ printf("kvm_run returned %d\n", r);
+ exit(1);
+ }
return 0;
}
@@ -587,6 +609,12 @@ static int kvm_halt(void *opaque, int vcpu)
return 1;
}
+
+static int kvm_shutdown(void *opaque, int vcpu) /* installed as the .shutdown callback in qemu_kvm_ops; neither argument is used */
+{
+qemu_system_reset_request(); /* a guest shutdown exit is turned into a qemu system reset request */
+return 1; /* NOTE(review): same value kvm_halt returns; its meaning to the kvm_run loop is not visible here - confirm */
+}
static struct kvm_callbacks qemu_kvm_ops = {
.cpuid = kvm_cpuid,
@@ -606,6 +634,7 @@ static struct kvm_callbacks qemu_kvm_ops = {
.writel = kvm_writel,
.writeq = kvm_writeq,
.halt = kvm_halt,
+ .shutdown = kvm_shutdown,
.io_window = kvm_io_window,
.try_push_interrupts = try_push_interrupts,
.post_kvm_run = post_kvm_run,
diff --git a/vl.c b/vl.c
index 50665a8ff..5a1f6a60d 100644
--- a/vl.c
+++ b/vl.c
@@ -5295,6 +5295,10 @@ int main_loop(void)
if (reset_requested) {
reset_requested = 0;
qemu_system_reset();
+#ifdef USE_KVM
+ if (kvm_allowed)
+ kvm_load_registers(env);
+#endif
ret = EXCP_INTERRUPT;
}
if (powerdown_requested) {