From 0b23c5d40ea933cfece3b4f69427f79c8a23256d Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Tue, 29 Nov 2011 06:34:48 +0100 Subject: malta: Fix regression (i8259 interrupts did not work) Commit 5632ae46d5bda798e971dae48ebb318ac2c3686a passes the address of i8259 to qemu_irq_proxy. i8259 is an auto variable with undefined value outside of mips_malta_init. This made the interrupt proxy unusable: either QEMU crashes, or the interrupt handler was not called. Ethernet for example no longer worked with MIPS Malta. v2: While v1 used a static variable for i8259, this patch introduces a qdev for the malta machine. i8259 is now part of the device status. This is a minimal qdev implementation to keep the patch small. Signed-off-by: Stefan Weil Signed-off-by: Aurelien Jarno (cherry picked from commit e9b40fd34ceb23461083d505a444a389c094455b) --- hw/mips_malta.c | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/hw/mips_malta.c b/hw/mips_malta.c index bb4974956..941b9bdd8 100644 --- a/hw/mips_malta.c +++ b/hw/mips_malta.c @@ -47,6 +47,7 @@ #include "mc146818rtc.h" #include "blockdev.h" #include "exec-memory.h" +#include "sysbus.h" /* SysBusDevice */ //#define DEBUG_BOARD_INIT @@ -72,6 +73,11 @@ typedef struct { SerialState *uart; } MaltaFPGAState; +typedef struct { + SysBusDevice busdev; + qemu_irq *i8259; +} MaltaState; + static ISADevice *pit; static struct _loaderparams { @@ -775,7 +781,7 @@ void mips_malta_init (ram_addr_t ram_size, int64_t kernel_entry; PCIBus *pci_bus; CPUState *env; - qemu_irq *i8259 = NULL, *isa_irq; + qemu_irq *isa_irq; qemu_irq *cpu_exit_irq; int piix4_devfn; i2c_bus *smbus; @@ -787,6 +793,11 @@ void mips_malta_init (ram_addr_t ram_size, int fl_sectors = 0; int be; + DeviceState *dev = qdev_create(NULL, "mips-malta"); + MaltaState *s = DO_UPCAST(MaltaState, busdev.qdev, dev); + + qdev_init_nofail(dev); + /* Make sure the first 3 serial ports are associated with a device. */ for(i = 0; i < 3; i++) { if (!serial_hds[i]) { @@ -932,7 +943,7 @@ void mips_malta_init (ram_addr_t ram_size, * qemu_irq_proxy() adds an extra bit of indirection, allowing us * to resolve the isa_irq -> i8259 dependency after i8259 is initialized. */ - isa_irq = qemu_irq_proxy(&i8259, 16); + isa_irq = qemu_irq_proxy(&s->i8259, 16); /* Northbridge */ pci_bus = gt64120_register(isa_irq); @@ -944,9 +955,9 @@ void mips_malta_init (ram_addr_t ram_size, /* Interrupt controller */ /* The 8259 is attached to the MIPS CPU INT0 pin, ie interrupt 2 */ - i8259 = i8259_init(env->irq[2]); + s->i8259 = i8259_init(env->irq[2]); - isa_bus_irqs(i8259); + isa_bus_irqs(s->i8259); pci_piix4_ide_init(pci_bus, hd, piix4_devfn + 1); usb_uhci_piix4_init(pci_bus, piix4_devfn + 2); smbus = piix4_pm_init(pci_bus, piix4_devfn + 3, 0x1100, isa_get_irq(9), @@ -990,6 +1001,20 @@ void mips_malta_init (ram_addr_t ram_size, } } +static int mips_malta_sysbus_device_init(SysBusDevice *sysbusdev) +{ + return 0; +} + +static SysBusDeviceInfo mips_malta_device = { + .init = mips_malta_sysbus_device_init, + .qdev.name = "mips-malta", + .qdev.size = sizeof(MaltaState), + .qdev.props = (Property[]) { + DEFINE_PROP_END_OF_LIST(), + } +}; + static QEMUMachine mips_malta_machine = { .name = "malta", .desc = "MIPS Malta Core LV", @@ -998,9 +1023,15 @@ static QEMUMachine mips_malta_machine = { .is_default = 1, }; +static void mips_malta_device_init(void) +{ + sysbus_register_withprop(&mips_malta_device); +} + static void mips_malta_machine_init(void) { qemu_register_machine(&mips_malta_machine); } +device_init(mips_malta_device_init); machine_init(mips_malta_machine_init); -- cgit v1.2.3 From 2061800b85ddcc9b34b5ccbfaa87f7e8b94626a6 Mon Sep 17 00:00:00 2001 From: Andreas Färber Date: Wed, 30 Nov 2011 16:26:21 +0100 Subject: exec.c: Fix subpage memory access to RAM MemoryRegion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 95c318f5e1f88d7e5bcc6deac17330fd4806a2d3 (Fix segfault in mmio subpage handling code.) prevented a segfault by making all subpage registrations over an existing memory page perform an unassigned access. Symptoms were writes not taking effect and reads returning zero. Very small page sizes are not currently supported either, so subpage memory areas cannot fully be avoided. Therefore change the previous fix to use a new IO_MEM_SUBPAGE_RAM instead of IO_MEM_UNASSIGNED. Suggested by Avi. Reviewed-by: Avi Kivity Signed-off-by: Andreas Färber Cc: Avi Kivity Cc: Gleb Natapov Signed-off-by: Anthony Liguori --- cpu-common.h | 1 + exec.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/cpu-common.h b/cpu-common.h index c9878ba47..3f4542869 100644 --- a/cpu-common.h +++ b/cpu-common.h @@ -172,6 +172,7 @@ void cpu_physical_memory_write_rom(target_phys_addr_t addr, #define IO_MEM_ROM (1 << IO_MEM_SHIFT) /* hardcoded offset */ #define IO_MEM_UNASSIGNED (2 << IO_MEM_SHIFT) #define IO_MEM_NOTDIRTY (3 << IO_MEM_SHIFT) +#define IO_MEM_SUBPAGE_RAM (4 << IO_MEM_SHIFT) /* Acts like a ROM when read and like a device when written. */ #define IO_MEM_ROMD (1) diff --git a/exec.c b/exec.c index 6b92198e6..6c206ff5b 100644 --- a/exec.c +++ b/exec.c @@ -3570,6 +3570,63 @@ static CPUWriteMemoryFunc * const subpage_write[] = { &subpage_writel, }; +static uint32_t subpage_ram_readb(void *opaque, target_phys_addr_t addr) +{ + ram_addr_t raddr = addr; + void *ptr = qemu_get_ram_ptr(raddr); + return ldub_p(ptr); +} + +static void subpage_ram_writeb(void *opaque, target_phys_addr_t addr, + uint32_t value) +{ + ram_addr_t raddr = addr; + void *ptr = qemu_get_ram_ptr(raddr); + stb_p(ptr, value); +} + +static uint32_t subpage_ram_readw(void *opaque, target_phys_addr_t addr) +{ + ram_addr_t raddr = addr; + void *ptr = qemu_get_ram_ptr(raddr); + return lduw_p(ptr); +} + +static void subpage_ram_writew(void *opaque, target_phys_addr_t addr, + uint32_t value) +{ + ram_addr_t raddr = addr; + void *ptr = qemu_get_ram_ptr(raddr); + stw_p(ptr, value); +} + +static uint32_t subpage_ram_readl(void *opaque, target_phys_addr_t addr) +{ + ram_addr_t raddr = addr; + void *ptr = qemu_get_ram_ptr(raddr); + return ldl_p(ptr); +} + +static void subpage_ram_writel(void *opaque, target_phys_addr_t addr, + uint32_t value) +{ + ram_addr_t raddr = addr; + void *ptr = qemu_get_ram_ptr(raddr); + stl_p(ptr, value); +} + +static CPUReadMemoryFunc * const subpage_ram_read[] = { + &subpage_ram_readb, + &subpage_ram_readw, + &subpage_ram_readl, +}; + +static CPUWriteMemoryFunc * const subpage_ram_write[] = { + &subpage_ram_writeb, + &subpage_ram_writew, + &subpage_ram_writel, +}; + static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end, ram_addr_t memory, ram_addr_t region_offset) { @@ -3583,8 +3640,9 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end, printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__, mmio, start, end, idx, eidx, memory); #endif - if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM) - memory = IO_MEM_UNASSIGNED; + if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM) { + memory = IO_MEM_SUBPAGE_RAM; + } memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); for (; idx <= eidx; idx++) { mmio->sub_io_index[idx] = memory; @@ -3817,6 +3875,9 @@ static void io_mem_init(void) cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read, notdirty_mem_write, NULL, DEVICE_NATIVE_ENDIAN); + cpu_register_io_memory_fixed(IO_MEM_SUBPAGE_RAM, subpage_ram_read, + subpage_ram_write, NULL, + DEVICE_NATIVE_ENDIAN); for (i=0; i<5; i++) io_mem_used[i] = 1; -- cgit v1.2.3 From f03969b952bc2aaf9f4445b6da28aebb0a9abde5 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 4 Dec 2011 22:35:27 +0530 Subject: hw/9pfs: Improve portability to older systems handle fs driver require a set of newly added syscalls. Don't Compile handle FS driver if those syscalls are not available. Instead of adding #ifdef for all those syscalls we check for open by handle syscall. If that is available then rest of the syscalls used by the driver should be available. Signed-off-by: Aneesh Kumar K.V --- Makefile.objs | 4 ++-- fsdev/qemu-fsdev.c | 2 ++ hw/9pfs/virtio-9p-handle.c | 33 --------------------------------- 3 files changed, 4 insertions(+), 35 deletions(-) diff --git a/Makefile.objs b/Makefile.objs index d7a65393b..3a699ee7d 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -310,8 +310,8 @@ hw-obj-$(CONFIG_SOUND) += $(sound-obj-y) 9pfs-nested-$(CONFIG_VIRTFS) += virtio-9p-local.o virtio-9p-xattr.o 9pfs-nested-$(CONFIG_VIRTFS) += virtio-9p-xattr-user.o virtio-9p-posix-acl.o 9pfs-nested-$(CONFIG_VIRTFS) += virtio-9p-coth.o cofs.o codir.o cofile.o -9pfs-nested-$(CONFIG_VIRTFS) += coxattr.o virtio-9p-handle.o -9pfs-nested-$(CONFIG_VIRTFS) += virtio-9p-synth.o +9pfs-nested-$(CONFIG_VIRTFS) += coxattr.o virtio-9p-synth.o +9pfs-nested-$(CONFIG_OPEN_BY_HANDLE) += virtio-9p-handle.o hw-obj-$(CONFIG_REALLY_VIRTFS) += $(addprefix 9pfs/, $(9pfs-nested-y)) $(addprefix 9pfs/, $(9pfs-nested-y)): QEMU_CFLAGS+=$(GLIB_CFLAGS) diff --git a/fsdev/qemu-fsdev.c b/fsdev/qemu-fsdev.c index 7fd2aa779..6684f7ea9 100644 --- a/fsdev/qemu-fsdev.c +++ b/fsdev/qemu-fsdev.c @@ -23,7 +23,9 @@ static QTAILQ_HEAD(FsDriverEntry_head, FsDriverListEntry) fsdriver_entries = static FsDriverTable FsDrivers[] = { { .name = "local", .ops = &local_ops}, +#ifdef CONFIG_OPEN_BY_HANDLE { .name = "handle", .ops = &handle_ops}, +#endif { .name = "synth", .ops = &synth_ops}, }; diff --git a/hw/9pfs/virtio-9p-handle.c b/hw/9pfs/virtio-9p-handle.c index 7644ae5ab..a62f690ee 100644 --- a/hw/9pfs/virtio-9p-handle.c +++ b/hw/9pfs/virtio-9p-handle.c @@ -45,7 +45,6 @@ struct handle_data { int handle_bytes; }; -#ifdef CONFIG_OPEN_BY_HANDLE static inline int name_to_handle(int dirfd, const char *name, struct file_handle *fh, int *mnt_id, int flags) { @@ -56,38 +55,6 @@ static inline int open_by_handle(int mountfd, const char *fh, int flags) { return open_by_handle_at(mountfd, (struct file_handle *)fh, flags); } -#else - -struct rpl_file_handle { - unsigned int handle_bytes; - int handle_type; - unsigned char handle[0]; -}; -#define file_handle rpl_file_handle - -#ifndef AT_REMOVEDIR -#define AT_REMOVEDIR 0x200 -#endif -#ifndef AT_EMPTY_PATH -#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */ -#endif -#ifndef O_PATH -#define O_PATH 010000000 -#endif - -static inline int name_to_handle(int dirfd, const char *name, - struct file_handle *fh, int *mnt_id, int flags) -{ - errno = ENOSYS; - return -1; -} - -static inline int open_by_handle(int mountfd, const char *fh, int flags) -{ - errno = ENOSYS; - return -1; -} -#endif static int handle_update_file_cred(int dirfd, const char *name, FsCred *credp) { -- cgit v1.2.3 From 77a02621812952acfde887244f6f480de1b51f95 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 4 Dec 2011 22:35:28 +0530 Subject: hw/9pfs: use migration blockers to prevent live migration when virtfs export path is mounted Now when you try to migrate with VirtFS export path mounted, you get a proper QMP error: (qemu) migrate tcp:localhost:4444 Migration is disabled when VirtFS export path '/tmp/' is mounted in the guest using mount_tag 'v_tmp' (qemu) Signed-off-by: Aneesh Kumar K.V --- hw/9pfs/virtio-9p-device.c | 22 +++++++++++----------- hw/9pfs/virtio-9p.c | 19 +++++++++++++++++++ hw/9pfs/virtio-9p.h | 5 +++-- qerror.c | 5 +++++ qerror.h | 3 +++ 5 files changed, 41 insertions(+), 13 deletions(-) diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c index bba4c5476..c9bca8bf7 100644 --- a/hw/9pfs/virtio-9p-device.c +++ b/hw/9pfs/virtio-9p-device.c @@ -33,13 +33,15 @@ static V9fsState *to_virtio_9p(VirtIODevice *vdev) static void virtio_9p_get_config(VirtIODevice *vdev, uint8_t *config) { + int len; struct virtio_9p_config *cfg; V9fsState *s = to_virtio_9p(vdev); - cfg = g_malloc0(sizeof(struct virtio_9p_config) + - s->tag_len); - stw_raw(&cfg->tag_len, s->tag_len); - memcpy(cfg->tag, s->tag, s->tag_len); + len = strlen(s->tag); + cfg = g_malloc0(sizeof(struct virtio_9p_config) + len); + stw_raw(&cfg->tag_len, len); + /* We don't copy the terminating null to config space */ + memcpy(cfg->tag, s->tag, len); memcpy(config, cfg, s->config_size); g_free(cfg); } @@ -96,20 +98,18 @@ VirtIODevice *virtio_9p_init(DeviceState *dev, V9fsConf *conf) } len = strlen(conf->tag); - if (len > MAX_TAG_LEN) { + if (len > MAX_TAG_LEN - 1) { fprintf(stderr, "mount tag '%s' (%d bytes) is longer than " - "maximum (%d bytes)", conf->tag, len, MAX_TAG_LEN); + "maximum (%d bytes)", conf->tag, len, MAX_TAG_LEN - 1); exit(1); } - /* s->tag is non-NULL terminated string */ - s->tag = g_malloc(len); - memcpy(s->tag, conf->tag, len); - s->tag_len = len; + + s->tag = strdup(conf->tag); s->ctx.uid = -1; s->ops = fse->ops; s->vdev.get_features = virtio_9p_get_features; - s->config_size = sizeof(struct virtio_9p_config) + s->tag_len; + s->config_size = sizeof(struct virtio_9p_config) + len; s->vdev.get_config = virtio_9p_get_config; s->fid_list = NULL; qemu_co_rwlock_init(&s->rename_lock); diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c index 1b2fc5dfb..32b98ddef 100644 --- a/hw/9pfs/virtio-9p.c +++ b/hw/9pfs/virtio-9p.c @@ -23,6 +23,7 @@ #include "virtio-9p-xattr.h" #include "virtio-9p-coth.h" #include "trace.h" +#include "migration.h" int open_fd_hw; int total_open_fd; @@ -373,6 +374,19 @@ static void put_fid(V9fsPDU *pdu, V9fsFidState *fidp) * Don't free the fid if it is in reclaim list */ if (!fidp->ref && fidp->clunked) { + if (fidp->fid == pdu->s->root_fid) { + /* + * if the clunked fid is root fid then we + * have unmounted the fs on the client side. + * delete the migration blocker. Ideally, this + * should be hooked to transport close notification + */ + if (pdu->s->migration_blocker) { + migrate_del_blocker(pdu->s->migration_blocker); + error_free(pdu->s->migration_blocker); + pdu->s->migration_blocker = NULL; + } + } free_fid(pdu, fidp); } } @@ -1235,6 +1249,11 @@ static void v9fs_attach(void *opaque) err = offset; trace_v9fs_attach_return(pdu->tag, pdu->id, qid.type, qid.version, qid.path); + s->root_fid = fid; + /* disable migration */ + error_set(&s->migration_blocker, QERR_VIRTFS_FEATURE_BLOCKS_MIGRATION, + s->ctx.fs_root, s->tag); + migrate_add_blocker(s->migration_blocker); out: put_fid(pdu, fidp); out_nofid: diff --git a/hw/9pfs/virtio-9p.h b/hw/9pfs/virtio-9p.h index 7f883563d..8b612da52 100644 --- a/hw/9pfs/virtio-9p.h +++ b/hw/9pfs/virtio-9p.h @@ -246,8 +246,7 @@ typedef struct V9fsState V9fsFidState *fid_list; FileOperations *ops; FsContext ctx; - uint16_t tag_len; - uint8_t *tag; + char *tag; size_t config_size; enum p9_proto_version proto_version; int32_t msize; @@ -256,6 +255,8 @@ typedef struct V9fsState * on rename. */ CoRwlock rename_lock; + int32_t root_fid; + Error *migration_blocker; } V9fsState; typedef struct V9fsStatState { diff --git a/qerror.c b/qerror.c index fdf62b9e2..25bc91e58 100644 --- a/qerror.c +++ b/qerror.c @@ -234,6 +234,11 @@ static const QErrorStringTable qerror_table[] = { .desc = "'%(device)' uses a %(format) feature which is not " "supported by this qemu version: %(feature)", }, + { + .error_fmt = QERR_VIRTFS_FEATURE_BLOCKS_MIGRATION, + .desc = "Migration is disabled when VirtFS export path '%(path)' " + "is mounted in the guest using mount_tag '%(tag)'", + }, { .error_fmt = QERR_VNC_SERVER_FAILED, .desc = "Could not start VNC server on %(target)", diff --git a/qerror.h b/qerror.h index 2d3d43bed..6414cd9d5 100644 --- a/qerror.h +++ b/qerror.h @@ -192,6 +192,9 @@ QError *qobject_to_qerror(const QObject *obj); #define QERR_UNKNOWN_BLOCK_FORMAT_FEATURE \ "{ 'class': 'UnknownBlockFormatFeature', 'data': { 'device': %s, 'format': %s, 'feature': %s } }" +#define QERR_VIRTFS_FEATURE_BLOCKS_MIGRATION \ + "{ 'class': 'VirtFSFeatureBlocksMigration', 'data': { 'path': %s, 'tag': %s } }" + #define QERR_VNC_SERVER_FAILED \ "{ 'class': 'VNCServerFailed', 'data': { 'target': %s } }" -- cgit v1.2.3 From c554919f74e5a79f15360c4c2f417003477634cf Mon Sep 17 00:00:00 2001 From: Deepak C Shetty Date: Sun, 4 Dec 2011 22:35:28 +0530 Subject: hw/9pfs: Reset server state during TVERSION As per the 9p rfc, during TVERSION its necessary to clean all the active fids, so that we start the session from a clean state. Its also needed in scenarios where the guest is booting off 9p, and boot fails, and client restarts, without any knowledge of the past, it will issue a TVERSION again so this ensures that we always start from a clean state. Signed-off-by: Deepak C Shetty Signed-off-by: Aneesh Kumar K.V --- hw/9pfs/virtio-9p.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c index 32b98ddef..dd432091f 100644 --- a/hw/9pfs/virtio-9p.c +++ b/hw/9pfs/virtio-9p.c @@ -523,6 +523,30 @@ static int v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path) return 0; } +static void virtfs_reset(V9fsPDU *pdu) +{ + V9fsState *s = pdu->s; + V9fsFidState *fidp = NULL; + + /* Free all fids */ + while (s->fid_list) { + fidp = s->fid_list; + s->fid_list = fidp->next; + + if (fidp->ref) { + fidp->clunked = 1; + } else { + free_fid(pdu, fidp); + } + } + if (fidp) { + /* One or more unclunked fids found... */ + error_report("9pfs:%s: One or more uncluncked fids " + "found during reset", __func__); + } + return; +} + #define P9_QID_TYPE_DIR 0x80 #define P9_QID_TYPE_SYMLINK 0x02 @@ -1196,6 +1220,8 @@ static void v9fs_version(void *opaque) pdu_unmarshal(pdu, offset, "ds", &s->msize, &version); trace_v9fs_version(pdu->tag, pdu->id, s->msize, version.data); + virtfs_reset(pdu); + if (!strcmp(version.data, "9P2000.u")) { s->proto_version = V9FS_PROTO_2000U; } else if (!strcmp(version.data, "9P2000.L")) { -- cgit v1.2.3 From 64dd41bc2de392fa018c5ce804cc451b83f18b94 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 4 Dec 2011 22:35:28 +0530 Subject: hw/9pfs: Add qdev.reset callback for virtio-9p-pci device Add the device reset callback Signed-off-by: Aneesh Kumar K.V --- hw/9pfs/virtio-9p-device.c | 3 ++- hw/virtio-pci.c | 2 +- hw/virtio-pci.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c index c9bca8bf7..cd343e1d8 100644 --- a/hw/9pfs/virtio-9p-device.c +++ b/hw/9pfs/virtio-9p-device.c @@ -176,7 +176,8 @@ static PCIDeviceInfo virtio_9p_info = { DEFINE_PROP_STRING("mount_tag", VirtIOPCIProxy, fsconf.tag), DEFINE_PROP_STRING("fsdev", VirtIOPCIProxy, fsconf.fsdev_id), DEFINE_PROP_END_OF_LIST(), - } + }, + .qdev.reset = virtio_pci_reset, }; static void virtio_9p_register_devices(void) diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index 64c6a9414..c665f5c94 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -266,7 +266,7 @@ static void virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy) proxy->ioeventfd_started = false; } -static void virtio_pci_reset(DeviceState *d) +void virtio_pci_reset(DeviceState *d) { VirtIOPCIProxy *proxy = container_of(d, VirtIOPCIProxy, pci_dev.qdev); virtio_pci_stop_ioeventfd(proxy); diff --git a/hw/virtio-pci.h b/hw/virtio-pci.h index f8404de92..344c22b68 100644 --- a/hw/virtio-pci.h +++ b/hw/virtio-pci.h @@ -45,6 +45,7 @@ typedef struct { } VirtIOPCIProxy; void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev); +void virtio_pci_reset(DeviceState *d); /* Virtio ABI version, if we increment this, we break the guest driver. */ #define VIRTIO_PCI_ABI_VERSION 0 -- cgit v1.2.3 From ed6857bf98e6c8b8080be208ffe15bb678591466 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 4 Dec 2011 22:35:28 +0530 Subject: hw/9pfs: Use the correct file descriptor in Fsdriver Callback Fsdriver callback that operate on file descriptor need to differentiate between directory fd and file fd. Based on the original patch from Sassan Panahinejad Signed-off-by: Aneesh Kumar K.V --- fsdev/file-op-9p.h | 4 ++-- hw/9pfs/cofile.c | 4 ++-- hw/9pfs/virtio-9p-handle.c | 28 ++++++++++++++++++++++------ hw/9pfs/virtio-9p-local.c | 36 ++++++++++++++++++++++++++---------- hw/9pfs/virtio-9p-synth.c | 5 +++-- 5 files changed, 55 insertions(+), 22 deletions(-) diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h index 1928da252..a85ecd30b 100644 --- a/fsdev/file-op-9p.h +++ b/fsdev/file-op-9p.h @@ -112,10 +112,10 @@ typedef struct FileOperations ssize_t (*pwritev)(FsContext *, V9fsFidOpenState *, const struct iovec *, int, off_t); int (*mkdir)(FsContext *, V9fsPath *, const char *, FsCred *); - int (*fstat)(FsContext *, V9fsFidOpenState *, struct stat *); + int (*fstat)(FsContext *, int, V9fsFidOpenState *, struct stat *); int (*rename)(FsContext *, const char *, const char *); int (*truncate)(FsContext *, V9fsPath *, off_t); - int (*fsync)(FsContext *, V9fsFidOpenState *, int); + int (*fsync)(FsContext *, int, V9fsFidOpenState *, int); int (*statfs)(FsContext *s, V9fsPath *path, struct statfs *stbuf); ssize_t (*lgetxattr)(FsContext *, V9fsPath *, const char *, void *, size_t); diff --git a/hw/9pfs/cofile.c b/hw/9pfs/cofile.c index 586b0382f..b15838c1e 100644 --- a/hw/9pfs/cofile.c +++ b/hw/9pfs/cofile.c @@ -71,7 +71,7 @@ int v9fs_co_fstat(V9fsPDU *pdu, V9fsFidState *fidp, struct stat *stbuf) } v9fs_co_run_in_worker( { - err = s->ops->fstat(&s->ctx, &fidp->fs, stbuf); + err = s->ops->fstat(&s->ctx, fidp->fid_type, &fidp->fs, stbuf); if (err < 0) { err = -errno; } @@ -192,7 +192,7 @@ int v9fs_co_fsync(V9fsPDU *pdu, V9fsFidState *fidp, int datasync) } v9fs_co_run_in_worker( { - err = s->ops->fsync(&s->ctx, &fidp->fs, datasync); + err = s->ops->fsync(&s->ctx, fidp->fid_type, &fidp->fs, datasync); if (err < 0) { err = -errno; } diff --git a/hw/9pfs/virtio-9p-handle.c b/hw/9pfs/virtio-9p-handle.c index a62f690ee..f97d8984b 100644 --- a/hw/9pfs/virtio-9p-handle.c +++ b/hw/9pfs/virtio-9p-handle.c @@ -255,10 +255,17 @@ static int handle_mkdir(FsContext *fs_ctx, V9fsPath *dir_path, return ret; } -static int handle_fstat(FsContext *fs_ctx, V9fsFidOpenState *fs, - struct stat *stbuf) +static int handle_fstat(FsContext *fs_ctx, int fid_type, + V9fsFidOpenState *fs, struct stat *stbuf) { - return fstat(fs->fd, stbuf); + int fd; + + if (fid_type == P9_FID_DIR) { + fd = dirfd(fs->dir); + } else { + fd = fs->fd; + } + return fstat(fd, stbuf); } static int handle_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, @@ -395,12 +402,21 @@ static int handle_remove(FsContext *ctx, const char *path) return -1; } -static int handle_fsync(FsContext *ctx, V9fsFidOpenState *fs, int datasync) +static int handle_fsync(FsContext *ctx, int fid_type, + V9fsFidOpenState *fs, int datasync) { + int fd; + + if (fid_type == P9_FID_DIR) { + fd = dirfd(fs->dir); + } else { + fd = fs->fd; + } + if (datasync) { - return qemu_fdatasync(fs->fd); + return qemu_fdatasync(fd); } else { - return fsync(fs->fd); + return fsync(fd); } } diff --git a/hw/9pfs/virtio-9p-local.c b/hw/9pfs/virtio-9p-local.c index 99ef0cd33..371a94dff 100644 --- a/hw/9pfs/virtio-9p-local.c +++ b/hw/9pfs/virtio-9p-local.c @@ -366,11 +366,18 @@ out: return err; } -static int local_fstat(FsContext *fs_ctx, +static int local_fstat(FsContext *fs_ctx, int fid_type, V9fsFidOpenState *fs, struct stat *stbuf) { - int err; - err = fstat(fs->fd, stbuf); + int err, fd; + + if (fid_type == P9_FID_DIR) { + fd = dirfd(fs->dir); + } else { + fd = fs->fd; + } + + err = fstat(fd, stbuf); if (err) { return err; } @@ -381,19 +388,19 @@ static int local_fstat(FsContext *fs_ctx, mode_t tmp_mode; dev_t tmp_dev; - if (fgetxattr(fs->fd, "user.virtfs.uid", + if (fgetxattr(fd, "user.virtfs.uid", &tmp_uid, sizeof(uid_t)) > 0) { stbuf->st_uid = tmp_uid; } - if (fgetxattr(fs->fd, "user.virtfs.gid", + if (fgetxattr(fd, "user.virtfs.gid", &tmp_gid, sizeof(gid_t)) > 0) { stbuf->st_gid = tmp_gid; } - if (fgetxattr(fs->fd, "user.virtfs.mode", + if (fgetxattr(fd, "user.virtfs.mode", &tmp_mode, sizeof(mode_t)) > 0) { stbuf->st_mode = tmp_mode; } - if (fgetxattr(fs->fd, "user.virtfs.rdev", + if (fgetxattr(fd, "user.virtfs.rdev", &tmp_dev, sizeof(dev_t)) > 0) { stbuf->st_rdev = tmp_dev; } @@ -592,12 +599,21 @@ static int local_remove(FsContext *ctx, const char *path) return remove(rpath(ctx, path, buffer)); } -static int local_fsync(FsContext *ctx, V9fsFidOpenState *fs, int datasync) +static int local_fsync(FsContext *ctx, int fid_type, + V9fsFidOpenState *fs, int datasync) { + int fd; + + if (fid_type == P9_FID_DIR) { + fd = dirfd(fs->dir); + } else { + fd = fs->fd; + } + if (datasync) { - return qemu_fdatasync(fs->fd); + return qemu_fdatasync(fd); } else { - return fsync(fs->fd); + return fsync(fd); } } diff --git a/hw/9pfs/virtio-9p-synth.c b/hw/9pfs/virtio-9p-synth.c index f57361636..92e0b09d3 100644 --- a/hw/9pfs/virtio-9p-synth.c +++ b/hw/9pfs/virtio-9p-synth.c @@ -166,7 +166,7 @@ static int v9fs_synth_lstat(FsContext *fs_ctx, return 0; } -static int v9fs_synth_fstat(FsContext *fs_ctx, +static int v9fs_synth_fstat(FsContext *fs_ctx, int fid_type, V9fsFidOpenState *fs, struct stat *stbuf) { V9fsSynthOpenState *synth_open = fs->private; @@ -414,7 +414,8 @@ static int v9fs_synth_remove(FsContext *ctx, const char *path) return -1; } -static int v9fs_synth_fsync(FsContext *ctx, V9fsFidOpenState *fs, int datasync) +static int v9fs_synth_fsync(FsContext *ctx, int fid_type, + V9fsFidOpenState *fs, int datasync) { errno = ENOSYS; return 0; -- cgit v1.2.3 From 45d6cdff48356dc8974497ec0524f971b646dd70 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 21 Dec 2011 12:37:22 +0530 Subject: hw/9pfs: replace iovec manipulation with QEMUIOVector The v9fs_read() and v9fs_write() functions rely on iovec[] manipulation code should be replaced with QEMUIOVector to avoid duplicating code. In the future it may be possible to make the code even more concise by using QEMUIOVector consistently across virtio and 9pfs. The "v" format specifier for pdu_marshal() and pdu_unmarshal() is dropped since it does not actually pack/unpack anything. The specifier was also not implemented to update the offset variable and could only be used at the end of a format string, another sign that this shouldn't really be a format specifier. Instead, see the new v9fs_init_qiov_from_pdu() function. This change avoids a possible iovec[] buffer overflow when indirect vrings are used since the number of vectors is now limited by the underlying VirtQueueElement and cannot be out-of-bounds. Signed-off-by: Stefan Hajnoczi Signed-off-by: Aneesh Kumar K.V --- hw/9pfs/virtio-9p.c | 162 +++++++++++++++++++--------------------------------- 1 file changed, 60 insertions(+), 102 deletions(-) diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c index dd432091f..c0189163b 100644 --- a/hw/9pfs/virtio-9p.c +++ b/hw/9pfs/virtio-9p.c @@ -674,40 +674,6 @@ static size_t pdu_pack(V9fsPDU *pdu, size_t offset, const void *src, offset, size, 1); } -static int pdu_copy_sg(V9fsPDU *pdu, size_t offset, int rx, struct iovec *sg) -{ - size_t pos = 0; - int i, j; - struct iovec *src_sg; - unsigned int num; - - if (rx) { - src_sg = pdu->elem.in_sg; - num = pdu->elem.in_num; - } else { - src_sg = pdu->elem.out_sg; - num = pdu->elem.out_num; - } - - j = 0; - for (i = 0; i < num; i++) { - if (offset <= pos) { - sg[j].iov_base = src_sg[i].iov_base; - sg[j].iov_len = src_sg[i].iov_len; - j++; - } else if (offset < (src_sg[i].iov_len + pos)) { - sg[j].iov_base = src_sg[i].iov_base; - sg[j].iov_len = src_sg[i].iov_len; - sg[j].iov_base += (offset - pos); - sg[j].iov_len -= (offset - pos); - j++; - } - pos += src_sg[i].iov_len; - } - - return j; -} - static size_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...) { size_t old_offset = offset; @@ -743,12 +709,6 @@ static size_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...) *valp = le64_to_cpu(val); break; } - case 'v': { - struct iovec *iov = va_arg(ap, struct iovec *); - int *iovcnt = va_arg(ap, int *); - *iovcnt = pdu_copy_sg(pdu, offset, 0, iov); - break; - } case 's': { V9fsString *str = va_arg(ap, V9fsString *); offset += pdu_unmarshal(pdu, offset, "w", &str->size); @@ -827,12 +787,6 @@ static size_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...) offset += pdu_pack(pdu, offset, &val, sizeof(val)); break; } - case 'v': { - struct iovec *iov = va_arg(ap, struct iovec *); - int *iovcnt = va_arg(ap, int *); - *iovcnt = pdu_copy_sg(pdu, offset, 1, iov); - break; - } case 's': { V9fsString *str = va_arg(ap, V9fsString *); offset += pdu_marshal(pdu, offset, "w", str->size); @@ -1143,42 +1097,6 @@ static void stat_to_v9stat_dotl(V9fsState *s, const struct stat *stbuf, stat_to_qid(stbuf, &v9lstat->qid); } -static struct iovec *adjust_sg(struct iovec *sg, int len, int *iovcnt) -{ - while (len && *iovcnt) { - if (len < sg->iov_len) { - sg->iov_len -= len; - sg->iov_base += len; - len = 0; - } else { - len -= sg->iov_len; - sg++; - *iovcnt -= 1; - } - } - - return sg; -} - -static struct iovec *cap_sg(struct iovec *sg, int cap, int *cnt) -{ - int i; - int total = 0; - - for (i = 0; i < *cnt; i++) { - if ((total + sg[i].iov_len) > cap) { - sg[i].iov_len -= ((total + sg[i].iov_len) - cap); - i++; - break; - } - total += sg[i].iov_len; - } - - *cnt = i; - - return sg; -} - static void print_sg(struct iovec *sg, int cnt) { int i; @@ -1861,6 +1779,38 @@ out: return count; } +/* + * Create a QEMUIOVector for a sub-region of PDU iovecs + * + * @qiov: uninitialized QEMUIOVector + * @skip: number of bytes to skip from beginning of PDU + * @size: number of bytes to include + * @is_write: true - write, false - read + * + * The resulting QEMUIOVector has heap-allocated iovecs and must be cleaned up + * with qemu_iovec_destroy(). + */ +static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu, + uint64_t skip, size_t size, + bool is_write) +{ + QEMUIOVector elem; + struct iovec *iov; + unsigned int niov; + + if (is_write) { + iov = pdu->elem.out_sg; + niov = pdu->elem.out_num; + } else { + iov = pdu->elem.in_sg; + niov = pdu->elem.in_num; + } + + qemu_iovec_init_external(&elem, iov, niov); + qemu_iovec_init(qiov, niov); + qemu_iovec_copy(qiov, &elem, skip, size); +} + static void v9fs_read(void *opaque) { int32_t fid; @@ -1895,21 +1845,21 @@ static void v9fs_read(void *opaque) err += pdu_marshal(pdu, offset, "d", count); err += count; } else if (fidp->fid_type == P9_FID_FILE) { - int32_t cnt; + QEMUIOVector qiov_full; + QEMUIOVector qiov; int32_t len; - struct iovec *sg; - struct iovec iov[128]; /* FIXME: bad, bad, bad */ - sg = iov; - pdu_marshal(pdu, offset + 4, "v", sg, &cnt); - sg = cap_sg(sg, max_count, &cnt); + v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, max_count, false); + qemu_iovec_init(&qiov, qiov_full.niov); do { + qemu_iovec_reset(&qiov); + qemu_iovec_copy(&qiov, &qiov_full, count, qiov_full.size - count); if (0) { - print_sg(sg, cnt); + print_sg(qiov.iov, qiov.niov); } /* Loop in case of EINTR */ do { - len = v9fs_co_preadv(pdu, fidp, sg, cnt, off); + len = v9fs_co_preadv(pdu, fidp, qiov.iov, qiov.niov, off); if (len >= 0) { off += len; count += len; @@ -1920,11 +1870,12 @@ static void v9fs_read(void *opaque) err = len; goto out; } - sg = adjust_sg(sg, len, &cnt); } while (count < max_count && len > 0); err = offset; err += pdu_marshal(pdu, offset, "d", count); err += count; + qemu_iovec_destroy(&qiov); + qemu_iovec_destroy(&qiov_full); } else if (fidp->fid_type == P9_FID_XATTR) { err = v9fs_xattr_read(s, pdu, fidp, off, max_count); } else { @@ -2095,7 +2046,6 @@ out: static void v9fs_write(void *opaque) { - int cnt; ssize_t err; int32_t fid; int64_t off; @@ -2104,13 +2054,14 @@ static void v9fs_write(void *opaque) int32_t total = 0; size_t offset = 7; V9fsFidState *fidp; - struct iovec iov[128]; /* FIXME: bad, bad, bad */ - struct iovec *sg = iov; V9fsPDU *pdu = opaque; V9fsState *s = pdu->s; + QEMUIOVector qiov_full; + QEMUIOVector qiov; - pdu_unmarshal(pdu, offset, "dqdv", &fid, &off, &count, sg, &cnt); - trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, cnt); + offset += pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &count); + v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, count, true); + trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, qiov_full.niov); fidp = get_fid(pdu, fid); if (fidp == NULL) { @@ -2126,20 +2077,23 @@ static void v9fs_write(void *opaque) /* * setxattr operation */ - err = v9fs_xattr_write(s, pdu, fidp, off, count, sg, cnt); + err = v9fs_xattr_write(s, pdu, fidp, off, count, + qiov_full.iov, qiov_full.niov); goto out; } else { err = -EINVAL; goto out; } - sg = cap_sg(sg, count, &cnt); + qemu_iovec_init(&qiov, qiov_full.niov); do { + qemu_iovec_reset(&qiov); + qemu_iovec_copy(&qiov, &qiov_full, total, qiov_full.size - total); if (0) { - print_sg(sg, cnt); + print_sg(qiov.iov, qiov.niov); } /* Loop in case of EINTR */ do { - len = v9fs_co_pwritev(pdu, fidp, sg, cnt, off); + len = v9fs_co_pwritev(pdu, fidp, qiov.iov, qiov.niov, off); if (len >= 0) { off += len; total += len; @@ -2148,16 +2102,20 @@ static void v9fs_write(void *opaque) if (len < 0) { /* IO error return the error */ err = len; - goto out; + goto out_qiov; } - sg = adjust_sg(sg, len, &cnt); } while (total < count && len > 0); + + offset = 7; offset += pdu_marshal(pdu, offset, "d", total); err = offset; trace_v9fs_write_return(pdu->tag, pdu->id, total, err); +out_qiov: + qemu_iovec_destroy(&qiov); out: put_fid(pdu, fidp); out_nofid: + qemu_iovec_destroy(&qiov_full); complete_pdu(s, pdu, err); } -- cgit v1.2.3 From 3d3ec7b809b91f2a71fb78fc6b5b079963383243 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 21 Dec 2011 12:37:23 +0530 Subject: hw/9pfs: Use the correct signed type for different variables Signed-off-by: Aneesh Kumar K.V --- fsdev/file-op-9p.h | 2 +- hw/9pfs/virtio-9p.c | 21 +++++++++++---------- hw/9pfs/virtio-9p.h | 2 +- trace-events | 8 ++++---- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h index a85ecd30b..c823fe0ae 100644 --- a/fsdev/file-op-9p.h +++ b/fsdev/file-op-9p.h @@ -74,7 +74,7 @@ typedef struct FsContext } FsContext; typedef struct V9fsPath { - int16_t size; + uint16_t size; char *data; } V9fsPath; diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c index c0189163b..b3fc3d088 100644 --- a/hw/9pfs/virtio-9p.c +++ b/hw/9pfs/virtio-9p.c @@ -1694,8 +1694,8 @@ out_nofid: complete_pdu(s, pdu, err); } -static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, - V9fsFidState *fidp, int64_t off, int32_t max_count) +static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp, + uint64_t off, uint32_t max_count) { size_t offset = 7; int read_count; @@ -1719,7 +1719,7 @@ static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, } static int v9fs_do_readdir_with_stat(V9fsPDU *pdu, - V9fsFidState *fidp, int32_t max_count) + V9fsFidState *fidp, uint32_t max_count) { V9fsPath path; V9fsStat v9stat; @@ -1814,11 +1814,11 @@ static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu, static void v9fs_read(void *opaque) { int32_t fid; - int64_t off; + uint64_t off; ssize_t err = 0; int32_t count = 0; size_t offset = 7; - int32_t max_count; + uint32_t max_count; V9fsFidState *fidp; V9fsPDU *pdu = opaque; V9fsState *s = pdu->s; @@ -1962,8 +1962,9 @@ static void v9fs_readdir(void *opaque) V9fsFidState *fidp; ssize_t retval = 0; size_t offset = 7; - int64_t initial_offset; - int32_t count, max_count; + uint64_t initial_offset; + int32_t count; + uint32_t max_count; V9fsPDU *pdu = opaque; V9fsState *s = pdu->s; @@ -2001,7 +2002,7 @@ out_nofid: } static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp, - int64_t off, int32_t count, + uint64_t off, uint32_t count, struct iovec *sg, int cnt) { int i, to_copy; @@ -2048,8 +2049,8 @@ static void v9fs_write(void *opaque) { ssize_t err; int32_t fid; - int64_t off; - int32_t count; + uint64_t off; + uint32_t count; int32_t len = 0; int32_t total = 0; size_t offset = 7; diff --git a/hw/9pfs/virtio-9p.h b/hw/9pfs/virtio-9p.h index 8b612da52..19a797b72 100644 --- a/hw/9pfs/virtio-9p.h +++ b/hw/9pfs/virtio-9p.h @@ -156,7 +156,7 @@ typedef struct V9fsFidState V9fsFidState; typedef struct V9fsString { - int16_t size; + uint16_t size; char *data; } V9fsString; diff --git a/trace-events b/trace-events index 962caca59..e417897e5 100644 --- a/trace-events +++ b/trace-events @@ -579,11 +579,11 @@ v9fs_lcreate(uint16_t tag, uint8_t id, int32_t dfid, int32_t flags, int32_t mode v9fs_lcreate_return(uint16_t tag, uint8_t id, int8_t type, int32_t version, int64_t path, int32_t iounit) "tag %d id %d qid={type %d version %d path %"PRId64"} iounit %d" v9fs_fsync(uint16_t tag, uint8_t id, int32_t fid, int datasync) "tag %d id %d fid %d datasync %d" v9fs_clunk(uint16_t tag, uint8_t id, int32_t fid) "tag %d id %d fid %d" -v9fs_read(uint16_t tag, uint8_t id, int32_t fid, int64_t off, int32_t max_count) "tag %d id %d fid %d off %"PRId64" max_count %d" +v9fs_read(uint16_t tag, uint8_t id, int32_t fid, uint64_t off, uint32_t max_count) "tag %d id %d fid %d off %"PRIu64" max_count %u" v9fs_read_return(uint16_t tag, uint8_t id, int32_t count, ssize_t err) "tag %d id %d count %d err %zd" -v9fs_readdir(uint16_t tag, uint8_t id, int32_t fid, int64_t offset, int32_t max_count) "tag %d id %d fid %d offset %"PRId64" max_count %d" -v9fs_readdir_return(uint16_t tag, uint8_t id, int32_t count, ssize_t retval) "tag %d id %d count %d retval %zd" -v9fs_write(uint16_t tag, uint8_t id, int32_t fid, int64_t off, int32_t count, int cnt) "tag %d id %d fid %d off %"PRId64" count %d cnt %d" +v9fs_readdir(uint16_t tag, uint8_t id, int32_t fid, uint64_t offset, uint32_t max_count) "tag %d id %d fid %d offset %"PRIu64" max_count %u" +v9fs_readdir_return(uint16_t tag, uint8_t id, uint32_t count, ssize_t retval) "tag %d id %d count %u retval %zd" +v9fs_write(uint16_t tag, uint8_t id, int32_t fid, uint64_t off, uint32_t count, int cnt) "tag %d id %d fid %d off %"PRIu64" count %u cnt %d" v9fs_write_return(uint16_t tag, uint8_t id, int32_t total, ssize_t err) "tag %d id %d total %d err %zd" v9fs_create(uint16_t tag, uint8_t id, int32_t fid, char* name, int32_t perm, int8_t mode) "tag %d id %d fid %d name %s perm %d mode %d" v9fs_create_return(uint16_t tag, uint8_t id, int8_t type, int32_t version, int64_t path, int iounit) "tag %d id %d qid={type %d version %d path %"PRId64"} iounit %d" -- cgit v1.2.3 From abf80f880410ebbdd01a289c41c87153802fe900 Mon Sep 17 00:00:00 2001 From: Andreas Gustafsson Date: Mon, 12 Dec 2011 00:46:32 +0400 Subject: target-i386: fix cmpxchg instruction emulation When the i386 cmpxchg instruction is executed with a memory operand and the comparison result is "unequal", do the memory write before changing the accumulator instead of the other way around, because otherwise the new accumulator value will incorrectly be used in the comparison when the instruction is restarted after a page fault. This bug was originally reported on 2010-04-25 as https://bugs.launchpad.net/qemu/+bug/569760 Signed-off-by: Andreas Gustafsson --- target-i386/translate.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 1ef8d16ac..8321bf39a 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -4870,20 +4870,23 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) tcg_gen_sub_tl(t2, cpu_regs[R_EAX], t0); gen_extu(ot, t2); tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1); + label2 = gen_new_label(); if (mod == 3) { - label2 = gen_new_label(); gen_op_mov_reg_v(ot, R_EAX, t0); tcg_gen_br(label2); gen_set_label(label1); gen_op_mov_reg_v(ot, rm, t1); - gen_set_label(label2); } else { - tcg_gen_mov_tl(t1, t0); + /* perform no-op store cycle like physical cpu; must be + before changing accumulator to ensure idempotency if + the store faults and the instruction is restarted */ + gen_op_st_v(ot + s->mem_index, t0, a0); gen_op_mov_reg_v(ot, R_EAX, t0); + tcg_gen_br(label2); gen_set_label(label1); - /* always store */ gen_op_st_v(ot + s->mem_index, t1, a0); } + gen_set_label(label2); tcg_gen_mov_tl(cpu_cc_src, t0); tcg_gen_mov_tl(cpu_cc_dst, t2); s->cc_op = CC_OP_SUBB + ot; -- cgit v1.2.3 From 6d450bfbc862d0dab0e8da10ae15698612800726 Mon Sep 17 00:00:00 2001 From: Brad Date: Mon, 28 Nov 2011 19:53:49 -0500 Subject: configure: Enable build by default PIE / read-only relocation sections on OpenBSD amd64/i386. Enable build by default PIE / read-only relocation sections for the QEMU binaries on OpenBSD amd64/i386. Signed-off-by: Brad Smith Signed-off-by: Blue Swirl --- configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure b/configure index ac4840d45..b113f6093 100755 --- a/configure +++ b/configure @@ -1116,7 +1116,7 @@ fi if test "$pie" = ""; then case "$cpu-$targetos" in - i386-Linux|x86_64-Linux) + i386-Linux|x86_64-Linux|i386-OpenBSD|x86_64-OpenBSD) ;; *) pie="no" -- cgit v1.2.3 From 3e8088148bb56b84a739c2ef3c63d89188a1ad8f Mon Sep 17 00:00:00 2001 From: Stefan Sandstrom Date: Mon, 12 Dec 2011 11:38:31 +0100 Subject: cris: Handle conditional stores on CRISv10 Signed-off-by: Stefan Sandstrom Signed-off-by: Edgar E. Iglesias --- target-cris/cpu.h | 2 ++ target-cris/helper.c | 1 + target-cris/translate_v10.c | 72 +++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 69 insertions(+), 6 deletions(-) diff --git a/target-cris/cpu.h b/target-cris/cpu.h index 8ae0ce3ef..453afbb66 100644 --- a/target-cris/cpu.h +++ b/target-cris/cpu.h @@ -67,6 +67,8 @@ #define Q_FLAG 0x80000000 #define M_FLAG 0x40000000 #define PFIX_FLAG 0x800 /* CRISv10 Only. */ +#define F_FLAG_V10 0x400 +#define P_FLAG_V10 0x200 #define S_FLAG 0x200 #define R_FLAG 0x100 #define P_FLAG 0x80 diff --git a/target-cris/helper.c b/target-cris/helper.c index 75f0035e6..5bc6d810c 100644 --- a/target-cris/helper.c +++ b/target-cris/helper.c @@ -157,6 +157,7 @@ static void do_interruptv10(CPUState *env) /* Now that we are in kernel mode, load the handlers address. */ env->pc = ldl_code(env->pregs[PR_EBP] + ex_vec * 4); env->locked_irq = 1; + env->pregs[PR_CCS] |= F_FLAG_V10; /* set F. */ qemu_log_mask(CPU_LOG_INT, "%s isr=%x vec=%x ccs=%x pid=%d erp=%x\n", __func__, env->pc, ex_vec, diff --git a/target-cris/translate_v10.c b/target-cris/translate_v10.c index 637ac2084..95053b64f 100644 --- a/target-cris/translate_v10.c +++ b/target-cris/translate_v10.c @@ -62,6 +62,65 @@ static inline void cris_illegal_insn(DisasContext *dc) t_gen_raise_exception(EXCP_BREAK); } +static void gen_store_v10_conditional(DisasContext *dc, TCGv addr, TCGv val, + unsigned int size, int mem_index) +{ + int l1 = gen_new_label(); + TCGv taddr = tcg_temp_local_new(); + TCGv tval = tcg_temp_local_new(); + TCGv t1 = tcg_temp_local_new(); + dc->postinc = 0; + cris_evaluate_flags(dc); + + tcg_gen_mov_tl(taddr, addr); + tcg_gen_mov_tl(tval, val); + + /* Store only if F flag isn't set */ + tcg_gen_andi_tl(t1, cpu_PR[PR_CCS], F_FLAG_V10); + tcg_gen_brcondi_tl(TCG_COND_NE, t1, 0, l1); + if (size == 1) { + tcg_gen_qemu_st8(tval, taddr, mem_index); + } else if (size == 2) { + tcg_gen_qemu_st16(tval, taddr, mem_index); + } else { + tcg_gen_qemu_st32(tval, taddr, mem_index); + } + gen_set_label(l1); + tcg_gen_shri_tl(t1, t1, 1); /* shift F to P position */ + tcg_gen_or_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], t1); /*P=F*/ + tcg_temp_free(t1); + tcg_temp_free(tval); + tcg_temp_free(taddr); +} + +static void gen_store_v10(DisasContext *dc, TCGv addr, TCGv val, + unsigned int size) +{ + int mem_index = cpu_mmu_index(dc->env); + + /* If we get a fault on a delayslot we must keep the jmp state in + the cpu-state to be able to re-execute the jmp. */ + if (dc->delayed_branch == 1) { + cris_store_direct_jmp(dc); + } + + /* Conditional writes. We only support the kind were X is known + at translation time. */ + if (dc->flagx_known && dc->flags_x) { + gen_store_v10_conditional(dc, addr, val, size, mem_index); + return; + } + + if (size == 1) { + tcg_gen_qemu_st8(val, addr, mem_index); + } else if (size == 2) { + tcg_gen_qemu_st16(val, addr, mem_index); + } else { + tcg_gen_qemu_st32(val, addr, mem_index); + } +} + + /* Prefix flag and register are used to handle the more complex addressing modes. */ static void cris_set_prefix(DisasContext *dc) @@ -313,7 +372,8 @@ static unsigned int dec10_setclrf(DisasContext *dc) if (set) { tcg_gen_ori_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], flags); } else { - tcg_gen_andi_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], ~flags); + tcg_gen_andi_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], + ~(flags|F_FLAG_V10|P_FLAG_V10)); } dc->flags_uptodate = 1; @@ -723,7 +783,7 @@ static unsigned int dec10_ind_move_r_m(DisasContext *dc, unsigned int size) LOG_DIS("move.%d $r%d, [$r%d]\n", dc->size, dc->src, dc->dst); addr = tcg_temp_new(); crisv10_prepare_memaddr(dc, addr, size); - gen_store(dc, addr, cpu_R[dc->dst], size); + gen_store_v10(dc, addr, cpu_R[dc->dst], size); insn_len += crisv10_post_memaddr(dc, size); return insn_len; @@ -767,10 +827,10 @@ static unsigned int dec10_ind_move_pr_m(DisasContext *dc) t0 = tcg_temp_new(); cris_evaluate_flags(dc); tcg_gen_andi_tl(t0, cpu_PR[PR_CCS], ~PFIX_FLAG); - gen_store(dc, addr, t0, size); + gen_store_v10(dc, addr, t0, size); tcg_temp_free(t0); } else { - gen_store(dc, addr, cpu_PR[dc->dst], size); + gen_store_v10(dc, addr, cpu_PR[dc->dst], size); } t0 = tcg_temp_new(); insn_len += crisv10_post_memaddr(dc, size); @@ -793,9 +853,9 @@ static void dec10_movem_r_m(DisasContext *dc) tcg_gen_mov_tl(t0, addr); for (i = dc->dst; i >= 0; i--) { if ((pfix && dc->mode == CRISV10_MODE_AUTOINC) && dc->src == i) { - gen_store(dc, addr, t0, 4); + gen_store_v10(dc, addr, t0, 4); } else { - gen_store(dc, addr, cpu_R[i], 4); + gen_store_v10(dc, addr, cpu_R[i], 4); } tcg_gen_addi_tl(addr, addr, 4); } -- cgit v1.2.3 From a25808dc5baee83f36e0cdab998eb6c0024156fa Mon Sep 17 00:00:00 2001 From: Anthony Liguori Date: Sun, 18 Dec 2011 12:59:12 -0600 Subject: pc: add pc-0.15 Signed-off-by: Anthony Liguori --- hw/pc_piix.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hw/pc_piix.c b/hw/pc_piix.c index 970f43c99..9093a285e 100644 --- a/hw/pc_piix.c +++ b/hw/pc_piix.c @@ -306,6 +306,14 @@ static QEMUMachine pc_machine_v1_0 = { .is_default = 1, }; +static QEMUMachine pc_machine_v0_15 = { + .name = "pc-0.15", + .desc = "Standard PC", + .init = pc_init_pci, + .max_cpus = 255, + .is_default = 1, +}; + static QEMUMachine pc_machine_v0_14 = { .name = "pc-0.14", .desc = "Standard PC", @@ -557,6 +565,7 @@ static QEMUMachine xenfv_machine = { static void pc_machine_init(void) { qemu_register_machine(&pc_machine_v1_0); + qemu_register_machine(&pc_machine_v0_15); qemu_register_machine(&pc_machine_v0_14); qemu_register_machine(&pc_machine_v0_13); qemu_register_machine(&pc_machine_v0_12); -- cgit v1.2.3 From 7e2191ae9898cc957a3d1991aff0e40f2e0f44a4 Mon Sep 17 00:00:00 2001 From: Anthony Liguori Date: Sun, 18 Dec 2011 13:07:03 -0600 Subject: pc: fix event_idx compatibility for virtio devices event_idx was introduced in 0.15 and must be disabled for all virtio-pci devices (including virtio-balloon-pci). Signed-off-by: Anthony Liguori --- hw/pc_piix.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/hw/pc_piix.c b/hw/pc_piix.c index 9093a285e..05000e3cc 100644 --- a/hw/pc_piix.c +++ b/hw/pc_piix.c @@ -328,6 +328,22 @@ static QEMUMachine pc_machine_v0_14 = { .driver = "qxl-vga", .property = "revision", .value = stringify(2), + },{ + .driver = "virtio-blk-pci", + .property = "event_idx", + .value = "off", + },{ + .driver = "virtio-serial-pci", + .property = "event_idx", + .value = "off", + },{ + .driver = "virtio-net-pci", + .property = "event_idx", + .value = "off", + },{ + .driver = "virtio-balloon-pci", + .property = "event_idx", + .value = "off", }, { /* end of list */ } }, @@ -367,6 +383,10 @@ static QEMUMachine pc_machine_v0_13 = { .driver = "virtio-net-pci", .property = "event_idx", .value = "off", + },{ + .driver = "virtio-balloon-pci", + .property = "event_idx", + .value = "off", },{ .driver = "AC97", .property = "use_broken_id", @@ -414,6 +434,10 @@ static QEMUMachine pc_machine_v0_12 = { .driver = "virtio-net-pci", .property = "event_idx", .value = "off", + },{ + .driver = "virtio-balloon-pci", + .property = "event_idx", + .value = "off", },{ .driver = "AC97", .property = "use_broken_id", @@ -469,6 +493,10 @@ static QEMUMachine pc_machine_v0_11 = { .driver = "virtio-net-pci", .property = "event_idx", .value = "off", + },{ + .driver = "virtio-balloon-pci", + .property = "event_idx", + .value = "off", },{ .driver = "AC97", .property = "use_broken_id", @@ -536,6 +564,10 @@ static QEMUMachine pc_machine_v0_10 = { .driver = "virtio-net-pci", .property = "event_idx", .value = "off", + },{ + .driver = "virtio-balloon-pci", + .property = "event_idx", + .value = "off", },{ .driver = "AC97", .property = "use_broken_id", -- cgit v1.2.3 From 9b81fbdbb0cc930aacec343c6ab37adfd60c9e76 Mon Sep 17 00:00:00 2001 From: "Cao,Bing Bu" Date: Tue, 13 Dec 2011 09:22:20 +0800 Subject: Fix parse of usb device description with multiple configurations Changed From V1: Use DPRINTF instead of fprintf,because it is not an error. When testing ipod on QEMU by He Jie Xu,qemu made a assertion. We found that the ipod with 2 configurations,and the usb-linux did not parse the descriptor correctly. The descr_len returned is the total length of the all configurations,not one configuration. The older version will through the other configurations instead of skip,continue parsing the descriptor of interfaces/endpoints in other configurations,then went wrong. This patch will put the configuration descriptor parse in loop outside and dispel the other configurations not requested. Signed-off-by: Cao,Bing Bu Signed-off-by: Gerd Hoffmann --- usb-linux.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/usb-linux.c b/usb-linux.c index ab4c6930c..ed14bb115 100644 --- a/usb-linux.c +++ b/usb-linux.c @@ -1141,15 +1141,18 @@ static int usb_linux_update_endp_table(USBHostDevice *s) length = s->descr_len - 18; i = 0; - if (descriptors[i + 1] != USB_DT_CONFIG || - descriptors[i + 5] != s->configuration) { - fprintf(stderr, "invalid descriptor data - configuration %d\n", - s->configuration); - return 1; - } - i += descriptors[i]; - while (i < length) { + if (descriptors[i + 1] != USB_DT_CONFIG) { + fprintf(stderr, "invalid descriptor data\n"); + return 1; + } else if (descriptors[i + 5] != s->configuration) { + DPRINTF("not requested configuration %d\n", s->configuration); + i += (descriptors[i + 3] << 8) + descriptors[i + 2]; + continue; + } + + i += descriptors[i]; + if (descriptors[i + 1] != USB_DT_INTERFACE || (descriptors[i + 1] == USB_DT_INTERFACE && descriptors[i + 4] == 0)) { -- cgit v1.2.3 From f63d074313c5df917535587b50802ece7beb6e45 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 4 Jan 2012 18:13:54 +0100 Subject: usb-storage: cancel I/O on reset When resetting the usb-storage device we'll have to carefully cancel and clear any requests which might be in flight, otherwise we'll confuse the state machine. Signed-off-by: Gerd Hoffmann --- hw/usb-msd.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/hw/usb-msd.c b/hw/usb-msd.c index 4c0695012..3147131db 100644 --- a/hw/usb-msd.c +++ b/hw/usb-msd.c @@ -278,6 +278,18 @@ static void usb_msd_handle_reset(USBDevice *dev) MSDState *s = (MSDState *)dev; DPRINTF("Reset\n"); + if (s->req) { + scsi_req_cancel(s->req); + } + assert(s->req == NULL); + + if (s->packet) { + USBPacket *p = s->packet; + s->packet = NULL; + p->result = USB_RET_STALL; + usb_packet_complete(dev, p); + } + s->mode = USB_MSDM_CBW; } -- cgit v1.2.3 From c936f649d4a6b87cabe809170874f6b560cc0524 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 5 Jan 2012 15:49:18 +0100 Subject: usb-host: properly release port on unplug & exit Factor out port release into a separate function. Call release function in exit notifier too. Add explicit call the USBDEVFS_RELEASE_PORT ioctl, just closing the hub file handle seems not to be enougth. Make sure we release the port before resetting the device, otherwise host drivers will not re-attach. Signed-off-by: Gerd Hoffmann --- usb-linux.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/usb-linux.c b/usb-linux.c index ed14bb115..749ce7108 100644 --- a/usb-linux.c +++ b/usb-linux.c @@ -116,6 +116,7 @@ typedef struct USBHostDevice { USBDevice dev; int fd; int hub_fd; + int hub_port; uint8_t descr[8192]; int descr_len; @@ -434,7 +435,7 @@ static int usb_host_claim_port(USBHostDevice *s) { #ifdef USBDEVFS_CLAIM_PORT char *h, hub_name[64], line[1024]; - int hub_addr, portnr, ret; + int hub_addr, ret; snprintf(hub_name, sizeof(hub_name), "%d-%s", s->match.bus_num, s->match.port); @@ -442,13 +443,13 @@ static int usb_host_claim_port(USBHostDevice *s) /* try strip off last ".$portnr" to get hub */ h = strrchr(hub_name, '.'); if (h != NULL) { - portnr = atoi(h+1); + s->hub_port = atoi(h+1); *h = '\0'; } else { /* no dot in there -> it is the root hub */ snprintf(hub_name, sizeof(hub_name), "usb%d", s->match.bus_num); - portnr = atoi(s->match.port); + s->hub_port = atoi(s->match.port); } if (!usb_host_read_file(line, sizeof(line), "devnum", @@ -469,20 +470,32 @@ static int usb_host_claim_port(USBHostDevice *s) return -1; } - ret = ioctl(s->hub_fd, USBDEVFS_CLAIM_PORT, &portnr); + ret = ioctl(s->hub_fd, USBDEVFS_CLAIM_PORT, &s->hub_port); if (ret < 0) { close(s->hub_fd); s->hub_fd = -1; return -1; } - trace_usb_host_claim_port(s->match.bus_num, hub_addr, portnr); + trace_usb_host_claim_port(s->match.bus_num, hub_addr, s->hub_port); return 0; #else return -1; #endif } +static void usb_host_release_port(USBHostDevice *s) +{ + if (s->hub_fd == -1) { + return; + } +#ifdef USBDEVFS_RELEASE_PORT + ioctl(s->hub_fd, USBDEVFS_RELEASE_PORT, &s->hub_port); +#endif + close(s->hub_fd); + s->hub_fd = -1; +} + static int usb_host_disconnect_ifaces(USBHostDevice *dev, int nb_interfaces) { /* earlier Linux 2.4 do not support that */ @@ -635,10 +648,8 @@ static void usb_host_handle_destroy(USBDevice *dev) { USBHostDevice *s = (USBHostDevice *)dev; + usb_host_release_port(s); usb_host_close(s); - if (s->hub_fd != -1) { - close(s->hub_fd); - } QTAILQ_REMOVE(&hostdevs, s, next); qemu_remove_exit_notifier(&s->exit); } @@ -1402,6 +1413,7 @@ static void usb_host_exit_notifier(struct Notifier *n, void *data) { USBHostDevice *s = container_of(n, USBHostDevice, exit); + usb_host_release_port(s); if (s->fd != -1) { usb_host_do_reset(s);; } -- cgit v1.2.3 From 23201c64a789cf948fedcea221a4b6e197fcd628 Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Thu, 22 Dec 2011 11:34:30 +0200 Subject: usb-ohci: td.cbp incorrectly updated near page end The current code that updates the cbp value after a transfer looks like this: td.cbp += ret; if ((td.cbp & 0xfff) + ret > 0xfff) { because the 'ret' value is effectively added twice the check may fire too early when the overflow hasn't happened yet. Below is one of the possible changes that correct the behavior: Signed-off-by: Gerd Hoffmann --- hw/usb-ohci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/usb-ohci.c b/hw/usb-ohci.c index c2981c59a..c27014a88 100644 --- a/hw/usb-ohci.c +++ b/hw/usb-ohci.c @@ -1025,10 +1025,10 @@ static int ohci_service_td(OHCIState *ohci, struct ohci_ed *ed) if (ret == len) { td.cbp = 0; } else { - td.cbp += ret; if ((td.cbp & 0xfff) + ret > 0xfff) { - td.cbp &= 0xfff; - td.cbp |= td.be & ~0xfff; + td.cbp = (td.be & ~0xfff) + ((td.cbp + ret) & 0xfff); + } else { + td.cbp += ret; } } td.flags |= OHCI_TD_T1; -- cgit v1.2.3 From 37769d27270eff15d878a1c7df23407fc5f09b7f Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Sat, 7 Jan 2012 15:20:12 +0100 Subject: target-sh4: ignore ocbp and ocbwb instructions ocbp and ocbwb controls the writeback of a cache line to memory. They are supposed to do nothing in case of a cache miss. Given QEMU only partially emulate caches, it is safe to ignore these instructions. This fixes a kernel oops when trying to access an rtl8139 NIC with recent versions. Signed-off-by: Aurelien Jarno (cherry picked from commit 0cdb95549fedc73e13c147ab9dcabcc303426a07) --- target-sh4/translate.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/target-sh4/translate.c b/target-sh4/translate.c index bad357772..e04a6e0f5 100644 --- a/target-sh4/translate.c +++ b/target-sh4/translate.c @@ -1652,18 +1652,10 @@ static void _decode_opc(DisasContext * ctx) } return; case 0x00a3: /* ocbp @Rn */ - { - TCGv dummy = tcg_temp_new(); - tcg_gen_qemu_ld32s(dummy, REG(B11_8), ctx->memidx); - tcg_temp_free(dummy); - } - return; case 0x00b3: /* ocbwb @Rn */ - { - TCGv dummy = tcg_temp_new(); - tcg_gen_qemu_ld32s(dummy, REG(B11_8), ctx->memidx); - tcg_temp_free(dummy); - } + /* These instructions are supposed to do nothing in case of + a cache miss. Given that we only partially emulate caches + it is safe to simply ignore them. */ return; case 0x0083: /* pref @Rn */ return; -- cgit v1.2.3 From fbcf305e5adc310e6383d4ec5e844f3f8d072116 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 12 Dec 2011 22:36:01 +0100 Subject: PPC: Fix linker scripts on ppc hosts When compiling qemu statically with multilib on PPC, we hit the same issue that commit 845f2c2812d9ed24b36c02a3d06ee83aeafe8b49 is fixing. Do the same here. Signed-off-by: Alexander Graf Signed-off-by: Aurelien Jarno (cherry picked from commit 665a04ae1cbfa8004a38cf0fe99ba799c978a1fe) --- ppc.ld | 16 ++++++++++++++-- ppc64.ld | 16 ++++++++++++++-- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/ppc.ld b/ppc.ld index 69aa3f228..2a0dcad63 100644 --- a/ppc.ld +++ b/ppc.ld @@ -49,8 +49,20 @@ SECTIONS .rela.sbss2 : { *(.rela.sbss2 .rela.sbss2.* .rela.gnu.linkonce.sb2.*) } .rel.bss : { *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*) } .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) } - .rel.plt : { *(.rel.plt) } - .rela.plt : { *(.rela.plt) } + .rel.plt : + { + *(.rel.plt) + PROVIDE (__rel_iplt_start = .); + *(.rel.iplt) + PROVIDE (__rel_iplt_end = .); + } + .rela.plt : + { + *(.rela.plt) + PROVIDE (__rela_iplt_start = .); + *(.rela.iplt) + PROVIDE (__rela_iplt_end = .); + } .init : { KEEP (*(.init)) diff --git a/ppc64.ld b/ppc64.ld index 0a7c0dd0c..e2dafa0b5 100644 --- a/ppc64.ld +++ b/ppc64.ld @@ -54,8 +54,20 @@ SECTIONS *(.rela.sbss2 .rela.sbss2.* .rela.gnu.linkonce.sb2.*) *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) } - .rel.plt : { *(.rel.plt) } - .rela.plt : { *(.rela.plt) } + .rel.plt : + { + *(.rel.plt) + PROVIDE (__rel_iplt_start = .); + *(.rel.iplt) + PROVIDE (__rel_iplt_end = .); + } + .rela.plt : + { + *(.rela.plt) + PROVIDE (__rela_iplt_start = .); + *(.rela.iplt) + PROVIDE (__rela_iplt_end = .); + } .rela.tocbss : { *(.rela.tocbss) } .init : { -- cgit v1.2.3 From 6061f16a8a119a46e61f2ddbabdb58f83e8857f7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 25 Nov 2011 12:06:22 +0100 Subject: qiov: prevent double free or use-after-free qemu_iovec_destroy does not clear the QEMUIOVector fully, and the data could thus be used after free or freed again. While I do not know any example in the tree, I observed this using virtio-scsi (and SCSI scatter/gather) when canceling DMA requests. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- cutils.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cutils.c b/cutils.c index 6db6304bb..24b3fe355 100644 --- a/cutils.c +++ b/cutils.c @@ -217,7 +217,10 @@ void qemu_iovec_destroy(QEMUIOVector *qiov) { assert(qiov->nalloc != -1); + qemu_iovec_reset(qiov); g_free(qiov->iov); + qiov->nalloc = 0; + qiov->iov = NULL; } void qemu_iovec_reset(QEMUIOVector *qiov) -- cgit v1.2.3 From fe5c13ebf1161d0f324229cfb36cb5fb87ec6248 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 5 Dec 2011 19:20:12 +0200 Subject: coroutine: switch per-thread free pool to a global pool ucontext-based coroutines use a free pool to reduce allocations and deallocations of coroutine objects. The pool is per-thread, presumably to improve locality. However, as coroutines are usually allocated in a vcpu thread and freed in the I/O thread, the pool accounting gets screwed up and we end allocating and freeing a coroutine for every I/O request. This is expensive since large objects are allocated via the kernel, and are not cached by the C runtime. Fix by switching to a global pool. This is safe since we're protected by the global mutex. Signed-off-by: Avi Kivity Signed-off-by: Kevin Wolf --- coroutine-ucontext.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/coroutine-ucontext.c b/coroutine-ucontext.c index 2b8d3e9c1..3d01075b0 100644 --- a/coroutine-ucontext.c +++ b/coroutine-ucontext.c @@ -35,6 +35,10 @@ enum { POOL_MAX_SIZE = 64, }; +/** Free list to speed up creation */ +static QLIST_HEAD(, Coroutine) pool = QLIST_HEAD_INITIALIZER(pool); +static unsigned int pool_size; + typedef struct { Coroutine base; void *stack; @@ -48,10 +52,6 @@ typedef struct { /** Currently executing coroutine */ Coroutine *current; - /** Free list to speed up creation */ - QLIST_HEAD(, Coroutine) pool; - unsigned int pool_size; - /** The default coroutine */ CoroutineUContext leader; } CoroutineThreadState; @@ -75,7 +75,6 @@ static CoroutineThreadState *coroutine_get_thread_state(void) if (!s) { s = g_malloc0(sizeof(*s)); s->current = &s->leader.base; - QLIST_INIT(&s->pool); pthread_setspecific(thread_state_key, s); } return s; @@ -84,14 +83,19 @@ static CoroutineThreadState *coroutine_get_thread_state(void) static void qemu_coroutine_thread_cleanup(void *opaque) { CoroutineThreadState *s = opaque; + + g_free(s); +} + +static void __attribute__((destructor)) coroutine_cleanup(void) +{ Coroutine *co; Coroutine *tmp; - QLIST_FOREACH_SAFE(co, &s->pool, pool_next, tmp) { + QLIST_FOREACH_SAFE(co, &pool, pool_next, tmp) { g_free(DO_UPCAST(CoroutineUContext, base, co)->stack); g_free(co); } - g_free(s); } static void __attribute__((constructor)) coroutine_init(void) @@ -169,13 +173,12 @@ static Coroutine *coroutine_new(void) Coroutine *qemu_coroutine_new(void) { - CoroutineThreadState *s = coroutine_get_thread_state(); Coroutine *co; - co = QLIST_FIRST(&s->pool); + co = QLIST_FIRST(&pool); if (co) { QLIST_REMOVE(co, pool_next); - s->pool_size--; + pool_size--; } else { co = coroutine_new(); } @@ -184,13 +187,12 @@ Coroutine *qemu_coroutine_new(void) void qemu_coroutine_delete(Coroutine *co_) { - CoroutineThreadState *s = coroutine_get_thread_state(); CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_); - if (s->pool_size < POOL_MAX_SIZE) { - QLIST_INSERT_HEAD(&s->pool, &co->base, pool_next); + if (pool_size < POOL_MAX_SIZE) { + QLIST_INSERT_HEAD(&pool, &co->base, pool_next); co->base.caller = NULL; - s->pool_size++; + pool_size++; return; } -- cgit v1.2.3 From 5bb37d151b026759ee35f04212b11b4d625c7431 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 7 Dec 2011 12:42:10 +0100 Subject: qemu-img rebase: Fix for undersized backing files Backing files may be smaller than the corresponding COW file. When reading directly from the backing file, qemu-img rebase must consider this and assume zero sectors after the end of backing files. Signed-off-by: Kevin Wolf Reviewed-by: Stefan Hajnoczi --- qemu-img.c | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index 8bdae6649..01cc0d35a 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -1420,6 +1420,8 @@ static int img_rebase(int argc, char **argv) */ if (!unsafe) { uint64_t num_sectors; + uint64_t old_backing_num_sectors; + uint64_t new_backing_num_sectors; uint64_t sector; int n; uint8_t * buf_old; @@ -1430,6 +1432,8 @@ static int img_rebase(int argc, char **argv) buf_new = qemu_blockalign(bs, IO_BUF_SIZE); bdrv_get_geometry(bs, &num_sectors); + bdrv_get_geometry(bs_old_backing, &old_backing_num_sectors); + bdrv_get_geometry(bs_new_backing, &new_backing_num_sectors); local_progress = (float)100 / (num_sectors / MIN(num_sectors, IO_BUF_SIZE / 512)); @@ -1448,16 +1452,36 @@ static int img_rebase(int argc, char **argv) continue; } - /* Read old and new backing file */ - ret = bdrv_read(bs_old_backing, sector, buf_old, n); - if (ret < 0) { - error_report("error while reading from old backing file"); - goto out; + /* + * Read old and new backing file and take into consideration that + * backing files may be smaller than the COW image. + */ + if (sector >= old_backing_num_sectors) { + memset(buf_old, 0, n * BDRV_SECTOR_SIZE); + } else { + if (sector + n > old_backing_num_sectors) { + n = old_backing_num_sectors - sector; + } + + ret = bdrv_read(bs_old_backing, sector, buf_old, n); + if (ret < 0) { + error_report("error while reading from old backing file"); + goto out; + } } - ret = bdrv_read(bs_new_backing, sector, buf_new, n); - if (ret < 0) { - error_report("error while reading from new backing file"); - goto out; + + if (sector >= new_backing_num_sectors) { + memset(buf_new, 0, n * BDRV_SECTOR_SIZE); + } else { + if (sector + n > new_backing_num_sectors) { + n = new_backing_num_sectors - sector; + } + + ret = bdrv_read(bs_new_backing, sector, buf_new, n); + if (ret < 0) { + error_report("error while reading from new backing file"); + goto out; + } } /* If they differ, we need to write to the COW file */ -- cgit v1.2.3 From 8afe984ef7aa25cb2f8af51da021fdc8a242884d Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 7 Dec 2011 13:57:13 +0100 Subject: Documentation: Add qemu-img -t parameter in man page Signed-off-by: Kevin Wolf Reviewed-by: Stefan Hajnoczi --- qemu-img-cmds.hx | 6 +++--- qemu-img.texi | 10 +++++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index 4be00a5ed..49dce7c92 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -24,13 +24,13 @@ ETEXI DEF("commit", img_commit, "commit [-f fmt] [-t cache] filename") STEXI -@item commit [-f @var{fmt}] @var{filename} +@item commit [-f @var{fmt}] [-t @var{cache}] @var{filename} ETEXI DEF("convert", img_convert, "convert [-c] [-p] [-f fmt] [-t cache] [-O output_fmt] [-o options] [-s snapshot_name] [-S sparse_size] filename [filename2 [...]] output_filename") STEXI -@item convert [-c] [-p] [-f @var{fmt}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} +@item convert [-c] [-p] [-f @var{fmt}] [-t @var{cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} ETEXI DEF("info", img_info, @@ -48,7 +48,7 @@ ETEXI DEF("rebase", img_rebase, "rebase [-f fmt] [-t cache] [-p] [-u] -b backing_file [-F backing_fmt] filename") STEXI -@item rebase [-f @var{fmt}] [-p] [-u] -b @var{backing_file} [-F @var{backing_fmt}] @var{filename} +@item rebase [-f @var{fmt}] [-t @var{cache}] [-p] [-u] -b @var{backing_file} [-F @var{backing_fmt}] @var{filename} ETEXI DEF("resize", img_resize, diff --git a/qemu-img.texi b/qemu-img.texi index 70fa321df..b2ca3a542 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -45,6 +45,10 @@ indicates the consecutive number of bytes that must contain only zeros for qemu-img to create a sparse image during conversion. This value is rounded down to the nearest 512 bytes. You may use the common size suffixes like @code{k} for kilobytes. +@item -t @var{cache} +specifies the cache mode that should be used with the (destination) file. See +the documentation of the emulator's @code{-drive cache=...} option for allowed +values. @end table Parameters to snapshot subcommand: @@ -87,11 +91,11 @@ this case. @var{backing_file} will never be modified unless you use the The size can also be specified using the @var{size} option with @code{-o}, it doesn't need to be specified separately in this case. -@item commit [-f @var{fmt}] @var{filename} +@item commit [-f @var{fmt}] [-t @var{cache}] @var{filename} Commit the changes recorded in @var{filename} in its base image. -@item convert [-c] [-p] [-f @var{fmt}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} +@item convert [-c] [-p] [-f @var{fmt}] [-t @var{cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} Convert the disk image @var{filename} or a snapshot @var{snapshot_name} to disk image @var{output_filename} using format @var{output_fmt}. It can be optionally compressed (@code{-c} @@ -121,7 +125,7 @@ they are displayed too. List, apply, create or delete snapshots in image @var{filename}. -@item rebase [-f @var{fmt}] [-p] [-u] -b @var{backing_file} [-F @var{backing_fmt}] @var{filename} +@item rebase [-f @var{fmt}] [-t @var{cache}] [-p] [-u] -b @var{backing_file} [-F @var{backing_fmt}] @var{filename} Changes the backing file of an image. Only the formats @code{qcow2} and @code{qed} support changing the backing file. -- cgit v1.2.3 From e47c212cb5af148ab6d9dcf49bc0e054fe9c2e1d Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Tue, 6 Dec 2011 17:05:10 -0800 Subject: rbd: always set out parameter in qemu_rbd_snap_list The caller expects psn_tab to be NULL when there are no snapshots or an error occurs. This results in calling g_free on an invalid address. Reported-by: Oliver Francke Signed-off-by: Josh Durgin Signed-off-by: Kevin Wolf --- block/rbd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/rbd.c b/block/rbd.c index 9088c52d2..54a696173 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -808,7 +808,7 @@ static int qemu_rbd_snap_list(BlockDriverState *bs, } while (snap_count == -ERANGE); if (snap_count <= 0) { - return snap_count; + goto done; } sn_tab = g_malloc0(snap_count * sizeof(QEMUSnapshotInfo)); @@ -827,6 +827,7 @@ static int qemu_rbd_snap_list(BlockDriverState *bs, } rbd_snap_list_end(snaps); + done: *psn_tab = sn_tab; return snap_count; } -- cgit v1.2.3 From 986626efecae87af17a9d9726767501cb4687b8c Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 18 Nov 2011 16:41:59 +0100 Subject: console: Fix segfault on screendump without VGA adapter When trying to create a screen dump without having any VGA adapter inside the guest, QEMU segfaults. This is because it's trying to switch back to the "previous" screen it was on before dumping the VGA screen. Unfortunately, in my case there simply is no previous screen so it accesses a NULL pointer. Fix it by checking if previous_active_console is actually available. This is 1.0 material. Signed-off-by: Alexander Graf --- console.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/console.c b/console.c index f6fe44195..ed6a653fd 100644 --- a/console.c +++ b/console.c @@ -186,7 +186,9 @@ void vga_hw_screen_dump(const char *filename) consoles[0]->hw_screen_dump(consoles[0]->hw, filename); } - console_select(previous_active_console->index); + if (previous_active_console) { + console_select(previous_active_console->index); + } } void vga_hw_text_update(console_ch_t *chardata) -- cgit v1.2.3 From 57ee5f77c09d365d470cd208c76a7a01a85435b2 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Mon, 28 Nov 2011 20:21:39 +0000 Subject: pseries: Fix array overrun bug in PCI code spapr_populate_pci_devices() containd a loop with PCI_NUM_REGIONS (7) iterations. However this overruns the 'bars' global array, which only has 6 elements. In fact we only want to run this loop for things listed in the bars array, so this patch corrects the loop bounds to reflect that. Signed-off-by: David Gibson Signed-off-by: Alexander Graf (cherry picked from commit 135712de61dfa22368e98914d65b8b0860ec8505) --- hw/spapr_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c index 716258854..9b6a032cc 100644 --- a/hw/spapr_pci.c +++ b/hw/spapr_pci.c @@ -454,7 +454,7 @@ int spapr_populate_pci_devices(sPAPRPHBState *phb, reg[0].size = 0; n = 0; - for (i = 0; i < PCI_NUM_REGIONS; ++i) { + for (i = 0; i < ARRAY_SIZE(bars); ++i) { if (0 == dev->io_regions[i].size) { continue; } -- cgit v1.2.3 From adf6c527b07b1399cae990d48fa7b87113bd5083 Mon Sep 17 00:00:00 2001 From: Liu Yu-B13201 Date: Mon, 28 Nov 2011 20:41:18 +0000 Subject: kvm-ppc: halt secondary cpus when guest reset When guest reset, we need to halt secondary cpus until guest kick them. This already works for tcg. The patch add the support for kvm. Signed-off-by: Liu Yu Signed-off-by: Alexander Graf [agraf: remove in-kernel irqchip code] (cherry picked from commit 157feeadbaec09fe4dca539a24f6f6d327d6eeb6) --- hw/ppce500_spin.c | 1 + target-ppc/kvm.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/ppce500_spin.c b/hw/ppce500_spin.c index cccd94073..2b527282b 100644 --- a/hw/ppce500_spin.c +++ b/hw/ppce500_spin.c @@ -112,6 +112,7 @@ static void spin_kick(void *data) env->halted = 0; env->exception_index = -1; + env->stopped = 0; qemu_cpu_kick(env); } diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 429349fb9..9b2e605b6 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -504,7 +504,7 @@ void kvm_arch_post_run(CPUState *env, struct kvm_run *run) int kvm_arch_process_async_events(CPUState *env) { - return 0; + return env->halted; } static int kvmppc_handle_halt(CPUState *env) -- cgit v1.2.3 From e295c31e25b044acba2a48f1e7292b9fbb896367 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Mon, 12 Dec 2011 18:24:32 +0000 Subject: pseries: Emit device tree nodes in reg order Although in theory the device tree has no inherent ordering, in practice the order of nodes in the device tree does effect the order that devices are detected by software. Currently the ordering is determined by the order the devices appear on the QEMU command line. Although that does give the user control over the ordering, it is fragile, especially when the user does not generate the command line manually - eg. when using libvirt etc. So order the device tree based on the reg value, ie. the address of on the VIO bus of the devices. This gives us a sane and stable ordering. Signed-off-by: Michael Ellerman Signed-off-by: David Gibson Signed-off-by: Alexander Graf [agraf] add braces (cherry picked from commit 05c194384f836240ea4c2da5fa3be43a54bff021) --- hw/spapr_vio.c | 50 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/hw/spapr_vio.c b/hw/spapr_vio.c index 2dcc0361e..8bd00cade 100644 --- a/hw/spapr_vio.c +++ b/hw/spapr_vio.c @@ -749,21 +749,61 @@ static void spapr_vio_register_devices(void) device_init(spapr_vio_register_devices) #ifdef CONFIG_FDT +static int compare_reg(const void *p1, const void *p2) +{ + VIOsPAPRDevice const *dev1, *dev2; + + dev1 = (VIOsPAPRDevice *)*(DeviceState **)p1; + dev2 = (VIOsPAPRDevice *)*(DeviceState **)p2; + + if (dev1->reg < dev2->reg) { + return -1; + } + if (dev1->reg == dev2->reg) { + return 0; + } + + /* dev1->reg > dev2->reg */ + return 1; +} + int spapr_populate_vdevice(VIOsPAPRBus *bus, void *fdt) { - DeviceState *qdev; - int ret = 0; + DeviceState *qdev, **qdevs; + int i, num, ret = 0; + /* Count qdevs on the bus list */ + num = 0; QTAILQ_FOREACH(qdev, &bus->bus.children, sibling) { - VIOsPAPRDevice *dev = (VIOsPAPRDevice *)qdev; + num++; + } + + /* Copy out into an array of pointers */ + qdevs = g_malloc(sizeof(qdev) * num); + num = 0; + QTAILQ_FOREACH(qdev, &bus->bus.children, sibling) { + qdevs[num++] = qdev; + } + + /* Sort the array */ + qsort(qdevs, num, sizeof(qdev), compare_reg); + + /* Hack alert. Give the devices to libfdt in reverse order, we happen + * to know that will mean they are in forward order in the tree. */ + for (i = num - 1; i >= 0; i--) { + VIOsPAPRDevice *dev = (VIOsPAPRDevice *)(qdevs[i]); ret = vio_make_devnode(dev, fdt); if (ret < 0) { - return ret; + goto out; } } - return 0; + ret = 0; +out: + free(qdevs); + + return ret; } #endif /* CONFIG_FDT */ -- cgit v1.2.3 From 447a3b347381162f9c2b1b7831643b6ee0e767ca Mon Sep 17 00:00:00 2001 From: David Gibson Date: Mon, 12 Dec 2011 18:24:33 +0000 Subject: pseries: Add a routine to find a stable "default" vty and use it In vty_lookup() we have a special case for supporting early debug in the kernel. This accepts reg == 0 as a special case to mean "any vty". We implement this by searching the vtys on the bus and returning the first we find. This means that the vty we chose depends on the order the vtys are specified on the QEMU command line - because that determines the order of the vtys on the bus. We'd rather the command line order was irrelevant, so instead return the vty with the lowest reg value. This is still a guess as to what the user really means, but it is at least stable WRT command line ordering. Signed-off-by: Michael Ellerman Signed-off-by: David Gibson Signed-off-by: Alexander Graf [agraf] fix braces (cherry picked from commit 98331f8ad6a3e2cfbb402d72e6be47eac7706251) --- hw/spapr_vty.c | 47 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/hw/spapr_vty.c b/hw/spapr_vty.c index f23cc3623..e2fec5880 100644 --- a/hw/spapr_vty.c +++ b/hw/spapr_vty.c @@ -156,24 +156,53 @@ static VIOsPAPRDeviceInfo spapr_vty = { }, }; +static VIOsPAPRDevice *spapr_vty_get_default(VIOsPAPRBus *bus) +{ + VIOsPAPRDevice *sdev, *selected; + DeviceState *iter; + + /* + * To avoid the console bouncing around we want one VTY to be + * the "default". We haven't really got anything to go on, so + * arbitrarily choose the one with the lowest reg value. + */ + + selected = NULL; + QTAILQ_FOREACH(iter, &bus->bus.children, sibling) { + /* Only look at VTY devices */ + if (iter->info != &spapr_vty.qdev) { + continue; + } + + sdev = DO_UPCAST(VIOsPAPRDevice, qdev, iter); + + /* First VTY we've found, so it is selected for now */ + if (!selected) { + selected = sdev; + continue; + } + + /* Choose VTY with lowest reg value */ + if (sdev->reg < selected->reg) { + selected = sdev; + } + } + + return selected; +} + static VIOsPAPRDevice *vty_lookup(sPAPREnvironment *spapr, target_ulong reg) { VIOsPAPRDevice *sdev; sdev = spapr_vio_find_by_reg(spapr->vio_bus, reg); if (!sdev && reg == 0) { - DeviceState *qdev; - /* Hack for kernel early debug, which always specifies reg==0. - * We search all VIO devices, and grab the first available vty - * device. This attempts to mimic existing PowerVM behaviour + * We search all VIO devices, and grab the vty with the lowest + * reg. This attempts to mimic existing PowerVM behaviour * (early debug does work there, despite having no vty with * reg==0. */ - QTAILQ_FOREACH(qdev, &spapr->vio_bus->bus.children, sibling) { - if (qdev->info == &spapr_vty.qdev) { - return DO_UPCAST(VIOsPAPRDevice, qdev, qdev); - } - } + return spapr_vty_get_default(spapr->vio_bus); } return sdev; -- cgit v1.2.3 From 7a6d80e93cb461a985ffb64bc4d19dd5f726386b Mon Sep 17 00:00:00 2001 From: David Gibson Date: Tue, 13 Dec 2011 15:24:34 +1100 Subject: pseries: Populate "/chosen/linux,stdout-path" in the FDT There is a device tree property "/chosen/linux,stdout-path" which indicates which device should be used as stdout - ie. "the console". Currently we don't specify anything, which means both firmware and Linux choose something arbitrarily. Use the routine we added in the last patch to pick a default vty and specify it as stdout. Currently SLOF doesn't use the property, but we are hoping to update it to do so. Signed-off-by: Michael Ellerman Signed-off-by: David Gibson Signed-off-by: Alexander Graf (cherry picked from commit 68f3a94c64bbaaf8c7f2daa70de1b5d87a432f86) --- hw/spapr.c | 2 ++ hw/spapr_vio.c | 34 ++++++++++++++++++++++++++++++++++ hw/spapr_vio.h | 3 +++ hw/spapr_vty.c | 2 +- 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/hw/spapr.c b/hw/spapr.c index 2b901f105..5a98d8651 100644 --- a/hw/spapr.c +++ b/hw/spapr.c @@ -351,6 +351,8 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr, fprintf(stderr, "Couldn't set up RTAS device tree properties\n"); } + spapr_populate_chosen_stdout(fdt, spapr->vio_bus); + _FDT((fdt_pack(fdt))); cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt)); diff --git a/hw/spapr_vio.c b/hw/spapr_vio.c index 8bd00cade..464fe8744 100644 --- a/hw/spapr_vio.c +++ b/hw/spapr_vio.c @@ -806,4 +806,38 @@ out: return ret; } + +int spapr_populate_chosen_stdout(void *fdt, VIOsPAPRBus *bus) +{ + VIOsPAPRDevice *dev; + char *name, *path; + int ret, offset; + + dev = spapr_vty_get_default(bus); + if (!dev) + return 0; + + offset = fdt_path_offset(fdt, "/chosen"); + if (offset < 0) { + return offset; + } + + name = vio_format_dev_name(dev); + if (!name) { + return -ENOMEM; + } + + if (asprintf(&path, "/vdevice/%s", name) < 0) { + path = NULL; + ret = -ENOMEM; + goto out; + } + + ret = fdt_setprop_string(fdt, offset, "linux,stdout-path", path); +out: + free(name); + free(path); + + return ret; +} #endif /* CONFIG_FDT */ diff --git a/hw/spapr_vio.h b/hw/spapr_vio.h index a325a5f4b..9fcd304ad 100644 --- a/hw/spapr_vio.h +++ b/hw/spapr_vio.h @@ -83,6 +83,7 @@ extern VIOsPAPRBus *spapr_vio_bus_init(void); extern VIOsPAPRDevice *spapr_vio_find_by_reg(VIOsPAPRBus *bus, uint32_t reg); extern void spapr_vio_bus_register_withprop(VIOsPAPRDeviceInfo *info); extern int spapr_populate_vdevice(VIOsPAPRBus *bus, void *fdt); +extern int spapr_populate_chosen_stdout(void *fdt, VIOsPAPRBus *bus); extern int spapr_vio_signal(VIOsPAPRDevice *dev, target_ulong mode); @@ -108,6 +109,8 @@ void spapr_vty_create(VIOsPAPRBus *bus, uint32_t reg, CharDriverState *chardev); void spapr_vlan_create(VIOsPAPRBus *bus, uint32_t reg, NICInfo *nd); void spapr_vscsi_create(VIOsPAPRBus *bus, uint32_t reg); +VIOsPAPRDevice *spapr_vty_get_default(VIOsPAPRBus *bus); + int spapr_tce_set_bypass(uint32_t unit, uint32_t enable); void spapr_vio_quiesce(void); diff --git a/hw/spapr_vty.c b/hw/spapr_vty.c index e2fec5880..386ccf720 100644 --- a/hw/spapr_vty.c +++ b/hw/spapr_vty.c @@ -156,7 +156,7 @@ static VIOsPAPRDeviceInfo spapr_vty = { }, }; -static VIOsPAPRDevice *spapr_vty_get_default(VIOsPAPRBus *bus) +VIOsPAPRDevice *spapr_vty_get_default(VIOsPAPRBus *bus) { VIOsPAPRDevice *sdev, *selected; DeviceState *iter; -- cgit v1.2.3 From d928541b51731a185214924cc5b44cd28020fa4f Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 11 Jan 2012 19:46:27 +0000 Subject: pseries: Don't try to munmap() a malloc()ed TCE table For the pseries machine, TCE (IOMMU) tables can either be directly malloc()ed in qemu or, when running on a KVM which supports it, mmap()ed from a KVM ioctl. The latter option is used when available, because it allows the (frequent bottlenext) H_PUT_TCE hypercall to be KVM accelerated. However, even when KVM is persent, TCE acceleration is not always possible. Only KVM HV supports this ioctl(), not KVM PR, or the kernel could run out of contiguous memory to allocate the new table. In this case we need to fall back on the malloc()ed table. When a device is removed, and we need to remove the TCE table, we need to either munmap() or free() the table as appropriate for how it was allocated. The code is supposed to do that, but we buggily fail to initialize the tcet->fd variable in the malloc() case, which is used as a flag to determine which is the right choice. This patch fixes the bug, and cleans up error messages relating to this path while we're at it. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: David Gibson Signed-off-by: Alexander Graf --- target-ppc/kvm.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 9b2e605b6..0410901b5 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -838,12 +838,18 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd) int fd; void *table; + /* Must set fd to -1 so we don't try to munmap when called for + * destroying the table, which the upper layers -will- do + */ + *pfd = -1; if (!cap_spapr_tce) { return NULL; } fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args); if (fd < 0) { + fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n", + liobn); return NULL; } @@ -852,6 +858,8 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd) table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (table == MAP_FAILED) { + fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n", + liobn); close(fd); return NULL; } @@ -871,8 +879,8 @@ int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size) len = (window_size / SPAPR_VIO_TCE_PAGE_SIZE)*sizeof(VIOsPAPR_RTCE); if ((munmap(table, len) < 0) || (close(fd) < 0)) { - fprintf(stderr, "KVM: Unexpected error removing KVM SPAPR TCE " - "table: %s", strerror(errno)); + fprintf(stderr, "KVM: Unexpected error removing TCE table: %s", + strerror(errno)); /* Leak the table */ } -- cgit v1.2.3 From 8a0a9cf35bd1abc7a9a11f430a6b14e034f34ba0 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 18 Nov 2011 16:45:54 +0100 Subject: s390x: add TR function for EXECUTE Newer gcc versions (or glibc?) also generate code that tries to EXECUTE the TR opcode. Implement it so that we don't break valid guests. Reported-by: Andreas Faerber Signed-off-by: Alexander Graf --- target-s390x/op_helper.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/target-s390x/op_helper.c b/target-s390x/op_helper.c index 137bae74a..5ddc7b93c 100644 --- a/target-s390x/op_helper.c +++ b/target-s390x/op_helper.c @@ -636,6 +636,9 @@ uint32_t HELPER(ex)(uint32_t cc, uint64_t v1, uint64_t addr, uint64_t ret) case 0x700: cc = helper_xc(l, get_address(0, b1, d1), get_address(0, b2, d2)); break; + case 0xc00: + helper_tr(l, get_address(0, b1, d1), get_address(0, b2, d2)); + break; default: goto abort; break; -- cgit v1.2.3 From d194ba1cdbeee54166845b75f9186a8e401e5f07 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Sun, 20 Nov 2011 23:12:03 +0000 Subject: s390: fix cpu hotplug / cpu activity on interrupts The add_del/running_cpu code and env->halted are tracking stopped cpus. Sleeping cpus (idle and enabled for interrupts) are waiting inside the kernel. No interrupt besides the restart can move a cpu from stopped to operational. This is already handled over there. So lets just remove the bogus wakup from the common interrupt delivery, otherwise any interrupt will wake up a cpu, even if this cpu is stopped (Thus leading to strange hangs on sigp restart) This fixes echo 0 > /sys/devices/system/cpu/cpu0/online echo 1 > /sys/devices/system/cpu/cpu0/online in the guest Signed-off-by: Christian Borntraeger Signed-off-by: Alexander Graf (cherry picked from commit 93116ac0cf9734e7b28886aedf03848b37d6785e) --- target-s390x/kvm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index 40b0ab192..b1404bfd2 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -185,9 +185,6 @@ void kvm_s390_interrupt_internal(CPUState *env, int type, uint32_t parm, return; } - s390_add_running_cpu(env); - qemu_cpu_kick(env); - kvmint.type = type; kvmint.parm = parm; kvmint.parm64 = parm64; -- cgit v1.2.3 From d0ed2d2e8e863a9a64c9fc9c08fa68bee546ad00 Mon Sep 17 00:00:00 2001 From: Anthony Liguori Date: Mon, 23 Jan 2012 07:30:43 -0600 Subject: e1000: bounds packet size against buffer size Otherwise we can write beyond the buffer and corrupt memory. This is tracked as CVE-2012-0029. Signed-off-by: Anthony Liguori --- hw/e1000.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/e1000.c b/hw/e1000.c index 986ed9cf7..e164d79b8 100644 --- a/hw/e1000.c +++ b/hw/e1000.c @@ -466,6 +466,8 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) bytes = split_size; if (tp->size + bytes > msh) bytes = msh - tp->size; + + bytes = MIN(sizeof(tp->data) - tp->size, bytes); pci_dma_read(&s->dev, addr, tp->data + tp->size, bytes); if ((sz = tp->size + bytes) >= hdr && tp->size < hdr) memmove(tp->header, tp->data, hdr); @@ -481,6 +483,7 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) // context descriptor TSE is not set, while data descriptor TSE is set DBGOUT(TXERR, "TCP segmentaion Error\n"); } else { + split_size = MIN(sizeof(tp->data) - tp->size, split_size); pci_dma_read(&s->dev, addr, tp->data + tp->size, split_size); tp->size += split_size; } -- cgit v1.2.3 From 3ffb4001c2e28624cd59a2b3598858784d0718d0 Mon Sep 17 00:00:00 2001 From: "Justin M. Forbes" Date: Thu, 2 Feb 2012 16:44:08 -0600 Subject: Version 1.0.1 Signed-off-by: Justin M. Forbes --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index d3827e75a..b70c2927d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.0 +1.0,1 -- cgit v1.2.3