diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/Makefile.objs | 5 | ||||
-rw-r--r-- | block/blkdebug.c | 12 | ||||
-rw-r--r-- | block/commit.c | 268 | ||||
-rw-r--r-- | block/curl.c | 3 | ||||
-rw-r--r-- | block/gluster.c | 624 | ||||
-rw-r--r-- | block/iscsi.c | 27 | ||||
-rw-r--r-- | block/qcow.c | 10 | ||||
-rw-r--r-- | block/qcow2.c | 11 | ||||
-rw-r--r-- | block/qed-table.c | 1 | ||||
-rw-r--r-- | block/qed.c | 9 | ||||
-rw-r--r-- | block/raw-posix.c | 225 | ||||
-rw-r--r-- | block/raw-win32.c | 40 | ||||
-rw-r--r-- | block/raw.c | 10 | ||||
-rw-r--r-- | block/rbd.c | 6 | ||||
-rw-r--r-- | block/sheepdog.c | 51 | ||||
-rw-r--r-- | block/stream.c | 29 | ||||
-rw-r--r-- | block/vdi.c | 32 | ||||
-rw-r--r-- | block/vmdk.c | 38 | ||||
-rw-r--r-- | block/vpc.c | 7 |
19 files changed, 1273 insertions, 135 deletions
diff --git a/block/Makefile.objs b/block/Makefile.objs index b5754d39b..554f429d0 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -3,9 +3,12 @@ block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-c block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-y += qed-check.o block-obj-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o -block-obj-y += stream.o block-obj-$(CONFIG_WIN32) += raw-win32.o block-obj-$(CONFIG_POSIX) += raw-posix.o block-obj-$(CONFIG_LIBISCSI) += iscsi.o block-obj-$(CONFIG_CURL) += curl.o block-obj-$(CONFIG_RBD) += rbd.o +block-obj-$(CONFIG_GLUSTERFS) += gluster.o + +common-obj-y += stream.o +common-obj-y += commit.o diff --git a/block/blkdebug.c b/block/blkdebug.c index 59dcea065..1206d5256 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -28,6 +28,7 @@ typedef struct BDRVBlkdebugState { int state; + int new_state; QLIST_HEAD(, BlkdebugRule) rules[BLKDBG_EVENT_MAX]; QSIMPLEQ_HEAD(, BlkdebugRule) active_rules; } BDRVBlkdebugState; @@ -403,12 +404,12 @@ static void blkdebug_close(BlockDriverState *bs) } static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, - int old_state, bool injected) + bool injected) { BDRVBlkdebugState *s = bs->opaque; /* Only process rules for the current state */ - if (rule->state && rule->state != old_state) { + if (rule->state && rule->state != s->state) { return injected; } @@ -423,7 +424,7 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, break; case ACTION_SET_STATE: - s->state = rule->options.set_state.new_state; + s->new_state = rule->options.set_state.new_state; break; } return injected; @@ -433,15 +434,16 @@ static void blkdebug_debug_event(BlockDriverState *bs, BlkDebugEvent event) { BDRVBlkdebugState *s = bs->opaque; struct BlkdebugRule *rule; - int old_state = s->state; bool injected; assert((int)event >= 0 && event < BLKDBG_EVENT_MAX); injected = false; + s->new_state = s->state; 
QLIST_FOREACH(rule, &s->rules[event], next) { - injected = process_rule(bs, rule, old_state, injected); + injected = process_rule(bs, rule, injected); } + s->state = s->new_state; } static int64_t blkdebug_getlength(BlockDriverState *bs) diff --git a/block/commit.c b/block/commit.c new file mode 100644 index 000000000..733c91403 --- /dev/null +++ b/block/commit.c @@ -0,0 +1,268 @@ +/* + * Live block commit + * + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Jeff Cody <jcody@redhat.com> + * Based on stream.c by Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "trace.h" +#include "block_int.h" +#include "blockjob.h" +#include "qemu/ratelimit.h" + +enum { + /* + * Size of data buffer for populating the image file. This should be large + * enough to process multiple clusters in a single call, so that populating + * contiguous regions of the image is efficient. 
+ */ + COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */ +}; + +#define SLICE_TIME 100000000ULL /* ns */ + +typedef struct CommitBlockJob { + BlockJob common; + RateLimit limit; + BlockDriverState *active; + BlockDriverState *top; + BlockDriverState *base; + BlockdevOnError on_error; + int base_flags; + int orig_overlay_flags; +} CommitBlockJob; + +static int coroutine_fn commit_populate(BlockDriverState *bs, + BlockDriverState *base, + int64_t sector_num, int nb_sectors, + void *buf) +{ + int ret = 0; + + ret = bdrv_read(bs, sector_num, buf, nb_sectors); + if (ret) { + return ret; + } + + ret = bdrv_write(base, sector_num, buf, nb_sectors); + if (ret) { + return ret; + } + + return 0; +} + +static void coroutine_fn commit_run(void *opaque) +{ + CommitBlockJob *s = opaque; + BlockDriverState *active = s->active; + BlockDriverState *top = s->top; + BlockDriverState *base = s->base; + BlockDriverState *overlay_bs = NULL; + int64_t sector_num, end; + int ret = 0; + int n = 0; + void *buf; + int bytes_written = 0; + int64_t base_len; + + ret = s->common.len = bdrv_getlength(top); + + + if (s->common.len < 0) { + goto exit_restore_reopen; + } + + ret = base_len = bdrv_getlength(base); + if (base_len < 0) { + goto exit_restore_reopen; + } + + if (base_len < s->common.len) { + ret = bdrv_truncate(base, s->common.len); + if (ret) { + goto exit_restore_reopen; + } + } + + overlay_bs = bdrv_find_overlay(active, top); + + end = s->common.len >> BDRV_SECTOR_BITS; + buf = qemu_blockalign(top, COMMIT_BUFFER_SIZE); + + for (sector_num = 0; sector_num < end; sector_num += n) { + uint64_t delay_ns = 0; + bool copy; + +wait: + /* Note that even when no rate limit is applied we need to yield + * with no pending I/O here so that qemu_aio_flush() returns. 
+ */ + block_job_sleep_ns(&s->common, rt_clock, delay_ns); + if (block_job_is_cancelled(&s->common)) { + break; + } + /* Copy if allocated above the base */ + ret = bdrv_co_is_allocated_above(top, base, sector_num, + COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE, + &n); + copy = (ret == 1); + trace_commit_one_iteration(s, sector_num, n, ret); + if (copy) { + if (s->common.speed) { + delay_ns = ratelimit_calculate_delay(&s->limit, n); + if (delay_ns > 0) { + goto wait; + } + } + ret = commit_populate(top, base, sector_num, n, buf); + bytes_written += n * BDRV_SECTOR_SIZE; + } + if (ret < 0) { + if (s->on_error == BLOCKDEV_ON_ERROR_STOP || + s->on_error == BLOCKDEV_ON_ERROR_REPORT|| + (s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && ret == -ENOSPC)) { + goto exit_free_buf; + } else { + n = 0; + continue; + } + } + /* Publish progress */ + s->common.offset += n * BDRV_SECTOR_SIZE; + } + + ret = 0; + + if (!block_job_is_cancelled(&s->common) && sector_num == end) { + /* success */ + ret = bdrv_drop_intermediate(active, top, base); + } + +exit_free_buf: + qemu_vfree(buf); + +exit_restore_reopen: + /* restore base open flags here if appropriate (e.g., change the base back + * to r/o). 
These reopens do not need to be atomic, since we won't abort + * even on failure here */ + if (s->base_flags != bdrv_get_flags(base)) { + bdrv_reopen(base, s->base_flags, NULL); + } + if (s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) { + bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL); + } + + block_job_complete(&s->common, ret); +} + +static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp) +{ + CommitBlockJob *s = container_of(job, CommitBlockJob, common); + + if (speed < 0) { + error_set(errp, QERR_INVALID_PARAMETER, "speed"); + return; + } + ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); +} + +static BlockJobType commit_job_type = { + .instance_size = sizeof(CommitBlockJob), + .job_type = "commit", + .set_speed = commit_set_speed, +}; + +void commit_start(BlockDriverState *bs, BlockDriverState *base, + BlockDriverState *top, int64_t speed, + BlockdevOnError on_error, BlockDriverCompletionFunc *cb, + void *opaque, Error **errp) +{ + CommitBlockJob *s; + BlockReopenQueue *reopen_queue = NULL; + int orig_overlay_flags; + int orig_base_flags; + BlockDriverState *overlay_bs; + Error *local_err = NULL; + + if ((on_error == BLOCKDEV_ON_ERROR_STOP || + on_error == BLOCKDEV_ON_ERROR_ENOSPC) && + !bdrv_iostatus_is_enabled(bs)) { + error_set(errp, QERR_INVALID_PARAMETER_COMBINATION); + return; + } + + /* Once we support top == active layer, remove this check */ + if (top == bs) { + error_setg(errp, + "Top image as the active layer is currently unsupported"); + return; + } + + if (top == base) { + error_setg(errp, "Invalid files for merge: top and base are the same"); + return; + } + + /* top and base may be valid, but let's make sure that base is reachable + * from top */ + if (bdrv_find_backing_image(top, base->filename) != base) { + error_setg(errp, + "Base (%s) is not reachable from top (%s)", + base->filename, top->filename); + return; + } + + overlay_bs = bdrv_find_overlay(bs, top); + + if (overlay_bs == NULL) { + 
error_setg(errp, "Could not find overlay image for %s:", top->filename); + return; + } + + orig_base_flags = bdrv_get_flags(base); + orig_overlay_flags = bdrv_get_flags(overlay_bs); + + /* convert base & overlay_bs to r/w, if necessary */ + if (!(orig_base_flags & BDRV_O_RDWR)) { + reopen_queue = bdrv_reopen_queue(reopen_queue, base, + orig_base_flags | BDRV_O_RDWR); + } + if (!(orig_overlay_flags & BDRV_O_RDWR)) { + reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, + orig_overlay_flags | BDRV_O_RDWR); + } + if (reopen_queue) { + bdrv_reopen_multiple(reopen_queue, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } + } + + + s = block_job_create(&commit_job_type, bs, speed, cb, opaque, errp); + if (!s) { + return; + } + + s->base = base; + s->top = top; + s->active = bs; + + s->base_flags = orig_base_flags; + s->orig_overlay_flags = orig_overlay_flags; + + s->on_error = on_error; + s->common.co = qemu_coroutine_create(commit_run); + + trace_commit_start(bs, base, top, s, s->common.co, opaque); + qemu_coroutine_enter(s->common.co, s); +} diff --git a/block/curl.c b/block/curl.c index e7c3634d3..c1074cd2e 100644 --- a/block/curl.c +++ b/block/curl.c @@ -542,8 +542,7 @@ static void curl_close(BlockDriverState *bs) } if (s->multi) curl_multi_cleanup(s->multi); - if (s->url) - free(s->url); + g_free(s->url); } static int64_t curl_getlength(BlockDriverState *bs) diff --git a/block/gluster.c b/block/gluster.c new file mode 100644 index 000000000..3588d7377 --- /dev/null +++ b/block/gluster.c @@ -0,0 +1,624 @@ +/* + * GlusterFS backend for QEMU + * + * Copyright (C) 2012 Bharata B Rao <bharata@linux.vnet.ibm.com> + * + * Pipe handling mechanism in AIO implementation is derived from + * block/rbd.c. Hence, + * + * Copyright (C) 2010-2011 Christian Brunner <chb@muc.de>, + * Josh Durgin <josh.durgin@dreamhost.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. 
See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ +#include <glusterfs/api/glfs.h> +#include "block_int.h" +#include "qemu_socket.h" +#include "uri.h" + +typedef struct GlusterAIOCB { + BlockDriverAIOCB common; + int64_t size; + int ret; + bool *finished; + QEMUBH *bh; +} GlusterAIOCB; + +typedef struct BDRVGlusterState { + struct glfs *glfs; + int fds[2]; + struct glfs_fd *fd; + int qemu_aio_count; + int event_reader_pos; + GlusterAIOCB *event_acb; +} BDRVGlusterState; + +#define GLUSTER_FD_READ 0 +#define GLUSTER_FD_WRITE 1 + +typedef struct GlusterConf { + char *server; + int port; + char *volname; + char *image; + char *transport; +} GlusterConf; + +static void qemu_gluster_gconf_free(GlusterConf *gconf) +{ + g_free(gconf->server); + g_free(gconf->volname); + g_free(gconf->image); + g_free(gconf->transport); + g_free(gconf); +} + +static int parse_volume_options(GlusterConf *gconf, char *path) +{ + char *p, *q; + + if (!path) { + return -EINVAL; + } + + /* volume */ + p = q = path + strspn(path, "/"); + p += strcspn(p, "/"); + if (*p == '\0') { + return -EINVAL; + } + gconf->volname = g_strndup(q, p - q); + + /* image */ + p += strspn(p, "/"); + if (*p == '\0') { + return -EINVAL; + } + gconf->image = g_strdup(p); + return 0; +} + +/* + * file=gluster[+transport]://[server[:port]]/volname/image[?socket=...] + * + * 'gluster' is the protocol. + * + * 'transport' specifies the transport type used to connect to gluster + * management daemon (glusterd). Valid transport types are + * tcp, unix and rdma. If a transport type isn't specified, then tcp + * type is assumed. + * + * 'server' specifies the server where the volume file specification for + * the given volume resides. This can be either hostname, ipv4 address + * or ipv6 address. ipv6 address needs to be within square brackets [ ]. 
+ * If transport type is 'unix', then 'server' field should not be specified. + * The 'socket' field needs to be populated with the path to unix domain + * socket. + * + * 'port' is the port number on which glusterd is listening. This is optional + * and if not specified, QEMU will send 0 which will make gluster use the + * default port. If the transport type is unix, then 'port' should not be + * specified. + * + * 'volname' is the name of the gluster volume which contains the VM image. + * + * 'image' is the path to the actual VM image that resides on gluster volume. + * + * Examples: + * + * file=gluster://1.2.3.4/testvol/a.img + * file=gluster+tcp://1.2.3.4/testvol/a.img + * file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img + * file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img + * file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img + * file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img + * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket + * file=gluster+rdma://1.2.3.4:24007/testvol/a.img + */ +static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename) +{ + URI *uri; + QueryParams *qp = NULL; + bool is_unix = false; + int ret = 0; + + uri = uri_parse(filename); + if (!uri) { + return -EINVAL; + } + + /* transport */ + if (!strcmp(uri->scheme, "gluster")) { + gconf->transport = g_strdup("tcp"); + } else if (!strcmp(uri->scheme, "gluster+tcp")) { + gconf->transport = g_strdup("tcp"); + } else if (!strcmp(uri->scheme, "gluster+unix")) { + gconf->transport = g_strdup("unix"); + is_unix = true; + } else if (!strcmp(uri->scheme, "gluster+rdma")) { + gconf->transport = g_strdup("rdma"); + } else { + ret = -EINVAL; + goto out; + } + + ret = parse_volume_options(gconf, uri->path); + if (ret < 0) { + goto out; + } + + qp = query_params_parse(uri->query); + if (qp->n > 1 || (is_unix && !qp->n) || (!is_unix && qp->n)) { + ret = -EINVAL; + goto out; + } + + if (is_unix) { + if (uri->server || uri->port) { + ret = 
-EINVAL; + goto out; + } + if (strcmp(qp->p[0].name, "socket")) { + ret = -EINVAL; + goto out; + } + gconf->server = g_strdup(qp->p[0].value); + } else { + gconf->server = g_strdup(uri->server); + gconf->port = uri->port; + } + +out: + if (qp) { + query_params_free(qp); + } + uri_free(uri); + return ret; +} + +static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename) +{ + struct glfs *glfs = NULL; + int ret; + int old_errno; + + ret = qemu_gluster_parseuri(gconf, filename); + if (ret < 0) { + error_report("Usage: file=gluster[+transport]://[server[:port]]/" + "volname/image[?socket=...]"); + errno = -ret; + goto out; + } + + glfs = glfs_new(gconf->volname); + if (!glfs) { + goto out; + } + + ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->server, + gconf->port); + if (ret < 0) { + goto out; + } + + /* + * TODO: Use GF_LOG_ERROR instead of hard code value of 4 here when + * GlusterFS makes GF_LOG_* macros available to libgfapi users. + */ + ret = glfs_set_logging(glfs, "-", 4); + if (ret < 0) { + goto out; + } + + ret = glfs_init(glfs); + if (ret) { + error_report("Gluster connection failed for server=%s port=%d " + "volume=%s image=%s transport=%s\n", gconf->server, gconf->port, + gconf->volname, gconf->image, gconf->transport); + goto out; + } + return glfs; + +out: + if (glfs) { + old_errno = errno; + glfs_fini(glfs); + errno = old_errno; + } + return NULL; +} + +static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s) +{ + int ret; + bool *finished = acb->finished; + BlockDriverCompletionFunc *cb = acb->common.cb; + void *opaque = acb->common.opaque; + + if (!acb->ret || acb->ret == acb->size) { + ret = 0; /* Success */ + } else if (acb->ret < 0) { + ret = acb->ret; /* Read/Write failed */ + } else { + ret = -EIO; /* Partial read/write - fail it */ + } + + s->qemu_aio_count--; + qemu_aio_release(acb); + cb(opaque, ret); + if (finished) { + *finished = true; + } +} + +static void 
qemu_gluster_aio_event_reader(void *opaque) +{ + BDRVGlusterState *s = opaque; + ssize_t ret; + + do { + char *p = (char *)&s->event_acb; + + ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos, + sizeof(s->event_acb) - s->event_reader_pos); + if (ret > 0) { + s->event_reader_pos += ret; + if (s->event_reader_pos == sizeof(s->event_acb)) { + s->event_reader_pos = 0; + qemu_gluster_complete_aio(s->event_acb, s); + } + } + } while (ret < 0 && errno == EINTR); +} + +static int qemu_gluster_aio_flush_cb(void *opaque) +{ + BDRVGlusterState *s = opaque; + + return (s->qemu_aio_count > 0); +} + +static int qemu_gluster_open(BlockDriverState *bs, const char *filename, + int bdrv_flags) +{ + BDRVGlusterState *s = bs->opaque; + int open_flags = O_BINARY; + int ret = 0; + GlusterConf *gconf = g_malloc0(sizeof(GlusterConf)); + + s->glfs = qemu_gluster_init(gconf, filename); + if (!s->glfs) { + ret = -errno; + goto out; + } + + if (bdrv_flags & BDRV_O_RDWR) { + open_flags |= O_RDWR; + } else { + open_flags |= O_RDONLY; + } + + if ((bdrv_flags & BDRV_O_NOCACHE)) { + open_flags |= O_DIRECT; + } + + s->fd = glfs_open(s->glfs, gconf->image, open_flags); + if (!s->fd) { + ret = -errno; + goto out; + } + + ret = qemu_pipe(s->fds); + if (ret < 0) { + ret = -errno; + goto out; + } + fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK); + qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], + qemu_gluster_aio_event_reader, NULL, qemu_gluster_aio_flush_cb, s); + +out: + qemu_gluster_gconf_free(gconf); + if (!ret) { + return ret; + } + if (s->fd) { + glfs_close(s->fd); + } + if (s->glfs) { + glfs_fini(s->glfs); + } + return ret; +} + +static int qemu_gluster_create(const char *filename, + QEMUOptionParameter *options) +{ + struct glfs *glfs; + struct glfs_fd *fd; + int ret = 0; + int64_t total_size = 0; + GlusterConf *gconf = g_malloc0(sizeof(GlusterConf)); + + glfs = qemu_gluster_init(gconf, filename); + if (!glfs) { + ret = -errno; + goto out; + } + + while (options && options->name) 
{ + if (!strcmp(options->name, BLOCK_OPT_SIZE)) { + total_size = options->value.n / BDRV_SECTOR_SIZE; + } + options++; + } + + fd = glfs_creat(glfs, gconf->image, + O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR); + if (!fd) { + ret = -errno; + } else { + if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) { + ret = -errno; + } + if (glfs_close(fd) != 0) { + ret = -errno; + } + } +out: + qemu_gluster_gconf_free(gconf); + if (glfs) { + glfs_fini(glfs); + } + return ret; +} + +static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb) +{ + GlusterAIOCB *acb = (GlusterAIOCB *)blockacb; + bool finished = false; + + acb->finished = &finished; + while (!finished) { + qemu_aio_wait(); + } +} + +static AIOPool gluster_aio_pool = { + .aiocb_size = sizeof(GlusterAIOCB), + .cancel = qemu_gluster_aio_cancel, +}; + +static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) +{ + GlusterAIOCB *acb = (GlusterAIOCB *)arg; + BlockDriverState *bs = acb->common.bs; + BDRVGlusterState *s = bs->opaque; + int retval; + + acb->ret = ret; + retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb)); + if (retval != sizeof(acb)) { + /* + * Gluster AIO callback thread failed to notify the waiting + * QEMU thread about IO completion. + * + * Complete this IO request and make the disk inaccessible for + * subsequent reads and writes. 
+ */ + error_report("Gluster failed to notify QEMU about IO completion"); + + qemu_mutex_lock_iothread(); /* We are in gluster thread context */ + acb->common.cb(acb->common.opaque, -EIO); + qemu_aio_release(acb); + s->qemu_aio_count--; + close(s->fds[GLUSTER_FD_READ]); + close(s->fds[GLUSTER_FD_WRITE]); + qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, + NULL); + bs->drv = NULL; /* Make the disk inaccessible */ + qemu_mutex_unlock_iothread(); + } +} + +static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque, int write) +{ + int ret; + GlusterAIOCB *acb; + BDRVGlusterState *s = bs->opaque; + size_t size; + off_t offset; + + offset = sector_num * BDRV_SECTOR_SIZE; + size = nb_sectors * BDRV_SECTOR_SIZE; + s->qemu_aio_count++; + + acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque); + acb->size = size; + acb->ret = 0; + acb->finished = NULL; + + if (write) { + ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0, + &gluster_finish_aiocb, acb); + } else { + ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0, + &gluster_finish_aiocb, acb); + } + + if (ret < 0) { + goto out; + } + return &acb->common; + +out: + s->qemu_aio_count--; + qemu_aio_release(acb); + return NULL; +} + +static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); +} + +static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); +} + +static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ 
+ int ret; + GlusterAIOCB *acb; + BDRVGlusterState *s = bs->opaque; + + acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque); + acb->size = 0; + acb->ret = 0; + acb->finished = NULL; + s->qemu_aio_count++; + + ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb); + if (ret < 0) { + goto out; + } + return &acb->common; + +out: + s->qemu_aio_count--; + qemu_aio_release(acb); + return NULL; +} + +static int64_t qemu_gluster_getlength(BlockDriverState *bs) +{ + BDRVGlusterState *s = bs->opaque; + int64_t ret; + + ret = glfs_lseek(s->fd, 0, SEEK_END); + if (ret < 0) { + return -errno; + } else { + return ret; + } +} + +static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs) +{ + BDRVGlusterState *s = bs->opaque; + struct stat st; + int ret; + + ret = glfs_fstat(s->fd, &st); + if (ret < 0) { + return -errno; + } else { + return st.st_blocks * 512; + } +} + +static void qemu_gluster_close(BlockDriverState *bs) +{ + BDRVGlusterState *s = bs->opaque; + + close(s->fds[GLUSTER_FD_READ]); + close(s->fds[GLUSTER_FD_WRITE]); + qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, NULL); + + if (s->fd) { + glfs_close(s->fd); + s->fd = NULL; + } + glfs_fini(s->glfs); +} + +static QEMUOptionParameter qemu_gluster_create_options[] = { + { + .name = BLOCK_OPT_SIZE, + .type = OPT_SIZE, + .help = "Virtual disk size" + }, + { NULL } +}; + +static BlockDriver bdrv_gluster = { + .format_name = "gluster", + .protocol_name = "gluster", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_file_open = qemu_gluster_open, + .bdrv_close = qemu_gluster_close, + .bdrv_create = qemu_gluster_create, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_aio_readv = qemu_gluster_aio_readv, + .bdrv_aio_writev = qemu_gluster_aio_writev, + .bdrv_aio_flush = qemu_gluster_aio_flush, + .create_options = qemu_gluster_create_options, +}; + +static BlockDriver bdrv_gluster_tcp = { + .format_name = "gluster", + 
.protocol_name = "gluster+tcp", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_file_open = qemu_gluster_open, + .bdrv_close = qemu_gluster_close, + .bdrv_create = qemu_gluster_create, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_aio_readv = qemu_gluster_aio_readv, + .bdrv_aio_writev = qemu_gluster_aio_writev, + .bdrv_aio_flush = qemu_gluster_aio_flush, + .create_options = qemu_gluster_create_options, +}; + +static BlockDriver bdrv_gluster_unix = { + .format_name = "gluster", + .protocol_name = "gluster+unix", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_file_open = qemu_gluster_open, + .bdrv_close = qemu_gluster_close, + .bdrv_create = qemu_gluster_create, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_aio_readv = qemu_gluster_aio_readv, + .bdrv_aio_writev = qemu_gluster_aio_writev, + .bdrv_aio_flush = qemu_gluster_aio_flush, + .create_options = qemu_gluster_create_options, +}; + +static BlockDriver bdrv_gluster_rdma = { + .format_name = "gluster", + .protocol_name = "gluster+rdma", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_file_open = qemu_gluster_open, + .bdrv_close = qemu_gluster_close, + .bdrv_create = qemu_gluster_create, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_aio_readv = qemu_gluster_aio_readv, + .bdrv_aio_writev = qemu_gluster_aio_writev, + .bdrv_aio_flush = qemu_gluster_aio_flush, + .create_options = qemu_gluster_create_options, +}; + +static void bdrv_gluster_init(void) +{ + bdrv_register(&bdrv_gluster_rdma); + bdrv_register(&bdrv_gluster_unix); + bdrv_register(&bdrv_gluster_tcp); + bdrv_register(&bdrv_gluster); +} + +block_init(bdrv_gluster_init); diff --git a/block/iscsi.c b/block/iscsi.c index 0b96165ec..d0b1a10ee 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -167,12 +167,6 @@ 
iscsi_set_events(IscsiLun *iscsilun) } - /* If we just added an event, the callback might be delayed - * unless we call qemu_notify_event(). - */ - if (ev & ~iscsilun->events) { - qemu_notify_event(); - } iscsilun->events = ev; } @@ -268,10 +262,6 @@ iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num, acb->task->xfer_dir = SCSI_XFER_WRITE; acb->task->cdb_size = 16; acb->task->cdb[0] = 0x8a; - if (!(bs->open_flags & BDRV_O_CACHE_WB)) { - /* set FUA on writes when cache mode is write through */ - acb->task->cdb[1] |= 0x04; - } lba = sector_qemu2lun(sector_num, iscsilun); *(uint32_t *)&acb->task->cdb[2] = htonl(lba >> 32); *(uint32_t *)&acb->task->cdb[6] = htonl(lba & 0xffffffff); @@ -628,9 +618,17 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, return &acb->common; } + +static void ioctl_cb(void *opaque, int status) +{ + int *p_status = opaque; + *p_status = status; +} + static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) { IscsiLun *iscsilun = bs->opaque; + int status; switch (req) { case SG_GET_VERSION_NUM: @@ -639,6 +637,15 @@ static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) case SG_GET_SCSI_ID: ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type; break; + case SG_IO: + status = -EINPROGRESS; + iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status); + + while (status == -EINPROGRESS) { + qemu_aio_wait(); + } + + return 0; default: return -1; } diff --git a/block/qcow.c b/block/qcow.c index 7b5ab87d2..b239c82ae 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -197,6 +197,15 @@ static int qcow_open(BlockDriverState *bs, int flags) return ret; } + +/* We have nothing to do for QCOW reopen, stubs just return + * success */ +static int qcow_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + static int qcow_set_key(BlockDriverState *bs, const char *key) { BDRVQcowState *s = bs->opaque; @@ -868,6 +877,7 @@ static BlockDriver bdrv_qcow = { 
.bdrv_probe = qcow_probe, .bdrv_open = qcow_open, .bdrv_close = qcow_close, + .bdrv_reopen_prepare = qcow_reopen_prepare, .bdrv_create = qcow_create, .bdrv_co_readv = qcow_co_readv, diff --git a/block/qcow2.c b/block/qcow2.c index 8f183f146..c1ff31f48 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -52,6 +52,7 @@ typedef struct { uint32_t magic; uint32_t len; } QCowExtension; + #define QCOW2_EXT_MAGIC_END 0 #define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA #define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857 @@ -558,6 +559,14 @@ static int qcow2_set_key(BlockDriverState *bs, const char *key) return 0; } +/* We have nothing to do for QCOW2 reopen, stubs just return + * success */ +static int qcow2_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { @@ -1087,6 +1096,7 @@ int qcow2_update_header(BlockDriverState *bs) goto fail; } + /* Using strncpy is ok here, since buf is not NUL-terminated. 
*/ strncpy(buf, bs->backing_file, buflen); header->backing_file_offset = cpu_to_be64(buf - ((char*) header)); @@ -1679,6 +1689,7 @@ static BlockDriver bdrv_qcow2 = { .bdrv_probe = qcow2_probe, .bdrv_open = qcow2_open, .bdrv_close = qcow2_close, + .bdrv_reopen_prepare = qcow2_reopen_prepare, .bdrv_create = qcow2_create, .bdrv_co_is_allocated = qcow2_co_is_allocated, .bdrv_set_key = qcow2_set_key, diff --git a/block/qed-table.c b/block/qed-table.c index ce07b0554..de845ec3d 100644 --- a/block/qed-table.c +++ b/block/qed-table.c @@ -103,7 +103,6 @@ static void qed_write_table_cb(void *opaque, int ret) out: qemu_vfree(write_table_cb->table); gencb_complete(&write_table_cb->gencb, ret); - return; } /** diff --git a/block/qed.c b/block/qed.c index 21cb23987..6c182ca91 100644 --- a/block/qed.c +++ b/block/qed.c @@ -505,6 +505,14 @@ out: return ret; } +/* We have nothing to do for QED reopen, stubs just return + * success */ +static int bdrv_qed_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + static void bdrv_qed_close(BlockDriverState *bs) { BDRVQEDState *s = bs->opaque; @@ -1564,6 +1572,7 @@ static BlockDriver bdrv_qed = { .bdrv_rebind = bdrv_qed_rebind, .bdrv_open = bdrv_qed_open, .bdrv_close = bdrv_qed_close, + .bdrv_reopen_prepare = bdrv_qed_reopen_prepare, .bdrv_create = bdrv_qed_create, .bdrv_co_is_allocated = bdrv_qed_co_is_allocated, .bdrv_make_empty = bdrv_qed_make_empty, diff --git a/block/raw-posix.c b/block/raw-posix.c index 6be20b192..28d439fa8 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -133,13 +133,19 @@ typedef struct BDRVRawState { int use_aio; void *aio_ctx; #endif - uint8_t *aligned_buf; - unsigned aligned_buf_size; #ifdef CONFIG_XFS bool is_xfs : 1; #endif } BDRVRawState; +typedef struct BDRVRawReopenState { + int fd; + int open_flags; +#ifdef CONFIG_LINUX_AIO + int use_aio; +#endif +} BDRVRawReopenState; + static int fd_open(BlockDriverState *bs); static int64_t 
raw_getlength(BlockDriverState *bs); @@ -185,6 +191,57 @@ static int raw_normalize_devicepath(const char **filename) } #endif +static void raw_parse_flags(int bdrv_flags, int *open_flags) +{ + assert(open_flags != NULL); + + *open_flags |= O_BINARY; + *open_flags &= ~O_ACCMODE; + if (bdrv_flags & BDRV_O_RDWR) { + *open_flags |= O_RDWR; + } else { + *open_flags |= O_RDONLY; + } + + /* Use O_DSYNC for write-through caching, no flags for write-back caching, + * and O_DIRECT for no caching. */ + if ((bdrv_flags & BDRV_O_NOCACHE)) { + *open_flags |= O_DIRECT; + } +} + +#ifdef CONFIG_LINUX_AIO +static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags) +{ + int ret = -1; + assert(aio_ctx != NULL); + assert(use_aio != NULL); + /* + * Currently Linux do AIO only for files opened with O_DIRECT + * specified so check NOCACHE flag too + */ + if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) == + (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) { + + /* if non-NULL, laio_init() has already been run */ + if (*aio_ctx == NULL) { + *aio_ctx = laio_init(); + if (!*aio_ctx) { + goto error; + } + } + *use_aio = 1; + } else { + *use_aio = 0; + } + + ret = 0; + +error: + return ret; +} +#endif + static int raw_open_common(BlockDriverState *bs, const char *filename, int bdrv_flags, int open_flags) { @@ -196,20 +253,8 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, return ret; } - s->open_flags = open_flags | O_BINARY; - s->open_flags &= ~O_ACCMODE; - if (bdrv_flags & BDRV_O_RDWR) { - s->open_flags |= O_RDWR; - } else { - s->open_flags |= O_RDONLY; - } - - /* Use O_DSYNC for write-through caching, no flags for write-back caching, - * and O_DIRECT for no caching. 
*/ - if ((bdrv_flags & BDRV_O_NOCACHE)) - s->open_flags |= O_DIRECT; - if (!(bdrv_flags & BDRV_O_CACHE_WB)) - s->open_flags |= O_DSYNC; + s->open_flags = open_flags; + raw_parse_flags(bdrv_flags, &s->open_flags); s->fd = -1; fd = qemu_open(filename, s->open_flags, 0644); @@ -220,45 +265,17 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, return ret; } s->fd = fd; - s->aligned_buf = NULL; - - if ((bdrv_flags & BDRV_O_NOCACHE)) { - /* - * Allocate a buffer for read/modify/write cycles. Chose the size - * pessimistically as we don't know the block size yet. - */ - s->aligned_buf_size = 32 * MAX_BLOCKSIZE; - s->aligned_buf = qemu_memalign(MAX_BLOCKSIZE, s->aligned_buf_size); - if (s->aligned_buf == NULL) { - goto out_close; - } - } /* We're falling back to POSIX AIO in some cases so init always */ if (paio_init() < 0) { - goto out_free_buf; + goto out_close; } #ifdef CONFIG_LINUX_AIO - /* - * Currently Linux do AIO only for files opened with O_DIRECT - * specified so check NOCACHE flag too - */ - if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) == - (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) { - - s->aio_ctx = laio_init(); - if (!s->aio_ctx) { - goto out_free_buf; - } - s->use_aio = 1; - } else -#endif - { -#ifdef CONFIG_LINUX_AIO - s->use_aio = 0; -#endif + if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) { + goto out_close; } +#endif #ifdef CONFIG_XFS if (platform_test_xfs_fd(s->fd)) { @@ -268,8 +285,6 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, return 0; -out_free_buf: - qemu_vfree(s->aligned_buf); out_close: qemu_close(fd); return -errno; @@ -283,6 +298,109 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) return raw_open_common(bs, filename, flags, 0); } +static int raw_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + BDRVRawState *s; + BDRVRawReopenState *raw_s; + int ret = 0; + + assert(state != NULL); + assert(state->bs != NULL); + + s = 
state->bs->opaque; + + state->opaque = g_malloc0(sizeof(BDRVRawReopenState)); + raw_s = state->opaque; + +#ifdef CONFIG_LINUX_AIO + raw_s->use_aio = s->use_aio; + + /* we can use s->aio_ctx instead of a copy, because the use_aio flag is + * valid in the 'false' condition even if aio_ctx is set, and raw_set_aio() + * won't override aio_ctx if aio_ctx is non-NULL */ + if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) { + return -1; + } +#endif + + raw_parse_flags(state->flags, &raw_s->open_flags); + + raw_s->fd = -1; + + int fcntl_flags = O_APPEND | O_ASYNC | O_NONBLOCK; +#ifdef O_NOATIME + fcntl_flags |= O_NOATIME; +#endif + + if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) { + /* dup the original fd */ + /* TODO: use qemu fcntl wrapper */ +#ifdef F_DUPFD_CLOEXEC + raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0); +#else + raw_s->fd = dup(s->fd); + if (raw_s->fd != -1) { + qemu_set_cloexec(raw_s->fd); + } +#endif + if (raw_s->fd >= 0) { + ret = fcntl_setfl(raw_s->fd, raw_s->open_flags); + if (ret) { + qemu_close(raw_s->fd); + raw_s->fd = -1; + } + } + } + + /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */ + if (raw_s->fd == -1) { + assert(!(raw_s->open_flags & O_CREAT)); + raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags); + if (raw_s->fd == -1) { + ret = -1; + } + } + return ret; +} + + +static void raw_reopen_commit(BDRVReopenState *state) +{ + BDRVRawReopenState *raw_s = state->opaque; + BDRVRawState *s = state->bs->opaque; + + s->open_flags = raw_s->open_flags; + + qemu_close(s->fd); + s->fd = raw_s->fd; +#ifdef CONFIG_LINUX_AIO + s->use_aio = raw_s->use_aio; +#endif + + g_free(state->opaque); + state->opaque = NULL; +} + + +static void raw_reopen_abort(BDRVReopenState *state) +{ + BDRVRawReopenState *raw_s = state->opaque; + + /* nothing to do if NULL, we didn't get far enough */ + if (raw_s == NULL) { + return; + } + + if (raw_s->fd >= 0) { + qemu_close(raw_s->fd); + raw_s->fd = -1; + } + 
g_free(state->opaque); + state->opaque = NULL; +} + + /* XXX: use host sector size if necessary with: #ifdef DIOCGSECTORSIZE { @@ -330,7 +448,7 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs, * boundary. Check if this is the case or tell the low-level * driver that it needs to copy the buffer. */ - if (s->aligned_buf) { + if ((bs->open_flags & BDRV_O_NOCACHE)) { if (!qiov_is_aligned(bs, qiov)) { type |= QEMU_AIO_MISALIGNED; #ifdef CONFIG_LINUX_AIO @@ -378,8 +496,6 @@ static void raw_close(BlockDriverState *bs) if (s->fd >= 0) { qemu_close(s->fd); s->fd = -1; - if (s->aligned_buf != NULL) - qemu_vfree(s->aligned_buf); } } @@ -735,6 +851,9 @@ static BlockDriver bdrv_file = { .instance_size = sizeof(BDRVRawState), .bdrv_probe = NULL, /* no probe for protocols */ .bdrv_file_open = raw_open, + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, .bdrv_close = raw_close, .bdrv_create = raw_create, .bdrv_co_discard = raw_co_discard, diff --git a/block/raw-win32.c b/block/raw-win32.c index c56bf8337..78c830648 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -77,6 +77,23 @@ static int set_sparse(int fd) NULL, 0, NULL, 0, &returned, NULL); } +static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped) +{ + assert(access_flags != NULL); + assert(overlapped != NULL); + + if (flags & BDRV_O_RDWR) { + *access_flags = GENERIC_READ | GENERIC_WRITE; + } else { + *access_flags = GENERIC_READ; + } + + *overlapped = FILE_ATTRIBUTE_NORMAL; + if (flags & BDRV_O_NOCACHE) { + *overlapped |= FILE_FLAG_NO_BUFFERING; + } +} + static int raw_open(BlockDriverState *bs, const char *filename, int flags) { BDRVRawState *s = bs->opaque; @@ -85,17 +102,8 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) s->type = FTYPE_FILE; - if (flags & BDRV_O_RDWR) { - access_flags = GENERIC_READ | GENERIC_WRITE; - } else { - access_flags = GENERIC_READ; - } + 
raw_parse_flags(flags, &access_flags, &overlapped); - overlapped = FILE_ATTRIBUTE_NORMAL; - if (flags & BDRV_O_NOCACHE) - overlapped |= FILE_FLAG_NO_BUFFERING; - if (!(flags & BDRV_O_CACHE_WB)) - overlapped |= FILE_FLAG_WRITE_THROUGH; s->hfile = CreateFile(filename, access_flags, FILE_SHARE_READ, NULL, OPEN_EXISTING, overlapped, NULL); @@ -374,18 +382,10 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) } s->type = find_device_type(bs, filename); - if (flags & BDRV_O_RDWR) { - access_flags = GENERIC_READ | GENERIC_WRITE; - } else { - access_flags = GENERIC_READ; - } + raw_parse_flags(flags, &access_flags, &overlapped); + create_flags = OPEN_EXISTING; - overlapped = FILE_ATTRIBUTE_NORMAL; - if (flags & BDRV_O_NOCACHE) - overlapped |= FILE_FLAG_NO_BUFFERING; - if (!(flags & BDRV_O_CACHE_WB)) - overlapped |= FILE_FLAG_WRITE_THROUGH; s->hfile = CreateFile(filename, access_flags, FILE_SHARE_READ, NULL, create_flags, overlapped, NULL); diff --git a/block/raw.c b/block/raw.c index ff34ea41e..253e949b8 100644 --- a/block/raw.c +++ b/block/raw.c @@ -9,6 +9,14 @@ static int raw_open(BlockDriverState *bs, int flags) return 0; } +/* We have nothing to do for raw reopen, stubs just return + * success */ +static int raw_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { @@ -115,6 +123,8 @@ static BlockDriver bdrv_raw = { .bdrv_open = raw_open, .bdrv_close = raw_close, + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_co_readv = raw_co_readv, .bdrv_co_writev = raw_co_writev, .bdrv_co_is_allocated = raw_co_is_allocated, diff --git a/block/rbd.c b/block/rbd.c index 5a0f79fc8..015a9db0a 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -487,12 +487,6 @@ static int qemu_rbd_open(BlockDriverState *bs, const char *filename, int flags) rados_conf_set(s->cluster, "rbd_cache", "false"); } else { 
rados_conf_set(s->cluster, "rbd_cache", "true"); - if (!(flags & BDRV_O_CACHE_WB)) { - r = rados_conf_set(s->cluster, "rbd_cache_max_dirty", "0"); - if (r < 0) { - rados_conf_set(s->cluster, "rbd_cache", "false"); - } - } } if (strstr(conf, "conf=") == NULL) { diff --git a/block/sheepdog.c b/block/sheepdog.c index df4f44107..f35ff5bbe 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -866,14 +866,14 @@ static int parse_vdiname(BDRVSheepdogState *s, const char *filename, s->port = 0; } - strncpy(vdi, p, SD_MAX_VDI_LEN); + pstrcpy(vdi, SD_MAX_VDI_LEN, p); p = strchr(vdi, ':'); if (p) { *p++ = '\0'; *snapid = strtoul(p, NULL, 10); if (*snapid == 0) { - strncpy(tag, p, SD_MAX_VDI_TAG_LEN); + pstrcpy(tag, SD_MAX_VDI_TAG_LEN, p); } } else { *snapid = CURRENT_VDI_ID; /* search current vdi */ @@ -900,7 +900,10 @@ static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid, return fd; } - memset(buf, 0, sizeof(buf)); + /* This pair of strncpy calls ensures that the buffer is zero-filled, + * which is desirable since we'll soon be sending those bytes, and + * don't want the send_req to read uninitialized data. 
+ */ strncpy(buf, filename, SD_MAX_VDI_LEN); strncpy(buf + SD_MAX_VDI_LEN, tag, SD_MAX_VDI_TAG_LEN); @@ -1114,14 +1117,12 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags) goto out; } - if (flags & BDRV_O_CACHE_WB) { - s->cache_enabled = 1; - s->flush_fd = connect_to_sdog(s->addr, s->port); - if (s->flush_fd < 0) { - error_report("failed to connect"); - ret = s->flush_fd; - goto out; - } + s->cache_enabled = 1; + s->flush_fd = connect_to_sdog(s->addr, s->port); + if (s->flush_fd < 0) { + error_report("failed to connect"); + ret = s->flush_fd; + goto out; } if (snapid || tag[0] != '\0') { @@ -1151,7 +1152,7 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags) s->max_dirty_data_idx = 0; bs->total_sectors = s->inode.vdi_size / SECTOR_SIZE; - strncpy(s->name, vdi, sizeof(s->name)); + pstrcpy(s->name, sizeof(s->name), vdi); qemu_co_mutex_init(&s->lock); g_free(buf); return 0; @@ -1179,8 +1180,11 @@ static int do_sd_create(char *filename, int64_t vdi_size, return fd; } + /* FIXME: would it be better to fail (e.g., return -EIO) when filename + * does not fit in buf? For now, just truncate and avoid buffer overrun. + */ memset(buf, 0, sizeof(buf)); - strncpy(buf, filename, SD_MAX_VDI_LEN); + pstrcpy(buf, sizeof(buf), filename); memset(&hdr, 0, sizeof(hdr)); hdr.opcode = SD_OP_NEW_VDI; @@ -1754,6 +1758,9 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) s->inode.vm_state_size = sn_info->vm_state_size; s->inode.vm_clock_nsec = sn_info->vm_clock_nsec; + /* It appears that inode.tag does not require a NUL terminator, + * which means this use of strncpy is ok. 
+ */ strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag)); /* we don't need to update entire object */ datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id); @@ -1813,13 +1820,13 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) memcpy(old_s, s, sizeof(BDRVSheepdogState)); - memset(vdi, 0, sizeof(vdi)); - strncpy(vdi, s->name, sizeof(vdi)); + pstrcpy(vdi, sizeof(vdi), s->name); - memset(tag, 0, sizeof(tag)); snapid = strtoul(snapshot_id, NULL, 10); - if (!snapid) { - strncpy(tag, s->name, sizeof(tag)); + if (snapid) { + tag[0] = 0; + } else { + pstrcpy(tag, sizeof(tag), s->name); } ret = find_vdi_name(s, vdi, snapid, tag, &vid, 1); @@ -1948,8 +1955,9 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), "%u", inode.snap_id); - strncpy(sn_tab[found].name, inode.tag, - MIN(sizeof(sn_tab[found].name), sizeof(inode.tag))); + pstrcpy(sn_tab[found].name, + MIN(sizeof(sn_tab[found].name), sizeof(inode.tag)), + inode.tag); found++; } } @@ -1986,7 +1994,7 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, vdi_index = pos / SD_DATA_OBJ_SIZE; offset = pos % SD_DATA_OBJ_SIZE; - data_len = MIN(remaining, SD_DATA_OBJ_SIZE); + data_len = MIN(remaining, SD_DATA_OBJ_SIZE - offset); vmstate_oid = vid_to_vmstate_oid(s->inode.vdi_id, vdi_index); @@ -2007,6 +2015,7 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, } pos += data_len; + data += data_len; remaining -= data_len; } ret = size; diff --git a/block/stream.c b/block/stream.c index c4f87dd5b..792665276 100644 --- a/block/stream.c +++ b/block/stream.c @@ -13,6 +13,7 @@ #include "trace.h" #include "block_int.h" +#include "blockjob.h" #include "qemu/ratelimit.h" enum { @@ -30,6 +31,7 @@ typedef struct StreamBlockJob { BlockJob common; RateLimit limit; BlockDriverState *base; + BlockdevOnError on_error; char backing_file_id[1024]; } StreamBlockJob; @@ -77,6 +79,7 @@ 
static void coroutine_fn stream_run(void *opaque) BlockDriverState *bs = s->common.bs; BlockDriverState *base = s->base; int64_t sector_num, end; + int error = 0; int ret = 0; int n = 0; void *buf; @@ -141,7 +144,19 @@ wait: ret = stream_populate(bs, sector_num, n, buf); } if (ret < 0) { - break; + BlockErrorAction action = + block_job_error_action(&s->common, s->common.bs, s->on_error, + true, -ret); + if (action == BDRV_ACTION_STOP) { + n = 0; + continue; + } + if (error == 0) { + error = ret; + } + if (action == BDRV_ACTION_REPORT) { + break; + } } ret = 0; @@ -153,6 +168,9 @@ wait: bdrv_disable_copy_on_read(bs); } + /* Do not remove the backing file if an error was there but ignored. */ + ret = error; + if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) { const char *base_id = NULL, *base_fmt = NULL; if (base) { @@ -188,11 +206,19 @@ static BlockJobType stream_job_type = { void stream_start(BlockDriverState *bs, BlockDriverState *base, const char *base_id, int64_t speed, + BlockdevOnError on_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp) { StreamBlockJob *s; + if ((on_error == BLOCKDEV_ON_ERROR_STOP || + on_error == BLOCKDEV_ON_ERROR_ENOSPC) && + !bdrv_iostatus_is_enabled(bs)) { + error_set(errp, QERR_INVALID_PARAMETER, "on-error"); + return; + } + s = block_job_create(&stream_job_type, bs, speed, cb, opaque, errp); if (!s) { return; @@ -203,6 +229,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base, pstrcpy(s->backing_file_id, sizeof(s->backing_file_id), base_id); } + s->on_error = on_error; s->common.co = qemu_coroutine_create(stream_run); trace_stream_start(bs, base, s, s->common.co, opaque); qemu_coroutine_enter(s->common.co, s); diff --git a/block/vdi.c b/block/vdi.c index c4f1529db..f35b12ec9 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -454,6 +454,12 @@ static int vdi_open(BlockDriverState *bs, int flags) return -1; } +static int vdi_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue 
*queue, Error **errp) +{ + return 0; +} + static int coroutine_fn vdi_co_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { @@ -628,7 +634,6 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options) VdiHeader header; size_t i; size_t bmap_size; - uint32_t *bmap; logout("\n"); @@ -693,21 +698,21 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options) result = -errno; } - bmap = NULL; if (bmap_size > 0) { - bmap = (uint32_t *)g_malloc0(bmap_size); - } - for (i = 0; i < blocks; i++) { - if (image_type == VDI_TYPE_STATIC) { - bmap[i] = i; - } else { - bmap[i] = VDI_UNALLOCATED; + uint32_t *bmap = g_malloc0(bmap_size); + for (i = 0; i < blocks; i++) { + if (image_type == VDI_TYPE_STATIC) { + bmap[i] = i; + } else { + bmap[i] = VDI_UNALLOCATED; + } } + if (write(fd, bmap, bmap_size) < 0) { + result = -errno; + } + g_free(bmap); } - if (write(fd, bmap, bmap_size) < 0) { - result = -errno; - } - g_free(bmap); + if (image_type == VDI_TYPE_STATIC) { if (ftruncate(fd, sizeof(header) + bmap_size + blocks * block_size)) { result = -errno; @@ -762,6 +767,7 @@ static BlockDriver bdrv_vdi = { .bdrv_probe = vdi_probe, .bdrv_open = vdi_open, .bdrv_close = vdi_close, + .bdrv_reopen_prepare = vdi_reopen_prepare, .bdrv_create = vdi_create, .bdrv_co_is_allocated = vdi_co_is_allocated, .bdrv_make_empty = vdi_make_empty, diff --git a/block/vmdk.c b/block/vmdk.c index bba4c61a7..1a80e5a24 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -300,6 +300,40 @@ static int vmdk_is_cid_valid(BlockDriverState *bs) return 1; } +/* Queue extents, if any, for reopen() */ +static int vmdk_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + BDRVVmdkState *s; + int ret = -1; + int i; + VmdkExtent *e; + + assert(state != NULL); + assert(state->bs != NULL); + + if (queue == NULL) { + error_set(errp, ERROR_CLASS_GENERIC_ERROR, + "No reopen queue for VMDK extents"); + goto exit; + } + + s = 
state->bs->opaque; + + assert(s != NULL); + + for (i = 0; i < s->num_extents; i++) { + e = &s->extents[i]; + if (e->file != state->bs->file) { + bdrv_reopen_queue(queue, e->file, state->flags); + } + } + ret = 0; + +exit: + return ret; +} + static int vmdk_parent_open(BlockDriverState *bs) { char *p_name; @@ -1374,8 +1408,7 @@ static int relative_path(char *dest, int dest_size, return -1; } if (path_is_absolute(target)) { - dest[dest_size - 1] = '\0'; - strncpy(dest, target, dest_size - 1); + pstrcpy(dest, dest_size, target); return 0; } while (base[i] == target[i]) { @@ -1646,6 +1679,7 @@ static BlockDriver bdrv_vmdk = { .instance_size = sizeof(BDRVVmdkState), .bdrv_probe = vmdk_probe, .bdrv_open = vmdk_open, + .bdrv_reopen_prepare = vmdk_reopen_prepare, .bdrv_read = vmdk_co_read, .bdrv_write = vmdk_co_write, .bdrv_close = vmdk_close, diff --git a/block/vpc.c b/block/vpc.c index c0b82c4f5..b6bf52f14 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -265,6 +265,12 @@ static int vpc_open(BlockDriverState *bs, int flags) return err; } +static int vpc_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + /* * Returns the absolute byte offset of the given sector in the image file. * If the sector is not allocated, -1 is returned instead. @@ -783,6 +789,7 @@ static BlockDriver bdrv_vpc = { .bdrv_probe = vpc_probe, .bdrv_open = vpc_open, .bdrv_close = vpc_close, + .bdrv_reopen_prepare = vpc_reopen_prepare, .bdrv_create = vpc_create, .bdrv_read = vpc_co_read, |