From 183c5eaa41f7814ed4e83c16812c67a857eac6aa Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Thu, 13 Sep 2012 11:08:01 +0200
Subject: compat: turn off msi/msix on xhci for old machine types

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
---
 hw/pc_piix.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 5a0796bed..afd8361df 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -358,14 +358,30 @@ static QEMUMachine pc_machine_v1_3 = {
     .is_default = 1,
 };
 
+#define PC_COMPAT_1_2 \
+        {\
+            .driver   = "nec-usb-xhci",\
+            .property = "msi",\
+            .value    = "off",\
+        },{\
+            .driver   = "nec-usb-xhci",\
+            .property = "msix",\
+            .value    = "off",\
+        }
+
 static QEMUMachine pc_machine_v1_2 = {
     .name = "pc-1.2",
     .desc = "Standard PC",
     .init = pc_init_pci,
     .max_cpus = 255,
+    .compat_props = (GlobalProperty[]) {
+        PC_COMPAT_1_2,
+        { /* end of list */ }
+    },
 };
 
 #define PC_COMPAT_1_1 \
+        PC_COMPAT_1_2,\
         {\
             .driver   = "virtio-scsi-pci",\
             .property = "hotplug",\
-- 
cgit v1.2.3


From c08ba66f13d3977402e0c9cd6ef35323ea11c0d6 Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Thu, 13 Sep 2012 11:08:02 +0200
Subject: ivshmem: add 64bit option

This patch adds a "use64" property which will make the ivshmem driver
register a 64bit memory bar when set, so you have something to play with
when testing 64bit pci bits.  It also allows to have quite big shared
memory regions, like this:

[root@fedora ~]# lspci -vs1:1
01:01.0 RAM memory: Red Hat, Inc Device 1110
        Subsystem: Red Hat, Inc Device 1100
        Physical Slot: 1-1
        Flags: fast devsel
        Memory at fd400000 (32-bit, non-prefetchable) [disabled] [size=256]
        Memory at 8040000000 (64-bit, prefetchable) [size=1G]

[ v5: rebase, update compat property for post-1.2 merge ]
[ v4: rebase & adapt to latest master again ]
[ v3: rebase & adapt to latest master ]
[ v2: default to on as suggested by avi,
      turn off for pc-$old using compat property ]

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Tested-by: Cam Macdonell <cam@cs.ualberta.ca>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
---
 hw/ivshmem.c | 13 ++++++++++---
 hw/pc_piix.c |  4 ++++
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/hw/ivshmem.c b/hw/ivshmem.c
index 62fe53ae2..59f1aa426 100644
--- a/hw/ivshmem.c
+++ b/hw/ivshmem.c
@@ -71,6 +71,8 @@ typedef struct IVShmemState {
     MemoryRegion bar;
     MemoryRegion ivshmem;
     uint64_t ivshmem_size; /* size of shared memory region */
+    uint32_t ivshmem_attr;
+    uint32_t ivshmem_64bit;
     int shm_fd; /* shared memory file descriptor */
 
     Peer *peers;
@@ -339,7 +341,7 @@ static void create_shared_memory_BAR(IVShmemState *s, int fd) {
     memory_region_add_subregion(&s->bar, 0, &s->ivshmem);
 
     /* region for shared memory */
-    pci_register_bar(&s->dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar);
+    pci_register_bar(&s->dev, 2, s->ivshmem_attr, &s->bar);
 }
 
 static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i)
@@ -690,6 +692,11 @@ static int pci_ivshmem_init(PCIDevice *dev)
                      &s->ivshmem_mmio);
 
     memory_region_init(&s->bar, "ivshmem-bar2-container", s->ivshmem_size);
+    s->ivshmem_attr = PCI_BASE_ADDRESS_SPACE_MEMORY |
+        PCI_BASE_ADDRESS_MEM_PREFETCH;
+    if (s->ivshmem_64bit) {
+        s->ivshmem_attr |= PCI_BASE_ADDRESS_MEM_TYPE_64;
+    }
 
     if ((s->server_chr != NULL) &&
                         (strncmp(s->server_chr->filename, "unix:", 5) == 0)) {
@@ -715,8 +722,7 @@ static int pci_ivshmem_init(PCIDevice *dev)
         /* allocate/initialize space for interrupt handling */
         s->peers = g_malloc0(s->nb_peers * sizeof(Peer));
 
-        pci_register_bar(&s->dev, 2,
-                         PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar);
+        pci_register_bar(&s->dev, 2, s->ivshmem_attr, &s->bar);
 
         s->eventfd_chr = g_malloc0(s->vectors * sizeof(CharDriverState *));
 
@@ -786,6 +792,7 @@ static Property ivshmem_properties[] = {
     DEFINE_PROP_BIT("msi", IVShmemState, features, IVSHMEM_MSI, true),
     DEFINE_PROP_STRING("shm", IVShmemState, shmobj),
     DEFINE_PROP_STRING("role", IVShmemState, role),
+    DEFINE_PROP_UINT32("use64", IVShmemState, ivshmem_64bit, 1),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index afd8361df..fd5898fba 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -367,6 +367,10 @@ static QEMUMachine pc_machine_v1_3 = {
             .driver   = "nec-usb-xhci",\
             .property = "msix",\
             .value    = "off",\
+        },{\
+            .driver   = "ivshmem",\
+            .property = "use64",\
+            .value    = "0",\
         }
 
 static QEMUMachine pc_machine_v1_2 = {
-- 
cgit v1.2.3


From 8bdd3d499fe0ddffa9901c56ab3bc8911d5b8be0 Mon Sep 17 00:00:00 2001
From: Stefan Weil <sw@weilnetz.de>
Date: Fri, 14 Sep 2012 19:02:30 +0200
Subject: configure: Allow builds without any system or user emulation

The old code aborted configure when no emulation target was selected.
Even after removing the 'exit 1', it tried to read from STDIN
when QEMU was configured with

    configure' '--disable-user' '--disable-system'

This is fixed here.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
---
 Makefile  | 5 +++++
 configure | 4 ----
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index def2ae23c..04642975a 100644
--- a/Makefile
+++ b/Makefile
@@ -52,8 +52,13 @@ SUBDIR_MAKEFLAGS=$(if $(V),,--no-print-directory) BUILD_DIR=$(BUILD_DIR)
 SUBDIR_DEVICES_MAK=$(patsubst %, %/config-devices.mak, $(TARGET_DIRS))
 SUBDIR_DEVICES_MAK_DEP=$(patsubst %, %/config-devices.mak.d, $(TARGET_DIRS))
 
+ifeq ($(SUBDIR_DEVICES_MAK),)
+config-all-devices.mak:
+	$(call quiet-command,echo '# no devices' > $@,"  GEN   $@")
+else
 config-all-devices.mak: $(SUBDIR_DEVICES_MAK)
 	$(call quiet-command,cat $(SUBDIR_DEVICES_MAK) | grep =y | sort -u > $@,"  GEN   $@")
+endif
 
 -include $(SUBDIR_DEVICES_MAK_DEP)
 
diff --git a/configure b/configure
index 1b865174e..8f99b7b94 100755
--- a/configure
+++ b/configure
@@ -1286,10 +1286,6 @@ if test -z "$target_list" ; then
 else
     target_list=`echo "$target_list" | sed -e 's/,/ /g'`
 fi
-if test -z "$target_list" ; then
-    echo "No targets enabled"
-    exit 1
-fi
 # see if system emulation was really requested
 case " $target_list " in
   *"-softmmu "*) softmmu=yes
-- 
cgit v1.2.3


From 05bc1d8a4b2f77df8cc9880a552047e30c16f1f8 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 24 Sep 2012 13:11:07 +0200
Subject: Refactor inet_connect_opts function

refactor address resolution code to fix nonblocking connect
remove getnameinfo call

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Amos Kong <akong@redhat.com>
Signed-off-by: Orit Wasserman <owasserm@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
---
 qemu-sockets.c | 148 +++++++++++++++++++++++++++++++++------------------------
 1 file changed, 85 insertions(+), 63 deletions(-)

diff --git a/qemu-sockets.c b/qemu-sockets.c
index 037775b86..22797bf2e 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -209,95 +209,117 @@ listen:
     return slisten;
 }
 
-int inet_connect_opts(QemuOpts *opts, bool *in_progress, Error **errp)
+#ifdef _WIN32
+#define QEMU_SOCKET_RC_INPROGRESS(rc) \
+    ((rc) == -EINPROGRESS || (rc) == -EWOULDBLOCK || (rc) == -WSAEALREADY)
+#else
+#define QEMU_SOCKET_RC_INPROGRESS(rc) \
+    ((rc) == -EINPROGRESS)
+#endif
+
+static int inet_connect_addr(struct addrinfo *addr, bool block,
+                             bool *in_progress)
 {
-    struct addrinfo ai,*res,*e;
+    int sock, rc;
+
+    if (in_progress) {
+        *in_progress = false;
+    }
+
+    sock = qemu_socket(addr->ai_family, addr->ai_socktype, addr->ai_protocol);
+    if (sock < 0) {
+        fprintf(stderr, "%s: socket(%s): %s\n", __func__,
+                inet_strfamily(addr->ai_family), strerror(errno));
+        return -1;
+    }
+    setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
+    if (!block) {
+        socket_set_nonblock(sock);
+    }
+    /* connect to peer */
+    do {
+        rc = 0;
+        if (connect(sock, addr->ai_addr, addr->ai_addrlen) < 0) {
+            rc = -socket_error();
+        }
+    } while (rc == -EINTR);
+
+    if (!block && QEMU_SOCKET_RC_INPROGRESS(rc)) {
+        if (in_progress) {
+            *in_progress = true;
+        }
+    } else if (rc < 0) {
+        closesocket(sock);
+        return -1;
+    }
+    return sock;
+}
+
+static struct addrinfo *inet_parse_connect_opts(QemuOpts *opts, Error **errp)
+{
+    struct addrinfo ai, *res;
+    int rc;
     const char *addr;
     const char *port;
-    char uaddr[INET6_ADDRSTRLEN+1];
-    char uport[33];
-    int sock,rc;
-    bool block;
 
-    memset(&ai,0, sizeof(ai));
+    memset(&ai, 0, sizeof(ai));
     ai.ai_flags = AI_CANONNAME | AI_ADDRCONFIG;
     ai.ai_family = PF_UNSPEC;
     ai.ai_socktype = SOCK_STREAM;
 
-    if (in_progress) {
-        *in_progress = false;
-    }
-
     addr = qemu_opt_get(opts, "host");
     port = qemu_opt_get(opts, "port");
-    block = qemu_opt_get_bool(opts, "block", 0);
     if (addr == NULL || port == NULL) {
-        fprintf(stderr, "inet_connect: host and/or port not specified\n");
+        fprintf(stderr,
+                "inet_parse_connect_opts: host and/or port not specified\n");
         error_set(errp, QERR_SOCKET_CREATE_FAILED);
-        return -1;
+        return NULL;
     }
 
-    if (qemu_opt_get_bool(opts, "ipv4", 0))
+    if (qemu_opt_get_bool(opts, "ipv4", 0)) {
         ai.ai_family = PF_INET;
-    if (qemu_opt_get_bool(opts, "ipv6", 0))
+    }
+    if (qemu_opt_get_bool(opts, "ipv6", 0)) {
         ai.ai_family = PF_INET6;
+    }
 
     /* lookup */
-    if (0 != (rc = getaddrinfo(addr, port, &ai, &res))) {
-        fprintf(stderr,"getaddrinfo(%s,%s): %s\n", addr, port,
+    rc = getaddrinfo(addr, port, &ai, &res);
+    if (rc != 0) {
+        fprintf(stderr, "getaddrinfo(%s,%s): %s\n", addr, port,
                 gai_strerror(rc));
         error_set(errp, QERR_SOCKET_CREATE_FAILED);
-	return -1;
+        return NULL;
+    }
+    return res;
+}
+
+int inet_connect_opts(QemuOpts *opts, bool *in_progress, Error **errp)
+{
+    struct addrinfo *res, *e;
+    int sock = -1;
+    bool block = qemu_opt_get_bool(opts, "block", 0);
+
+    res = inet_parse_connect_opts(opts, errp);
+    if (!res) {
+        return -1;
+    }
+
+    if (in_progress) {
+        *in_progress = false;
     }
 
     for (e = res; e != NULL; e = e->ai_next) {
-        if (getnameinfo((struct sockaddr*)e->ai_addr,e->ai_addrlen,
-                            uaddr,INET6_ADDRSTRLEN,uport,32,
-                            NI_NUMERICHOST | NI_NUMERICSERV) != 0) {
-            fprintf(stderr,"%s: getnameinfo: oops\n", __FUNCTION__);
-            continue;
-        }
-        sock = qemu_socket(e->ai_family, e->ai_socktype, e->ai_protocol);
-        if (sock < 0) {
-            fprintf(stderr,"%s: socket(%s): %s\n", __FUNCTION__,
-            inet_strfamily(e->ai_family), strerror(errno));
-            continue;
-        }
-        setsockopt(sock,SOL_SOCKET,SO_REUSEADDR,(void*)&on,sizeof(on));
-        if (!block) {
-            socket_set_nonblock(sock);
-        }
-        /* connect to peer */
-        do {
-            rc = 0;
-            if (connect(sock, e->ai_addr, e->ai_addrlen) < 0) {
-                rc = -socket_error();
-            }
-        } while (rc == -EINTR);
-
-  #ifdef _WIN32
-        if (!block && (rc == -EINPROGRESS || rc == -EWOULDBLOCK
-                       || rc == -WSAEALREADY)) {
-  #else
-        if (!block && (rc == -EINPROGRESS)) {
-  #endif
-            if (in_progress) {
-                *in_progress = true;
-            }
-        } else if (rc < 0) {
-            if (NULL == e->ai_next)
-                fprintf(stderr, "%s: connect(%s,%s,%s,%s): %s\n", __FUNCTION__,
-                        inet_strfamily(e->ai_family),
-                        e->ai_canonname, uaddr, uport, strerror(errno));
-            closesocket(sock);
-            continue;
+        sock = inet_connect_addr(e, block, in_progress);
+        if (sock >= 0) {
+            break;
         }
-        freeaddrinfo(res);
-        return sock;
     }
-    error_set(errp, QERR_SOCKET_CONNECT_FAILED);
+    if (sock < 0) {
+        error_set(errp, QERR_SOCKET_CONNECT_FAILED);
+    }
     freeaddrinfo(res);
-    return -1;
+    return sock;
 }
 
 int inet_dgram_opts(QemuOpts *opts)
-- 
cgit v1.2.3


From 5db5f44cb4b7f24b9e0efdefc9015e36b7c34881 Mon Sep 17 00:00:00 2001
From: Orit Wasserman <owasserm@redhat.com>
Date: Mon, 24 Sep 2012 13:11:08 +0200
Subject: Separate inet_connect into inet_connect (blocking) and
 inet_nonblocking_connect

No need to add non blocking parameters to the blocking inet_connect
add block parameter for inet_connect_opts instead of using QemuOpt "block".

Signed-off-by: Orit Wasserman <owasserm@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
---
 migration-tcp.c |  2 +-
 nbd.c           |  2 +-
 qemu-char.c     |  2 +-
 qemu-sockets.c  | 58 +++++++++++++++++++++++++++++++++++++++++++++++----------
 qemu_socket.h   |  7 +++++--
 ui/vnc.c        |  2 +-
 6 files changed, 57 insertions(+), 16 deletions(-)

diff --git a/migration-tcp.c b/migration-tcp.c
index ac891c38a..7f6ad9872 100644
--- a/migration-tcp.c
+++ b/migration-tcp.c
@@ -88,7 +88,7 @@ int tcp_start_outgoing_migration(MigrationState *s, const char *host_port,
     s->write = socket_write;
     s->close = tcp_close;
 
-    s->fd = inet_connect(host_port, false, &in_progress, errp);
+    s->fd = inet_nonblocking_connect(host_port, &in_progress, errp);
     if (error_is_set(errp)) {
         migrate_fd_error(s);
         return -1;
diff --git a/nbd.c b/nbd.c
index 57edfde0c..6f0db62de 100644
--- a/nbd.c
+++ b/nbd.c
@@ -208,7 +208,7 @@ int tcp_socket_outgoing(const char *address, uint16_t port)
 
 int tcp_socket_outgoing_spec(const char *address_and_port)
 {
-    return inet_connect(address_and_port, true, NULL, NULL);
+    return inet_connect(address_and_port, NULL);
 }
 
 int tcp_socket_incoming(const char *address, uint16_t port)
diff --git a/qemu-char.c b/qemu-char.c
index 7f0f89515..13b87b53c 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -2456,7 +2456,7 @@ static CharDriverState *qemu_chr_open_socket(QemuOpts *opts)
         if (is_listen) {
             fd = inet_listen_opts(opts, 0, NULL);
         } else {
-            fd = inet_connect_opts(opts, NULL, NULL);
+            fd = inet_connect_opts(opts, true, NULL, NULL);
         }
     }
     if (fd < 0) {
diff --git a/qemu-sockets.c b/qemu-sockets.c
index 22797bf2e..0883a66eb 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -54,9 +54,6 @@ static QemuOptsList dummy_opts = {
         },{
             .name = "ipv6",
             .type = QEMU_OPT_BOOL,
-        },{
-            .name = "block",
-            .type = QEMU_OPT_BOOL,
         },
         { /* end if list */ }
     },
@@ -294,11 +291,22 @@ static struct addrinfo *inet_parse_connect_opts(QemuOpts *opts, Error **errp)
     return res;
 }
 
-int inet_connect_opts(QemuOpts *opts, bool *in_progress, Error **errp)
+/**
+ * Create a socket and connect it to an address.
+ *
+ * @opts: QEMU options, recognized parameters strings "host" and "port",
+ *        bools "ipv4" and "ipv6".
+ * @block: set true for blocking socket
+ * @in_progress: set to true in case of ongoing connect
+ * @errp: set on error
+ *
+ * Returns: -1 on error, file descriptor on success.
+ */
+int inet_connect_opts(QemuOpts *opts, bool block, bool *in_progress,
+                      Error **errp)
 {
     struct addrinfo *res, *e;
     int sock = -1;
-    bool block = qemu_opt_get_bool(opts, "block", 0);
 
     res = inet_parse_connect_opts(opts, errp);
     if (!res) {
@@ -515,17 +523,47 @@ int inet_listen(const char *str, char *ostr, int olen,
     return sock;
 }
 
-int inet_connect(const char *str, bool block, bool *in_progress, Error **errp)
+/**
+ * Create a blocking socket and connect it to an address.
+ *
+ * @str: address string
+ * @errp: set in case of an error
+ *
+ * Returns -1 in case of error, file descriptor on success
+ **/
+int inet_connect(const char *str, Error **errp)
 {
     QemuOpts *opts;
     int sock = -1;
 
     opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
     if (inet_parse(opts, str) == 0) {
-        if (block) {
-            qemu_opt_set(opts, "block", "on");
-        }
-        sock = inet_connect_opts(opts, in_progress, errp);
+        sock = inet_connect_opts(opts, true, NULL, errp);
+    } else {
+        error_set(errp, QERR_SOCKET_CREATE_FAILED);
+    }
+    qemu_opts_del(opts);
+    return sock;
+}
+
+/**
+ * Create a non-blocking socket and connect it to an address.
+ *
+ * @str: address string
+ * @in_progress: set to true in case of ongoing connect
+ * @errp: set in case of an error
+ *
+ * Returns: -1 on error, file descriptor on success.
+ **/
+int inet_nonblocking_connect(const char *str, bool *in_progress,
+                             Error **errp)
+{
+    QemuOpts *opts;
+    int sock = -1;
+
+    opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+    if (inet_parse(opts, str) == 0) {
+        sock = inet_connect_opts(opts, false, in_progress, errp);
     } else {
         error_set(errp, QERR_SOCKET_CREATE_FAILED);
     }
diff --git a/qemu_socket.h b/qemu_socket.h
index 30ae6af8b..80696aa6d 100644
--- a/qemu_socket.h
+++ b/qemu_socket.h
@@ -42,8 +42,11 @@ int send_all(int fd, const void *buf, int len1);
 int inet_listen_opts(QemuOpts *opts, int port_offset, Error **errp);
 int inet_listen(const char *str, char *ostr, int olen,
                 int socktype, int port_offset, Error **errp);
-int inet_connect_opts(QemuOpts *opts, bool *in_progress, Error **errp);
-int inet_connect(const char *str, bool block, bool *in_progress, Error **errp);
+int inet_connect_opts(QemuOpts *opts, bool block, bool *in_progress,
+                      Error **errp);
+int inet_connect(const char *str, Error **errp);
+int inet_nonblocking_connect(const char *str, bool *in_progress,
+                             Error **errp);
 int inet_dgram_opts(QemuOpts *opts);
 const char *inet_strfamily(int family);
 
diff --git a/ui/vnc.c b/ui/vnc.c
index 385e345c3..01b2dafea 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -3061,7 +3061,7 @@ int vnc_display_open(DisplayState *ds, const char *display)
         if (strncmp(display, "unix:", 5) == 0)
             vs->lsock = unix_connect(display+5);
         else
-            vs->lsock = inet_connect(display, true, NULL, NULL);
+            vs->lsock = inet_connect(display, NULL);
         if (-1 == vs->lsock) {
             g_free(vs->display);
             vs->display = NULL;
-- 
cgit v1.2.3


From 233aa5c2d1cf4655ffe335025a68cf5454f87dad Mon Sep 17 00:00:00 2001
From: Orit Wasserman <owasserm@redhat.com>
Date: Mon, 24 Sep 2012 13:11:09 +0200
Subject: Fix address handling in inet_nonblocking_connect

getaddrinfo can give us a list of addresses, but we only try to
connect to the first one. If that fails we never proceed to
the next one.  This is common on desktop setups that often have ipv6
configured but not actually working.

To fix this make inet_connect_nonblocking retry connection with a different
address.
callers on inet_nonblocking_connect register a callback function that will
be called when connect opertion completes, in case of failure the fd will have
a negative value

Signed-off-by: Orit Wasserman <owasserm@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
---
 migration-tcp.c |  37 ++++------------
 qemu-char.c     |   2 +-
 qemu-sockets.c  | 129 +++++++++++++++++++++++++++++++++++++++++++++-----------
 qemu_socket.h   |  16 ++++---
 4 files changed, 126 insertions(+), 58 deletions(-)

diff --git a/migration-tcp.c b/migration-tcp.c
index 7f6ad9872..a15c2b87a 100644
--- a/migration-tcp.c
+++ b/migration-tcp.c
@@ -53,54 +53,35 @@ static int tcp_close(MigrationState *s)
     return r;
 }
 
-static void tcp_wait_for_connect(void *opaque)
+static void tcp_wait_for_connect(int fd, void *opaque)
 {
     MigrationState *s = opaque;
-    int val, ret;
-    socklen_t valsize = sizeof(val);
 
-    DPRINTF("connect completed\n");
-    do {
-        ret = getsockopt(s->fd, SOL_SOCKET, SO_ERROR, (void *) &val, &valsize);
-    } while (ret == -1 && (socket_error()) == EINTR);
-
-    if (ret < 0) {
+    if (fd < 0) {
+        DPRINTF("migrate connect error\n");
+        s->fd = -1;
         migrate_fd_error(s);
-        return;
-    }
-
-    qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
-
-    if (val == 0)
+    } else {
+        DPRINTF("migrate connect success\n");
+        s->fd = fd;
         migrate_fd_connect(s);
-    else {
-        DPRINTF("error connecting %d\n", val);
-        migrate_fd_error(s);
     }
 }
 
 int tcp_start_outgoing_migration(MigrationState *s, const char *host_port,
                                  Error **errp)
 {
-    bool in_progress;
-
     s->get_error = socket_errno;
     s->write = socket_write;
     s->close = tcp_close;
 
-    s->fd = inet_nonblocking_connect(host_port, &in_progress, errp);
+    s->fd = inet_nonblocking_connect(host_port, tcp_wait_for_connect, s,
+                                     errp);
     if (error_is_set(errp)) {
         migrate_fd_error(s);
         return -1;
     }
 
-    if (in_progress) {
-        DPRINTF("connect in progress\n");
-        qemu_set_fd_handler2(s->fd, NULL, NULL, tcp_wait_for_connect, s);
-    } else {
-        migrate_fd_connect(s);
-    }
-
     return 0;
 }
 
diff --git a/qemu-char.c b/qemu-char.c
index 13b87b53c..b082bae11 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -2456,7 +2456,7 @@ static CharDriverState *qemu_chr_open_socket(QemuOpts *opts)
         if (is_listen) {
             fd = inet_listen_opts(opts, 0, NULL);
         } else {
-            fd = inet_connect_opts(opts, true, NULL, NULL);
+            fd = inet_connect_opts(opts, NULL, NULL, NULL);
         }
     }
     if (fd < 0) {
diff --git a/qemu-sockets.c b/qemu-sockets.c
index 0883a66eb..1f14e8bc6 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -24,6 +24,7 @@
 
 #include "qemu_socket.h"
 #include "qemu-common.h" /* for qemu_isdigit */
+#include "main-loop.h"
 
 #ifndef AI_ADDRCONFIG
 # define AI_ADDRCONFIG 0
@@ -214,14 +215,66 @@ listen:
     ((rc) == -EINPROGRESS)
 #endif
 
-static int inet_connect_addr(struct addrinfo *addr, bool block,
-                             bool *in_progress)
+/* Struct to store connect state for non blocking connect */
+typedef struct ConnectState {
+    int fd;
+    struct addrinfo *addr_list;
+    struct addrinfo *current_addr;
+    NonBlockingConnectHandler *callback;
+    void *opaque;
+} ConnectState;
+
+static int inet_connect_addr(struct addrinfo *addr, bool *in_progress,
+                             ConnectState *connect_state);
+
+static void wait_for_connect(void *opaque)
 {
-    int sock, rc;
+    ConnectState *s = opaque;
+    int val = 0, rc = 0;
+    socklen_t valsize = sizeof(val);
+    bool in_progress;
+
+    qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
+
+    do {
+        rc = getsockopt(s->fd, SOL_SOCKET, SO_ERROR, (void *) &val, &valsize);
+    } while (rc == -1 && socket_error() == EINTR);
+
+    /* update rc to contain error */
+    if (!rc && val) {
+        rc = -1;
+    }
+
+    /* connect error */
+    if (rc < 0) {
+        closesocket(s->fd);
+        s->fd = rc;
+    }
+
+    /* try to connect to the next address on the list */
+    while (s->current_addr->ai_next != NULL && s->fd < 0) {
+        s->current_addr = s->current_addr->ai_next;
+        s->fd = inet_connect_addr(s->current_addr, &in_progress, s);
+        /* connect in progress */
+        if (in_progress) {
+            return;
+        }
+    }
 
-    if (in_progress) {
-        *in_progress = false;
+    freeaddrinfo(s->addr_list);
+    if (s->callback) {
+        s->callback(s->fd, s->opaque);
     }
+    g_free(s);
+    return;
+}
+
+static int inet_connect_addr(struct addrinfo *addr, bool *in_progress,
+                             ConnectState *connect_state)
+{
+    int sock, rc;
+
+    *in_progress = false;
 
     sock = qemu_socket(addr->ai_family, addr->ai_socktype, addr->ai_protocol);
     if (sock < 0) {
@@ -230,7 +283,7 @@ static int inet_connect_addr(struct addrinfo *addr, bool block,
         return -1;
     }
     setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
-    if (!block) {
+    if (connect_state != NULL) {
         socket_set_nonblock(sock);
     }
     /* connect to peer */
@@ -241,10 +294,11 @@ static int inet_connect_addr(struct addrinfo *addr, bool block,
         }
     } while (rc == -EINTR);
 
-    if (!block && QEMU_SOCKET_RC_INPROGRESS(rc)) {
-        if (in_progress) {
-            *in_progress = true;
-        }
+    if (connect_state != NULL && QEMU_SOCKET_RC_INPROGRESS(rc)) {
+        connect_state->fd = sock;
+        qemu_set_fd_handler2(sock, NULL, NULL, wait_for_connect,
+                             connect_state);
+        *in_progress = true;
     } else if (rc < 0) {
         closesocket(sock);
         return -1;
@@ -260,6 +314,7 @@ static struct addrinfo *inet_parse_connect_opts(QemuOpts *opts, Error **errp)
     const char *port;
 
     memset(&ai, 0, sizeof(ai));
+
     ai.ai_flags = AI_CANONNAME | AI_ADDRCONFIG;
     ai.ai_family = PF_UNSPEC;
     ai.ai_socktype = SOCK_STREAM;
@@ -296,36 +351,55 @@ static struct addrinfo *inet_parse_connect_opts(QemuOpts *opts, Error **errp)
  *
  * @opts: QEMU options, recognized parameters strings "host" and "port",
  *        bools "ipv4" and "ipv6".
- * @block: set true for blocking socket
- * @in_progress: set to true in case of ongoing connect
  * @errp: set on error
+ * @callback: callback function for non-blocking connect
+ * @opaque: opaque for callback function
  *
  * Returns: -1 on error, file descriptor on success.
+ *
+ * If @callback is non-null, the connect is non-blocking.  If this
+ * function succeeds, callback will be called when the connection
+ * completes, with the file descriptor on success, or -1 on error.
  */
-int inet_connect_opts(QemuOpts *opts, bool block, bool *in_progress,
-                      Error **errp)
+int inet_connect_opts(QemuOpts *opts, Error **errp,
+                      NonBlockingConnectHandler *callback, void *opaque)
 {
     struct addrinfo *res, *e;
     int sock = -1;
+    bool in_progress;
+    ConnectState *connect_state = NULL;
 
     res = inet_parse_connect_opts(opts, errp);
     if (!res) {
         return -1;
     }
 
-    if (in_progress) {
-        *in_progress = false;
+    if (callback != NULL) {
+        connect_state = g_malloc0(sizeof(*connect_state));
+        connect_state->addr_list = res;
+        connect_state->callback = callback;
+        connect_state->opaque = opaque;
     }
 
     for (e = res; e != NULL; e = e->ai_next) {
-        sock = inet_connect_addr(e, block, in_progress);
-        if (sock >= 0) {
+        if (connect_state != NULL) {
+            connect_state->current_addr = e;
+        }
+        sock = inet_connect_addr(e, &in_progress, connect_state);
+        if (in_progress) {
+            return sock;
+        } else if (sock >= 0) {
+            /* non blocking socket immediate success, call callback */
+            if (callback != NULL) {
+                callback(sock, opaque);
+            }
             break;
         }
     }
     if (sock < 0) {
         error_set(errp, QERR_SOCKET_CONNECT_FAILED);
     }
+    g_free(connect_state);
     freeaddrinfo(res);
     return sock;
 }
@@ -538,7 +612,7 @@ int inet_connect(const char *str, Error **errp)
 
     opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
     if (inet_parse(opts, str) == 0) {
-        sock = inet_connect_opts(opts, true, NULL, errp);
+        sock = inet_connect_opts(opts, errp, NULL, NULL);
     } else {
         error_set(errp, QERR_SOCKET_CREATE_FAILED);
     }
@@ -548,22 +622,29 @@ int inet_connect(const char *str, Error **errp)
 
 /**
  * Create a non-blocking socket and connect it to an address.
+ * Calls the callback function with fd in case of success or -1 in case of
+ * error.
  *
  * @str: address string
- * @in_progress: set to true in case of ongoing connect
+ * @callback: callback function that is called when connect completes,
+ *            cannot be NULL.
+ * @opaque: opaque for callback function
  * @errp: set in case of an error
  *
- * Returns: -1 on error, file descriptor on success.
+ * Returns: -1 on immediate error, file descriptor on success.
  **/
-int inet_nonblocking_connect(const char *str, bool *in_progress,
-                             Error **errp)
+int inet_nonblocking_connect(const char *str,
+                             NonBlockingConnectHandler *callback,
+                             void *opaque, Error **errp)
 {
     QemuOpts *opts;
     int sock = -1;
 
+    g_assert(callback != NULL);
+
     opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
     if (inet_parse(opts, str) == 0) {
-        sock = inet_connect_opts(opts, false, in_progress, errp);
+        sock = inet_connect_opts(opts, errp, callback, opaque);
     } else {
         error_set(errp, QERR_SOCKET_CREATE_FAILED);
     }
diff --git a/qemu_socket.h b/qemu_socket.h
index 80696aa6d..3e8aee9ca 100644
--- a/qemu_socket.h
+++ b/qemu_socket.h
@@ -38,15 +38,21 @@ void socket_set_block(int fd);
 void socket_set_nonblock(int fd);
 int send_all(int fd, const void *buf, int len1);
 
-/* New, ipv6-ready socket helper functions, see qemu-sockets.c */
+/* callback function for nonblocking connect
+ * valid fd on success, negative error code on failure
+ */
+typedef void NonBlockingConnectHandler(int fd, void *opaque);
+
 int inet_listen_opts(QemuOpts *opts, int port_offset, Error **errp);
 int inet_listen(const char *str, char *ostr, int olen,
                 int socktype, int port_offset, Error **errp);
-int inet_connect_opts(QemuOpts *opts, bool block, bool *in_progress,
-                      Error **errp);
+int inet_connect_opts(QemuOpts *opts, Error **errp,
+                      NonBlockingConnectHandler *callback, void *opaque);
 int inet_connect(const char *str, Error **errp);
-int inet_nonblocking_connect(const char *str, bool *in_progress,
-                             Error **errp);
+int inet_nonblocking_connect(const char *str,
+                             NonBlockingConnectHandler *callback,
+                             void *opaque, Error **errp);
+
 int inet_dgram_opts(QemuOpts *opts);
 const char *inet_strfamily(int family);
 
-- 
cgit v1.2.3


From 3202becaa2b805497ce9e6faa6edfb83665f91b1 Mon Sep 17 00:00:00 2001
From: Orit Wasserman <owasserm@redhat.com>
Date: Mon, 24 Sep 2012 13:11:10 +0200
Subject: Clear handler only for valid fd

Signed-off-by: Orit Wasserman <owasserm@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
---
 migration.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/migration.c b/migration.c
index 1edeec5dd..22a05c437 100644
--- a/migration.c
+++ b/migration.c
@@ -240,7 +240,9 @@ static int migrate_fd_cleanup(MigrationState *s)
 {
     int ret = 0;
 
-    qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
+    if (s->fd != -1) {
+        qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
+    }
 
     if (s->file) {
         DPRINTF("closing file\n");
-- 
cgit v1.2.3


From ac05f3492421caeb05809ffa02c6198ede179e43 Mon Sep 17 00:00:00 2001
From: Amos Kong <akong@redhat.com>
Date: Fri, 7 Sep 2012 11:11:03 +0800
Subject: add a boot parameter to set reboot timeout

Added an option to let qemu transfer a configuration file to bios,
"etc/boot-fail-wait", which could be specified by command
    -boot reboot-timeout=T
T have a max value of 0xffff, unit is ms.

With this option, guest will wait for a given time if not find
bootabled device, then reboot. If reboot-timeout is '-1', guest
will not reboot, qemu passes '-1' to bios by default.

This feature need the new seabios's support.

Seabios pulls the value from the fwcfg "file" interface, this
interface is used because SeaBIOS needs a reliable way of
obtaining a name, value size, and value. It in no way requires
that there be a real file on the user's host machine.

Signed-off-by: Amos Kong <akong@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
---
 hw/fw_cfg.c     | 25 +++++++++++++++++++++++++
 qemu-config.c   |  3 +++
 qemu-options.hx | 12 +++++++++---
 vl.c            |  3 ++-
 4 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
index 7b3b5769a..dcde1a943 100644
--- a/hw/fw_cfg.c
+++ b/hw/fw_cfg.c
@@ -183,6 +183,30 @@ static void fw_cfg_bootsplash(FWCfgState *s)
     }
 }
 
+static void fw_cfg_reboot(FWCfgState *s)
+{
+    int reboot_timeout = -1;
+    char *p;
+    const char *temp;
+
+    /* get user configuration */
+    QemuOptsList *plist = qemu_find_opts("boot-opts");
+    QemuOpts *opts = QTAILQ_FIRST(&plist->head);
+    if (opts != NULL) {
+        temp = qemu_opt_get(opts, "reboot-timeout");
+        if (temp != NULL) {
+            p = (char *)temp;
+            reboot_timeout = strtol(p, (char **)&p, 10);
+        }
+    }
+    /* validate the input */
+    if (reboot_timeout > 0xffff) {
+        error_report("reboot timeout is larger than 65535, force it to 65535.");
+        reboot_timeout = 0xffff;
+    }
+    fw_cfg_add_file(s, "etc/boot-fail-wait", g_memdup(&reboot_timeout, 4), 4);
+}
+
 static void fw_cfg_write(FWCfgState *s, uint8_t value)
 {
     int arch = !!(s->cur_entry & FW_CFG_ARCH_LOCAL);
@@ -497,6 +521,7 @@ FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
     fw_cfg_add_i16(s, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
     fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu);
     fw_cfg_bootsplash(s);
+    fw_cfg_reboot(s);
 
     s->machine_ready.notify = fw_cfg_machine_ready;
     qemu_add_machine_init_done_notifier(&s->machine_ready);
diff --git a/qemu-config.c b/qemu-config.c
index 12eafbb4f..cd1ec2165 100644
--- a/qemu-config.c
+++ b/qemu-config.c
@@ -645,6 +645,9 @@ QemuOptsList qemu_boot_opts = {
         }, {
             .name = "splash-time",
             .type = QEMU_OPT_STRING,
+        }, {
+            .name = "reboot-timeout",
+            .type = QEMU_OPT_STRING,
         },
         { /*End of list */ }
     },
diff --git a/qemu-options.hx b/qemu-options.hx
index 09c86c4cb..7d97f9692 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -344,13 +344,14 @@ ETEXI
 
 DEF("boot", HAS_ARG, QEMU_OPTION_boot,
     "-boot [order=drives][,once=drives][,menu=on|off]\n"
-    "      [,splash=sp_name][,splash-time=sp_time]\n"
+    "      [,splash=sp_name][,splash-time=sp_time][,reboot-timeout=rb_time]\n"
     "                'drives': floppy (a), hard disk (c), CD-ROM (d), network (n)\n"
     "                'sp_name': the file's name that would be passed to bios as logo picture, if menu=on\n"
-    "                'sp_time': the period that splash picture last if menu=on, unit is ms\n",
+    "                'sp_time': the period that splash picture last if menu=on, unit is ms\n"
+    "                'rb_timeout': the timeout before guest reboot when boot failed, unit is ms\n",
     QEMU_ARCH_ALL)
 STEXI
-@item -boot [order=@var{drives}][,once=@var{drives}][,menu=on|off][,splash=@var{sp_name}][,splash-time=@var{sp_time}]
+@item -boot [order=@var{drives}][,once=@var{drives}][,menu=on|off][,splash=@var{sp_name}][,splash-time=@var{sp_time}][,reboot-timeout=@var{rb_timeout}]
 @findex -boot
 Specify boot order @var{drives} as a string of drive letters. Valid
 drive letters depend on the target achitecture. The x86 PC uses: a, b
@@ -369,6 +370,11 @@ limitation: The splash file could be a jpeg file or a BMP file in 24 BPP
 format(true color). The resolution should be supported by the SVGA mode, so
 the recommended is 320x240, 640x480, 800x640.
 
+A timeout could be passed to bios, guest will pause for @var{rb_timeout} ms
+when boot failed, then reboot. If @var{rb_timeout} is '-1', guest will not
+reboot, qemu passes '-1' to bios by default. Currently Seabios for X86
+system support it.
+
 @example
 # try to boot from network first, then from hard disk
 qemu-system-i386 -boot order=nc
diff --git a/vl.c b/vl.c
index 599d17a75..8d305ca59 100644
--- a/vl.c
+++ b/vl.c
@@ -2632,7 +2632,8 @@ int main(int argc, char **argv, char **envp)
                 {
                     static const char * const params[] = {
                         "order", "once", "menu",
-                        "splash", "splash-time", NULL
+                        "splash", "splash-time",
+                        "reboot-timeout", NULL
                     };
                     char buf[sizeof(boot_devices)];
                     char *standard_boot_devices;
-- 
cgit v1.2.3


From 14c126baf1c38607c5bd988878de85a06cefd8cf Mon Sep 17 00:00:00 2001
From: Brendan Fennell <bfennell@skynet.ie>
Date: Wed, 26 Sep 2012 16:46:28 +0100
Subject: pl190: fix read of VECTADDR

Reading VECTADDR was causing us to set the current priority to
the wrong value, the most obvious effect of which was that we
would return the vector for the wrong interrupt as the result
of the read.

Signed-off-by: Brendan Fennell <bfennell@skynet.ie>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/pl190.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/hw/pl190.c b/hw/pl190.c
index cb50afb9f..7332f4dba 100644
--- a/hw/pl190.c
+++ b/hw/pl190.c
@@ -117,12 +117,18 @@ static uint64_t pl190_read(void *opaque, target_phys_addr_t offset,
         return s->protected;
     case 12: /* VECTADDR */
         /* Read vector address at the start of an ISR.  Increases the
-           current priority level to that of the current interrupt.  */
-        for (i = 0; i < s->priority; i++)
-          {
-            if ((s->level | s->soft_level) & s->prio_mask[i])
-              break;
-          }
+         * current priority level to that of the current interrupt.
+         *
+         * Since an enabled interrupt X at priority P causes prio_mask[Y]
+         * to have bit X set for all Y > P, this loop will stop with
+         * i == the priority of the highest priority set interrupt.
+         */
+        for (i = 0; i < s->priority; i++) {
+            if ((s->level | s->soft_level) & s->prio_mask[i + 1]) {
+                break;
+            }
+        }
+
         /* Reading this value with no pending interrupts is undefined.
            We return the default address.  */
         if (i == PL190_NUM_PRIO)
-- 
cgit v1.2.3


From 9892cae39562d2e6c00ccc5966302c00f23be6d4 Mon Sep 17 00:00:00 2001
From: Meador Inge <meadori@codesourcery.com>
Date: Wed, 26 Sep 2012 16:46:28 +0100
Subject: hw/armv7m_nvic: Correctly register GIC region when setting up NVIC

When setting up the NVIC memory regions the memory range
0x100..0xcff is aliased to an IO memory region that belongs
to the ARM GIC.  This aliased region should be added to the
NVIC memory container, but the actual GIC IO memory region
was being added instead.  This mixup was causing the wrong
IO memory access functions to be called when accessing parts
of the NVIC memory.

Signed-off-by: Meador Inge <meadori@codesourcery.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/armv7m_nvic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/armv7m_nvic.c b/hw/armv7m_nvic.c
index 6a0832eb3..5c0911647 100644
--- a/hw/armv7m_nvic.c
+++ b/hw/armv7m_nvic.c
@@ -489,7 +489,8 @@ static int armv7m_nvic_init(SysBusDevice *dev)
      */
     memory_region_init_alias(&s->gic_iomem_alias, "nvic-gic", &s->gic.iomem,
                              0x100, 0xc00);
-    memory_region_add_subregion_overlap(&s->container, 0x100, &s->gic.iomem, 1);
+    memory_region_add_subregion_overlap(&s->container, 0x100,
+                                        &s->gic_iomem_alias, 1);
     /* Map the whole thing into system memory at the location required
      * by the v7M architecture.
      */
-- 
cgit v1.2.3


From 661bafb3e14bfffcb0a7c7910534c7944608ca45 Mon Sep 17 00:00:00 2001
From: Francesco Lavra <francescolavra.fl@gmail.com>
Date: Wed, 19 Sep 2012 05:51:58 +0000
Subject: Versatile Express: Fix NOR flash 0 address and remove flash alias

In the A series memory map (implemented in the Cortex A15 CoreTile), the
first NOR flash bank (flash 0) is mapped to address 0x08000000, while
address 0x00000000 can be configured as alias to either the first or the
second flash bank. This patch fixes the definition of flash 0 address,
and for simplicity removes the alias definition.

Signed-off-by: Francesco Lavra <francescolavra.fl@gmail.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/vexpress.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/hw/vexpress.c b/hw/vexpress.c
index b6158447d..454c2bbae 100644
--- a/hw/vexpress.c
+++ b/hw/vexpress.c
@@ -62,7 +62,6 @@ enum {
     VE_COMPACTFLASH,
     VE_CLCD,
     VE_NORFLASH0,
-    VE_NORFLASH0ALIAS,
     VE_NORFLASH1,
     VE_SRAM,
     VE_VIDEORAM,
@@ -104,9 +103,8 @@ static target_phys_addr_t motherboard_legacy_map[] = {
 };
 
 static target_phys_addr_t motherboard_aseries_map[] = {
-    /* CS0: 0x00000000 .. 0x0c000000 */
-    [VE_NORFLASH0] = 0x00000000,
-    [VE_NORFLASH0ALIAS] = 0x08000000,
+    /* CS0: 0x08000000 .. 0x0c000000 */
+    [VE_NORFLASH0] = 0x08000000,
     /* CS4: 0x0c000000 .. 0x10000000 */
     [VE_NORFLASH1] = 0x0c000000,
     /* CS5: 0x10000000 .. 0x14000000 */
@@ -413,7 +411,6 @@ static void vexpress_common_init(const VEDBoardInfo *daughterboard,
     sysbus_create_simple("pl111", map[VE_CLCD], pic[14]);
 
     /* VE_NORFLASH0: not modelled */
-    /* VE_NORFLASH0ALIAS: not modelled */
     /* VE_NORFLASH1: not modelled */
 
     sram_size = 0x2000000;
-- 
cgit v1.2.3


From 3dc3e7dd936f2e7f3e6dd4056f81c8961dc8201b Mon Sep 17 00:00:00 2001
From: Francesco Lavra <francescolavra.fl@gmail.com>
Date: Wed, 19 Sep 2012 05:57:21 +0000
Subject: Versatile Express: Add modelling of NOR flash

This patch adds modelling of the two NOR flash banks found on the
Versatile Express motherboard. Tested with U-Boot running on an emulated
Versatile Express, with either A9 or A15 CoreTile.

Signed-off-by: Francesco Lavra <francescolavra.fl@gmail.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/vexpress.c | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/hw/vexpress.c b/hw/vexpress.c
index 454c2bbae..3596d1e33 100644
--- a/hw/vexpress.c
+++ b/hw/vexpress.c
@@ -29,8 +29,12 @@
 #include "sysemu.h"
 #include "boards.h"
 #include "exec-memory.h"
+#include "blockdev.h"
+#include "flash.h"
 
 #define VEXPRESS_BOARD_ID 0x8e0
+#define VEXPRESS_FLASH_SIZE (64 * 1024 * 1024)
+#define VEXPRESS_FLASH_SECT_SIZE (256 * 1024)
 
 static struct arm_boot_info vexpress_binfo;
 
@@ -355,6 +359,7 @@ static void vexpress_common_init(const VEDBoardInfo *daughterboard,
     qemu_irq pic[64];
     uint32_t proc_id;
     uint32_t sys_id;
+    DriveInfo *dinfo;
     ram_addr_t vram_size, sram_size;
     MemoryRegion *sysmem = get_system_memory();
     MemoryRegion *vram = g_new(MemoryRegion, 1);
@@ -410,8 +415,25 @@ static void vexpress_common_init(const VEDBoardInfo *daughterboard,
 
     sysbus_create_simple("pl111", map[VE_CLCD], pic[14]);
 
-    /* VE_NORFLASH0: not modelled */
-    /* VE_NORFLASH1: not modelled */
+    dinfo = drive_get_next(IF_PFLASH);
+    if (!pflash_cfi01_register(map[VE_NORFLASH0], NULL, "vexpress.flash0",
+            VEXPRESS_FLASH_SIZE, dinfo ? dinfo->bdrv : NULL,
+            VEXPRESS_FLASH_SECT_SIZE,
+            VEXPRESS_FLASH_SIZE / VEXPRESS_FLASH_SECT_SIZE, 4,
+            0x00, 0x89, 0x00, 0x18, 0)) {
+        fprintf(stderr, "vexpress: error registering flash 0.\n");
+        exit(1);
+    }
+
+    dinfo = drive_get_next(IF_PFLASH);
+    if (!pflash_cfi01_register(map[VE_NORFLASH1], NULL, "vexpress.flash1",
+            VEXPRESS_FLASH_SIZE, dinfo ? dinfo->bdrv : NULL,
+            VEXPRESS_FLASH_SECT_SIZE,
+            VEXPRESS_FLASH_SIZE / VEXPRESS_FLASH_SECT_SIZE, 4,
+            0x00, 0x89, 0x00, 0x18, 0)) {
+        fprintf(stderr, "vexpress: error registering flash 1.\n");
+        exit(1);
+    }
 
     sram_size = 0x2000000;
     memory_region_init_ram(sram, "vexpress.sram", sram_size);
-- 
cgit v1.2.3


From 6673f47da21718d07346b0f3725f0dbf0d6d8e45 Mon Sep 17 00:00:00 2001
From: Stefan Weil <sw@weilnetz.de>
Date: Tue, 18 Sep 2012 22:43:38 +0200
Subject: tci: Fix for AREG0 free mode

Support for helper functions with 5 arguments was missing
in the code generator and in the interpreter.

There is no need to pass the constant TCG_AREG0 from the
code generator to the interpreter. Remove that code for
the INDEX_op_qemu_st* opcodes.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/tci/tcg-target.c | 10 +++++-----
 tci.c                | 13 +++++++++----
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index 3f4a24bb8..d272a906e 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -300,7 +300,7 @@ static const int tcg_target_reg_alloc_order[] = {
 #endif
 };
 
-#if MAX_OPC_PARAM_IARGS != 4
+#if MAX_OPC_PARAM_IARGS != 5
 # error Fix needed, number of supported input arguments changed!
 #endif
 
@@ -309,16 +309,18 @@ static const int tcg_target_call_iarg_regs[] = {
     TCG_REG_R1,
     TCG_REG_R2,
     TCG_REG_R3,
-#if TCG_TARGET_REG_BITS == 32
-    /* 32 bit hosts need 2 * MAX_OPC_PARAM_IARGS registers. */
 #if 0 /* used for TCG_REG_CALL_STACK */
     TCG_REG_R4,
 #endif
     TCG_REG_R5,
+#if TCG_TARGET_REG_BITS == 32
+    /* 32 bit hosts need 2 * MAX_OPC_PARAM_IARGS registers. */
     TCG_REG_R6,
     TCG_REG_R7,
 #if TCG_TARGET_NB_REGS >= 16
     TCG_REG_R8,
+    TCG_REG_R9,
+    TCG_REG_R10,
 #else
 # error Too few input registers available
 #endif
@@ -798,7 +800,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_qemu_st8:
     case INDEX_op_qemu_st16:
     case INDEX_op_qemu_st32:
-        tcg_out_r(s, TCG_AREG0);
         tcg_out_r(s, *args++);
         tcg_out_r(s, *args++);
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
@@ -809,7 +810,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
 #endif
         break;
     case INDEX_op_qemu_st64:
-        tcg_out_r(s, TCG_AREG0);
         tcg_out_r(s, *args++);
 #if TCG_TARGET_REG_BITS == 32
         tcg_out_r(s, *args++);
diff --git a/tci.c b/tci.c
index ce8a98814..a4f7b7841 100644
--- a/tci.c
+++ b/tci.c
@@ -36,17 +36,19 @@
         tcg_abort(); \
     } while (0)
 
-#if MAX_OPC_PARAM_IARGS != 4
+#if MAX_OPC_PARAM_IARGS != 5
 # error Fix needed, number of supported input arguments changed!
 #endif
 #if TCG_TARGET_REG_BITS == 32
 typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong,
+                                    tcg_target_ulong, tcg_target_ulong,
                                     tcg_target_ulong, tcg_target_ulong,
                                     tcg_target_ulong, tcg_target_ulong,
                                     tcg_target_ulong, tcg_target_ulong);
 #else
 typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong,
-                                    tcg_target_ulong, tcg_target_ulong);
+                                    tcg_target_ulong, tcg_target_ulong,
+                                    tcg_target_ulong);
 #endif
 
 /* TCI can optionally use a global register variable for env. */
@@ -489,14 +491,17 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *cpustate, uint8_t *tb_ptr)
                                           tci_read_reg(TCG_REG_R5),
                                           tci_read_reg(TCG_REG_R6),
                                           tci_read_reg(TCG_REG_R7),
-                                          tci_read_reg(TCG_REG_R8));
+                                          tci_read_reg(TCG_REG_R8),
+                                          tci_read_reg(TCG_REG_R9),
+                                          tci_read_reg(TCG_REG_R10));
             tci_write_reg(TCG_REG_R0, tmp64);
             tci_write_reg(TCG_REG_R1, tmp64 >> 32);
 #else
             tmp64 = ((helper_function)t0)(tci_read_reg(TCG_REG_R0),
                                           tci_read_reg(TCG_REG_R1),
                                           tci_read_reg(TCG_REG_R2),
-                                          tci_read_reg(TCG_REG_R3));
+                                          tci_read_reg(TCG_REG_R3),
+                                          tci_read_reg(TCG_REG_R5));
             tci_write_reg(TCG_REG_R0, tmp64);
 #endif
             break;
-- 
cgit v1.2.3


From fdefe51c288866b98e62663fa18c8af1d66bf5f6 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Mon, 24 Sep 2012 14:55:47 -0700
Subject: Emit debug_insn for CPU_LOG_TB_OP_OPT as well.

For all targets that currently call tcg_gen_debug_insn_start,
add CPU_LOG_TB_OP_OPT to the condition that gates it.

This is useful for comparing optimization dumps, when the
pre-optimization dump is merely noise.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target-alpha/translate.c      | 2 +-
 target-arm/translate.c        | 2 +-
 target-cris/translate.c       | 3 ++-
 target-i386/translate.c       | 3 ++-
 target-lm32/translate.c       | 2 +-
 target-microblaze/translate.c | 3 ++-
 target-mips/translate.c       | 3 ++-
 target-openrisc/translate.c   | 2 +-
 target-ppc/translate.c        | 3 ++-
 target-sh4/translate.c        | 2 +-
 target-sparc/translate.c      | 3 ++-
 target-xtensa/translate.c     | 2 +-
 12 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 3f9aee12d..f707d8deb 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -3421,7 +3421,7 @@ static inline void gen_intermediate_code_internal(CPUAlphaState *env,
         insn = cpu_ldl_code(env, ctx.pc);
         num_insns++;
 
-	if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
+	if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
             tcg_gen_debug_insn_start(ctx.pc);
         }
 
diff --git a/target-arm/translate.c b/target-arm/translate.c
index f4b447a49..5fded491e 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -9816,7 +9816,7 @@ static inline void gen_intermediate_code_internal(CPUARMState *env,
         if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
             gen_io_start();
 
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
+        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
             tcg_gen_debug_insn_start(dc->pc);
         }
 
diff --git a/target-cris/translate.c b/target-cris/translate.c
index 19144b5e2..755de659d 100644
--- a/target-cris/translate.c
+++ b/target-cris/translate.c
@@ -3074,8 +3074,9 @@ static unsigned int crisv32_decoder(CPUCRISState *env, DisasContext *dc)
 	int insn_len = 2;
 	int i;
 
-	if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+	if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
 		tcg_gen_debug_insn_start(dc->pc);
+        }
 
 	/* Load a halfword onto the instruction register.  */
         dc->ir = cris_fetch(env, dc, dc->pc, 2, 0);
diff --git a/target-i386/translate.c b/target-i386/translate.c
index eb0cabcf1..323869d87 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -4202,8 +4202,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
     target_ulong next_eip, tval;
     int rex_w, rex_r;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(pc_start);
+    }
     s->pc = pc_start;
     prefixes = 0;
     aflag = s->code32;
diff --git a/target-lm32/translate.c b/target-lm32/translate.c
index 5f6dcbac8..77c286661 100644
--- a/target-lm32/translate.c
+++ b/target-lm32/translate.c
@@ -942,7 +942,7 @@ static const DecoderInfo decinfo[] = {
 
 static inline void decode(DisasContext *dc, uint32_t ir)
 {
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(dc->pc);
     }
 
diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c
index 9c7d77f57..7d864b1da 100644
--- a/target-microblaze/translate.c
+++ b/target-microblaze/translate.c
@@ -1664,8 +1664,9 @@ static inline void decode(DisasContext *dc, uint32_t ir)
 {
     int i;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(dc->pc);
+    }
 
     dc->ir = ir;
     LOG_DIS("%8.8x\t", dc->ir);
diff --git a/target-mips/translate.c b/target-mips/translate.c
index fa79d4945..454e5cc81 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -12124,8 +12124,9 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch)
         gen_set_label(l1);
     }
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(ctx->pc);
+    }
 
     op = MASK_OP_MAJOR(ctx->opcode);
     rs = (ctx->opcode >> 21) & 0x1f;
diff --git a/target-openrisc/translate.c b/target-openrisc/translate.c
index 325ba09cb..e2cad3ad4 100644
--- a/target-openrisc/translate.c
+++ b/target-openrisc/translate.c
@@ -1715,7 +1715,7 @@ static inline void gen_intermediate_code_internal(OpenRISCCPU *cpu,
             gen_opc_icount[k] = num_insns;
         }
 
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
+        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
             tcg_gen_debug_insn_start(dc->pc);
         }
 
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index ac915ccad..1042268ec 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -9690,8 +9690,9 @@ static inline void gen_intermediate_code_internal(CPUPPCState *env,
         LOG_DISAS("translate opcode %08x (%02x %02x %02x) (%s)\n",
                     ctx.opcode, opc1(ctx.opcode), opc2(ctx.opcode),
                     opc3(ctx.opcode), little_endian ? "little" : "big");
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
             tcg_gen_debug_insn_start(ctx.nip);
+        }
         ctx.nip += 4;
         table = env->opcodes;
         num_insns++;
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 0fa83cab9..9d955eb20 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -1924,7 +1924,7 @@ static void decode_opc(DisasContext * ctx)
 {
     uint32_t old_flags = ctx->flags;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(ctx->pc);
     }
 
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index b95f91cd3..e5ebedfa2 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -2394,8 +2394,9 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
     TCGv_i64 cpu_src1_64, cpu_src2_64, cpu_dst_64;
     target_long simm;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(dc->pc);
+    }
 
     opc = GET_FIELD(insn, 0, 1);
 
diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index ba3ffcb7a..b9acd706c 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -2923,7 +2923,7 @@ static void gen_intermediate_code_internal(
             gen_opc_icount[lj] = insn_count;
         }
 
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
+        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
             tcg_gen_debug_insn_start(dc.pc);
         }
 
-- 
cgit v1.2.3


From fa547e617c2f499903dccb8f1b9031bfe724e11e Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Mon, 24 Sep 2012 14:55:48 -0700
Subject: target-m68k: Call tcg_gen_debug_insn_start

Cc: Paul Brook <paul@codesourcery.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target-m68k/translate.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index fb707f21b..451ef7410 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -2953,6 +2953,10 @@ static void disas_m68k_insn(CPUM68KState * env, DisasContext *s)
 {
     uint16_t insn;
 
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
+        tcg_gen_debug_insn_start(s->pc);
+    }
+
     insn = cpu_lduw_code(env, s->pc);
     s->pc += 2;
 
-- 
cgit v1.2.3


From 7193b5f6f52d633531406771b9370d7b591cef88 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Mon, 24 Sep 2012 14:55:49 -0700
Subject: target-s390x: Call tcg_gen_debug_insn_start

Cc: Alexander Graf <agraf@suse.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target-s390x/translate.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/target-s390x/translate.c b/target-s390x/translate.c
index 3214783aa..6fa76a00f 100644
--- a/target-s390x/translate.c
+++ b/target-s390x/translate.c
@@ -5173,10 +5173,11 @@ static inline void gen_intermediate_code_internal(CPUS390XState *env,
         if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
         }
-#if defined(S390X_DEBUG_DISAS_VERBOSE)
-        LOG_DISAS("pc " TARGET_FMT_lx "\n",
-                  dc.pc);
-#endif
+
+        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
+            tcg_gen_debug_insn_start(dc.pc);
+        }
+
         disas_s390_insn(env, &dc);
 
         num_insns++;
-- 
cgit v1.2.3


From daa47c34a893917d712923b107d33f7b89a3a53b Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Mon, 24 Sep 2012 14:55:50 -0700
Subject: target-unicore32: Call tcg_gen_debug_insn_start

Acked-by: Guan Xuetao <gxt@mprc.pku.edu.cn>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target-unicore32/translate.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/target-unicore32/translate.c b/target-unicore32/translate.c
index b786a6b89..36f4f2f96 100644
--- a/target-unicore32/translate.c
+++ b/target-unicore32/translate.c
@@ -1861,6 +1861,10 @@ static void disas_uc32_insn(CPUUniCore32State *env, DisasContext *s)
 {
     unsigned int insn;
 
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
+        tcg_gen_debug_insn_start(s->pc);
+    }
+
     insn = cpu_ldl_code(env, s->pc);
     s->pc += 4;
 
-- 
cgit v1.2.3


From 0d404541b24b332f6a822139c6bd889b7e319762 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Mon, 24 Sep 2012 14:55:51 -0700
Subject: target-s390x: Use CPU_LOG_INT

Three places in the interrupt code did we not honor the mask.

Reviewed-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target-s390x/helper.c      | 7 ++++---
 target-s390x/misc_helper.c | 3 ++-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/target-s390x/helper.c b/target-s390x/helper.c
index a5741ecde..22256b057 100644
--- a/target-s390x/helper.c
+++ b/target-s390x/helper.c
@@ -511,7 +511,8 @@ static void do_program_interrupt(CPUS390XState *env)
         break;
     }
 
-    qemu_log("%s: code=0x%x ilc=%d\n", __func__, env->int_pgm_code, ilc);
+    qemu_log_mask(CPU_LOG_INT, "%s: code=0x%x ilc=%d\n",
+                  __func__, env->int_pgm_code, ilc);
 
     lowcore = cpu_physical_memory_map(env->psa, &len, 1);
 
@@ -575,8 +576,8 @@ static void do_ext_interrupt(CPUS390XState *env)
 
 void do_interrupt(CPUS390XState *env)
 {
-    qemu_log("%s: %d at pc=%" PRIx64 "\n", __func__, env->exception_index,
-             env->psw.addr);
+    qemu_log_mask(CPU_LOG_INT, "%s: %d at pc=%" PRIx64 "\n",
+                  __func__, env->exception_index, env->psw.addr);
 
     s390_add_running_cpu(env);
     /* handle external interrupts */
diff --git a/target-s390x/misc_helper.c b/target-s390x/misc_helper.c
index 2938ac9c7..e9b3caed4 100644
--- a/target-s390x/misc_helper.c
+++ b/target-s390x/misc_helper.c
@@ -53,7 +53,8 @@ void HELPER(exception)(CPUS390XState *env, uint32_t excp)
 #ifndef CONFIG_USER_ONLY
 void program_interrupt(CPUS390XState *env, uint32_t code, int ilc)
 {
-    qemu_log("program interrupt at %#" PRIx64 "\n", env->psw.addr);
+    qemu_log_mask(CPU_LOG_INT, "program interrupt at %#" PRIx64 "\n",
+                  env->psw.addr);
 
     if (kvm_enabled()) {
 #ifdef CONFIG_KVM
-- 
cgit v1.2.3


From 87a5395bdd75c22e8c9b92c5655810762a7fd5bf Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Mon, 24 Sep 2012 14:55:52 -0700
Subject: target-s390x: Avoid double CPU_LOG_TB_CPU

This is already handled generically in cpu_exec.

Reviewed-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target-s390x/translate.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/target-s390x/translate.c b/target-s390x/translate.c
index 6fa76a00f..4cc92255a 100644
--- a/target-s390x/translate.c
+++ b/target-s390x/translate.c
@@ -5220,7 +5220,6 @@ static inline void gen_intermediate_code_internal(CPUS390XState *env,
         tb->icount = num_insns;
     }
 #if defined(S390X_DEBUG_DISAS)
-    log_cpu_state_mask(CPU_LOG_TB_CPU, env, 0);
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
         qemu_log("IN: %s\n", lookup_symbol(pc_start));
         log_target_disas(pc_start, dc.pc - pc_start, 1);
-- 
cgit v1.2.3


From d885bdd481fc1c11d3158cc1c4c68bffdb2c26fe Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Mon, 24 Sep 2012 14:55:53 -0700
Subject: target-s390x: Tidy cpu_dump_state

The blank lines inside the single dump make it difficult for the
eye to pick out the block.  Worse, with interior newlines, but
no blank line following, the PSW line appears to belong to the
next dump block.

Reviewed-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target-s390x/translate.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/target-s390x/translate.c b/target-s390x/translate.c
index 4cc92255a..db464cc07 100644
--- a/target-s390x/translate.c
+++ b/target-s390x/translate.c
@@ -79,6 +79,14 @@ void cpu_dump_state(CPUS390XState *env, FILE *f, fprintf_function cpu_fprintf,
 {
     int i;
 
+    if (env->cc_op > 3) {
+        cpu_fprintf(f, "PSW=mask %016" PRIx64 " addr %016" PRIx64 " cc %15s\n",
+                    env->psw.mask, env->psw.addr, cc_name(env->cc_op));
+    } else {
+        cpu_fprintf(f, "PSW=mask %016" PRIx64 " addr %016" PRIx64 " cc %02x\n",
+                    env->psw.mask, env->psw.addr, env->cc_op);
+    }
+
     for (i = 0; i < 16; i++) {
         cpu_fprintf(f, "R%02d=%016" PRIx64, i, env->regs[i]);
         if ((i % 4) == 3) {
@@ -97,8 +105,6 @@ void cpu_dump_state(CPUS390XState *env, FILE *f, fprintf_function cpu_fprintf,
         }
     }
 
-    cpu_fprintf(f, "\n");
-
 #ifndef CONFIG_USER_ONLY
     for (i = 0; i < 16; i++) {
         cpu_fprintf(f, "C%02d=%016" PRIx64, i, env->cregs[i]);
@@ -110,22 +116,14 @@ void cpu_dump_state(CPUS390XState *env, FILE *f, fprintf_function cpu_fprintf,
     }
 #endif
 
-    cpu_fprintf(f, "\n");
-
-    if (env->cc_op > 3) {
-        cpu_fprintf(f, "PSW=mask %016" PRIx64 " addr %016" PRIx64 " cc %15s\n",
-                    env->psw.mask, env->psw.addr, cc_name(env->cc_op));
-    } else {
-        cpu_fprintf(f, "PSW=mask %016" PRIx64 " addr %016" PRIx64 " cc %02x\n",
-                    env->psw.mask, env->psw.addr, env->cc_op);
-    }
-
 #ifdef DEBUG_INLINE_BRANCHES
     for (i = 0; i < CC_OP_MAX; i++) {
         cpu_fprintf(f, "  %15s = %10ld\t%10ld\n", cc_name(i),
                     inline_branch_miss[i], inline_branch_hit[i]);
     }
 #endif
+
+    cpu_fprintf(f, "\n");
 }
 
 static TCGv_i64 psw_addr;
-- 
cgit v1.2.3


From 946d58be1533bf843b499df12e1d9f97b28245c8 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Tue, 25 Sep 2012 15:47:36 +0200
Subject: block-migration: Flush requests in blk_mig_cleanup

When cancelling block migration, all in-flight requests of the block
migration must be completed before the data can be freed. This was
visible as failing assertions and segfaults.

Reported-by: Peter Lieven <pl@dlhnet.de>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block-migration.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/block-migration.c b/block-migration.c
index 7def8ab19..ed933017f 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -519,6 +519,8 @@ static void blk_mig_cleanup(void)
     BlkMigDevState *bmds;
     BlkMigBlock *blk;
 
+    bdrv_drain_all();
+
     set_dirty_tracking(0);
 
     while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
-- 
cgit v1.2.3


From 870f5681c9dbafc738082b1fd48e0cc013bf43c7 Mon Sep 17 00:00:00 2001
From: Jeff Cody <jcody@redhat.com>
Date: Tue, 25 Sep 2012 12:29:39 -0400
Subject: block: after creating a live snapshot, make old image read-only

Currently, after a live snapshot of a drive, the image that has
been 'demoted' to be below the new active layer remains r/w.
This patch reopens it read-only.

Note that we do not check for error on the reopen(), because we
will not abort the snapshots if the reopen fails.

This patch depends on the bdrv_reopen() series.

Signed-off-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 blockdev.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/blockdev.c b/blockdev.c
index e5d450f0b..0267fa30b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -805,6 +805,11 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
     QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
         /* This removes our old bs from the bdrv_states, and adds the new bs */
         bdrv_append(states->new_bs, states->old_bs);
+        /* We don't need (or want) to use the transactional
+         * bdrv_reopen_multiple() across all the entries at once, because we
+         * don't want to abort all of them if one of them fails the reopen */
+        bdrv_reopen(states->new_bs, states->new_bs->open_flags & ~BDRV_O_RDWR,
+                    NULL);
     }
 
     /* success */
-- 
cgit v1.2.3


From 00f78533326c5ba2e62fafada16655aa558a5520 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 24 Sep 2012 14:40:56 +0530
Subject: aio: Fix qemu_aio_wait() to maintain correct walking_handlers count

Fix qemu_aio_wait() to ensure that registered aio handlers don't get
deleted when they are still active. This is ensured by maintaning the
right count of walking_handlers.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 aio.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/aio.c b/aio.c
index 0a9eb10c7..99b8b7226 100644
--- a/aio.c
+++ b/aio.c
@@ -119,7 +119,7 @@ bool qemu_aio_wait(void)
         return true;
     }
 
-    walking_handlers = 1;
+    walking_handlers++;
 
     FD_ZERO(&rdfds);
     FD_ZERO(&wrfds);
@@ -147,7 +147,7 @@ bool qemu_aio_wait(void)
         }
     }
 
-    walking_handlers = 0;
+    walking_handlers--;
 
     /* No AIO operations?  Get us out of here */
     if (!busy) {
@@ -159,7 +159,7 @@ bool qemu_aio_wait(void)
 
     /* if we have any readable fds, dispatch event */
     if (ret > 0) {
-        walking_handlers = 1;
+        walking_handlers++;
 
         /* we have to walk very carefully in case
          * qemu_aio_set_fd_handler is called while we're walking */
@@ -187,7 +187,7 @@ bool qemu_aio_wait(void)
             }
         }
 
-        walking_handlers = 0;
+        walking_handlers--;
     }
 
     return true;
-- 
cgit v1.2.3


From ca0defb95c69f88a905d64adbe253e7bb8fde14a Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 24 Sep 2012 14:42:02 +0530
Subject: qemu: URI parsing library

Add a new URI parsing library to QEMU. The code has been borrowed from
libxml2 and libvirt.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 Makefile.objs |    2 +-
 uri.c         | 2249 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 uri.h         |  113 +++
 3 files changed, 2363 insertions(+), 1 deletion(-)
 create mode 100644 uri.c
 create mode 100644 uri.h

diff --git a/Makefile.objs b/Makefile.objs
index 441275730..7c1c68206 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -42,7 +42,7 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o
 # block-obj-y is code used by both qemu system emulation and qemu-img
 
 block-obj-y = cutils.o iov.o cache-utils.o qemu-option.o module.o async.o
-block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o
+block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o uri.o
 block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y)
 block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
diff --git a/uri.c b/uri.c
new file mode 100644
index 000000000..dd922de33
--- /dev/null
+++ b/uri.c
@@ -0,0 +1,2249 @@
+/**
+ * uri.c: set of generic URI related routines
+ *
+ * Reference: RFCs 3986, 2732 and 2373
+ *
+ * Copyright (C) 1998-2003 Daniel Veillard.  All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the name of Daniel Veillard shall not
+ * be used in advertising or otherwise to promote the sale, use or other
+ * dealings in this Software without prior written authorization from him.
+ *
+ * daniel@veillard.com
+ *
+ **
+ *
+ * Copyright (C) 2007, 2009-2010 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ *
+ * Authors:
+ *    Richard W.M. Jones <rjones@redhat.com>
+ *
+ */
+
+#include <glib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "uri.h"
+
+static void uri_clean(URI *uri);
+
+/*
+ * Old rule from 2396 used in legacy handling code
+ * alpha    = lowalpha | upalpha
+ */
+#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
+
+
+/*
+ * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
+ *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
+ *            "u" | "v" | "w" | "x" | "y" | "z"
+ */
+
+#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
+
+/*
+ * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
+ *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
+ *           "U" | "V" | "W" | "X" | "Y" | "Z"
+ */
+#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
+
+#ifdef IS_DIGIT
+#undef IS_DIGIT
+#endif
+/*
+ * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
+ */
+#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
+
+/*
+ * alphanum = alpha | digit
+ */
+
+#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
+
+/*
+ * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
+ */
+
+#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
+    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
+    ((x) == '(') || ((x) == ')'))
+
+/*
+ * unwise = "{" | "}" | "|" | "\" | "^" | "`"
+ */
+
+#define IS_UNWISE(p)                                                    \
+      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
+       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
+       ((*(p) == ']')) || ((*(p) == '`')))
+/*
+ * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
+ *            "[" | "]"
+ */
+
+#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
+        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
+        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
+        ((x) == ']'))
+
+/*
+ * unreserved = alphanum | mark
+ */
+
+#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
+
+/*
+ * Skip to next pointer char, handle escaped sequences
+ */
+
+#define NEXT(p) ((*p == '%')? p += 3 : p++)
+
+/*
+ * Productions from the spec.
+ *
+ *    authority     = server | reg_name
+ *    reg_name      = 1*( unreserved | escaped | "$" | "," |
+ *                        ";" | ":" | "@" | "&" | "=" | "+" )
+ *
+ * path          = [ abs_path | opaque_part ]
+ */
+
+
+/************************************************************************
+ *									*
+ *                         RFC 3986 parser				*
+ *									*
+ ************************************************************************/
+
+#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
+#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
+                      ((*(p) >= 'A') && (*(p) <= 'Z')))
+#define ISA_HEXDIG(p)							\
+       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
+        ((*(p) >= 'A') && (*(p) <= 'F')))
+
+/*
+ *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
+ *                     / "*" / "+" / "," / ";" / "="
+ */
+#define ISA_SUB_DELIM(p)						\
+      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
+       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
+       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
+       ((*(p) == '=')) || ((*(p) == '\'')))
+
+/*
+ *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+ */
+#define ISA_GEN_DELIM(p)						\
+      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
+       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
+       ((*(p) == '@')))
+
+/*
+ *    reserved      = gen-delims / sub-delims
+ */
+#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
+
+/*
+ *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ */
+#define ISA_UNRESERVED(p)						\
+      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
+       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
+
+/*
+ *    pct-encoded   = "%" HEXDIG HEXDIG
+ */
+#define ISA_PCT_ENCODED(p)						\
+     ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
+
+/*
+ *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
+ */
+#define ISA_PCHAR(p)							\
+     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
+      ((*(p) == ':')) || ((*(p) == '@')))
+
+/**
+ * rfc3986_parse_scheme:
+ * @uri:  pointer to an URI structure
+ * @str:  pointer to the string to analyze
+ *
+ * Parse an URI scheme
+ *
+ * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_scheme(URI *uri, const char **str) {
+    const char *cur;
+
+    if (str == NULL)
+	return(-1);
+
+    cur = *str;
+    if (!ISA_ALPHA(cur))
+	return(2);
+    cur++;
+    while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
+           (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
+    if (uri != NULL) {
+	if (uri->scheme != NULL) g_free(uri->scheme);
+	uri->scheme = g_strndup(*str, cur - *str);
+    }
+    *str = cur;
+    return(0);
+}
+
+/**
+ * rfc3986_parse_fragment:
+ * @uri:  pointer to an URI structure
+ * @str:  pointer to the string to analyze
+ *
+ * Parse the query part of an URI
+ *
+ * fragment      = *( pchar / "/" / "?" )
+ * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
+ *       in the fragment identifier but this is used very broadly for
+ *       xpointer scheme selection, so we are allowing it here to not break
+ *       for example all the DocBook processing chains.
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_fragment(URI *uri, const char **str)
+{
+    const char *cur;
+
+    if (str == NULL)
+        return (-1);
+
+    cur = *str;
+
+    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
+           (*cur == '[') || (*cur == ']') ||
+           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
+        NEXT(cur);
+    if (uri != NULL) {
+        if (uri->fragment != NULL)
+            g_free(uri->fragment);
+	if (uri->cleanup & 2)
+	    uri->fragment = g_strndup(*str, cur - *str);
+	else
+	    uri->fragment = uri_string_unescape(*str, cur - *str, NULL);
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_query:
+ * @uri:  pointer to an URI structure
+ * @str:  pointer to the string to analyze
+ *
+ * Parse the query part of an URI
+ *
+ * query = *uric
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_query(URI *uri, const char **str)
+{
+    const char *cur;
+
+    if (str == NULL)
+        return (-1);
+
+    cur = *str;
+
+    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
+           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
+        NEXT(cur);
+    if (uri != NULL) {
+	if (uri->query != NULL)
+	    g_free (uri->query);
+	uri->query = g_strndup (*str, cur - *str);
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_port:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse a port  part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * port          = *DIGIT
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_port(URI *uri, const char **str)
+{
+    const char *cur = *str;
+
+    if (ISA_DIGIT(cur)) {
+	if (uri != NULL)
+	    uri->port = 0;
+	while (ISA_DIGIT(cur)) {
+	    if (uri != NULL)
+		uri->port = uri->port * 10 + (*cur - '0');
+	    cur++;
+	}
+	*str = cur;
+	return(0);
+    }
+    return(1);
+}
+
+/**
+ * rfc3986_parse_user_info:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an user informations part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_user_info(URI *uri, const char **str)
+{
+    const char *cur;
+
+    cur = *str;
+    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
+           ISA_SUB_DELIM(cur) || (*cur == ':'))
+	NEXT(cur);
+    if (*cur == '@') {
+	if (uri != NULL) {
+	    if (uri->user != NULL) g_free(uri->user);
+	    if (uri->cleanup & 2)
+		uri->user = g_strndup(*str, cur - *str);
+	    else
+		uri->user = uri_string_unescape(*str, cur - *str, NULL);
+	}
+	*str = cur;
+	return(0);
+    }
+    return(1);
+}
+
+/**
+ * rfc3986_parse_dec_octet:
+ * @str:  the string to analyze
+ *
+ *    dec-octet     = DIGIT                 ; 0-9
+ *                  / %x31-39 DIGIT         ; 10-99
+ *                  / "1" 2DIGIT            ; 100-199
+ *                  / "2" %x30-34 DIGIT     ; 200-249
+ *                  / "25" %x30-35          ; 250-255
+ *
+ * Skip a dec-octet.
+ *
+ * Returns 0 if found and skipped, 1 otherwise
+ */
+static int
+rfc3986_parse_dec_octet(const char **str) {
+    const char *cur = *str;
+
+    if (!(ISA_DIGIT(cur)))
+        return(1);
+    if (!ISA_DIGIT(cur+1))
+	cur++;
+    else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
+	cur += 2;
+    else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
+	cur += 3;
+    else if ((*cur == '2') && (*(cur + 1) >= '0') &&
+	     (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
+	cur += 3;
+    else if ((*cur == '2') && (*(cur + 1) == '5') &&
+	     (*(cur + 2) >= '0') && (*(cur + 1) <= '5'))
+	cur += 3;
+    else
+        return(1);
+    *str = cur;
+    return(0);
+}
+/**
+ * rfc3986_parse_host:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an host part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * host          = IP-literal / IPv4address / reg-name
+ * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
+ * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
+ * reg-name      = *( unreserved / pct-encoded / sub-delims )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_host(URI *uri, const char **str)
+{
+    const char *cur = *str;
+    const char *host;
+
+    host = cur;
+    /*
+     * IPv6 and future adressing scheme are enclosed between brackets
+     */
+    if (*cur == '[') {
+        cur++;
+	while ((*cur != ']') && (*cur != 0))
+	    cur++;
+	if (*cur != ']')
+	    return(1);
+	cur++;
+	goto found;
+    }
+    /*
+     * try to parse an IPv4
+     */
+    if (ISA_DIGIT(cur)) {
+        if (rfc3986_parse_dec_octet(&cur) != 0)
+	    goto not_ipv4;
+	if (*cur != '.')
+	    goto not_ipv4;
+	cur++;
+        if (rfc3986_parse_dec_octet(&cur) != 0)
+	    goto not_ipv4;
+	if (*cur != '.')
+	    goto not_ipv4;
+        if (rfc3986_parse_dec_octet(&cur) != 0)
+	    goto not_ipv4;
+	if (*cur != '.')
+	    goto not_ipv4;
+        if (rfc3986_parse_dec_octet(&cur) != 0)
+	    goto not_ipv4;
+	goto found;
+not_ipv4:
+        cur = *str;
+    }
+    /*
+     * then this should be a hostname which can be empty
+     */
+    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
+        NEXT(cur);
+found:
+    if (uri != NULL) {
+	if (uri->authority != NULL) g_free(uri->authority);
+	uri->authority = NULL;
+	if (uri->server != NULL) g_free(uri->server);
+	if (cur != host) {
+	    if (uri->cleanup & 2)
+		uri->server = g_strndup(host, cur - host);
+	    else
+		uri->server = uri_string_unescape(host, cur - host, NULL);
+	} else
+	    uri->server = NULL;
+    }
+    *str = cur;
+    return(0);
+}
+
+/**
+ * rfc3986_parse_authority:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an authority part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * authority     = [ userinfo "@" ] host [ ":" port ]
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_authority(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+    /*
+     * try to parse an userinfo and check for the trailing @
+     */
+    ret = rfc3986_parse_user_info(uri, &cur);
+    if ((ret != 0) || (*cur != '@'))
+        cur = *str;
+    else
+        cur++;
+    ret = rfc3986_parse_host(uri, &cur);
+    if (ret != 0) return(ret);
+    if (*cur == ':') {
+        cur++;
+        ret = rfc3986_parse_port(uri, &cur);
+	if (ret != 0) return(ret);
+    }
+    *str = cur;
+    return(0);
+}
+
+/**
+ * rfc3986_parse_segment:
+ * @str:  the string to analyze
+ * @forbid: an optional forbidden character
+ * @empty: allow an empty segment
+ *
+ * Parse a segment and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * segment       = *pchar
+ * segment-nz    = 1*pchar
+ * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+ *               ; non-zero-length segment without any colon ":"
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_segment(const char **str, char forbid, int empty)
+{
+    const char *cur;
+
+    cur = *str;
+    if (!ISA_PCHAR(cur)) {
+        if (empty)
+	    return(0);
+	return(1);
+    }
+    while (ISA_PCHAR(cur) && (*cur != forbid))
+        NEXT(cur);
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_path_ab_empty:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an path absolute or empty and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-abempty  = *( "/" segment )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_ab_empty(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    while (*cur == '/') {
+        cur++;
+	ret = rfc3986_parse_segment(&cur, 0, 1);
+	if (ret != 0) return(ret);
+    }
+    if (uri != NULL) {
+	if (uri->path != NULL) g_free(uri->path);
+        if (*str != cur) {
+            if (uri->cleanup & 2)
+                uri->path = g_strndup(*str, cur - *str);
+            else
+                uri->path = uri_string_unescape(*str, cur - *str, NULL);
+        } else {
+            uri->path = NULL;
+        }
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_path_absolute:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an path absolute and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-absolute = "/" [ segment-nz *( "/" segment ) ]
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_absolute(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    if (*cur != '/')
+        return(1);
+    cur++;
+    ret = rfc3986_parse_segment(&cur, 0, 0);
+    if (ret == 0) {
+	while (*cur == '/') {
+	    cur++;
+	    ret = rfc3986_parse_segment(&cur, 0, 1);
+	    if (ret != 0) return(ret);
+	}
+    }
+    if (uri != NULL) {
+	if (uri->path != NULL) g_free(uri->path);
+        if (cur != *str) {
+            if (uri->cleanup & 2)
+                uri->path = g_strndup(*str, cur - *str);
+            else
+                uri->path = uri_string_unescape(*str, cur - *str, NULL);
+        } else {
+            uri->path = NULL;
+        }
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_path_rootless:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an path without root and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-rootless = segment-nz *( "/" segment )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_rootless(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    ret = rfc3986_parse_segment(&cur, 0, 0);
+    if (ret != 0) return(ret);
+    while (*cur == '/') {
+        cur++;
+	ret = rfc3986_parse_segment(&cur, 0, 1);
+	if (ret != 0) return(ret);
+    }
+    if (uri != NULL) {
+	if (uri->path != NULL) g_free(uri->path);
+        if (cur != *str) {
+            if (uri->cleanup & 2)
+                uri->path = g_strndup(*str, cur - *str);
+            else
+                uri->path = uri_string_unescape(*str, cur - *str, NULL);
+        } else {
+            uri->path = NULL;
+        }
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_path_no_scheme:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an path which is not a scheme and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-noscheme = segment-nz-nc *( "/" segment )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_no_scheme(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    ret = rfc3986_parse_segment(&cur, ':', 0);
+    if (ret != 0) return(ret);
+    while (*cur == '/') {
+        cur++;
+	ret = rfc3986_parse_segment(&cur, 0, 1);
+	if (ret != 0) return(ret);
+    }
+    if (uri != NULL) {
+	if (uri->path != NULL) g_free(uri->path);
+        if (cur != *str) {
+            if (uri->cleanup & 2)
+                uri->path = g_strndup(*str, cur - *str);
+            else
+                uri->path = uri_string_unescape(*str, cur - *str, NULL);
+        } else {
+            uri->path = NULL;
+        }
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_hier_part:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an hierarchical part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * hier-part     = "//" authority path-abempty
+ *                / path-absolute
+ *                / path-rootless
+ *                / path-empty
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_hier_part(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    if ((*cur == '/') && (*(cur + 1) == '/')) {
+        cur += 2;
+	ret = rfc3986_parse_authority(uri, &cur);
+	if (ret != 0) return(ret);
+	ret = rfc3986_parse_path_ab_empty(uri, &cur);
+	if (ret != 0) return(ret);
+	*str = cur;
+	return(0);
+    } else if (*cur == '/') {
+        ret = rfc3986_parse_path_absolute(uri, &cur);
+	if (ret != 0) return(ret);
+    } else if (ISA_PCHAR(cur)) {
+        ret = rfc3986_parse_path_rootless(uri, &cur);
+	if (ret != 0) return(ret);
+    } else {
+	/* path-empty is effectively empty */
+	if (uri != NULL) {
+	    if (uri->path != NULL) g_free(uri->path);
+	    uri->path = NULL;
+	}
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_relative_ref:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an URI string and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
+ * relative-part = "//" authority path-abempty
+ *               / path-absolute
+ *               / path-noscheme
+ *               / path-empty
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_relative_ref(URI *uri, const char *str) {
+    int ret;
+
+    if ((*str == '/') && (*(str + 1) == '/')) {
+        str += 2;
+	ret = rfc3986_parse_authority(uri, &str);
+	if (ret != 0) return(ret);
+	ret = rfc3986_parse_path_ab_empty(uri, &str);
+	if (ret != 0) return(ret);
+    } else if (*str == '/') {
+	ret = rfc3986_parse_path_absolute(uri, &str);
+	if (ret != 0) return(ret);
+    } else if (ISA_PCHAR(str)) {
+        ret = rfc3986_parse_path_no_scheme(uri, &str);
+	if (ret != 0) return(ret);
+    } else {
+	/* path-empty is effectively empty */
+	if (uri != NULL) {
+	    if (uri->path != NULL) g_free(uri->path);
+	    uri->path = NULL;
+	}
+    }
+
+    if (*str == '?') {
+	str++;
+	ret = rfc3986_parse_query(uri, &str);
+	if (ret != 0) return(ret);
+    }
+    if (*str == '#') {
+	str++;
+	ret = rfc3986_parse_fragment(uri, &str);
+	if (ret != 0) return(ret);
+    }
+    if (*str != 0) {
+	uri_clean(uri);
+	return(1);
+    }
+    return(0);
+}
+
+
+/**
+ * rfc3986_parse:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an URI string and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse(URI *uri, const char *str) {
+    int ret;
+
+    ret = rfc3986_parse_scheme(uri, &str);
+    if (ret != 0) return(ret);
+    if (*str != ':') {
+	return(1);
+    }
+    str++;
+    ret = rfc3986_parse_hier_part(uri, &str);
+    if (ret != 0) return(ret);
+    if (*str == '?') {
+	str++;
+	ret = rfc3986_parse_query(uri, &str);
+	if (ret != 0) return(ret);
+    }
+    if (*str == '#') {
+	str++;
+	ret = rfc3986_parse_fragment(uri, &str);
+	if (ret != 0) return(ret);
+    }
+    if (*str != 0) {
+	uri_clean(uri);
+	return(1);
+    }
+    return(0);
+}
+
+/**
+ * rfc3986_parse_uri_reference:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an URI reference string and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * URI-reference = URI / relative-ref
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_uri_reference(URI *uri, const char *str) {
+    int ret;
+
+    if (str == NULL)
+	return(-1);
+    uri_clean(uri);
+
+    /*
+     * Try first to parse absolute refs, then fallback to relative if
+     * it fails.
+     */
+    ret = rfc3986_parse(uri, str);
+    if (ret != 0) {
+	uri_clean(uri);
+        ret = rfc3986_parse_relative_ref(uri, str);
+	if (ret != 0) {
+	    uri_clean(uri);
+	    return(ret);
+	}
+    }
+    return(0);
+}
+
+/**
+ * uri_parse:
+ * @str:  the URI string to analyze
+ *
+ * Parse an URI based on RFC 3986
+ *
+ * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+ *
+ * Returns a newly built URI or NULL in case of error
+ */
+URI *
+uri_parse(const char *str) {
+    URI *uri;
+    int ret;
+
+    if (str == NULL)
+	return(NULL);
+    uri = uri_new();
+    if (uri != NULL) {
+	ret = rfc3986_parse_uri_reference(uri, str);
+        if (ret) {
+	    uri_free(uri);
+	    return(NULL);
+	}
+    }
+    return(uri);
+}
+
+/**
+ * uri_parse_into:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an URI reference string based on RFC 3986 and fills in the
+ * appropriate fields of the @uri structure
+ *
+ * URI-reference = URI / relative-ref
+ *
+ * Returns 0 or the error code
+ */
+int
+uri_parse_into(URI *uri, const char *str) {
+    return(rfc3986_parse_uri_reference(uri, str));
+}
+
+/**
+ * uri_parse_raw:
+ * @str:  the URI string to analyze
+ * @raw:  if 1 unescaping of URI pieces are disabled
+ *
+ * Parse an URI but allows to keep intact the original fragments.
+ *
+ * URI-reference = URI / relative-ref
+ *
+ * Returns a newly built URI or NULL in case of error
+ */
+URI *
+uri_parse_raw(const char *str, int raw) {
+    URI *uri;
+    int ret;
+
+    if (str == NULL)
+	return(NULL);
+    uri = uri_new();
+    if (uri != NULL) {
+        if (raw) {
+	    uri->cleanup |= 2;
+	}
+	ret = uri_parse_into(uri, str);
+        if (ret) {
+	    uri_free(uri);
+	    return(NULL);
+	}
+    }
+    return(uri);
+}
+
+/************************************************************************
+ *									*
+ *			Generic URI structure functions			*
+ *									*
+ ************************************************************************/
+
+/**
+ * uri_new:
+ *
+ * Simply creates an empty URI
+ *
+ * Returns the new structure or NULL in case of error
+ */
+URI *
+uri_new(void) {
+    URI *ret;
+
+    ret = (URI *) g_malloc(sizeof(URI));
+    memset(ret, 0, sizeof(URI));
+    return(ret);
+}
+
+/**
+ * realloc2n:
+ *
+ * Function to handle properly a reallocation when saving an URI
+ * Also imposes some limit on the length of an URI string output
+ */
+static char *
+realloc2n(char *ret, int *max) {
+    char *temp;
+    int tmp;
+
+    tmp = *max * 2;
+    temp = g_realloc(ret, (tmp + 1));
+    *max = tmp;
+    return(temp);
+}
+
+/**
+ * uri_to_string:
+ * @uri:  pointer to an URI
+ *
+ * Save the URI as an escaped string
+ *
+ * Returns a new string (to be deallocated by caller)
+ */
+char *
+uri_to_string(URI *uri) {
+    char *ret = NULL;
+    char *temp;
+    const char *p;
+    int len;
+    int max;
+
+    if (uri == NULL) return(NULL);
+
+
+    max = 80;
+    ret = g_malloc(max + 1);
+    len = 0;
+
+    if (uri->scheme != NULL) {
+	p = uri->scheme;
+	while (*p != 0) {
+	    if (len >= max) {
+                temp = realloc2n(ret, &max);
+                if (temp == NULL) goto mem_error;
+		ret = temp;
+	    }
+	    ret[len++] = *p++;
+	}
+	if (len >= max) {
+            temp = realloc2n(ret, &max);
+            if (temp == NULL) goto mem_error;
+            ret = temp;
+	}
+	ret[len++] = ':';
+    }
+    if (uri->opaque != NULL) {
+	p = uri->opaque;
+	while (*p != 0) {
+	    if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                if (temp == NULL) goto mem_error;
+                ret = temp;
+	    }
+	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
+		ret[len++] = *p++;
+	    else {
+		int val = *(unsigned char *)p++;
+		int hi = val / 0x10, lo = val % 0x10;
+		ret[len++] = '%';
+		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+	    }
+	}
+    } else {
+	if (uri->server != NULL) {
+	    if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                if (temp == NULL) goto mem_error;
+                ret = temp;
+	    }
+	    ret[len++] = '/';
+	    ret[len++] = '/';
+	    if (uri->user != NULL) {
+		p = uri->user;
+		while (*p != 0) {
+		    if (len + 3 >= max) {
+                        temp = realloc2n(ret, &max);
+                        if (temp == NULL) goto mem_error;
+                        ret = temp;
+		    }
+		    if ((IS_UNRESERVED(*(p))) ||
+			((*(p) == ';')) || ((*(p) == ':')) ||
+			((*(p) == '&')) || ((*(p) == '=')) ||
+			((*(p) == '+')) || ((*(p) == '$')) ||
+			((*(p) == ',')))
+			ret[len++] = *p++;
+		    else {
+			int val = *(unsigned char *)p++;
+			int hi = val / 0x10, lo = val % 0x10;
+			ret[len++] = '%';
+			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+		    }
+		}
+		if (len + 3 >= max) {
+                    temp = realloc2n(ret, &max);
+                    if (temp == NULL) goto mem_error;
+                    ret = temp;
+		}
+		ret[len++] = '@';
+	    }
+	    p = uri->server;
+	    while (*p != 0) {
+		if (len >= max) {
+                    temp = realloc2n(ret, &max);
+                    if (temp == NULL) goto mem_error;
+                    ret = temp;
+		}
+		ret[len++] = *p++;
+	    }
+	    if (uri->port > 0) {
+		if (len + 10 >= max) {
+                    temp = realloc2n(ret, &max);
+                    if (temp == NULL) goto mem_error;
+                    ret = temp;
+		}
+		len += snprintf(&ret[len], max - len, ":%d", uri->port);
+	    }
+	} else if (uri->authority != NULL) {
+	    if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                if (temp == NULL) goto mem_error;
+                ret = temp;
+	    }
+	    ret[len++] = '/';
+	    ret[len++] = '/';
+	    p = uri->authority;
+	    while (*p != 0) {
+		if (len + 3 >= max) {
+                    temp = realloc2n(ret, &max);
+                    if (temp == NULL) goto mem_error;
+                    ret = temp;
+		}
+		if ((IS_UNRESERVED(*(p))) ||
+                    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
+                    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
+                    ((*(p) == '=')) || ((*(p) == '+')))
+		    ret[len++] = *p++;
+		else {
+		    int val = *(unsigned char *)p++;
+		    int hi = val / 0x10, lo = val % 0x10;
+		    ret[len++] = '%';
+		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+		}
+	    }
+	} else if (uri->scheme != NULL) {
+	    if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                if (temp == NULL) goto mem_error;
+                ret = temp;
+	    }
+	    ret[len++] = '/';
+	    ret[len++] = '/';
+	}
+	if (uri->path != NULL) {
+	    p = uri->path;
+	    /*
+	     * the colon in file:///d: should not be escaped or
+	     * Windows accesses fail later.
+	     */
+	    if ((uri->scheme != NULL) &&
+		(p[0] == '/') &&
+		(((p[1] >= 'a') && (p[1] <= 'z')) ||
+		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
+		(p[2] == ':') &&
+	        (!strcmp(uri->scheme, "file"))) {
+		if (len + 3 >= max) {
+                    temp = realloc2n(ret, &max);
+                    if (temp == NULL) goto mem_error;
+                    ret = temp;
+		}
+		ret[len++] = *p++;
+		ret[len++] = *p++;
+		ret[len++] = *p++;
+	    }
+	    while (*p != 0) {
+		if (len + 3 >= max) {
+                    temp = realloc2n(ret, &max);
+                    if (temp == NULL) goto mem_error;
+                    ret = temp;
+		}
+		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
+                    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
+	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
+	            ((*(p) == ',')))
+		    ret[len++] = *p++;
+		else {
+		    int val = *(unsigned char *)p++;
+		    int hi = val / 0x10, lo = val % 0x10;
+		    ret[len++] = '%';
+		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+		}
+	    }
+	}
+	if (uri->query != NULL) {
+	    if (len + 1 >= max) {
+                temp = realloc2n(ret, &max);
+                if (temp == NULL) goto mem_error;
+                ret = temp;
+	    }
+	    ret[len++] = '?';
+	    p = uri->query;
+	    while (*p != 0) {
+		if (len + 1 >= max) {
+                    temp = realloc2n(ret, &max);
+                    if (temp == NULL) goto mem_error;
+                    ret = temp;
+		}
+		ret[len++] = *p++;
+	    }
+	}
+    }
+    if (uri->fragment != NULL) {
+	if (len + 3 >= max) {
+            temp = realloc2n(ret, &max);
+            if (temp == NULL) goto mem_error;
+            ret = temp;
+	}
+	ret[len++] = '#';
+	p = uri->fragment;
+	while (*p != 0) {
+	    if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                if (temp == NULL) goto mem_error;
+                ret = temp;
+	    }
+	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
+		ret[len++] = *p++;
+	    else {
+		int val = *(unsigned char *)p++;
+		int hi = val / 0x10, lo = val % 0x10;
+		ret[len++] = '%';
+		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+	    }
+	}
+    }
+    if (len >= max) {
+        temp = realloc2n(ret, &max);
+        if (temp == NULL) goto mem_error;
+        ret = temp;
+    }
+    ret[len] = 0;
+    return(ret);
+
+mem_error:
+    g_free(ret);
+    return(NULL);
+}
+
+/**
+ * uri_clean:
+ * @uri:  pointer to an URI
+ *
+ * Make sure the URI struct is free of content
+ */
+static void
+uri_clean(URI *uri) {
+    if (uri == NULL) return;
+
+    if (uri->scheme != NULL) g_free(uri->scheme);
+    uri->scheme = NULL;
+    if (uri->server != NULL) g_free(uri->server);
+    uri->server = NULL;
+    if (uri->user != NULL) g_free(uri->user);
+    uri->user = NULL;
+    if (uri->path != NULL) g_free(uri->path);
+    uri->path = NULL;
+    if (uri->fragment != NULL) g_free(uri->fragment);
+    uri->fragment = NULL;
+    if (uri->opaque != NULL) g_free(uri->opaque);
+    uri->opaque = NULL;
+    if (uri->authority != NULL) g_free(uri->authority);
+    uri->authority = NULL;
+    if (uri->query != NULL) g_free(uri->query);
+    uri->query = NULL;
+}
+
+/**
+ * uri_free:
+ * @uri:  pointer to an URI
+ *
+ * Free up the URI struct
+ */
+void
+uri_free(URI *uri) {
+    uri_clean(uri);
+    g_free(uri);
+}
+
+/************************************************************************
+ *									*
+ *			Helper functions				*
+ *									*
+ ************************************************************************/
+
+/**
+ * normalize_uri_path:
+ * @path:  pointer to the path string
+ *
+ * Applies the 5 normalization steps to a path string--that is, RFC 2396
+ * Section 5.2, steps 6.c through 6.g.
+ *
+ * Normalization occurs directly on the string, no new allocation is done
+ *
+ * Returns 0 or an error code
+ */
+static int
+normalize_uri_path(char *path) {
+    char *cur, *out;
+
+    if (path == NULL)
+	return(-1);
+
+    /* Skip all initial "/" chars.  We want to get to the beginning of the
+     * first non-empty segment.
+     */
+    cur = path;
+    while (cur[0] == '/')
+      ++cur;
+    if (cur[0] == '\0')
+      return(0);
+
+    /* Keep everything we've seen so far.  */
+    out = cur;
+
+    /*
+     * Analyze each segment in sequence for cases (c) and (d).
+     */
+    while (cur[0] != '\0') {
+	/*
+	 * c) All occurrences of "./", where "." is a complete path segment,
+	 *    are removed from the buffer string.
+	 */
+	if ((cur[0] == '.') && (cur[1] == '/')) {
+	    cur += 2;
+	    /* '//' normalization should be done at this point too */
+	    while (cur[0] == '/')
+		cur++;
+	    continue;
+	}
+
+	/*
+	 * d) If the buffer string ends with "." as a complete path segment,
+	 *    that "." is removed.
+	 */
+	if ((cur[0] == '.') && (cur[1] == '\0'))
+	    break;
+
+	/* Otherwise keep the segment.  */
+	while (cur[0] != '/') {
+            if (cur[0] == '\0')
+              goto done_cd;
+	    (out++)[0] = (cur++)[0];
+	}
+	/* nomalize // */
+	while ((cur[0] == '/') && (cur[1] == '/'))
+	    cur++;
+
+        (out++)[0] = (cur++)[0];
+    }
+ done_cd:
+    out[0] = '\0';
+
+    /* Reset to the beginning of the first segment for the next sequence.  */
+    cur = path;
+    while (cur[0] == '/')
+      ++cur;
+    if (cur[0] == '\0')
+	return(0);
+
+    /*
+     * Analyze each segment in sequence for cases (e) and (f).
+     *
+     * e) All occurrences of "<segment>/../", where <segment> is a
+     *    complete path segment not equal to "..", are removed from the
+     *    buffer string.  Removal of these path segments is performed
+     *    iteratively, removing the leftmost matching pattern on each
+     *    iteration, until no matching pattern remains.
+     *
+     * f) If the buffer string ends with "<segment>/..", where <segment>
+     *    is a complete path segment not equal to "..", that
+     *    "<segment>/.." is removed.
+     *
+     * To satisfy the "iterative" clause in (e), we need to collapse the
+     * string every time we find something that needs to be removed.  Thus,
+     * we don't need to keep two pointers into the string: we only need a
+     * "current position" pointer.
+     */
+    while (1) {
+        char *segp, *tmp;
+
+        /* At the beginning of each iteration of this loop, "cur" points to
+         * the first character of the segment we want to examine.
+         */
+
+        /* Find the end of the current segment.  */
+        segp = cur;
+        while ((segp[0] != '/') && (segp[0] != '\0'))
+          ++segp;
+
+        /* If this is the last segment, we're done (we need at least two
+         * segments to meet the criteria for the (e) and (f) cases).
+         */
+        if (segp[0] == '\0')
+          break;
+
+        /* If the first segment is "..", or if the next segment _isn't_ "..",
+         * keep this segment and try the next one.
+         */
+        ++segp;
+        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
+            || ((segp[0] != '.') || (segp[1] != '.')
+                || ((segp[2] != '/') && (segp[2] != '\0')))) {
+          cur = segp;
+          continue;
+        }
+
+        /* If we get here, remove this segment and the next one and back up
+         * to the previous segment (if there is one), to implement the
+         * "iteratively" clause.  It's pretty much impossible to back up
+         * while maintaining two pointers into the buffer, so just compact
+         * the whole buffer now.
+         */
+
+        /* If this is the end of the buffer, we're done.  */
+        if (segp[2] == '\0') {
+          cur[0] = '\0';
+          break;
+        }
+        /* Valgrind complained, strcpy(cur, segp + 3); */
+        /* string will overlap, do not use strcpy */
+        tmp = cur;
+        segp += 3;
+        while ((*tmp++ = *segp++) != 0)
+          ;
+
+        /* If there are no previous segments, then keep going from here.  */
+        segp = cur;
+        while ((segp > path) && ((--segp)[0] == '/'))
+          ;
+        if (segp == path)
+          continue;
+
+        /* "segp" is pointing to the end of a previous segment; find it's
+         * start.  We need to back up to the previous segment and start
+         * over with that to handle things like "foo/bar/../..".  If we
+         * don't do this, then on the first pass we'll remove the "bar/..",
+         * but be pointing at the second ".." so we won't realize we can also
+         * remove the "foo/..".
+         */
+        cur = segp;
+        while ((cur > path) && (cur[-1] != '/'))
+          --cur;
+    }
+    out[0] = '\0';
+
+    /*
+     * g) If the resulting buffer string still begins with one or more
+     *    complete path segments of "..", then the reference is
+     *    considered to be in error. Implementations may handle this
+     *    error by retaining these components in the resolved path (i.e.,
+     *    treating them as part of the final URI), by removing them from
+     *    the resolved path (i.e., discarding relative levels above the
+     *    root), or by avoiding traversal of the reference.
+     *
+     * We discard them from the final path.
+     */
+    if (path[0] == '/') {
+      cur = path;
+      while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
+             && ((cur[3] == '/') || (cur[3] == '\0')))
+	cur += 3;
+
+      if (cur != path) {
+	out = path;
+	while (cur[0] != '\0')
+          (out++)[0] = (cur++)[0];
+	out[0] = 0;
+      }
+    }
+
+    return(0);
+}
+
+static int is_hex(char c) {
+    if (((c >= '0') && (c <= '9')) ||
+        ((c >= 'a') && (c <= 'f')) ||
+        ((c >= 'A') && (c <= 'F')))
+	return(1);
+    return(0);
+}
+
+
+/**
+ * uri_string_unescape:
+ * @str:  the string to unescape
+ * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
+ * @target:  optional destination buffer
+ *
+ * Unescaping routine, but does not check that the string is an URI. The
+ * output is a direct unsigned char translation of %XX values (no encoding)
+ * Note that the length of the result can only be smaller or same size as
+ * the input string.
+ *
+ * Returns a copy of the string, but unescaped, will return NULL only in case
+ * of error
+ */
+char *
+uri_string_unescape(const char *str, int len, char *target) {
+    char *ret, *out;
+    const char *in;
+
+    if (str == NULL)
+	return(NULL);
+    if (len <= 0) len = strlen(str);
+    if (len < 0) return(NULL);
+
+    if (target == NULL) {
+	ret = g_malloc(len + 1);
+    } else
+	ret = target;
+    in = str;
+    out = ret;
+    while(len > 0) {
+	if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
+	    in++;
+	    if ((*in >= '0') && (*in <= '9'))
+	        *out = (*in - '0');
+	    else if ((*in >= 'a') && (*in <= 'f'))
+	        *out = (*in - 'a') + 10;
+	    else if ((*in >= 'A') && (*in <= 'F'))
+	        *out = (*in - 'A') + 10;
+	    in++;
+	    if ((*in >= '0') && (*in <= '9'))
+	        *out = *out * 16 + (*in - '0');
+	    else if ((*in >= 'a') && (*in <= 'f'))
+	        *out = *out * 16 + (*in - 'a') + 10;
+	    else if ((*in >= 'A') && (*in <= 'F'))
+	        *out = *out * 16 + (*in - 'A') + 10;
+	    in++;
+	    len -= 3;
+	    out++;
+	} else {
+	    *out++ = *in++;
+	    len--;
+	}
+    }
+    *out = 0;
+    return(ret);
+}
+
+/**
+ * uri_string_escape:
+ * @str:  string to escape
+ * @list: exception list string of chars not to escape
+ *
+ * This routine escapes a string to hex, ignoring reserved characters (a-z)
+ * and the characters in the exception list.
+ *
+ * Returns a new escaped string or NULL in case of error.
+ */
+char *
+uri_string_escape(const char *str, const char *list) {
+    char *ret, ch;
+    char *temp;
+    const char *in;
+    int len, out;
+
+    if (str == NULL)
+	return(NULL);
+    if (str[0] == 0)
+	return(g_strdup(str));
+    len = strlen(str);
+    if (!(len > 0)) return(NULL);
+
+    len += 20;
+    ret = g_malloc(len);
+    in = str;
+    out = 0;
+    while(*in != 0) {
+	if (len - out <= 3) {
+            temp = realloc2n(ret, &len);
+	    ret = temp;
+	}
+
+	ch = *in;
+
+	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!strchr(list, ch))) {
+	    unsigned char val;
+	    ret[out++] = '%';
+	    val = ch >> 4;
+	    if (val <= 9)
+		ret[out++] = '0' + val;
+	    else
+		ret[out++] = 'A' + val - 0xA;
+	    val = ch & 0xF;
+	    if (val <= 9)
+		ret[out++] = '0' + val;
+	    else
+		ret[out++] = 'A' + val - 0xA;
+	    in++;
+	} else {
+	    ret[out++] = *in++;
+	}
+
+    }
+    ret[out] = 0;
+    return(ret);
+}
+
+/************************************************************************
+ *									*
+ *			Public functions				*
+ *									*
+ ************************************************************************/
+
+/**
+ * uri_resolve:
+ * @URI:  the URI instance found in the document
+ * @base:  the base value
+ *
+ * Computes he final URI of the reference done by checking that
+ * the given URI is valid, and building the final URI using the
+ * base URI. This is processed according to section 5.2 of the
+ * RFC 2396
+ *
+ * 5.2. Resolving Relative References to Absolute Form
+ *
+ * Returns a new URI string (to be freed by the caller) or NULL in case
+ *         of error.
+ */
+char *
+uri_resolve(const char *uri, const char *base) {
+    char *val = NULL;
+    int ret, len, indx, cur, out;
+    URI *ref = NULL;
+    URI *bas = NULL;
+    URI *res = NULL;
+
+    /*
+     * 1) The URI reference is parsed into the potential four components and
+     *    fragment identifier, as described in Section 4.3.
+     *
+     *    NOTE that a completely empty URI is treated by modern browsers
+     *    as a reference to "." rather than as a synonym for the current
+     *    URI.  Should we do that here?
+     */
+    if (uri == NULL)
+	ret = -1;
+    else {
+	if (*uri) {
+	    ref = uri_new();
+	    if (ref == NULL)
+		goto done;
+	    ret = uri_parse_into(ref, uri);
+	}
+	else
+	    ret = 0;
+    }
+    if (ret != 0)
+	goto done;
+    if ((ref != NULL) && (ref->scheme != NULL)) {
+	/*
+	 * The URI is absolute don't modify.
+	 */
+	val = g_strdup(uri);
+	goto done;
+    }
+    if (base == NULL)
+	ret = -1;
+    else {
+	bas = uri_new();
+	if (bas == NULL)
+	    goto done;
+	ret = uri_parse_into(bas, base);
+    }
+    if (ret != 0) {
+	if (ref)
+	    val = uri_to_string(ref);
+	goto done;
+    }
+    if (ref == NULL) {
+	/*
+	 * the base fragment must be ignored
+	 */
+	if (bas->fragment != NULL) {
+	    g_free(bas->fragment);
+	    bas->fragment = NULL;
+	}
+	val = uri_to_string(bas);
+	goto done;
+    }
+
+    /*
+     * 2) If the path component is empty and the scheme, authority, and
+     *    query components are undefined, then it is a reference to the
+     *    current document and we are done.  Otherwise, the reference URI's
+     *    query and fragment components are defined as found (or not found)
+     *    within the URI reference and not inherited from the base URI.
+     *
+     *    NOTE that in modern browsers, the parsing differs from the above
+     *    in the following aspect:  the query component is allowed to be
+     *    defined while still treating this as a reference to the current
+     *    document.
+     */
+    res = uri_new();
+    if (res == NULL)
+	goto done;
+    if ((ref->scheme == NULL) && (ref->path == NULL) &&
+	((ref->authority == NULL) && (ref->server == NULL))) {
+	if (bas->scheme != NULL)
+	    res->scheme = g_strdup(bas->scheme);
+	if (bas->authority != NULL)
+	    res->authority = g_strdup(bas->authority);
+	else if (bas->server != NULL) {
+	    res->server = g_strdup(bas->server);
+	    if (bas->user != NULL)
+		res->user = g_strdup(bas->user);
+	    res->port = bas->port;
+	}
+	if (bas->path != NULL)
+	    res->path = g_strdup(bas->path);
+	if (ref->query != NULL)
+	    res->query = g_strdup (ref->query);
+	else if (bas->query != NULL)
+	    res->query = g_strdup(bas->query);
+	if (ref->fragment != NULL)
+	    res->fragment = g_strdup(ref->fragment);
+	goto step_7;
+    }
+
+    /*
+     * 3) If the scheme component is defined, indicating that the reference
+     *    starts with a scheme name, then the reference is interpreted as an
+     *    absolute URI and we are done.  Otherwise, the reference URI's
+     *    scheme is inherited from the base URI's scheme component.
+     */
+    if (ref->scheme != NULL) {
+	val = uri_to_string(ref);
+	goto done;
+    }
+    if (bas->scheme != NULL)
+	res->scheme = g_strdup(bas->scheme);
+
+    if (ref->query != NULL)
+	res->query = g_strdup(ref->query);
+    if (ref->fragment != NULL)
+	res->fragment = g_strdup(ref->fragment);
+
+    /*
+     * 4) If the authority component is defined, then the reference is a
+     *    network-path and we skip to step 7.  Otherwise, the reference
+     *    URI's authority is inherited from the base URI's authority
+     *    component, which will also be undefined if the URI scheme does not
+     *    use an authority component.
+     */
+    if ((ref->authority != NULL) || (ref->server != NULL)) {
+	if (ref->authority != NULL)
+	    res->authority = g_strdup(ref->authority);
+	else {
+	    res->server = g_strdup(ref->server);
+	    if (ref->user != NULL)
+		res->user = g_strdup(ref->user);
+            res->port = ref->port;
+	}
+	if (ref->path != NULL)
+	    res->path = g_strdup(ref->path);
+	goto step_7;
+    }
+    if (bas->authority != NULL)
+	res->authority = g_strdup(bas->authority);
+    else if (bas->server != NULL) {
+	res->server = g_strdup(bas->server);
+	if (bas->user != NULL)
+	    res->user = g_strdup(bas->user);
+	res->port = bas->port;
+    }
+
+    /*
+     * 5) If the path component begins with a slash character ("/"), then
+     *    the reference is an absolute-path and we skip to step 7.
+     */
+    if ((ref->path != NULL) && (ref->path[0] == '/')) {
+	res->path = g_strdup(ref->path);
+	goto step_7;
+    }
+
+
+    /*
+     * 6) If this step is reached, then we are resolving a relative-path
+     *    reference.  The relative path needs to be merged with the base
+     *    URI's path.  Although there are many ways to do this, we will
+     *    describe a simple method using a separate string buffer.
+     *
+     * Allocate a buffer large enough for the result string.
+     */
+    len = 2; /* extra / and 0 */
+    if (ref->path != NULL)
+	len += strlen(ref->path);
+    if (bas->path != NULL)
+	len += strlen(bas->path);
+    res->path = g_malloc(len);
+    res->path[0] = 0;
+
+    /*
+     * a) All but the last segment of the base URI's path component is
+     *    copied to the buffer.  In other words, any characters after the
+     *    last (right-most) slash character, if any, are excluded.
+     */
+    cur = 0;
+    out = 0;
+    if (bas->path != NULL) {
+	while (bas->path[cur] != 0) {
+	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
+		cur++;
+	    if (bas->path[cur] == 0)
+		break;
+
+	    cur++;
+	    while (out < cur) {
+		res->path[out] = bas->path[out];
+		out++;
+	    }
+	}
+    }
+    res->path[out] = 0;
+
+    /*
+     * b) The reference's path component is appended to the buffer
+     *    string.
+     */
+    if (ref->path != NULL && ref->path[0] != 0) {
+	indx = 0;
+	/*
+	 * Ensure the path includes a '/'
+	 */
+	if ((out == 0) && (bas->server != NULL))
+	    res->path[out++] = '/';
+	while (ref->path[indx] != 0) {
+	    res->path[out++] = ref->path[indx++];
+	}
+    }
+    res->path[out] = 0;
+
+    /*
+     * Steps c) to h) are really path normalization steps
+     */
+    normalize_uri_path(res->path);
+
+step_7:
+
+    /*
+     * 7) The resulting URI components, including any inherited from the
+     *    base URI, are recombined to give the absolute form of the URI
+     *    reference.
+     */
+    val = uri_to_string(res);
+
+done:
+    if (ref != NULL)
+	uri_free(ref);
+    if (bas != NULL)
+	uri_free(bas);
+    if (res != NULL)
+	uri_free(res);
+    return(val);
+}
+
+/**
+ * uri_resolve_relative:
+ * @URI:  the URI reference under consideration
+ * @base:  the base value
+ *
+ * Expresses the URI of the reference in terms relative to the
+ * base.  Some examples of this operation include:
+ *     base = "http://site1.com/docs/book1.html"
+ *        URI input                        URI returned
+ *     docs/pic1.gif                    pic1.gif
+ *     docs/img/pic1.gif                img/pic1.gif
+ *     img/pic1.gif                     ../img/pic1.gif
+ *     http://site1.com/docs/pic1.gif   pic1.gif
+ *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
+ *
+ *     base = "docs/book1.html"
+ *        URI input                        URI returned
+ *     docs/pic1.gif                    pic1.gif
+ *     docs/img/pic1.gif                img/pic1.gif
+ *     img/pic1.gif                     ../img/pic1.gif
+ *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
+ *
+ *
+ * Note: if the URI reference is really wierd or complicated, it may be
+ *       worthwhile to first convert it into a "nice" one by calling
+ *       uri_resolve (using 'base') before calling this routine,
+ *       since this routine (for reasonable efficiency) assumes URI has
+ *       already been through some validation.
+ *
+ * Returns a new URI string (to be freed by the caller) or NULL in case
+ * error.
+ */
+char *
+uri_resolve_relative (const char *uri, const char * base)
+{
+    char *val = NULL;
+    int ret;
+    int ix;
+    int pos = 0;
+    int nbslash = 0;
+    int len;
+    URI *ref = NULL;
+    URI *bas = NULL;
+    char *bptr, *uptr, *vptr;
+    int remove_path = 0;
+
+    if ((uri == NULL) || (*uri == 0))
+	return NULL;
+
+    /*
+     * First parse URI into a standard form
+     */
+    ref = uri_new ();
+    if (ref == NULL)
+	return NULL;
+    /* If URI not already in "relative" form */
+    if (uri[0] != '.') {
+	ret = uri_parse_into (ref, uri);
+	if (ret != 0)
+	    goto done;		/* Error in URI, return NULL */
+    } else
+	ref->path = g_strdup(uri);
+
+    /*
+     * Next parse base into the same standard form
+     */
+    if ((base == NULL) || (*base == 0)) {
+	val = g_strdup (uri);
+	goto done;
+    }
+    bas = uri_new ();
+    if (bas == NULL)
+	goto done;
+    if (base[0] != '.') {
+	ret = uri_parse_into (bas, base);
+	if (ret != 0)
+	    goto done;		/* Error in base, return NULL */
+    } else
+	bas->path = g_strdup(base);
+
+    /*
+     * If the scheme / server on the URI differs from the base,
+     * just return the URI
+     */
+    if ((ref->scheme != NULL) &&
+	((bas->scheme == NULL) ||
+	 (strcmp (bas->scheme, ref->scheme)) ||
+	 (strcmp (bas->server, ref->server)))) {
+	val = g_strdup (uri);
+	goto done;
+    }
+    if (!strcmp(bas->path, ref->path)) {
+	val = g_strdup("");
+	goto done;
+    }
+    if (bas->path == NULL) {
+	val = g_strdup(ref->path);
+	goto done;
+    }
+    if (ref->path == NULL) {
+        ref->path = (char *) "/";
+	remove_path = 1;
+    }
+
+    /*
+     * At this point (at last!) we can compare the two paths
+     *
+     * First we take care of the special case where either of the
+     * two path components may be missing (bug 316224)
+     */
+    if (bas->path == NULL) {
+	if (ref->path != NULL) {
+	    uptr = ref->path;
+	    if (*uptr == '/')
+		uptr++;
+	    /* exception characters from uri_to_string */
+	    val = uri_string_escape(uptr, "/;&=+$,");
+	}
+	goto done;
+    }
+    bptr = bas->path;
+    if (ref->path == NULL) {
+	for (ix = 0; bptr[ix] != 0; ix++) {
+	    if (bptr[ix] == '/')
+		nbslash++;
+	}
+	uptr = NULL;
+	len = 1;	/* this is for a string terminator only */
+    } else {
+    /*
+     * Next we compare the two strings and find where they first differ
+     */
+	if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
+            pos += 2;
+	if ((*bptr == '.') && (bptr[1] == '/'))
+            bptr += 2;
+	else if ((*bptr == '/') && (ref->path[pos] != '/'))
+	    bptr++;
+	while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
+	    pos++;
+
+	if (bptr[pos] == ref->path[pos]) {
+	    val = g_strdup("");
+	    goto done;		/* (I can't imagine why anyone would do this) */
+	}
+
+	/*
+	 * In URI, "back up" to the last '/' encountered.  This will be the
+	 * beginning of the "unique" suffix of URI
+	 */
+	ix = pos;
+	if ((ref->path[ix] == '/') && (ix > 0))
+	    ix--;
+	else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
+	    ix -= 2;
+	for (; ix > 0; ix--) {
+	    if (ref->path[ix] == '/')
+		break;
+	}
+	if (ix == 0) {
+	    uptr = ref->path;
+	} else {
+	    ix++;
+	    uptr = &ref->path[ix];
+	}
+
+	/*
+	 * In base, count the number of '/' from the differing point
+	 */
+	if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
+	    for (; bptr[ix] != 0; ix++) {
+		if (bptr[ix] == '/')
+		    nbslash++;
+	    }
+	}
+	len = strlen (uptr) + 1;
+    }
+
+    if (nbslash == 0) {
+	if (uptr != NULL)
+	    /* exception characters from uri_to_string */
+	    val = uri_string_escape(uptr, "/;&=+$,");
+	goto done;
+    }
+
+    /*
+     * Allocate just enough space for the returned string -
+     * length of the remainder of the URI, plus enough space
+     * for the "../" groups, plus one for the terminator
+     */
+    val = g_malloc (len + 3 * nbslash);
+    vptr = val;
+    /*
+     * Put in as many "../" as needed
+     */
+    for (; nbslash>0; nbslash--) {
+	*vptr++ = '.';
+	*vptr++ = '.';
+	*vptr++ = '/';
+    }
+    /*
+     * Finish up with the end of the URI
+     */
+    if (uptr != NULL) {
+        if ((vptr > val) && (len > 0) &&
+	    (uptr[0] == '/') && (vptr[-1] == '/')) {
+	    memcpy (vptr, uptr + 1, len - 1);
+	    vptr[len - 2] = 0;
+	} else {
+	    memcpy (vptr, uptr, len);
+	    vptr[len - 1] = 0;
+	}
+    } else {
+	vptr[len - 1] = 0;
+    }
+
+    /* escape the freshly-built path */
+    vptr = val;
+	/* exception characters from uri_to_string */
+    val = uri_string_escape(vptr, "/;&=+$,");
+    g_free(vptr);
+
+done:
+    /*
+     * Free the working variables
+     */
+    if (remove_path != 0)
+        ref->path = NULL;
+    if (ref != NULL)
+	uri_free (ref);
+    if (bas != NULL)
+	uri_free (bas);
+
+    return val;
+}
+
+/*
+ * Utility functions to help parse and assemble query strings.
+ */
+
+struct QueryParams *
+query_params_new (int init_alloc)
+{
+    struct QueryParams *ps;
+
+    if (init_alloc <= 0) init_alloc = 1;
+
+    ps = g_new(QueryParams, 1);
+    ps->n = 0;
+    ps->alloc = init_alloc;
+    ps->p = g_new(QueryParam, ps->alloc);
+
+    return ps;
+}
+
+/* Ensure there is space to store at least one more parameter
+ * at the end of the set.
+ */
+static int
+query_params_append (struct QueryParams *ps,
+               const char *name, const char *value)
+{
+    if (ps->n >= ps->alloc) {
+        ps->p = g_renew(QueryParam, ps->p, ps->alloc * 2);
+        ps->alloc *= 2;
+    }
+
+    ps->p[ps->n].name = g_strdup(name);
+    ps->p[ps->n].value = value ? g_strdup(value) : NULL;
+    ps->p[ps->n].ignore = 0;
+    ps->n++;
+
+    return 0;
+}
+
+void
+query_params_free (struct QueryParams *ps)
+{
+    int i;
+
+    for (i = 0; i < ps->n; ++i) {
+        g_free (ps->p[i].name);
+        g_free (ps->p[i].value);
+    }
+    g_free (ps->p);
+    g_free (ps);
+}
+
+struct QueryParams *
+query_params_parse (const char *query)
+{
+    struct QueryParams *ps;
+    const char *end, *eq;
+
+    ps = query_params_new (0);
+    if (!query || query[0] == '\0') return ps;
+
+    while (*query) {
+        char *name = NULL, *value = NULL;
+
+        /* Find the next separator, or end of the string. */
+        end = strchr (query, '&');
+        if (!end)
+            end = strchr (query, ';');
+        if (!end)
+            end = query + strlen (query);
+
+        /* Find the first '=' character between here and end. */
+        eq = strchr (query, '=');
+        if (eq && eq >= end) eq = NULL;
+
+        /* Empty section (eg. "&&"). */
+        if (end == query)
+            goto next;
+
+        /* If there is no '=' character, then we have just "name"
+         * and consistent with CGI.pm we assume value is "".
+         */
+        else if (!eq) {
+            name = uri_string_unescape (query, end - query, NULL);
+            value = NULL;
+        }
+        /* Or if we have "name=" here (works around annoying
+         * problem when calling uri_string_unescape with len = 0).
+         */
+        else if (eq+1 == end) {
+            name = uri_string_unescape (query, eq - query, NULL);
+            value = g_new0(char, 1);
+        }
+        /* If the '=' character is at the beginning then we have
+         * "=value" and consistent with CGI.pm we _ignore_ this.
+         */
+        else if (query == eq)
+            goto next;
+
+        /* Otherwise it's "name=value". */
+        else {
+            name = uri_string_unescape (query, eq - query, NULL);
+            value = uri_string_unescape (eq+1, end - (eq+1), NULL);
+        }
+
+        /* Append to the parameter set. */
+        query_params_append (ps, name, value);
+        g_free(name);
+        g_free(value);
+
+    next:
+        query = end;
+        if (*query) query ++; /* skip '&' separator */
+    }
+
+    return ps;
+}
diff --git a/uri.h b/uri.h
new file mode 100644
index 000000000..de99b3bd4
--- /dev/null
+++ b/uri.h
@@ -0,0 +1,113 @@
+/**
+ * Summary: library of generic URI related routines
+ * Description: library of generic URI related routines
+ *              Implements RFC 2396
+ *
+ * Copyright (C) 1998-2003 Daniel Veillard.  All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the name of Daniel Veillard shall not
+ * be used in advertising or otherwise to promote the sale, use or other
+ * dealings in this Software without prior written authorization from him.
+ *
+ * Author: Daniel Veillard
+ **
+ * Copyright (C) 2007 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ *
+ * Authors:
+ *    Richard W.M. Jones <rjones@redhat.com>
+ *
+ * Utility functions to help parse and assemble query strings.
+ */
+
+#ifndef QEMU_URI_H
+#define QEMU_URI_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * URI:
+ *
+ * A parsed URI reference. This is a struct containing the various fields
+ * as described in RFC 2396 but separated for further processing.
+ */
+typedef struct URI {
+    char *scheme;	/* the URI scheme */
+    char *opaque;	/* opaque part */
+    char *authority;	/* the authority part */
+    char *server;	/* the server part */
+    char *user;		/* the user part */
+    int port;		/* the port number */
+    char *path;		/* the path string */
+    char *fragment;	/* the fragment identifier */
+    int  cleanup;	/* parsing potentially unclean URI */
+    char *query;	/* the query string (as it appears in the URI) */
+} URI;
+
+URI *uri_new(void);
+char *uri_resolve(const char *URI, const char *base);
+char *uri_resolve_relative(const char *URI, const char *base);
+URI *uri_parse(const char *str);
+URI *uri_parse_raw(const char *str, int raw);
+int uri_parse_into(URI *uri, const char *str);
+char *uri_to_string(URI *uri);
+char *uri_string_escape(const char *str, const char *list);
+char *uri_string_unescape(const char *str, int len, char *target);
+void uri_free(URI *uri);
+
+/* Single web service query parameter 'name=value'. */
+typedef struct QueryParam {
+  char *name;			/* Name (unescaped). */
+  char *value;			/* Value (unescaped). */
+  int ignore;			/* Ignore this field in qparam_get_query */
+} QueryParam;
+
+/* Set of parameters. */
+typedef struct QueryParams {
+  int n;			/* number of parameters used */
+  int alloc;			/* allocated space */
+  QueryParam *p;		/* array of parameters */
+} QueryParams;
+
+struct QueryParams *query_params_new (int init_alloc);
+int query_param_append (QueryParams *ps, const char *name, const char *value);
+extern char *query_param_to_string (const QueryParams *ps);
+extern QueryParams *query_params_parse (const char *query);
+extern void query_params_free (QueryParams *ps);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* QEMU_URI_H */
-- 
cgit v1.2.3


From 2db2bfc0ccac5fd68dbf0ceb70fbc372c5d8a8c7 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 27 Sep 2012 19:27:43 +0530
Subject: aio: Another fix to the walking_handlers logic

The AIO dispatch loop will call QLIST_REMOVE and g_free even if there
are other pending calls to qemu_aio_wait outside the current one.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 aio.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/aio.c b/aio.c
index 99b8b7226..c738a4e15 100644
--- a/aio.c
+++ b/aio.c
@@ -159,14 +159,14 @@ bool qemu_aio_wait(void)
 
     /* if we have any readable fds, dispatch event */
     if (ret > 0) {
-        walking_handlers++;
-
         /* we have to walk very carefully in case
          * qemu_aio_set_fd_handler is called while we're walking */
         node = QLIST_FIRST(&aio_handlers);
         while (node) {
             AioHandler *tmp;
 
+            walking_handlers++;
+
             if (!node->deleted &&
                 FD_ISSET(node->fd, &rdfds) &&
                 node->io_read) {
@@ -181,13 +181,13 @@ bool qemu_aio_wait(void)
             tmp = node;
             node = QLIST_NEXT(node, node);
 
-            if (tmp->deleted) {
+            walking_handlers--;
+
+            if (!walking_handlers && tmp->deleted) {
                 QLIST_REMOVE(tmp, node);
                 g_free(tmp);
             }
         }
-
-        walking_handlers--;
     }
 
     return true;
-- 
cgit v1.2.3


From eb100396b9d2658e8bf9cdc14bfcb16a9152f464 Mon Sep 17 00:00:00 2001
From: Bharata B Rao <bharata@linux.vnet.ibm.com>
Date: Mon, 24 Sep 2012 14:42:45 +0530
Subject: configure: Add a config option for GlusterFS as block backend

GlusterFS support in QEMU depends on libgfapi, libgfrpc and
libgfxdr provided by GlusterFS.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 configure | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/configure b/configure
index 8f99b7b94..72d4b0d98 100755
--- a/configure
+++ b/configure
@@ -219,6 +219,7 @@ want_tools="yes"
 libiscsi=""
 coroutine=""
 seccomp=""
+glusterfs=""
 
 # parse CC options first
 for opt do
@@ -856,6 +857,10 @@ for opt do
   ;;
   --disable-seccomp) seccomp="no"
   ;;
+  --disable-glusterfs) glusterfs="no"
+  ;;
+  --enable-glusterfs) glusterfs="yes"
+  ;;
   *) echo "ERROR: unknown option $opt"; show_help="yes"
   ;;
   esac
@@ -1128,6 +1133,8 @@ echo "  --disable-seccomp        disable seccomp support"
 echo "  --enable-seccomp         enables seccomp support"
 echo "  --with-coroutine=BACKEND coroutine backend. Supported options:"
 echo "                           gthread, ucontext, sigaltstack, windows"
+echo "  --enable-glusterfs       enable GlusterFS backend"
+echo "  --disable-glusterfs      disable GlusterFS backend"
 echo ""
 echo "NOTE: The object files are built at the place where configure is launched"
 exit 1
@@ -2303,6 +2310,29 @@ EOF
   fi
 fi
 
+##########################################
+# glusterfs probe
+if test "$glusterfs" != "no" ; then
+  cat > $TMPC <<EOF
+#include <glusterfs/api/glfs.h>
+int main(void) {
+    (void) glfs_new("volume");
+    return 0;
+}
+EOF
+  glusterfs_libs="-lgfapi -lgfrpc -lgfxdr"
+  if compile_prog "" "$glusterfs_libs" ; then
+    glusterfs=yes
+    libs_tools="$glusterfs_libs $libs_tools"
+    libs_softmmu="$glusterfs_libs $libs_softmmu"
+  else
+    if test "$glusterfs" = "yes" ; then
+      feature_not_found "GlusterFS backend support"
+    fi
+    glusterfs=no
+  fi
+fi
+
 #
 # Check for xxxat() functions when we are building linux-user
 # emulator.  This is done because older glibc versions don't
@@ -3170,6 +3200,7 @@ echo "libiscsi support  $libiscsi"
 echo "build guest agent $guest_agent"
 echo "seccomp support   $seccomp"
 echo "coroutine backend $coroutine_backend"
+echo "GlusterFS support $glusterfs"
 
 if test "$sdl_too_old" = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -3516,6 +3547,10 @@ if test "$has_environ" = "yes" ; then
   echo "CONFIG_HAS_ENVIRON=y" >> $config_host_mak
 fi
 
+if test "$glusterfs" = "yes" ; then
+  echo "CONFIG_GLUSTERFS=y" >> $config_host_mak
+fi
+
 # USB host support
 case "$usb" in
 linux)
-- 
cgit v1.2.3


From 8d6d89cb63c57569864ecdeb84d3a1c2ebd031cc Mon Sep 17 00:00:00 2001
From: Bharata B Rao <bharata@linux.vnet.ibm.com>
Date: Thu, 27 Sep 2012 19:30:32 +0530
Subject: block: Support GlusterFS as a QEMU block backend.

This patch adds gluster as the new block backend in QEMU. This gives
QEMU the ability to boot VM images from gluster volumes. Its already
possible to boot from VM images on gluster volumes using FUSE mount, but
this patchset provides the ability to boot VM images from gluster volumes
by by-passing the FUSE layer in gluster. This is made possible by
using libgfapi routines to perform IO on gluster volumes directly.

VM Image on gluster volume is specified like this:

file=gluster[+transport]://[server[:port]]/volname/image[?socket=...]

'gluster' is the protocol.

'transport' specifies the transport type used to connect to gluster
management daemon (glusterd). Valid transport types are
tcp, unix and rdma. If a transport type isn't specified, then tcp
type is assumed.

'server' specifies the server where the volume file specification for
the given volume resides. This can be either hostname, ipv4 address
or ipv6 address. ipv6 address needs to be within square brackets [ ].
If transport type is 'unix', then 'server' field should not be specifed.
The 'socket' field needs to be populated with the path to unix domain
socket.

'port' is the port number on which glusterd is listening. This is optional
and if not specified, QEMU will send 0 which will make gluster to use the
default port. If the transport type is unix, then 'port' should not be
specified.

'volname' is the name of the gluster volume which contains the VM image.

'image' is the path to the actual VM image that resides on gluster volume.

Examples:

file=gluster://1.2.3.4/testvol/a.img
file=gluster+tcp://1.2.3.4/testvol/a.img
file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img
file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img
file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img
file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img
file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket
file=gluster+rdma://1.2.3.4:24007/testvol/a.img

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/Makefile.objs |   1 +
 block/gluster.c     | 624 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 625 insertions(+)
 create mode 100644 block/gluster.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index b5754d39b..a1ae67f33 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -9,3 +9,4 @@ block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
+block-obj-$(CONFIG_GLUSTERFS) += gluster.o
diff --git a/block/gluster.c b/block/gluster.c
new file mode 100644
index 000000000..3588d7377
--- /dev/null
+++ b/block/gluster.c
@@ -0,0 +1,624 @@
+/*
+ * GlusterFS backend for QEMU
+ *
+ * Copyright (C) 2012 Bharata B Rao <bharata@linux.vnet.ibm.com>
+ *
+ * Pipe handling mechanism in AIO implementation is derived from
+ * block/rbd.c. Hence,
+ *
+ * Copyright (C) 2010-2011 Christian Brunner <chb@muc.de>,
+ *                         Josh Durgin <josh.durgin@dreamhost.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+#include <glusterfs/api/glfs.h>
+#include "block_int.h"
+#include "qemu_socket.h"
+#include "uri.h"
+
+typedef struct GlusterAIOCB {
+    BlockDriverAIOCB common;
+    int64_t size;
+    int ret;
+    bool *finished;
+    QEMUBH *bh;
+} GlusterAIOCB;
+
+typedef struct BDRVGlusterState {
+    struct glfs *glfs;
+    int fds[2];
+    struct glfs_fd *fd;
+    int qemu_aio_count;
+    int event_reader_pos;
+    GlusterAIOCB *event_acb;
+} BDRVGlusterState;
+
+#define GLUSTER_FD_READ  0
+#define GLUSTER_FD_WRITE 1
+
+typedef struct GlusterConf {
+    char *server;
+    int port;
+    char *volname;
+    char *image;
+    char *transport;
+} GlusterConf;
+
+static void qemu_gluster_gconf_free(GlusterConf *gconf)
+{
+    g_free(gconf->server);
+    g_free(gconf->volname);
+    g_free(gconf->image);
+    g_free(gconf->transport);
+    g_free(gconf);
+}
+
+static int parse_volume_options(GlusterConf *gconf, char *path)
+{
+    char *p, *q;
+
+    if (!path) {
+        return -EINVAL;
+    }
+
+    /* volume */
+    p = q = path + strspn(path, "/");
+    p += strcspn(p, "/");
+    if (*p == '\0') {
+        return -EINVAL;
+    }
+    gconf->volname = g_strndup(q, p - q);
+
+    /* image */
+    p += strspn(p, "/");
+    if (*p == '\0') {
+        return -EINVAL;
+    }
+    gconf->image = g_strdup(p);
+    return 0;
+}
+
+/*
+ * file=gluster[+transport]://[server[:port]]/volname/image[?socket=...]
+ *
+ * 'gluster' is the protocol.
+ *
+ * 'transport' specifies the transport type used to connect to gluster
+ * management daemon (glusterd). Valid transport types are
+ * tcp, unix and rdma. If a transport type isn't specified, then tcp
+ * type is assumed.
+ *
+ * 'server' specifies the server where the volume file specification for
+ * the given volume resides. This can be either hostname, ipv4 address
+ * or ipv6 address. ipv6 address needs to be within square brackets [ ].
+ * If transport type is 'unix', then 'server' field should not be specifed.
+ * The 'socket' field needs to be populated with the path to unix domain
+ * socket.
+ *
+ * 'port' is the port number on which glusterd is listening. This is optional
+ * and if not specified, QEMU will send 0 which will make gluster to use the
+ * default port. If the transport type is unix, then 'port' should not be
+ * specified.
+ *
+ * 'volname' is the name of the gluster volume which contains the VM image.
+ *
+ * 'image' is the path to the actual VM image that resides on gluster volume.
+ *
+ * Examples:
+ *
+ * file=gluster://1.2.3.4/testvol/a.img
+ * file=gluster+tcp://1.2.3.4/testvol/a.img
+ * file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img
+ * file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img
+ * file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img
+ * file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img
+ * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket
+ * file=gluster+rdma://1.2.3.4:24007/testvol/a.img
+ */
+static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
+{
+    URI *uri;
+    QueryParams *qp = NULL;
+    bool is_unix = false;
+    int ret = 0;
+
+    uri = uri_parse(filename);
+    if (!uri) {
+        return -EINVAL;
+    }
+
+    /* transport */
+    if (!strcmp(uri->scheme, "gluster")) {
+        gconf->transport = g_strdup("tcp");
+    } else if (!strcmp(uri->scheme, "gluster+tcp")) {
+        gconf->transport = g_strdup("tcp");
+    } else if (!strcmp(uri->scheme, "gluster+unix")) {
+        gconf->transport = g_strdup("unix");
+        is_unix = true;
+    } else if (!strcmp(uri->scheme, "gluster+rdma")) {
+        gconf->transport = g_strdup("rdma");
+    } else {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    ret = parse_volume_options(gconf, uri->path);
+    if (ret < 0) {
+        goto out;
+    }
+
+    qp = query_params_parse(uri->query);
+    if (qp->n > 1 || (is_unix && !qp->n) || (!is_unix && qp->n)) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    if (is_unix) {
+        if (uri->server || uri->port) {
+            ret = -EINVAL;
+            goto out;
+        }
+        if (strcmp(qp->p[0].name, "socket")) {
+            ret = -EINVAL;
+            goto out;
+        }
+        gconf->server = g_strdup(qp->p[0].value);
+    } else {
+        gconf->server = g_strdup(uri->server);
+        gconf->port = uri->port;
+    }
+
+out:
+    if (qp) {
+        query_params_free(qp);
+    }
+    uri_free(uri);
+    return ret;
+}
+
+static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename)
+{
+    struct glfs *glfs = NULL;
+    int ret;
+    int old_errno;
+
+    ret = qemu_gluster_parseuri(gconf, filename);
+    if (ret < 0) {
+        error_report("Usage: file=gluster[+transport]://[server[:port]]/"
+            "volname/image[?socket=...]");
+        errno = -ret;
+        goto out;
+    }
+
+    glfs = glfs_new(gconf->volname);
+    if (!glfs) {
+        goto out;
+    }
+
+    ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->server,
+            gconf->port);
+    if (ret < 0) {
+        goto out;
+    }
+
+    /*
+     * TODO: Use GF_LOG_ERROR instead of hard code value of 4 here when
+     * GlusterFS makes GF_LOG_* macros available to libgfapi users.
+     */
+    ret = glfs_set_logging(glfs, "-", 4);
+    if (ret < 0) {
+        goto out;
+    }
+
+    ret = glfs_init(glfs);
+    if (ret) {
+        error_report("Gluster connection failed for server=%s port=%d "
+             "volume=%s image=%s transport=%s\n", gconf->server, gconf->port,
+             gconf->volname, gconf->image, gconf->transport);
+        goto out;
+    }
+    return glfs;
+
+out:
+    if (glfs) {
+        old_errno = errno;
+        glfs_fini(glfs);
+        errno = old_errno;
+    }
+    return NULL;
+}
+
+static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s)
+{
+    int ret;
+    bool *finished = acb->finished;
+    BlockDriverCompletionFunc *cb = acb->common.cb;
+    void *opaque = acb->common.opaque;
+
+    if (!acb->ret || acb->ret == acb->size) {
+        ret = 0; /* Success */
+    } else if (acb->ret < 0) {
+        ret = acb->ret; /* Read/Write failed */
+    } else {
+        ret = -EIO; /* Partial read/write - fail it */
+    }
+
+    s->qemu_aio_count--;
+    qemu_aio_release(acb);
+    cb(opaque, ret);
+    if (finished) {
+        *finished = true;
+    }
+}
+
+static void qemu_gluster_aio_event_reader(void *opaque)
+{
+    BDRVGlusterState *s = opaque;
+    ssize_t ret;
+
+    do {
+        char *p = (char *)&s->event_acb;
+
+        ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos,
+                   sizeof(s->event_acb) - s->event_reader_pos);
+        if (ret > 0) {
+            s->event_reader_pos += ret;
+            if (s->event_reader_pos == sizeof(s->event_acb)) {
+                s->event_reader_pos = 0;
+                qemu_gluster_complete_aio(s->event_acb, s);
+            }
+        }
+    } while (ret < 0 && errno == EINTR);
+}
+
+static int qemu_gluster_aio_flush_cb(void *opaque)
+{
+    BDRVGlusterState *s = opaque;
+
+    return (s->qemu_aio_count > 0);
+}
+
+static int qemu_gluster_open(BlockDriverState *bs, const char *filename,
+    int bdrv_flags)
+{
+    BDRVGlusterState *s = bs->opaque;
+    int open_flags = O_BINARY;
+    int ret = 0;
+    GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
+
+    s->glfs = qemu_gluster_init(gconf, filename);
+    if (!s->glfs) {
+        ret = -errno;
+        goto out;
+    }
+
+    if (bdrv_flags & BDRV_O_RDWR) {
+        open_flags |= O_RDWR;
+    } else {
+        open_flags |= O_RDONLY;
+    }
+
+    if ((bdrv_flags & BDRV_O_NOCACHE)) {
+        open_flags |= O_DIRECT;
+    }
+
+    s->fd = glfs_open(s->glfs, gconf->image, open_flags);
+    if (!s->fd) {
+        ret = -errno;
+        goto out;
+    }
+
+    ret = qemu_pipe(s->fds);
+    if (ret < 0) {
+        ret = -errno;
+        goto out;
+    }
+    fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK);
+    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ],
+        qemu_gluster_aio_event_reader, NULL, qemu_gluster_aio_flush_cb, s);
+
+out:
+    qemu_gluster_gconf_free(gconf);
+    if (!ret) {
+        return ret;
+    }
+    if (s->fd) {
+        glfs_close(s->fd);
+    }
+    if (s->glfs) {
+        glfs_fini(s->glfs);
+    }
+    return ret;
+}
+
+static int qemu_gluster_create(const char *filename,
+        QEMUOptionParameter *options)
+{
+    struct glfs *glfs;
+    struct glfs_fd *fd;
+    int ret = 0;
+    int64_t total_size = 0;
+    GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
+
+    glfs = qemu_gluster_init(gconf, filename);
+    if (!glfs) {
+        ret = -errno;
+        goto out;
+    }
+
+    while (options && options->name) {
+        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+            total_size = options->value.n / BDRV_SECTOR_SIZE;
+        }
+        options++;
+    }
+
+    fd = glfs_creat(glfs, gconf->image,
+        O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
+    if (!fd) {
+        ret = -errno;
+    } else {
+        if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
+            ret = -errno;
+        }
+        if (glfs_close(fd) != 0) {
+            ret = -errno;
+        }
+    }
+out:
+    qemu_gluster_gconf_free(gconf);
+    if (glfs) {
+        glfs_fini(glfs);
+    }
+    return ret;
+}
+
+static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    GlusterAIOCB *acb = (GlusterAIOCB *)blockacb;
+    bool finished = false;
+
+    acb->finished = &finished;
+    while (!finished) {
+        qemu_aio_wait();
+    }
+}
+
+static AIOPool gluster_aio_pool = {
+    .aiocb_size = sizeof(GlusterAIOCB),
+    .cancel = qemu_gluster_aio_cancel,
+};
+
+static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
+{
+    GlusterAIOCB *acb = (GlusterAIOCB *)arg;
+    BlockDriverState *bs = acb->common.bs;
+    BDRVGlusterState *s = bs->opaque;
+    int retval;
+
+    acb->ret = ret;
+    retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb));
+    if (retval != sizeof(acb)) {
+        /*
+         * Gluster AIO callback thread failed to notify the waiting
+         * QEMU thread about IO completion.
+         *
+         * Complete this IO request and make the disk inaccessible for
+         * subsequent reads and writes.
+         */
+        error_report("Gluster failed to notify QEMU about IO completion");
+
+        qemu_mutex_lock_iothread(); /* We are in gluster thread context */
+        acb->common.cb(acb->common.opaque, -EIO);
+        qemu_aio_release(acb);
+        s->qemu_aio_count--;
+        close(s->fds[GLUSTER_FD_READ]);
+        close(s->fds[GLUSTER_FD_WRITE]);
+        qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL,
+            NULL);
+        bs->drv = NULL; /* Make the disk inaccessible */
+        qemu_mutex_unlock_iothread();
+    }
+}
+
+static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int write)
+{
+    int ret;
+    GlusterAIOCB *acb;
+    BDRVGlusterState *s = bs->opaque;
+    size_t size;
+    off_t offset;
+
+    offset = sector_num * BDRV_SECTOR_SIZE;
+    size = nb_sectors * BDRV_SECTOR_SIZE;
+    s->qemu_aio_count++;
+
+    acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque);
+    acb->size = size;
+    acb->ret = 0;
+    acb->finished = NULL;
+
+    if (write) {
+        ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
+            &gluster_finish_aiocb, acb);
+    } else {
+        ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
+            &gluster_finish_aiocb, acb);
+    }
+
+    if (ret < 0) {
+        goto out;
+    }
+    return &acb->common;
+
+out:
+    s->qemu_aio_count--;
+    qemu_aio_release(acb);
+    return NULL;
+}
+
+static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+}
+
+static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
+}
+
+static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    int ret;
+    GlusterAIOCB *acb;
+    BDRVGlusterState *s = bs->opaque;
+
+    acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque);
+    acb->size = 0;
+    acb->ret = 0;
+    acb->finished = NULL;
+    s->qemu_aio_count++;
+
+    ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
+    if (ret < 0) {
+        goto out;
+    }
+    return &acb->common;
+
+out:
+    s->qemu_aio_count--;
+    qemu_aio_release(acb);
+    return NULL;
+}
+
+static int64_t qemu_gluster_getlength(BlockDriverState *bs)
+{
+    BDRVGlusterState *s = bs->opaque;
+    int64_t ret;
+
+    ret = glfs_lseek(s->fd, 0, SEEK_END);
+    if (ret < 0) {
+        return -errno;
+    } else {
+        return ret;
+    }
+}
+
+static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs)
+{
+    BDRVGlusterState *s = bs->opaque;
+    struct stat st;
+    int ret;
+
+    ret = glfs_fstat(s->fd, &st);
+    if (ret < 0) {
+        return -errno;
+    } else {
+        return st.st_blocks * 512;
+    }
+}
+
+static void qemu_gluster_close(BlockDriverState *bs)
+{
+    BDRVGlusterState *s = bs->opaque;
+
+    close(s->fds[GLUSTER_FD_READ]);
+    close(s->fds[GLUSTER_FD_WRITE]);
+    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, NULL);
+
+    if (s->fd) {
+        glfs_close(s->fd);
+        s->fd = NULL;
+    }
+    glfs_fini(s->glfs);
+}
+
+static QEMUOptionParameter qemu_gluster_create_options[] = {
+    {
+        .name = BLOCK_OPT_SIZE,
+        .type = OPT_SIZE,
+        .help = "Virtual disk size"
+    },
+    { NULL }
+};
+
+static BlockDriver bdrv_gluster = {
+    .format_name                  = "gluster",
+    .protocol_name                = "gluster",
+    .instance_size                = sizeof(BDRVGlusterState),
+    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_close                   = qemu_gluster_close,
+    .bdrv_create                  = qemu_gluster_create,
+    .bdrv_getlength               = qemu_gluster_getlength,
+    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+    .bdrv_aio_readv               = qemu_gluster_aio_readv,
+    .bdrv_aio_writev              = qemu_gluster_aio_writev,
+    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .create_options               = qemu_gluster_create_options,
+};
+
+static BlockDriver bdrv_gluster_tcp = {
+    .format_name                  = "gluster",
+    .protocol_name                = "gluster+tcp",
+    .instance_size                = sizeof(BDRVGlusterState),
+    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_close                   = qemu_gluster_close,
+    .bdrv_create                  = qemu_gluster_create,
+    .bdrv_getlength               = qemu_gluster_getlength,
+    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+    .bdrv_aio_readv               = qemu_gluster_aio_readv,
+    .bdrv_aio_writev              = qemu_gluster_aio_writev,
+    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .create_options               = qemu_gluster_create_options,
+};
+
+static BlockDriver bdrv_gluster_unix = {
+    .format_name                  = "gluster",
+    .protocol_name                = "gluster+unix",
+    .instance_size                = sizeof(BDRVGlusterState),
+    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_close                   = qemu_gluster_close,
+    .bdrv_create                  = qemu_gluster_create,
+    .bdrv_getlength               = qemu_gluster_getlength,
+    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+    .bdrv_aio_readv               = qemu_gluster_aio_readv,
+    .bdrv_aio_writev              = qemu_gluster_aio_writev,
+    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .create_options               = qemu_gluster_create_options,
+};
+
+static BlockDriver bdrv_gluster_rdma = {
+    .format_name                  = "gluster",
+    .protocol_name                = "gluster+rdma",
+    .instance_size                = sizeof(BDRVGlusterState),
+    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_close                   = qemu_gluster_close,
+    .bdrv_create                  = qemu_gluster_create,
+    .bdrv_getlength               = qemu_gluster_getlength,
+    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+    .bdrv_aio_readv               = qemu_gluster_aio_readv,
+    .bdrv_aio_writev              = qemu_gluster_aio_writev,
+    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .create_options               = qemu_gluster_create_options,
+};
+
+static void bdrv_gluster_init(void)
+{
+    bdrv_register(&bdrv_gluster_rdma);
+    bdrv_register(&bdrv_gluster_unix);
+    bdrv_register(&bdrv_gluster_tcp);
+    bdrv_register(&bdrv_gluster);
+}
+
+block_init(bdrv_gluster_init);
-- 
cgit v1.2.3


From 6ebdcee2d8e9e4b41ffe4e49039927550848b926 Mon Sep 17 00:00:00 2001
From: Jeff Cody <jcody@redhat.com>
Date: Thu, 27 Sep 2012 13:29:12 -0400
Subject: block: add support functions for live commit, to find and delete
 images.

Add bdrv_find_overlay(), and bdrv_drop_intermediate().

bdrv_find_overlay():  given 'bs' and the active (topmost) BDS of an image chain,
                    find the image that is the immediate top of 'bs'

bdrv_drop_intermediate():
                    Given 3 BDS (active, top, base), drop images above
                    base up to and including top, and set base to be the
                    backing file of top's overlay node.

                    E.g., this converts:

                    bottom <- base <- intermediate <- top <- active

                    to

                    bottom <- base <- active

Signed-off-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 block.h |   4 ++
 2 files changed, 147 insertions(+)

diff --git a/block.c b/block.c
index 751ebdc06..54209a526 100644
--- a/block.c
+++ b/block.c
@@ -1724,6 +1724,149 @@ int bdrv_change_backing_file(BlockDriverState *bs,
     return ret;
 }
 
+/*
+ * Finds the image layer in the chain that has 'bs' as its backing file.
+ *
+ * active is the current topmost image.
+ *
+ * Returns NULL if bs is not found in active's image chain,
+ * or if active == bs.
+ */
+BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
+                                    BlockDriverState *bs)
+{
+    BlockDriverState *overlay = NULL;
+    BlockDriverState *intermediate;
+
+    assert(active != NULL);
+    assert(bs != NULL);
+
+    /* if bs is the same as active, then by definition it has no overlay
+     */
+    if (active == bs) {
+        return NULL;
+    }
+
+    intermediate = active;
+    while (intermediate->backing_hd) {
+        if (intermediate->backing_hd == bs) {
+            overlay = intermediate;
+            break;
+        }
+        intermediate = intermediate->backing_hd;
+    }
+
+    return overlay;
+}
+
+typedef struct BlkIntermediateStates {
+    BlockDriverState *bs;
+    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
+} BlkIntermediateStates;
+
+
+/*
+ * Drops images above 'base' up to and including 'top', and sets the image
+ * above 'top' to have base as its backing file.
+ *
+ * Requires that the overlay to 'top' is opened r/w, so that the backing file
+ * information in 'bs' can be properly updated.
+ *
+ * E.g., this will convert the following chain:
+ * bottom <- base <- intermediate <- top <- active
+ *
+ * to
+ *
+ * bottom <- base <- active
+ *
+ * It is allowed for bottom==base, in which case it converts:
+ *
+ * base <- intermediate <- top <- active
+ *
+ * to
+ *
+ * base <- active
+ *
+ * Error conditions:
+ *  if active == top, that is considered an error
+ *
+ */
+int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
+                           BlockDriverState *base)
+{
+    BlockDriverState *intermediate;
+    BlockDriverState *base_bs = NULL;
+    BlockDriverState *new_top_bs = NULL;
+    BlkIntermediateStates *intermediate_state, *next;
+    int ret = -EIO;
+
+    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
+    QSIMPLEQ_INIT(&states_to_delete);
+
+    if (!top->drv || !base->drv) {
+        goto exit;
+    }
+
+    new_top_bs = bdrv_find_overlay(active, top);
+
+    if (new_top_bs == NULL) {
+        /* we could not find the image above 'top', this is an error */
+        goto exit;
+    }
+
+    /* special case of new_top_bs->backing_hd already pointing to base - nothing
+     * to do, no intermediate images */
+    if (new_top_bs->backing_hd == base) {
+        ret = 0;
+        goto exit;
+    }
+
+    intermediate = top;
+
+    /* now we will go down through the list, and add each BDS we find
+     * into our deletion queue, until we hit the 'base'
+     */
+    while (intermediate) {
+        intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
+        intermediate_state->bs = intermediate;
+        QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
+
+        if (intermediate->backing_hd == base) {
+            base_bs = intermediate->backing_hd;
+            break;
+        }
+        intermediate = intermediate->backing_hd;
+    }
+    if (base_bs == NULL) {
+        /* something went wrong, we did not end at the base. safely
+         * unravel everything, and exit with error */
+        goto exit;
+    }
+
+    /* success - we can delete the intermediate states, and link top->base */
+    ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
+                                   base_bs->drv ? base_bs->drv->format_name : "");
+    if (ret) {
+        goto exit;
+    }
+    new_top_bs->backing_hd = base_bs;
+
+
+    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
+        /* so that bdrv_close() does not recursively close the chain */
+        intermediate_state->bs->backing_hd = NULL;
+        bdrv_delete(intermediate_state->bs);
+    }
+    ret = 0;
+
+exit:
+    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
+        g_free(intermediate_state);
+    }
+    return ret;
+}
+
+
 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                    size_t size)
 {
diff --git a/block.h b/block.h
index b1095d859..8c9b4245d 100644
--- a/block.h
+++ b/block.h
@@ -203,6 +203,10 @@ int bdrv_commit_all(void);
 int bdrv_change_backing_file(BlockDriverState *bs,
     const char *backing_file, const char *backing_fmt);
 void bdrv_register(BlockDriver *bdrv);
+int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
+                           BlockDriverState *base);
+BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
+                                    BlockDriverState *bs);
 
 
 typedef struct BdrvCheckResult {
-- 
cgit v1.2.3


From 747ff602636fd83daae7ee4b1dd6e8d257a89fea Mon Sep 17 00:00:00 2001
From: Jeff Cody <jcody@redhat.com>
Date: Thu, 27 Sep 2012 13:29:13 -0400
Subject: block: add live block commit functionality

This adds the live commit coroutine.  This iteration focuses on the
commit only below the active layer, and not the active layer itself.

The behaviour is similar to block streaming; the sectors are walked
through, and anything that exists above 'base' is committed back down
into base.  At the end, intermediate images are deleted, and the
chain stitched together.  Images are restored to their original open
flags upon completion.

Signed-off-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/Makefile.objs |   1 +
 block/commit.c      | 267 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 block_int.h         |  16 ++++
 trace-events        |   2 +
 4 files changed, 286 insertions(+)
 create mode 100644 block/commit.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index a1ae67f33..81fd43cef 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -4,6 +4,7 @@ block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
 block-obj-y += stream.o
+block-obj-y += commit.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
diff --git a/block/commit.c b/block/commit.c
new file mode 100644
index 000000000..624ec5f62
--- /dev/null
+++ b/block/commit.c
@@ -0,0 +1,267 @@
+/*
+ * Live block commit
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ *  Jeff Cody   <jcody@redhat.com>
+ *  Based on stream.c by Stefan Hajnoczi
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "block_int.h"
+#include "qemu/ratelimit.h"
+
+enum {
+    /*
+     * Size of data buffer for populating the image file.  This should be large
+     * enough to process multiple clusters in a single call, so that populating
+     * contiguous regions of the image is efficient.
+     */
+    COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */
+};
+
+#define SLICE_TIME 100000000ULL /* ns */
+
+typedef struct CommitBlockJob {
+    BlockJob common;
+    RateLimit limit;
+    BlockDriverState *active;
+    BlockDriverState *top;
+    BlockDriverState *base;
+    BlockErrorAction on_error;
+    int base_flags;
+    int orig_overlay_flags;
+} CommitBlockJob;
+
+static int coroutine_fn commit_populate(BlockDriverState *bs,
+                                        BlockDriverState *base,
+                                        int64_t sector_num, int nb_sectors,
+                                        void *buf)
+{
+    int ret = 0;
+
+    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
+    if (ret) {
+        return ret;
+    }
+
+    ret = bdrv_write(base, sector_num, buf, nb_sectors);
+    if (ret) {
+        return ret;
+    }
+
+    return 0;
+}
+
+static void coroutine_fn commit_run(void *opaque)
+{
+    CommitBlockJob *s = opaque;
+    BlockDriverState *active = s->active;
+    BlockDriverState *top = s->top;
+    BlockDriverState *base = s->base;
+    BlockDriverState *overlay_bs = NULL;
+    int64_t sector_num, end;
+    int ret = 0;
+    int n = 0;
+    void *buf;
+    int bytes_written = 0;
+    int64_t base_len;
+
+    ret = s->common.len = bdrv_getlength(top);
+
+
+    if (s->common.len < 0) {
+        goto exit_restore_reopen;
+    }
+
+    ret = base_len = bdrv_getlength(base);
+    if (base_len < 0) {
+        goto exit_restore_reopen;
+    }
+
+    if (base_len < s->common.len) {
+        ret = bdrv_truncate(base, s->common.len);
+        if (ret) {
+            goto exit_restore_reopen;
+        }
+    }
+
+    overlay_bs = bdrv_find_overlay(active, top);
+
+    end = s->common.len >> BDRV_SECTOR_BITS;
+    buf = qemu_blockalign(top, COMMIT_BUFFER_SIZE);
+
+    for (sector_num = 0; sector_num < end; sector_num += n) {
+        uint64_t delay_ns = 0;
+        bool copy;
+
+wait:
+        /* Note that even when no rate limit is applied we need to yield
+         * with no pending I/O here so that qemu_aio_flush() returns.
+         */
+        block_job_sleep_ns(&s->common, rt_clock, delay_ns);
+        if (block_job_is_cancelled(&s->common)) {
+            break;
+        }
+        /* Copy if allocated above the base */
+        ret = bdrv_co_is_allocated_above(top, base, sector_num,
+                                         COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
+                                         &n);
+        copy = (ret == 1);
+        trace_commit_one_iteration(s, sector_num, n, ret);
+        if (copy) {
+            if (s->common.speed) {
+                delay_ns = ratelimit_calculate_delay(&s->limit, n);
+                if (delay_ns > 0) {
+                    goto wait;
+                }
+            }
+            ret = commit_populate(top, base, sector_num, n, buf);
+            bytes_written += n * BDRV_SECTOR_SIZE;
+        }
+        if (ret < 0) {
+            if (s->on_error == BLOCK_ERR_STOP_ANY    ||
+                s->on_error == BLOCK_ERR_REPORT      ||
+                (s->on_error == BLOCK_ERR_STOP_ENOSPC && ret == -ENOSPC)) {
+                goto exit_free_buf;
+            } else {
+                n = 0;
+                continue;
+            }
+        }
+        /* Publish progress */
+        s->common.offset += n * BDRV_SECTOR_SIZE;
+    }
+
+    ret = 0;
+
+    if (!block_job_is_cancelled(&s->common) && sector_num == end) {
+        /* success */
+        ret = bdrv_drop_intermediate(active, top, base);
+    }
+
+exit_free_buf:
+    qemu_vfree(buf);
+
+exit_restore_reopen:
+    /* restore base open flags here if appropriate (e.g., change the base back
+     * to r/o). These reopens do not need to be atomic, since we won't abort
+     * even on failure here */
+    if (s->base_flags != bdrv_get_flags(base)) {
+        bdrv_reopen(base, s->base_flags, NULL);
+    }
+    if (s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
+        bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
+    }
+
+    block_job_complete(&s->common, ret);
+}
+
+static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
+{
+    CommitBlockJob *s = container_of(job, CommitBlockJob, common);
+
+    if (speed < 0) {
+        error_set(errp, QERR_INVALID_PARAMETER, "speed");
+        return;
+    }
+    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
+}
+
+static BlockJobType commit_job_type = {
+    .instance_size = sizeof(CommitBlockJob),
+    .job_type      = "commit",
+    .set_speed     = commit_set_speed,
+};
+
+void commit_start(BlockDriverState *bs, BlockDriverState *base,
+                  BlockDriverState *top, int64_t speed,
+                  BlockErrorAction on_error, BlockDriverCompletionFunc *cb,
+                  void *opaque, Error **errp)
+{
+    CommitBlockJob *s;
+    BlockReopenQueue *reopen_queue = NULL;
+    int orig_overlay_flags;
+    int orig_base_flags;
+    BlockDriverState *overlay_bs;
+    Error *local_err = NULL;
+
+    if ((on_error == BLOCK_ERR_STOP_ANY ||
+         on_error == BLOCK_ERR_STOP_ENOSPC) &&
+        !bdrv_iostatus_is_enabled(bs)) {
+        error_set(errp, QERR_INVALID_PARAMETER_COMBINATION);
+        return;
+    }
+
+    /* Once we support top == active layer, remove this check */
+    if (top == bs) {
+        error_setg(errp,
+                   "Top image as the active layer is currently unsupported");
+        return;
+    }
+
+    if (top == base) {
+        error_setg(errp, "Invalid files for merge: top and base are the same");
+        return;
+    }
+
+    /* top and base may be valid, but let's make sure that base is reachable
+     * from top */
+    if (bdrv_find_backing_image(top, base->filename) != base) {
+        error_setg(errp,
+                   "Base (%s) is not reachable from top (%s)",
+                   base->filename, top->filename);
+        return;
+    }
+
+    overlay_bs = bdrv_find_overlay(bs, top);
+
+    if (overlay_bs == NULL) {
+        error_setg(errp, "Could not find overlay image for %s:", top->filename);
+        return;
+    }
+
+    orig_base_flags    = bdrv_get_flags(base);
+    orig_overlay_flags = bdrv_get_flags(overlay_bs);
+
+    /* convert base & overlay_bs to r/w, if necessary */
+    if (!(orig_base_flags & BDRV_O_RDWR)) {
+        reopen_queue = bdrv_reopen_queue(reopen_queue, base,
+                                         orig_base_flags | BDRV_O_RDWR);
+    }
+    if (!(orig_overlay_flags & BDRV_O_RDWR)) {
+        reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs,
+                                         orig_overlay_flags | BDRV_O_RDWR);
+    }
+    if (reopen_queue) {
+        bdrv_reopen_multiple(reopen_queue, &local_err);
+        if (local_err != NULL) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+
+
+    s = block_job_create(&commit_job_type, bs, speed, cb, opaque, errp);
+    if (!s) {
+        return;
+    }
+
+    s->base   = base;
+    s->top    = top;
+    s->active = bs;
+
+    s->base_flags          = orig_base_flags;
+    s->orig_overlay_flags  = orig_overlay_flags;
+
+    s->on_error = on_error;
+    s->common.co = qemu_coroutine_create(commit_run);
+
+    trace_commit_start(bs, base, top, s, s->common.co, opaque);
+    qemu_coroutine_enter(s->common.co, s);
+}
diff --git a/block_int.h b/block_int.h
index ac4245cb1..56164a742 100644
--- a/block_int.h
+++ b/block_int.h
@@ -463,4 +463,20 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
                   BlockDriverCompletionFunc *cb,
                   void *opaque, Error **errp);
 
+/**
+ * commit_start:
+ * @bs: Top Block device
+ * @base: Block device that will be written into, and become the new top
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ */
+void commit_start(BlockDriverState *bs, BlockDriverState *base,
+                 BlockDriverState *top, int64_t speed,
+                 BlockErrorAction on_error, BlockDriverCompletionFunc *cb,
+                 void *opaque, Error **errp);
+
 #endif /* BLOCK_INT_H */
diff --git a/trace-events b/trace-events
index f5b509755..dbc300721 100644
--- a/trace-events
+++ b/trace-events
@@ -74,6 +74,8 @@ bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t c
 # block/stream.c
 stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
 stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base %p s %p co %p opaque %p"
+commit_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
+commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "bs %p base %p top %p s %p co %p opaque %p"
 
 # blockdev.c
 qmp_block_job_cancel(void *job) "job %p"
-- 
cgit v1.2.3


From 9abf2dbaf6ae990c498ec0245986bddcd6b013ea Mon Sep 17 00:00:00 2001
From: Jeff Cody <jcody@redhat.com>
Date: Thu, 27 Sep 2012 13:29:14 -0400
Subject: blockdev: rename block_stream_cb to a generic block_job_cb

Signed-off-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 blockdev.c   | 6 +++---
 trace-events | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 0267fa30b..f910ac530 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1070,12 +1070,12 @@ static QObject *qobject_from_block_job(BlockJob *job)
                               job->speed);
 }
 
-static void block_stream_cb(void *opaque, int ret)
+static void block_job_cb(void *opaque, int ret)
 {
     BlockDriverState *bs = opaque;
     QObject *obj;
 
-    trace_block_stream_cb(bs, bs->job, ret);
+    trace_block_job_cb(bs, bs->job, ret);
 
     assert(bs->job);
     obj = qobject_from_block_job(bs->job);
@@ -1117,7 +1117,7 @@ void qmp_block_stream(const char *device, bool has_base,
     }
 
     stream_start(bs, base_bs, base, has_speed ? speed : 0,
-                 block_stream_cb, bs, &local_err);
+                 block_job_cb, bs, &local_err);
     if (error_is_set(&local_err)) {
         error_propagate(errp, local_err);
         return;
diff --git a/trace-events b/trace-events
index dbc300721..29771a7b4 100644
--- a/trace-events
+++ b/trace-events
@@ -79,7 +79,7 @@ commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "
 
 # blockdev.c
 qmp_block_job_cancel(void *job) "job %p"
-block_stream_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
+block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
 qmp_block_stream(void *bs, void *job) "bs %p job %p"
 
 # hw/virtio-blk.c
-- 
cgit v1.2.3


From 79fac5680d3680c9fb43d14a8d4e39ced25530f8 Mon Sep 17 00:00:00 2001
From: Jeff Cody <jcody@redhat.com>
Date: Thu, 27 Sep 2012 13:29:15 -0400
Subject: block: helper function, to find the base image of a chain

This is a simple helper function, that will return the base image
of a given image chain.

Signed-off-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c | 16 ++++++++++++++++
 block.h |  1 +
 2 files changed, 17 insertions(+)

diff --git a/block.c b/block.c
index 54209a526..d7a6d14a9 100644
--- a/block.c
+++ b/block.c
@@ -3117,6 +3117,22 @@ int bdrv_get_backing_file_depth(BlockDriverState *bs)
     return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
 }
 
+BlockDriverState *bdrv_find_base(BlockDriverState *bs)
+{
+    BlockDriverState *curr_bs = NULL;
+
+    if (!bs) {
+        return NULL;
+    }
+
+    curr_bs = bs;
+
+    while (curr_bs->backing_hd) {
+        curr_bs = curr_bs->backing_hd;
+    }
+    return curr_bs;
+}
+
 #define NB_SUFFIXES 4
 
 char *get_human_readable_size(char *buf, int buf_size, int64_t size)
diff --git a/block.h b/block.h
index 8c9b4245d..e9249c44e 100644
--- a/block.h
+++ b/block.h
@@ -207,6 +207,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
                            BlockDriverState *base);
 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
                                     BlockDriverState *bs);
+BlockDriverState *bdrv_find_base(BlockDriverState *bs);
 
 
 typedef struct BdrvCheckResult {
-- 
cgit v1.2.3


From ed61fc10e8c8d2d1287f7edae92e44f5c97c540d Mon Sep 17 00:00:00 2001
From: Jeff Cody <jcody@redhat.com>
Date: Thu, 27 Sep 2012 13:29:16 -0400
Subject: QAPI: add command for live block commit, 'block-commit'

The command for live block commit is added, which has the following
arguments:

device: the block device to perform the commit on (mandatory)
base:   the base image to commit into; optional (if not specified,
        it is the underlying original image)
top:    the top image of the commit - all data from inside top down
        to base will be committed into base (mandatory for now; see
        note, below)

speed:  maximum speed, in bytes/sec

Note: Eventually this command will support merging down the active layer,
      but that code is not yet complete.  If the active layer is passed
      in as top, then an error will be returned.  Once merging down the
      active layer is supported, the 'top' argument may become optional,
      and default to the active layer.

The is done as a block job, so upon completion a BLOCK_JOB_COMPLETED will
be emitted.

Signed-off-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 QMP/qmp-events.txt |  6 ++++--
 blockdev.c         | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 qapi-schema.json   | 34 ++++++++++++++++++++++++++++++++
 qmp-commands.hx    |  6 ++++++
 4 files changed, 102 insertions(+), 2 deletions(-)

diff --git a/QMP/qmp-events.txt b/QMP/qmp-events.txt
index 287805825..449102042 100644
--- a/QMP/qmp-events.txt
+++ b/QMP/qmp-events.txt
@@ -50,7 +50,8 @@ Emitted when a block job has been cancelled.
 
 Data:
 
-- "type":     Job type ("stream" for image streaming, json-string)
+- "type":     Job type (json-string; "stream" for image streaming
+                                     "commit" for block commit)
 - "device":   Device name (json-string)
 - "len":      Maximum progress value (json-int)
 - "offset":   Current progress value (json-int)
@@ -73,7 +74,8 @@ Emitted when a block job has completed.
 
 Data:
 
-- "type":     Job type ("stream" for image streaming, json-string)
+- "type":     Job type (json-string; "stream" for image streaming
+                                     "commit" for block commit)
 - "device":   Device name (json-string)
 - "len":      Maximum progress value (json-int)
 - "offset":   Current progress value (json-int)
diff --git a/blockdev.c b/blockdev.c
index f910ac530..cea22e497 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1131,6 +1131,64 @@ void qmp_block_stream(const char *device, bool has_base,
     trace_qmp_block_stream(bs, bs->job);
 }
 
+void qmp_block_commit(const char *device,
+                      bool has_base, const char *base, const char *top,
+                      bool has_speed, int64_t speed,
+                      Error **errp)
+{
+    BlockDriverState *bs;
+    BlockDriverState *base_bs, *top_bs;
+    Error *local_err = NULL;
+    /* This will be part of the QMP command, if/when the
+     * BlockdevOnError change for blkmirror makes it in
+     */
+    BlockErrorAction on_error = BLOCK_ERR_REPORT;
+
+    /* drain all i/o before commits */
+    bdrv_drain_all();
+
+    bs = bdrv_find(device);
+    if (!bs) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+        return;
+    }
+    if (base && has_base) {
+        base_bs = bdrv_find_backing_image(bs, base);
+    } else {
+        base_bs = bdrv_find_base(bs);
+    }
+
+    if (base_bs == NULL) {
+        error_set(errp, QERR_BASE_NOT_FOUND, base ? base : "NULL");
+        return;
+    }
+
+    /* default top_bs is the active layer */
+    top_bs = bs;
+
+    if (top) {
+        if (strcmp(bs->filename, top) != 0) {
+            top_bs = bdrv_find_backing_image(bs, top);
+        }
+    }
+
+    if (top_bs == NULL) {
+        error_setg(errp, "Top image file %s not found", top ? top : "NULL");
+        return;
+    }
+
+    commit_start(bs, base_bs, top_bs, speed, on_error, block_job_cb, bs,
+                &local_err);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+        return;
+    }
+    /* Grab a reference so hotplug does not delete the BlockDriverState from
+     * underneath us.
+     */
+    drive_get_ref(drive_get_by_blockdev(bs));
+}
+
 static BlockJob *find_block_job(const char *device)
 {
     BlockDriverState *bs;
diff --git a/qapi-schema.json b/qapi-schema.json
index 14e44199b..58165453f 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1468,6 +1468,40 @@
   'returns': 'str' }
 
 ##
+# @block-commit
+#
+# Live commit of data from overlay image nodes into backing nodes - i.e.,
+# writes data between 'top' and 'base' into 'base'.
+#
+# @device:  the name of the device
+#
+# @base:   #optional The file name of the backing image to write data into.
+#                    If not specified, this is the deepest backing image
+#
+# @top:              The file name of the backing image within the image chain,
+#                    which contains the topmost data to be committed down.
+#                    Note, the active layer as 'top' is currently unsupported.
+#
+#                    If top == base, that is an error.
+#
+#
+# @speed:  #optional the maximum speed, in bytes per second
+#
+# Returns: Nothing on success
+#          If commit or stream is already active on this device, DeviceInUse
+#          If @device does not exist, DeviceNotFound
+#          If image commit is not supported by this device, NotSupported
+#          If @base or @top is invalid, a generic error is returned
+#          If @top is the active layer, or omitted, a generic error is returned
+#          If @speed is invalid, InvalidParameter
+#
+# Since: 1.3
+#
+##
+{ 'command': 'block-commit',
+  'data': { 'device': 'str', '*base': 'str', 'top': 'str',
+            '*speed': 'int' } }
+
 # @migrate_cancel
 #
 # Cancel the current executing migration process.
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 6e21ddba6..a55a3f54b 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -791,6 +791,12 @@ EQMP
         .mhandler.cmd_new = qmp_marshal_input_block_stream,
     },
 
+    {
+        .name       = "block-commit",
+        .args_type  = "device:B,base:s?,top:s,speed:o?",
+        .mhandler.cmd_new = qmp_marshal_input_block_commit,
+    },
+
     {
         .name       = "block-job-set-speed",
         .args_type  = "device:B,speed:o",
-- 
cgit v1.2.3


From 747051cd97c384e70eec0ceb905f08e630b6a1c4 Mon Sep 17 00:00:00 2001
From: Jeff Cody <jcody@redhat.com>
Date: Thu, 27 Sep 2012 13:29:17 -0400
Subject: qemu-iotests: add initial tests for live block commit

Derived from the streaming test cases (030), this adds the
following 9 tests:

1. For the following image chain, commit [mid] into [backing],
   and use qemu-io to verify [backing] has its original data, as
   well as the data from [mid]

           [backing] <-- [mid] <-- [test]

2. Verifies that 'block-commit' with the 'speed' parameter sets the
   speed parameter, as reported by 'query-block-jobs'

3. Verifies that a bogus 'device' parameter to 'block-commit'
   results in error

4-9: Appropriate error values returned for the following argument errors:
    * top == base
    * top is nonexistent
    * base is nonexistent
    * top == active layer (this is currently not supported)
    * top and base arguments are reversed
    * top argument is omitted

Signed-off-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/qemu-iotests/040     | 178 +++++++++++++++++++++++++++++++++++++++++++++
 tests/qemu-iotests/040.out |   5 ++
 tests/qemu-iotests/group   |   1 +
 3 files changed, 184 insertions(+)
 create mode 100755 tests/qemu-iotests/040
 create mode 100644 tests/qemu-iotests/040.out

diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040
new file mode 100755
index 000000000..258e7eae3
--- /dev/null
+++ b/tests/qemu-iotests/040
@@ -0,0 +1,178 @@
+#!/usr/bin/env python
+#
+# Tests for image block commit.
+#
+# Copyright (C) 2012 IBM, Corp.
+# Copyright (C) 2012 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# Test for live block commit
+# Derived from Image Streaming Test 030
+
+import time
+import os
+import iotests
+from iotests import qemu_img, qemu_io
+import struct
+
+backing_img = os.path.join(iotests.test_dir, 'backing.img')
+mid_img = os.path.join(iotests.test_dir, 'mid.img')
+test_img = os.path.join(iotests.test_dir, 'test.img')
+
+class ImageCommitTestCase(iotests.QMPTestCase):
+    '''Abstract base class for image commit test cases'''
+
+    def assert_no_active_commit(self):
+        result = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(result, 'return', [])
+
+    def cancel_and_wait(self, drive='drive0'):
+        '''Cancel a block job and wait for it to finish'''
+        result = self.vm.qmp('block-job-cancel', device=drive)
+        self.assert_qmp(result, 'return', {})
+
+        cancelled = False
+        while not cancelled:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_CANCELLED':
+                    self.assert_qmp(event, 'data/type', 'commit')
+                    self.assert_qmp(event, 'data/device', drive)
+                    cancelled = True
+
+        self.assert_no_active_commit()
+
+    def create_image(self, name, size):
+        file = open(name, 'w')
+        i = 0
+        while i < size:
+            sector = struct.pack('>l504xl', i / 512, i / 512)
+            file.write(sector)
+            i = i + 512
+        file.close()
+
+
+class TestSingleDrive(ImageCommitTestCase):
+    image_len = 1 * 1024 * 1024
+    test_len = 1 * 1024 * 256
+
+    def setUp(self):
+        self.create_image(backing_img, TestSingleDrive.image_len)
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, mid_img)
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img)
+        qemu_io('-c', 'write -P 0xab 0 524288', backing_img)
+        qemu_io('-c', 'write -P 0xef 524288 524288', mid_img)
+        self.vm = iotests.VM().add_drive(test_img)
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        os.remove(test_img)
+        os.remove(mid_img)
+        os.remove(backing_img)
+
+    def test_commit(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='%s' % mid_img)
+        self.assert_qmp(result, 'return', {})
+
+        completed = False
+        while not completed:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_COMPLETED':
+                    self.assert_qmp(event, 'data/type', 'commit')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/offset', self.image_len)
+                    self.assert_qmp(event, 'data/len', self.image_len)
+                    completed = True
+
+        self.assert_no_active_commit()
+        self.vm.shutdown()
+
+        self.assertEqual(-1, qemu_io('-c', 'read -P 0xab 0 524288', backing_img).find("verification failed"))
+        self.assertEqual(-1, qemu_io('-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed"))
+
+    def test_device_not_found(self):
+        result = self.vm.qmp('block-commit', device='nonexistent', top='%s' % mid_img)
+        self.assert_qmp(result, 'error/class', 'DeviceNotFound')
+
+    def test_top_same_base(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='%s' % backing_img, base='%s' % backing_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', 'Invalid files for merge: top and base are the same')
+
+    def test_top_invalid(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='badfile', base='%s' % backing_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', 'Top image file badfile not found')
+
+    def test_base_invalid(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='%s' % mid_img, base='badfile')
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', 'Base \'badfile\' not found')
+
+    def test_top_is_active(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='%s' % test_img, base='%s' % backing_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', 'Top image as the active layer is currently unsupported')
+
+    def test_top_and_base_reversed(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='%s' % backing_img, base='%s' % mid_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', 'Base (%(1)s) is not reachable from top (%(2)s)' % {"1" : mid_img, "2" : backing_img})
+
+    def test_top_omitted(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0')
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', "Parameter 'top' is missing")
+
+
+class TestSetSpeed(ImageCommitTestCase):
+    image_len = 80 * 1024 * 1024 # MB
+
+    def setUp(self):
+        qemu_img('create', backing_img, str(TestSetSpeed.image_len))
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, mid_img)
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img)
+        self.vm = iotests.VM().add_drive(test_img)
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        os.remove(test_img)
+        os.remove(mid_img)
+        os.remove(backing_img)
+
+    def test_set_speed(self):
+        self.assert_no_active_commit()
+
+        result = self.vm.qmp('block-commit', device='drive0', top=mid_img, speed=1024 * 1024)
+        self.assert_qmp(result, 'return', {})
+
+        # Ensure the speed we set was accepted
+        result = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(result, 'return[0]/device', 'drive0')
+        self.assert_qmp(result, 'return[0]/speed', 1024 * 1024)
+
+        self.cancel_and_wait()
+
+
+if __name__ == '__main__':
+    iotests.main(supported_fmts=['qcow2', 'qed'])
diff --git a/tests/qemu-iotests/040.out b/tests/qemu-iotests/040.out
new file mode 100644
index 000000000..dae404e27
--- /dev/null
+++ b/tests/qemu-iotests/040.out
@@ -0,0 +1,5 @@
+.........
+----------------------------------------------------------------------
+Ran 9 tests
+
+OK
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index ebb5ca4b4..4b54fa61f 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -46,3 +46,4 @@
 037 rw auto backing
 038 rw auto backing
 039 rw auto
+040 rw auto
-- 
cgit v1.2.3


From 7ef1507045d35bd4d220cf3bfe0e80e7ac101d00 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 28 Sep 2012 17:22:44 +0200
Subject: qerror/block: introduce QERR_BLOCK_JOB_NOT_ACTIVE

The DeviceNotActive text is not a particularly good match, add
a separate text while keeping the same class.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 blockdev.c | 4 ++--
 qerror.h   | 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index cea22e497..d82461208 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1205,7 +1205,7 @@ void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp)
     BlockJob *job = find_block_job(device);
 
     if (!job) {
-        error_set(errp, QERR_DEVICE_NOT_ACTIVE, device);
+        error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
         return;
     }
 
@@ -1217,7 +1217,7 @@ void qmp_block_job_cancel(const char *device, Error **errp)
     BlockJob *job = find_block_job(device);
 
     if (!job) {
-        error_set(errp, QERR_DEVICE_NOT_ACTIVE, device);
+        error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
         return;
     }
 
diff --git a/qerror.h b/qerror.h
index d0a76a4f7..485c77366 100644
--- a/qerror.h
+++ b/qerror.h
@@ -48,6 +48,9 @@ void assert_no_error(Error *err);
 #define QERR_BASE_NOT_FOUND \
     ERROR_CLASS_GENERIC_ERROR, "Base '%s' not found"
 
+#define QERR_BLOCK_JOB_NOT_ACTIVE \
+    ERROR_CLASS_DEVICE_NOT_ACTIVE, "No active block job on device '%s'"
+
 #define QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED \
     ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by device '%s' does not support feature '%s'"
 
-- 
cgit v1.2.3


From 7e03a9342fff50f2a6a4086906fa66e6c6d4351d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 28 Sep 2012 17:22:46 +0200
Subject: block: fix documentation of block_job_cancel_sync

Do this in a separate commit before we move the functions to
blockjob.h.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block_int.h | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/block_int.h b/block_int.h
index 56164a742..6b6b3ab63 100644
--- a/block_int.h
+++ b/block_int.h
@@ -425,15 +425,13 @@ void block_job_cancel(BlockJob *job);
 bool block_job_is_cancelled(BlockJob *job);
 
 /**
- * block_job_cancel:
+ * block_job_cancel_sync:
  * @job: The job to be canceled.
  *
- * Asynchronously cancel the job and wait for it to reach a quiescent
- * state.  Note that the completion callback will still be called
- * asynchronously, hence it is *not* valid to call #bdrv_delete
- * immediately after #block_job_cancel_sync.  Users of block jobs
- * will usually protect the BlockDriverState objects with a reference
- * count, should this be a concern.
+ * Synchronously cancel the job.  The completion callback is called
+ * before the function returns.  The job may actually complete
+ * instead of canceling itself; the circumstances under which this
+ * happens depend on the kind of job that is active.
  *
  * Returns the return value from the job if the job actually completed
  * during the call, or -ECANCELED if it was canceled.
-- 
cgit v1.2.3


From 2f0c9fe64c6a2887047b7eab05cd85b2643234c8 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 28 Sep 2012 17:22:47 +0200
Subject: block: move job APIs to separate files

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 Makefile.objs       |   5 +-
 block.c             | 128 +------------------------------------
 block.h             |   2 +
 block/Makefile.objs |   5 +-
 block/commit.c      |   1 +
 block/stream.c      |   1 +
 block_int.h         | 151 --------------------------------------------
 blockdev.c          |   1 +
 blockjob.c          | 163 +++++++++++++++++++++++++++++++++++++++++++++++
 blockjob.h          | 179 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 10 files changed, 354 insertions(+), 282 deletions(-)
 create mode 100644 blockjob.c
 create mode 100644 blockjob.h

diff --git a/Makefile.objs b/Makefile.objs
index 7c1c68206..b1f3e2254 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -42,7 +42,8 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o
 # block-obj-y is code used by both qemu system emulation and qemu-img
 
 block-obj-y = cutils.o iov.o cache-utils.o qemu-option.o module.o async.o
-block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o uri.o
+block-obj-y += nbd.o block.o blockjob.o aio.o aes.o qemu-config.o
+block-obj-y += qemu-progress.o qemu-sockets.o uri.o
 block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y)
 block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
@@ -59,7 +60,7 @@ endif
 # suppress *all* target specific code in case of system emulation, i.e. a
 # single QEMU executable should support all CPUs and machines.
 
-common-obj-y = $(block-obj-y) blockdev.o
+common-obj-y = $(block-obj-y) blockdev.o block/
 common-obj-y += net.o net/
 common-obj-y += qom/
 common-obj-y += readline.o console.o cursor.o
diff --git a/block.c b/block.c
index d7a6d14a9..8202f2716 100644
--- a/block.c
+++ b/block.c
@@ -26,6 +26,7 @@
 #include "trace.h"
 #include "monitor.h"
 #include "block_int.h"
+#include "blockjob.h"
 #include "module.h"
 #include "qjson.h"
 #include "qemu-coroutine.h"
@@ -4406,130 +4407,3 @@ out:
 
     return ret;
 }
-
-void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
-                       int64_t speed, BlockDriverCompletionFunc *cb,
-                       void *opaque, Error **errp)
-{
-    BlockJob *job;
-
-    if (bs->job || bdrv_in_use(bs)) {
-        error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
-        return NULL;
-    }
-    bdrv_set_in_use(bs, 1);
-
-    job = g_malloc0(job_type->instance_size);
-    job->job_type      = job_type;
-    job->bs            = bs;
-    job->cb            = cb;
-    job->opaque        = opaque;
-    job->busy          = true;
-    bs->job = job;
-
-    /* Only set speed when necessary to avoid NotSupported error */
-    if (speed != 0) {
-        Error *local_err = NULL;
-
-        block_job_set_speed(job, speed, &local_err);
-        if (error_is_set(&local_err)) {
-            bs->job = NULL;
-            g_free(job);
-            bdrv_set_in_use(bs, 0);
-            error_propagate(errp, local_err);
-            return NULL;
-        }
-    }
-    return job;
-}
-
-void block_job_complete(BlockJob *job, int ret)
-{
-    BlockDriverState *bs = job->bs;
-
-    assert(bs->job == job);
-    job->cb(job->opaque, ret);
-    bs->job = NULL;
-    g_free(job);
-    bdrv_set_in_use(bs, 0);
-}
-
-void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
-{
-    Error *local_err = NULL;
-
-    if (!job->job_type->set_speed) {
-        error_set(errp, QERR_NOT_SUPPORTED);
-        return;
-    }
-    job->job_type->set_speed(job, speed, &local_err);
-    if (error_is_set(&local_err)) {
-        error_propagate(errp, local_err);
-        return;
-    }
-
-    job->speed = speed;
-}
-
-void block_job_cancel(BlockJob *job)
-{
-    job->cancelled = true;
-    if (job->co && !job->busy) {
-        qemu_coroutine_enter(job->co, NULL);
-    }
-}
-
-bool block_job_is_cancelled(BlockJob *job)
-{
-    return job->cancelled;
-}
-
-struct BlockCancelData {
-    BlockJob *job;
-    BlockDriverCompletionFunc *cb;
-    void *opaque;
-    bool cancelled;
-    int ret;
-};
-
-static void block_job_cancel_cb(void *opaque, int ret)
-{
-    struct BlockCancelData *data = opaque;
-
-    data->cancelled = block_job_is_cancelled(data->job);
-    data->ret = ret;
-    data->cb(data->opaque, ret);
-}
-
-int block_job_cancel_sync(BlockJob *job)
-{
-    struct BlockCancelData data;
-    BlockDriverState *bs = job->bs;
-
-    assert(bs->job == job);
-
-    /* Set up our own callback to store the result and chain to
-     * the original callback.
-     */
-    data.job = job;
-    data.cb = job->cb;
-    data.opaque = job->opaque;
-    data.ret = -EINPROGRESS;
-    job->cb = block_job_cancel_cb;
-    job->opaque = &data;
-    block_job_cancel(job);
-    while (data.ret == -EINPROGRESS) {
-        qemu_aio_wait();
-    }
-    return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
-}
-
-void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
-{
-    /* Check cancellation *before* setting busy = false, too!  */
-    if (!block_job_is_cancelled(job)) {
-        job->busy = false;
-        co_sleep_ns(clock, ns);
-        job->busy = true;
-    }
-}
diff --git a/block.h b/block.h
index e9249c44e..bd002d573 100644
--- a/block.h
+++ b/block.h
@@ -6,9 +6,11 @@
 #include "qemu-option.h"
 #include "qemu-coroutine.h"
 #include "qobject.h"
+#include "qapi-types.h"
 
 /* block.c */
 typedef struct BlockDriver BlockDriver;
+typedef struct BlockJob BlockJob;
 
 typedef struct BlockDriverInfo {
     /* in bytes, 0 if irrelevant */
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 81fd43cef..554f429d0 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -3,11 +3,12 @@ block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-c
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
-block-obj-y += stream.o
-block-obj-y += commit.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
+
+common-obj-y += stream.o
+common-obj-y += commit.o
diff --git a/block/commit.c b/block/commit.c
index 624ec5f62..cabb470b5 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -14,6 +14,7 @@
 
 #include "trace.h"
 #include "block_int.h"
+#include "blockjob.h"
 #include "qemu/ratelimit.h"
 
 enum {
diff --git a/block/stream.c b/block/stream.c
index c4f87dd5b..57e4be7c6 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -13,6 +13,7 @@
 
 #include "trace.h"
 #include "block_int.h"
+#include "blockjob.h"
 #include "qemu/ratelimit.h"
 
 enum {
diff --git a/block_int.h b/block_int.h
index 6b6b3ab63..61dc73b0e 100644
--- a/block_int.h
+++ b/block_int.h
@@ -67,73 +67,6 @@ typedef struct BlockIOBaseValue {
     uint64_t ios[2];
 } BlockIOBaseValue;
 
-typedef struct BlockJob BlockJob;
-
-/**
- * BlockJobType:
- *
- * A class type for block job objects.
- */
-typedef struct BlockJobType {
-    /** Derived BlockJob struct size */
-    size_t instance_size;
-
-    /** String describing the operation, part of query-block-jobs QMP API */
-    const char *job_type;
-
-    /** Optional callback for job types that support setting a speed limit */
-    void (*set_speed)(BlockJob *job, int64_t speed, Error **errp);
-} BlockJobType;
-
-/**
- * BlockJob:
- *
- * Long-running operation on a BlockDriverState.
- */
-struct BlockJob {
-    /** The job type, including the job vtable.  */
-    const BlockJobType *job_type;
-
-    /** The block device on which the job is operating.  */
-    BlockDriverState *bs;
-
-    /**
-     * The coroutine that executes the job.  If not NULL, it is
-     * reentered when busy is false and the job is cancelled.
-     */
-    Coroutine *co;
-
-    /**
-     * Set to true if the job should cancel itself.  The flag must
-     * always be tested just before toggling the busy flag from false
-     * to true.  After a job has been cancelled, it should only yield
-     * if #qemu_aio_wait will ("sooner or later") reenter the coroutine.
-     */
-    bool cancelled;
-
-    /**
-     * Set to false by the job while it is in a quiescent state, where
-     * no I/O is pending and the job has yielded on any condition
-     * that is not detected by #qemu_aio_wait, such as a timer.
-     */
-    bool busy;
-
-    /** Offset that is published by the query-block-jobs QMP API */
-    int64_t offset;
-
-    /** Length that is published by the query-block-jobs QMP API */
-    int64_t len;
-
-    /** Speed that was set with @block_job_set_speed.  */
-    int64_t speed;
-
-    /** The completion function that will be called when the job completes.  */
-    BlockDriverCompletionFunc *cb;
-
-    /** The opaque value that is passed to the completion function.  */
-    void *opaque;
-};
-
 struct BlockDriver {
     const char *format_name;
     int instance_size;
@@ -354,90 +287,6 @@ void bdrv_set_io_limits(BlockDriverState *bs,
 int is_windows_drive(const char *filename);
 #endif
 
-/**
- * block_job_create:
- * @job_type: The class object for the newly-created job.
- * @bs: The block
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- * @errp: Error object.
- *
- * Create a new long-running block device job and return it.  The job
- * will call @cb asynchronously when the job completes.  Note that
- * @bs may have been closed at the time the @cb it is called.  If
- * this is the case, the job may be reported as either cancelled or
- * completed.
- *
- * This function is not part of the public job interface; it should be
- * called from a wrapper that is specific to the job type.
- */
-void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
-                       int64_t speed, BlockDriverCompletionFunc *cb,
-                       void *opaque, Error **errp);
-
-/**
- * block_job_sleep_ns:
- * @job: The job that calls the function.
- * @clock: The clock to sleep on.
- * @ns: How many nanoseconds to stop for.
- *
- * Put the job to sleep (assuming that it wasn't canceled) for @ns
- * nanoseconds.  Canceling the job will interrupt the wait immediately.
- */
-void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns);
-
-/**
- * block_job_complete:
- * @job: The job being completed.
- * @ret: The status code.
- *
- * Call the completion function that was registered at creation time, and
- * free @job.
- */
-void block_job_complete(BlockJob *job, int ret);
-
-/**
- * block_job_set_speed:
- * @job: The job to set the speed for.
- * @speed: The new value
- * @errp: Error object.
- *
- * Set a rate-limiting parameter for the job; the actual meaning may
- * vary depending on the job type.
- */
-void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp);
-
-/**
- * block_job_cancel:
- * @job: The job to be canceled.
- *
- * Asynchronously cancel the specified job.
- */
-void block_job_cancel(BlockJob *job);
-
-/**
- * block_job_is_cancelled:
- * @job: The job being queried.
- *
- * Returns whether the job is scheduled for cancellation.
- */
-bool block_job_is_cancelled(BlockJob *job);
-
-/**
- * block_job_cancel_sync:
- * @job: The job to be canceled.
- *
- * Synchronously cancel the job.  The completion callback is called
- * before the function returns.  The job may actually complete
- * instead of canceling itself; the circumstances under which this
- * happens depend on the kind of job that is active.
- *
- * Returns the return value from the job if the job actually completed
- * during the call, or -ECANCELED if it was canceled.
- */
-int block_job_cancel_sync(BlockJob *job);
-
 /**
  * stream_start:
  * @bs: Block device to operate on.
diff --git a/blockdev.c b/blockdev.c
index d82461208..d3f91c0ae 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -9,6 +9,7 @@
 
 #include "blockdev.h"
 #include "hw/block-common.h"
+#include "blockjob.h"
 #include "monitor.h"
 #include "qerror.h"
 #include "qemu-option.h"
diff --git a/blockjob.c b/blockjob.c
new file mode 100644
index 000000000..9737a43b2
--- /dev/null
+++ b/blockjob.c
@@ -0,0 +1,163 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2011 IBM Corp.
+ * Copyright (c) 2012 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "config-host.h"
+#include "qemu-common.h"
+#include "trace.h"
+#include "monitor.h"
+#include "block.h"
+#include "blockjob.h"
+#include "block_int.h"
+#include "qjson.h"
+#include "qemu-coroutine.h"
+#include "qmp-commands.h"
+#include "qemu-timer.h"
+
+void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
+                       int64_t speed, BlockDriverCompletionFunc *cb,
+                       void *opaque, Error **errp)
+{
+    BlockJob *job;
+
+    if (bs->job || bdrv_in_use(bs)) {
+        error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
+        return NULL;
+    }
+    bdrv_set_in_use(bs, 1);
+
+    job = g_malloc0(job_type->instance_size);
+    job->job_type      = job_type;
+    job->bs            = bs;
+    job->cb            = cb;
+    job->opaque        = opaque;
+    job->busy          = true;
+    bs->job = job;
+
+    /* Only set speed when necessary to avoid NotSupported error */
+    if (speed != 0) {
+        Error *local_err = NULL;
+
+        block_job_set_speed(job, speed, &local_err);
+        if (error_is_set(&local_err)) {
+            bs->job = NULL;
+            g_free(job);
+            bdrv_set_in_use(bs, 0);
+            error_propagate(errp, local_err);
+            return NULL;
+        }
+    }
+    return job;
+}
+
+void block_job_complete(BlockJob *job, int ret)
+{
+    BlockDriverState *bs = job->bs;
+
+    assert(bs->job == job);
+    job->cb(job->opaque, ret);
+    bs->job = NULL;
+    g_free(job);
+    bdrv_set_in_use(bs, 0);
+}
+
+void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
+{
+    Error *local_err = NULL;
+
+    if (!job->job_type->set_speed) {
+        error_set(errp, QERR_NOT_SUPPORTED);
+        return;
+    }
+    job->job_type->set_speed(job, speed, &local_err);
+    if (error_is_set(&local_err)) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    job->speed = speed;
+}
+
+void block_job_cancel(BlockJob *job)
+{
+    job->cancelled = true;
+    if (job->co && !job->busy) {
+        qemu_coroutine_enter(job->co, NULL);
+    }
+}
+
+bool block_job_is_cancelled(BlockJob *job)
+{
+    return job->cancelled;
+}
+
+struct BlockCancelData {
+    BlockJob *job;
+    BlockDriverCompletionFunc *cb;
+    void *opaque;
+    bool cancelled;
+    int ret;
+};
+
+static void block_job_cancel_cb(void *opaque, int ret)
+{
+    struct BlockCancelData *data = opaque;
+
+    data->cancelled = block_job_is_cancelled(data->job);
+    data->ret = ret;
+    data->cb(data->opaque, ret);
+}
+
+int block_job_cancel_sync(BlockJob *job)
+{
+    struct BlockCancelData data;
+    BlockDriverState *bs = job->bs;
+
+    assert(bs->job == job);
+
+    /* Set up our own callback to store the result and chain to
+     * the original callback.
+     */
+    data.job = job;
+    data.cb = job->cb;
+    data.opaque = job->opaque;
+    data.ret = -EINPROGRESS;
+    job->cb = block_job_cancel_cb;
+    job->opaque = &data;
+    block_job_cancel(job);
+    while (data.ret == -EINPROGRESS) {
+        qemu_aio_wait();
+    }
+    return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
+}
+
+void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
+{
+    /* Check cancellation *before* setting busy = false, too!  */
+    if (!block_job_is_cancelled(job)) {
+        job->busy = false;
+        co_sleep_ns(clock, ns);
+        job->busy = true;
+    }
+}
diff --git a/blockjob.h b/blockjob.h
new file mode 100644
index 000000000..753f5bcd1
--- /dev/null
+++ b/blockjob.h
@@ -0,0 +1,179 @@
+/*
+ * Declarations for long-running block device operations
+ *
+ * Copyright (c) 2011 IBM Corp.
+ * Copyright (c) 2012 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCKJOB_H
+#define BLOCKJOB_H 1
+
+#include "block.h"
+
+/**
+ * BlockJobType:
+ *
+ * A class type for block job objects.
+ */
+typedef struct BlockJobType {
+    /** Derived BlockJob struct size */
+    size_t instance_size;
+
+    /** String describing the operation, part of query-block-jobs QMP API */
+    const char *job_type;
+
+    /** Optional callback for job types that support setting a speed limit */
+    void (*set_speed)(BlockJob *job, int64_t speed, Error **errp);
+} BlockJobType;
+
+/**
+ * BlockJob:
+ *
+ * Long-running operation on a BlockDriverState.
+ */
+struct BlockJob {
+    /** The job type, including the job vtable.  */
+    const BlockJobType *job_type;
+
+    /** The block device on which the job is operating.  */
+    BlockDriverState *bs;
+
+    /**
+     * The coroutine that executes the job.  If not NULL, it is
+     * reentered when busy is false and the job is cancelled.
+     */
+    Coroutine *co;
+
+    /**
+     * Set to true if the job should cancel itself.  The flag must
+     * always be tested just before toggling the busy flag from false
+     * to true.  After a job has been cancelled, it should only yield
+     * if #qemu_aio_wait will ("sooner or later") reenter the coroutine.
+     */
+    bool cancelled;
+
+    /**
+     * Set to false by the job while it is in a quiescent state, where
+     * no I/O is pending and the job has yielded on any condition
+     * that is not detected by #qemu_aio_wait, such as a timer.
+     */
+    bool busy;
+
+    /** Offset that is published by the query-block-jobs QMP API */
+    int64_t offset;
+
+    /** Length that is published by the query-block-jobs QMP API */
+    int64_t len;
+
+    /** Speed that was set with @block_job_set_speed.  */
+    int64_t speed;
+
+    /** The completion function that will be called when the job completes.  */
+    BlockDriverCompletionFunc *cb;
+
+    /** The opaque value that is passed to the completion function.  */
+    void *opaque;
+};
+
+/**
+ * block_job_create:
+ * @job_type: The class object for the newly-created job.
+ * @bs: The block
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ * Create a new long-running block device job and return it.  The job
+ * will call @cb asynchronously when the job completes.  Note that
+ * @bs may have been closed at the time the @cb it is called.  If
+ * this is the case, the job may be reported as either cancelled or
+ * completed.
+ *
+ * This function is not part of the public job interface; it should be
+ * called from a wrapper that is specific to the job type.
+ */
+void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
+                       int64_t speed, BlockDriverCompletionFunc *cb,
+                       void *opaque, Error **errp);
+
+/**
+ * block_job_sleep_ns:
+ * @job: The job that calls the function.
+ * @clock: The clock to sleep on.
+ * @ns: How many nanoseconds to stop for.
+ *
+ * Put the job to sleep (assuming that it wasn't canceled) for @ns
+ * nanoseconds.  Canceling the job will interrupt the wait immediately.
+ */
+void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns);
+
+/**
+ * block_job_complete:
+ * @job: The job being completed.
+ * @ret: The status code.
+ *
+ * Call the completion function that was registered at creation time, and
+ * free @job.
+ */
+void block_job_complete(BlockJob *job, int ret);
+
+/**
+ * block_job_set_speed:
+ * @job: The job to set the speed for.
+ * @speed: The new value
+ * @errp: Error object.
+ *
+ * Set a rate-limiting parameter for the job; the actual meaning may
+ * vary depending on the job type.
+ */
+void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp);
+
+/**
+ * block_job_cancel:
+ * @job: The job to be canceled.
+ *
+ * Asynchronously cancel the specified job.
+ */
+void block_job_cancel(BlockJob *job);
+
+/**
+ * block_job_is_cancelled:
+ * @job: The job being queried.
+ *
+ * Returns whether the job is scheduled for cancellation.
+ */
+bool block_job_is_cancelled(BlockJob *job);
+
+/**
+ * block_job_cancel_sync:
+ * @job: The job to be canceled.
+ *
+ * Synchronously cancel the job.  The completion callback is called
+ * before the function returns.  The job may actually complete
+ * instead of canceling itself; the circumstances under which this
+ * happens depend on the kind of job that is active.
+ *
+ * Returns the return value from the job if the job actually completed
+ * during the call, or -ECANCELED if it was canceled.
+ */
+int block_job_cancel_sync(BlockJob *job);
+
+#endif
-- 
cgit v1.2.3


From 30e628b709fcd30db298878e435e3bc93919c48c Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 28 Sep 2012 17:22:48 +0200
Subject: block: add block_job_query

Extract it out of the implementation of info block-jobs.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 blockdev.c | 15 ++-------------
 blockjob.c | 11 +++++++++++
 blockjob.h |  8 ++++++++
 3 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index d3f91c0ae..9a98ce9c8 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1232,19 +1232,8 @@ static void do_qmp_query_block_jobs_one(void *opaque, BlockDriverState *bs)
     BlockJob *job = bs->job;
 
     if (job) {
-        BlockJobInfoList *elem;
-        BlockJobInfo *info = g_new(BlockJobInfo, 1);
-        *info = (BlockJobInfo){
-            .type   = g_strdup(job->job_type->job_type),
-            .device = g_strdup(bdrv_get_device_name(bs)),
-            .len    = job->len,
-            .offset = job->offset,
-            .speed  = job->speed,
-        };
-
-        elem = g_new0(BlockJobInfoList, 1);
-        elem->value = info;
-
+        BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1);
+        elem->value = block_job_query(bs->job);
         (*prev)->next = elem;
         *prev = elem;
     }
diff --git a/blockjob.c b/blockjob.c
index 9737a43b2..dea63f8c1 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -161,3 +161,14 @@ void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
         job->busy = true;
     }
 }
+
+BlockJobInfo *block_job_query(BlockJob *job)
+{
+    BlockJobInfo *info = g_new0(BlockJobInfo, 1);
+    info->type   = g_strdup(job->job_type->job_type);
+    info->device = g_strdup(bdrv_get_device_name(job->bs));
+    info->len    = job->len;
+    info->offset = job->offset;
+    info->speed  = job->speed;
+    return info;
+}
diff --git a/blockjob.h b/blockjob.h
index 753f5bcd1..f3d8d58ce 100644
--- a/blockjob.h
+++ b/blockjob.h
@@ -162,6 +162,14 @@ void block_job_cancel(BlockJob *job);
  */
 bool block_job_is_cancelled(BlockJob *job);
 
+/**
+ * block_job_query:
+ * @job: The job to get information about.
+ *
+ * Return information about a job.
+ */
+BlockJobInfo *block_job_query(BlockJob *job);
+
 /**
  * block_job_cancel_sync:
  * @job: The job to be canceled.
-- 
cgit v1.2.3


From 8d65883fff22e00d70f5880a26b7a1248c59a2d8 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 28 Sep 2012 17:22:49 +0200
Subject: qmp: add 'busy' member to BlockJobInfo

Because pausing a job is asynchronous, we need to know whether it has
completed.  This is described by the "busy" field of BlockJob; copy it
to BlockJobInfo.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 blockjob.c       | 1 +
 qapi-schema.json | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/blockjob.c b/blockjob.c
index dea63f8c1..64c9d2dda 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -168,6 +168,7 @@ BlockJobInfo *block_job_query(BlockJob *job)
     info->type   = g_strdup(job->job_type->job_type);
     info->device = g_strdup(bdrv_get_device_name(job->bs));
     info->len    = job->len;
+    info->busy   = job->busy;
     info->offset = job->offset;
     info->speed  = job->speed;
     return info;
diff --git a/qapi-schema.json b/qapi-schema.json
index 58165453f..6fc6edaa2 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1098,6 +1098,9 @@
 #
 # @len: the maximum progress value
 #
+# @busy: false if the job is known to be in a quiescent state, with
+#        no pending I/O.  Since 1.3.
+#
 # @offset: the current progress value
 #
 # @speed: the rate limit, bytes per second
@@ -1106,7 +1109,7 @@
 ##
 { 'type': 'BlockJobInfo',
   'data': {'type': 'str', 'device': 'str', 'len': 'int',
-           'offset': 'int', 'speed': 'int'} }
+           'offset': 'int', 'busy': 'bool', 'speed': 'int'} }
 
 ##
 # @query-block-jobs:
-- 
cgit v1.2.3


From 8acc72a4d20910d522516dab31272fe66da8da28 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 28 Sep 2012 17:22:50 +0200
Subject: block: add support for job pause/resume

Job pausing reuses the existing support for cancellable sleeps.  A pause
happens at the next sleeping point and lasts until the coroutine is
re-entered explicitly.  Cancellation was already doing a forced resume,
so implement it explicitly in terms of resume.

Paused jobs cannot be canceled without first resuming them.  This ensures
that I/O errors are never missed by management.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 blockdev.c       |  4 ++++
 blockjob.c       | 35 ++++++++++++++++++++++++++++++-----
 blockjob.h       | 31 +++++++++++++++++++++++++++++++
 qapi-schema.json |  5 ++++-
 qerror.h         |  3 +++
 5 files changed, 72 insertions(+), 6 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 9a98ce9c8..612dd71f2 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1221,6 +1221,10 @@ void qmp_block_job_cancel(const char *device, Error **errp)
         error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
         return;
     }
+    if (job->paused) {
+        error_set(errp, QERR_BLOCK_JOB_PAUSED, device);
+        return;
+    }
 
     trace_qmp_block_job_cancel(job);
     block_job_cancel(job);
diff --git a/blockjob.c b/blockjob.c
index 64c9d2dda..8219f7397 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -99,14 +99,30 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
     job->speed = speed;
 }
 
-void block_job_cancel(BlockJob *job)
+void block_job_pause(BlockJob *job)
 {
-    job->cancelled = true;
+    job->paused = true;
+}
+
+bool block_job_is_paused(BlockJob *job)
+{
+    return job->paused;
+}
+
+void block_job_resume(BlockJob *job)
+{
+    job->paused = false;
     if (job->co && !job->busy) {
         qemu_coroutine_enter(job->co, NULL);
     }
 }
 
+void block_job_cancel(BlockJob *job)
+{
+    job->cancelled = true;
+    block_job_resume(job);
+}
+
 bool block_job_is_cancelled(BlockJob *job)
 {
     return job->cancelled;
@@ -154,12 +170,20 @@ int block_job_cancel_sync(BlockJob *job)
 
 void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
 {
+    assert(job->busy);
+
     /* Check cancellation *before* setting busy = false, too!  */
-    if (!block_job_is_cancelled(job)) {
-        job->busy = false;
+    if (block_job_is_cancelled(job)) {
+        return;
+    }
+
+    job->busy = false;
+    if (block_job_is_paused(job)) {
+        qemu_coroutine_yield();
+    } else {
         co_sleep_ns(clock, ns);
-        job->busy = true;
     }
+    job->busy = true;
 }
 
 BlockJobInfo *block_job_query(BlockJob *job)
@@ -169,6 +193,7 @@ BlockJobInfo *block_job_query(BlockJob *job)
     info->device = g_strdup(bdrv_get_device_name(job->bs));
     info->len    = job->len;
     info->busy   = job->busy;
+    info->paused = job->paused;
     info->offset = job->offset;
     info->speed  = job->speed;
     return info;
diff --git a/blockjob.h b/blockjob.h
index f3d8d58ce..ece5afa75 100644
--- a/blockjob.h
+++ b/blockjob.h
@@ -69,6 +69,12 @@ struct BlockJob {
      */
     bool cancelled;
 
+    /**
+     * Set to true if the job is either paused, or will pause itself
+     * as soon as possible (if busy == true).
+     */
+    bool paused;
+
     /**
      * Set to false by the job while it is in a quiescent state, where
      * no I/O is pending and the job has yielded on any condition
@@ -170,6 +176,31 @@ bool block_job_is_cancelled(BlockJob *job);
  */
 BlockJobInfo *block_job_query(BlockJob *job);
 
+/**
+ * block_job_pause:
+ * @job: The job to be paused.
+ *
+ * Asynchronously pause the specified job.
+ */
+void block_job_pause(BlockJob *job);
+
+/**
+ * block_job_resume:
+ * @job: The job to be resumed.
+ *
+ * Resume the specified job.
+ */
+void block_job_resume(BlockJob *job);
+
+/**
+ * block_job_is_paused:
+ * @job: The job being queried.
+ *
+ * Returns whether the job is currently paused, or will pause
+ * as soon as it reaches a sleeping point.
+ */
+bool block_job_is_paused(BlockJob *job);
+
 /**
  * block_job_cancel_sync:
  * @job: The job to be canceled.
diff --git a/qapi-schema.json b/qapi-schema.json
index 6fc6edaa2..86a6c7fe9 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1101,6 +1101,9 @@
 # @busy: false if the job is known to be in a quiescent state, with
 #        no pending I/O.  Since 1.3.
 #
+# @paused: whether the job is paused or, if @busy is true, will
+#          pause itself as soon as possible.  Since 1.3.
+#
 # @offset: the current progress value
 #
 # @speed: the rate limit, bytes per second
@@ -1109,7 +1112,7 @@
 ##
 { 'type': 'BlockJobInfo',
   'data': {'type': 'str', 'device': 'str', 'len': 'int',
-           'offset': 'int', 'busy': 'bool', 'speed': 'int'} }
+           'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int'} }
 
 ##
 # @query-block-jobs:
diff --git a/qerror.h b/qerror.h
index 485c77366..c91708cc3 100644
--- a/qerror.h
+++ b/qerror.h
@@ -51,6 +51,9 @@ void assert_no_error(Error *err);
 #define QERR_BLOCK_JOB_NOT_ACTIVE \
     ERROR_CLASS_DEVICE_NOT_ACTIVE, "No active block job on device '%s'"
 
+#define QERR_BLOCK_JOB_PAUSED \
+    ERROR_CLASS_GENERIC_ERROR, "The block job for device '%s' is currently paused"
+
 #define QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED \
     ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by device '%s' does not support feature '%s'"
 
-- 
cgit v1.2.3


From 6e37fb811ac86739e5ed30dba3a8e4848bd21b56 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 28 Sep 2012 17:22:51 +0200
Subject: qmp: add block-job-pause and block-job-resume

Add QMP commands matching the functionality.

Paused jobs cannot be canceled without first resuming them.  This
ensures that I/O errors are never missed by management.  However, an
optional force argument can be specified to allow that.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 blockdev.c       | 35 +++++++++++++++++++++++++++++++++--
 hmp-commands.hx  | 35 ++++++++++++++++++++++++++++++++---
 hmp.c            | 23 ++++++++++++++++++++++-
 hmp.h            |  2 ++
 qapi-schema.json | 46 +++++++++++++++++++++++++++++++++++++++++++++-
 qmp-commands.hx  | 12 +++++++++++-
 trace-events     |  2 ++
 7 files changed, 147 insertions(+), 8 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 612dd71f2..f097e5743 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1213,15 +1213,20 @@ void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp)
     block_job_set_speed(job, speed, errp);
 }
 
-void qmp_block_job_cancel(const char *device, Error **errp)
+void qmp_block_job_cancel(const char *device,
+                          bool has_force, bool force, Error **errp)
 {
     BlockJob *job = find_block_job(device);
 
+    if (!has_force) {
+        force = false;
+    }
+
     if (!job) {
         error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
         return;
     }
-    if (job->paused) {
+    if (job->paused && !force) {
         error_set(errp, QERR_BLOCK_JOB_PAUSED, device);
         return;
     }
@@ -1230,6 +1235,32 @@ void qmp_block_job_cancel(const char *device, Error **errp)
     block_job_cancel(job);
 }
 
+void qmp_block_job_pause(const char *device, Error **errp)
+{
+    BlockJob *job = find_block_job(device);
+
+    if (!job) {
+        error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
+        return;
+    }
+
+    trace_qmp_block_job_pause(job);
+    block_job_pause(job);
+}
+
+void qmp_block_job_resume(const char *device, Error **errp)
+{
+    BlockJob *job = find_block_job(device);
+
+    if (!job) {
+        error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
+        return;
+    }
+
+    trace_qmp_block_job_resume(job);
+    block_job_resume(job);
+}
+
 static void do_qmp_query_block_jobs_one(void *opaque, BlockDriverState *bs)
 {
     BlockJobInfoList **prev = opaque;
diff --git a/hmp-commands.hx b/hmp-commands.hx
index ed67e997f..27d90a24a 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -99,9 +99,10 @@ ETEXI
 
     {
         .name       = "block_job_cancel",
-        .args_type  = "device:B",
-        .params     = "device",
-        .help       = "stop an active background block operation",
+        .args_type  = "force:-f,device:B",
+        .params     = "[-f] device",
+        .help       = "stop an active background block operation (use -f"
+                      "\n\t\t\t if the operation is currently paused)",
         .mhandler.cmd = hmp_block_job_cancel,
     },
 
@@ -109,6 +110,34 @@ STEXI
 @item block_job_cancel
 @findex block_job_cancel
 Stop an active block streaming operation.
+ETEXI
+
+    {
+        .name       = "block_job_pause",
+        .args_type  = "device:B",
+        .params     = "device",
+        .help       = "pause an active background block operation",
+        .mhandler.cmd = hmp_block_job_pause,
+    },
+
+STEXI
+@item block_job_pause
+@findex block_job_pause
+Pause an active block streaming operation.
+ETEXI
+
+    {
+        .name       = "block_job_resume",
+        .args_type  = "device:B",
+        .params     = "device",
+        .help       = "resume a paused background block operation",
+        .mhandler.cmd = hmp_block_job_resume,
+    },
+
+STEXI
+@item block_job_resume
+@findex block_job_resume
+Resume a paused block streaming operation.
 ETEXI
 
     {
diff --git a/hmp.c b/hmp.c
index ba6fbd3dc..55601f7da 100644
--- a/hmp.c
+++ b/hmp.c
@@ -950,8 +950,29 @@ void hmp_block_job_cancel(Monitor *mon, const QDict *qdict)
 {
     Error *error = NULL;
     const char *device = qdict_get_str(qdict, "device");
+    bool force = qdict_get_try_bool(qdict, "force", 0);
 
-    qmp_block_job_cancel(device, &error);
+    qmp_block_job_cancel(device, true, force, &error);
+
+    hmp_handle_error(mon, &error);
+}
+
+void hmp_block_job_pause(Monitor *mon, const QDict *qdict)
+{
+    Error *error = NULL;
+    const char *device = qdict_get_str(qdict, "device");
+
+    qmp_block_job_pause(device, &error);
+
+    hmp_handle_error(mon, &error);
+}
+
+void hmp_block_job_resume(Monitor *mon, const QDict *qdict)
+{
+    Error *error = NULL;
+    const char *device = qdict_get_str(qdict, "device");
+
+    qmp_block_job_resume(device, &error);
 
     hmp_handle_error(mon, &error);
 }
diff --git a/hmp.h b/hmp.h
index 48b9c59f8..71ea38452 100644
--- a/hmp.h
+++ b/hmp.h
@@ -64,6 +64,8 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict);
 void hmp_block_stream(Monitor *mon, const QDict *qdict);
 void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict);
 void hmp_block_job_cancel(Monitor *mon, const QDict *qdict);
+void hmp_block_job_pause(Monitor *mon, const QDict *qdict);
+void hmp_block_job_resume(Monitor *mon, const QDict *qdict);
 void hmp_migrate(Monitor *mon, const QDict *qdict);
 void hmp_device_del(Monitor *mon, const QDict *qdict);
 void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict);
diff --git a/qapi-schema.json b/qapi-schema.json
index 86a6c7fe9..0f2b1a0a1 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1893,12 +1893,56 @@
 #
 # @device: the device name
 #
+# @force: #optional whether to allow cancellation of a paused job (default
+#         false).  Since 1.3.
+#
 # Returns: Nothing on success
 #          If no background operation is active on this device, DeviceNotActive
 #
 # Since: 1.1
 ##
-{ 'command': 'block-job-cancel', 'data': { 'device': 'str' } }
+{ 'command': 'block-job-cancel', 'data': { 'device': 'str', '*force': 'bool' } }
+
+##
+# @block-job-pause:
+#
+# Pause an active background block operation.
+#
+# This command returns immediately after marking the active background block
+# operation for pausing.  It is an error to call this command if no
+# operation is in progress.  Pausing an already paused job has no cumulative
+# effect; a single block-job-resume command will resume the job.
+#
+# The operation will pause as soon as possible.  No event is emitted when
+# the operation is actually paused.  Cancelling a paused job automatically
+# resumes it.
+#
+# @device: the device name
+#
+# Returns: Nothing on success
+#          If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.3
+##
+{ 'command': 'block-job-pause', 'data': { 'device': 'str' } }
+
+##
+# @block-job-resume:
+#
+# Resume an active background block operation.
+#
+# This command returns immediately after resuming a paused background block
+# operation.  It is an error to call this command if no operation is in
+# progress.  Resuming an already running job is not an error.
+#
+# @device: the device name
+#
+# Returns: Nothing on success
+#          If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.3
+##
+{ 'command': 'block-job-resume', 'data': { 'device': 'str' } }
 
 ##
 # @ObjectTypeInfo:
diff --git a/qmp-commands.hx b/qmp-commands.hx
index a55a3f54b..71d7c25f5 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -805,9 +805,19 @@ EQMP
 
     {
         .name       = "block-job-cancel",
-        .args_type  = "device:B",
+        .args_type  = "device:B,force:b?",
         .mhandler.cmd_new = qmp_marshal_input_block_job_cancel,
     },
+    {
+        .name       = "block-job-pause",
+        .args_type  = "device:B",
+        .mhandler.cmd_new = qmp_marshal_input_block_job_pause,
+    },
+    {
+        .name       = "block-job-resume",
+        .args_type  = "device:B",
+        .mhandler.cmd_new = qmp_marshal_input_block_job_resume,
+    },
     {
         .name       = "transaction",
         .args_type  = "actions:q",
diff --git a/trace-events b/trace-events
index 29771a7b4..42b66f19f 100644
--- a/trace-events
+++ b/trace-events
@@ -79,6 +79,8 @@ commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "
 
 # blockdev.c
 qmp_block_job_cancel(void *job) "job %p"
+qmp_block_job_pause(void *job) "job %p"
+qmp_block_job_resume(void *job) "job %p"
 block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
 qmp_block_stream(void *bs, void *job) "bs %p job %p"
 
-- 
cgit v1.2.3


From 0c81734765c9af1705f8e531b9431d63ee8ffd3d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 28 Sep 2012 17:22:52 +0200
Subject: qemu-iotests: add test for pausing a streaming operation

These check that a paused streaming job does not advance its offset.

Sometimes the new test fails; the map is different between the source
and the destination of the streaming because qemu-io does not always
pack adjacent clusters that have the same allocated/unallocated state.
However, this also happens with the existing test_stream testcase, and
is better fixed in qemu-io.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/qemu-iotests/030     | 40 ++++++++++++++++++++++++++++++++++++++--
 tests/qemu-iotests/030.out |  4 ++--
 tests/qemu-iotests/group   |  2 +-
 3 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index 55b16f81d..dfacdf11a 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -18,6 +18,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 
+import time
 import os
 import iotests
 from iotests import qemu_img, qemu_io
@@ -98,6 +99,43 @@ class TestSingleDrive(ImageStreamingTestCase):
                          qemu_io('-c', 'map', test_img),
                          'image file map does not match backing file after streaming')
 
+    def test_stream_pause(self):
+        self.assert_no_active_streams()
+
+        result = self.vm.qmp('block-stream', device='drive0')
+        self.assert_qmp(result, 'return', {})
+
+        result = self.vm.qmp('block-job-pause', device='drive0')
+        self.assert_qmp(result, 'return', {})
+
+        time.sleep(1)
+        result = self.vm.qmp('query-block-jobs')
+        offset = self.dictpath(result, 'return[0]/offset')
+
+        time.sleep(1)
+        result = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(result, 'return[0]/offset', offset)
+
+        result = self.vm.qmp('block-job-resume', device='drive0')
+        self.assert_qmp(result, 'return', {})
+
+        completed = False
+        while not completed:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_COMPLETED':
+                    self.assert_qmp(event, 'data/type', 'stream')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/offset', self.image_len)
+                    self.assert_qmp(event, 'data/len', self.image_len)
+                    completed = True
+
+        self.assert_no_active_streams()
+        self.vm.shutdown()
+
+        self.assertEqual(qemu_io('-c', 'map', backing_img),
+                         qemu_io('-c', 'map', test_img),
+                         'image file map does not match backing file after streaming')
+
     def test_stream_partial(self):
         self.assert_no_active_streams()
 
@@ -173,8 +211,6 @@ class TestStreamStop(ImageStreamingTestCase):
         os.remove(backing_img)
 
     def test_stream_stop(self):
-        import time
-
         self.assert_no_active_streams()
 
         result = self.vm.qmp('block-stream', device='drive0')
diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out
index 2f7d3902f..594c16f49 100644
--- a/tests/qemu-iotests/030.out
+++ b/tests/qemu-iotests/030.out
@@ -1,5 +1,5 @@
-.......
+........
 ----------------------------------------------------------------------
-Ran 7 tests
+Ran 8 tests
 
 OK
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 4b54fa61f..66d2ba968 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -36,7 +36,7 @@
 027 rw auto quick
 028 rw backing auto
 029 rw auto quick
-030 rw auto
+030 rw auto backing
 031 rw auto quick
 032 rw auto
 033 rw auto
-- 
cgit v1.2.3


From ff06f5f351c3b19d5cdcb8bcb9f9cc9a01cac066 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 28 Sep 2012 17:22:54 +0200
Subject: iostatus: rename BlockErrorAction, BlockQMPEventAction

We want to remove knowledge of BLOCK_ERR_STOP_ENOSPC from drivers;
drivers should only be told whether to stop/report/ignore the error.
On the other hand, we want to keep using the nicer BlockErrorAction
name in the drivers.  So rename the enums, while leaving aside the
names of the enum values for now.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c         |  8 ++++----
 block.h         | 12 ++++++------
 block_int.h     |  2 +-
 hw/ide/core.c   |  2 +-
 hw/scsi-disk.c  |  2 +-
 hw/virtio-blk.c |  2 +-
 6 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/block.c b/block.c
index 8202f2716..7b4508295 100644
--- a/block.c
+++ b/block.c
@@ -1387,7 +1387,7 @@ void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
 }
 
 void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
-                               BlockQMPEventAction action, int is_read)
+                               BlockErrorAction action, int is_read)
 {
     QObject *data;
     const char *action_str;
@@ -2474,14 +2474,14 @@ void bdrv_set_io_limits(BlockDriverState *bs,
     bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
 }
 
-void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
-                       BlockErrorAction on_write_error)
+void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
+                       BlockdevOnError on_write_error)
 {
     bs->on_read_error = on_read_error;
     bs->on_write_error = on_write_error;
 }
 
-BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
+BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, int is_read)
 {
     return is_read ? bs->on_read_error : bs->on_write_error;
 }
diff --git a/block.h b/block.h
index bd002d573..038621f42 100644
--- a/block.h
+++ b/block.h
@@ -93,11 +93,11 @@ typedef struct BlockDevOps {
 typedef enum {
     BLOCK_ERR_REPORT, BLOCK_ERR_IGNORE, BLOCK_ERR_STOP_ENOSPC,
     BLOCK_ERR_STOP_ANY
-} BlockErrorAction;
+} BlockdevOnError;
 
 typedef enum {
     BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
-} BlockQMPEventAction;
+} BlockErrorAction;
 
 typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
 
@@ -114,7 +114,7 @@ void bdrv_iostatus_disable(BlockDriverState *bs);
 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs);
 void bdrv_iostatus_set_err(BlockDriverState *bs, int error);
 void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
-                               BlockQMPEventAction action, int is_read);
+                               BlockErrorAction action, int is_read);
 void bdrv_info_print(Monitor *mon, const QObject *data);
 void bdrv_info(Monitor *mon, QObject **ret_data);
 void bdrv_stats_print(Monitor *mon, const QObject *data);
@@ -284,9 +284,9 @@ int bdrv_has_zero_init(BlockDriverState *bs);
 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
                       int *pnum);
 
-void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
-                       BlockErrorAction on_write_error);
-BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read);
+void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
+                       BlockdevOnError on_write_error);
+BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, int is_read);
 int bdrv_is_read_only(BlockDriverState *bs);
 int bdrv_is_sg(BlockDriverState *bs);
 int bdrv_enable_write_cache(BlockDriverState *bs);
diff --git a/block_int.h b/block_int.h
index 61dc73b0e..b98c770b9 100644
--- a/block_int.h
+++ b/block_int.h
@@ -262,7 +262,7 @@ struct BlockDriverState {
 
     /* NOTE: the following infos are only hints for real hardware
        drivers. They are not used by the block driver */
-    BlockErrorAction on_read_error, on_write_error;
+    BlockdevOnError on_read_error, on_write_error;
     bool iostatus_enabled;
     BlockDeviceIoStatus iostatus;
     char device_name[32];
diff --git a/hw/ide/core.c b/hw/ide/core.c
index d6fb69c63..57b9fa458 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -557,7 +557,7 @@ void ide_dma_error(IDEState *s)
 static int ide_handle_rw_error(IDEState *s, int error, int op)
 {
     int is_read = (op & BM_STATUS_RETRY_READ);
-    BlockErrorAction action = bdrv_get_on_error(s->bs, is_read);
+    BlockdevOnError action = bdrv_get_on_error(s->bs, is_read);
 
     if (action == BLOCK_ERR_IGNORE) {
         bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_IGNORE, is_read);
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 95e91585e..fef83a360 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -388,7 +388,7 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error)
 {
     int is_read = (r->req.cmd.xfer == SCSI_XFER_FROM_DEV);
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
-    BlockErrorAction action = bdrv_get_on_error(s->qdev.conf.bs, is_read);
+    BlockdevOnError action = bdrv_get_on_error(s->qdev.conf.bs, is_read);
 
     if (action == BLOCK_ERR_IGNORE) {
         bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_IGNORE, is_read);
diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index 6f6d172fd..01e537dc9 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -66,7 +66,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, int status)
 static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
     int is_read)
 {
-    BlockErrorAction action = bdrv_get_on_error(req->dev->bs, is_read);
+    BlockdevOnError action = bdrv_get_on_error(req->dev->bs, is_read);
     VirtIOBlock *s = req->dev;
 
     if (action == BLOCK_ERR_IGNORE) {
-- 
cgit v1.2.3


From 92aa5c6d77ac29574c1717bcf57827fa1e586f31 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 28 Sep 2012 17:22:55 +0200
Subject: iostatus: move BlockdevOnError declaration to QAPI

This will let block-stream reuse the enum.  Places that used the enums
are renamed accordingly.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c           |  6 +++---
 block.h           |  5 -----
 block/commit.c    | 14 +++++++-------
 block_int.h       |  2 +-
 blockdev.c        | 14 +++++++-------
 hw/fdc.c          |  4 ++--
 hw/ide/core.c     |  6 +++---
 hw/scsi-disk.c    |  6 +++---
 hw/scsi-generic.c |  4 ++--
 hw/virtio-blk.c   |  6 +++---
 qapi-schema.json  | 23 +++++++++++++++++++++++
 11 files changed, 54 insertions(+), 36 deletions(-)

diff --git a/block.c b/block.c
index 7b4508295..1c3ebd785 100644
--- a/block.c
+++ b/block.c
@@ -4209,9 +4209,9 @@ void bdrv_iostatus_enable(BlockDriverState *bs)
 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
 {
     return (bs->iostatus_enabled &&
-           (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
-            bs->on_write_error == BLOCK_ERR_STOP_ANY    ||
-            bs->on_read_error == BLOCK_ERR_STOP_ANY));
+           (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
+            bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
+            bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
 }
 
 void bdrv_iostatus_disable(BlockDriverState *bs)
diff --git a/block.h b/block.h
index 038621f42..ee8112945 100644
--- a/block.h
+++ b/block.h
@@ -90,11 +90,6 @@ typedef struct BlockDevOps {
 #define BDRV_SECTOR_SIZE   (1ULL << BDRV_SECTOR_BITS)
 #define BDRV_SECTOR_MASK   ~(BDRV_SECTOR_SIZE - 1)
 
-typedef enum {
-    BLOCK_ERR_REPORT, BLOCK_ERR_IGNORE, BLOCK_ERR_STOP_ENOSPC,
-    BLOCK_ERR_STOP_ANY
-} BlockdevOnError;
-
 typedef enum {
     BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
 } BlockErrorAction;
diff --git a/block/commit.c b/block/commit.c
index cabb470b5..733c91403 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -34,7 +34,7 @@ typedef struct CommitBlockJob {
     BlockDriverState *active;
     BlockDriverState *top;
     BlockDriverState *base;
-    BlockErrorAction on_error;
+    BlockdevOnError on_error;
     int base_flags;
     int orig_overlay_flags;
 } CommitBlockJob;
@@ -126,9 +126,9 @@ wait:
             bytes_written += n * BDRV_SECTOR_SIZE;
         }
         if (ret < 0) {
-            if (s->on_error == BLOCK_ERR_STOP_ANY    ||
-                s->on_error == BLOCK_ERR_REPORT      ||
-                (s->on_error == BLOCK_ERR_STOP_ENOSPC && ret == -ENOSPC)) {
+            if (s->on_error == BLOCKDEV_ON_ERROR_STOP ||
+                s->on_error == BLOCKDEV_ON_ERROR_REPORT||
+                (s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && ret == -ENOSPC)) {
                 goto exit_free_buf;
             } else {
                 n = 0;
@@ -182,7 +182,7 @@ static BlockJobType commit_job_type = {
 
 void commit_start(BlockDriverState *bs, BlockDriverState *base,
                   BlockDriverState *top, int64_t speed,
-                  BlockErrorAction on_error, BlockDriverCompletionFunc *cb,
+                  BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
                   void *opaque, Error **errp)
 {
     CommitBlockJob *s;
@@ -192,8 +192,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
     BlockDriverState *overlay_bs;
     Error *local_err = NULL;
 
-    if ((on_error == BLOCK_ERR_STOP_ANY ||
-         on_error == BLOCK_ERR_STOP_ENOSPC) &&
+    if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
+         on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
         !bdrv_iostatus_is_enabled(bs)) {
         error_set(errp, QERR_INVALID_PARAMETER_COMBINATION);
         return;
diff --git a/block_int.h b/block_int.h
index b98c770b9..615aafca8 100644
--- a/block_int.h
+++ b/block_int.h
@@ -323,7 +323,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
  */
 void commit_start(BlockDriverState *bs, BlockDriverState *base,
                  BlockDriverState *top, int64_t speed,
-                 BlockErrorAction on_error, BlockDriverCompletionFunc *cb,
+                 BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
                  void *opaque, Error **errp);
 
 #endif /* BLOCK_INT_H */
diff --git a/blockdev.c b/blockdev.c
index f097e5743..63307154b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -241,13 +241,13 @@ static void drive_put_ref_bh_schedule(DriveInfo *dinfo)
 static int parse_block_error_action(const char *buf, int is_read)
 {
     if (!strcmp(buf, "ignore")) {
-        return BLOCK_ERR_IGNORE;
+        return BLOCKDEV_ON_ERROR_IGNORE;
     } else if (!is_read && !strcmp(buf, "enospc")) {
-        return BLOCK_ERR_STOP_ENOSPC;
+        return BLOCKDEV_ON_ERROR_ENOSPC;
     } else if (!strcmp(buf, "stop")) {
-        return BLOCK_ERR_STOP_ANY;
+        return BLOCKDEV_ON_ERROR_STOP;
     } else if (!strcmp(buf, "report")) {
-        return BLOCK_ERR_REPORT;
+        return BLOCKDEV_ON_ERROR_REPORT;
     } else {
         error_report("'%s' invalid %s error action",
                      buf, is_read ? "read" : "write");
@@ -433,7 +433,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
         return NULL;
     }
 
-    on_write_error = BLOCK_ERR_STOP_ENOSPC;
+    on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
     if ((buf = qemu_opt_get(opts, "werror")) != NULL) {
         if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && type != IF_NONE) {
             error_report("werror is not supported by this bus type");
@@ -446,7 +446,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
         }
     }
 
-    on_read_error = BLOCK_ERR_REPORT;
+    on_read_error = BLOCKDEV_ON_ERROR_REPORT;
     if ((buf = qemu_opt_get(opts, "rerror")) != NULL) {
         if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI && type != IF_NONE) {
             error_report("rerror is not supported by this bus type");
@@ -1143,7 +1143,7 @@ void qmp_block_commit(const char *device,
     /* This will be part of the QMP command, if/when the
      * BlockdevOnError change for blkmirror makes it in
      */
-    BlockErrorAction on_error = BLOCK_ERR_REPORT;
+    BlockdevOnError on_error = BLOCKDEV_ON_ERROR_REPORT;
 
     /* drain all i/o before commits */
     bdrv_drain_all();
diff --git a/hw/fdc.c b/hw/fdc.c
index 08830c1ba..43b0f2050 100644
--- a/hw/fdc.c
+++ b/hw/fdc.c
@@ -1994,11 +1994,11 @@ static int fdctrl_connect_drives(FDCtrl *fdctrl)
         drive->fdctrl = fdctrl;
 
         if (drive->bs) {
-            if (bdrv_get_on_error(drive->bs, 0) != BLOCK_ERR_STOP_ENOSPC) {
+            if (bdrv_get_on_error(drive->bs, 0) != BLOCKDEV_ON_ERROR_ENOSPC) {
                 error_report("fdc doesn't support drive option werror");
                 return -1;
             }
-            if (bdrv_get_on_error(drive->bs, 1) != BLOCK_ERR_REPORT) {
+            if (bdrv_get_on_error(drive->bs, 1) != BLOCKDEV_ON_ERROR_REPORT) {
                 error_report("fdc doesn't support drive option rerror");
                 return -1;
             }
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 57b9fa458..2620e87ae 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -559,13 +559,13 @@ static int ide_handle_rw_error(IDEState *s, int error, int op)
     int is_read = (op & BM_STATUS_RETRY_READ);
     BlockdevOnError action = bdrv_get_on_error(s->bs, is_read);
 
-    if (action == BLOCK_ERR_IGNORE) {
+    if (action == BLOCKDEV_ON_ERROR_IGNORE) {
         bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_IGNORE, is_read);
         return 0;
     }
 
-    if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC)
-            || action == BLOCK_ERR_STOP_ANY) {
+    if ((error == ENOSPC && action == BLOCKDEV_ON_ERROR_ENOSPC)
+            || action == BLOCKDEV_ON_ERROR_STOP) {
         s->bus->dma->ops->set_unit(s->bus->dma, s->unit);
         s->bus->error_status = op;
         bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_STOP, is_read);
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index fef83a360..c295326e9 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -390,13 +390,13 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error)
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     BlockdevOnError action = bdrv_get_on_error(s->qdev.conf.bs, is_read);
 
-    if (action == BLOCK_ERR_IGNORE) {
+    if (action == BLOCKDEV_ON_ERROR_IGNORE) {
         bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_IGNORE, is_read);
         return 0;
     }
 
-    if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC)
-            || action == BLOCK_ERR_STOP_ANY) {
+    if ((error == ENOSPC && action == BLOCKDEV_ON_ERROR_ENOSPC)
+            || action == BLOCKDEV_ON_ERROR_STOP) {
 
         bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_STOP, is_read);
         vm_stop(RUN_STATE_IO_ERROR);
diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c
index a5eb663ec..d9045341b 100644
--- a/hw/scsi-generic.c
+++ b/hw/scsi-generic.c
@@ -400,11 +400,11 @@ static int scsi_generic_initfn(SCSIDevice *s)
         return -1;
     }
 
-    if (bdrv_get_on_error(s->conf.bs, 0) != BLOCK_ERR_STOP_ENOSPC) {
+    if (bdrv_get_on_error(s->conf.bs, 0) != BLOCKDEV_ON_ERROR_ENOSPC) {
         error_report("Device doesn't support drive option werror");
         return -1;
     }
-    if (bdrv_get_on_error(s->conf.bs, 1) != BLOCK_ERR_REPORT) {
+    if (bdrv_get_on_error(s->conf.bs, 1) != BLOCKDEV_ON_ERROR_REPORT) {
         error_report("Device doesn't support drive option rerror");
         return -1;
     }
diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index 01e537dc9..f178fa86c 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -69,13 +69,13 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
     BlockdevOnError action = bdrv_get_on_error(req->dev->bs, is_read);
     VirtIOBlock *s = req->dev;
 
-    if (action == BLOCK_ERR_IGNORE) {
+    if (action == BLOCKDEV_ON_ERROR_IGNORE) {
         bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_IGNORE, is_read);
         return 0;
     }
 
-    if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC)
-            || action == BLOCK_ERR_STOP_ANY) {
+    if ((error == ENOSPC && action == BLOCKDEV_ON_ERROR_ENOSPC)
+            || action == BLOCKDEV_ON_ERROR_STOP) {
         req->next = s->rq;
         s->rq = req;
         bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_STOP, is_read);
diff --git a/qapi-schema.json b/qapi-schema.json
index 0f2b1a0a1..a7264135a 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1087,6 +1087,29 @@
 ##
 { 'command': 'query-pci', 'returns': ['PciInfo'] }
 
+##
+# @BlockdevOnError:
+#
+# An enumeration of possible behaviors for errors on I/O operations.
+# The exact meaning depends on whether the I/O was initiated by a guest
+# or by a block job
+#
+# @report: for guest operations, report the error to the guest;
+#          for jobs, cancel the job
+#
+# @ignore: ignore the error, only report a QMP event (BLOCK_IO_ERROR
+#          or BLOCK_JOB_ERROR)
+#
+# @enospc: same as @stop on ENOSPC, same as @report otherwise.
+#
+# @stop: for guest operations, stop the virtual machine;
+#        for jobs, pause the job
+#
+# Since: 1.3
+##
+{ 'enum': 'BlockdevOnError',
+  'data': ['report', 'ignore', 'enospc', 'stop'] }
+
 ##
 # @BlockJobInfo:
 #
-- 
cgit v1.2.3