qemu/block/curl.c

/*
* QEMU Block driver for CURL images
*
* Copyright (c) 2009 Alexander Graf <agraf@suse.de>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
#include "qemu/option.h"
#include "block/block-io.h"
#include "block/block_int.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qstring.h"
#include "crypto/secret.h"
#include <curl/curl.h>
#include "qemu/cutils.h"
#include "trace.h"
// #define DEBUG_VERBOSE
/* CURL 7.85.0 switches to a string based API for specifying
* the desired protocols.
*/
#if LIBCURL_VERSION_NUM >= 0x075500
#define PROTOCOLS "HTTP,HTTPS,FTP,FTPS"
#else
#define PROTOCOLS (CURLPROTO_HTTP | CURLPROTO_HTTPS | \
CURLPROTO_FTP | CURLPROTO_FTPS)
#endif
#define CURL_NUM_STATES 8
#define CURL_NUM_ACB 8
#define CURL_TIMEOUT_MAX 10000
#define CURL_BLOCK_OPT_URL "url"
#define CURL_BLOCK_OPT_READAHEAD "readahead"
#define CURL_BLOCK_OPT_SSLVERIFY "sslverify"
#define CURL_BLOCK_OPT_TIMEOUT "timeout"
#define CURL_BLOCK_OPT_COOKIE "cookie"
#define CURL_BLOCK_OPT_COOKIE_SECRET "cookie-secret"
#define CURL_BLOCK_OPT_USERNAME "username"
#define CURL_BLOCK_OPT_PASSWORD_SECRET "password-secret"
#define CURL_BLOCK_OPT_PROXY_USERNAME "proxy-username"
#define CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET "proxy-password-secret"
#define CURL_BLOCK_OPT_READAHEAD_DEFAULT (256 * 1024)
#define CURL_BLOCK_OPT_SSLVERIFY_DEFAULT true
#define CURL_BLOCK_OPT_TIMEOUT_DEFAULT 5
struct BDRVCURLState;
struct CURLState;
static bool libcurl_initialized;
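/*
* One in-flight read request.  The issuing coroutine fills in @co, @qiov,
* @offset and @bytes, then yields until a completion path stores the result
* in @ret and wakes it.  @start and @end are offsets into the buffer of the
* CURLState serving the request.
*/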
typedef struct CURLAIOCB {
Coroutine *co;
QEMUIOVector *qiov;
uint64_t offset;
uint64_t bytes;
int ret;
size_t start;
size_t end;
} CURLAIOCB;
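/*
* Maps a file descriptor handed to us by libcurl (via curl_sock_cb()) back
* to the driver state.  Entries live in BDRVCURLState.sockets until libcurl
* requests their removal with CURL_POLL_REMOVE.
*/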
typedef struct CURLSocket {
int fd;
struct BDRVCURLState *s;
} CURLSocket;
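/*
* One libcurl easy handle, the buffer it downloads into, and the requests
* (acb[]) waiting for data from it.  A fixed pool of CURL_NUM_STATES of
* these is kept in BDRVCURLState and recycled between transfers.
*/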
typedef struct CURLState
{
struct BDRVCURLState *s;
CURLAIOCB *acb[CURL_NUM_ACB];
CURL *curl;
char *orig_buf;
uint64_t buf_start;
size_t buf_off;
size_t buf_len;
char range[128];
char errmsg[CURL_ERROR_SIZE];
char in_use;
} CURLState;
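/*
* Per-BlockDriverState data: the libcurl multi handle, the pool of easy
* handle states, the socket table and the connection options parsed in
* curl_open().
*/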
typedef struct BDRVCURLState {
CURLM *multi;
QEMUTimer timer;
uint64_t len;
CURLState states[CURL_NUM_STATES];
GHashTable *sockets; /* GINT_TO_POINTER(fd) -> socket */
char *url;
size_t readahead_size;
bool sslverify;
uint64_t timeout;
char *cookie;
bool accept_range;
AioContext *aio_context;
QemuMutex mutex;
CoQueue free_state_waitq;
char *username;
char *password;
char *proxyusername;
char *proxypassword;
} BDRVCURLState;
static void curl_clean_state(CURLState *s);
static void curl_multi_do(void *arg);
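/*
* GHRFunc for curl_drop_all_sockets(): unregister the fd from the
* AioContext and let the hash table free the CURLSocket.
*/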
static gboolean curl_drop_socket(void *key, void *value, void *opaque)
{
CURLSocket *socket = value;
BDRVCURLState *s = socket->s;
aio_set_fd_handler(s->aio_context, socket->fd,
NULL, NULL, NULL, NULL, NULL);
return true;
}
static void curl_drop_all_sockets(GHashTable *sockets)
{
g_hash_table_foreach_remove(sockets, curl_drop_socket, NULL);
}
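/*
* CURLMOPT_TIMERFUNCTION callback: arm or cancel the QEMU timer that drives
* libcurl's internal timeout handling.
*/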
/* Called from curl_multi_do_locked, with s->mutex held. */
static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
{
BDRVCURLState *s = opaque;
trace_curl_timer_cb(timeout_ms);
if (timeout_ms == -1) {
timer_del(&s->timer);
} else {
int64_t timeout_ns = (int64_t)timeout_ms * 1000 * 1000;
timer_mod(&s->timer,
qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ns);
}
return 0;
}
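/*
* CURLMOPT_SOCKETFUNCTION callback: libcurl tells us which fd it wants
* monitored for reading and/or writing (or removed); translate that into
* aio_set_fd_handler() calls and track the fd in s->sockets.
*/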
/* Called from curl_multi_do_locked, with s->mutex held. */
static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
void *userp, void *sp)
{
BDRVCURLState *s;
CURLState *state = NULL;
CURLSocket *socket;
curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char **)&state);
s = state->s;
socket = g_hash_table_lookup(s->sockets, GINT_TO_POINTER(fd));
if (!socket) {
socket = g_new0(CURLSocket, 1);
socket->fd = fd;
socket->s = s;
g_hash_table_insert(s->sockets, GINT_TO_POINTER(fd), socket);
}
trace_curl_sock_cb(action, (int)fd);
switch (action) {
case CURL_POLL_IN:
aio_set_fd_handler(s->aio_context, fd,
curl_multi_do, NULL, NULL, NULL, socket);
break;
case CURL_POLL_OUT:
aio_set_fd_handler(s->aio_context, fd,
NULL, curl_multi_do, NULL, NULL, socket);
break;
case CURL_POLL_INOUT:
aio_set_fd_handler(s->aio_context, fd,
curl_multi_do, curl_multi_do,
NULL, NULL, socket);
break;
case CURL_POLL_REMOVE:
aio_set_fd_handler(s->aio_context, fd,
NULL, NULL, NULL, NULL, NULL);
break;
}
if (action == CURL_POLL_REMOVE) {
g_hash_table_remove(s->sockets, GINT_TO_POINTER(fd));
}
return 0;
}
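/*
* CURLOPT_HEADERFUNCTION callback: look for "Accept-Ranges: bytes" in the
* response headers so curl_open() knows whether the server supports byte
* ranges (mandatory for HTTP and HTTPS).
*/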
/* Called from curl_multi_do_locked, with s->mutex held. */
static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
{
BDRVCURLState *s = opaque;
size_t realsize = size * nmemb;
const char *header = (char *)ptr;
const char *end = header + realsize;
const char *accept_ranges = "accept-ranges:";
const char *bytes = "bytes";
if (realsize >= strlen(accept_ranges)
&& g_ascii_strncasecmp(header, accept_ranges,
strlen(accept_ranges)) == 0) {
char *p = strchr(header, ':') + 1;
/* Skip whitespace between the header name and value. */
while (p < end && *p && g_ascii_isspace(*p)) {
p++;
}
if (end - p >= strlen(bytes)
&& strncmp(p, bytes, strlen(bytes)) == 0) {
/* Check that there is nothing but whitespace after the value. */
p += strlen(bytes);
while (p < end && *p && g_ascii_isspace(*p)) {
p++;
}
if (p == end || !*p) {
s->accept_range = true;
}
}
}
return realsize;
}
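/*
* CURLOPT_WRITEFUNCTION callback: copy downloaded data into the state's
* read buffer.  The full size is always reported back to libcurl so that
* the transfer is not aborted when the buffer is already full.
*/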
/* Called from curl_multi_do_locked, with s->mutex held. */
static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
{
CURLState *s = ((CURLState*)opaque);
size_t realsize = size * nmemb;
trace_curl_read_cb(realsize);
if (!s || !s->orig_buf) {
goto read_end;
}
if (s->buf_off >= s->buf_len) {
/* buffer full, read nothing */
goto read_end;
}
realsize = MIN(realsize, s->buf_len - s->buf_off);
memcpy(s->orig_buf + s->buf_off, ptr, realsize);
s->buf_off += realsize;
read_end:
/* curl will error out if we do not return this value */
return size * nmemb;
}
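/*
* Check whether an existing transfer already covers [start, start + len).
* If the data has been downloaded, copy it into the ACB and return true; if
* a matching transfer is still in flight, queue the ACB on it and return
* true; otherwise return false so the caller starts a new transfer.
*/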
/* Called with s->mutex held. */
static bool curl_find_buf(BDRVCURLState *s, uint64_t start, uint64_t len,
CURLAIOCB *acb)
{
int i;
uint64_t end = start + len;
uint64_t clamped_end = MIN(end, s->len);
uint64_t clamped_len = clamped_end - start;
for (i=0; i<CURL_NUM_STATES; i++) {
CURLState *state = &s->states[i];
uint64_t buf_end = (state->buf_start + state->buf_off);
uint64_t buf_fend = (state->buf_start + state->buf_len);
if (!state->orig_buf)
continue;
if (!state->buf_off)
continue;
// Does the existing buffer cover our section?
if ((start >= state->buf_start) &&
(start <= buf_end) &&
(clamped_end >= state->buf_start) &&
(clamped_end <= buf_end))
{
char *buf = state->orig_buf + (start - state->buf_start);
qemu_iovec_from_buf(acb->qiov, 0, buf, clamped_len);
if (clamped_len < len) {
qemu_iovec_memset(acb->qiov, clamped_len, 0, len - clamped_len);
}
acb->ret = 0;
return true;
}
// Wait for unfinished chunks
if (state->in_use &&
(start >= state->buf_start) &&
(start <= buf_fend) &&
(clamped_end >= state->buf_start) &&
(clamped_end <= buf_fend))
{
int j;
acb->start = start - state->buf_start;
acb->end = acb->start + clamped_len;
for (j=0; j<CURL_NUM_ACB; j++) {
if (!state->acb[j]) {
state->acb[j] = acb;
return true;
}
}
}
}
return false;
}
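/*
* Look for finished transfers via curl_multi_info_read(): report errors,
* copy the downloaded data into the waiting ACBs, wake their coroutines and
* recycle the CURLState.
*/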
/* Called with s->mutex held. */
static void curl_multi_check_completion(BDRVCURLState *s)
{
int msgs_in_queue;
/* Try to find done transfers, so we can free the easy
* handle again. */
for (;;) {
CURLMsg *msg;
msg = curl_multi_info_read(s->multi, &msgs_in_queue);
/* Quit when there are no more completions */
if (!msg)
break;
if (msg->msg == CURLMSG_DONE) {
int i;
CURLState *state = NULL;
bool error = msg->data.result != CURLE_OK;
curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE,
(char **)&state);
if (error) {
static int errcount = 100;
/* Don't lose the original error message from curl, since
* it contains extra data.
*/
if (errcount > 0) {
error_report("curl: %s", state->errmsg);
if (--errcount == 0) {
error_report("curl: further errors suppressed");
}
}
}
for (i = 0; i < CURL_NUM_ACB; i++) {
CURLAIOCB *acb = state->acb[i];
if (acb == NULL) {
continue;
}
if (!error) {
/* Assert that we have read all data */
assert(state->buf_off >= acb->end);
qemu_iovec_from_buf(acb->qiov, 0,
state->orig_buf + acb->start,
acb->end - acb->start);
if (acb->end - acb->start < acb->bytes) {
size_t offset = acb->end - acb->start;
qemu_iovec_memset(acb->qiov, offset, 0,
acb->bytes - offset);
}
}
acb->ret = error ? -EIO : 0;
state->acb[i] = NULL;
qemu_mutex_unlock(&s->mutex);
aio_co_wake(acb->co);
qemu_mutex_lock(&s->mutex);
}
curl_clean_state(state);
break;
}
}
}
/* Called with s->mutex held. */
static void curl_multi_do_locked(CURLSocket *socket)
{
BDRVCURLState *s = socket->s;
int running;
int r;
if (!s->multi) {
return;
}
do {
r = curl_multi_socket_action(s->multi, socket->fd, 0, &running);
} while (r == CURLM_CALL_MULTI_PERFORM);
}
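/*
* AioContext fd handler registered in curl_sock_cb(): run libcurl for the
* socket that became ready, then complete any finished transfers.
*/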
static void curl_multi_do(void *arg)
{
CURLSocket *socket = arg;
BDRVCURLState *s = socket->s;
qemu_mutex_lock(&s->mutex);
curl_multi_do_locked(socket);
curl_multi_check_completion(s);
qemu_mutex_unlock(&s->mutex);
}
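/*
* QEMUTimer callback armed by curl_timer_cb(): let libcurl handle its
* internal timeouts, then complete any finished transfers.
*/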
static void curl_multi_timeout_do(void *arg)
{
BDRVCURLState *s = (BDRVCURLState *)arg;
int running;
if (!s->multi) {
return;
}
qemu_mutex_lock(&s->mutex);
curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
curl_multi_check_completion(s);
qemu_mutex_unlock(&s->mutex);
}
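/*
* Pick an unused CURLState from the pool and mark it in use; returns NULL
* when all CURL_NUM_STATES states are busy.
*/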
/* Called with s->mutex held. */
static CURLState *curl_find_state(BDRVCURLState *s)
{
CURLState *state = NULL;
int i;
for (i = 0; i < CURL_NUM_STATES; i++) {
if (!s->states[i].in_use) {
state = &s->states[i];
state->in_use = 1;
break;
}
}
return state;
}
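/*
* Lazily create and configure the easy handle for @state.  Returns 0 on
* success, -EIO if the handle cannot be created or configured.
*/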
static int curl_init_state(BDRVCURLState *s, CURLState *state)
{
if (!state->curl) {
state->curl = curl_easy_init();
if (!state->curl) {
return -EIO;
}
if (curl_easy_setopt(state->curl, CURLOPT_URL, s->url) ||
curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYPEER,
(long) s->sslverify) ||
curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYHOST,
s->sslverify ? 2L : 0L)) {
goto err;
}
if (s->cookie) {
if (curl_easy_setopt(state->curl, CURLOPT_COOKIE, s->cookie)) {
goto err;
}
}
if (curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, (long)s->timeout) ||
curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION,
(void *)curl_read_cb) ||
curl_easy_setopt(state->curl, CURLOPT_WRITEDATA, (void *)state) ||
curl_easy_setopt(state->curl, CURLOPT_PRIVATE, (void *)state) ||
curl_easy_setopt(state->curl, CURLOPT_AUTOREFERER, 1) ||
curl_easy_setopt(state->curl, CURLOPT_FOLLOWLOCATION, 1) ||
curl_easy_setopt(state->curl, CURLOPT_NOSIGNAL, 1) ||
curl_easy_setopt(state->curl, CURLOPT_ERRORBUFFER, state->errmsg) ||
curl_easy_setopt(state->curl, CURLOPT_FAILONERROR, 1)) {
goto err;
}
if (s->username) {
if (curl_easy_setopt(state->curl, CURLOPT_USERNAME, s->username)) {
goto err;
}
}
if (s->password) {
if (curl_easy_setopt(state->curl, CURLOPT_PASSWORD, s->password)) {
goto err;
}
}
if (s->proxyusername) {
if (curl_easy_setopt(state->curl,
CURLOPT_PROXYUSERNAME, s->proxyusername)) {
goto err;
}
}
if (s->proxypassword) {
if (curl_easy_setopt(state->curl,
CURLOPT_PROXYPASSWORD, s->proxypassword)) {
goto err;
}
}
/* Restrict supported protocols to avoid security issues in the more
* obscure protocols. For example, do not allow POP3/SMTP/IMAP; see
* CVE-2013-0249.
*
* Restricting protocols is only supported from 7.19.4 upwards. Note:
* version 7.85.0 deprecates CURLOPT_*PROTOCOLS in favour of a string
* based CURLOPT_*PROTOCOLS_STR API.
*/
#if LIBCURL_VERSION_NUM >= 0x075500
if (curl_easy_setopt(state->curl,
CURLOPT_PROTOCOLS_STR, PROTOCOLS) ||
curl_easy_setopt(state->curl,
CURLOPT_REDIR_PROTOCOLS_STR, PROTOCOLS)) {
goto err;
}
#elif LIBCURL_VERSION_NUM >= 0x071304
if (curl_easy_setopt(state->curl, CURLOPT_PROTOCOLS, PROTOCOLS) ||
curl_easy_setopt(state->curl, CURLOPT_REDIR_PROTOCOLS, PROTOCOLS)) {
goto err;
}
#endif
#ifdef DEBUG_VERBOSE
if (curl_easy_setopt(state->curl, CURLOPT_VERBOSE, 1)) {
goto err;
}
#endif
}
state->s = s;
return 0;
err:
curl_easy_cleanup(state->curl);
state->curl = NULL;
return -EIO;
}
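/*
* Return a finished state to the pool: detach its easy handle from the
* multi handle, clear in_use and wake one coroutine waiting in
* free_state_waitq.
*/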
/* Called with s->mutex held. */
static void curl_clean_state(CURLState *s)
{
int j;
for (j = 0; j < CURL_NUM_ACB; j++) {
assert(!s->acb[j]);
}
if (s->s->multi)
curl_multi_remove_handle(s->s->multi, s->curl);
s->in_use = 0;
qemu_co_enter_next(&s->s->free_state_waitq, &s->s->mutex);
}
static void curl_parse_filename(const char *filename, QDict *options,
Error **errp)
{
qdict_put_str(options, CURL_BLOCK_OPT_URL, filename);
}
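/*
* Tear down everything tied to the current AioContext: unregister all
* sockets, release in-use states, free the easy handles, download buffers
* and the multi handle, and stop the timer.
*/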
static void curl_detach_aio_context(BlockDriverState *bs)
{
BDRVCURLState *s = bs->opaque;
int i;
WITH_QEMU_LOCK_GUARD(&s->mutex) {
curl_drop_all_sockets(s->sockets);
for (i = 0; i < CURL_NUM_STATES; i++) {
if (s->states[i].in_use) {
curl_clean_state(&s->states[i]);
}
if (s->states[i].curl) {
curl_easy_cleanup(s->states[i].curl);
s->states[i].curl = NULL;
}
g_free(s->states[i].orig_buf);
s->states[i].orig_buf = NULL;
}
if (s->multi) {
curl_multi_cleanup(s->multi);
s->multi = NULL;
}
}
timer_del(&s->timer);
}
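/*
* Initialise the timer and a fresh multi handle in the new AioContext and
* register the socket and timer callbacks with libcurl.
*/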
static void curl_attach_aio_context(BlockDriverState *bs,
AioContext *new_context)
{
BDRVCURLState *s = bs->opaque;
aio_timer_init(new_context, &s->timer,
QEMU_CLOCK_REALTIME, SCALE_NS,
curl_multi_timeout_do, s);
assert(!s->multi);
s->multi = curl_multi_init();
s->aio_context = new_context;
curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb);
curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s);
curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb);
}
static QemuOptsList runtime_opts = {
.name = "curl",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = CURL_BLOCK_OPT_URL,
.type = QEMU_OPT_STRING,
.help = "URL to open",
},
{
.name = CURL_BLOCK_OPT_READAHEAD,
.type = QEMU_OPT_SIZE,
.help = "Readahead size",
},
{
.name = CURL_BLOCK_OPT_SSLVERIFY,
.type = QEMU_OPT_BOOL,
.help = "Verify SSL certificate"
},
{
.name = CURL_BLOCK_OPT_TIMEOUT,
.type = QEMU_OPT_NUMBER,
.help = "Curl timeout"
},
{
.name = CURL_BLOCK_OPT_COOKIE,
.type = QEMU_OPT_STRING,
.help = "Pass the cookie or list of cookies with each request"
},
{
.name = CURL_BLOCK_OPT_COOKIE_SECRET,
.type = QEMU_OPT_STRING,
.help = "ID of secret used as cookie passed with each request"
},
{
.name = CURL_BLOCK_OPT_USERNAME,
.type = QEMU_OPT_STRING,
.help = "Username for HTTP auth"
},
{
.name = CURL_BLOCK_OPT_PASSWORD_SECRET,
.type = QEMU_OPT_STRING,
.help = "ID of secret used as password for HTTP auth",
},
{
.name = CURL_BLOCK_OPT_PROXY_USERNAME,
.type = QEMU_OPT_STRING,
.help = "Username for HTTP proxy auth"
},
{
.name = CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET,
.type = QEMU_OPT_STRING,
.help = "ID of secret used as password for HTTP proxy auth",
},
{ /* end of list */ }
},
};
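/*
* Illustrative examples of how these options are typically passed in (the
* -drive / "json:" syntax is handled by generic block layer code, not by
* this file):
*
*   qemu-img info 'json:{"file.driver":"https",
*                        "file.url":"https://example.com/disk.img",
*                        "file.sslverify":"off", "file.readahead":"64k"}'
*
*   qemu-system-x86_64 -drive file=https://example.com/disk.img,readonly=on
*/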
static int curl_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVCURLState *s = bs->opaque;
CURLState *state = NULL;
QemuOpts *opts;
const char *file;
const char *cookie;
const char *cookie_secret;
/* CURL >= 7.55.0 uses curl_off_t for content length instead of a double */
#if LIBCURL_VERSION_NUM >= 0x073700
curl_off_t cl;
#else
double cl;
#endif
const char *secretid;
const char *protocol_delimiter;
int ret;
bdrv_graph_rdlock_main_loop();
ret = bdrv_apply_auto_read_only(bs, "curl driver does not support writes",
errp);
bdrv_graph_rdunlock_main_loop();
if (ret < 0) {
return ret;
}
if (!libcurl_initialized) {
ret = curl_global_init(CURL_GLOBAL_ALL);
if (ret) {
error_setg(errp, "libcurl initialization failed with %d", ret);
return -EIO;
}
libcurl_initialized = true;
}
qemu_mutex_init(&s->mutex);
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
if (!qemu_opts_absorb_qdict(opts, options, errp)) {
goto out_noclean;
}
s->readahead_size = qemu_opt_get_size(opts, CURL_BLOCK_OPT_READAHEAD,
CURL_BLOCK_OPT_READAHEAD_DEFAULT);
if ((s->readahead_size & 0x1ff) != 0) {
error_setg(errp, "HTTP_READAHEAD_SIZE %zd is not a multiple of 512",
s->readahead_size);
goto out_noclean;
}
s->timeout = qemu_opt_get_number(opts, CURL_BLOCK_OPT_TIMEOUT,
CURL_BLOCK_OPT_TIMEOUT_DEFAULT);
if (s->timeout > CURL_TIMEOUT_MAX) {
error_setg(errp, "timeout parameter is too large or negative");
goto out_noclean;
}
s->sslverify = qemu_opt_get_bool(opts, CURL_BLOCK_OPT_SSLVERIFY,
CURL_BLOCK_OPT_SSLVERIFY_DEFAULT);
cookie = qemu_opt_get(opts, CURL_BLOCK_OPT_COOKIE);
cookie_secret = qemu_opt_get(opts, CURL_BLOCK_OPT_COOKIE_SECRET);
if (cookie && cookie_secret) {
error_setg(errp,
"curl driver cannot handle both cookie and cookie secret");
goto out_noclean;
}
if (cookie_secret) {
s->cookie = qcrypto_secret_lookup_as_utf8(cookie_secret, errp);
if (!s->cookie) {
goto out_noclean;
}
} else {
s->cookie = g_strdup(cookie);
}
file = qemu_opt_get(opts, CURL_BLOCK_OPT_URL);
if (file == NULL) {
error_setg(errp, "curl block driver requires an 'url' option");
goto out_noclean;
}
if (!strstart(file, bs->drv->protocol_name, &protocol_delimiter) ||
!strstart(protocol_delimiter, "://", NULL))
{
error_setg(errp, "%s curl driver cannot handle the URL '%s' (does not "
"start with '%s://')", bs->drv->protocol_name, file,
bs->drv->protocol_name);
goto out_noclean;
}
s->username = g_strdup(qemu_opt_get(opts, CURL_BLOCK_OPT_USERNAME));
secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PASSWORD_SECRET);
if (secretid) {
s->password = qcrypto_secret_lookup_as_utf8(secretid, errp);
if (!s->password) {
goto out_noclean;
}
}
s->proxyusername = g_strdup(
qemu_opt_get(opts, CURL_BLOCK_OPT_PROXY_USERNAME));
secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET);
if (secretid) {
s->proxypassword = qcrypto_secret_lookup_as_utf8(secretid, errp);
if (!s->proxypassword) {
goto out_noclean;
}
}
trace_curl_open(file);
qemu_co_queue_init(&s->free_state_waitq);
s->aio_context = bdrv_get_aio_context(bs);
s->url = g_strdup(file);
s->sockets = g_hash_table_new_full(NULL, NULL, NULL, g_free);
qemu_mutex_lock(&s->mutex);
state = curl_find_state(s);
qemu_mutex_unlock(&s->mutex);
if (!state) {
goto out_noclean;
}
// Get file size
if (curl_init_state(s, state) < 0) {
pstrcpy(state->errmsg, CURL_ERROR_SIZE,
"curl library initialization failed.");
goto out;
}
s->accept_range = false;
if (curl_easy_setopt(state->curl, CURLOPT_NOBODY, 1) ||
curl_easy_setopt(state->curl, CURLOPT_HEADERFUNCTION, curl_header_cb) ||
curl_easy_setopt(state->curl, CURLOPT_HEADERDATA, s)) {
pstrcpy(state->errmsg, CURL_ERROR_SIZE,
"curl library initialization failed.");
goto out;
}
if (curl_easy_perform(state->curl))
goto out;
/* CURL 7.55.0 deprecates CURLINFO_CONTENT_LENGTH_DOWNLOAD in favour of
* the *_T version which returns a more sensible type for content length.
*/
#if LIBCURL_VERSION_NUM >= 0x073700
if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &cl)) {
goto out;
}
#else
if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &cl)) {
goto out;
}
#endif
/* Prior to CURL 7.19.4, a return value of 0 could mean that the file size is
* not known or that the size is zero. From 7.19.4 on, CURL returns -1 if the
* size is not known and zero if it is really a zero-length file. */
#if LIBCURL_VERSION_NUM >= 0x071304
if (cl < 0) {
pstrcpy(state->errmsg, CURL_ERROR_SIZE,
"Server didn't report file size.");
goto out;
}
#else
if (cl <= 0) {
pstrcpy(state->errmsg, CURL_ERROR_SIZE,
"Unknown file size or zero-length file.");
goto out;
}
#endif
s->len = cl;
if ((!strncasecmp(s->url, "http://", strlen("http://"))
|| !strncasecmp(s->url, "https://", strlen("https://")))
&& !s->accept_range) {
pstrcpy(state->errmsg, CURL_ERROR_SIZE,
"Server does not support 'range' (byte ranges).");
goto out;
}
trace_curl_open_size(s->len);
qemu_mutex_lock(&s->mutex);
curl_clean_state(state);
qemu_mutex_unlock(&s->mutex);
curl_easy_cleanup(state->curl);
state->curl = NULL;
curl_attach_aio_context(bs, bdrv_get_aio_context(bs));
qemu_opts_del(opts);
return 0;
out:
error_setg(errp, "CURL: Error opening file: %s", state->errmsg);
curl_easy_cleanup(state->curl);
state->curl = NULL;
out_noclean:
qemu_mutex_destroy(&s->mutex);
g_free(s->cookie);
g_free(s->url);
g_free(s->username);
g_free(s->proxyusername);
g_free(s->proxypassword);
if (s->sockets) {
curl_drop_all_sockets(s->sockets);
g_hash_table_destroy(s->sockets);
}
qemu_opts_del(opts);
return -EINVAL;
}
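/*
* Start one read request (or satisfy it from an existing buffer).  Takes
* s->mutex itself; either completes @acb immediately via curl_find_buf() or
* attaches it to a freshly initialised CURLState with a byte range that
* includes the configured readahead, and kicks the multi handle.
*/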
static void coroutine_fn curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb)
{
CURLState *state;
int running;
BDRVCURLState *s = bs->opaque;
uint64_t start = acb->offset;
uint64_t end;
qemu_mutex_lock(&s->mutex);
// In case we have the requested data already (e.g. read-ahead),
// we can just call the callback and be done.
if (curl_find_buf(s, start, acb->bytes, acb)) {
goto out;
}
// No cache found, so let's start a new request
for (;;) {
state = curl_find_state(s);
if (state) {
break;
}
qemu_co_queue_wait(&s->free_state_waitq, &s->mutex);
}
if (curl_init_state(s, state) < 0) {
curl_clean_state(state);
acb->ret = -EIO;
goto out;
}
acb->start = 0;
acb->end = MIN(acb->bytes, s->len - start);
state->buf_off = 0;
g_free(state->orig_buf);
state->buf_start = start;
state->buf_len = MIN(acb->end + s->readahead_size, s->len - start);
end = start + state->buf_len - 1;
state->orig_buf = g_try_malloc(state->buf_len);
if (state->buf_len && state->orig_buf == NULL) {
curl_clean_state(state);
acb->ret = -ENOMEM;
goto out;
}
state->acb[0] = acb;
snprintf(state->range, 127, "%" PRIu64 "-%" PRIu64, start, end);
trace_curl_setup_preadv(acb->bytes, start, state->range);
if (curl_easy_setopt(state->curl, CURLOPT_RANGE, state->range) ||
curl_multi_add_handle(s->multi, state->curl) != CURLM_OK) {
state->acb[0] = NULL;
acb->ret = -EIO;
curl_clean_state(state);
goto out;
}
/* Tell curl it needs to kick things off */
curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
out:
qemu_mutex_unlock(&s->mutex);
}
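/*
* .bdrv_co_preadv implementation: issue the request and yield until a
* completion path fills in acb.ret.
*/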
static int coroutine_fn curl_co_preadv(BlockDriverState *bs,
int64_t offset, int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
CURLAIOCB acb = {
.co = qemu_coroutine_self(),
.ret = -EINPROGRESS,
.qiov = qiov,
.offset = offset,
.bytes = bytes
};
curl_setup_preadv(bs, &acb);
while (acb.ret == -EINPROGRESS) {
qemu_coroutine_yield();
}
return acb.ret;
}
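/* .bdrv_close implementation: detach from the AioContext and free all
* remaining per-BDS resources. */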
static void curl_close(BlockDriverState *bs)
{
BDRVCURLState *s = bs->opaque;
trace_curl_close();
curl_detach_aio_context(bs);
qemu_mutex_destroy(&s->mutex);
g_hash_table_destroy(s->sockets);
g_free(s->cookie);
g_free(s->url);
g_free(s->username);
g_free(s->proxyusername);
g_free(s->proxypassword);
}
static int64_t coroutine_fn curl_co_getlength(BlockDriverState *bs)
{
BDRVCURLState *s = bs->opaque;
return s->len;
}
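/*
* Only expose the plain URL as the filename if no cookies, credentials or
* non-default sslverify setting would be lost by doing so.
*/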
static void curl_refresh_filename(BlockDriverState *bs)
{
BDRVCURLState *s = bs->opaque;
/* "readahead" and "timeout" do not change the guest-visible data,
* so ignore them */
if (s->sslverify != CURL_BLOCK_OPT_SSLVERIFY_DEFAULT ||
s->cookie || s->username || s->password || s->proxyusername ||
s->proxypassword)
{
return;
}
pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), s->url);
}
static const char *const curl_strong_runtime_opts[] = {
CURL_BLOCK_OPT_URL,
CURL_BLOCK_OPT_SSLVERIFY,
CURL_BLOCK_OPT_COOKIE,
CURL_BLOCK_OPT_COOKIE_SECRET,
CURL_BLOCK_OPT_USERNAME,
CURL_BLOCK_OPT_PASSWORD_SECRET,
CURL_BLOCK_OPT_PROXY_USERNAME,
CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET,
NULL
};
static BlockDriver bdrv_http = {
.format_name = "http",
.protocol_name = "http",
.instance_size = sizeof(BDRVCURLState),
.bdrv_parse_filename = curl_parse_filename,
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_co_getlength = curl_co_getlength,
.bdrv_co_preadv = curl_co_preadv,
.bdrv_detach_aio_context = curl_detach_aio_context,
.bdrv_attach_aio_context = curl_attach_aio_context,
.bdrv_refresh_filename = curl_refresh_filename,
.strong_runtime_opts = curl_strong_runtime_opts,
};
static BlockDriver bdrv_https = {
.format_name = "https",
.protocol_name = "https",
.instance_size = sizeof(BDRVCURLState),
.bdrv_parse_filename = curl_parse_filename,
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_co_getlength = curl_co_getlength,
.bdrv_co_preadv = curl_co_preadv,
.bdrv_detach_aio_context = curl_detach_aio_context,
.bdrv_attach_aio_context = curl_attach_aio_context,
.bdrv_refresh_filename = curl_refresh_filename,
.strong_runtime_opts = curl_strong_runtime_opts,
};
static BlockDriver bdrv_ftp = {
.format_name = "ftp",
.protocol_name = "ftp",
.instance_size = sizeof(BDRVCURLState),
.bdrv_parse_filename = curl_parse_filename,
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_co_getlength = curl_co_getlength,
.bdrv_co_preadv = curl_co_preadv,
.bdrv_detach_aio_context = curl_detach_aio_context,
.bdrv_attach_aio_context = curl_attach_aio_context,
.bdrv_refresh_filename = curl_refresh_filename,
.strong_runtime_opts = curl_strong_runtime_opts,
};
static BlockDriver bdrv_ftps = {
.format_name = "ftps",
.protocol_name = "ftps",
.instance_size = sizeof(BDRVCURLState),
.bdrv_parse_filename = curl_parse_filename,
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_co_getlength = curl_co_getlength,
.bdrv_co_preadv = curl_co_preadv,
.bdrv_detach_aio_context = curl_detach_aio_context,
.bdrv_attach_aio_context = curl_attach_aio_context,
.bdrv_refresh_filename = curl_refresh_filename,
.strong_runtime_opts = curl_strong_runtime_opts,
};
static void curl_block_init(void)
{
bdrv_register(&bdrv_http);
bdrv_register(&bdrv_https);
bdrv_register(&bdrv_ftp);
bdrv_register(&bdrv_ftps);
}
block_init(curl_block_init);