Block layer patches
- stream: Fix potential crash during job completion - aio: add the aio_add_sqe() io_uring API - qcow2: put discards in discard queue when discard-no-unref is enabled - qcow2, vmdk: Restrict creation with secondary file using protocol - qemu-img rebase: Fix assertion failure due to exceeding IO_BUF_SIZE - iotests: Run iotests with sanitizers - iotests: Add more image formats to the thorough testing - iotests: Improve the dry run list to speed up thorough testing - Code cleanup -----BEGIN PGP SIGNATURE----- iQJFBAABCgAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmkTqWcRHGt3b2xmQHJl ZGhhdC5jb20ACgkQfwmycsiPL9awPg//VqEgqYbEr3dVUvBFk8tlcewoo7KGICVk 4kddOwMJIdcsVpiLuNzqQARH2kHV93Hiv+mVt25o00PkJx565eCGTh/bBFas3UXL JMBjgHyJutGr4cijkNrnQgqWfeTgc32xdVEWh1nZM2K7LslzC9I1PfUzfxRMYqZA Em0KE3vwQDC7xtIyk4t451hkfcQY8fwN9bDMpD+zbzaLsYTEyOJ900En88iW7oHE TuJhrviin11jdQCA26QVNXRaw7iIVVo8vJP1VEgbn31iY+Qpcr/HcQRs0x2gex67 OqIdh4onqkdGCFDxTGUoAH+jORXWUmk/JipIhl9pJP0ZDyAjsm97ThJ6SvctURsK UMU0dzXEc1C5spD2CWnN0PujqHYQqYaylx7MdiCJMjaCfDB3ZeIRsTGoiLMB24P+ WBrcn2P+f03nC/sVvxRZWrpyI2kZwEh1RsO/mnLQ3apVBFeKqaFi8Ouo9oi1ZMd6 ahUw7sZSoTxmGY1FhOSRCGEh2Wjy0ZIOx9tHT1U9vig5Kf9KeE81yO8yaq2T60mq 9eaUL8rcUrKRiJw9NUkcEYmIUJrh0nUe/kK2RWmbEGMYIH7ASrGqiyUP5FxpekD+ i/uen4BeyRwe6rnPOzGolg+HMysMBr8VD/8PwJ8g88FLH1jIdTYvFUdRbrkciUlo okC+y4+kqiU= =SI8s -----END PGP SIGNATURE----- Merge tag 'for-upstream' of https://repo.or.cz/qemu/kevin into staging Block layer patches - stream: Fix potential crash during job completion - aio: add the aio_add_sqe() io_uring API - qcow2: put discards in discard queue when discard-no-unref is enabled - qcow2, vmdk: Restrict creation with secondary file using protocol - qemu-img rebase: Fix assertion failure due to exceeding IO_BUF_SIZE - iotests: Run iotests with sanitizers - iotests: Add more image formats to the thorough testing - iotests: Improve the dry run list to speed up thorough testing - Code cleanup # -----BEGIN PGP SIGNATURE----- # # iQJFBAABCgAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmkTqWcRHGt3b2xmQHJl # ZGhhdC5jb20ACgkQfwmycsiPL9awPg//VqEgqYbEr3dVUvBFk8tlcewoo7KGICVk # 4kddOwMJIdcsVpiLuNzqQARH2kHV93Hiv+mVt25o00PkJx565eCGTh/bBFas3UXL # JMBjgHyJutGr4cijkNrnQgqWfeTgc32xdVEWh1nZM2K7LslzC9I1PfUzfxRMYqZA # Em0KE3vwQDC7xtIyk4t451hkfcQY8fwN9bDMpD+zbzaLsYTEyOJ900En88iW7oHE # TuJhrviin11jdQCA26QVNXRaw7iIVVo8vJP1VEgbn31iY+Qpcr/HcQRs0x2gex67 # OqIdh4onqkdGCFDxTGUoAH+jORXWUmk/JipIhl9pJP0ZDyAjsm97ThJ6SvctURsK # UMU0dzXEc1C5spD2CWnN0PujqHYQqYaylx7MdiCJMjaCfDB3ZeIRsTGoiLMB24P+ # WBrcn2P+f03nC/sVvxRZWrpyI2kZwEh1RsO/mnLQ3apVBFeKqaFi8Ouo9oi1ZMd6 # ahUw7sZSoTxmGY1FhOSRCGEh2Wjy0ZIOx9tHT1U9vig5Kf9KeE81yO8yaq2T60mq # 9eaUL8rcUrKRiJw9NUkcEYmIUJrh0nUe/kK2RWmbEGMYIH7ASrGqiyUP5FxpekD+ # i/uen4BeyRwe6rnPOzGolg+HMysMBr8VD/8PwJ8g88FLH1jIdTYvFUdRbrkciUlo # okC+y4+kqiU= # =SI8s # -----END PGP SIGNATURE----- # gpg: Signature made Tue 11 Nov 2025 10:23:51 PM CET # gpg: using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6 # gpg: issuer "kwolf@redhat.com" # gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [unknown] # gpg: WARNING: The key's User ID is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6 * tag 'for-upstream' of https://repo.or.cz/qemu/kevin: (28 commits) qemu-img rebase: don't exceed IO_BUF_SIZE in one operation qcow2, vmdk: Restrict creation with secondary file using protocol block: Allow drivers to control protocol prefix at creation tests/qemu-iotest: Add more image formats to the thorough testing tests/qemu-iotests: Improve the dry run list to speed up thorough testing tests/qemu-iotests/184: Fix skip message for qemu-img without throttle qcow2: put discards in discard queue when discard-no-unref is enabled qcow2: rename update_refcount_discard to queue_discard iotests: Run iotests with sanitizers qemu-img: Fix amend option parse error handling iotests: Test resizing file node under raw with size/offset block: Drop detach_subchain for bdrv_replace_node block: replace TABs with space block/io_uring: use non-vectored read/write when possible block/io_uring: use aio_add_sqe() aio-posix: add aio_add_sqe() API for user-defined io_uring requests aio-posix: add fdmon_ops->dispatch() aio-posix: unindent fdmon_io_uring_destroy() aio-posix: gracefully handle io_uring_queue_init() failure aio: add errp argument to aio_context_setup() ... Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
commit
9febfa94b6
47 changed files with 1040 additions and 805 deletions
42
block.c
42
block.c
|
|
@ -693,7 +693,7 @@ out:
|
|||
}
|
||||
|
||||
int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts,
|
||||
Error **errp)
|
||||
bool allow_protocol_prefix, Error **errp)
|
||||
{
|
||||
QemuOpts *protocol_opts;
|
||||
BlockDriver *drv;
|
||||
|
|
@ -702,7 +702,7 @@ int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts,
|
|||
|
||||
GLOBAL_STATE_CODE();
|
||||
|
||||
drv = bdrv_find_protocol(filename, true, errp);
|
||||
drv = bdrv_find_protocol(filename, allow_protocol_prefix, errp);
|
||||
if (drv == NULL) {
|
||||
return -ENOENT;
|
||||
}
|
||||
|
|
@ -5398,17 +5398,13 @@ bdrv_replace_node_noperm(BlockDriverState *from,
|
|||
*
|
||||
* With auto_skip=false the error is returned if from has a parent which should
|
||||
* not be updated.
|
||||
*
|
||||
* With @detach_subchain=true @to must be in a backing chain of @from. In this
|
||||
* case backing link of the cow-parent of @to is removed.
|
||||
*/
|
||||
static int GRAPH_WRLOCK
|
||||
bdrv_replace_node_common(BlockDriverState *from, BlockDriverState *to,
|
||||
bool auto_skip, bool detach_subchain, Error **errp)
|
||||
bool auto_skip, Error **errp)
|
||||
{
|
||||
Transaction *tran = tran_new();
|
||||
g_autoptr(GSList) refresh_list = NULL;
|
||||
BlockDriverState *to_cow_parent = NULL;
|
||||
int ret;
|
||||
|
||||
GLOBAL_STATE_CODE();
|
||||
|
|
@ -5417,17 +5413,6 @@ bdrv_replace_node_common(BlockDriverState *from, BlockDriverState *to,
|
|||
assert(to->quiesce_counter);
|
||||
assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to));
|
||||
|
||||
if (detach_subchain) {
|
||||
assert(bdrv_chain_contains(from, to));
|
||||
assert(from != to);
|
||||
for (to_cow_parent = from;
|
||||
bdrv_filter_or_cow_bs(to_cow_parent) != to;
|
||||
to_cow_parent = bdrv_filter_or_cow_bs(to_cow_parent))
|
||||
{
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Do the replacement without permission update.
|
||||
* Replacement may influence the permissions, we should calculate new
|
||||
|
|
@ -5439,11 +5424,6 @@ bdrv_replace_node_common(BlockDriverState *from, BlockDriverState *to,
|
|||
goto out;
|
||||
}
|
||||
|
||||
if (detach_subchain) {
|
||||
/* to_cow_parent is already drained because from is drained */
|
||||
bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran);
|
||||
}
|
||||
|
||||
refresh_list = g_slist_prepend(refresh_list, to);
|
||||
refresh_list = g_slist_prepend(refresh_list, from);
|
||||
|
||||
|
|
@ -5462,7 +5442,7 @@ out:
|
|||
int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
|
||||
Error **errp)
|
||||
{
|
||||
return bdrv_replace_node_common(from, to, true, false, errp);
|
||||
return bdrv_replace_node_common(from, to, true, errp);
|
||||
}
|
||||
|
||||
int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
|
||||
|
|
@ -5478,7 +5458,7 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
|
|||
|
||||
bdrv_drained_begin(child_bs);
|
||||
bdrv_graph_wrlock();
|
||||
ret = bdrv_replace_node_common(bs, child_bs, true, true, errp);
|
||||
ret = bdrv_replace_node_common(bs, child_bs, true, errp);
|
||||
bdrv_graph_wrunlock();
|
||||
bdrv_drained_end(child_bs);
|
||||
|
||||
|
|
@ -5929,17 +5909,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
|
|||
updated_children = g_slist_prepend(updated_children, c);
|
||||
}
|
||||
|
||||
/*
|
||||
* It seems correct to pass detach_subchain=true here, but it triggers
|
||||
* one more yet not fixed bug, when due to nested aio_poll loop we switch to
|
||||
* another drained section, which modify the graph (for example, removing
|
||||
* the child, which we keep in updated_children list). So, it's a TODO.
|
||||
*
|
||||
* Note, bug triggered if pass detach_subchain=true here and run
|
||||
* test-bdrv-drain. test_drop_intermediate_poll() test-case will crash.
|
||||
* That's a FIXME.
|
||||
*/
|
||||
bdrv_replace_node_common(top, base, false, false, &local_err);
|
||||
bdrv_replace_node_common(top, base, false, &local_err);
|
||||
bdrv_graph_wrunlock();
|
||||
|
||||
if (local_err) {
|
||||
|
|
|
|||
|
|
@ -300,15 +300,15 @@ static void bochs_close(BlockDriverState *bs)
|
|||
}
|
||||
|
||||
static BlockDriver bdrv_bochs = {
|
||||
.format_name = "bochs",
|
||||
.instance_size = sizeof(BDRVBochsState),
|
||||
.bdrv_probe = bochs_probe,
|
||||
.bdrv_open = bochs_open,
|
||||
.format_name = "bochs",
|
||||
.instance_size = sizeof(BDRVBochsState),
|
||||
.bdrv_probe = bochs_probe,
|
||||
.bdrv_open = bochs_open,
|
||||
.bdrv_child_perm = bdrv_default_perms,
|
||||
.bdrv_refresh_limits = bochs_refresh_limits,
|
||||
.bdrv_co_preadv = bochs_co_preadv,
|
||||
.bdrv_close = bochs_close,
|
||||
.is_format = true,
|
||||
.bdrv_co_preadv = bochs_co_preadv,
|
||||
.bdrv_close = bochs_close,
|
||||
.is_format = true,
|
||||
};
|
||||
|
||||
static void bdrv_bochs_init(void)
|
||||
|
|
|
|||
|
|
@ -835,7 +835,7 @@ block_crypto_co_create_opts_luks(BlockDriver *drv, const char *filename,
|
|||
}
|
||||
|
||||
/* Create protocol layer */
|
||||
ret = bdrv_co_create_file(filename, opts, errp);
|
||||
ret = bdrv_co_create_file(filename, opts, true, errp);
|
||||
if (ret < 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -133,7 +133,7 @@
|
|||
#define FTYPE_FILE 0
|
||||
#define FTYPE_CD 1
|
||||
|
||||
#define MAX_BLOCKSIZE 4096
|
||||
#define MAX_BLOCKSIZE 4096
|
||||
|
||||
/* Posix file locking bytes. Libvirt takes byte 0, we start from higher bytes,
|
||||
* leaving a few more bytes for its future use. */
|
||||
|
|
@ -755,14 +755,23 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
|
|||
}
|
||||
#endif /* !defined(CONFIG_LINUX_AIO) */
|
||||
|
||||
#ifndef CONFIG_LINUX_IO_URING
|
||||
if (s->use_linux_io_uring) {
|
||||
#ifdef CONFIG_LINUX_IO_URING
|
||||
if (!aio_has_io_uring()) {
|
||||
error_setg(errp, "aio=io_uring was specified, but is not "
|
||||
"available (disabled via io_uring_disabled "
|
||||
"sysctl or blocked by container runtime "
|
||||
"seccomp policy?)");
|
||||
ret = -EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
#else
|
||||
error_setg(errp, "aio=io_uring was specified, but is not supported "
|
||||
"in this build.");
|
||||
"in this build");
|
||||
ret = -EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
#endif /* !defined(CONFIG_LINUX_IO_URING) */
|
||||
}
|
||||
|
||||
s->has_discard = true;
|
||||
s->has_write_zeroes = true;
|
||||
|
|
@ -2522,27 +2531,6 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
|
|||
return true;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_LINUX_IO_URING
|
||||
static inline bool raw_check_linux_io_uring(BDRVRawState *s)
|
||||
{
|
||||
Error *local_err = NULL;
|
||||
AioContext *ctx;
|
||||
|
||||
if (!s->use_linux_io_uring) {
|
||||
return false;
|
||||
}
|
||||
|
||||
ctx = qemu_get_current_aio_context();
|
||||
if (unlikely(!aio_setup_linux_io_uring(ctx, &local_err))) {
|
||||
error_reportf_err(local_err, "Unable to use linux io_uring, "
|
||||
"falling back to thread pool: ");
|
||||
s->use_linux_io_uring = false;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_LINUX_AIO
|
||||
static inline bool raw_check_linux_aio(BDRVRawState *s)
|
||||
{
|
||||
|
|
@ -2595,7 +2583,7 @@ raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, uint64_t bytes,
|
|||
if (s->needs_alignment && !bdrv_qiov_is_aligned(bs, qiov)) {
|
||||
type |= QEMU_AIO_MISALIGNED;
|
||||
#ifdef CONFIG_LINUX_IO_URING
|
||||
} else if (raw_check_linux_io_uring(s)) {
|
||||
} else if (s->use_linux_io_uring) {
|
||||
assert(qiov->size == bytes);
|
||||
ret = luring_co_submit(bs, s->fd, offset, qiov, type, flags);
|
||||
goto out;
|
||||
|
|
@ -2692,7 +2680,7 @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
|
|||
};
|
||||
|
||||
#ifdef CONFIG_LINUX_IO_URING
|
||||
if (raw_check_linux_io_uring(s)) {
|
||||
if (s->use_linux_io_uring) {
|
||||
return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH, 0);
|
||||
}
|
||||
#endif
|
||||
|
|
@ -4574,20 +4562,20 @@ static void coroutine_fn cdrom_co_lock_medium(BlockDriverState *bs, bool locked)
|
|||
}
|
||||
|
||||
static BlockDriver bdrv_host_cdrom = {
|
||||
.format_name = "host_cdrom",
|
||||
.protocol_name = "host_cdrom",
|
||||
.instance_size = sizeof(BDRVRawState),
|
||||
.bdrv_needs_filename = true,
|
||||
.bdrv_probe_device = cdrom_probe_device,
|
||||
.bdrv_parse_filename = cdrom_parse_filename,
|
||||
.bdrv_open = cdrom_open,
|
||||
.bdrv_close = raw_close,
|
||||
.bdrv_reopen_prepare = raw_reopen_prepare,
|
||||
.bdrv_reopen_commit = raw_reopen_commit,
|
||||
.bdrv_reopen_abort = raw_reopen_abort,
|
||||
.bdrv_co_create_opts = bdrv_co_create_opts_simple,
|
||||
.create_opts = &bdrv_create_opts_simple,
|
||||
.mutable_opts = mutable_opts,
|
||||
.format_name = "host_cdrom",
|
||||
.protocol_name = "host_cdrom",
|
||||
.instance_size = sizeof(BDRVRawState),
|
||||
.bdrv_needs_filename = true,
|
||||
.bdrv_probe_device = cdrom_probe_device,
|
||||
.bdrv_parse_filename = cdrom_parse_filename,
|
||||
.bdrv_open = cdrom_open,
|
||||
.bdrv_close = raw_close,
|
||||
.bdrv_reopen_prepare = raw_reopen_prepare,
|
||||
.bdrv_reopen_commit = raw_reopen_commit,
|
||||
.bdrv_reopen_abort = raw_reopen_abort,
|
||||
.bdrv_co_create_opts = bdrv_co_create_opts_simple,
|
||||
.create_opts = &bdrv_create_opts_simple,
|
||||
.mutable_opts = mutable_opts,
|
||||
.bdrv_co_invalidate_cache = raw_co_invalidate_cache,
|
||||
|
||||
.bdrv_co_preadv = raw_co_preadv,
|
||||
|
|
@ -4700,20 +4688,20 @@ static void coroutine_fn cdrom_co_lock_medium(BlockDriverState *bs, bool locked)
|
|||
}
|
||||
|
||||
static BlockDriver bdrv_host_cdrom = {
|
||||
.format_name = "host_cdrom",
|
||||
.protocol_name = "host_cdrom",
|
||||
.instance_size = sizeof(BDRVRawState),
|
||||
.bdrv_needs_filename = true,
|
||||
.bdrv_probe_device = cdrom_probe_device,
|
||||
.bdrv_parse_filename = cdrom_parse_filename,
|
||||
.bdrv_open = cdrom_open,
|
||||
.bdrv_close = raw_close,
|
||||
.bdrv_reopen_prepare = raw_reopen_prepare,
|
||||
.bdrv_reopen_commit = raw_reopen_commit,
|
||||
.bdrv_reopen_abort = raw_reopen_abort,
|
||||
.bdrv_co_create_opts = bdrv_co_create_opts_simple,
|
||||
.create_opts = &bdrv_create_opts_simple,
|
||||
.mutable_opts = mutable_opts,
|
||||
.format_name = "host_cdrom",
|
||||
.protocol_name = "host_cdrom",
|
||||
.instance_size = sizeof(BDRVRawState),
|
||||
.bdrv_needs_filename = true,
|
||||
.bdrv_probe_device = cdrom_probe_device,
|
||||
.bdrv_parse_filename = cdrom_parse_filename,
|
||||
.bdrv_open = cdrom_open,
|
||||
.bdrv_close = raw_close,
|
||||
.bdrv_reopen_prepare = raw_reopen_prepare,
|
||||
.bdrv_reopen_commit = raw_reopen_commit,
|
||||
.bdrv_reopen_abort = raw_reopen_abort,
|
||||
.bdrv_co_create_opts = bdrv_co_create_opts_simple,
|
||||
.create_opts = &bdrv_create_opts_simple,
|
||||
.mutable_opts = mutable_opts,
|
||||
|
||||
.bdrv_co_preadv = raw_co_preadv,
|
||||
.bdrv_co_pwritev = raw_co_pwritev,
|
||||
|
|
|
|||
|
|
@ -741,16 +741,16 @@ static QemuOptsList raw_create_opts = {
|
|||
};
|
||||
|
||||
BlockDriver bdrv_file = {
|
||||
.format_name = "file",
|
||||
.protocol_name = "file",
|
||||
.instance_size = sizeof(BDRVRawState),
|
||||
.bdrv_needs_filename = true,
|
||||
.bdrv_parse_filename = raw_parse_filename,
|
||||
.bdrv_open = raw_open,
|
||||
.bdrv_refresh_limits = raw_probe_alignment,
|
||||
.bdrv_close = raw_close,
|
||||
.bdrv_co_create_opts = raw_co_create_opts,
|
||||
.bdrv_has_zero_init = bdrv_has_zero_init_1,
|
||||
.format_name = "file",
|
||||
.protocol_name = "file",
|
||||
.instance_size = sizeof(BDRVRawState),
|
||||
.bdrv_needs_filename = true,
|
||||
.bdrv_parse_filename = raw_parse_filename,
|
||||
.bdrv_open = raw_open,
|
||||
.bdrv_refresh_limits = raw_probe_alignment,
|
||||
.bdrv_close = raw_close,
|
||||
.bdrv_co_create_opts = raw_co_create_opts,
|
||||
.bdrv_has_zero_init = bdrv_has_zero_init_1,
|
||||
|
||||
.bdrv_reopen_prepare = raw_reopen_prepare,
|
||||
.bdrv_reopen_commit = raw_reopen_commit,
|
||||
|
|
@ -914,15 +914,15 @@ done:
|
|||
}
|
||||
|
||||
static BlockDriver bdrv_host_device = {
|
||||
.format_name = "host_device",
|
||||
.protocol_name = "host_device",
|
||||
.instance_size = sizeof(BDRVRawState),
|
||||
.bdrv_needs_filename = true,
|
||||
.bdrv_parse_filename = hdev_parse_filename,
|
||||
.bdrv_probe_device = hdev_probe_device,
|
||||
.bdrv_open = hdev_open,
|
||||
.bdrv_close = raw_close,
|
||||
.bdrv_refresh_limits = hdev_refresh_limits,
|
||||
.format_name = "host_device",
|
||||
.protocol_name = "host_device",
|
||||
.instance_size = sizeof(BDRVRawState),
|
||||
.bdrv_needs_filename = true,
|
||||
.bdrv_parse_filename = hdev_parse_filename,
|
||||
.bdrv_probe_device = hdev_probe_device,
|
||||
.bdrv_open = hdev_open,
|
||||
.bdrv_close = raw_close,
|
||||
.bdrv_refresh_limits = hdev_refresh_limits,
|
||||
|
||||
.bdrv_aio_preadv = raw_aio_preadv,
|
||||
.bdrv_aio_pwritev = raw_aio_pwritev,
|
||||
|
|
|
|||
507
block/io_uring.c
507
block/io_uring.c
|
|
@ -11,28 +11,20 @@
|
|||
#include "qemu/osdep.h"
|
||||
#include <liburing.h>
|
||||
#include "block/aio.h"
|
||||
#include "qemu/queue.h"
|
||||
#include "block/block.h"
|
||||
#include "block/raw-aio.h"
|
||||
#include "qemu/coroutine.h"
|
||||
#include "qemu/defer-call.h"
|
||||
#include "qapi/error.h"
|
||||
#include "system/block-backend.h"
|
||||
#include "trace.h"
|
||||
|
||||
/* Only used for assertions. */
|
||||
#include "qemu/coroutine_int.h"
|
||||
|
||||
/* io_uring ring size */
|
||||
#define MAX_ENTRIES 128
|
||||
|
||||
typedef struct LuringAIOCB {
|
||||
typedef struct {
|
||||
Coroutine *co;
|
||||
struct io_uring_sqe sqeq;
|
||||
ssize_t ret;
|
||||
QEMUIOVector *qiov;
|
||||
bool is_read;
|
||||
QSIMPLEQ_ENTRY(LuringAIOCB) next;
|
||||
uint64_t offset;
|
||||
ssize_t ret;
|
||||
int type;
|
||||
int fd;
|
||||
BdrvRequestFlags flags;
|
||||
|
||||
/*
|
||||
* Buffered reads may require resubmission, see
|
||||
|
|
@ -40,36 +32,69 @@ typedef struct LuringAIOCB {
|
|||
*/
|
||||
int total_read;
|
||||
QEMUIOVector resubmit_qiov;
|
||||
} LuringAIOCB;
|
||||
|
||||
typedef struct LuringQueue {
|
||||
unsigned int in_queue;
|
||||
unsigned int in_flight;
|
||||
bool blocked;
|
||||
QSIMPLEQ_HEAD(, LuringAIOCB) submit_queue;
|
||||
} LuringQueue;
|
||||
CqeHandler cqe_handler;
|
||||
} LuringRequest;
|
||||
|
||||
struct LuringState {
|
||||
AioContext *aio_context;
|
||||
|
||||
struct io_uring ring;
|
||||
|
||||
/* No locking required, only accessed from AioContext home thread */
|
||||
LuringQueue io_q;
|
||||
|
||||
QEMUBH *completion_bh;
|
||||
};
|
||||
|
||||
/**
|
||||
* luring_resubmit:
|
||||
*
|
||||
* Resubmit a request by appending it to submit_queue. The caller must ensure
|
||||
* that ioq_submit() is called later so that submit_queue requests are started.
|
||||
*/
|
||||
static void luring_resubmit(LuringState *s, LuringAIOCB *luringcb)
|
||||
static void luring_prep_sqe(struct io_uring_sqe *sqe, void *opaque)
|
||||
{
|
||||
QSIMPLEQ_INSERT_TAIL(&s->io_q.submit_queue, luringcb, next);
|
||||
s->io_q.in_queue++;
|
||||
LuringRequest *req = opaque;
|
||||
QEMUIOVector *qiov = req->qiov;
|
||||
uint64_t offset = req->offset;
|
||||
int fd = req->fd;
|
||||
BdrvRequestFlags flags = req->flags;
|
||||
|
||||
switch (req->type) {
|
||||
case QEMU_AIO_WRITE:
|
||||
{
|
||||
int luring_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
|
||||
if (luring_flags != 0 || qiov->niov > 1) {
|
||||
#ifdef HAVE_IO_URING_PREP_WRITEV2
|
||||
io_uring_prep_writev2(sqe, fd, qiov->iov,
|
||||
qiov->niov, offset, luring_flags);
|
||||
#else
|
||||
/*
|
||||
* FUA should only be enabled with HAVE_IO_URING_PREP_WRITEV2, see
|
||||
* luring_has_fua().
|
||||
*/
|
||||
assert(luring_flags == 0);
|
||||
|
||||
io_uring_prep_writev(sqe, fd, qiov->iov, qiov->niov, offset);
|
||||
#endif
|
||||
} else {
|
||||
/* The man page says non-vectored is faster than vectored */
|
||||
struct iovec *iov = qiov->iov;
|
||||
io_uring_prep_write(sqe, fd, iov->iov_base, iov->iov_len, offset);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case QEMU_AIO_ZONE_APPEND:
|
||||
io_uring_prep_writev(sqe, fd, qiov->iov, qiov->niov, offset);
|
||||
break;
|
||||
case QEMU_AIO_READ:
|
||||
{
|
||||
if (req->resubmit_qiov.iov != NULL) {
|
||||
qiov = &req->resubmit_qiov;
|
||||
}
|
||||
if (qiov->niov > 1) {
|
||||
io_uring_prep_readv(sqe, fd, qiov->iov, qiov->niov,
|
||||
offset + req->total_read);
|
||||
} else {
|
||||
/* The man page says non-vectored is faster than vectored */
|
||||
struct iovec *iov = qiov->iov;
|
||||
io_uring_prep_read(sqe, fd, iov->iov_base, iov->iov_len,
|
||||
offset + req->total_read);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case QEMU_AIO_FLUSH:
|
||||
io_uring_prep_fsync(sqe, fd, IORING_FSYNC_DATASYNC);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "%s: invalid AIO request type, aborting 0x%x.\n",
|
||||
__func__, req->type);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -78,385 +103,115 @@ static void luring_resubmit(LuringState *s, LuringAIOCB *luringcb)
|
|||
* Short reads are rare but may occur. The remaining read request needs to be
|
||||
* resubmitted.
|
||||
*/
|
||||
static void luring_resubmit_short_read(LuringState *s, LuringAIOCB *luringcb,
|
||||
int nread)
|
||||
static void luring_resubmit_short_read(LuringRequest *req, int nread)
|
||||
{
|
||||
QEMUIOVector *resubmit_qiov;
|
||||
size_t remaining;
|
||||
|
||||
trace_luring_resubmit_short_read(s, luringcb, nread);
|
||||
trace_luring_resubmit_short_read(req, nread);
|
||||
|
||||
/* Update read position */
|
||||
luringcb->total_read += nread;
|
||||
remaining = luringcb->qiov->size - luringcb->total_read;
|
||||
req->total_read += nread;
|
||||
remaining = req->qiov->size - req->total_read;
|
||||
|
||||
/* Shorten qiov */
|
||||
resubmit_qiov = &luringcb->resubmit_qiov;
|
||||
resubmit_qiov = &req->resubmit_qiov;
|
||||
if (resubmit_qiov->iov == NULL) {
|
||||
qemu_iovec_init(resubmit_qiov, luringcb->qiov->niov);
|
||||
qemu_iovec_init(resubmit_qiov, req->qiov->niov);
|
||||
} else {
|
||||
qemu_iovec_reset(resubmit_qiov);
|
||||
}
|
||||
qemu_iovec_concat(resubmit_qiov, luringcb->qiov, luringcb->total_read,
|
||||
remaining);
|
||||
qemu_iovec_concat(resubmit_qiov, req->qiov, req->total_read, remaining);
|
||||
|
||||
/* Update sqe */
|
||||
luringcb->sqeq.off += nread;
|
||||
luringcb->sqeq.addr = (uintptr_t)luringcb->resubmit_qiov.iov;
|
||||
luringcb->sqeq.len = luringcb->resubmit_qiov.niov;
|
||||
|
||||
luring_resubmit(s, luringcb);
|
||||
aio_add_sqe(luring_prep_sqe, req, &req->cqe_handler);
|
||||
}
|
||||
|
||||
/**
|
||||
* luring_process_completions:
|
||||
* @s: AIO state
|
||||
*
|
||||
* Fetches completed I/O requests, consumes cqes and invokes their callbacks
|
||||
* The function is somewhat tricky because it supports nested event loops, for
|
||||
* example when a request callback invokes aio_poll().
|
||||
*
|
||||
* Function schedules BH completion so it can be called again in a nested
|
||||
* event loop. When there are no events left to complete the BH is being
|
||||
* canceled.
|
||||
*
|
||||
*/
|
||||
static void luring_process_completions(LuringState *s)
|
||||
static void luring_cqe_handler(CqeHandler *cqe_handler)
|
||||
{
|
||||
struct io_uring_cqe *cqes;
|
||||
int total_bytes;
|
||||
LuringRequest *req = container_of(cqe_handler, LuringRequest, cqe_handler);
|
||||
int ret = cqe_handler->cqe.res;
|
||||
|
||||
defer_call_begin();
|
||||
trace_luring_cqe_handler(req, ret);
|
||||
|
||||
/*
|
||||
* Request completion callbacks can run the nested event loop.
|
||||
* Schedule ourselves so the nested event loop will "see" remaining
|
||||
* completed requests and process them. Without this, completion
|
||||
* callbacks that wait for other requests using a nested event loop
|
||||
* would hang forever.
|
||||
*
|
||||
* This workaround is needed because io_uring uses poll_wait, which
|
||||
* is woken up when new events are added to the uring, thus polling on
|
||||
* the same uring fd will block unless more events are received.
|
||||
*
|
||||
* Other leaf block drivers (drivers that access the data themselves)
|
||||
* are networking based, so they poll sockets for data and run the
|
||||
* correct coroutine.
|
||||
*/
|
||||
qemu_bh_schedule(s->completion_bh);
|
||||
|
||||
while (io_uring_peek_cqe(&s->ring, &cqes) == 0) {
|
||||
LuringAIOCB *luringcb;
|
||||
int ret;
|
||||
|
||||
if (!cqes) {
|
||||
break;
|
||||
if (ret < 0) {
|
||||
/*
|
||||
* Only writev/readv/fsync requests on regular files or host block
|
||||
* devices are submitted. Therefore -EAGAIN is not expected but it's
|
||||
* known to happen sometimes with Linux SCSI. Submit again and hope
|
||||
* the request completes successfully.
|
||||
*
|
||||
* For more information, see:
|
||||
* https://lore.kernel.org/io-uring/20210727165811.284510-3-axboe@kernel.dk/T/#u
|
||||
*
|
||||
* If the code is changed to submit other types of requests in the
|
||||
* future, then this workaround may need to be extended to deal with
|
||||
* genuine -EAGAIN results that should not be resubmitted
|
||||
* immediately.
|
||||
*/
|
||||
if (ret == -EINTR || ret == -EAGAIN) {
|
||||
aio_add_sqe(luring_prep_sqe, req, &req->cqe_handler);
|
||||
return;
|
||||
}
|
||||
|
||||
luringcb = io_uring_cqe_get_data(cqes);
|
||||
ret = cqes->res;
|
||||
io_uring_cqe_seen(&s->ring, cqes);
|
||||
cqes = NULL;
|
||||
|
||||
/* Change counters one-by-one because we can be nested. */
|
||||
s->io_q.in_flight--;
|
||||
trace_luring_process_completion(s, luringcb, ret);
|
||||
|
||||
} else if (req->qiov) {
|
||||
/* total_read is non-zero only for resubmitted read requests */
|
||||
total_bytes = ret + luringcb->total_read;
|
||||
int total_bytes = ret + req->total_read;
|
||||
|
||||
if (ret < 0) {
|
||||
/*
|
||||
* Only writev/readv/fsync requests on regular files or host block
|
||||
* devices are submitted. Therefore -EAGAIN is not expected but it's
|
||||
* known to happen sometimes with Linux SCSI. Submit again and hope
|
||||
* the request completes successfully.
|
||||
*
|
||||
* For more information, see:
|
||||
* https://lore.kernel.org/io-uring/20210727165811.284510-3-axboe@kernel.dk/T/#u
|
||||
*
|
||||
* If the code is changed to submit other types of requests in the
|
||||
* future, then this workaround may need to be extended to deal with
|
||||
* genuine -EAGAIN results that should not be resubmitted
|
||||
* immediately.
|
||||
*/
|
||||
if (ret == -EINTR || ret == -EAGAIN) {
|
||||
luring_resubmit(s, luringcb);
|
||||
continue;
|
||||
}
|
||||
} else if (!luringcb->qiov) {
|
||||
goto end;
|
||||
} else if (total_bytes == luringcb->qiov->size) {
|
||||
if (total_bytes == req->qiov->size) {
|
||||
ret = 0;
|
||||
/* Only read/write */
|
||||
} else {
|
||||
/* Short Read/Write */
|
||||
if (luringcb->is_read) {
|
||||
if (req->type == QEMU_AIO_READ) {
|
||||
if (ret > 0) {
|
||||
luring_resubmit_short_read(s, luringcb, ret);
|
||||
continue;
|
||||
} else {
|
||||
/* Pad with zeroes */
|
||||
qemu_iovec_memset(luringcb->qiov, total_bytes, 0,
|
||||
luringcb->qiov->size - total_bytes);
|
||||
ret = 0;
|
||||
luring_resubmit_short_read(req, ret);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Pad with zeroes */
|
||||
qemu_iovec_memset(req->qiov, total_bytes, 0,
|
||||
req->qiov->size - total_bytes);
|
||||
ret = 0;
|
||||
} else {
|
||||
ret = -ENOSPC;
|
||||
}
|
||||
}
|
||||
end:
|
||||
luringcb->ret = ret;
|
||||
qemu_iovec_destroy(&luringcb->resubmit_qiov);
|
||||
|
||||
/*
|
||||
* If the coroutine is already entered it must be in ioq_submit()
|
||||
* and will notice luringcb->ret has been filled in when it
|
||||
* eventually runs later. Coroutines cannot be entered recursively
|
||||
* so avoid doing that!
|
||||
*/
|
||||
assert(luringcb->co->ctx == s->aio_context);
|
||||
if (!qemu_coroutine_entered(luringcb->co)) {
|
||||
aio_co_wake(luringcb->co);
|
||||
}
|
||||
}
|
||||
|
||||
qemu_bh_cancel(s->completion_bh);
|
||||
req->ret = ret;
|
||||
qemu_iovec_destroy(&req->resubmit_qiov);
|
||||
|
||||
defer_call_end();
|
||||
}
|
||||
|
||||
static int ioq_submit(LuringState *s)
|
||||
{
|
||||
int ret = 0;
|
||||
LuringAIOCB *luringcb, *luringcb_next;
|
||||
|
||||
while (s->io_q.in_queue > 0) {
|
||||
/*
|
||||
* Try to fetch sqes from the ring for requests waiting in
|
||||
* the overflow queue
|
||||
*/
|
||||
QSIMPLEQ_FOREACH_SAFE(luringcb, &s->io_q.submit_queue, next,
|
||||
luringcb_next) {
|
||||
struct io_uring_sqe *sqes = io_uring_get_sqe(&s->ring);
|
||||
if (!sqes) {
|
||||
break;
|
||||
}
|
||||
/* Prep sqe for submission */
|
||||
*sqes = luringcb->sqeq;
|
||||
QSIMPLEQ_REMOVE_HEAD(&s->io_q.submit_queue, next);
|
||||
}
|
||||
ret = io_uring_submit(&s->ring);
|
||||
trace_luring_io_uring_submit(s, ret);
|
||||
/* Prevent infinite loop if submission is refused */
|
||||
if (ret <= 0) {
|
||||
if (ret == -EAGAIN || ret == -EINTR) {
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
s->io_q.in_flight += ret;
|
||||
s->io_q.in_queue -= ret;
|
||||
}
|
||||
s->io_q.blocked = (s->io_q.in_queue > 0);
|
||||
|
||||
if (s->io_q.in_flight) {
|
||||
/*
|
||||
* We can try to complete something just right away if there are
|
||||
* still requests in-flight.
|
||||
*/
|
||||
luring_process_completions(s);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void luring_process_completions_and_submit(LuringState *s)
|
||||
{
|
||||
luring_process_completions(s);
|
||||
|
||||
if (s->io_q.in_queue > 0) {
|
||||
ioq_submit(s);
|
||||
/*
|
||||
* If the coroutine is already entered it must be in luring_co_submit() and
|
||||
* will notice req->ret has been filled in when it eventually runs later.
|
||||
* Coroutines cannot be entered recursively so avoid doing that!
|
||||
*/
|
||||
if (!qemu_coroutine_entered(req->co)) {
|
||||
aio_co_wake(req->co);
|
||||
}
|
||||
}
|
||||
|
||||
static void qemu_luring_completion_bh(void *opaque)
|
||||
int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd,
|
||||
uint64_t offset, QEMUIOVector *qiov,
|
||||
int type, BdrvRequestFlags flags)
|
||||
{
|
||||
LuringState *s = opaque;
|
||||
luring_process_completions_and_submit(s);
|
||||
}
|
||||
|
||||
static void qemu_luring_completion_cb(void *opaque)
|
||||
{
|
||||
LuringState *s = opaque;
|
||||
luring_process_completions_and_submit(s);
|
||||
}
|
||||
|
||||
static bool qemu_luring_poll_cb(void *opaque)
|
||||
{
|
||||
LuringState *s = opaque;
|
||||
|
||||
return io_uring_cq_ready(&s->ring);
|
||||
}
|
||||
|
||||
static void qemu_luring_poll_ready(void *opaque)
|
||||
{
|
||||
LuringState *s = opaque;
|
||||
|
||||
luring_process_completions_and_submit(s);
|
||||
}
|
||||
|
||||
static void ioq_init(LuringQueue *io_q)
|
||||
{
|
||||
QSIMPLEQ_INIT(&io_q->submit_queue);
|
||||
io_q->in_queue = 0;
|
||||
io_q->in_flight = 0;
|
||||
io_q->blocked = false;
|
||||
}
|
||||
|
||||
static void luring_deferred_fn(void *opaque)
|
||||
{
|
||||
LuringState *s = opaque;
|
||||
trace_luring_unplug_fn(s, s->io_q.blocked, s->io_q.in_queue,
|
||||
s->io_q.in_flight);
|
||||
if (!s->io_q.blocked && s->io_q.in_queue > 0) {
|
||||
ioq_submit(s);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* luring_do_submit:
|
||||
* @fd: file descriptor for I/O
|
||||
* @luringcb: AIO control block
|
||||
* @s: AIO state
|
||||
* @offset: offset for request
|
||||
* @type: type of request
|
||||
*
|
||||
* Fetches sqes from ring, adds to pending queue and preps them
|
||||
*
|
||||
*/
|
||||
static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
|
||||
uint64_t offset, int type, BdrvRequestFlags flags)
|
||||
{
|
||||
int ret;
|
||||
struct io_uring_sqe *sqes = &luringcb->sqeq;
|
||||
|
||||
switch (type) {
|
||||
case QEMU_AIO_WRITE:
|
||||
#ifdef HAVE_IO_URING_PREP_WRITEV2
|
||||
{
|
||||
int luring_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
|
||||
io_uring_prep_writev2(sqes, fd, luringcb->qiov->iov,
|
||||
luringcb->qiov->niov, offset, luring_flags);
|
||||
}
|
||||
#else
|
||||
assert(flags == 0);
|
||||
io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
|
||||
luringcb->qiov->niov, offset);
|
||||
#endif
|
||||
break;
|
||||
case QEMU_AIO_ZONE_APPEND:
|
||||
io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
|
||||
luringcb->qiov->niov, offset);
|
||||
break;
|
||||
case QEMU_AIO_READ:
|
||||
io_uring_prep_readv(sqes, fd, luringcb->qiov->iov,
|
||||
luringcb->qiov->niov, offset);
|
||||
break;
|
||||
case QEMU_AIO_FLUSH:
|
||||
io_uring_prep_fsync(sqes, fd, IORING_FSYNC_DATASYNC);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "%s: invalid AIO request type, aborting 0x%x.\n",
|
||||
__func__, type);
|
||||
abort();
|
||||
}
|
||||
io_uring_sqe_set_data(sqes, luringcb);
|
||||
|
||||
QSIMPLEQ_INSERT_TAIL(&s->io_q.submit_queue, luringcb, next);
|
||||
s->io_q.in_queue++;
|
||||
trace_luring_do_submit(s, s->io_q.blocked, s->io_q.in_queue,
|
||||
s->io_q.in_flight);
|
||||
if (!s->io_q.blocked) {
|
||||
if (s->io_q.in_flight + s->io_q.in_queue >= MAX_ENTRIES) {
|
||||
ret = ioq_submit(s);
|
||||
trace_luring_do_submit_done(s, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
defer_call(luring_deferred_fn, s);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
|
||||
QEMUIOVector *qiov, int type,
|
||||
BdrvRequestFlags flags)
|
||||
{
|
||||
int ret;
|
||||
AioContext *ctx = qemu_get_current_aio_context();
|
||||
LuringState *s = aio_get_linux_io_uring(ctx);
|
||||
LuringAIOCB luringcb = {
|
||||
LuringRequest req = {
|
||||
.co = qemu_coroutine_self(),
|
||||
.ret = -EINPROGRESS,
|
||||
.qiov = qiov,
|
||||
.is_read = (type == QEMU_AIO_READ),
|
||||
.ret = -EINPROGRESS,
|
||||
.type = type,
|
||||
.fd = fd,
|
||||
.offset = offset,
|
||||
.flags = flags,
|
||||
};
|
||||
trace_luring_co_submit(bs, s, &luringcb, fd, offset, qiov ? qiov->size : 0,
|
||||
type);
|
||||
ret = luring_do_submit(fd, &luringcb, s, offset, type, flags);
|
||||
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
}
|
||||
req.cqe_handler.cb = luring_cqe_handler;
|
||||
|
||||
if (luringcb.ret == -EINPROGRESS) {
|
||||
trace_luring_co_submit(bs, &req, fd, offset, qiov ? qiov->size : 0, type);
|
||||
aio_add_sqe(luring_prep_sqe, &req, &req.cqe_handler);
|
||||
|
||||
if (req.ret == -EINPROGRESS) {
|
||||
qemu_coroutine_yield();
|
||||
}
|
||||
return luringcb.ret;
|
||||
}
|
||||
|
||||
void luring_detach_aio_context(LuringState *s, AioContext *old_context)
|
||||
{
|
||||
aio_set_fd_handler(old_context, s->ring.ring_fd,
|
||||
NULL, NULL, NULL, NULL, s);
|
||||
qemu_bh_delete(s->completion_bh);
|
||||
s->aio_context = NULL;
|
||||
}
|
||||
|
||||
void luring_attach_aio_context(LuringState *s, AioContext *new_context)
|
||||
{
|
||||
s->aio_context = new_context;
|
||||
s->completion_bh = aio_bh_new(new_context, qemu_luring_completion_bh, s);
|
||||
aio_set_fd_handler(s->aio_context, s->ring.ring_fd,
|
||||
qemu_luring_completion_cb, NULL,
|
||||
qemu_luring_poll_cb, qemu_luring_poll_ready, s);
|
||||
}
|
||||
|
||||
LuringState *luring_init(Error **errp)
|
||||
{
|
||||
int rc;
|
||||
LuringState *s = g_new0(LuringState, 1);
|
||||
struct io_uring *ring = &s->ring;
|
||||
|
||||
trace_luring_init_state(s, sizeof(*s));
|
||||
|
||||
rc = io_uring_queue_init(MAX_ENTRIES, ring, 0);
|
||||
if (rc < 0) {
|
||||
error_setg_errno(errp, -rc, "failed to init linux io_uring ring");
|
||||
g_free(s);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ioq_init(&s->io_q);
|
||||
return s;
|
||||
|
||||
}
|
||||
|
||||
void luring_cleanup(LuringState *s)
|
||||
{
|
||||
io_uring_queue_exit(&s->ring);
|
||||
trace_luring_cleanup_state(s);
|
||||
g_free(s);
|
||||
return req.ret;
|
||||
}
|
||||
|
||||
bool luring_has_fua(void)
|
||||
|
|
|
|||
|
|
@ -1117,7 +1117,7 @@ parallels_co_create_opts(BlockDriver *drv, const char *filename,
|
|||
}
|
||||
|
||||
/* Create and open the file (protocol layer) */
|
||||
ret = bdrv_co_create_file(filename, opts, errp);
|
||||
ret = bdrv_co_create_file(filename, opts, true, errp);
|
||||
if (ret < 0) {
|
||||
goto done;
|
||||
}
|
||||
|
|
|
|||
12
block/qcow.c
12
block/qcow.c
|
|
@ -978,7 +978,7 @@ qcow_co_create_opts(BlockDriver *drv, const char *filename,
|
|||
}
|
||||
|
||||
/* Create and open the file (protocol layer) */
|
||||
ret = bdrv_co_create_file(filename, opts, errp);
|
||||
ret = bdrv_co_create_file(filename, opts, true, errp);
|
||||
if (ret < 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
|
@ -1184,11 +1184,11 @@ static const char *const qcow_strong_runtime_opts[] = {
|
|||
};
|
||||
|
||||
static BlockDriver bdrv_qcow = {
|
||||
.format_name = "qcow",
|
||||
.instance_size = sizeof(BDRVQcowState),
|
||||
.bdrv_probe = qcow_probe,
|
||||
.bdrv_open = qcow_open,
|
||||
.bdrv_close = qcow_close,
|
||||
.format_name = "qcow",
|
||||
.instance_size = sizeof(BDRVQcowState),
|
||||
.bdrv_probe = qcow_probe,
|
||||
.bdrv_open = qcow_open,
|
||||
.bdrv_close = qcow_close,
|
||||
.bdrv_child_perm = bdrv_default_perms,
|
||||
.bdrv_reopen_prepare = qcow_reopen_prepare,
|
||||
.bdrv_co_create = qcow_co_create,
|
||||
|
|
|
|||
|
|
@ -1978,12 +1978,10 @@ discard_in_l2_slice(BlockDriverState *bs, uint64_t offset, uint64_t nb_clusters,
|
|||
if (!keep_reference) {
|
||||
/* Then decrease the refcount */
|
||||
qcow2_free_any_cluster(bs, old_l2_entry, type);
|
||||
} else if (s->discard_passthrough[type] &&
|
||||
(cluster_type == QCOW2_CLUSTER_NORMAL ||
|
||||
cluster_type == QCOW2_CLUSTER_ZERO_ALLOC)) {
|
||||
} else {
|
||||
/* If we keep the reference, pass on the discard still */
|
||||
bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
|
||||
s->cluster_size);
|
||||
qcow2_discard_cluster(bs, old_l2_entry & L2E_OFFSET_MASK,
|
||||
s->cluster_size, cluster_type, type);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2092,12 +2090,10 @@ zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
|
|||
if (!keep_reference) {
|
||||
/* Then decrease the refcount */
|
||||
qcow2_free_any_cluster(bs, old_l2_entry, QCOW2_DISCARD_REQUEST);
|
||||
} else if (s->discard_passthrough[QCOW2_DISCARD_REQUEST] &&
|
||||
(type == QCOW2_CLUSTER_NORMAL ||
|
||||
type == QCOW2_CLUSTER_ZERO_ALLOC)) {
|
||||
} else {
|
||||
/* If we keep the reference, pass on the discard still */
|
||||
bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
|
||||
s->cluster_size);
|
||||
qcow2_discard_cluster(bs, old_l2_entry & L2E_OFFSET_MASK,
|
||||
s->cluster_size, type, QCOW2_DISCARD_REQUEST);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -754,8 +754,8 @@ void qcow2_process_discards(BlockDriverState *bs, int ret)
|
|||
}
|
||||
}
|
||||
|
||||
static void update_refcount_discard(BlockDriverState *bs,
|
||||
uint64_t offset, uint64_t length)
|
||||
static void queue_discard(BlockDriverState *bs,
|
||||
uint64_t offset, uint64_t length)
|
||||
{
|
||||
BDRVQcow2State *s = bs->opaque;
|
||||
Qcow2DiscardRegion *d, *p, *next;
|
||||
|
|
@ -902,7 +902,7 @@ update_refcount(BlockDriverState *bs, int64_t offset, int64_t length,
|
|||
}
|
||||
|
||||
if (s->discard_passthrough[type]) {
|
||||
update_refcount_discard(bs, cluster_offset, s->cluster_size);
|
||||
queue_discard(bs, cluster_offset, s->cluster_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1205,6 +1205,23 @@ void qcow2_free_any_cluster(BlockDriverState *bs, uint64_t l2_entry,
|
|||
}
|
||||
}
|
||||
|
||||
void qcow2_discard_cluster(BlockDriverState *bs, uint64_t offset,
|
||||
uint64_t length, QCow2ClusterType ctype,
|
||||
enum qcow2_discard_type dtype)
|
||||
{
|
||||
BDRVQcow2State *s = bs->opaque;
|
||||
|
||||
if (s->discard_passthrough[dtype] &&
|
||||
(ctype == QCOW2_CLUSTER_NORMAL ||
|
||||
ctype == QCOW2_CLUSTER_ZERO_ALLOC)) {
|
||||
if (has_data_file(bs)) {
|
||||
bdrv_pdiscard(s->data_file, offset, length);
|
||||
} else {
|
||||
queue_discard(bs, offset, length);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int qcow2_write_caches(BlockDriverState *bs)
|
||||
{
|
||||
BDRVQcow2State *s = bs->opaque;
|
||||
|
|
@ -3619,7 +3636,7 @@ qcow2_discard_refcount_block(BlockDriverState *bs, uint64_t discard_block_offs)
|
|||
/* discard refblock from the cache if refblock is cached */
|
||||
qcow2_cache_discard(s->refcount_block_cache, refblock);
|
||||
}
|
||||
update_refcount_discard(bs, discard_block_offs, s->cluster_size);
|
||||
queue_discard(bs, discard_block_offs, s->cluster_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3956,7 +3956,7 @@ qcow2_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts,
|
|||
}
|
||||
|
||||
/* Create and open the file (protocol layer) */
|
||||
ret = bdrv_co_create_file(filename, opts, errp);
|
||||
ret = bdrv_co_create_file(filename, opts, true, errp);
|
||||
if (ret < 0) {
|
||||
goto finish;
|
||||
}
|
||||
|
|
@ -3971,7 +3971,7 @@ qcow2_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts,
|
|||
/* Create and open an external data file (protocol layer) */
|
||||
val = qdict_get_try_str(qdict, BLOCK_OPT_DATA_FILE);
|
||||
if (val) {
|
||||
ret = bdrv_co_create_file(val, opts, errp);
|
||||
ret = bdrv_co_create_file(val, opts, false, errp);
|
||||
if (ret < 0) {
|
||||
goto finish;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -880,6 +880,10 @@ void GRAPH_RDLOCK qcow2_free_clusters(BlockDriverState *bs,
|
|||
void GRAPH_RDLOCK
|
||||
qcow2_free_any_cluster(BlockDriverState *bs, uint64_t l2_entry,
|
||||
enum qcow2_discard_type type);
|
||||
void GRAPH_RDLOCK
|
||||
qcow2_discard_cluster(BlockDriverState *bs, uint64_t offset,
|
||||
uint64_t length, QCow2ClusterType ctype,
|
||||
enum qcow2_discard_type dtype);
|
||||
|
||||
int GRAPH_RDLOCK
|
||||
qcow2_update_snapshot_refcount(BlockDriverState *bs, int64_t l1_table_offset,
|
||||
|
|
|
|||
|
|
@ -788,7 +788,7 @@ bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename,
|
|||
}
|
||||
|
||||
/* Create and open the file (protocol layer) */
|
||||
ret = bdrv_co_create_file(filename, opts, errp);
|
||||
ret = bdrv_co_create_file(filename, opts, true, errp);
|
||||
if (ret < 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -463,7 +463,7 @@ static int coroutine_fn GRAPH_UNLOCKED
|
|||
raw_co_create_opts(BlockDriver *drv, const char *filename,
|
||||
QemuOpts *opts, Error **errp)
|
||||
{
|
||||
return bdrv_co_create_file(filename, opts, errp);
|
||||
return bdrv_co_create_file(filename, opts, true, errp);
|
||||
}
|
||||
|
||||
static int raw_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
|
|
|
|||
|
|
@ -62,15 +62,9 @@ qmp_block_stream(void *bs) "bs %p"
|
|||
file_paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "acb %p opaque %p offset %"PRId64" count %d type %d"
|
||||
|
||||
# io_uring.c
|
||||
luring_init_state(void *s, size_t size) "s %p size %zu"
|
||||
luring_cleanup_state(void *s) "%p freed"
|
||||
luring_unplug_fn(void *s, int blocked, int queued, int inflight) "LuringState %p blocked %d queued %d inflight %d"
|
||||
luring_do_submit(void *s, int blocked, int queued, int inflight) "LuringState %p blocked %d queued %d inflight %d"
|
||||
luring_do_submit_done(void *s, int ret) "LuringState %p submitted to kernel %d"
|
||||
luring_co_submit(void *bs, void *s, void *luringcb, int fd, uint64_t offset, size_t nbytes, int type) "bs %p s %p luringcb %p fd %d offset %" PRId64 " nbytes %zd type %d"
|
||||
luring_process_completion(void *s, void *aiocb, int ret) "LuringState %p luringcb %p ret %d"
|
||||
luring_io_uring_submit(void *s, int ret) "LuringState %p ret %d"
|
||||
luring_resubmit_short_read(void *s, void *luringcb, int nread) "LuringState %p luringcb %p nread %d"
|
||||
luring_cqe_handler(void *req, int ret) "req %p ret %d"
|
||||
luring_co_submit(void *bs, void *req, int fd, uint64_t offset, size_t nbytes, int type) "bs %p req %p fd %d offset %" PRId64 " nbytes %zd type %d"
|
||||
luring_resubmit_short_read(void *req, int nread) "req %p nread %d"
|
||||
|
||||
# qcow2.c
|
||||
qcow2_add_task(void *co, void *bs, void *pool, const char *action, int cluster_type, uint64_t host_offset, uint64_t offset, uint64_t bytes, void *qiov, size_t qiov_offset) "co %p bs %p pool %p: %s: cluster_type %d file_cluster_offset %" PRIu64 " offset %" PRIu64 " bytes %" PRIu64 " qiov %p qiov_offset %zu"
|
||||
|
|
|
|||
|
|
@ -938,7 +938,7 @@ vdi_co_create_opts(BlockDriver *drv, const char *filename,
|
|||
qdict = qemu_opts_to_qdict_filtered(opts, NULL, &vdi_create_opts, true);
|
||||
|
||||
/* Create and open the file (protocol layer) */
|
||||
ret = bdrv_co_create_file(filename, opts, errp);
|
||||
ret = bdrv_co_create_file(filename, opts, true, errp);
|
||||
if (ret < 0) {
|
||||
goto done;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2096,7 +2096,7 @@ vhdx_co_create_opts(BlockDriver *drv, const char *filename,
|
|||
}
|
||||
|
||||
/* Create and open the file (protocol layer) */
|
||||
ret = bdrv_co_create_file(filename, opts, errp);
|
||||
ret = bdrv_co_create_file(filename, opts, true, errp);
|
||||
if (ret < 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2334,7 +2334,7 @@ vmdk_create_extent(const char *filename, int64_t filesize, bool flat,
|
|||
int ret;
|
||||
BlockBackend *blk = NULL;
|
||||
|
||||
ret = bdrv_co_create_file(filename, opts, errp);
|
||||
ret = bdrv_co_create_file(filename, opts, false, errp);
|
||||
if (ret < 0) {
|
||||
goto exit;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1118,7 +1118,7 @@ vpc_co_create_opts(BlockDriver *drv, const char *filename,
|
|||
}
|
||||
|
||||
/* Create and open the file (protocol layer) */
|
||||
ret = bdrv_co_create_file(filename, opts, errp);
|
||||
ret = bdrv_co_create_file(filename, opts, true, errp);
|
||||
if (ret < 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -61,6 +61,27 @@ typedef struct LuringState LuringState;
|
|||
/* Is polling disabled? */
|
||||
bool aio_poll_disabled(AioContext *ctx);
|
||||
|
||||
#ifdef CONFIG_LINUX_IO_URING
|
||||
/*
|
||||
* Each io_uring request must have a unique CqeHandler that processes the cqe.
|
||||
* The lifetime of a CqeHandler must be at least from aio_add_sqe() until
|
||||
* ->cb() invocation.
|
||||
*/
|
||||
typedef struct CqeHandler CqeHandler;
|
||||
struct CqeHandler {
|
||||
/* Called by the AioContext when the request has completed */
|
||||
void (*cb)(CqeHandler *handler);
|
||||
|
||||
/* Used internally, do not access this */
|
||||
QSIMPLEQ_ENTRY(CqeHandler) next;
|
||||
|
||||
/* This field is filled in before ->cb() is called */
|
||||
struct io_uring_cqe cqe;
|
||||
};
|
||||
|
||||
typedef QSIMPLEQ_HEAD(, CqeHandler) CqeHandlerSimpleQ;
|
||||
#endif /* CONFIG_LINUX_IO_URING */
|
||||
|
||||
/* Callbacks for file descriptor monitoring implementations */
|
||||
typedef struct {
|
||||
/*
|
||||
|
|
@ -106,6 +127,78 @@ typedef struct {
|
|||
* Returns: true if ->wait() should be called, false otherwise.
|
||||
*/
|
||||
bool (*need_wait)(AioContext *ctx);
|
||||
|
||||
/*
|
||||
* dispatch:
|
||||
* @ctx: the AioContext
|
||||
*
|
||||
* Dispatch any work that is specific to this file descriptor monitoring
|
||||
* implementation. Usually the event loop's generic file descriptor
|
||||
* monitoring, BH, and timer dispatching code is sufficient, but file
|
||||
* descriptor monitoring implementations offering additional functionality
|
||||
* may need to implement this function for custom behavior. Called at a
|
||||
* point in the event loop when it is safe to invoke user-defined
|
||||
* callbacks.
|
||||
*
|
||||
* This function is optional and may be NULL.
|
||||
*
|
||||
* Returns: true if progress was made (see aio_poll()'s return value),
|
||||
* false otherwise.
|
||||
*/
|
||||
bool (*dispatch)(AioContext *ctx);
|
||||
|
||||
/*
|
||||
* gsource_prepare:
|
||||
* @ctx: the AioContext
|
||||
*
|
||||
* Prepare for the glib event loop to wait for events instead of the usual
|
||||
* ->wait() call. See glib's GSourceFuncs->prepare().
|
||||
*/
|
||||
void (*gsource_prepare)(AioContext *ctx);
|
||||
|
||||
/*
|
||||
* gsource_check:
|
||||
* @ctx: the AioContext
|
||||
*
|
||||
* Called by the glib event loop from glib's GSourceFuncs->check() after
|
||||
* waiting for events.
|
||||
*
|
||||
* Returns: true when ready to be dispatched.
|
||||
*/
|
||||
bool (*gsource_check)(AioContext *ctx);
|
||||
|
||||
/*
|
||||
* gsource_dispatch:
|
||||
* @ctx: the AioContext
|
||||
* @ready_list: list for handlers that become ready
|
||||
*
|
||||
* Place ready AioHandlers on ready_list. Called as part of the glib event
|
||||
* loop from glib's GSourceFuncs->dispatch().
|
||||
*
|
||||
* Called with list_lock incremented.
|
||||
*/
|
||||
void (*gsource_dispatch)(AioContext *ctx, AioHandlerList *ready_list);
|
||||
|
||||
#ifdef CONFIG_LINUX_IO_URING
|
||||
/**
|
||||
* add_sqe: Add an io_uring sqe for submission.
|
||||
* @prep_sqe: invoked with an sqe that should be prepared for submission
|
||||
* @opaque: user-defined argument to @prep_sqe()
|
||||
* @cqe_handler: the unique cqe handler associated with this request
|
||||
*
|
||||
* The caller's @prep_sqe() function is invoked to fill in the details of
|
||||
* the sqe. Do not call io_uring_sqe_set_data() on this sqe.
|
||||
*
|
||||
* The kernel may see the sqe as soon as @prep_sqe() returns or it may take
|
||||
* until the next event loop iteration.
|
||||
*
|
||||
* This function is called from the current AioContext and is not
|
||||
* thread-safe.
|
||||
*/
|
||||
void (*add_sqe)(AioContext *ctx,
|
||||
void (*prep_sqe)(struct io_uring_sqe *sqe, void *opaque),
|
||||
void *opaque, CqeHandler *cqe_handler);
|
||||
#endif /* CONFIG_LINUX_IO_URING */
|
||||
} FDMonOps;
|
||||
|
||||
/*
|
||||
|
|
@ -217,12 +310,14 @@ struct AioContext {
|
|||
struct LinuxAioState *linux_aio;
|
||||
#endif
|
||||
#ifdef CONFIG_LINUX_IO_URING
|
||||
LuringState *linux_io_uring;
|
||||
|
||||
/* State for file descriptor monitoring using Linux io_uring */
|
||||
struct io_uring fdmon_io_uring;
|
||||
AioHandlerSList submit_list;
|
||||
#endif
|
||||
void *io_uring_fd_tag;
|
||||
|
||||
/* Pending callback state for cqe handlers */
|
||||
CqeHandlerSimpleQ cqe_handler_ready_list;
|
||||
#endif /* CONFIG_LINUX_IO_URING */
|
||||
|
||||
/* TimerLists for calling timers - one per clock type. Has its own
|
||||
* locking.
|
||||
|
|
@ -254,7 +349,13 @@ struct AioContext {
|
|||
/* epoll(7) state used when built with CONFIG_EPOLL */
|
||||
int epollfd;
|
||||
|
||||
/* The GSource unix fd tag for epollfd */
|
||||
void *epollfd_tag;
|
||||
|
||||
const FDMonOps *fdmon_ops;
|
||||
|
||||
/* Was aio_context_new() successful? */
|
||||
bool initialized;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -512,11 +613,6 @@ struct LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp);
|
|||
/* Return the LinuxAioState bound to this AioContext */
|
||||
struct LinuxAioState *aio_get_linux_aio(AioContext *ctx);
|
||||
|
||||
/* Setup the LuringState bound to this AioContext */
|
||||
LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp);
|
||||
|
||||
/* Return the LuringState bound to this AioContext */
|
||||
LuringState *aio_get_linux_io_uring(AioContext *ctx);
|
||||
/**
|
||||
* aio_timer_new_with_attrs:
|
||||
* @ctx: the aio context
|
||||
|
|
@ -679,10 +775,13 @@ void qemu_set_current_aio_context(AioContext *ctx);
|
|||
/**
|
||||
* aio_context_setup:
|
||||
* @ctx: the aio context
|
||||
* @errp: error pointer
|
||||
*
|
||||
* Initialize the aio context.
|
||||
*
|
||||
* Returns: true on success, false otherwise
|
||||
*/
|
||||
void aio_context_setup(AioContext *ctx);
|
||||
bool aio_context_setup(AioContext *ctx, Error **errp);
|
||||
|
||||
/**
|
||||
* aio_context_destroy:
|
||||
|
|
@ -692,9 +791,6 @@ void aio_context_setup(AioContext *ctx);
|
|||
*/
|
||||
void aio_context_destroy(AioContext *ctx);
|
||||
|
||||
/* Used internally, do not call outside AioContext code */
|
||||
void aio_context_use_g_source(AioContext *ctx);
|
||||
|
||||
/**
|
||||
* aio_context_set_poll_params:
|
||||
* @ctx: the aio context
|
||||
|
|
@ -724,4 +820,40 @@ void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch);
|
|||
*/
|
||||
void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min,
|
||||
int64_t max, Error **errp);
|
||||
|
||||
#ifdef CONFIG_LINUX_IO_URING
|
||||
/**
|
||||
* aio_has_io_uring: Return whether io_uring is available.
|
||||
*
|
||||
* io_uring is either available in all AioContexts or in none, so this only
|
||||
* needs to be called once from within any thread's AioContext.
|
||||
*/
|
||||
static inline bool aio_has_io_uring(void)
|
||||
{
|
||||
AioContext *ctx = qemu_get_current_aio_context();
|
||||
return ctx->fdmon_ops->add_sqe;
|
||||
}
|
||||
|
||||
/**
|
||||
* aio_add_sqe: Add an io_uring sqe for submission.
|
||||
* @prep_sqe: invoked with an sqe that should be prepared for submission
|
||||
* @opaque: user-defined argument to @prep_sqe()
|
||||
* @cqe_handler: the unique cqe handler associated with this request
|
||||
*
|
||||
* The caller's @prep_sqe() function is invoked to fill in the details of the
|
||||
* sqe. Do not call io_uring_sqe_set_data() on this sqe.
|
||||
*
|
||||
* The sqe is submitted by the current AioContext. The kernel may see the sqe
|
||||
* as soon as @prep_sqe() returns or it may take until the next event loop
|
||||
* iteration.
|
||||
*
|
||||
* When the AioContext is destroyed, pending sqes are ignored and their
|
||||
* CqeHandlers are not invoked.
|
||||
*
|
||||
* This function must be called only when aio_has_io_uring() returns true.
|
||||
*/
|
||||
void aio_add_sqe(void (*prep_sqe)(struct io_uring_sqe *sqe, void *opaque),
|
||||
void *opaque, CqeHandler *cqe_handler);
|
||||
#endif /* CONFIG_LINUX_IO_URING */
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -65,7 +65,8 @@ int co_wrapper bdrv_create(BlockDriver *drv, const char *filename,
|
|||
QemuOpts *opts, Error **errp);
|
||||
|
||||
int coroutine_fn GRAPH_UNLOCKED
|
||||
bdrv_co_create_file(const char *filename, QemuOpts *opts, Error **errp);
|
||||
bdrv_co_create_file(const char *filename, QemuOpts *opts,
|
||||
bool allow_protocol_prefix, Error **errp);
|
||||
|
||||
BlockDriverState *bdrv_new(void);
|
||||
int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
|
||||
|
|
|
|||
|
|
@ -296,7 +296,7 @@ enum {
|
|||
NBD_CMD_BLOCK_STATUS = 7,
|
||||
};
|
||||
|
||||
#define NBD_DEFAULT_PORT 10809
|
||||
#define NBD_DEFAULT_PORT 10809
|
||||
|
||||
/* Maximum size of a single READ/WRITE data buffer */
|
||||
#define NBD_MAX_BUFFER_SIZE (32 * 1024 * 1024)
|
||||
|
|
|
|||
|
|
@ -74,15 +74,10 @@ static inline bool laio_has_fua(void)
|
|||
#endif
|
||||
/* io_uring.c - Linux io_uring implementation */
|
||||
#ifdef CONFIG_LINUX_IO_URING
|
||||
LuringState *luring_init(Error **errp);
|
||||
void luring_cleanup(LuringState *s);
|
||||
|
||||
/* luring_co_submit: submit I/O requests in the thread's current AioContext. */
|
||||
int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
|
||||
QEMUIOVector *qiov, int type,
|
||||
BdrvRequestFlags flags);
|
||||
void luring_detach_aio_context(LuringState *s, AioContext *old_context);
|
||||
void luring_attach_aio_context(LuringState *s, AioContext *new_context);
|
||||
bool luring_has_fua(void);
|
||||
#else
|
||||
static inline bool luring_has_fua(void)
|
||||
|
|
|
|||
|
|
@ -2745,6 +2745,8 @@ endif
|
|||
if linux_io_uring.found()
|
||||
config_host_data.set('HAVE_IO_URING_PREP_WRITEV2',
|
||||
cc.has_header_symbol('liburing.h', 'io_uring_prep_writev2'))
|
||||
config_host_data.set('HAVE_IO_URING_CQ_HAS_OVERFLOW',
|
||||
cc.has_header_symbol('liburing.h', 'io_uring_cq_has_overflow'))
|
||||
endif
|
||||
config_host_data.set('HAVE_TCP_KEEPCNT',
|
||||
cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPCNT') or
|
||||
|
|
|
|||
|
|
@ -4081,7 +4081,7 @@ static int img_rebase(const img_cmd_t *ccmd, int argc, char **argv)
|
|||
n += offset - QEMU_ALIGN_DOWN(offset, write_align);
|
||||
offset = QEMU_ALIGN_DOWN(offset, write_align);
|
||||
n += QEMU_ALIGN_UP(offset + n, write_align) - (offset + n);
|
||||
n = MIN(n, size - offset);
|
||||
n = MIN(n, MIN(size - offset, IO_BUF_SIZE));
|
||||
assert(!bdrv_is_allocated(unfiltered_bs, offset, n, &n_alloc) &&
|
||||
n_alloc == n);
|
||||
|
||||
|
|
@ -4597,9 +4597,9 @@ static int img_amend(const img_cmd_t *ccmd, int argc, char **argv)
|
|||
amend_opts = qemu_opts_append(amend_opts, bs->drv->amend_opts);
|
||||
opts = qemu_opts_create(amend_opts, NULL, 0, &error_abort);
|
||||
if (!qemu_opts_do_parse(opts, options, NULL, &err)) {
|
||||
qemu_opts_del(opts);
|
||||
/* Try to parse options using the create options */
|
||||
amend_opts = qemu_opts_append(amend_opts, bs->drv->create_opts);
|
||||
qemu_opts_del(opts);
|
||||
opts = qemu_opts_create(amend_opts, NULL, 0, &error_abort);
|
||||
if (qemu_opts_do_parse(opts, options, NULL, NULL)) {
|
||||
error_append_hint(&err,
|
||||
|
|
|
|||
|
|
@ -1,32 +0,0 @@
|
|||
/*
|
||||
* Linux io_uring support.
|
||||
*
|
||||
* Copyright (C) 2009 IBM, Corp.
|
||||
* Copyright (C) 2009 Red Hat, Inc.
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*/
|
||||
#include "qemu/osdep.h"
|
||||
#include "block/aio.h"
|
||||
#include "block/raw-aio.h"
|
||||
|
||||
void luring_detach_aio_context(LuringState *s, AioContext *old_context)
|
||||
{
|
||||
abort();
|
||||
}
|
||||
|
||||
void luring_attach_aio_context(LuringState *s, AioContext *new_context)
|
||||
{
|
||||
abort();
|
||||
}
|
||||
|
||||
LuringState *luring_init(Error **errp)
|
||||
{
|
||||
abort();
|
||||
}
|
||||
|
||||
void luring_cleanup(LuringState *s)
|
||||
{
|
||||
abort();
|
||||
}
|
||||
|
|
@ -32,9 +32,6 @@ if have_block or have_ga
|
|||
stub_ss.add(files('cpus-virtual-clock.c'))
|
||||
stub_ss.add(files('icount.c'))
|
||||
stub_ss.add(files('graph-lock.c'))
|
||||
if linux_io_uring.found()
|
||||
stub_ss.add(files('io_uring.c'))
|
||||
endif
|
||||
if libaio.found()
|
||||
stub_ss.add(files('linux-aio.c'))
|
||||
endif
|
||||
|
|
|
|||
|
|
@ -315,6 +315,52 @@ echo
|
|||
|
||||
$QEMU_IMG map "$OVERLAY" | _filter_qemu_img_map
|
||||
|
||||
# Check that the region to copy to the overlay during a rebase
|
||||
# operation does not exceed the I/O buffer size.
|
||||
#
|
||||
# backing_new <-- backing_old <-- overlay
|
||||
#
|
||||
# Backing (new): -- -- -- -- <-- Empty image, size 4MB
|
||||
# Backing (old):|--|ff|ff|--| <-- 4 clusters, 1MB each
|
||||
# Overlay: |-- --|-- --| <-- 2 clusters, 2MB each
|
||||
#
|
||||
# The data at [1MB, 3MB) must be copied from the old backing image to
|
||||
# the overlay. However the rebase code will extend that region to the
|
||||
# overlay's (sub)cluster boundaries to avoid CoW (see commit 12df580b).
|
||||
# This test checks that IO_BUF_SIZE (2 MB) is taken into account.
|
||||
|
||||
echo
|
||||
echo "=== Test that the region to copy does not exceed 2MB (IO_BUF_SIZE) ==="
|
||||
echo
|
||||
|
||||
echo "Creating backing chain"
|
||||
echo
|
||||
|
||||
TEST_IMG=$BASE_NEW _make_test_img 4M
|
||||
TEST_IMG=$BASE_OLD CLUSTER_SIZE=1M _make_test_img -b "$BASE_NEW" -F $IMGFMT
|
||||
TEST_IMG=$OVERLAY CLUSTER_SIZE=2M _make_test_img -b "$BASE_OLD" -F $IMGFMT
|
||||
|
||||
echo
|
||||
echo "Writing data to region [1MB, 3MB)"
|
||||
echo
|
||||
|
||||
$QEMU_IO "$BASE_OLD" -c "write -P 0xff 1M 2M" | _filter_qemu_io
|
||||
|
||||
echo
|
||||
echo "Rebasing"
|
||||
echo
|
||||
|
||||
$QEMU_IMG rebase -b "$BASE_NEW" -F $IMGFMT "$OVERLAY"
|
||||
|
||||
echo "Verifying the data"
|
||||
echo
|
||||
|
||||
$QEMU_IO "$OVERLAY" -c "read -P 0x00 0 1M" | _filter_qemu_io
|
||||
$QEMU_IO "$OVERLAY" -c "read -P 0xff 1M 2M" | _filter_qemu_io
|
||||
$QEMU_IO "$OVERLAY" -c "read -P 0x00 3M 1M" | _filter_qemu_io
|
||||
|
||||
$QEMU_IMG map "$OVERLAY" | _filter_qemu_img_map
|
||||
|
||||
echo
|
||||
|
||||
# success, all done
|
||||
|
|
|
|||
|
|
@ -243,4 +243,30 @@ Offset Length File
|
|||
0 0x20000 TEST_DIR/subdir/t.IMGFMT
|
||||
0x40000 0x20000 TEST_DIR/subdir/t.IMGFMT
|
||||
|
||||
=== Test that the region to copy does not exceed 2MB (IO_BUF_SIZE) ===
|
||||
|
||||
Creating backing chain
|
||||
|
||||
Formatting 'TEST_DIR/subdir/t.IMGFMT.base_new', fmt=IMGFMT size=4194304
|
||||
Formatting 'TEST_DIR/subdir/t.IMGFMT.base_old', fmt=IMGFMT size=4194304 backing_file=TEST_DIR/subdir/t.IMGFMT.base_new backing_fmt=IMGFMT
|
||||
Formatting 'TEST_DIR/subdir/t.IMGFMT', fmt=IMGFMT size=4194304 backing_file=TEST_DIR/subdir/t.IMGFMT.base_old backing_fmt=IMGFMT
|
||||
|
||||
Writing data to region [1MB, 3MB)
|
||||
|
||||
wrote 2097152/2097152 bytes at offset 1048576
|
||||
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
|
||||
Rebasing
|
||||
|
||||
Verifying the data
|
||||
|
||||
read 1048576/1048576 bytes at offset 0
|
||||
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
read 2097152/2097152 bytes at offset 1048576
|
||||
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
read 1048576/1048576 bytes at offset 3145728
|
||||
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
Offset Length File
|
||||
0 0x400000 TEST_DIR/subdir/t.IMGFMT
|
||||
|
||||
*** done
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ run_qemu()
|
|||
}
|
||||
|
||||
test_throttle=$($QEMU_IMG --help|grep throttle)
|
||||
[ "$test_throttle" = "" ] && _supported_fmt throttle
|
||||
[ "$test_throttle" = "" ] && _notrun "qemu-img does not support throttle"
|
||||
|
||||
echo
|
||||
echo "== checking interface =="
|
||||
|
|
|
|||
|
|
@ -310,14 +310,18 @@ def test_bitmap_sync(bsync_mode, msync_mode='bitmap', failure=None):
|
|||
'state': 1,
|
||||
'new_state': 2
|
||||
}, {
|
||||
'event': 'read_aio',
|
||||
'event': 'flush_to_disk',
|
||||
'state': 2,
|
||||
'new_state': 3
|
||||
}, {
|
||||
'event': "read_aio",
|
||||
'state': 3,
|
||||
'new_state': 4
|
||||
}],
|
||||
'inject-error': [{
|
||||
'event': 'read_aio',
|
||||
'errno': 5,
|
||||
'state': 3,
|
||||
'state': 4,
|
||||
'immediately': False,
|
||||
'once': True
|
||||
}]
|
||||
|
|
|
|||
|
|
@ -272,7 +272,7 @@ qemu_img compare "TEST_DIR/PID-img" "TEST_DIR/PID-fbackup2" ==> Identical, OK!
|
|||
|
||||
--- Preparing image & VM ---
|
||||
|
||||
{"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": {"driver": "blkdebug", "image": {"driver": "file", "filename": "TEST_DIR/PID-img"}, "inject-error": [{"errno": 5, "event": "read_aio", "immediately": false, "once": true, "state": 3}], "set-state": [{"event": "flush_to_disk", "new-state": 2, "state": 1}, {"event": "read_aio", "new-state": 3, "state": 2}]}, "node-name": "drive0"}}
|
||||
{"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": {"driver": "blkdebug", "image": {"driver": "file", "filename": "TEST_DIR/PID-img"}, "inject-error": [{"errno": 5, "event": "read_aio", "immediately": false, "once": true, "state": 4}], "set-state": [{"event": "flush_to_disk", "new-state": 2, "state": 1}, {"event": "flush_to_disk", "new-state": 3, "state": 2}, {"event": "read_aio", "new-state": 4, "state": 3}]}, "node-name": "drive0"}}
|
||||
{"return": {}}
|
||||
|
||||
--- Write #0 ---
|
||||
|
|
@ -1017,7 +1017,7 @@ qemu_img compare "TEST_DIR/PID-img" "TEST_DIR/PID-fbackup2" ==> Identical, OK!
|
|||
|
||||
--- Preparing image & VM ---
|
||||
|
||||
{"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": {"driver": "blkdebug", "image": {"driver": "file", "filename": "TEST_DIR/PID-img"}, "inject-error": [{"errno": 5, "event": "read_aio", "immediately": false, "once": true, "state": 3}], "set-state": [{"event": "flush_to_disk", "new-state": 2, "state": 1}, {"event": "read_aio", "new-state": 3, "state": 2}]}, "node-name": "drive0"}}
|
||||
{"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": {"driver": "blkdebug", "image": {"driver": "file", "filename": "TEST_DIR/PID-img"}, "inject-error": [{"errno": 5, "event": "read_aio", "immediately": false, "once": true, "state": 4}], "set-state": [{"event": "flush_to_disk", "new-state": 2, "state": 1}, {"event": "flush_to_disk", "new-state": 3, "state": 2}, {"event": "read_aio", "new-state": 4, "state": 3}]}, "node-name": "drive0"}}
|
||||
{"return": {}}
|
||||
|
||||
--- Write #0 ---
|
||||
|
|
@ -1762,7 +1762,7 @@ qemu_img compare "TEST_DIR/PID-img" "TEST_DIR/PID-fbackup2" ==> Identical, OK!
|
|||
|
||||
--- Preparing image & VM ---
|
||||
|
||||
{"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": {"driver": "blkdebug", "image": {"driver": "file", "filename": "TEST_DIR/PID-img"}, "inject-error": [{"errno": 5, "event": "read_aio", "immediately": false, "once": true, "state": 3}], "set-state": [{"event": "flush_to_disk", "new-state": 2, "state": 1}, {"event": "read_aio", "new-state": 3, "state": 2}]}, "node-name": "drive0"}}
|
||||
{"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": {"driver": "blkdebug", "image": {"driver": "file", "filename": "TEST_DIR/PID-img"}, "inject-error": [{"errno": 5, "event": "read_aio", "immediately": false, "once": true, "state": 4}], "set-state": [{"event": "flush_to_disk", "new-state": 2, "state": 1}, {"event": "flush_to_disk", "new-state": 3, "state": 2}, {"event": "read_aio", "new-state": 4, "state": 3}]}, "node-name": "drive0"}}
|
||||
{"return": {}}
|
||||
|
||||
--- Write #0 ---
|
||||
|
|
@ -2507,7 +2507,7 @@ qemu_img compare "TEST_DIR/PID-img" "TEST_DIR/PID-fbackup2" ==> Identical, OK!
|
|||
|
||||
--- Preparing image & VM ---
|
||||
|
||||
{"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": {"driver": "blkdebug", "image": {"driver": "file", "filename": "TEST_DIR/PID-img"}, "inject-error": [{"errno": 5, "event": "read_aio", "immediately": false, "once": true, "state": 3}], "set-state": [{"event": "flush_to_disk", "new-state": 2, "state": 1}, {"event": "read_aio", "new-state": 3, "state": 2}]}, "node-name": "drive0"}}
|
||||
{"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": {"driver": "blkdebug", "image": {"driver": "file", "filename": "TEST_DIR/PID-img"}, "inject-error": [{"errno": 5, "event": "read_aio", "immediately": false, "once": true, "state": 4}], "set-state": [{"event": "flush_to_disk", "new-state": 2, "state": 1}, {"event": "flush_to_disk", "new-state": 3, "state": 2}, {"event": "read_aio", "new-state": 4, "state": 3}]}, "node-name": "drive0"}}
|
||||
{"return": {}}
|
||||
|
||||
--- Write #0 ---
|
||||
|
|
@ -3252,7 +3252,7 @@ qemu_img compare "TEST_DIR/PID-img" "TEST_DIR/PID-fbackup2" ==> Identical, OK!
|
|||
|
||||
--- Preparing image & VM ---
|
||||
|
||||
{"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": {"driver": "blkdebug", "image": {"driver": "file", "filename": "TEST_DIR/PID-img"}, "inject-error": [{"errno": 5, "event": "read_aio", "immediately": false, "once": true, "state": 3}], "set-state": [{"event": "flush_to_disk", "new-state": 2, "state": 1}, {"event": "read_aio", "new-state": 3, "state": 2}]}, "node-name": "drive0"}}
|
||||
{"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": {"driver": "blkdebug", "image": {"driver": "file", "filename": "TEST_DIR/PID-img"}, "inject-error": [{"errno": 5, "event": "read_aio", "immediately": false, "once": true, "state": 4}], "set-state": [{"event": "flush_to_disk", "new-state": 2, "state": 1}, {"event": "flush_to_disk", "new-state": 3, "state": 2}, {"event": "read_aio", "new-state": 4, "state": 3}]}, "node-name": "drive0"}}
|
||||
{"return": {}}
|
||||
|
||||
--- Write #0 ---
|
||||
|
|
@ -3997,7 +3997,7 @@ qemu_img compare "TEST_DIR/PID-img" "TEST_DIR/PID-fbackup2" ==> Identical, OK!
|
|||
|
||||
--- Preparing image & VM ---
|
||||
|
||||
{"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": {"driver": "blkdebug", "image": {"driver": "file", "filename": "TEST_DIR/PID-img"}, "inject-error": [{"errno": 5, "event": "read_aio", "immediately": false, "once": true, "state": 3}], "set-state": [{"event": "flush_to_disk", "new-state": 2, "state": 1}, {"event": "read_aio", "new-state": 3, "state": 2}]}, "node-name": "drive0"}}
|
||||
{"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": {"driver": "blkdebug", "image": {"driver": "file", "filename": "TEST_DIR/PID-img"}, "inject-error": [{"errno": 5, "event": "read_aio", "immediately": false, "once": true, "state": 4}], "set-state": [{"event": "flush_to_disk", "new-state": 2, "state": 1}, {"event": "flush_to_disk", "new-state": 3, "state": 2}, {"event": "read_aio", "new-state": 4, "state": 3}]}, "node-name": "drive0"}}
|
||||
{"return": {}}
|
||||
|
||||
--- Write #0 ---
|
||||
|
|
@ -4742,7 +4742,7 @@ qemu_img compare "TEST_DIR/PID-img" "TEST_DIR/PID-fbackup2" ==> Identical, OK!
|
|||
|
||||
--- Preparing image & VM ---
|
||||
|
||||
{"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": {"driver": "blkdebug", "image": {"driver": "file", "filename": "TEST_DIR/PID-img"}, "inject-error": [{"errno": 5, "event": "read_aio", "immediately": false, "once": true, "state": 3}], "set-state": [{"event": "flush_to_disk", "new-state": 2, "state": 1}, {"event": "read_aio", "new-state": 3, "state": 2}]}, "node-name": "drive0"}}
|
||||
{"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": {"driver": "blkdebug", "image": {"driver": "file", "filename": "TEST_DIR/PID-img"}, "inject-error": [{"errno": 5, "event": "read_aio", "immediately": false, "once": true, "state": 4}], "set-state": [{"event": "flush_to_disk", "new-state": 2, "state": 1}, {"event": "flush_to_disk", "new-state": 3, "state": 2}, {"event": "read_aio", "new-state": 4, "state": 3}]}, "node-name": "drive0"}}
|
||||
{"return": {}}
|
||||
|
||||
--- Write #0 ---
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import argparse
|
||||
import shutil
|
||||
|
|
@ -82,7 +83,7 @@ def make_argparser() -> argparse.ArgumentParser:
|
|||
g_env.add_argument('-i', dest='aiomode', default='threads',
|
||||
help='sets AIOMODE environment variable')
|
||||
|
||||
p.set_defaults(imgfmt='raw', imgproto='file')
|
||||
p.set_defaults(imgproto='file')
|
||||
|
||||
format_list = ['raw', 'bochs', 'cloop', 'parallels', 'qcow', 'qcow2',
|
||||
'qed', 'vdi', 'vpc', 'vhdx', 'vmdk', 'luks', 'dmg', 'vvfat']
|
||||
|
|
@ -137,15 +138,50 @@ def make_argparser() -> argparse.ArgumentParser:
|
|||
return p
|
||||
|
||||
|
||||
def dry_run_list(test_dir, imgfmt, testlist):
|
||||
for t in testlist:
|
||||
if not imgfmt:
|
||||
print('\n'.join([os.path.basename(t)]))
|
||||
continue
|
||||
# If a format has been given, we look for the "supported_fmt"
|
||||
# and the "unsupported_fmt" lines in the test and try to find out
|
||||
# whether the format is supported or not. This is only heuristics
|
||||
# (it can e.g. fail if the "unsupported_fmts" and "supported_fmts"
|
||||
# statements are in the same line), but it should be good enough
|
||||
# to get a proper list for "make check-block"
|
||||
with open(os.path.join(test_dir, t), 'r', encoding='utf-8') as fh:
|
||||
supported = True
|
||||
check_next_line = False
|
||||
sd = "[ \t'\"]" # Start delimiter
|
||||
ed = "([ \t'\"]|$)" # End delimiter
|
||||
for line in fh:
|
||||
if 'unsupported_fmt' in line:
|
||||
if re.search(sd + imgfmt + ed, line):
|
||||
supported = False
|
||||
break
|
||||
elif 'supported_fmt' in line or check_next_line:
|
||||
if re.search(sd + 'generic' + ed, line):
|
||||
continue # Might be followed by "unsupported" line
|
||||
supported = re.search(sd + imgfmt + ed, line)
|
||||
check_next_line = not ']' in line and \
|
||||
('supported_fmts=[' in line or check_next_line)
|
||||
if supported or not check_next_line:
|
||||
break
|
||||
if supported:
|
||||
print('\n'.join([os.path.basename(t)]))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
warnings.simplefilter("default")
|
||||
os.environ["PYTHONWARNINGS"] = "default"
|
||||
|
||||
args = make_argparser().parse_args()
|
||||
|
||||
image_format = args.imgfmt or 'raw'
|
||||
|
||||
env = TestEnv(source_dir=args.source_dir,
|
||||
build_dir=args.build_dir,
|
||||
imgfmt=args.imgfmt, imgproto=args.imgproto,
|
||||
imgfmt=image_format, imgproto=args.imgproto,
|
||||
aiomode=args.aiomode, cachemode=args.cachemode,
|
||||
imgopts=args.imgopts, misalign=args.misalign,
|
||||
debug=args.debug, valgrind=args.valgrind,
|
||||
|
|
@ -189,7 +225,7 @@ if __name__ == '__main__':
|
|||
|
||||
if args.dry_run:
|
||||
with env:
|
||||
print('\n'.join([os.path.basename(t) for t in tests]))
|
||||
dry_run_list(env.source_iotests, args.imgfmt, tests)
|
||||
else:
|
||||
with TestRunner(env, tap=args.tap,
|
||||
color=args.color) as tr:
|
||||
|
|
|
|||
|
|
@ -2,14 +2,6 @@ if not have_tools or host_os == 'windows'
|
|||
subdir_done()
|
||||
endif
|
||||
|
||||
foreach cflag: qemu_ldflags
|
||||
if cflag.startswith('-fsanitize') and \
|
||||
not cflag.contains('safe-stack') and not cflag.contains('cfi-icall')
|
||||
message('Sanitizers are enabled ==> Disabled the qemu-iotests.')
|
||||
subdir_done()
|
||||
endif
|
||||
endforeach
|
||||
|
||||
bash = find_program('bash', required: false, version: '>= 4.0')
|
||||
if not bash.found()
|
||||
message('bash >= v4.0 not available ==> Disabled the qemu-iotests.')
|
||||
|
|
@ -21,7 +13,10 @@ qemu_iotests_env = {'PYTHON': python.full_path()}
|
|||
qemu_iotests_formats = {
|
||||
'qcow2': 'quick',
|
||||
'raw': 'slow',
|
||||
'parallels': 'thorough',
|
||||
'qed': 'thorough',
|
||||
'vdi': 'thorough',
|
||||
'vhdx': 'thorough',
|
||||
'vmdk': 'thorough',
|
||||
'vpc': 'thorough'
|
||||
}
|
||||
|
|
|
|||
|
|
@ -263,10 +263,21 @@ class TestRunner(contextlib.AbstractContextManager['TestRunner']):
|
|||
Path(env[d]).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
test_dir = env['TEST_DIR']
|
||||
f_asan = Path(test_dir, f_test.name + '.out.asan')
|
||||
f_bad = Path(test_dir, f_test.name + '.out.bad')
|
||||
f_notrun = Path(test_dir, f_test.name + '.notrun')
|
||||
f_casenotrun = Path(test_dir, f_test.name + '.casenotrun')
|
||||
|
||||
env['ASAN_OPTIONS'] = f'detect_leaks=0:log_path={f_asan}'
|
||||
|
||||
def unlink_asan():
|
||||
with os.scandir(test_dir) as it:
|
||||
for entry in it:
|
||||
if entry.name.startswith(f_asan.name):
|
||||
os.unlink(entry)
|
||||
|
||||
unlink_asan()
|
||||
|
||||
for p in (f_notrun, f_casenotrun):
|
||||
silent_unlink(p)
|
||||
|
||||
|
|
@ -312,6 +323,7 @@ class TestRunner(contextlib.AbstractContextManager['TestRunner']):
|
|||
description=f'output mismatch (see {f_bad})',
|
||||
diff=diff, casenotrun=casenotrun)
|
||||
else:
|
||||
unlink_asan()
|
||||
f_bad.unlink()
|
||||
return TestResult(status='pass', elapsed=elapsed,
|
||||
casenotrun=casenotrun)
|
||||
|
|
|
|||
|
|
@ -8,16 +8,27 @@
|
|||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
import os
|
||||
from typing import Dict, Optional
|
||||
|
||||
import iotests
|
||||
from iotests import imgfmt, qemu_img_create, QMPTestCase
|
||||
|
||||
image_size = 1 * 1024 * 1024
|
||||
image = os.path.join(iotests.test_dir, 'test.img')
|
||||
|
||||
class TestResizeBelowRaw(QMPTestCase):
|
||||
class BaseResizeBelowRaw(QMPTestCase):
|
||||
raw_size: Optional[int] = None
|
||||
raw_offset: Optional[int] = None
|
||||
|
||||
def setUp(self) -> None:
|
||||
qemu_img_create('-f', imgfmt, image, str(image_size))
|
||||
|
||||
extra_options: Dict[str, str] = {}
|
||||
if self.raw_size is not None:
|
||||
extra_options['size'] = str(self.raw_size)
|
||||
if self.raw_offset is not None:
|
||||
extra_options['offset'] = str(self.raw_offset)
|
||||
|
||||
self.vm = iotests.VM()
|
||||
self.vm.add_blockdev(self.vm.qmp_to_opts({
|
||||
'driver': imgfmt,
|
||||
|
|
@ -26,7 +37,8 @@ class TestResizeBelowRaw(QMPTestCase):
|
|||
'driver': 'file',
|
||||
'filename': image,
|
||||
'node-name': 'file0',
|
||||
}
|
||||
},
|
||||
**extra_options
|
||||
}))
|
||||
self.vm.launch()
|
||||
|
||||
|
|
@ -34,14 +46,16 @@ class TestResizeBelowRaw(QMPTestCase):
|
|||
self.vm.shutdown()
|
||||
os.remove(image)
|
||||
|
||||
def assert_size(self, size: int) -> None:
|
||||
def assert_size(self, size: int, file_size: Optional[int] = None) -> None:
|
||||
nodes = self.vm.qmp('query-named-block-nodes', flat=True)['return']
|
||||
self.assertEqual(len(nodes), 2)
|
||||
for node in nodes:
|
||||
if node['drv'] == 'file':
|
||||
if node['drv'] == 'file' and file_size is not None:
|
||||
self.assertEqual(node['image']['virtual-size'], file_size)
|
||||
continue
|
||||
self.assertEqual(node['image']['virtual-size'], size)
|
||||
|
||||
class TestResizeBelowUnlimitedRaw(BaseResizeBelowRaw):
|
||||
def test_resize_below_raw(self) -> None:
|
||||
self.assert_size(image_size)
|
||||
self.vm.qmp('block_resize', node_name='file0', size=2*image_size)
|
||||
|
|
@ -49,5 +63,36 @@ class TestResizeBelowRaw(QMPTestCase):
|
|||
self.vm.qmp('block_resize', node_name='node0', size=3*image_size)
|
||||
self.assert_size(3*image_size)
|
||||
|
||||
# offset = 0 behaves the same as absent offset
|
||||
class TestResizeBelowRawWithZeroOffset(TestResizeBelowUnlimitedRaw):
|
||||
raw_offset = 0
|
||||
|
||||
class TestResizeBelowRawWithSize(BaseResizeBelowRaw):
|
||||
raw_size = image_size // 2
|
||||
|
||||
def test_resize_below_raw_with_size(self) -> None:
|
||||
self.assert_size(image_size // 2, image_size)
|
||||
|
||||
# This QMP command fails because node0 unshares RESIZE
|
||||
self.vm.qmp('block_resize', node_name='file0', size=2*image_size)
|
||||
self.assert_size(image_size // 2, image_size)
|
||||
|
||||
# This QMP command fails because node0 is a fixed-size disk
|
||||
self.vm.qmp('block_resize', node_name='node0', size=3*image_size)
|
||||
self.assert_size(image_size // 2, image_size)
|
||||
|
||||
class TestResizeBelowRawWithOffset(BaseResizeBelowRaw):
|
||||
raw_offset = image_size // 4
|
||||
|
||||
def test_resize_below_raw_with_offset(self) -> None:
|
||||
self.assert_size(image_size * 3 // 4, image_size)
|
||||
|
||||
# This QMP command fails because node0 unshares RESIZE
|
||||
self.vm.qmp('block_resize', node_name='file0', size=2*image_size)
|
||||
self.assert_size(image_size * 3 // 4, image_size)
|
||||
|
||||
self.vm.qmp('block_resize', node_name='node0', size=3*image_size)
|
||||
self.assert_size(3 * image_size, image_size * 13 // 4)
|
||||
|
||||
if __name__ == '__main__':
|
||||
iotests.main(supported_fmts=['raw'], supported_protocols=['file'])
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
.
|
||||
....
|
||||
----------------------------------------------------------------------
|
||||
Ran 1 tests
|
||||
Ran 4 tests
|
||||
|
||||
OK
|
||||
|
|
|
|||
|
|
@ -527,7 +527,12 @@ static void test_source_bh_delete_from_cb(void)
|
|||
g_assert_cmpint(data1.n, ==, data1.max);
|
||||
g_assert(data1.bh == NULL);
|
||||
|
||||
assert(g_main_context_iteration(NULL, false));
|
||||
/*
|
||||
* There may be up to one more iteration due to the aio_notify
|
||||
* EventNotifier.
|
||||
*/
|
||||
g_main_context_iteration(NULL, false);
|
||||
|
||||
assert(!g_main_context_iteration(NULL, false));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@
|
|||
#include "qemu/osdep.h"
|
||||
#include "block/aio.h"
|
||||
#include "qapi/error.h"
|
||||
#include "util/aio-posix.h"
|
||||
|
||||
typedef struct {
|
||||
AioContext *ctx;
|
||||
|
|
@ -71,17 +72,17 @@ static void test(void)
|
|||
.ctx = aio_context_new(&error_abort),
|
||||
};
|
||||
|
||||
if (td.ctx->fdmon_ops != &fdmon_poll_ops) {
|
||||
/* This test is tied to fdmon-poll.c */
|
||||
g_test_skip("fdmon_poll_ops not in use");
|
||||
return;
|
||||
}
|
||||
|
||||
qemu_set_current_aio_context(td.ctx);
|
||||
|
||||
/* Enable polling */
|
||||
aio_context_set_poll_params(td.ctx, 1000000, 2, 2, &error_abort);
|
||||
|
||||
/*
|
||||
* The GSource is unused but this has the side-effect of changing the fdmon
|
||||
* that AioContext uses.
|
||||
*/
|
||||
aio_get_g_source(td.ctx);
|
||||
|
||||
/* Make the event notifier active (set) right away */
|
||||
event_notifier_init(&td.poll_notifier, 1);
|
||||
aio_set_event_notifier(td.ctx, &td.poll_notifier,
|
||||
|
|
|
|||
141
util/aio-posix.c
141
util/aio-posix.c
|
|
@ -16,6 +16,7 @@
|
|||
#include "qemu/osdep.h"
|
||||
#include "block/block.h"
|
||||
#include "block/thread-pool.h"
|
||||
#include "qapi/error.h"
|
||||
#include "qemu/main-loop.h"
|
||||
#include "qemu/lockcnt.h"
|
||||
#include "qemu/rcu.h"
|
||||
|
|
@ -70,15 +71,6 @@ static AioHandler *find_aio_handler(AioContext *ctx, int fd)
|
|||
|
||||
static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
|
||||
{
|
||||
/* If the GSource is in the process of being destroyed then
|
||||
* g_source_remove_poll() causes an assertion failure. Skip
|
||||
* removal in that case, because glib cleans up its state during
|
||||
* destruction anyway.
|
||||
*/
|
||||
if (!g_source_is_destroyed(&ctx->source)) {
|
||||
g_source_remove_poll(&ctx->source, &node->pfd);
|
||||
}
|
||||
|
||||
node->pfd.revents = 0;
|
||||
node->poll_ready = false;
|
||||
|
||||
|
|
@ -153,7 +145,6 @@ void aio_set_fd_handler(AioContext *ctx,
|
|||
} else {
|
||||
new_node->pfd = node->pfd;
|
||||
}
|
||||
g_source_add_poll(&ctx->source, &new_node->pfd);
|
||||
|
||||
new_node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
|
||||
new_node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
|
||||
|
|
@ -267,37 +258,13 @@ bool aio_prepare(AioContext *ctx)
|
|||
poll_set_started(ctx, &ready_list, false);
|
||||
/* TODO what to do with this list? */
|
||||
|
||||
ctx->fdmon_ops->gsource_prepare(ctx);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool aio_pending(AioContext *ctx)
|
||||
{
|
||||
AioHandler *node;
|
||||
bool result = false;
|
||||
|
||||
/*
|
||||
* We have to walk very carefully in case aio_set_fd_handler is
|
||||
* called while we're walking.
|
||||
*/
|
||||
qemu_lockcnt_inc(&ctx->list_lock);
|
||||
|
||||
QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
|
||||
int revents;
|
||||
|
||||
/* TODO should this check poll ready? */
|
||||
revents = node->pfd.revents & node->pfd.events;
|
||||
if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
|
||||
result = true;
|
||||
break;
|
||||
}
|
||||
if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
|
||||
result = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
qemu_lockcnt_dec(&ctx->list_lock);
|
||||
|
||||
return result;
|
||||
return ctx->fdmon_ops->gsource_check(ctx);
|
||||
}
|
||||
|
||||
static void aio_free_deleted_handlers(AioContext *ctx)
|
||||
|
|
@ -390,10 +357,6 @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
|
|||
return progress;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we have a list of ready handlers then this is more efficient than
|
||||
* scanning all handlers with aio_dispatch_handlers().
|
||||
*/
|
||||
static bool aio_dispatch_ready_handlers(AioContext *ctx,
|
||||
AioHandlerList *ready_list,
|
||||
int64_t block_ns)
|
||||
|
|
@ -417,24 +380,23 @@ static bool aio_dispatch_ready_handlers(AioContext *ctx,
|
|||
return progress;
|
||||
}
|
||||
|
||||
/* Slower than aio_dispatch_ready_handlers() but only used via glib */
|
||||
static bool aio_dispatch_handlers(AioContext *ctx)
|
||||
{
|
||||
AioHandler *node, *tmp;
|
||||
bool progress = false;
|
||||
|
||||
QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
|
||||
progress = aio_dispatch_handler(ctx, node) || progress;
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
void aio_dispatch(AioContext *ctx)
|
||||
{
|
||||
AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
|
||||
|
||||
qemu_lockcnt_inc(&ctx->list_lock);
|
||||
|
||||
aio_bh_poll(ctx);
|
||||
aio_dispatch_handlers(ctx);
|
||||
|
||||
ctx->fdmon_ops->gsource_dispatch(ctx, &ready_list);
|
||||
|
||||
if (ctx->fdmon_ops->dispatch) {
|
||||
ctx->fdmon_ops->dispatch(ctx);
|
||||
}
|
||||
|
||||
/* block_ns is 0 because polling is disabled in the glib event loop */
|
||||
aio_dispatch_ready_handlers(ctx, &ready_list, 0);
|
||||
|
||||
aio_free_deleted_handlers(ctx);
|
||||
qemu_lockcnt_dec(&ctx->list_lock);
|
||||
|
||||
|
|
@ -559,7 +521,14 @@ static bool run_poll_handlers(AioContext *ctx, AioHandlerList *ready_list,
|
|||
elapsed_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time;
|
||||
max_ns = qemu_soonest_timeout(*timeout, max_ns);
|
||||
assert(!(max_ns && progress));
|
||||
} while (elapsed_time < max_ns && !ctx->fdmon_ops->need_wait(ctx));
|
||||
|
||||
if (ctx->fdmon_ops->need_wait(ctx)) {
|
||||
if (fdmon_supports_polling(ctx)) {
|
||||
*timeout = 0; /* stay in polling mode */
|
||||
}
|
||||
break;
|
||||
}
|
||||
} while (elapsed_time < max_ns);
|
||||
|
||||
if (remove_idle_poll_handlers(ctx, ready_list,
|
||||
start_time + elapsed_time)) {
|
||||
|
|
@ -722,7 +691,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
|
|||
* up IO threads when some work becomes pending. It is essential to
|
||||
* avoid hangs or unnecessary latency.
|
||||
*/
|
||||
if (poll_set_started(ctx, &ready_list, false)) {
|
||||
if (timeout && poll_set_started(ctx, &ready_list, false)) {
|
||||
timeout = 0;
|
||||
progress = true;
|
||||
}
|
||||
|
|
@ -743,6 +712,10 @@ bool aio_poll(AioContext *ctx, bool blocking)
|
|||
block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
|
||||
}
|
||||
|
||||
if (ctx->fdmon_ops->dispatch) {
|
||||
progress |= ctx->fdmon_ops->dispatch(ctx);
|
||||
}
|
||||
|
||||
progress |= aio_bh_poll(ctx);
|
||||
progress |= aio_dispatch_ready_handlers(ctx, &ready_list, block_ns);
|
||||
|
||||
|
|
@ -755,35 +728,50 @@ bool aio_poll(AioContext *ctx, bool blocking)
|
|||
return progress;
|
||||
}
|
||||
|
||||
void aio_context_setup(AioContext *ctx)
|
||||
bool aio_context_setup(AioContext *ctx, Error **errp)
|
||||
{
|
||||
ctx->fdmon_ops = &fdmon_poll_ops;
|
||||
ctx->epollfd = -1;
|
||||
ctx->epollfd_tag = NULL;
|
||||
|
||||
/* Use the fastest fd monitoring implementation if available */
|
||||
if (fdmon_io_uring_setup(ctx)) {
|
||||
return;
|
||||
#ifdef CONFIG_LINUX_IO_URING
|
||||
{
|
||||
static bool need_io_uring;
|
||||
Error *local_err = NULL; /* ERRP_GUARD() doesn't handle error_abort */
|
||||
|
||||
/* io_uring takes precedence because it provides aio_add_sqe() support */
|
||||
if (fdmon_io_uring_setup(ctx, &local_err)) {
|
||||
/*
|
||||
* If one AioContext gets io_uring, then all AioContexts need io_uring
|
||||
* so that aio_add_sqe() support is available across all threads.
|
||||
*/
|
||||
need_io_uring = true;
|
||||
return true;
|
||||
}
|
||||
if (need_io_uring) {
|
||||
error_propagate(errp, local_err);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Silently fall back on systems where io_uring is unavailable */
|
||||
error_free(local_err);
|
||||
}
|
||||
#endif /* CONFIG_LINUX_IO_URING */
|
||||
|
||||
fdmon_epoll_setup(ctx);
|
||||
return true;
|
||||
}
|
||||
|
||||
void aio_context_destroy(AioContext *ctx)
|
||||
{
|
||||
#ifdef CONFIG_LINUX_IO_URING
|
||||
fdmon_io_uring_destroy(ctx);
|
||||
fdmon_epoll_disable(ctx);
|
||||
aio_free_deleted_handlers(ctx);
|
||||
}
|
||||
#endif
|
||||
|
||||
qemu_lockcnt_lock(&ctx->list_lock);
|
||||
fdmon_epoll_disable(ctx);
|
||||
qemu_lockcnt_unlock(&ctx->list_lock);
|
||||
|
||||
void aio_context_use_g_source(AioContext *ctx)
|
||||
{
|
||||
/*
|
||||
* Disable io_uring when the glib main loop is used because it doesn't
|
||||
* support mixed glib/aio_poll() usage. It relies on aio_poll() being
|
||||
* called regularly so that changes to the monitored file descriptors are
|
||||
* submitted, otherwise a list of pending fd handlers builds up.
|
||||
*/
|
||||
fdmon_io_uring_destroy(ctx);
|
||||
aio_free_deleted_handlers(ctx);
|
||||
}
|
||||
|
||||
|
|
@ -818,3 +806,12 @@ void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch)
|
|||
|
||||
aio_notify(ctx);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_LINUX_IO_URING
|
||||
void aio_add_sqe(void (*prep_sqe)(struct io_uring_sqe *sqe, void *opaque),
|
||||
void *opaque, CqeHandler *cqe_handler)
|
||||
{
|
||||
AioContext *ctx = qemu_get_current_aio_context();
|
||||
ctx->fdmon_ops->add_sqe(ctx, prep_sqe, opaque, cqe_handler);
|
||||
}
|
||||
#endif /* CONFIG_LINUX_IO_URING */
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@
|
|||
#define AIO_POSIX_H
|
||||
|
||||
#include "block/aio.h"
|
||||
#include "qapi/error.h"
|
||||
|
||||
struct AioHandler {
|
||||
GPollFD pfd;
|
||||
|
|
@ -35,6 +36,7 @@ struct AioHandler {
|
|||
#ifdef CONFIG_LINUX_IO_URING
|
||||
QSLIST_ENTRY(AioHandler) node_submitted;
|
||||
unsigned flags; /* see fdmon-io_uring.c */
|
||||
CqeHandler internal_cqe_handler; /* used for POLL_ADD/POLL_REMOVE */
|
||||
#endif
|
||||
int64_t poll_idle_timeout; /* when to stop userspace polling */
|
||||
bool poll_ready; /* has polling detected an event? */
|
||||
|
|
@ -47,9 +49,14 @@ void aio_add_ready_handler(AioHandlerList *ready_list, AioHandler *node,
|
|||
|
||||
extern const FDMonOps fdmon_poll_ops;
|
||||
|
||||
/* Switch back to poll(2). list_lock must be held. */
|
||||
void fdmon_poll_downgrade(AioContext *ctx);
|
||||
|
||||
#ifdef CONFIG_EPOLL_CREATE1
|
||||
bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd);
|
||||
void fdmon_epoll_setup(AioContext *ctx);
|
||||
|
||||
/* list_lock must be held */
|
||||
void fdmon_epoll_disable(AioContext *ctx);
|
||||
#else
|
||||
static inline bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd)
|
||||
|
|
@ -67,17 +74,8 @@ static inline void fdmon_epoll_disable(AioContext *ctx)
|
|||
#endif /* !CONFIG_EPOLL_CREATE1 */
|
||||
|
||||
#ifdef CONFIG_LINUX_IO_URING
|
||||
bool fdmon_io_uring_setup(AioContext *ctx);
|
||||
bool fdmon_io_uring_setup(AioContext *ctx, Error **errp);
|
||||
void fdmon_io_uring_destroy(AioContext *ctx);
|
||||
#else
|
||||
static inline bool fdmon_io_uring_setup(AioContext *ctx)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void fdmon_io_uring_destroy(AioContext *ctx)
|
||||
{
|
||||
}
|
||||
#endif /* !CONFIG_LINUX_IO_URING */
|
||||
|
||||
#endif /* AIO_POSIX_H */
|
||||
|
|
|
|||
|
|
@ -419,18 +419,15 @@ bool aio_poll(AioContext *ctx, bool blocking)
|
|||
return progress;
|
||||
}
|
||||
|
||||
void aio_context_setup(AioContext *ctx)
|
||||
bool aio_context_setup(AioContext *ctx, Error **errp)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
void aio_context_destroy(AioContext *ctx)
|
||||
{
|
||||
}
|
||||
|
||||
void aio_context_use_g_source(AioContext *ctx)
|
||||
{
|
||||
}
|
||||
|
||||
void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
|
||||
int64_t grow, int64_t shrink, Error **errp)
|
||||
{
|
||||
|
|
|
|||
74
util/async.c
74
util/async.c
|
|
@ -366,12 +366,16 @@ aio_ctx_dispatch(GSource *source,
|
|||
}
|
||||
|
||||
static void
|
||||
aio_ctx_finalize(GSource *source)
|
||||
aio_ctx_finalize(GSource *source)
|
||||
{
|
||||
AioContext *ctx = (AioContext *) source;
|
||||
QEMUBH *bh;
|
||||
unsigned flags;
|
||||
|
||||
if (!ctx->initialized) {
|
||||
return;
|
||||
}
|
||||
|
||||
thread_pool_free_aio(ctx->thread_pool);
|
||||
|
||||
#ifdef CONFIG_LINUX_AIO
|
||||
|
|
@ -382,14 +386,6 @@ aio_ctx_finalize(GSource *source)
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_LINUX_IO_URING
|
||||
if (ctx->linux_io_uring) {
|
||||
luring_detach_aio_context(ctx->linux_io_uring, ctx);
|
||||
luring_cleanup(ctx->linux_io_uring);
|
||||
ctx->linux_io_uring = NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(QSLIST_EMPTY(&ctx->scheduled_coroutines));
|
||||
qemu_bh_delete(ctx->co_schedule_bh);
|
||||
|
||||
|
|
@ -418,10 +414,11 @@ aio_ctx_finalize(GSource *source)
|
|||
aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL, NULL);
|
||||
event_notifier_cleanup(&ctx->notifier);
|
||||
qemu_rec_mutex_destroy(&ctx->lock);
|
||||
qemu_lockcnt_destroy(&ctx->list_lock);
|
||||
timerlistgroup_deinit(&ctx->tlg);
|
||||
unregister_aiocontext(ctx);
|
||||
aio_context_destroy(ctx);
|
||||
/* aio_context_destroy() still needs the lock */
|
||||
qemu_lockcnt_destroy(&ctx->list_lock);
|
||||
}
|
||||
|
||||
static GSourceFuncs aio_source_funcs = {
|
||||
|
|
@ -433,7 +430,6 @@ static GSourceFuncs aio_source_funcs = {
|
|||
|
||||
GSource *aio_get_g_source(AioContext *ctx)
|
||||
{
|
||||
aio_context_use_g_source(ctx);
|
||||
g_source_ref(&ctx->source);
|
||||
return &ctx->source;
|
||||
}
|
||||
|
|
@ -465,29 +461,6 @@ LinuxAioState *aio_get_linux_aio(AioContext *ctx)
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_LINUX_IO_URING
|
||||
LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp)
|
||||
{
|
||||
if (ctx->linux_io_uring) {
|
||||
return ctx->linux_io_uring;
|
||||
}
|
||||
|
||||
ctx->linux_io_uring = luring_init(errp);
|
||||
if (!ctx->linux_io_uring) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
luring_attach_aio_context(ctx->linux_io_uring, ctx);
|
||||
return ctx->linux_io_uring;
|
||||
}
|
||||
|
||||
LuringState *aio_get_linux_io_uring(AioContext *ctx)
|
||||
{
|
||||
assert(ctx->linux_io_uring);
|
||||
return ctx->linux_io_uring;
|
||||
}
|
||||
#endif
|
||||
|
||||
void aio_notify(AioContext *ctx)
|
||||
{
|
||||
/*
|
||||
|
|
@ -577,19 +550,42 @@ static void co_schedule_bh_cb(void *opaque)
|
|||
|
||||
AioContext *aio_context_new(Error **errp)
|
||||
{
|
||||
ERRP_GUARD();
|
||||
int ret;
|
||||
AioContext *ctx;
|
||||
|
||||
/*
|
||||
* ctx is freed by g_source_unref() (e.g. aio_context_unref()). ctx's
|
||||
* resources are freed as follows:
|
||||
*
|
||||
* 1. By aio_ctx_finalize() after aio_context_new() has returned and set
|
||||
* ->initialized = true.
|
||||
*
|
||||
* 2. By manual cleanup code in this function's error paths before goto
|
||||
* fail.
|
||||
*
|
||||
* Be careful to free resources in both cases!
|
||||
*/
|
||||
ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
|
||||
QSLIST_INIT(&ctx->bh_list);
|
||||
QSIMPLEQ_INIT(&ctx->bh_slice_list);
|
||||
aio_context_setup(ctx);
|
||||
|
||||
ret = event_notifier_init(&ctx->notifier, false);
|
||||
if (ret < 0) {
|
||||
error_setg_errno(errp, -ret, "Failed to initialize event notifier");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/*
|
||||
* Resources cannot easily be freed manually after aio_context_setup(). If
|
||||
* you add any new resources to AioContext, it's probably best to acquire
|
||||
* them before aio_context_setup().
|
||||
*/
|
||||
if (!aio_context_setup(ctx, errp)) {
|
||||
event_notifier_cleanup(&ctx->notifier);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
g_source_set_can_recurse(&ctx->source, true);
|
||||
qemu_lockcnt_init(&ctx->list_lock);
|
||||
|
||||
|
|
@ -604,10 +600,6 @@ AioContext *aio_context_new(Error **errp)
|
|||
ctx->linux_aio = NULL;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_LINUX_IO_URING
|
||||
ctx->linux_io_uring = NULL;
|
||||
#endif
|
||||
|
||||
ctx->thread_pool = NULL;
|
||||
qemu_rec_mutex_init(&ctx->lock);
|
||||
timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
|
||||
|
|
@ -623,9 +615,11 @@ AioContext *aio_context_new(Error **errp)
|
|||
|
||||
register_aiocontext(ctx);
|
||||
|
||||
ctx->initialized = true;
|
||||
|
||||
return ctx;
|
||||
fail:
|
||||
g_source_destroy(&ctx->source);
|
||||
g_source_unref(&ctx->source);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -19,8 +19,12 @@ void fdmon_epoll_disable(AioContext *ctx)
|
|||
ctx->epollfd = -1;
|
||||
}
|
||||
|
||||
/* Switch back */
|
||||
ctx->fdmon_ops = &fdmon_poll_ops;
|
||||
if (ctx->epollfd_tag) {
|
||||
g_source_remove_unix_fd(&ctx->source, ctx->epollfd_tag);
|
||||
ctx->epollfd_tag = NULL;
|
||||
}
|
||||
|
||||
fdmon_poll_downgrade(ctx);
|
||||
}
|
||||
|
||||
static inline int epoll_events_from_pfd(int pfd_events)
|
||||
|
|
@ -93,10 +97,29 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static void fdmon_epoll_gsource_prepare(AioContext *ctx)
|
||||
{
|
||||
/* Do nothing */
|
||||
}
|
||||
|
||||
static bool fdmon_epoll_gsource_check(AioContext *ctx)
|
||||
{
|
||||
return g_source_query_unix_fd(&ctx->source, ctx->epollfd_tag) & G_IO_IN;
|
||||
}
|
||||
|
||||
static void fdmon_epoll_gsource_dispatch(AioContext *ctx,
|
||||
AioHandlerList *ready_list)
|
||||
{
|
||||
fdmon_epoll_wait(ctx, ready_list, 0);
|
||||
}
|
||||
|
||||
static const FDMonOps fdmon_epoll_ops = {
|
||||
.update = fdmon_epoll_update,
|
||||
.wait = fdmon_epoll_wait,
|
||||
.need_wait = aio_poll_disabled,
|
||||
.gsource_prepare = fdmon_epoll_gsource_prepare,
|
||||
.gsource_check = fdmon_epoll_gsource_check,
|
||||
.gsource_dispatch = fdmon_epoll_gsource_dispatch,
|
||||
};
|
||||
|
||||
static bool fdmon_epoll_try_enable(AioContext *ctx)
|
||||
|
|
@ -118,6 +141,8 @@ static bool fdmon_epoll_try_enable(AioContext *ctx)
|
|||
}
|
||||
|
||||
ctx->fdmon_ops = &fdmon_epoll_ops;
|
||||
ctx->epollfd_tag = g_source_add_unix_fd(&ctx->source, ctx->epollfd,
|
||||
G_IO_IN);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -139,12 +164,11 @@ bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd)
|
|||
}
|
||||
|
||||
ok = fdmon_epoll_try_enable(ctx);
|
||||
|
||||
qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
|
||||
|
||||
if (!ok) {
|
||||
fdmon_epoll_disable(ctx);
|
||||
}
|
||||
|
||||
qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
|
||||
return ok;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -45,16 +45,20 @@
|
|||
|
||||
#include "qemu/osdep.h"
|
||||
#include <poll.h>
|
||||
#include "qapi/error.h"
|
||||
#include "qemu/defer-call.h"
|
||||
#include "qemu/rcu_queue.h"
|
||||
#include "aio-posix.h"
|
||||
#include "trace.h"
|
||||
|
||||
enum {
|
||||
FDMON_IO_URING_ENTRIES = 128, /* sq/cq ring size */
|
||||
|
||||
/* AioHandler::flags */
|
||||
FDMON_IO_URING_PENDING = (1 << 0),
|
||||
FDMON_IO_URING_ADD = (1 << 1),
|
||||
FDMON_IO_URING_REMOVE = (1 << 2),
|
||||
FDMON_IO_URING_PENDING = (1 << 0),
|
||||
FDMON_IO_URING_ADD = (1 << 1),
|
||||
FDMON_IO_URING_REMOVE = (1 << 2),
|
||||
FDMON_IO_URING_DELETE_AIO_HANDLER = (1 << 3),
|
||||
};
|
||||
|
||||
static inline int poll_events_from_pfd(int pfd_events)
|
||||
|
|
@ -74,8 +78,8 @@ static inline int pfd_events_from_poll(int poll_events)
|
|||
}
|
||||
|
||||
/*
|
||||
* Returns an sqe for submitting a request. Only be called within
|
||||
* fdmon_io_uring_wait().
|
||||
* Returns an sqe for submitting a request. Only called from the AioContext
|
||||
* thread.
|
||||
*/
|
||||
static struct io_uring_sqe *get_sqe(AioContext *ctx)
|
||||
{
|
||||
|
|
@ -166,41 +170,50 @@ static void fdmon_io_uring_update(AioContext *ctx,
|
|||
}
|
||||
}
|
||||
|
||||
static void fdmon_io_uring_add_sqe(AioContext *ctx,
|
||||
void (*prep_sqe)(struct io_uring_sqe *sqe, void *opaque),
|
||||
void *opaque, CqeHandler *cqe_handler)
|
||||
{
|
||||
struct io_uring_sqe *sqe = get_sqe(ctx);
|
||||
|
||||
prep_sqe(sqe, opaque);
|
||||
io_uring_sqe_set_data(sqe, cqe_handler);
|
||||
|
||||
trace_fdmon_io_uring_add_sqe(ctx, opaque, sqe->opcode, sqe->fd, sqe->off,
|
||||
cqe_handler);
|
||||
}
|
||||
|
||||
static void fdmon_special_cqe_handler(CqeHandler *cqe_handler)
|
||||
{
|
||||
/*
|
||||
* This is an empty function that is never called. It is used as a function
|
||||
* pointer to distinguish it from ordinary cqe handlers.
|
||||
*/
|
||||
}
|
||||
|
||||
static void add_poll_add_sqe(AioContext *ctx, AioHandler *node)
|
||||
{
|
||||
struct io_uring_sqe *sqe = get_sqe(ctx);
|
||||
int events = poll_events_from_pfd(node->pfd.events);
|
||||
|
||||
io_uring_prep_poll_add(sqe, node->pfd.fd, events);
|
||||
io_uring_sqe_set_data(sqe, node);
|
||||
node->internal_cqe_handler.cb = fdmon_special_cqe_handler;
|
||||
io_uring_sqe_set_data(sqe, &node->internal_cqe_handler);
|
||||
}
|
||||
|
||||
static void add_poll_remove_sqe(AioContext *ctx, AioHandler *node)
|
||||
{
|
||||
struct io_uring_sqe *sqe = get_sqe(ctx);
|
||||
CqeHandler *cqe_handler = &node->internal_cqe_handler;
|
||||
|
||||
#ifdef LIBURING_HAVE_DATA64
|
||||
io_uring_prep_poll_remove(sqe, (uintptr_t)node);
|
||||
io_uring_prep_poll_remove(sqe, (uintptr_t)cqe_handler);
|
||||
#else
|
||||
io_uring_prep_poll_remove(sqe, node);
|
||||
io_uring_prep_poll_remove(sqe, cqe_handler);
|
||||
#endif
|
||||
io_uring_sqe_set_data(sqe, NULL);
|
||||
}
|
||||
|
||||
/* Add a timeout that self-cancels when another cqe becomes ready */
|
||||
static void add_timeout_sqe(AioContext *ctx, int64_t ns)
|
||||
{
|
||||
struct io_uring_sqe *sqe;
|
||||
struct __kernel_timespec ts = {
|
||||
.tv_sec = ns / NANOSECONDS_PER_SECOND,
|
||||
.tv_nsec = ns % NANOSECONDS_PER_SECOND,
|
||||
};
|
||||
|
||||
sqe = get_sqe(ctx);
|
||||
io_uring_prep_timeout(sqe, &ts, 1, 0);
|
||||
io_uring_sqe_set_data(sqe, NULL);
|
||||
}
|
||||
|
||||
/* Add sqes from ctx->submit_list for submission */
|
||||
static void fill_sq_ring(AioContext *ctx)
|
||||
{
|
||||
|
|
@ -218,22 +231,26 @@ static void fill_sq_ring(AioContext *ctx)
|
|||
if (flags & FDMON_IO_URING_REMOVE) {
|
||||
add_poll_remove_sqe(ctx, node);
|
||||
}
|
||||
if (flags & FDMON_IO_URING_DELETE_AIO_HANDLER) {
|
||||
/*
|
||||
* process_cqe() sets this flag after ADD and REMOVE have been
|
||||
* cleared. They cannot be set again, so they must be clear.
|
||||
*/
|
||||
assert(!(flags & FDMON_IO_URING_ADD));
|
||||
assert(!(flags & FDMON_IO_URING_REMOVE));
|
||||
|
||||
QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Returns true if a handler became ready */
|
||||
static bool process_cqe(AioContext *ctx,
|
||||
AioHandlerList *ready_list,
|
||||
struct io_uring_cqe *cqe)
|
||||
static bool process_cqe_aio_handler(AioContext *ctx,
|
||||
AioHandlerList *ready_list,
|
||||
AioHandler *node,
|
||||
struct io_uring_cqe *cqe)
|
||||
{
|
||||
AioHandler *node = io_uring_cqe_get_data(cqe);
|
||||
unsigned flags;
|
||||
|
||||
/* poll_timeout and poll_remove have a zero user_data field */
|
||||
if (!node) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Deletion can only happen when IORING_OP_POLL_ADD completes. If we race
|
||||
* with enqueue() here then we can safely clear the FDMON_IO_URING_REMOVE
|
||||
|
|
@ -241,7 +258,12 @@ static bool process_cqe(AioContext *ctx,
|
|||
*/
|
||||
flags = qatomic_fetch_and(&node->flags, ~FDMON_IO_URING_REMOVE);
|
||||
if (flags & FDMON_IO_URING_REMOVE) {
|
||||
QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted);
|
||||
if (flags & FDMON_IO_URING_PENDING) {
|
||||
/* Still on ctx->submit_list, defer deletion until fill_sq_ring() */
|
||||
qatomic_or(&node->flags, FDMON_IO_URING_DELETE_AIO_HANDLER);
|
||||
} else {
|
||||
QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -252,6 +274,35 @@ static bool process_cqe(AioContext *ctx,
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Returns true if a handler became ready */
|
||||
static bool process_cqe(AioContext *ctx,
|
||||
AioHandlerList *ready_list,
|
||||
struct io_uring_cqe *cqe)
|
||||
{
|
||||
CqeHandler *cqe_handler = io_uring_cqe_get_data(cqe);
|
||||
|
||||
/* poll_timeout and poll_remove have a zero user_data field */
|
||||
if (!cqe_handler) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Special handling for AioHandler cqes. They need ready_list and have a
|
||||
* return value.
|
||||
*/
|
||||
if (cqe_handler->cb == fdmon_special_cqe_handler) {
|
||||
AioHandler *node = container_of(cqe_handler, AioHandler,
|
||||
internal_cqe_handler);
|
||||
return process_cqe_aio_handler(ctx, ready_list, node, cqe);
|
||||
}
|
||||
|
||||
cqe_handler->cqe = *cqe;
|
||||
|
||||
/* Handlers are invoked later by fdmon_io_uring_dispatch() */
|
||||
QSIMPLEQ_INSERT_TAIL(&ctx->cqe_handler_ready_list, cqe_handler, next);
|
||||
return false;
|
||||
}
|
||||
|
||||
static int process_cq_ring(AioContext *ctx, AioHandlerList *ready_list)
|
||||
{
|
||||
struct io_uring *ring = &ctx->fdmon_io_uring;
|
||||
|
|
@ -260,6 +311,13 @@ static int process_cq_ring(AioContext *ctx, AioHandlerList *ready_list)
|
|||
unsigned num_ready = 0;
|
||||
unsigned head;
|
||||
|
||||
#ifdef HAVE_IO_URING_CQ_HAS_OVERFLOW
|
||||
/* If the CQ overflowed then fetch CQEs with a syscall */
|
||||
if (io_uring_cq_has_overflow(ring)) {
|
||||
io_uring_get_events(ring);
|
||||
}
|
||||
#endif
|
||||
|
||||
io_uring_for_each_cqe(ring, head, cqe) {
|
||||
if (process_cqe(ctx, ready_list, cqe)) {
|
||||
num_ready++;
|
||||
|
|
@ -272,23 +330,91 @@ static int process_cq_ring(AioContext *ctx, AioHandlerList *ready_list)
|
|||
return num_ready;
|
||||
}
|
||||
|
||||
/* This is where SQEs are submitted in the glib event loop */
|
||||
static void fdmon_io_uring_gsource_prepare(AioContext *ctx)
|
||||
{
|
||||
fill_sq_ring(ctx);
|
||||
if (io_uring_sq_ready(&ctx->fdmon_io_uring)) {
|
||||
while (io_uring_submit(&ctx->fdmon_io_uring) == -EINTR) {
|
||||
/* Keep trying if syscall was interrupted */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool fdmon_io_uring_gsource_check(AioContext *ctx)
|
||||
{
|
||||
gpointer tag = ctx->io_uring_fd_tag;
|
||||
return g_source_query_unix_fd(&ctx->source, tag) & G_IO_IN;
|
||||
}
|
||||
|
||||
/* Dispatch CQE handlers that are ready */
|
||||
static bool fdmon_io_uring_dispatch(AioContext *ctx)
|
||||
{
|
||||
CqeHandlerSimpleQ *ready_list = &ctx->cqe_handler_ready_list;
|
||||
bool progress = false;
|
||||
|
||||
/* Handlers may use defer_call() to coalesce frequent operations */
|
||||
defer_call_begin();
|
||||
|
||||
while (!QSIMPLEQ_EMPTY(ready_list)) {
|
||||
CqeHandler *cqe_handler = QSIMPLEQ_FIRST(ready_list);
|
||||
|
||||
QSIMPLEQ_REMOVE_HEAD(ready_list, next);
|
||||
|
||||
trace_fdmon_io_uring_cqe_handler(ctx, cqe_handler,
|
||||
cqe_handler->cqe.res);
|
||||
cqe_handler->cb(cqe_handler);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
defer_call_end();
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
|
||||
/* This is where CQEs are processed in the glib event loop */
|
||||
static void fdmon_io_uring_gsource_dispatch(AioContext *ctx,
|
||||
AioHandlerList *ready_list)
|
||||
{
|
||||
process_cq_ring(ctx, ready_list);
|
||||
}
|
||||
|
||||
static int fdmon_io_uring_wait(AioContext *ctx, AioHandlerList *ready_list,
|
||||
int64_t timeout)
|
||||
{
|
||||
struct __kernel_timespec ts;
|
||||
unsigned wait_nr = 1; /* block until at least one cqe is ready */
|
||||
int ret;
|
||||
|
||||
if (timeout == 0) {
|
||||
wait_nr = 0; /* non-blocking */
|
||||
} else if (timeout > 0) {
|
||||
add_timeout_sqe(ctx, timeout);
|
||||
/* Add a timeout that self-cancels when another cqe becomes ready */
|
||||
struct io_uring_sqe *sqe;
|
||||
|
||||
ts = (struct __kernel_timespec){
|
||||
.tv_sec = timeout / NANOSECONDS_PER_SECOND,
|
||||
.tv_nsec = timeout % NANOSECONDS_PER_SECOND,
|
||||
};
|
||||
|
||||
sqe = get_sqe(ctx);
|
||||
io_uring_prep_timeout(sqe, &ts, 1, 0);
|
||||
io_uring_sqe_set_data(sqe, NULL);
|
||||
}
|
||||
|
||||
fill_sq_ring(ctx);
|
||||
|
||||
/*
|
||||
* Loop to handle signals in both cases:
|
||||
* 1. If no SQEs were submitted, then -EINTR is returned.
|
||||
* 2. If SQEs were submitted then the number of SQEs submitted is returned
|
||||
* rather than -EINTR.
|
||||
*/
|
||||
do {
|
||||
ret = io_uring_submit_and_wait(&ctx->fdmon_io_uring, wait_nr);
|
||||
} while (ret == -EINTR);
|
||||
} while (ret == -EINTR ||
|
||||
(ret >= 0 && wait_nr > io_uring_cq_ready(&ctx->fdmon_io_uring)));
|
||||
|
||||
assert(ret >= 0);
|
||||
|
||||
|
|
@ -319,43 +445,66 @@ static const FDMonOps fdmon_io_uring_ops = {
|
|||
.update = fdmon_io_uring_update,
|
||||
.wait = fdmon_io_uring_wait,
|
||||
.need_wait = fdmon_io_uring_need_wait,
|
||||
.dispatch = fdmon_io_uring_dispatch,
|
||||
.gsource_prepare = fdmon_io_uring_gsource_prepare,
|
||||
.gsource_check = fdmon_io_uring_gsource_check,
|
||||
.gsource_dispatch = fdmon_io_uring_gsource_dispatch,
|
||||
.add_sqe = fdmon_io_uring_add_sqe,
|
||||
};
|
||||
|
||||
bool fdmon_io_uring_setup(AioContext *ctx)
|
||||
bool fdmon_io_uring_setup(AioContext *ctx, Error **errp)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ctx->io_uring_fd_tag = NULL;
|
||||
|
||||
ret = io_uring_queue_init(FDMON_IO_URING_ENTRIES, &ctx->fdmon_io_uring, 0);
|
||||
if (ret != 0) {
|
||||
error_setg_errno(errp, -ret, "Failed to initialize io_uring");
|
||||
return false;
|
||||
}
|
||||
|
||||
QSLIST_INIT(&ctx->submit_list);
|
||||
QSIMPLEQ_INIT(&ctx->cqe_handler_ready_list);
|
||||
ctx->fdmon_ops = &fdmon_io_uring_ops;
|
||||
ctx->io_uring_fd_tag = g_source_add_unix_fd(&ctx->source,
|
||||
ctx->fdmon_io_uring.ring_fd, G_IO_IN);
|
||||
return true;
|
||||
}
|
||||
|
||||
void fdmon_io_uring_destroy(AioContext *ctx)
|
||||
{
|
||||
if (ctx->fdmon_ops == &fdmon_io_uring_ops) {
|
||||
AioHandler *node;
|
||||
AioHandler *node;
|
||||
|
||||
io_uring_queue_exit(&ctx->fdmon_io_uring);
|
||||
if (ctx->fdmon_ops != &fdmon_io_uring_ops) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Move handlers due to be removed onto the deleted list */
|
||||
while ((node = QSLIST_FIRST_RCU(&ctx->submit_list))) {
|
||||
unsigned flags = qatomic_fetch_and(&node->flags,
|
||||
~(FDMON_IO_URING_PENDING |
|
||||
FDMON_IO_URING_ADD |
|
||||
FDMON_IO_URING_REMOVE));
|
||||
io_uring_queue_exit(&ctx->fdmon_io_uring);
|
||||
|
||||
if (flags & FDMON_IO_URING_REMOVE) {
|
||||
QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted);
|
||||
}
|
||||
/* Move handlers due to be removed onto the deleted list */
|
||||
while ((node = QSLIST_FIRST_RCU(&ctx->submit_list))) {
|
||||
unsigned flags = qatomic_fetch_and(&node->flags,
|
||||
~(FDMON_IO_URING_PENDING |
|
||||
FDMON_IO_URING_ADD |
|
||||
FDMON_IO_URING_REMOVE |
|
||||
FDMON_IO_URING_DELETE_AIO_HANDLER));
|
||||
|
||||
QSLIST_REMOVE_HEAD_RCU(&ctx->submit_list, node_submitted);
|
||||
if ((flags & FDMON_IO_URING_REMOVE) ||
|
||||
(flags & FDMON_IO_URING_DELETE_AIO_HANDLER)) {
|
||||
QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers,
|
||||
node, node_deleted);
|
||||
}
|
||||
|
||||
ctx->fdmon_ops = &fdmon_poll_ops;
|
||||
QSLIST_REMOVE_HEAD_RCU(&ctx->submit_list, node_submitted);
|
||||
}
|
||||
|
||||
g_source_remove_unix_fd(&ctx->source, ctx->io_uring_fd_tag);
|
||||
ctx->io_uring_fd_tag = NULL;
|
||||
|
||||
assert(QSIMPLEQ_EMPTY(&ctx->cqe_handler_ready_list));
|
||||
|
||||
qemu_lockcnt_lock(&ctx->list_lock);
|
||||
fdmon_poll_downgrade(ctx);
|
||||
qemu_lockcnt_unlock(&ctx->list_lock);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -72,6 +72,11 @@ static int fdmon_poll_wait(AioContext *ctx, AioHandlerList *ready_list,
|
|||
|
||||
/* epoll(7) is faster above a certain number of fds */
|
||||
if (fdmon_epoll_try_upgrade(ctx, npfd)) {
|
||||
QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
|
||||
if (!QLIST_IS_INSERTED(node, node_deleted) && node->pfd.events) {
|
||||
g_source_remove_poll(&ctx->source, &node->pfd);
|
||||
}
|
||||
}
|
||||
npfd = 0; /* we won't need pollfds[], reset npfd */
|
||||
return ctx->fdmon_ops->wait(ctx, ready_list, timeout);
|
||||
}
|
||||
|
|
@ -97,11 +102,89 @@ static void fdmon_poll_update(AioContext *ctx,
|
|||
AioHandler *old_node,
|
||||
AioHandler *new_node)
|
||||
{
|
||||
/* Do nothing, AioHandler already contains the state we'll need */
|
||||
if (old_node) {
|
||||
/*
|
||||
* If the GSource is in the process of being destroyed then
|
||||
* g_source_remove_poll() causes an assertion failure. Skip removal in
|
||||
* that case, because glib cleans up its state during destruction
|
||||
* anyway.
|
||||
*/
|
||||
if (!g_source_is_destroyed(&ctx->source)) {
|
||||
g_source_remove_poll(&ctx->source, &old_node->pfd);
|
||||
}
|
||||
}
|
||||
|
||||
if (new_node) {
|
||||
g_source_add_poll(&ctx->source, &new_node->pfd);
|
||||
}
|
||||
}
|
||||
|
||||
static void fdmon_poll_gsource_prepare(AioContext *ctx)
|
||||
{
|
||||
/* Do nothing */
|
||||
}
|
||||
|
||||
static bool fdmon_poll_gsource_check(AioContext *ctx)
|
||||
{
|
||||
AioHandler *node;
|
||||
bool result = false;
|
||||
|
||||
/*
|
||||
* We have to walk very carefully in case aio_set_fd_handler is
|
||||
* called while we're walking.
|
||||
*/
|
||||
qemu_lockcnt_inc(&ctx->list_lock);
|
||||
|
||||
QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
|
||||
int revents = node->pfd.revents & node->pfd.events;
|
||||
|
||||
if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
|
||||
result = true;
|
||||
break;
|
||||
}
|
||||
if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
|
||||
result = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
qemu_lockcnt_dec(&ctx->list_lock);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static void fdmon_poll_gsource_dispatch(AioContext *ctx,
|
||||
AioHandlerList *ready_list)
|
||||
{
|
||||
AioHandler *node;
|
||||
|
||||
QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
|
||||
int revents = node->pfd.revents;
|
||||
|
||||
if (revents) {
|
||||
aio_add_ready_handler(ready_list, node, revents);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const FDMonOps fdmon_poll_ops = {
|
||||
.update = fdmon_poll_update,
|
||||
.wait = fdmon_poll_wait,
|
||||
.need_wait = aio_poll_disabled,
|
||||
.gsource_prepare = fdmon_poll_gsource_prepare,
|
||||
.gsource_check = fdmon_poll_gsource_check,
|
||||
.gsource_dispatch = fdmon_poll_gsource_dispatch,
|
||||
};
|
||||
|
||||
void fdmon_poll_downgrade(AioContext *ctx)
|
||||
{
|
||||
AioHandler *node;
|
||||
|
||||
ctx->fdmon_ops = &fdmon_poll_ops;
|
||||
|
||||
QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
|
||||
if (!QLIST_IS_INSERTED(node, node_deleted) && node->pfd.events) {
|
||||
g_source_add_poll(&ctx->source, &node->pfd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,10 @@ buffer_move_empty(const char *buf, size_t len, const char *from) "%s: %zd bytes
|
|||
buffer_move(const char *buf, size_t len, const char *from) "%s: %zd bytes from %s"
|
||||
buffer_free(const char *buf, size_t len) "%s: capacity %zd"
|
||||
|
||||
# fdmon-io_uring.c
|
||||
fdmon_io_uring_add_sqe(void *ctx, void *opaque, int opcode, int fd, uint64_t off, void *cqe_handler) "ctx %p opaque %p opcode %d fd %d off %"PRId64" cqe_handler %p"
|
||||
fdmon_io_uring_cqe_handler(void *ctx, void *cqe_handler, int cqe_res) "ctx %p cqe_handler %p cqe_res %d"
|
||||
|
||||
# filemonitor-inotify.c
|
||||
qemu_file_monitor_add_watch(void *mon, const char *dirpath, const char *filename, void *cb, void *opaque, int64_t id) "File monitor %p add watch dir='%s' file='%s' cb=%p opaque=%p id=%" PRId64
|
||||
qemu_file_monitor_remove_watch(void *mon, const char *dirpath, int64_t id) "File monitor %p remove watch dir='%s' id=%" PRId64
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue