qemu-cr16/util/fdmon-poll.c
Stefan Hajnoczi ded29e64c6 aio-posix: integrate fdmon into glib event loop
AioContext's glib integration only supports ppoll(2) file descriptor
monitoring. epoll(7) and io_uring(7) disable themselves and switch back
to ppoll(2) when the glib event loop is used. The main loop thread
cannot use epoll(7) or io_uring(7) because it always uses the glib event
loop.

Future QEMU features may require io_uring(7). One example is uring_cmd
support in FUSE exports. Each feature could create its own io_uring(7)
context and integrate it into the event loop, but this is inefficient
due to extra syscalls. It would be more efficient to reuse the
AioContext's existing fdmon-io_uring.c io_uring(7) context because
fdmon-io_uring.c will already be active on systems where Linux io_uring
is available.

In order to keep fdmon-io_uring.c's AioContext operational even when the
glib event loop is used, extend FDMonOps with an API similar to
GSourceFuncs so that file descriptor monitoring can integrate into the
glib event loop.

A quick summary of the GSourceFuncs API:
- prepare() is called each event loop iteration before waiting for file
  descriptors and timers.
- check() is called to determine whether events are ready to be
  dispatched after waiting.
- dispatch() is called to process events.

More details here: https://docs.gtk.org/glib/struct.SourceFuncs.html

Move the ppoll(2)-specific code from aio-posix.c into fdmon-poll.c and
also implement epoll(7)- and io_uring(7)-specific file descriptor
monitoring code for glib event loops.

Note that it's still faster to use aio_poll() rather than the glib event
loop since glib waits for file descriptor activity with ppoll(2) and
does not support adaptive polling. But at least epoll(7) and io_uring(7)
now work in glib event loops.

Splitting this into multiple commits without temporarily breaking
AioContext proved difficult so this commit makes all the changes. The
next commit will remove the aio_context_use_g_source() API because it is
no longer needed.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-ID: <20251104022933.618123-7-stefanha@redhat.com>
[kwolf: Build fixes; fix AioContext.list_lock use after destroy]
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2025-11-11 22:04:53 +01:00

190 lines
5.1 KiB
C

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* poll(2) file descriptor monitoring
*
* Uses ppoll(2) when available, g_poll() otherwise.
*/
#include "qemu/osdep.h"
#include "aio-posix.h"
#include "qemu/rcu_queue.h"
/*
* These thread-local variables are used only in fdmon_poll_wait() around the
* call to the poll() system call. In particular they are not used while
* aio_poll is performing callbacks, which makes it much easier to think about
* reentrancy!
*
* Stack-allocated arrays would be perfect but they have size limitations;
* heap allocation is expensive enough that we want to reuse arrays across
* calls to aio_poll(). And because poll() has to be called without holding
* any lock, the arrays cannot be stored in AioContext. Thread-local data
* has none of the disadvantages of these three options.
*/
static __thread GPollFD *pollfds;
static __thread AioHandler **nodes;
static __thread unsigned npfd, nalloc;
static __thread Notifier pollfds_cleanup_notifier;
static void pollfds_cleanup(Notifier *n, void *unused)
{
g_assert(npfd == 0);
g_free(pollfds);
g_free(nodes);
nalloc = 0;
}
static void add_pollfd(AioHandler *node)
{
if (npfd == nalloc) {
if (nalloc == 0) {
pollfds_cleanup_notifier.notify = pollfds_cleanup;
qemu_thread_atexit_add(&pollfds_cleanup_notifier);
nalloc = 8;
} else {
g_assert(nalloc <= INT_MAX);
nalloc *= 2;
}
pollfds = g_renew(GPollFD, pollfds, nalloc);
nodes = g_renew(AioHandler *, nodes, nalloc);
}
nodes[npfd] = node;
pollfds[npfd] = (GPollFD) {
.fd = node->pfd.fd,
.events = node->pfd.events,
};
npfd++;
}
static int fdmon_poll_wait(AioContext *ctx, AioHandlerList *ready_list,
int64_t timeout)
{
AioHandler *node;
int ret;
assert(npfd == 0);
QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
if (!QLIST_IS_INSERTED(node, node_deleted) && node->pfd.events) {
add_pollfd(node);
}
}
/* epoll(7) is faster above a certain number of fds */
if (fdmon_epoll_try_upgrade(ctx, npfd)) {
QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
if (!QLIST_IS_INSERTED(node, node_deleted) && node->pfd.events) {
g_source_remove_poll(&ctx->source, &node->pfd);
}
}
npfd = 0; /* we won't need pollfds[], reset npfd */
return ctx->fdmon_ops->wait(ctx, ready_list, timeout);
}
ret = qemu_poll_ns(pollfds, npfd, timeout);
if (ret > 0) {
int i;
for (i = 0; i < npfd; i++) {
int revents = pollfds[i].revents;
if (revents) {
aio_add_ready_handler(ready_list, nodes[i], revents);
}
}
}
npfd = 0;
return ret;
}
static void fdmon_poll_update(AioContext *ctx,
AioHandler *old_node,
AioHandler *new_node)
{
if (old_node) {
/*
* If the GSource is in the process of being destroyed then
* g_source_remove_poll() causes an assertion failure. Skip removal in
* that case, because glib cleans up its state during destruction
* anyway.
*/
if (!g_source_is_destroyed(&ctx->source)) {
g_source_remove_poll(&ctx->source, &old_node->pfd);
}
}
if (new_node) {
g_source_add_poll(&ctx->source, &new_node->pfd);
}
}
static void fdmon_poll_gsource_prepare(AioContext *ctx)
{
/* Do nothing */
}
static bool fdmon_poll_gsource_check(AioContext *ctx)
{
AioHandler *node;
bool result = false;
/*
* We have to walk very carefully in case aio_set_fd_handler is
* called while we're walking.
*/
qemu_lockcnt_inc(&ctx->list_lock);
QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
int revents = node->pfd.revents & node->pfd.events;
if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
result = true;
break;
}
if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
result = true;
break;
}
}
qemu_lockcnt_dec(&ctx->list_lock);
return result;
}
static void fdmon_poll_gsource_dispatch(AioContext *ctx,
AioHandlerList *ready_list)
{
AioHandler *node;
QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
int revents = node->pfd.revents;
if (revents) {
aio_add_ready_handler(ready_list, node, revents);
}
}
}
const FDMonOps fdmon_poll_ops = {
.update = fdmon_poll_update,
.wait = fdmon_poll_wait,
.need_wait = aio_poll_disabled,
.gsource_prepare = fdmon_poll_gsource_prepare,
.gsource_check = fdmon_poll_gsource_check,
.gsource_dispatch = fdmon_poll_gsource_dispatch,
};
void fdmon_poll_downgrade(AioContext *ctx)
{
AioHandler *node;
ctx->fdmon_ops = &fdmon_poll_ops;
QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
if (!QLIST_IS_INSERTED(node, node_deleted) && node->pfd.events) {
g_source_add_poll(&ctx->source, &node->pfd);
}
}
}