block/io_uring: use non-vectored read/write when possible

The io_uring_prep_readv2/writev2() man pages recommend using the
non-vectored read/write operations when possible for performance
reasons.

I didn't measure a significant difference, but it doesn't hurt to have
this optimization in place.

Suggested-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-ID: <20251104022933.618123-16-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
This commit is contained in:
Stefan Hajnoczi 2025-11-03 21:29:33 -05:00 committed by Kevin Wolf
parent 047dabef97
commit 684363fa3b

View file

@ -46,17 +46,28 @@ static void luring_prep_sqe(struct io_uring_sqe *sqe, void *opaque)
switch (req->type) {
case QEMU_AIO_WRITE:
#ifdef HAVE_IO_URING_PREP_WRITEV2
{
int luring_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
io_uring_prep_writev2(sqe, fd, qiov->iov,
qiov->niov, offset, luring_flags);
}
if (luring_flags != 0 || qiov->niov > 1) {
#ifdef HAVE_IO_URING_PREP_WRITEV2
io_uring_prep_writev2(sqe, fd, qiov->iov,
qiov->niov, offset, luring_flags);
#else
assert(flags == 0);
io_uring_prep_writev(sqe, fd, qiov->iov, qiov->niov, offset);
/*
* FUA should only be enabled with HAVE_IO_URING_PREP_WRITEV2, see
* luring_has_fua().
*/
assert(luring_flags == 0);
io_uring_prep_writev(sqe, fd, qiov->iov, qiov->niov, offset);
#endif
} else {
/* The man page says non-vectored is faster than vectored */
struct iovec *iov = qiov->iov;
io_uring_prep_write(sqe, fd, iov->iov_base, iov->iov_len, offset);
}
break;
}
case QEMU_AIO_ZONE_APPEND:
io_uring_prep_writev(sqe, fd, qiov->iov, qiov->niov, offset);
break;
@ -65,8 +76,15 @@ static void luring_prep_sqe(struct io_uring_sqe *sqe, void *opaque)
if (req->resubmit_qiov.iov != NULL) {
qiov = &req->resubmit_qiov;
}
io_uring_prep_readv(sqe, fd, qiov->iov, qiov->niov,
offset + req->total_read);
if (qiov->niov > 1) {
io_uring_prep_readv(sqe, fd, qiov->iov, qiov->niov,
offset + req->total_read);
} else {
/* The man page says non-vectored is faster than vectored */
struct iovec *iov = qiov->iov;
io_uring_prep_read(sqe, fd, iov->iov_base, iov->iov_len,
offset + req->total_read);
}
break;
}
case QEMU_AIO_FLUSH: