block/io_uring: use non-vectored read/write when possible

The io_uring_prep_readv2/writev2() man pages recommend using the non-vectored read/write operations when possible for performance reasons. I didn't measure a significant difference but it doesn't hurt to have this optimization in place. Suggested-by: Eric Blake <eblake@redhat.com> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Message-ID: <20251104022933.618123-16-stefanha@redhat.com> Reviewed-by: Kevin Wolf <kwolf@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2025-11-03 21:29:33 -05:00 · 2025-11-03 21:29:33 -05:00 · 684363fa3b
commit 684363fa3b
parent 047dabef97
1 changed files with 26 additions and 8 deletions
--- a/block/io_uring.c
+++ b/block/io_uring.c
@ -46,17 +46,28 @@ static void luring_prep_sqe(struct io_uring_sqe *sqe, void *opaque)

    switch (req->type) {
    case QEMU_AIO_WRITE:
-#ifdef HAVE_IO_URING_PREP_WRITEV2
    {
        int luring_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
-        io_uring_prep_writev2(sqe, fd, qiov->iov,
-                              qiov->niov, offset, luring_flags);
-    }
+        if (luring_flags != 0 || qiov->niov > 1) {
+#ifdef HAVE_IO_URING_PREP_WRITEV2
+            io_uring_prep_writev2(sqe, fd, qiov->iov,
+                                  qiov->niov, offset, luring_flags);
 #else
-        assert(flags == 0);
-        io_uring_prep_writev(sqe, fd, qiov->iov, qiov->niov, offset);
+            /*
+             * FUA should only be enabled with HAVE_IO_URING_PREP_WRITEV2, see
+             * luring_has_fua().
+             */
+            assert(luring_flags == 0);
+
+            io_uring_prep_writev(sqe, fd, qiov->iov, qiov->niov, offset);
 #endif
+        } else {
+            /* The man page says non-vectored is faster than vectored */
+            struct iovec *iov = qiov->iov;
+            io_uring_prep_write(sqe, fd, iov->iov_base, iov->iov_len, offset);
+        }
        break;
+    }
    case QEMU_AIO_ZONE_APPEND:
        io_uring_prep_writev(sqe, fd, qiov->iov, qiov->niov, offset);
        break;
@ -65,8 +76,15 @@ static void luring_prep_sqe(struct io_uring_sqe *sqe, void *opaque)
        if (req->resubmit_qiov.iov != NULL) {
            qiov = &req->resubmit_qiov;
        }
-        io_uring_prep_readv(sqe, fd, qiov->iov, qiov->niov,
-                            offset + req->total_read);
+        if (qiov->niov > 1) {
+            io_uring_prep_readv(sqe, fd, qiov->iov, qiov->niov,
+                                offset + req->total_read);
+        } else {
+            /* The man page says non-vectored is faster than vectored */
+            struct iovec *iov = qiov->iov;
+            io_uring_prep_read(sqe, fd, iov->iov_base, iov->iov_len,
+                               offset + req->total_read);
+        }
        break;
    }
    case QEMU_AIO_FLUSH: