vfio queue:

* Remove workaround for kernel DMA unmap overflow
* Remove invalid uses of ram_addr_t type
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEoPZlSPBIlev+awtgUaNDx8/77KEFAmjfpl4ACgkQUaNDx8/7
 7KFAHQ//R0WtsAsEYE8Diczscl9++gqORrrLYN2ffTKrhUBrBskPptWZ+4Rh4R2e
 OSxdcf1cl0sFNkzCqnbWE3sbAG1Yq6mvCXTGTx3Y+2wi0KNwZXSxYGMWApOydp5K
 McQv1Uyd48TKCEwjumu6jmoPUSi89kvA58BLjBtw2bwJQzdlMZpIHX0XlSjlBHTz
 wHPqqW5+WCWq52pTp2vNkRrcqTl/HuoaijHPEJMzd/GIl1x2tBruuXuwzkY33ZKy
 EyDNq/stK12Pa1Va1ey8QOMQUJJ1jb3feVognDDVRMUGbBPljMawi8vtXW6LW28P
 0micGzDk1A3yi8X+tIHjQE/rcL86mIKyzCmrSB7WM+t3r79/hWZQruUu2e1eUGCE
 Mw5K0UoxBvp4LxeB2wKSIFUL1VgcB0azgsq6nOwRgMyzcqjniBu7M7gctIQQdypZ
 wSdUo8cViagUXS+YDVLsMreq4FShFWx6JLOGlxvN/eTaicUTjiOccriGmu1huhW/
 VzcfkgZWL1lSKoDeOAOafNjUP557hv0YbiAGa8ywglrukFLdFKIFJOvNdnzmmkiG
 5YJt2RH/rx+etF0hBI4uZLCnumpiKVM27/9MuMRiF7jZSXx0rz8tFVcscxQY10GP
 pSPL3SZAeLD4HMhndrlLSPAJyboQ4TGPA26yn5nahUGmOhoP91o=
 =kCV9
 -----END PGP SIGNATURE-----

Merge tag 'pull-vfio-20251003' of https://github.com/legoater/qemu into staging

vfio queue:

* Remove workaround for kernel DMA unmap overflow
* Remove invalid uses of ram_addr_t type

# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCAAdFiEEoPZlSPBIlev+awtgUaNDx8/77KEFAmjfpl4ACgkQUaNDx8/7
# 7KFAHQ//R0WtsAsEYE8Diczscl9++gqORrrLYN2ffTKrhUBrBskPptWZ+4Rh4R2e
# OSxdcf1cl0sFNkzCqnbWE3sbAG1Yq6mvCXTGTx3Y+2wi0KNwZXSxYGMWApOydp5K
# McQv1Uyd48TKCEwjumu6jmoPUSi89kvA58BLjBtw2bwJQzdlMZpIHX0XlSjlBHTz
# wHPqqW5+WCWq52pTp2vNkRrcqTl/HuoaijHPEJMzd/GIl1x2tBruuXuwzkY33ZKy
# EyDNq/stK12Pa1Va1ey8QOMQUJJ1jb3feVognDDVRMUGbBPljMawi8vtXW6LW28P
# 0micGzDk1A3yi8X+tIHjQE/rcL86mIKyzCmrSB7WM+t3r79/hWZQruUu2e1eUGCE
# Mw5K0UoxBvp4LxeB2wKSIFUL1VgcB0azgsq6nOwRgMyzcqjniBu7M7gctIQQdypZ
# wSdUo8cViagUXS+YDVLsMreq4FShFWx6JLOGlxvN/eTaicUTjiOccriGmu1huhW/
# VzcfkgZWL1lSKoDeOAOafNjUP557hv0YbiAGa8ywglrukFLdFKIFJOvNdnzmmkiG
# 5YJt2RH/rx+etF0hBI4uZLCnumpiKVM27/9MuMRiF7jZSXx0rz8tFVcscxQY10GP
# pSPL3SZAeLD4HMhndrlLSPAJyboQ4TGPA26yn5nahUGmOhoP91o=
# =kCV9
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 03 Oct 2025 03:33:02 AM PDT
# gpg:                using RSA key A0F66548F04895EBFE6B0B6051A343C7CFFBECA1
# gpg: Good signature from "Cédric Le Goater <clg@redhat.com>" [full]
# gpg:                 aka "Cédric Le Goater <clg@kaod.org>" [full]

* tag 'pull-vfio-20251003' of https://github.com/legoater/qemu:
  hw/vfio: Use uint64_t for IOVA mapping size in vfio_container_dma_*map
  hw/vfio: Avoid ram_addr_t in vfio_container_query_dirty_bitmap()
  hw/vfio: Reorder vfio_container_query_dirty_bitmap() trace format
  system/iommufd: Use uint64_t type for IOVA mapping size
  vfio: Remove workaround for kernel DMA unmap overflow bug

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2025-10-03 04:57:45 -07:00
commit 81e3121bef
11 changed files with 45 additions and 62 deletions

View file

@ -197,7 +197,7 @@ void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id)
}
int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
ram_addr_t size, void *vaddr, bool readonly)
uint64_t size, void *vaddr, bool readonly)
{
int ret, fd = be->fd;
struct iommu_ioas_map map = {
@ -230,7 +230,7 @@ int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
}
int iommufd_backend_map_file_dma(IOMMUFDBackend *be, uint32_t ioas_id,
hwaddr iova, ram_addr_t size,
hwaddr iova, uint64_t size,
int mfd, unsigned long start, bool readonly)
{
int ret, fd = be->fd;
@ -268,7 +268,7 @@ int iommufd_backend_map_file_dma(IOMMUFDBackend *be, uint32_t ioas_id,
}
int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
hwaddr iova, ram_addr_t size)
hwaddr iova, uint64_t size)
{
int ret, fd = be->fd;
struct iommu_ioas_unmap unmap = {

View file

@ -39,7 +39,7 @@ static void vfio_user_listener_commit(VFIOContainer *bcontainer)
}
static int vfio_user_dma_unmap(const VFIOContainer *bcontainer,
hwaddr iova, ram_addr_t size,
hwaddr iova, uint64_t size,
IOMMUTLBEntry *iotlb, bool unmap_all)
{
VFIOUserContainer *container = VFIO_IOMMU_USER(bcontainer);
@ -81,7 +81,7 @@ static int vfio_user_dma_unmap(const VFIOContainer *bcontainer,
}
static int vfio_user_dma_map(const VFIOContainer *bcontainer, hwaddr iova,
ram_addr_t size, void *vaddr, bool readonly,
uint64_t size, void *vaddr, bool readonly,
MemoryRegion *mrp)
{
VFIOUserContainer *container = VFIO_IOMMU_USER(bcontainer);

View file

@ -69,7 +69,7 @@ static int vfio_ram_block_discard_disable(VFIOLegacyContainer *container,
}
static int vfio_dma_unmap_bitmap(const VFIOLegacyContainer *container,
hwaddr iova, ram_addr_t size,
hwaddr iova, uint64_t size,
IOMMUTLBEntry *iotlb)
{
const VFIOContainer *bcontainer = VFIO_IOMMU(container);
@ -122,7 +122,7 @@ unmap_exit:
}
static int vfio_legacy_dma_unmap_one(const VFIOContainer *bcontainer,
hwaddr iova, ram_addr_t size,
hwaddr iova, uint64_t size,
IOMMUTLBEntry *iotlb)
{
const VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
@ -147,25 +147,7 @@ static int vfio_legacy_dma_unmap_one(const VFIOContainer *bcontainer,
need_dirty_sync = true;
}
while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
/*
* The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
* v4.15) where an overflow in its wrap-around check prevents us from
* unmapping the last page of the address space. Test for the error
* condition and re-try the unmap excluding the last page. The
* expectation is that we've never mapped the last page anyway and this
* unmap request comes via vIOMMU support which also makes it unlikely
* that this page is used. This bug was introduced well after type1 v2
* support was introduced, so we shouldn't need to test for v1. A fix
* is queued for kernel v5.0 so this workaround can be removed once
* affected kernels are sufficiently deprecated.
*/
if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) &&
container->iommu_type == VFIO_TYPE1v2_IOMMU) {
trace_vfio_legacy_dma_unmap_overflow_workaround();
unmap.size -= 1ULL << ctz64(bcontainer->pgsizes);
continue;
}
if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
return -errno;
}
@ -185,7 +167,7 @@ static int vfio_legacy_dma_unmap_one(const VFIOContainer *bcontainer,
* DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
*/
static int vfio_legacy_dma_unmap(const VFIOContainer *bcontainer,
hwaddr iova, ram_addr_t size,
hwaddr iova, uint64_t size,
IOMMUTLBEntry *iotlb, bool unmap_all)
{
int ret;
@ -210,7 +192,7 @@ static int vfio_legacy_dma_unmap(const VFIOContainer *bcontainer,
}
static int vfio_legacy_dma_map(const VFIOContainer *bcontainer, hwaddr iova,
ram_addr_t size, void *vaddr, bool readonly,
uint64_t size, void *vaddr, bool readonly,
MemoryRegion *mr)
{
const VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);

View file

@ -74,7 +74,7 @@ void vfio_address_space_insert(VFIOAddressSpace *space,
}
int vfio_container_dma_map(VFIOContainer *bcontainer,
hwaddr iova, ram_addr_t size,
hwaddr iova, uint64_t size,
void *vaddr, bool readonly, MemoryRegion *mr)
{
VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
@ -93,7 +93,7 @@ int vfio_container_dma_map(VFIOContainer *bcontainer,
}
int vfio_container_dma_unmap(VFIOContainer *bcontainer,
hwaddr iova, ram_addr_t size,
hwaddr iova, uint64_t size,
IOMMUTLBEntry *iotlb, bool unmap_all)
{
VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
@ -246,7 +246,7 @@ static int vfio_container_devices_query_dirty_bitmap(
int vfio_container_query_dirty_bitmap(const VFIOContainer *bcontainer,
uint64_t iova, uint64_t size,
ram_addr_t ram_addr, Error **errp)
hwaddr translated_addr, Error **errp)
{
bool all_device_dirty_tracking =
vfio_container_devices_dirty_tracking_is_supported(bcontainer);
@ -255,7 +255,7 @@ int vfio_container_query_dirty_bitmap(const VFIOContainer *bcontainer,
int ret;
if (!bcontainer->dirty_pages_supported && !all_device_dirty_tracking) {
cpu_physical_memory_set_dirty_range(ram_addr, size,
cpu_physical_memory_set_dirty_range(translated_addr, size,
tcg_enabled() ? DIRTY_CLIENTS_ALL :
DIRTY_CLIENTS_NOCODE);
return 0;
@ -280,11 +280,12 @@ int vfio_container_query_dirty_bitmap(const VFIOContainer *bcontainer,
goto out;
}
dirty_pages = cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, ram_addr,
dirty_pages = cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap,
translated_addr,
vbmap.pages);
trace_vfio_container_query_dirty_bitmap(iova, size, vbmap.size, ram_addr,
dirty_pages);
trace_vfio_container_query_dirty_bitmap(iova, size, vbmap.size,
translated_addr, dirty_pages);
out:
g_free(vbmap.bitmap);

View file

@ -39,7 +39,7 @@ static bool vfio_dma_unmap_vaddr_all(VFIOLegacyContainer *container,
* The incoming state is cleared thereafter.
*/
static int vfio_legacy_cpr_dma_map(const VFIOContainer *bcontainer,
hwaddr iova, ram_addr_t size, void *vaddr,
hwaddr iova, uint64_t size, void *vaddr,
bool readonly, MemoryRegion *mr)
{
const VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);

View file

@ -35,7 +35,7 @@
TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio"
static int iommufd_cdev_map(const VFIOContainer *bcontainer, hwaddr iova,
ram_addr_t size, void *vaddr, bool readonly,
uint64_t size, void *vaddr, bool readonly,
MemoryRegion *mr)
{
const VFIOIOMMUFDContainer *container = VFIO_IOMMU_IOMMUFD(bcontainer);
@ -46,7 +46,7 @@ static int iommufd_cdev_map(const VFIOContainer *bcontainer, hwaddr iova,
}
static int iommufd_cdev_map_file(const VFIOContainer *bcontainer,
hwaddr iova, ram_addr_t size,
hwaddr iova, uint64_t size,
int fd, unsigned long start, bool readonly)
{
const VFIOIOMMUFDContainer *container = VFIO_IOMMU_IOMMUFD(bcontainer);
@ -57,7 +57,7 @@ static int iommufd_cdev_map_file(const VFIOContainer *bcontainer,
}
static int iommufd_cdev_unmap(const VFIOContainer *bcontainer,
hwaddr iova, ram_addr_t size,
hwaddr iova, uint64_t size,
IOMMUTLBEntry *iotlb, bool unmap_all)
{
const VFIOIOMMUFDContainer *container = VFIO_IOMMU_IOMMUFD(bcontainer);

View file

@ -1059,7 +1059,7 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
VFIOGuestIOMMU *giommu = gdn->giommu;
VFIOContainer *bcontainer = giommu->bcontainer;
hwaddr iova = iotlb->iova + giommu->iommu_offset;
ram_addr_t translated_addr;
hwaddr translated_addr;
Error *local_err = NULL;
int ret = -EINVAL;
MemoryRegion *mr;
@ -1108,8 +1108,8 @@ static int vfio_ram_discard_query_dirty_bitmap(MemoryRegionSection *section,
{
const hwaddr size = int128_get64(section->size);
const hwaddr iova = section->offset_within_address_space;
const ram_addr_t ram_addr = memory_region_get_ram_addr(section->mr) +
section->offset_within_region;
const hwaddr translated_addr = memory_region_get_ram_addr(section->mr) +
section->offset_within_region;
VFIORamDiscardListener *vrdl = opaque;
Error *local_err = NULL;
int ret;
@ -1118,8 +1118,8 @@ static int vfio_ram_discard_query_dirty_bitmap(MemoryRegionSection *section,
* Sync the whole mapped region (spanning multiple individual mappings)
* in one go.
*/
ret = vfio_container_query_dirty_bitmap(vrdl->bcontainer, iova, size, ram_addr,
&local_err);
ret = vfio_container_query_dirty_bitmap(vrdl->bcontainer, iova, size,
translated_addr, &local_err);
if (ret) {
error_report_err(local_err);
}
@ -1183,7 +1183,7 @@ static int vfio_sync_iommu_dirty_bitmap(VFIOContainer *bcontainer,
static int vfio_sync_dirty_bitmap(VFIOContainer *bcontainer,
MemoryRegionSection *section, Error **errp)
{
ram_addr_t ram_addr;
hwaddr translated_addr;
if (memory_region_is_iommu(section->mr)) {
return vfio_sync_iommu_dirty_bitmap(bcontainer, section);
@ -1198,12 +1198,12 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *bcontainer,
return ret;
}
ram_addr = memory_region_get_ram_addr(section->mr) +
section->offset_within_region;
translated_addr = memory_region_get_ram_addr(section->mr) +
section->offset_within_region;
return vfio_container_query_dirty_bitmap(bcontainer,
REAL_HOST_PAGE_ALIGN(section->offset_within_address_space),
int128_get64(section->size), ram_addr, errp);
int128_get64(section->size), translated_addr, errp);
}
static void vfio_listener_log_sync(MemoryListener *listener,

View file

@ -104,15 +104,14 @@ vfio_device_dirty_tracking_update(uint64_t start, uint64_t end, uint64_t min, ui
vfio_device_dirty_tracking_start(int nr_ranges, uint64_t min32, uint64_t max32, uint64_t min64, uint64_t max64, uint64_t minpci, uint64_t maxpci) "nr_ranges %d 32:[0x%"PRIx64" - 0x%"PRIx64"], 64:[0x%"PRIx64" - 0x%"PRIx64"], pci64:[0x%"PRIx64" - 0x%"PRIx64"]"
vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64
# container-base.c
vfio_container_query_dirty_bitmap(uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64
# container.c
vfio_container_query_dirty_bitmap(uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t translated_addr, uint64_t dirty_pages) "iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" gpa=0x%"PRIx64" dirty_pages=%"PRIu64
# container-legacy.c
vfio_container_disconnect(int fd) "close container->fd=%d"
vfio_group_put(int fd) "close group->fd=%d"
vfio_device_get(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u"
vfio_device_put(int fd) "close vdev->fd=%d"
vfio_legacy_dma_unmap_overflow_workaround(void) ""
# region.c
vfio_region_write(const char *name, int index, uint64_t addr, uint64_t data, unsigned size) " (%s:region%d+0x%"PRIx64", 0x%"PRIx64 ", %d)"

View file

@ -81,10 +81,10 @@ void vfio_address_space_insert(VFIOAddressSpace *space,
VFIOContainer *bcontainer);
int vfio_container_dma_map(VFIOContainer *bcontainer,
hwaddr iova, ram_addr_t size,
hwaddr iova, uint64_t size,
void *vaddr, bool readonly, MemoryRegion *mr);
int vfio_container_dma_unmap(VFIOContainer *bcontainer,
hwaddr iova, ram_addr_t size,
hwaddr iova, uint64_t size,
IOMMUTLBEntry *iotlb, bool unmap_all);
bool vfio_container_add_section_window(VFIOContainer *bcontainer,
MemoryRegionSection *section,
@ -98,7 +98,8 @@ bool vfio_container_dirty_tracking_is_started(
bool vfio_container_devices_dirty_tracking_is_supported(
const VFIOContainer *bcontainer);
int vfio_container_query_dirty_bitmap(const VFIOContainer *bcontainer,
uint64_t iova, uint64_t size, ram_addr_t ram_addr, Error **errp);
uint64_t iova, uint64_t size,
hwaddr translated_addr, Error **errp);
GList *vfio_container_get_iova_ranges(const VFIOContainer *bcontainer);
@ -166,7 +167,7 @@ struct VFIOIOMMUClass {
* Returns 0 to indicate success and -errno otherwise.
*/
int (*dma_map)(const VFIOContainer *bcontainer,
hwaddr iova, ram_addr_t size,
hwaddr iova, uint64_t size,
void *vaddr, bool readonly, MemoryRegion *mr);
/**
* @dma_map_file
@ -181,7 +182,7 @@ struct VFIOIOMMUClass {
* @readonly: map read only if true
*/
int (*dma_map_file)(const VFIOContainer *bcontainer,
hwaddr iova, ram_addr_t size,
hwaddr iova, uint64_t size,
int fd, unsigned long start, bool readonly);
/**
* @dma_unmap
@ -197,7 +198,7 @@ struct VFIOIOMMUClass {
* Returns 0 to indicate success and -errno otherwise.
*/
int (*dma_unmap)(const VFIOContainer *bcontainer,
hwaddr iova, ram_addr_t size,
hwaddr iova, uint64_t size,
IOMMUTLBEntry *iotlb, bool unmap_all);

View file

@ -21,7 +21,7 @@ struct VFIOIOMMUFDContainer;
struct IOMMUFDBackend;
typedef int (*dma_map_fn)(const struct VFIOContainer *bcontainer,
hwaddr iova, ram_addr_t size, void *vaddr,
hwaddr iova, uint64_t size, void *vaddr,
bool readonly, MemoryRegion *mr);
typedef struct VFIOContainerCPR {

View file

@ -45,12 +45,12 @@ bool iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
Error **errp);
void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id);
int iommufd_backend_map_file_dma(IOMMUFDBackend *be, uint32_t ioas_id,
hwaddr iova, ram_addr_t size, int fd,
hwaddr iova, uint64_t size, int fd,
unsigned long start, bool readonly);
int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
ram_addr_t size, void *vaddr, bool readonly);
uint64_t size, void *vaddr, bool readonly);
int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
hwaddr iova, ram_addr_t size);
hwaddr iova, uint64_t size);
bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
uint32_t *type, void *data, uint32_t len,
uint64_t *caps, Error **errp);