diff --git a/MAINTAINERS b/MAINTAINERS index be6efff80c..9cb181e1da 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3933,8 +3933,10 @@ F: tests/functional/x86_64/test_intel_iommu.py F: tests/qtest/intel-iommu-test.c AMD-Vi Emulation -S: Orphan -F: hw/i386/amd_iommu.? +M: Alejandro Jimenez +R: Sairaj Kodilkar +S: Supported +F: hw/i386/amd_iommu* OpenSBI Firmware L: qemu-riscv@nongnu.org diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst index 2e50f2ddfa..93a9c8df2b 100644 --- a/docs/interop/vhost-user.rst +++ b/docs/interop/vhost-user.rst @@ -411,6 +411,13 @@ in the ancillary data: * ``VHOST_USER_SET_INFLIGHT_FD`` (if ``VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD``) * ``VHOST_USER_SET_DEVICE_STATE_FD`` +When sending file descriptors in ancillary data, *front-end* should +associate the ancillary data with a ``sendmsg`` operation (or +equivalent) that sends bytes starting with the first byte of the +message header. *back-end* can therefore expect that file descriptors +will only be received in the first ``recvmsg`` operation for a message +header. + If *front-end* is unable to send the full message or receives a wrong reply it will close the connection. An optional reconnection mechanism can be implemented. 
diff --git a/hw/core/machine.c b/hw/core/machine.c index 0580550e12..06e0c9a179 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -40,6 +40,10 @@ GlobalProperty hw_compat_10_1[] = { { TYPE_ACPI_GED, "x-has-hest-addr", "false" }, + { TYPE_VIRTIO_NET, "host_tunnel", "off" }, + { TYPE_VIRTIO_NET, "host_tunnel_csum", "off" }, + { TYPE_VIRTIO_NET, "guest_tunnel", "off" }, + { TYPE_VIRTIO_NET, "guest_tunnel_csum", "off" }, }; const size_t hw_compat_10_1_len = G_N_ELEMENTS(hw_compat_10_1); diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 378e0cb55e..d689a06eca 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -59,7 +59,7 @@ const char *amdvi_mmio_high[] = { }; struct AMDVIAddressSpace { - uint8_t bus_num; /* bus number */ + PCIBus *bus; /* PCIBus (for bus number) */ uint8_t devfn; /* device function */ AMDVIState *iommu_state; /* AMDVI - one per machine */ MemoryRegion root; /* AMDVI Root memory map region */ @@ -101,6 +101,16 @@ typedef enum AMDVIFaultReason { AMDVI_FR_PT_ENTRY_INV, /* Failure to read PTE from guest memory */ } AMDVIFaultReason; +typedef struct AMDVIAsKey { + PCIBus *bus; + uint8_t devfn; +} AMDVIAsKey; + +typedef struct AMDVIIOTLBKey { + uint64_t gfn; + uint16_t devid; +} AMDVIIOTLBKey; + uint64_t amdvi_extended_feature_register(AMDVIState *s) { uint64_t feature = AMDVI_DEFAULT_EXT_FEATURES; @@ -372,21 +382,68 @@ static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid, PCI_STATUS_SIG_TARGET_ABORT); } -static gboolean amdvi_uint64_equal(gconstpointer v1, gconstpointer v2) +static gboolean amdvi_as_equal(gconstpointer v1, gconstpointer v2) { - return *((const uint64_t *)v1) == *((const uint64_t *)v2); + const AMDVIAsKey *key1 = v1; + const AMDVIAsKey *key2 = v2; + + return key1->bus == key2->bus && key1->devfn == key2->devfn; } -static guint amdvi_uint64_hash(gconstpointer v) +static guint amdvi_as_hash(gconstpointer v) { - return (guint)*(const uint64_t *)v; + const AMDVIAsKey *key = v; + guint bus = 
(guint)(uintptr_t)key->bus; + + return (guint)(bus << 8 | (guint)key->devfn); } +static AMDVIAddressSpace *amdvi_as_lookup(AMDVIState *s, PCIBus *bus, + uint8_t devfn) +{ + const AMDVIAsKey key = { .bus = bus, .devfn = devfn }; + return g_hash_table_lookup(s->address_spaces, &key); +} + +static gboolean amdvi_find_as_by_devid(gpointer key, gpointer value, + gpointer user_data) +{ + const AMDVIAsKey *as = key; + const uint16_t *devidp = user_data; + + return *devidp == PCI_BUILD_BDF(pci_bus_num(as->bus), as->devfn); +} + +static AMDVIAddressSpace *amdvi_get_as_by_devid(AMDVIState *s, uint16_t devid) +{ + return g_hash_table_find(s->address_spaces, + amdvi_find_as_by_devid, &devid); +} + +static gboolean amdvi_iotlb_equal(gconstpointer v1, gconstpointer v2) +{ + const AMDVIIOTLBKey *key1 = v1; + const AMDVIIOTLBKey *key2 = v2; + + return key1->devid == key2->devid && key1->gfn == key2->gfn; +} + +static guint amdvi_iotlb_hash(gconstpointer v) +{ + const AMDVIIOTLBKey *key = v; + /* Use GPA and DEVID to find the bucket */ + return (guint)(key->gfn << AMDVI_PAGE_SHIFT_4K | + (key->devid & ~AMDVI_PAGE_MASK_4K)); +} + + static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr, uint64_t devid) { - uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) | - ((uint64_t)(devid) << AMDVI_DEVID_SHIFT); + AMDVIIOTLBKey key = { + .gfn = AMDVI_GET_IOTLB_GFN(addr), + .devid = devid, + }; return g_hash_table_lookup(s->iotlb, &key); } @@ -408,8 +465,10 @@ static gboolean amdvi_iotlb_remove_by_devid(gpointer key, gpointer value, static void amdvi_iotlb_remove_page(AMDVIState *s, hwaddr addr, uint64_t devid) { - uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) | - ((uint64_t)(devid) << AMDVI_DEVID_SHIFT); + AMDVIIOTLBKey key = { + .gfn = AMDVI_GET_IOTLB_GFN(addr), + .devid = devid, + }; g_hash_table_remove(s->iotlb, &key); } @@ -420,8 +479,10 @@ static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid, /* don't cache erroneous translations */ if (to_cache.perm != IOMMU_NONE) { 
AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1); - uint64_t *key = g_new(uint64_t, 1); - uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K; + AMDVIIOTLBKey *key = g_new(AMDVIIOTLBKey, 1); + + key->gfn = AMDVI_GET_IOTLB_GFN(gpa); + key->devid = devid; trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), gpa, to_cache.translated_addr); @@ -434,7 +495,8 @@ static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid, entry->perms = to_cache.perm; entry->translated_addr = to_cache.translated_addr; entry->page_mask = to_cache.addr_mask; - *key = gfn | ((uint64_t)(devid) << AMDVI_DEVID_SHIFT); + entry->devid = devid; + g_hash_table_replace(s->iotlb, key, entry); } } @@ -551,7 +613,7 @@ static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr, static int amdvi_as_to_dte(AMDVIAddressSpace *as, uint64_t *dte) { - uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn); + uint16_t devid = PCI_BUILD_BDF(pci_bus_num(as->bus), as->devfn); AMDVIState *s = as->iommu_state; if (!amdvi_get_dte(s, devid, dte)) { @@ -1011,25 +1073,15 @@ static void amdvi_switch_address_space(AMDVIAddressSpace *amdvi_as) */ static void amdvi_reset_address_translation_all(AMDVIState *s) { - AMDVIAddressSpace **iommu_as; + AMDVIAddressSpace *iommu_as; + GHashTableIter as_it; - for (int bus_num = 0; bus_num < PCI_BUS_MAX; bus_num++) { + g_hash_table_iter_init(&as_it, s->address_spaces); - /* Nothing to do if there are no devices on the current bus */ - if (!s->address_spaces[bus_num]) { - continue; - } - iommu_as = s->address_spaces[bus_num]; - - for (int devfn = 0; devfn < PCI_DEVFN_MAX; devfn++) { - - if (!iommu_as[devfn]) { - continue; - } - /* Use passthrough as default mode after reset */ - iommu_as[devfn]->addr_translation = false; - amdvi_switch_address_space(iommu_as[devfn]); - } + while (g_hash_table_iter_next(&as_it, NULL, (void **)&iommu_as)) { + /* Use passthrough as default mode after reset */ + iommu_as->addr_translation = false; + 
amdvi_switch_address_space(iommu_as); } } @@ -1089,27 +1141,15 @@ static void enable_nodma_mode(AMDVIAddressSpace *as) */ static void amdvi_update_addr_translation_mode(AMDVIState *s, uint16_t devid) { - uint8_t bus_num, devfn, dte_mode; + uint8_t dte_mode; AMDVIAddressSpace *as; uint64_t dte[4] = { 0 }; int ret; - /* - * Convert the devid encoded in the command to a bus and devfn in - * order to retrieve the corresponding address space. - */ - bus_num = PCI_BUS_NUM(devid); - devfn = devid & 0xff; - - /* - * The main buffer of size (AMDVIAddressSpace *) * (PCI_BUS_MAX) has already - * been allocated within AMDVIState, but must be careful to not access - * unallocated devfn. - */ - if (!s->address_spaces[bus_num] || !s->address_spaces[bus_num][devfn]) { + as = amdvi_get_as_by_devid(s, devid); + if (!as) { return; } - as = s->address_spaces[bus_num][devfn]; ret = amdvi_as_to_dte(as, dte); @@ -1783,7 +1823,7 @@ static void amdvi_do_translate(AMDVIAddressSpace *as, hwaddr addr, bool is_write, IOMMUTLBEntry *ret) { AMDVIState *s = as->iommu_state; - uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn); + uint16_t devid = PCI_BUILD_BDF(pci_bus_num(as->bus), as->devfn); AMDVIIOTLBEntry *iotlb_entry = amdvi_iotlb_lookup(s, addr, devid); uint64_t entry[4]; int dte_ret; @@ -1858,7 +1898,7 @@ static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion *iommu, hwaddr addr, } amdvi_do_translate(as, addr, flag & IOMMU_WO, &ret); - trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn), + trace_amdvi_translation_result(pci_bus_num(as->bus), PCI_SLOT(as->devfn), PCI_FUNC(as->devfn), addr, ret.translated_addr); return ret; } @@ -2222,30 +2262,28 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) { char name[128]; AMDVIState *s = opaque; - AMDVIAddressSpace **iommu_as, *amdvi_dev_as; - int bus_num = pci_bus_num(bus); + AMDVIAddressSpace *amdvi_dev_as; + AMDVIAsKey *key; - iommu_as = s->address_spaces[bus_num]; + amdvi_dev_as = amdvi_as_lookup(s, 
bus, devfn); /* allocate memory during the first run */ - if (!iommu_as) { - iommu_as = g_new0(AMDVIAddressSpace *, PCI_DEVFN_MAX); - s->address_spaces[bus_num] = iommu_as; - } - - /* set up AMD-Vi region */ - if (!iommu_as[devfn]) { + if (!amdvi_dev_as) { snprintf(name, sizeof(name), "amd_iommu_devfn_%d", devfn); - iommu_as[devfn] = g_new0(AMDVIAddressSpace, 1); - iommu_as[devfn]->bus_num = (uint8_t)bus_num; - iommu_as[devfn]->devfn = (uint8_t)devfn; - iommu_as[devfn]->iommu_state = s; - iommu_as[devfn]->notifier_flags = IOMMU_NOTIFIER_NONE; - iommu_as[devfn]->iova_tree = iova_tree_new(); - iommu_as[devfn]->addr_translation = false; + amdvi_dev_as = g_new0(AMDVIAddressSpace, 1); + key = g_new0(AMDVIAsKey, 1); - amdvi_dev_as = iommu_as[devfn]; + amdvi_dev_as->bus = bus; + amdvi_dev_as->devfn = (uint8_t)devfn; + amdvi_dev_as->iommu_state = s; + amdvi_dev_as->notifier_flags = IOMMU_NOTIFIER_NONE; + amdvi_dev_as->iova_tree = iova_tree_new(); + amdvi_dev_as->addr_translation = false; + key->bus = bus; + key->devfn = devfn; + + g_hash_table_insert(s->address_spaces, key, amdvi_dev_as); /* * Memory region relationships looks like (Address range shows @@ -2288,7 +2326,7 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) amdvi_switch_address_space(amdvi_dev_as); } - return &iommu_as[devfn]->as; + return &amdvi_dev_as->as; } static const PCIIOMMUOps amdvi_iommu_ops = { @@ -2329,7 +2367,7 @@ static int amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, if (!s->dma_remap && (new & IOMMU_NOTIFIER_MAP)) { error_setg_errno(errp, ENOTSUP, "device %02x.%02x.%x requires dma-remap=1", - as->bus_num, PCI_SLOT(as->devfn), PCI_FUNC(as->devfn)); + pci_bus_num(as->bus), PCI_SLOT(as->devfn), PCI_FUNC(as->devfn)); return -ENOTSUP; } @@ -2507,8 +2545,11 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) } } - s->iotlb = g_hash_table_new_full(amdvi_uint64_hash, - amdvi_uint64_equal, g_free, g_free); + s->iotlb = 
g_hash_table_new_full(amdvi_iotlb_hash, + amdvi_iotlb_equal, g_free, g_free); + + s->address_spaces = g_hash_table_new_full(amdvi_as_hash, + amdvi_as_equal, g_free, g_free); /* set up MMIO */ memory_region_init_io(&s->mr_mmio, OBJECT(s), &mmio_mem_ops, s, diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h index daf82fc85f..302ccca512 100644 --- a/hw/i386/amd_iommu.h +++ b/hw/i386/amd_iommu.h @@ -220,8 +220,8 @@ #define PAGE_SIZE_PTE_COUNT(pgsz) (1ULL << ((ctz64(pgsz) - 12) % 9)) /* IOTLB */ -#define AMDVI_IOTLB_MAX_SIZE 1024 -#define AMDVI_DEVID_SHIFT 36 +#define AMDVI_IOTLB_MAX_SIZE 1024 +#define AMDVI_GET_IOTLB_GFN(addr) (addr >> AMDVI_PAGE_SHIFT_4K) /* default extended feature */ #define AMDVI_DEFAULT_EXT_FEATURES \ @@ -408,7 +408,7 @@ struct AMDVIState { bool mmio_enabled; /* for each served device */ - AMDVIAddressSpace **address_spaces[PCI_BUS_MAX]; + GHashTable *address_spaces; /* list of address spaces with registered notifiers */ QLIST_HEAD(, AMDVIAddressSpace) amdvi_as_with_notifiers; diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 6a168d5107..78b142ccea 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -87,6 +87,20 @@ struct vtd_iotlb_key { static void vtd_address_space_refresh_all(IntelIOMMUState *s); static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n); +static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s) +{ + VTDAddressSpace *vtd_as; + GHashTableIter as_it; + + trace_vtd_pasid_cache_reset(); + + g_hash_table_iter_init(&as_it, s->vtd_address_spaces); + while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_as)) { + VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry; + pc_entry->valid = false; + } +} + static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val, uint64_t wmask, uint64_t w1cmask) { @@ -381,6 +395,7 @@ static void vtd_reset_caches(IntelIOMMUState *s) vtd_iommu_lock(s); vtd_reset_iotlb_locked(s); vtd_reset_context_cache_locked(s); + 
vtd_pasid_cache_reset_locked(s); vtd_iommu_unlock(s); } @@ -3051,6 +3066,155 @@ static bool vtd_process_piotlb_desc(IntelIOMMUState *s, return true; } +static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as, + VTDPASIDEntry *pe) +{ + IntelIOMMUState *s = vtd_as->iommu_state; + VTDContextEntry ce; + int ret; + + if (!s->root_scalable) { + return -VTD_FR_RTADDR_INV_TTM; + } + + ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), vtd_as->devfn, + &ce); + if (ret) { + return ret; + } + + return vtd_ce_get_rid2pasid_entry(s, &ce, pe, vtd_as->pasid); +} + +static int vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2) +{ + return memcmp(p1, p2, sizeof(*p1)); +} + +/* Update or invalidate pasid cache based on the pasid entry in guest memory. */ +static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value, + gpointer user_data) +{ + VTDPASIDCacheInfo *pc_info = user_data; + VTDAddressSpace *vtd_as = value; + VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry; + VTDPASIDEntry pe; + IOMMUNotifier *n; + uint16_t did; + + if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) { + if (!pc_entry->valid) { + return; + } + /* + * No valid pasid entry in guest memory. e.g. pasid entry was modified + * to be either all-zero or non-present. Either case means existing + * pasid cache should be invalidated. + */ + pc_entry->valid = false; + + /* + * When a pasid entry isn't valid any more, we should unmap all + * mappings in shadow pages instantly to ensure DMA security. + */ + IOMMU_NOTIFIER_FOREACH(n, &vtd_as->iommu) { + vtd_address_space_unmap(vtd_as, n); + } + vtd_switch_address_space(vtd_as); + return; + } + + /* + * VTD_INV_DESC_PASIDC_G_DSI and VTD_INV_DESC_PASIDC_G_PASID_SI require + * DID check. If DID doesn't match the value in cache or memory, then + * it's not a pasid entry we want to invalidate. 
+ */ + switch (pc_info->type) { + case VTD_INV_DESC_PASIDC_G_PASID_SI: + if (pc_info->pasid != vtd_as->pasid) { + return; + } + /* Fall through */ + case VTD_INV_DESC_PASIDC_G_DSI: + if (pc_entry->valid) { + did = VTD_SM_PASID_ENTRY_DID(pc_entry->pasid_entry.val[1]); + } else { + did = VTD_SM_PASID_ENTRY_DID(pe.val[1]); + } + if (pc_info->did != did) { + return; + } + } + + if (!pc_entry->valid) { + pc_entry->pasid_entry = pe; + pc_entry->valid = true; + } else if (!vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) { + return; + } + + vtd_switch_address_space(vtd_as); + vtd_address_space_sync(vtd_as); +} + +static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info) +{ + if (!s->root_scalable || !s->dmar_enabled) { + return; + } + + vtd_iommu_lock(s); + g_hash_table_foreach(s->vtd_address_spaces, vtd_pasid_cache_sync_locked, + pc_info); + vtd_iommu_unlock(s); +} + +static bool vtd_process_pasid_desc(IntelIOMMUState *s, + VTDInvDesc *inv_desc) +{ + uint16_t did; + uint32_t pasid; + VTDPASIDCacheInfo pc_info = {}; + uint64_t mask[4] = {VTD_INV_DESC_PASIDC_RSVD_VAL0, VTD_INV_DESC_ALL_ONE, + VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE}; + + if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, true, + __func__, "pasid cache inv")) { + return false; + } + + did = VTD_INV_DESC_PASIDC_DID(inv_desc); + pasid = VTD_INV_DESC_PASIDC_PASID(inv_desc); + pc_info.type = VTD_INV_DESC_PASIDC_G(inv_desc); + + switch (pc_info.type) { + case VTD_INV_DESC_PASIDC_G_DSI: + trace_vtd_inv_desc_pasid_cache_dsi(did); + pc_info.did = did; + break; + + case VTD_INV_DESC_PASIDC_G_PASID_SI: + /* PASID selective implies a DID selective */ + trace_vtd_inv_desc_pasid_cache_psi(did, pasid); + pc_info.did = did; + pc_info.pasid = pasid ?: PCI_NO_PASID; + break; + + case VTD_INV_DESC_PASIDC_G_GLOBAL: + trace_vtd_inv_desc_pasid_cache_gsi(); + break; + + default: + error_report_once("invalid granularity field in PASID-cache invalidate " + "descriptor, hi: 0x%"PRIx64" lo: 0x%" 
PRIx64, + inv_desc->val[1], inv_desc->val[0]); + return false; + } + + vtd_pasid_cache_sync(s, &pc_info); + return true; +} + static bool vtd_process_inv_iec_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) { @@ -3266,6 +3430,13 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s) } break; + case VTD_INV_DESC_PC: + trace_vtd_inv_desc("pasid-cache", inv_desc.val[1], inv_desc.val[0]); + if (!vtd_process_pasid_desc(s, &inv_desc)) { + return false; + } + break; + case VTD_INV_DESC_PIOTLB: trace_vtd_inv_desc("p-iotlb", inv_desc.val[1], inv_desc.val[0]); if (!vtd_process_piotlb_desc(s, &inv_desc)) { @@ -3308,16 +3479,6 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s) } break; - /* - * TODO: the entity of below two cases will be implemented in future series. - * To make guest (which integrates scalable mode support patch set in - * iommu driver) work, just return true is enough so far. - */ - case VTD_INV_DESC_PC: - if (s->scalable_mode) { - break; - } - /* fallthrough */ default: error_report_once("%s: invalid inv desc: hi=%"PRIx64", lo=%"PRIx64 " (unknown type)", __func__, inv_desc.hi, diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 0f6a1237e4..75bafdf0cd 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -316,6 +316,8 @@ typedef enum VTDFaultReason { * request while disabled */ VTD_FR_IR_SID_ERR = 0x26, /* Invalid Source-ID */ + VTD_FR_RTADDR_INV_TTM = 0x31, /* Invalid TTM in RTADDR */ + VTD_FR_SM_PRE_ABS = 0x47, /* SCT.8 : PRE bit in a present SM CE is 0 */ /* PASID directory entry access failure */ @@ -517,6 +519,15 @@ typedef union VTDPRDesc VTDPRDesc; #define VTD_INV_DESC_PIOTLB_RSVD_VAL0 0xfff000000000f1c0ULL #define VTD_INV_DESC_PIOTLB_RSVD_VAL1 0xf80ULL +/* PASID-cache Invalidate Descriptor (pc_inv_dsc) fields */ +#define VTD_INV_DESC_PASIDC_G(x) extract64((x)->val[0], 4, 2) +#define VTD_INV_DESC_PASIDC_G_DSI 0 +#define VTD_INV_DESC_PASIDC_G_PASID_SI 1 +#define 
VTD_INV_DESC_PASIDC_G_GLOBAL 3 +#define VTD_INV_DESC_PASIDC_DID(x) extract64((x)->val[0], 16, 16) +#define VTD_INV_DESC_PASIDC_PASID(x) extract64((x)->val[0], 32, 20) +#define VTD_INV_DESC_PASIDC_RSVD_VAL0 0xfff000000000f1c0ULL + /* Page Request Descriptor */ /* For the low 64-bit of 128-bit */ #define VTD_PRD_TYPE (1ULL) @@ -603,6 +614,12 @@ typedef struct VTDRootEntry VTDRootEntry; #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL #define VTD_SM_CONTEXT_ENTRY_PRE 0x10ULL +typedef struct VTDPASIDCacheInfo { + uint8_t type; + uint16_t did; + uint32_t pasid; +} VTDPASIDCacheInfo; + /* PASID Table Related Definitions */ #define VTD_PASID_DIR_BASE_ADDR_MASK (~0xfffULL) #define VTD_PASID_TABLE_BASE_ADDR_MASK (~0xfffULL) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 4d6bcbb846..f8b919cb6c 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -81,7 +81,9 @@ { "qemu64-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\ { "athlon-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, }, -GlobalProperty pc_compat_10_1[] = {}; +GlobalProperty pc_compat_10_1[] = { + { "mch", "extended-tseg-mbytes", "16" }, +}; const size_t pc_compat_10_1_len = G_N_ELEMENTS(pc_compat_10_1); GlobalProperty pc_compat_10_0[] = { diff --git a/hw/i386/trace-events b/hw/i386/trace-events index ac9e1a10aa..b704f4f90c 100644 --- a/hw/i386/trace-events +++ b/hw/i386/trace-events @@ -24,6 +24,10 @@ vtd_inv_qi_head(uint16_t head) "read head %d" vtd_inv_qi_tail(uint16_t head) "write tail %d" vtd_inv_qi_fetch(void) "" vtd_context_cache_reset(void) "" +vtd_pasid_cache_reset(void) "" +vtd_inv_desc_pasid_cache_gsi(void) "" +vtd_inv_desc_pasid_cache_dsi(uint16_t domain) "Domain selective PC invalidation domain 0x%"PRIx16 +vtd_inv_desc_pasid_cache_psi(uint16_t domain, uint32_t pasid) "PASID selective PC invalidation domain 0x%"PRIx16" pasid 0x%"PRIx32 vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present" vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus 
%"PRIu8" devfn %"PRIu8" not present" vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page hit sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16 diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 17ed0ef919..3b85560f6f 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -4299,19 +4299,19 @@ static const Property virtio_net_properties[] = { VIRTIO_DEFINE_PROP_FEATURE("host_tunnel", VirtIONet, host_features_ex, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO, - false), + true), VIRTIO_DEFINE_PROP_FEATURE("host_tunnel_csum", VirtIONet, host_features_ex, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM, - false), + true), VIRTIO_DEFINE_PROP_FEATURE("guest_tunnel", VirtIONet, host_features_ex, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO, - false), + true), VIRTIO_DEFINE_PROP_FEATURE("guest_tunnel_csum", VirtIONet, host_features_ex, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM, - false), + true), }; static void virtio_net_class_init(ObjectClass *klass, const void *data) diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c index 952a0ace19..4587baeb78 100644 --- a/hw/pci-host/gpex-acpi.c +++ b/hw/pci-host/gpex-acpi.c @@ -64,7 +64,7 @@ static Aml *build_pci_host_bridge_dsm_method(void) UUID = aml_touuid("E5C937D0-3553-4D7A-9117-EA4D19C3434D"); ifctx = aml_if(aml_equal(aml_arg(0), UUID)); ifctx1 = aml_if(aml_equal(aml_arg(2), aml_int(0))); - uint8_t byte_list[1] = {1}; + uint8_t byte_list[1] = {0}; buf = aml_buffer(1, byte_list); aml_append(ifctx1, aml_return(buf)); aml_append(ifctx, ifctx1); diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 1951ae440c..a708758d36 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -663,7 +663,7 @@ static void mch_realize(PCIDevice *d, Error **errp) static const Property mch_props[] = { DEFINE_PROP_UINT16("extended-tseg-mbytes", MCHPCIState, ext_tseg_mbytes, - 16), + 64), DEFINE_PROP_BOOL("smbase-smram", MCHPCIState, has_smram_at_smbase, true), }; diff --git a/hw/virtio/vhost-user.c 
b/hw/virtio/vhost-user.c index aac98f898a..63fa9a1b4b 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -1327,8 +1327,11 @@ static int vhost_set_vring_file(struct vhost_dev *dev, VhostUserRequest request, struct vhost_vring_file *file) { + int ret; int fds[VHOST_USER_MAX_RAM_SLOTS]; size_t fd_num = 0; + bool reply_supported = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK); VhostUserMsg msg = { .hdr.request = request, .hdr.flags = VHOST_USER_VERSION, @@ -1336,13 +1339,32 @@ static int vhost_set_vring_file(struct vhost_dev *dev, .hdr.size = sizeof(msg.payload.u64), }; + if (reply_supported) { + msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; + } + if (file->fd > 0) { fds[fd_num++] = file->fd; } else { msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; } - return vhost_user_write(dev, &msg, fds, fd_num); + ret = vhost_user_write(dev, &msg, fds, fd_num); + if (ret < 0) { + return ret; + } + + if (reply_supported) { + /* + * wait for the back-end's confirmation that the new FD is active, + * otherwise guest_notifier_mask() could check for pending interrupts + * while the back-end is still using the masked event FD, losing + * interrupts that occur before the back-end installs the FD + */ + return process_message_reply(dev, &msg); + } + + return 0; } static int vhost_user_set_vring_kick(struct vhost_dev *dev, @@ -1668,14 +1690,6 @@ static bool vhost_user_send_resp(QIOChannel *ioc, VhostUserHeader *hdr, return !qio_channel_writev_all(ioc, iov, ARRAY_SIZE(iov), errp); } -static bool -vhost_user_backend_send_dmabuf_fd(QIOChannel *ioc, VhostUserHeader *hdr, - VhostUserPayload *payload, Error **errp) -{ - hdr->size = sizeof(payload->u64); - return vhost_user_send_resp(ioc, hdr, payload, errp); -} - int vhost_user_get_shared_object(struct vhost_dev *dev, unsigned char *uuid, int *dmabuf_fd) { @@ -1716,19 +1730,15 @@ int vhost_user_get_shared_object(struct vhost_dev *dev, unsigned char *uuid, static int 
vhost_user_backend_handle_shared_object_lookup(struct vhost_user *u, - QIOChannel *ioc, - VhostUserHeader *hdr, - VhostUserPayload *payload) + VhostUserShared *object) { QemuUUID uuid; CharFrontend *chr = u->user->chr; - Error *local_err = NULL; int dmabuf_fd = -1; int fd_num = 0; - memcpy(uuid.data, payload->object.uuid, sizeof(payload->object.uuid)); + memcpy(uuid.data, object->uuid, sizeof(object->uuid)); - payload->u64 = 0; switch (virtio_object_type(&uuid)) { case TYPE_DMABUF: dmabuf_fd = virtio_lookup_dmabuf(&uuid); @@ -1737,18 +1747,16 @@ vhost_user_backend_handle_shared_object_lookup(struct vhost_user *u, { struct vhost_dev *dev = virtio_lookup_vhost_device(&uuid); if (dev == NULL) { - payload->u64 = -EINVAL; - break; + return -EINVAL; } int ret = vhost_user_get_shared_object(dev, uuid.data, &dmabuf_fd); if (ret < 0) { - payload->u64 = ret; + return ret; } break; } case TYPE_INVALID: - payload->u64 = -EINVAL; - break; + return -EINVAL; } if (dmabuf_fd != -1) { @@ -1757,11 +1765,6 @@ vhost_user_backend_handle_shared_object_lookup(struct vhost_user *u, if (qemu_chr_fe_set_msgfds(chr, &dmabuf_fd, fd_num) < 0) { error_report("Failed to set msg fds."); - payload->u64 = -EINVAL; - } - - if (!vhost_user_backend_send_dmabuf_fd(ioc, hdr, payload, &local_err)) { - error_report_err(local_err); return -EINVAL; } @@ -1790,6 +1793,7 @@ static gboolean backend_read(QIOChannel *ioc, GIOCondition condition, struct iovec iov; g_autofree int *fd = NULL; size_t fdsize = 0; + bool reply_ack; int i; /* Read header */ @@ -1808,6 +1812,8 @@ static gboolean backend_read(QIOChannel *ioc, GIOCondition condition, goto err; } + reply_ack = hdr.flags & VHOST_USER_NEED_REPLY_MASK; + /* Read payload */ if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) { error_report_err(local_err); @@ -1833,8 +1839,10 @@ static gboolean backend_read(QIOChannel *ioc, GIOCondition condition, &payload.object); break; case VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP: - ret = 
vhost_user_backend_handle_shared_object_lookup(dev->opaque, ioc, - &hdr, &payload); + /* The backend always expects a response */ + reply_ack = true; + ret = vhost_user_backend_handle_shared_object_lookup(dev->opaque, + &payload.object); break; default: error_report("Received unexpected msg type: %d.", hdr.request); @@ -1845,7 +1853,7 @@ static gboolean backend_read(QIOChannel *ioc, GIOCondition condition, * REPLY_ACK feature handling. Other reply types has to be managed * directly in their request handlers. */ - if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) { + if (reply_ack) { payload.u64 = !!ret; hdr.size = sizeof(payload.u64); diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index b2f1ef9595..ca7f7bb661 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -95,6 +95,11 @@ struct VTDPASIDEntry { uint64_t val[8]; }; +typedef struct VTDPASIDCacheEntry { + struct VTDPASIDEntry pasid_entry; + bool valid; +} VTDPASIDCacheEntry; + struct VTDAddressSpace { PCIBus *bus; uint8_t devfn; @@ -107,6 +112,7 @@ struct VTDAddressSpace { MemoryRegion iommu_ir_fault; /* Interrupt region for catching fault */ IntelIOMMUState *iommu_state; VTDContextCacheEntry context_cache_entry; + VTDPASIDCacheEntry pasid_cache_entry; QLIST_ENTRY(VTDAddressSpace) next; /* Superset of notifier flags that this address space has */ IOMMUNotifierFlag notifier_flags; diff --git a/tests/data/acpi/aarch64/virt/DSDT b/tests/data/acpi/aarch64/virt/DSDT index 38f01adb61..35a862e447 100644 Binary files a/tests/data/acpi/aarch64/virt/DSDT and b/tests/data/acpi/aarch64/virt/DSDT differ diff --git a/tests/data/acpi/aarch64/virt/DSDT.acpihmatvirt b/tests/data/acpi/aarch64/virt/DSDT.acpihmatvirt index 37a9af713b..7ce35f0d86 100644 Binary files a/tests/data/acpi/aarch64/virt/DSDT.acpihmatvirt and b/tests/data/acpi/aarch64/virt/DSDT.acpihmatvirt differ diff --git a/tests/data/acpi/aarch64/virt/DSDT.acpipcihp b/tests/data/acpi/aarch64/virt/DSDT.acpipcihp index 
04427e2d8e..6d1765c310 100644 Binary files a/tests/data/acpi/aarch64/virt/DSDT.acpipcihp and b/tests/data/acpi/aarch64/virt/DSDT.acpipcihp differ diff --git a/tests/data/acpi/aarch64/virt/DSDT.hpoffacpiindex b/tests/data/acpi/aarch64/virt/DSDT.hpoffacpiindex index 43ab60496e..61cce30c74 100644 Binary files a/tests/data/acpi/aarch64/virt/DSDT.hpoffacpiindex and b/tests/data/acpi/aarch64/virt/DSDT.hpoffacpiindex differ diff --git a/tests/data/acpi/aarch64/virt/DSDT.memhp b/tests/data/acpi/aarch64/virt/DSDT.memhp index 3c39167444..ffc5f1c0d1 100644 Binary files a/tests/data/acpi/aarch64/virt/DSDT.memhp and b/tests/data/acpi/aarch64/virt/DSDT.memhp differ diff --git a/tests/data/acpi/aarch64/virt/DSDT.pxb b/tests/data/acpi/aarch64/virt/DSDT.pxb index 71c632cedc..f98dcbfc6b 100644 Binary files a/tests/data/acpi/aarch64/virt/DSDT.pxb and b/tests/data/acpi/aarch64/virt/DSDT.pxb differ diff --git a/tests/data/acpi/aarch64/virt/DSDT.smmuv3-dev b/tests/data/acpi/aarch64/virt/DSDT.smmuv3-dev index e8c2b376df..6c12a7aaf8 100644 Binary files a/tests/data/acpi/aarch64/virt/DSDT.smmuv3-dev and b/tests/data/acpi/aarch64/virt/DSDT.smmuv3-dev differ diff --git a/tests/data/acpi/aarch64/virt/DSDT.smmuv3-legacy b/tests/data/acpi/aarch64/virt/DSDT.smmuv3-legacy index e8c2b376df..6c12a7aaf8 100644 Binary files a/tests/data/acpi/aarch64/virt/DSDT.smmuv3-legacy and b/tests/data/acpi/aarch64/virt/DSDT.smmuv3-legacy differ diff --git a/tests/data/acpi/aarch64/virt/DSDT.topology b/tests/data/acpi/aarch64/virt/DSDT.topology index 9f22cd3dc8..208a3163a6 100644 Binary files a/tests/data/acpi/aarch64/virt/DSDT.topology and b/tests/data/acpi/aarch64/virt/DSDT.topology differ diff --git a/tests/data/acpi/aarch64/virt/DSDT.viot b/tests/data/acpi/aarch64/virt/DSDT.viot index dd3775a076..f81e3e6cc7 100644 Binary files a/tests/data/acpi/aarch64/virt/DSDT.viot and b/tests/data/acpi/aarch64/virt/DSDT.viot differ diff --git a/tests/data/acpi/loongarch64/virt/DSDT b/tests/data/acpi/loongarch64/virt/DSDT 
index 55aa34f988..09aa903c4e 100644 Binary files a/tests/data/acpi/loongarch64/virt/DSDT and b/tests/data/acpi/loongarch64/virt/DSDT differ diff --git a/tests/data/acpi/loongarch64/virt/DSDT.memhp b/tests/data/acpi/loongarch64/virt/DSDT.memhp index c0955eb604..a069d6878f 100644 Binary files a/tests/data/acpi/loongarch64/virt/DSDT.memhp and b/tests/data/acpi/loongarch64/virt/DSDT.memhp differ diff --git a/tests/data/acpi/loongarch64/virt/DSDT.numamem b/tests/data/acpi/loongarch64/virt/DSDT.numamem index 61e47e7252..78ece52f57 100644 Binary files a/tests/data/acpi/loongarch64/virt/DSDT.numamem and b/tests/data/acpi/loongarch64/virt/DSDT.numamem differ diff --git a/tests/data/acpi/loongarch64/virt/DSDT.topology b/tests/data/acpi/loongarch64/virt/DSDT.topology index b2afebc938..7ab23f47cc 100644 Binary files a/tests/data/acpi/loongarch64/virt/DSDT.topology and b/tests/data/acpi/loongarch64/virt/DSDT.topology differ diff --git a/tests/data/acpi/riscv64/virt/DSDT b/tests/data/acpi/riscv64/virt/DSDT index 527f239dab..968e1a15c8 100644 Binary files a/tests/data/acpi/riscv64/virt/DSDT and b/tests/data/acpi/riscv64/virt/DSDT differ diff --git a/tests/data/acpi/x86/microvm/DSDT.pcie b/tests/data/acpi/x86/microvm/DSDT.pcie index ba258f454d..b646a05551 100644 Binary files a/tests/data/acpi/x86/microvm/DSDT.pcie and b/tests/data/acpi/x86/microvm/DSDT.pcie differ