* i386: fix migration issues in 10.1

* target/i386/mshv: new accelerator
 * rust: use glib-sys-rs
 * rust: fixes for docker tests
 -----BEGIN PGP SIGNATURE-----
 
 iQFIBAABCgAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmjnaOwUHHBib256aW5p
 QHJlZGhhdC5jb20ACgkQv/vSX3jHroNsFQf/WXKxZLLnItHwDz3UdwjzewPWpz5N
 fpS0E4C03J8pACDgyfl7PQl47P7NlJ08Ig2Lc5l3Z9KiAKgh0orR7Cqd0BY5f9lo
 uk4FgXfXpQyApywAlctadrTfcH8sRv2tMaP6EJ9coLtJtHW9RUGFPaZeMsqrjpAl
 TpwAXPYNDDvvy1ih1LPh5DzOPDXE4pin2tDa94gJei56gY95auK4zppoNYLdB3kR
 GOyR4QK43/yhuxPHOmQCZOE3HK2XrKgMZHWIjAovjZjZFiJs49FaHBOpRfFpsUlG
 PB3UbIMtu69VY20LqbbyInPnyATRQzqIGnDGTErP6lfCGTKTy2ulQYWvHA==
 =KM5O
 -----END PGP SIGNATURE-----

Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging

* i386: fix migration issues in 10.1
* target/i386/mshv: new accelerator
* rust: use glib-sys-rs
* rust: fixes for docker tests

# -----BEGIN PGP SIGNATURE-----
#
# iQFIBAABCgAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmjnaOwUHHBib256aW5p
# QHJlZGhhdC5jb20ACgkQv/vSX3jHroNsFQf/WXKxZLLnItHwDz3UdwjzewPWpz5N
# fpS0E4C03J8pACDgyfl7PQl47P7NlJ08Ig2Lc5l3Z9KiAKgh0orR7Cqd0BY5f9lo
# uk4FgXfXpQyApywAlctadrTfcH8sRv2tMaP6EJ9coLtJtHW9RUGFPaZeMsqrjpAl
# TpwAXPYNDDvvy1ih1LPh5DzOPDXE4pin2tDa94gJei56gY95auK4zppoNYLdB3kR
# GOyR4QK43/yhuxPHOmQCZOE3HK2XrKgMZHWIjAovjZjZFiJs49FaHBOpRfFpsUlG
# PB3UbIMtu69VY20LqbbyInPnyATRQzqIGnDGTErP6lfCGTKTy2ulQYWvHA==
# =KM5O
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 09 Oct 2025 12:49:00 AM PDT
# gpg:                using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg:                issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [unknown]
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>" [unknown]
# gpg: WARNING: The key's User ID is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* tag 'for-upstream' of https://gitlab.com/bonzini/qemu: (35 commits)
  rust: fix path to rust_root_crate.sh
  tests/docker: make --enable-rust overridable with EXTRA_CONFIGURE_OPTS
  MAINTAINERS: Add maintainers for mshv accelerator
  docs: Add mshv to documentation
  target/i386/mshv: Use preallocated page for hvcall
  qapi/accel: Allow to query mshv capabilities
  accel/mshv: Handle overlapping mem mappings
  target/i386/mshv: Implement mshv_vcpu_run()
  target/i386/mshv: Write MSRs to the hypervisor
  target/i386/mshv: Integrate x86 instruction decoder/emulator
  target/i386/mshv: Register MSRs with MSHV
  target/i386/mshv: Register CPUID entries with MSHV
  target/i386/mshv: Set local interrupt controller state
  target/i386/mshv: Implement mshv_arch_put_registers()
  target/i386/mshv: Implement mshv_get_special_regs()
  target/i386/mshv: Implement mshv_get_standard_regs()
  target/i386/mshv: Implement mshv_store_regs()
  target/i386/mshv: Add CPU create and remove logic
  accel/mshv: Add vCPU signal handling
  accel/mshv: Add vCPU creation and execution loop
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2025-10-09 07:59:00 -07:00
commit 1188b07e60
87 changed files with 6617 additions and 79 deletions

View file

@ -551,6 +551,21 @@ F: target/i386/whpx/
F: accel/stubs/whpx-stub.c
F: include/system/whpx.h
MSHV
M: Magnus Kulke <magnus.kulke@linux.microsoft.com>
R: Wei Liu <wei.liu@kernel.org>
S: Supported
F: accel/mshv/
F: include/system/mshv.h
F: include/hw/hyperv/hvgdk*.h
F: include/hw/hyperv/hvhdk*.h
X86 MSHV CPUs
M: Magnus Kulke <magnus.kulke@linux.microsoft.com>
R: Wei Liu <wei.liu@kernel.org>
S: Supported
F: target/i386/mshv/
X86 Instruction Emulator
M: Cameron Esfahani <dirty@apple.com>
M: Roman Bolshakov <rbolshakov@ddn.com>

View file

@ -13,6 +13,9 @@ config TCG
config KVM
bool
config MSHV
bool
config XEN
bool
select FSDEV_9P if VIRTFS

106
accel/accel-irq.c Normal file
View file

@ -0,0 +1,106 @@
/*
* Accelerated irqchip abstraction
*
* Copyright Microsoft, Corp. 2025
*
* Authors: Ziqiao Zhou <ziqiaozhou@microsoft.com>
* Magnus Kulke <magnuskulke@microsoft.com>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "qemu/osdep.h"
#include "hw/pci/msi.h"
#include "system/kvm.h"
#include "system/mshv.h"
#include "system/accel-irq.h"
/*
 * Add an MSI route for @vector of @dev via the active accelerator.
 * The MSHV backend (when MSI-via-irqfd is enabled) manages its own
 * routing table and ignores @c; otherwise KVM is used.
 * Returns a virq handle (>= 0) or -ENOSYS when no accelerator applies.
 */
int accel_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev)
{
#ifdef CONFIG_MSHV_IS_POSSIBLE
if (mshv_msi_via_irqfd_enabled()) {
return mshv_irqchip_add_msi_route(vector, dev);
}
#endif
if (kvm_enabled()) {
return kvm_irqchip_add_msi_route(c, vector, dev);
}
return -ENOSYS;
}
/*
 * Update the MSI message of the existing route @vector.
 * Dispatches to MSHV when MSI-via-irqfd is active, else to KVM.
 * Returns 0 on success or -ENOSYS when no accelerator handles it.
 */
int accel_irqchip_update_msi_route(int vector, MSIMessage msg, PCIDevice *dev)
{
#ifdef CONFIG_MSHV_IS_POSSIBLE
if (mshv_msi_via_irqfd_enabled()) {
return mshv_irqchip_update_msi_route(vector, msg, dev);
}
#endif
if (kvm_enabled()) {
return kvm_irqchip_update_msi_route(kvm_state, vector, msg, dev);
}
return -ENOSYS;
}
/*
 * Commit pending route changes described by @c.
 * The MSHV backend tracks its own pending changes, so @c is unused there.
 */
void accel_irqchip_commit_route_changes(KVMRouteChange *c)
{
#ifdef CONFIG_MSHV_IS_POSSIBLE
if (mshv_msi_via_irqfd_enabled()) {
mshv_irqchip_commit_routes();
}
#endif
if (kvm_enabled()) {
kvm_irqchip_commit_route_changes(c);
}
}
/* Flush the accelerator's MSI routing table to the hypervisor. */
void accel_irqchip_commit_routes(void)
{
#ifdef CONFIG_MSHV_IS_POSSIBLE
if (mshv_msi_via_irqfd_enabled()) {
mshv_irqchip_commit_routes();
}
#endif
if (kvm_enabled()) {
kvm_irqchip_commit_routes(kvm_state);
}
}
/* Release a virq previously returned by accel_irqchip_add_msi_route(). */
void accel_irqchip_release_virq(int virq)
{
#ifdef CONFIG_MSHV_IS_POSSIBLE
if (mshv_msi_via_irqfd_enabled()) {
mshv_irqchip_release_virq(virq);
}
#endif
if (kvm_enabled()) {
kvm_irqchip_release_virq(kvm_state, virq);
}
}
/*
 * Bind eventfd @n (with optional resample notifier @rn) to @virq so the
 * hypervisor injects the interrupt directly, bypassing QEMU.
 * Returns 0 on success or a negative error code.
 */
int accel_irqchip_add_irqfd_notifier_gsi(EventNotifier *n, EventNotifier *rn,
int virq)
{
#ifdef CONFIG_MSHV_IS_POSSIBLE
if (mshv_msi_via_irqfd_enabled()) {
return mshv_irqchip_add_irqfd_notifier_gsi(n, rn, virq);
}
#endif
if (kvm_enabled()) {
return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, rn, virq);
}
return -ENOSYS;
}
/* Unbind eventfd @n from @virq. Returns 0 or a negative error code. */
int accel_irqchip_remove_irqfd_notifier_gsi(EventNotifier *n, int virq)
{
#ifdef CONFIG_MSHV_IS_POSSIBLE
if (mshv_msi_via_irqfd_enabled()) {
return mshv_irqchip_remove_irqfd_notifier_gsi(n, virq);
}
#endif
if (kvm_enabled()) {
return kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, virq);
}
return -ENOSYS;
}

View file

@ -1,6 +1,6 @@
common_ss.add(files('accel-common.c'))
specific_ss.add(files('accel-target.c'))
system_ss.add(files('accel-system.c', 'accel-blocker.c', 'accel-qmp.c', 'accel-irq.c'))
user_ss.add(files('accel-user.c'))
subdir('tcg')
@ -10,6 +10,7 @@ if have_system
subdir('kvm')
subdir('xen')
subdir('stubs')
subdir('mshv')
endif
# qtest # qtest

399
accel/mshv/irq.c Normal file
View file

@ -0,0 +1,399 @@
/*
* QEMU MSHV support
*
* Copyright Microsoft, Corp. 2025
*
* Authors: Ziqiao Zhou <ziqiaozhou@microsoft.com>
* Magnus Kulke <magnuskulke@microsoft.com>
* Stanislav Kinsburskii <skinsburskii@microsoft.com>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "linux/mshv.h"
#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "hw/hyperv/hvhdk_mini.h"
#include "hw/hyperv/hvgdk_mini.h"
#include "hw/intc/ioapic.h"
#include "hw/pci/msi.h"
#include "system/mshv.h"
#include "system/mshv_int.h"
#include "trace.h"
#include <stdint.h>
#include <sys/ioctl.h>
#define MSHV_IRQFD_RESAMPLE_FLAG (1 << MSHV_IRQFD_BIT_RESAMPLE)
#define MSHV_IRQFD_BIT_DEASSIGN_FLAG (1 << MSHV_IRQFD_BIT_DEASSIGN)
static MshvMsiControl *msi_control;
static QemuMutex msi_control_mutex;
/*
 * One-time setup of the global MSI routing state: the guarding mutex and
 * the gsi -> mshv_user_irq_entry hash table (keys are GSI numbers, values
 * are heap-allocated entries freed on replacement or removal).
 */
void mshv_init_msicontrol(void)
{
qemu_mutex_init(&msi_control_mutex);
msi_control = g_new0(MshvMsiControl, 1);
msi_control->gsi_routes = g_hash_table_new(g_direct_hash, g_direct_equal);
msi_control->updated = false;
}
/*
 * Install or update the MSI route for @gsi with address @addr and payload
 * @data. A no-op (returns 0) when an identical entry already exists;
 * otherwise replaces the entry and marks the table dirty for the next
 * commit_msi_routing_table(). Returns 0 on success, -1 on invalid gsi.
 */
static int set_msi_routing(uint32_t gsi, uint64_t addr, uint32_t data)
{
struct mshv_user_irq_entry *entry;
uint32_t high_addr = addr >> 32;
uint32_t low_addr = addr & 0xFFFFFFFF;
GHashTable *gsi_routes;
trace_mshv_set_msi_routing(gsi, addr, data);
if (gsi >= MSHV_MAX_MSI_ROUTES) {
error_report("gsi >= MSHV_MAX_MSI_ROUTES");
return -1;
}
assert(msi_control);
WITH_QEMU_LOCK_GUARD(&msi_control_mutex) {
gsi_routes = msi_control->gsi_routes;
entry = g_hash_table_lookup(gsi_routes, GINT_TO_POINTER(gsi));
if (entry
&& entry->address_hi == high_addr
&& entry->address_lo == low_addr
&& entry->data == data)
{
/* nothing to update */
return 0;
}
/* free old entry; the insert below replaces the stale table pointer */
g_free(entry);
/* create new entry */
entry = g_new0(struct mshv_user_irq_entry, 1);
entry->gsi = gsi;
entry->address_hi = high_addr;
entry->address_lo = low_addr;
entry->data = data;
g_hash_table_insert(gsi_routes, GINT_TO_POINTER(gsi), entry);
msi_control->updated = true;
}
return 0;
}
/*
 * Allocate the lowest free GSI slot and install a route for @addr/@data.
 * Returns the allocated gsi (>= 0), or -1 when the table is full.
 */
static int add_msi_routing(uint64_t addr, uint32_t data)
{
struct mshv_user_irq_entry *route_entry;
uint32_t high_addr = addr >> 32;
uint32_t low_addr = addr & 0xFFFFFFFF;
int gsi;
GHashTable *gsi_routes;
trace_mshv_add_msi_routing(addr, data);
assert(msi_control);
WITH_QEMU_LOCK_GUARD(&msi_control_mutex) {
/* find an empty slot by linear scan from gsi 0 */
gsi = 0;
gsi_routes = msi_control->gsi_routes;
while (gsi < MSHV_MAX_MSI_ROUTES) {
route_entry = g_hash_table_lookup(gsi_routes, GINT_TO_POINTER(gsi));
if (!route_entry) {
break;
}
gsi++;
}
if (gsi >= MSHV_MAX_MSI_ROUTES) {
error_report("No empty gsi slot available");
return -1;
}
/* create new entry */
route_entry = g_new0(struct mshv_user_irq_entry, 1);
route_entry->gsi = gsi;
route_entry->address_hi = high_addr;
route_entry->address_lo = low_addr;
route_entry->data = data;
g_hash_table_insert(gsi_routes, GINT_TO_POINTER(gsi), route_entry);
msi_control->updated = true;
}
return gsi;
}
/*
 * Serialize the routing hash table into a flat mshv_user_irq_table and
 * hand it to the kernel via MSHV_SET_MSI_ROUTING. Skips the ioctl when
 * nothing changed since the last commit. Returns 0 on success, -1 on
 * ioctl failure (the dirty flag is left set so a retry re-commits).
 */
static int commit_msi_routing_table(int vm_fd)
{
guint len;
int i, ret;
size_t table_size;
struct mshv_user_irq_table *table;
GHashTableIter iter;
gpointer key, value;
assert(msi_control);
WITH_QEMU_LOCK_GUARD(&msi_control_mutex) {
if (!msi_control->updated) {
/* nothing to update */
return 0;
}
/* Calculate the size of the table (header + flexible array member) */
len = g_hash_table_size(msi_control->gsi_routes);
table_size = sizeof(struct mshv_user_irq_table)
+ len * sizeof(struct mshv_user_irq_entry);
table = g_malloc0(table_size);
g_hash_table_iter_init(&iter, msi_control->gsi_routes);
i = 0;
while (g_hash_table_iter_next(&iter, &key, &value)) {
struct mshv_user_irq_entry *entry = value;
table->entries[i] = *entry;
i++;
}
table->nr = i;
trace_mshv_commit_msi_routing_table(vm_fd, len);
ret = ioctl(vm_fd, MSHV_SET_MSI_ROUTING, table);
g_free(table);
if (ret < 0) {
error_report("Failed to commit msi routing table");
return -1;
}
msi_control->updated = false;
}
return 0;
}
/*
 * Delete the MSI route for @gsi, if present, and mark the table dirty.
 * Returns 0 on success (including "not found"), -1 on invalid gsi.
 */
static int remove_msi_routing(uint32_t gsi)
{
struct mshv_user_irq_entry *route_entry;
GHashTable *gsi_routes;
trace_mshv_remove_msi_routing(gsi);
if (gsi >= MSHV_MAX_MSI_ROUTES) {
error_report("Invalid GSI: %u", gsi);
return -1;
}
assert(msi_control);
WITH_QEMU_LOCK_GUARD(&msi_control_mutex) {
gsi_routes = msi_control->gsi_routes;
route_entry = g_hash_table_lookup(gsi_routes, GINT_TO_POINTER(gsi));
if (route_entry) {
g_hash_table_remove(gsi_routes, GINT_TO_POINTER(gsi));
g_free(route_entry);
msi_control->updated = true;
}
}
return 0;
}
/*
 * Pass an eventfd which is to be used for injecting interrupts from
 * userland. @resample_fd and @flags select resample/deassign behaviour.
 * Returns the non-negative ioctl result on success, -1 on failure.
 */
static int irqfd(int vm_fd, int fd, int resample_fd, uint32_t gsi,
uint32_t flags)
{
int ret;
struct mshv_user_irqfd arg = {
.fd = fd,
.resamplefd = resample_fd,
.gsi = gsi,
.flags = flags,
};
ret = ioctl(vm_fd, MSHV_IRQFD, &arg);
if (ret < 0) {
error_report("Failed to set irqfd: gsi=%u, fd=%d", gsi, fd);
return -1;
}
return ret;
}
/*
 * Attach @event_fd to @gsi for direct interrupt injection.
 * Returns 0 on success, -1 on failure.
 */
static int register_irqfd(int vm_fd, int event_fd, uint32_t gsi)
{
int ret;
trace_mshv_register_irqfd(vm_fd, event_fd, gsi);
ret = irqfd(vm_fd, event_fd, 0, gsi, 0);
if (ret < 0) {
error_report("Failed to register irqfd: gsi=%u", gsi);
return -1;
}
return 0;
}
/*
 * Register @event_fd as a resampling irqfd for @gsi: the eventfd injects
 * the (level-triggered) interrupt and @resample_fd is signalled when the
 * guest EOIs it. Returns 0 on success, -1 on failure.
 */
static int register_irqfd_with_resample(int vm_fd, int event_fd,
                                        int resample_fd, uint32_t gsi)
{
    int ret;
    uint32_t flags = MSHV_IRQFD_RESAMPLE_FLAG;

    ret = irqfd(vm_fd, event_fd, resample_fd, gsi, flags);
    if (ret < 0) {
        error_report("Failed to register irqfd with resample: gsi=%u", gsi);
        /*
         * irqfd() already reported the failure and errno may have been
         * clobbered since; return -1 like register_irqfd() instead of a
         * possibly-stale -errno.
         */
        return -1;
    }
    return 0;
}
/*
 * Detach @event_fd from @gsi (MSHV_IRQFD deassign).
 * Returns 0 on success, -1 on failure.
 */
static int unregister_irqfd(int vm_fd, int event_fd, uint32_t gsi)
{
    int ret;
    uint32_t flags = MSHV_IRQFD_BIT_DEASSIGN_FLAG;

    ret = irqfd(vm_fd, event_fd, 0, gsi, flags);
    if (ret < 0) {
        error_report("Failed to unregister irqfd: gsi=%u", gsi);
        /*
         * irqfd() already reported the failure and errno may have been
         * clobbered since; return -1 for consistency with register_irqfd().
         */
        return -1;
    }
    return 0;
}
/*
 * Register (@add == true) or deregister the irqfd binding of @event to
 * @virq. When a resample notifier is present, the irqfd is registered in
 * resampling mode so level-triggered interrupts re-fire after guest EOI.
 * Returns 0 on success, negative on failure.
 */
static int irqchip_update_irqfd_notifier_gsi(const EventNotifier *event,
                                             const EventNotifier *resample,
                                             int virq, bool add)
{
    int fd = event_notifier_get_fd(event);
    int rfd = resample ? event_notifier_get_fd(resample) : -1;
    int vm_fd = mshv_state->vm;

    trace_mshv_irqchip_update_irqfd_notifier_gsi(fd, rfd, virq, add);

    if (!add) {
        return unregister_irqfd(vm_fd, fd, virq);
    }

    /* fd 0 is a valid descriptor, so test >= 0 rather than > 0 */
    if (rfd >= 0) {
        return register_irqfd_with_resample(vm_fd, fd, rfd, virq);
    }
    return register_irqfd(vm_fd, fd, virq);
}
/*
 * Allocate an MSHV MSI route for @vector of @dev.
 * Returns the allocated virq, 0 when PCI/MSI is unavailable, or a
 * negative value when no free route slot exists.
 */
int mshv_irqchip_add_msi_route(int vector, PCIDevice *dev)
{
MSIMessage msg = { 0, 0 };
int virq = 0;
if (pci_available && dev) {
msg = pci_get_msi_message(dev, vector);
virq = add_msi_routing(msg.address, le32_to_cpu(msg.data));
}
return virq;
}
/* Drop the MSI route backing @virq; the table is committed later. */
void mshv_irqchip_release_virq(int virq)
{
remove_msi_routing(virq);
}
/*
 * Point the route @virq at the MSI message @msg.
 * @dev is unused; it is part of the shared irqchip interface.
 * Returns 0 on success, -1 on failure.
 */
int mshv_irqchip_update_msi_route(int virq, MSIMessage msg, PCIDevice *dev)
{
    if (set_msi_routing(virq, msg.address, le32_to_cpu(msg.data)) < 0) {
        error_report("Failed to set msi routing");
        return -1;
    }
    return 0;
}
/*
 * Assert a virtual interrupt on the partition via the
 * HVCALL_ASSERT_VIRTUAL_INTERRUPT hypercall. Vector 0 requests are
 * ignored (returns 0). Returns 0 on success, negative on failure.
 */
int mshv_request_interrupt(MshvState *mshv_state, uint32_t interrupt_type, uint32_t vector,
uint32_t vp_index, bool logical_dest_mode,
bool level_triggered)
{
int ret;
int vm_fd = mshv_state->vm;
if (vector == 0) {
warn_report("Ignoring request for interrupt vector 0");
return 0;
}
union hv_interrupt_control control = {
.interrupt_type = interrupt_type,
.level_triggered = level_triggered,
.logical_dest_mode = logical_dest_mode,
.rsvd = 0,
};
struct hv_input_assert_virtual_interrupt arg = {0};
arg.control = control;
/* destination is addressed by VP index, not APIC ID */
arg.dest_addr = (uint64_t)vp_index;
arg.vector = vector;
struct mshv_root_hvcall args = {0};
args.code = HVCALL_ASSERT_VIRTUAL_INTERRUPT;
args.in_sz = sizeof(arg);
args.in_ptr = (uint64_t)&arg;
ret = mshv_hvcall(vm_fd, &args);
if (ret < 0) {
error_report("Failed to request interrupt");
/* NOTE(review): mshv_hvcall() may not set errno; -errno could be stale — confirm */
return -errno;
}
return 0;
}
/*
 * Flush the cached MSI routing table to the hypervisor. Aborts on
 * failure, since continuing with stale routes would misdeliver
 * interrupts.
 */
void mshv_irqchip_commit_routes(void)
{
int ret;
int vm_fd = mshv_state->vm;
ret = commit_msi_routing_table(vm_fd);
if (ret < 0) {
error_report("Failed to commit msi routing table");
abort();
}
}
/* Bind @event (with optional resample notifier) to @virq via irqfd. */
int mshv_irqchip_add_irqfd_notifier_gsi(const EventNotifier *event,
const EventNotifier *resample,
int virq)
{
return irqchip_update_irqfd_notifier_gsi(event, resample, virq, true);
}
/* Unbind @event from @virq. */
int mshv_irqchip_remove_irqfd_notifier_gsi(const EventNotifier *event,
int virq)
{
return irqchip_update_irqfd_notifier_gsi(event, NULL, virq, false);
}
/*
 * Reserve the first IOAPIC_NUM_PINS GSIs with placeholder (addr 0,
 * data 0) routes and commit them, so MSI-X allocations never collide
 * with legacy IOAPIC pins. Returns 0 on success, -1 on failure.
 */
int mshv_reserve_ioapic_msi_routes(int vm_fd)
{
int ret, gsi;
/*
 * Reserve GSI 0-23 for IOAPIC pins, to avoid conflicts of legacy
 * peripherals with MSI-X devices
 */
for (gsi = 0; gsi < IOAPIC_NUM_PINS; gsi++) {
ret = add_msi_routing(0, 0);
if (ret < 0) {
error_report("Failed to reserve GSI %d", gsi);
return -1;
}
}
ret = commit_msi_routing_table(vm_fd);
if (ret < 0) {
error_report("Failed to commit reserved IOAPIC MSI routes");
return -1;
}
return 0;
}

563
accel/mshv/mem.c Normal file
View file

@ -0,0 +1,563 @@
/*
* QEMU MSHV support
*
* Copyright Microsoft, Corp. 2025
*
* Authors:
* Magnus Kulke <magnuskulke@microsoft.com>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*
*/
#include "qemu/osdep.h"
#include "qemu/lockable.h"
#include "qemu/error-report.h"
#include "qemu/rcu.h"
#include "linux/mshv.h"
#include "system/address-spaces.h"
#include "system/mshv.h"
#include "system/mshv_int.h"
#include "exec/memattrs.h"
#include <sys/ioctl.h>
#include "trace.h"
typedef struct SlotsRCUReclaim {
struct rcu_head rcu;
GList *old_head;
MshvMemorySlot *removed_slot;
} SlotsRCUReclaim;
/*
 * RCU callback: free the superseded slot-list head, the removed slot
 * (if any), and the reclaim descriptor, once all readers are done.
 */
static void rcu_reclaim_slotlist(struct rcu_head *rcu)
{
SlotsRCUReclaim *r = container_of(rcu, SlotsRCUReclaim, rcu);
g_list_free(r->old_head);
g_free(r->removed_slot);
g_free(r);
}
/*
 * Atomically publish @new_head as the current slot list and defer
 * freeing @old_head (and optionally @removed_slot) to after the RCU
 * grace period, so concurrent lock-free readers stay safe.
 */
static void publish_slots(GList *new_head, GList *old_head,
MshvMemorySlot *removed_slot)
{
MshvMemorySlotManager *manager = &mshv_state->msm;
assert(manager);
qatomic_store_release(&manager->slots, new_head);
SlotsRCUReclaim *r = g_new(SlotsRCUReclaim, 1);
r->old_head = old_head;
r->removed_slot = removed_slot;
call_rcu1(&r->rcu, rcu_reclaim_slotlist);
}
/*
 * Unlink @slot from the published slot list (copy-and-publish, RCU-safe)
 * and schedule the old list head plus the slot itself for reclamation.
 * Needs to be called with mshv_state->msm.mutex held.
 * Returns 0 on success, -1 if @slot is not in the list.
 */
static int remove_slot(MshvMemorySlot *slot)
{
GList *old_head, *new_head;
MshvMemorySlotManager *manager = &mshv_state->msm;
assert(manager);
old_head = qatomic_load_acquire(&manager->slots);
if (!g_list_find(old_head, slot)) {
error_report("slot requested for removal not found");
return -1;
}
new_head = g_list_copy(old_head);
new_head = g_list_remove(new_head, slot);
manager->n_slots--;
publish_slots(new_head, old_head, slot);
return 0;
}
/*
 * Allocate a new slot describing [gpa, gpa+size) backed by
 * @userspace_addr and publish it at the tail of the slot list.
 * Needs to be called with mshv_state->msm.mutex held.
 * Returns the new slot, or NULL when MSHV_MAX_MEM_SLOTS is reached.
 */
static MshvMemorySlot *append_slot(uint64_t gpa, uint64_t userspace_addr,
uint64_t size, bool readonly)
{
GList *old_head, *new_head;
MshvMemorySlot *slot;
MshvMemorySlotManager *manager = &mshv_state->msm;
assert(manager);
old_head = qatomic_load_acquire(&manager->slots);
if (manager->n_slots >= MSHV_MAX_MEM_SLOTS) {
error_report("no free memory slots available");
return NULL;
}
slot = g_new0(MshvMemorySlot, 1);
slot->guest_phys_addr = gpa;
slot->userspace_addr = userspace_addr;
slot->memory_size = size;
slot->readonly = readonly;
new_head = g_list_copy(old_head);
new_head = g_list_append(new_head, slot);
manager->n_slots++;
publish_slots(new_head, old_head, NULL);
return slot;
}
/*
 * GCompareFunc-style predicate: 0 when the two slots' userspace ranges
 * overlap, -1 otherwise. A slot never "overlaps" itself.
 */
static int slot_overlaps(const MshvMemorySlot *slot1,
const MshvMemorySlot *slot2)
{
uint64_t start_1 = slot1->userspace_addr,
start_2 = slot2->userspace_addr;
size_t len_1 = slot1->memory_size,
len_2 = slot2->memory_size;
if (slot1 == slot2) {
return -1;
}
return ranges_overlap(start_1, len_1, start_2, len_2) ? 0 : -1;
}
/* Acquire-load of slot->mapped, pairing with set_mapped()'s release store. */
static bool is_mapped(MshvMemorySlot *slot)
{
/* Subsequent reads of mapped field see a fully-initialized slot */
return qatomic_load_acquire(&slot->mapped);
}
/*
 * Find slot that is:
 * - overlapping in userspace
 * - currently mapped in the guest
 *
 * Returns NULL when there is no such slot.
 * Needs to be called with mshv_state->msm.mutex or RCU read lock held.
 */
static MshvMemorySlot *find_overlap_mem_slot(GList *head, MshvMemorySlot *slot)
{
GList *found;
MshvMemorySlot *overlap_slot;
found = g_list_find_custom(head, slot, (GCompareFunc) slot_overlaps);
if (!found) {
return NULL;
}
overlap_slot = found->data;
if (!overlap_slot || !is_mapped(overlap_slot)) {
return NULL;
}
return overlap_slot;
}
/* Issue MSHV_SET_GUEST_MEMORY for @region; returns 0 on success, -1 on error. */
static int set_guest_memory(int vm_fd,
const struct mshv_user_mem_region *region)
{
int ret;
ret = ioctl(vm_fd, MSHV_SET_GUEST_MEMORY, region);
if (ret < 0) {
error_report("failed to set guest memory: %s", strerror(errno));
return -1;
}
return 0;
}
/*
 * Map or unmap @slot in the guest physical address space via
 * MSHV_SET_GUEST_MEMORY. Mapped regions are executable, and writable
 * unless the slot is read-only. Returns 0 on success, -1 on failure.
 */
static int map_or_unmap(int vm_fd, const MshvMemorySlot *slot, bool map)
{
    struct mshv_user_mem_region region = {0};

    region.guest_pfn = slot->guest_phys_addr >> MSHV_PAGE_SHIFT;
    region.size = slot->memory_size;
    region.userspace_addr = slot->userspace_addr;

    if (!map) {
        /* use BIT() consistently with the map path below */
        region.flags |= BIT(MSHV_SET_MEM_BIT_UNMAP);
        trace_mshv_unmap_memory(slot->userspace_addr, slot->guest_phys_addr,
                                slot->memory_size);
        return set_guest_memory(vm_fd, &region);
    }

    region.flags = BIT(MSHV_SET_MEM_BIT_EXECUTABLE);
    if (!slot->readonly) {
        region.flags |= BIT(MSHV_SET_MEM_BIT_WRITABLE);
    }

    trace_mshv_map_memory(slot->userspace_addr, slot->guest_phys_addr,
                          slot->memory_size);
    return set_guest_memory(vm_fd, &region);
}
/*
 * GCompareFunc-style predicate: 0 when both slots describe the same
 * (gpa, userspace_addr, size) triple, -1 otherwise.
 */
static int slot_matches_region(const MshvMemorySlot *slot1,
const MshvMemorySlot *slot2)
{
return (slot1->guest_phys_addr == slot2->guest_phys_addr &&
slot1->userspace_addr == slot2->userspace_addr &&
slot1->memory_size == slot2->memory_size) ? 0 : -1;
}
/*
 * Look up the slot exactly matching (@gpa, @size, @userspace_addr).
 * Returns NULL when no such slot is tracked.
 * Needs to be called with mshv_state->msm.mutex held.
 */
static MshvMemorySlot *find_mem_slot_by_region(uint64_t gpa, uint64_t size,
uint64_t userspace_addr)
{
MshvMemorySlot ref_slot = {
.guest_phys_addr = gpa,
.userspace_addr = userspace_addr,
.memory_size = size,
};
GList *found;
MshvMemorySlotManager *manager = &mshv_state->msm;
assert(manager);
found = g_list_find_custom(manager->slots, &ref_slot,
(GCompareFunc) slot_matches_region);
return found ? found->data : NULL;
}
/*
 * GCompareFunc-style predicate: 0 when *gpa_p falls inside @slot.
 * gpa_offset may wrap (unsigned) when gpa < guest_phys_addr, but the
 * explicit <= check rejects that case before the offset matters.
 */
static int slot_covers_gpa(const MshvMemorySlot *slot, uint64_t *gpa_p)
{
uint64_t gpa_offset, gpa = *gpa_p;
gpa_offset = gpa - slot->guest_phys_addr;
return (slot->guest_phys_addr <= gpa && gpa_offset < slot->memory_size)
? 0 : -1;
}
/*
 * Find the slot whose guest-physical range covers @gpa, or NULL.
 * Needs to be called with mshv_state->msm.mutex or RCU read lock held.
 */
static MshvMemorySlot *find_mem_slot_by_gpa(GList *head, uint64_t gpa)
{
GList *found;
MshvMemorySlot *slot;
trace_mshv_find_slot_by_gpa(gpa);
found = g_list_find_custom(head, &gpa, (GCompareFunc) slot_covers_gpa);
if (found) {
slot = found->data;
trace_mshv_found_slot(slot->userspace_addr, slot->guest_phys_addr,
slot->memory_size);
return slot;
}
return NULL;
}
/*
 * Release-store of slot->mapped, pairing with is_mapped()'s acquire load.
 * Needs to be called with mshv_state->msm.mutex held.
 */
static void set_mapped(MshvMemorySlot *slot, bool mapped)
{
/* prior writes to mapped field becomes visible before readers see slot */
qatomic_store_release(&slot->mapped, mapped);
}
/*
 * Handle an unmapped-GPA exit for @gpa: if the slot covering @gpa is
 * shadowed by another slot mapped at an overlapping userspace range,
 * swap the overlapping slot out of the guest and map the @gpa slot in.
 * A lock-free RCU fast path filters the common no-work cases; the state
 * is rechecked under the mutex before any modification. Aborts if the
 * hypervisor rejects either the unmap or the map.
 */
MshvRemapResult mshv_remap_overlap_region(int vm_fd, uint64_t gpa)
{
MshvMemorySlot *gpa_slot, *overlap_slot;
GList *head;
int ret;
MshvMemorySlotManager *manager = &mshv_state->msm;
/* fast path, called often by unmapped_gpa vm exit */
WITH_RCU_READ_LOCK_GUARD() {
assert(manager);
head = qatomic_load_acquire(&manager->slots);
/* return early if no slot is found */
gpa_slot = find_mem_slot_by_gpa(head, gpa);
if (gpa_slot == NULL) {
return MshvRemapNoMapping;
}
/* return early if no overlapping slot is found */
overlap_slot = find_overlap_mem_slot(head, gpa_slot);
if (overlap_slot == NULL) {
return MshvRemapNoOverlap;
}
}
/*
 * We'll modify the mapping list, so we need to upgrade to mutex and
 * recheck.
 */
assert(manager);
QEMU_LOCK_GUARD(&manager->mutex);
/* return early if no slot is found */
gpa_slot = find_mem_slot_by_gpa(manager->slots, gpa);
if (gpa_slot == NULL) {
return MshvRemapNoMapping;
}
/* return early if no overlapping slot is found */
overlap_slot = find_overlap_mem_slot(manager->slots, gpa_slot);
if (overlap_slot == NULL) {
return MshvRemapNoOverlap;
}
/* unmap overlapping slot */
ret = map_or_unmap(vm_fd, overlap_slot, false);
if (ret < 0) {
error_report("failed to unmap overlap region");
abort();
}
set_mapped(overlap_slot, false);
warn_report("mapped out userspace_addr=0x%016lx gpa=0x%010lx size=0x%lx",
overlap_slot->userspace_addr,
overlap_slot->guest_phys_addr,
overlap_slot->memory_size);
/* map region for gpa */
ret = map_or_unmap(vm_fd, gpa_slot, true);
if (ret < 0) {
error_report("failed to map new region");
abort();
}
set_mapped(gpa_slot, true);
warn_report("mapped in userspace_addr=0x%016lx gpa=0x%010lx size=0x%lx",
gpa_slot->userspace_addr, gpa_slot->guest_phys_addr,
gpa_slot->memory_size);
return MshvRemapOk;
}
/*
 * Emulate a read from an unmapped MMIO region: validate @size and fill
 * @data with 0xFF bytes (the conventional open-bus value).
 * Returns 0 on success, -1 when @size is not in 1..8.
 */
static int handle_unmapped_mmio_region_read(uint64_t gpa, uint64_t size,
                                            uint8_t *data)
{
    /* validate first so the warning only fires for reads we emulate */
    if (size == 0 || size > 8) {
        /* PRIu64 keeps the format portable to 32-bit hosts */
        error_report("invalid size %" PRIu64 " for reading from unmapped "
                     "mmio region", size);
        return -1;
    }
    warn_report("read from unmapped mmio region gpa=0x%" PRIx64
                " size=%" PRIu64, gpa, size);
    memset(data, 0xFF, size);
    return 0;
}
/*
 * Read @size bytes of guest memory at @gpa into @data. Reads that decode
 * to no backing region are emulated as all-ones rather than failing.
 * Returns 0 on success, -1 on error.
 */
int mshv_guest_mem_read(uint64_t gpa, uint8_t *data, uintptr_t size,
                        bool is_secure_mode, bool instruction_fetch)
{
    int ret;
    MemTxAttrs memattr = { .secure = is_secure_mode };

    if (instruction_fetch) {
        trace_mshv_insn_fetch(gpa, size);
    } else {
        trace_mshv_mem_read(gpa, size);
    }

    ret = address_space_rw(&address_space_memory, gpa, memattr, (void *)data,
                           size, false);
    if (ret == MEMTX_OK) {
        return 0;
    }
    if (ret == MEMTX_DECODE_ERROR) {
        return handle_unmapped_mmio_region_read(gpa, size, data);
    }
    /* PRIx64 keeps the format portable to 32-bit hosts */
    error_report("failed to read guest memory at 0x%" PRIx64, gpa);
    return -1;
}
/*
 * Write @size bytes from @data into guest memory at @gpa. Writes to
 * unmapped MMIO are warned about and silently dropped (return 0).
 * Returns 0 on success, -1 on error.
 */
int mshv_guest_mem_write(uint64_t gpa, const uint8_t *data, uintptr_t size,
                         bool is_secure_mode)
{
    int ret;
    MemTxAttrs memattr = { .secure = is_secure_mode };

    trace_mshv_mem_write(gpa, size);
    ret = address_space_rw(&address_space_memory, gpa, memattr, (void *)data,
                           size, true);
    if (ret == MEMTX_OK) {
        return 0;
    }
    if (ret == MEMTX_DECODE_ERROR) {
        /* PRIx64/PRIu64 keep the formats portable to 32-bit hosts */
        warn_report("write to unmapped mmio region gpa=0x%" PRIx64
                    " size=%" PRIu64, gpa, (uint64_t)size);
        return 0;
    }
    error_report("Failed to write guest memory");
    return -1;
}
/*
 * Remove the slot exactly matching (@gpa, @size, @userspace_addr):
 * unmap it from the guest if currently mapped, then drop it from the
 * tracked slot list. A missing slot is not an error.
 * Returns 0 on success, negative on failure.
 */
static int tracked_unmap(int vm_fd, uint64_t gpa, uint64_t size,
uint64_t userspace_addr)
{
int ret;
MshvMemorySlot *slot;
MshvMemorySlotManager *manager = &mshv_state->msm;
assert(manager);
QEMU_LOCK_GUARD(&manager->mutex);
slot = find_mem_slot_by_region(gpa, size, userspace_addr);
if (!slot) {
trace_mshv_skip_unset_mem(userspace_addr, gpa, size);
/* no work to do */
return 0;
}
if (!is_mapped(slot)) {
/* remove slot, no need to unmap */
return remove_slot(slot);
}
ret = map_or_unmap(vm_fd, slot, false);
if (ret < 0) {
error_report("failed to unmap memory region");
return ret;
}
return remove_slot(slot);
}
/*
 * Record a new slot for [gpa, gpa+size) backed by @userspace_addr and
 * map it into the guest — unless a currently-mapped slot overlaps the
 * same userspace range, in which case the new slot is only recorded
 * (unmapped) for a later swap-in by mshv_remap_overlap_region().
 * Returns 0 on success, -1 on failure.
 */
static int tracked_map(int vm_fd, uint64_t gpa, uint64_t size, bool readonly,
                       uint64_t userspace_addr)
{
    MshvMemorySlot *slot, *overlap_slot;
    int ret;
    MshvMemorySlotManager *manager = &mshv_state->msm;

    assert(manager);
    QEMU_LOCK_GUARD(&manager->mutex);

    slot = find_mem_slot_by_region(gpa, size, userspace_addr);
    if (slot) {
        error_report("memory region already mapped at gpa=0x%lx, "
                     "userspace_addr=0x%lx, size=0x%lx",
                     slot->guest_phys_addr, slot->userspace_addr,
                     slot->memory_size);
        return -1;
    }

    slot = append_slot(gpa, userspace_addr, size, readonly);
    if (!slot) {
        /* slot table exhausted; append_slot() already reported the error */
        return -1;
    }

    overlap_slot = find_overlap_mem_slot(manager->slots, slot);
    if (overlap_slot) {
        trace_mshv_remap_attempt(slot->userspace_addr,
                                 slot->guest_phys_addr,
                                 slot->memory_size);
        warn_report("attempt to map region [0x%lx-0x%lx], while "
                    "[0x%lx-0x%lx] is already mapped in the guest",
                    userspace_addr, userspace_addr + size - 1,
                    overlap_slot->userspace_addr,
                    overlap_slot->userspace_addr +
                    overlap_slot->memory_size - 1);
        /* do not register mem slot in hv, but record for later swap-in */
        set_mapped(slot, false);
        return 0;
    }

    ret = map_or_unmap(vm_fd, slot, true);
    if (ret < 0) {
        error_report("failed to map memory region");
        return -1;
    }
    set_mapped(slot, true);
    return 0;
}
/*
 * Dispatch a memory-region change to the tracked map/unmap helpers,
 * depending on whether the region is being added or removed.
 */
static int set_memory(uint64_t gpa, uint64_t size, bool readonly,
                      uint64_t userspace_addr, bool add)
{
    int vm_fd = mshv_state->vm;

    return add ? tracked_map(vm_fd, gpa, size, readonly, userspace_addr)
               : tracked_unmap(vm_fd, gpa, size, userspace_addr);
}
/*
 * Calculate and align the start address and the size of the section.
 * The page-aligned start address is stored in *start.
 * Return the size. If the size is 0, the aligned section is empty.
 */
static hwaddr align_section(MemoryRegionSection *section, hwaddr *start)
{
hwaddr size = int128_get64(section->size);
hwaddr delta, aligned;
/*
 * works in page size chunks, but the function may be called
 * with sub-page size and unaligned start address. Pad the start
 * address to next and truncate size to previous page boundary.
 */
aligned = ROUND_UP(section->offset_within_address_space,
qemu_real_host_page_size());
delta = aligned - section->offset_within_address_space;
*start = aligned;
if (delta > size) {
return 0;
}
return (size - delta) & qemu_real_host_page_mask();
}
/*
 * Add or remove the guest mapping for @section. Only page-aligned,
 * page-sized chunks of RAM regions are mapped; writable non-RAM areas
 * are skipped, and non-romd areas are unmapped so accesses trap.
 * Aborts if the hypervisor rejects the change.
 */
void mshv_set_phys_mem(MshvMemoryListener *mml, MemoryRegionSection *section,
                       bool add)
{
    int ret = 0;
    MemoryRegion *area = section->mr;
    bool writable = !area->readonly && !area->rom_device;
    hwaddr start_addr, mr_offset, size;
    void *ram;

    /* compute the aligned span and trace once (was accidentally duplicated) */
    size = align_section(section, &start_addr);
    trace_mshv_set_phys_mem(add, section->mr->name, start_addr);

    /*
     * If the memory device is a writable non-ram area, we do not
     * want to map it into the guest memory. If it is not a ROM device,
     * we want to remove mshv memory mapping, so accesses will trap.
     */
    if (!memory_region_is_ram(area)) {
        if (writable) {
            return;
        } else if (!area->romd_mode) {
            add = false;
        }
    }

    if (!size) {
        return;
    }

    mr_offset = section->offset_within_region + start_addr -
                section->offset_within_address_space;
    ram = memory_region_get_ram_ptr(area) + mr_offset;

    ret = set_memory(start_addr, size, !writable, (uint64_t)ram, add);
    if (ret < 0) {
        error_report("failed to set memory region");
        abort();
    }
}
/* Initialize the per-VM memory slot manager: empty slot list plus mutex. */
void mshv_init_memory_slot_manager(MshvState *mshv_state)
{
MshvMemorySlotManager *manager;
assert(mshv_state);
manager = &mshv_state->msm;
manager->n_slots = 0;
manager->slots = NULL;
qemu_mutex_init(&manager->mutex);
}

9
accel/mshv/meson.build Normal file
View file

@ -0,0 +1,9 @@
mshv_ss = ss.source_set()
mshv_ss.add(if_true: files(
'irq.c',
'mem.c',
'msr.c',
'mshv-all.c'
))
specific_ss.add_all(when: 'CONFIG_MSHV', if_true: mshv_ss)

727
accel/mshv/mshv-all.c Normal file
View file

@ -0,0 +1,727 @@
/*
* QEMU MSHV support
*
* Copyright Microsoft, Corp. 2025
*
* Authors:
* Ziqiao Zhou <ziqiaozhou@microsoft.com>
* Magnus Kulke <magnuskulke@microsoft.com>
* Jinank Jain <jinankjain@microsoft.com>
* Wei Liu <liuwe@microsoft.com>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/event_notifier.h"
#include "qemu/module.h"
#include "qemu/main-loop.h"
#include "hw/boards.h"
#include "hw/hyperv/hvhdk.h"
#include "hw/hyperv/hvhdk_mini.h"
#include "hw/hyperv/hvgdk.h"
#include "hw/hyperv/hvgdk_mini.h"
#include "linux/mshv.h"
#include "qemu/accel.h"
#include "qemu/guest-random.h"
#include "accel/accel-ops.h"
#include "accel/accel-cpu-ops.h"
#include "system/cpus.h"
#include "system/runstate.h"
#include "system/accel-blocker.h"
#include "system/address-spaces.h"
#include "system/mshv.h"
#include "system/mshv_int.h"
#include "system/reset.h"
#include "trace.h"
#include <err.h>
#include <stdint.h>
#include <sys/ioctl.h>
#define TYPE_MSHV_ACCEL ACCEL_CLASS_NAME("mshv")
DECLARE_INSTANCE_CHECKER(MshvState, MSHV_STATE, TYPE_MSHV_ACCEL)
bool mshv_allowed;
MshvState *mshv_state;
/*
 * Open the /dev/mshv device node and return the descriptor via @mshv_fd.
 * Returns 0 on success, -1 (with an error report) on failure.
 */
static int init_mshv(int *mshv_fd)
{
    int fd = open("/dev/mshv", O_RDWR | O_CLOEXEC);

    if (fd >= 0) {
        *mshv_fd = fd;
        return 0;
    }
    error_report("Failed to open /dev/mshv: %s", strerror(errno));
    return -1;
}
/*
 * Set the HV_PARTITION_PROPERTY_TIME_FREEZE partition property.
 * freeze 1 to pause, 0 to resume. Returns 0 on success, -1 on failure.
 */
static int set_time_freeze(int vm_fd, int freeze)
{
int ret;
struct hv_input_set_partition_property in = {0};
in.property_code = HV_PARTITION_PROPERTY_TIME_FREEZE;
in.property_value = freeze;
struct mshv_root_hvcall args = {0};
args.code = HVCALL_SET_PARTITION_PROPERTY;
args.in_sz = sizeof(in);
args.in_ptr = (uint64_t)&in;
ret = mshv_hvcall(vm_fd, &args);
if (ret < 0) {
error_report("Failed to set time freeze");
return -1;
}
return 0;
}
/*
 * Freeze partition time (guest clocks stop advancing).
 * NOTE(review): strerror(errno) may be stale here — set_time_freeze()
 * fails via mshv_hvcall(), which is not shown to set errno; confirm.
 */
static int pause_vm(int vm_fd)
{
int ret;
ret = set_time_freeze(vm_fd, 1);
if (ret < 0) {
error_report("Failed to pause partition: %s", strerror(errno));
return -1;
}
return 0;
}
/* Unfreeze partition time. See pause_vm() regarding the errno caveat. */
static int resume_vm(int vm_fd)
{
int ret;
ret = set_time_freeze(vm_fd, 0);
if (ret < 0) {
error_report("Failed to resume partition: %s", strerror(errno));
return -1;
}
return 0;
}
/*
 * Create an MSHV partition with LAPIC/X2APIC and GPA super pages
 * enabled, no isolation. On success the new partition fd is stored in
 * *vm_fd. Returns 0 on success, -1 on failure.
 */
static int create_partition(int mshv_fd, int *vm_fd)
{
int ret;
struct mshv_create_partition args = {0};
/* Initialize pt_flags with the desired features */
uint64_t pt_flags = (1ULL << MSHV_PT_BIT_LAPIC) |
(1ULL << MSHV_PT_BIT_X2APIC) |
(1ULL << MSHV_PT_BIT_GPA_SUPER_PAGES);
/* Set default isolation type */
uint64_t pt_isolation = MSHV_PT_ISOLATION_NONE;
args.pt_flags = pt_flags;
args.pt_isolation = pt_isolation;
/* on success the ioctl returns the new partition fd */
ret = ioctl(mshv_fd, MSHV_CREATE_PARTITION, &args);
if (ret < 0) {
error_report("Failed to create partition: %s", strerror(errno));
return -1;
}
*vm_fd = ret;
return 0;
}
/*
 * Enable the synthetic processor features the guest relies on (SynIC,
 * synthetic timers, reference TSC/counter, hypercall and VP-index
 * registers, cluster IPIs, ...) via HvSetPartitionProperty.
 *
 * Returns 0 on success, a negative errno value on failure.
 */
static int set_synthetic_proc_features(int vm_fd)
{
    int ret;
    struct hv_input_set_partition_property in = {0};
    union hv_partition_synthetic_processor_features features = {0};

    /* Access the bitfield and set the desired features */
    features.hypervisor_present = 1;
    features.hv1 = 1;
    features.access_partition_reference_counter = 1;
    features.access_synic_regs = 1;
    features.access_synthetic_timer_regs = 1;
    features.access_partition_reference_tsc = 1;
    features.access_frequency_regs = 1;
    features.access_intr_ctrl_regs = 1;
    features.access_vp_index = 1;
    features.access_hypercall_regs = 1;
    features.tb_flush_hypercalls = 1;
    features.synthetic_cluster_ipi = 1;
    features.direct_synthetic_timers = 1;

    /* give the target architecture a chance to adjust the feature set */
    mshv_arch_amend_proc_features(&features);

    in.property_code = HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES;
    in.property_value = features.as_uint64[0];

    struct mshv_root_hvcall args = {0};
    args.code = HVCALL_SET_PARTITION_PROPERTY;
    args.in_sz = sizeof(in);
    args.in_ptr = (uint64_t)&in;

    trace_mshv_hvcall_args("synthetic_proc_features", args.code, args.in_sz);

    ret = mshv_hvcall(vm_fd, &args);
    if (ret < 0) {
        /* fixed typo: "synthethic" -> "synthetic" */
        error_report("Failed to set synthetic proc features");
        return -errno;
    }
    return 0;
}
/*
 * Move the partition out of its creation state with
 * MSHV_INITIALIZE_PARTITION.  Returns 0 on success, -1 on failure.
 */
static int initialize_vm(int vm_fd)
{
    if (ioctl(vm_fd, MSHV_INITIALIZE_PARTITION) < 0) {
        error_report("Failed to initialize partition: %s", strerror(errno));
        return -1;
    }
    return 0;
}
/*
 * Create and fully set up a partition: create it, enable synthetic
 * processor features, initialize it, reserve IOAPIC MSI routes, run
 * arch-specific post-init, and finally freeze it.  The caller resumes
 * the partition when setup is complete (see mshv_init).
 *
 * Returns 0 on success, -1 on failure.  On failure the partition fd
 * (if already created) is left for the caller's error path.
 */
static int create_vm(int mshv_fd, int *vm_fd)
{
    int ret = create_partition(mshv_fd, vm_fd);
    if (ret < 0) {
        return -1;
    }

    ret = set_synthetic_proc_features(*vm_fd);
    if (ret < 0) {
        return -1;
    }

    ret = initialize_vm(*vm_fd);
    if (ret < 0) {
        return -1;
    }

    ret = mshv_reserve_ioapic_msi_routes(*vm_fd);
    if (ret < 0) {
        return -1;
    }

    ret = mshv_arch_post_init_vm(*vm_fd);
    if (ret < 0) {
        return -1;
    }

    /*
     * Always create a frozen partition.  Bug fix: the pause_vm() result
     * was previously ignored, silently producing an unfrozen partition.
     */
    ret = pause_vm(*vm_fd);
    if (ret < 0) {
        return -1;
    }

    return 0;
}
/*
 * MemoryListener region_add hook: map the new section into the
 * partition.  The region is ref'd for as long as the mapping exists
 * (dropped in mem_region_del).
 */
static void mem_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
MshvMemoryListener *mml;
mml = container_of(listener, MshvMemoryListener, listener);
memory_region_ref(section->mr);
mshv_set_phys_mem(mml, section, true);
}
/*
 * MemoryListener region_del hook: unmap the section from the partition
 * and release the reference taken in mem_region_add.
 */
static void mem_region_del(MemoryListener *listener,
MemoryRegionSection *section)
{
MshvMemoryListener *mml;
mml = container_of(listener, MshvMemoryListener, listener);
mshv_set_phys_mem(mml, section, false);
memory_region_unref(section->mr);
}
/* Width (or absence) of an ioeventfd datamatch value. */
typedef enum {
DATAMATCH_NONE,
DATAMATCH_U32,
DATAMATCH_U64,
} DatamatchTag;
/* Tagged union: optional value an MMIO write must equal to trigger. */
typedef struct {
DatamatchTag tag;
union {
uint32_t u32;
uint64_t u64;
} value;
} Datamatch;
/* flags: determine whether to de/assign */
/*
 * Issue the MSHV_IOEVENTFD ioctl to (de)assign @event_fd for MMIO
 * address @addr.  With a datamatch, args.len is set to the match width
 * and the DATAMATCH flag is added; with DATAMATCH_NONE, len stays 0
 * (any write to the address triggers).  Returns the raw ioctl result.
 */
static int ioeventfd(int vm_fd, int event_fd, uint64_t addr, Datamatch dm,
uint32_t flags)
{
struct mshv_user_ioeventfd args = {0};
args.fd = event_fd;
args.addr = addr;
args.flags = flags;
if (dm.tag == DATAMATCH_NONE) {
args.datamatch = 0;
} else {
flags |= BIT(MSHV_IOEVENTFD_BIT_DATAMATCH);
args.flags = flags;
if (dm.tag == DATAMATCH_U64) {
args.len = sizeof(uint64_t);
args.datamatch = dm.value.u64;
} else {
args.len = sizeof(uint32_t);
args.datamatch = dm.value.u32;
}
}
return ioctl(vm_fd, MSHV_IOEVENTFD, &args);
}
static int unregister_ioevent(int vm_fd, int event_fd, uint64_t mmio_addr)
{
uint32_t flags = 0;
Datamatch dm = {0};
flags |= BIT(MSHV_IOEVENTFD_BIT_DEASSIGN);
dm.tag = DATAMATCH_NONE;
return ioeventfd(vm_fd, event_fd, mmio_addr, dm, flags);
}
static int register_ioevent(int vm_fd, int event_fd, uint64_t mmio_addr,
uint64_t val, bool is_64bit, bool is_datamatch)
{
uint32_t flags = 0;
Datamatch dm = {0};
if (!is_datamatch) {
dm.tag = DATAMATCH_NONE;
} else if (is_64bit) {
dm.tag = DATAMATCH_U64;
dm.value.u64 = val;
} else {
dm.tag = DATAMATCH_U32;
dm.value.u32 = val;
}
return ioeventfd(vm_fd, event_fd, mmio_addr, dm, flags);
}
/*
 * MemoryListener eventfd_add hook: wire a notifier to an MMIO address.
 * Registration failure is fatal, matching other accelerators.
 */
static void mem_ioeventfd_add(MemoryListener *listener,
                              MemoryRegionSection *section,
                              bool match_data, uint64_t data,
                              EventNotifier *e)
{
    int fd = event_notifier_get_fd(e);
    int ret;
    bool is_64 = int128_get64(section->size) == 8;
    uint64_t addr = section->offset_within_address_space;

    trace_mshv_mem_ioeventfd_add(addr, int128_get64(section->size), data);
    ret = register_ioevent(mshv_state->vm, fd, addr, data, is_64, match_data);
    if (ret < 0) {
        /*
         * register_ioevent() returns the raw ioctl result (-1 with errno
         * set), so report errno rather than strerror(-ret), which would
         * always have printed strerror(1).
         */
        error_report("Failed to register ioeventfd: %s (%d)",
                     strerror(errno), errno);
        abort();
    }
}
/*
 * MemoryListener eventfd_del hook: unwire a notifier from an MMIO
 * address.  Deregistration failure is fatal.
 */
static void mem_ioeventfd_del(MemoryListener *listener,
                              MemoryRegionSection *section,
                              bool match_data, uint64_t data,
                              EventNotifier *e)
{
    int fd = event_notifier_get_fd(e);
    int ret;
    uint64_t addr = section->offset_within_address_space;

    trace_mshv_mem_ioeventfd_del(section->offset_within_address_space,
                                 int128_get64(section->size), data);
    ret = unregister_ioevent(mshv_state->vm, fd, addr);
    if (ret < 0) {
        /*
         * unregister_ioevent() returns the raw ioctl result (-1 with
         * errno set), so report errno rather than strerror(-ret).
         */
        error_report("Failed to unregister ioeventfd: %s (%d)",
                     strerror(errno), errno);
        abort();
    }
}
/* Template listener for RAM/MMIO address spaces (copied per space). */
static MemoryListener mshv_memory_listener = {
.name = "mshv",
.priority = MEMORY_LISTENER_PRIORITY_ACCEL,
.region_add = mem_region_add,
.region_del = mem_region_del,
.eventfd_add = mem_ioeventfd_add,
.eventfd_del = mem_ioeventfd_del,
};
/* Listener for the PIO address space; intentionally has no hooks. */
static MemoryListener mshv_io_listener = {
.name = "mshv", .priority = MEMORY_LISTENER_PRIORITY_DEV_BACKEND,
/* MSHV does not support PIO eventfd */
};
/*
 * Clone the template memory listener into @mml, register it on @as and
 * record the pairing in the first free slot of s->as.
 * NOTE(review): if all s->nr_as slots are taken the pairing is silently
 * dropped; as_id is currently unused.
 */
static void register_mshv_memory_listener(MshvState *s, MshvMemoryListener *mml,
AddressSpace *as, int as_id,
const char *name)
{
int i;
mml->listener = mshv_memory_listener;
mml->listener.name = name;
memory_listener_register(&mml->listener, as);
for (i = 0; i < s->nr_as; ++i) {
if (!s->as[i].as) {
s->as[i].as = as;
s->as[i].ml = mml;
break;
}
}
}
/*
 * Forward a root-partition hypercall to the mshv kernel driver.
 * Returns the non-negative ioctl result on success, -1 on failure.
 */
int mshv_hvcall(int fd, const struct mshv_root_hvcall *args)
{
    int ret = ioctl(fd, MSHV_ROOT_HVCALL, args);

    if (ret < 0) {
        error_report("Failed to perform hvcall: %s", strerror(errno));
        return -1;
    }
    return ret;
}
/*
 * Allocate per-vCPU accelerator state and create the virtual processor
 * in the hypervisor.  Returns 0 on success, -1 on failure.
 * NOTE(review): vp_index is a uint8_t, truncating cpu_index > 255 —
 * confirm against the driver's VP index width.
 */
static int mshv_init_vcpu(CPUState *cpu)
{
    int vm_fd = mshv_state->vm;
    uint8_t vp_index = cpu->cpu_index;
    int ret;

    cpu->accel = g_new0(AccelCPUState, 1);
    mshv_arch_init_vcpu(cpu);

    ret = mshv_create_vcpu(vm_fd, vp_index, &cpu->accel->cpufd);
    if (ret < 0) {
        /* undo arch init and free the accel state instead of leaking it */
        mshv_arch_destroy_vcpu(cpu);
        g_clear_pointer(&cpu->accel, g_free);
        return -1;
    }

    /* force a full register upload before the first run */
    cpu->accel->dirty = true;
    return 0;
}
/*
 * Accelerator init_machine hook: open /dev/mshv, create and configure
 * the partition (created frozen, resumed here once setup is done), and
 * register the memory listeners.  Returns 0 on success, -1 on failure.
 */
static int mshv_init(AccelState *as, MachineState *ms)
{
MshvState *s;
int mshv_fd, vm_fd, ret;
if (mshv_state) {
warn_report("MSHV accelerator already initialized");
return 0;
}
s = MSHV_STATE(as);
accel_blocker_init();
s->vm = 0;
ret = init_mshv(&mshv_fd);
if (ret < 0) {
return -1;
}
/* set up MMIO emulation, MSI routing and memory-slot tracking state */
mshv_init_mmio_emu();
mshv_init_msicontrol();
mshv_init_memory_slot_manager(s);
ret = create_vm(mshv_fd, &vm_fd);
if (ret < 0) {
close(mshv_fd);
return -1;
}
/* partition was created frozen; unfreeze it now that setup is done */
ret = resume_vm(vm_fd);
if (ret < 0) {
close(mshv_fd);
close(vm_fd);
return -1;
}
s->vm = vm_fd;
s->fd = mshv_fd;
s->nr_as = 1;
s->as = g_new0(MshvAddressSpace, s->nr_as);
/* publish global state before listeners fire (they use mshv_state) */
mshv_state = s;
register_mshv_memory_listener(s, &s->memory_listener, &address_space_memory,
0, "mshv-memory");
memory_listener_register(&mshv_io_listener, &address_space_io);
return 0;
}
/*
 * Tear down a vCPU: remove it from the partition, drop arch state and
 * free the accel state.  Always returns 0.
 * NOTE(review): the vcpu fd is reset to 0, not -1 — fd 0 is a valid
 * descriptor; verify callers treat 0 as "no vcpu".
 */
static int mshv_destroy_vcpu(CPUState *cpu)
{
int cpu_fd = mshv_vcpufd(cpu);
int vm_fd = mshv_state->vm;
mshv_remove_vcpu(vm_fd, cpu_fd);
mshv_vcpufd(cpu) = 0;
mshv_arch_destroy_vcpu(cpu);
g_clear_pointer(&cpu->accel, g_free);
return 0;
}
/*
 * Run the vCPU until an exit that needs the main loop's attention.
 * Runs without the BQL; flushes QEMU-side register state to the
 * hypervisor first if it is dirty.  Returns EXCP_INTERRUPT (or a
 * negative value on register-upload failure, which stops the VM).
 */
static int mshv_cpu_exec(CPUState *cpu)
{
hv_message mshv_msg;
enum MshvVmExit exit_reason;
int ret = 0;
bql_unlock();
cpu_exec_start(cpu);
do {
if (cpu->accel->dirty) {
ret = mshv_arch_put_registers(cpu);
if (ret) {
error_report("Failed to put registers after init: %s",
strerror(-ret));
ret = -1;
break;
}
cpu->accel->dirty = false;
}
ret = mshv_run_vcpu(mshv_state->vm, cpu, &mshv_msg, &exit_reason);
if (ret < 0) {
error_report("Failed to run on vcpu %d", cpu->cpu_index);
abort();
}
switch (exit_reason) {
case MshvVmExitIgnore:
/* exit already handled internally; keep running */
break;
default:
/* hand control back to the main loop */
ret = EXCP_INTERRUPT;
break;
}
} while (ret == 0);
cpu_exec_end(cpu);
bql_lock();
if (ret < 0) {
cpu_dump_state(cpu, stderr, CPU_DUMP_CODE);
vm_stop(RUN_STATE_INTERNAL_ERROR);
}
return ret;
}
/*
 * The signal handler is triggered when QEMU's main thread receives a SIG_IPI
 * (SIGUSR1). This signal causes the current CPU thread to be kicked, forcing a
 * VM exit on the CPU. The VM exit generates an exit reason that breaks the loop
 * (see mshv_cpu_exec). If the exit is due to a Ctrl+A+x command, the system
 * will shut down. For other cases, the system will continue running.
 */
static void sa_ipi_handler(int sig)
{
/* TODO: call IOCTL to set_immediate_exit, once implemented. */
qemu_cpu_kick_self();
}
/*
 * Install the SIG_IPI handler on this vCPU thread and unblock SIG_IPI
 * in the thread's signal mask so kicks can interrupt vcpu runs.
 * NOTE(review): sigaction()/pthread_sigmask() results are unchecked.
 */
static void init_signal(CPUState *cpu)
{
/* init cpu signals */
struct sigaction sigact;
sigset_t set;
memset(&sigact, 0, sizeof(sigact));
sigact.sa_handler = sa_ipi_handler;
sigaction(SIG_IPI, &sigact, NULL);
/* start from the current mask and remove only SIG_IPI */
pthread_sigmask(SIG_BLOCK, NULL, &set);
sigdelset(&set, SIG_IPI);
pthread_sigmask(SIG_SETMASK, &set, NULL);
}
/*
 * Per-vCPU thread body: create the virtual processor, then loop
 * processing CPU events and running the guest until the CPU is
 * unplugged.  Holds the BQL except while executing guest code.
 */
static void *mshv_vcpu_thread(void *arg)
{
CPUState *cpu = arg;
int ret;
rcu_register_thread();
bql_lock();
qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id();
current_cpu = cpu;
ret = mshv_init_vcpu(cpu);
if (ret < 0) {
error_report("Failed to init vcpu %d", cpu->cpu_index);
goto cleanup;
}
init_signal(cpu);
/* signal CPU creation */
cpu_thread_signal_created(cpu);
qemu_guest_random_seed_thread_part2(cpu->random_seed);
do {
qemu_process_cpu_events(cpu);
if (cpu_can_run(cpu)) {
mshv_cpu_exec(cpu);
}
} while (!cpu->unplug || cpu_can_run(cpu));
mshv_destroy_vcpu(cpu);
cleanup:
cpu_thread_signal_destroyed(cpu);
bql_unlock();
rcu_unregister_thread();
return NULL;
}
/*
 * AccelOps create_vcpu_thread hook: allocate the thread/halt-condition
 * objects and spawn the per-vCPU thread.
 */
static void mshv_start_vcpu_thread(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));

    qemu_cond_init(cpu->halt_cond);

    /*
     * Bug fix: thread_name was previously used uninitialized (passed to
     * the trace point and qemu_thread_create without ever being set).
     */
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/MSHV",
             cpu->cpu_index);

    trace_mshv_start_vcpu_thread(thread_name, cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, mshv_vcpu_thread, cpu,
                       QEMU_THREAD_JOINABLE);
}
/*
 * run_on_cpu worker: push QEMU's register state into the hypervisor
 * after machine init.  Failure is fatal.
 */
static void do_mshv_cpu_synchronize_post_init(CPUState *cpu,
run_on_cpu_data arg)
{
int ret = mshv_arch_put_registers(cpu);
if (ret < 0) {
error_report("Failed to put registers after init: %s", strerror(-ret));
abort();
}
cpu->accel->dirty = false;
}
/* AccelOps hook: schedule the post-init register upload on the vCPU. */
static void mshv_cpu_synchronize_post_init(CPUState *cpu)
{
run_on_cpu(cpu, do_mshv_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}
/*
 * AccelOps hook: upload registers after a CPU reset.  On failure the
 * VM is stopped with an internal error instead of aborting.
 */
static void mshv_cpu_synchronize_post_reset(CPUState *cpu)
{
int ret = mshv_arch_put_registers(cpu);
if (ret) {
error_report("Failed to put registers after reset: %s",
strerror(-ret));
cpu_dump_state(cpu, stderr, CPU_DUMP_CODE);
vm_stop(RUN_STATE_INTERNAL_ERROR);
}
cpu->accel->dirty = false;
}
/*
 * run_on_cpu worker: mark the QEMU-side register copy dirty so the
 * state loaded by loadvm is pushed to the hypervisor before running.
 */
static void do_mshv_cpu_synchronize_pre_loadvm(CPUState *cpu,
run_on_cpu_data arg)
{
cpu->accel->dirty = true;
}
/* AccelOps hook: invalidate cached register state before loadvm. */
static void mshv_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
run_on_cpu(cpu, do_mshv_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}
/*
 * run_on_cpu worker: pull register state from the hypervisor into
 * QEMU's copy, then mark it dirty so it is pushed back before the next
 * run.  A load failure stops the VM with an internal error.
 */
static void do_mshv_cpu_synchronize(CPUState *cpu, run_on_cpu_data arg)
{
if (!cpu->accel->dirty) {
int ret = mshv_load_regs(cpu);
if (ret < 0) {
error_report("Failed to load registers for vcpu %d",
cpu->cpu_index);
cpu_dump_state(cpu, stderr, CPU_DUMP_CODE);
vm_stop(RUN_STATE_INTERNAL_ERROR);
}
cpu->accel->dirty = true;
}
}
/* AccelOps hook: fetch registers from the hypervisor if not cached. */
static void mshv_cpu_synchronize(CPUState *cpu)
{
if (!cpu->accel->dirty) {
run_on_cpu(cpu, do_mshv_cpu_synchronize, RUN_ON_CPU_NULL);
}
}
/* MSHV vCPUs cannot be reset in place. */
static bool mshv_cpus_are_resettable(void)
{
return false;
}
/* QOM class init for the "mshv" accelerator. */
static void mshv_accel_class_init(ObjectClass *oc, const void *data)
{
AccelClass *ac = ACCEL_CLASS(oc);
ac->name = "MSHV";
ac->init_machine = mshv_init;
ac->allowed = &mshv_allowed;
}
/* QOM instance init: no partition exists yet. */
static void mshv_accel_instance_init(Object *obj)
{
MshvState *s = MSHV_STATE(obj);
s->vm = 0;
}
/* QOM type registration for the accelerator object. */
static const TypeInfo mshv_accel_type = {
.name = TYPE_MSHV_ACCEL,
.parent = TYPE_ACCEL,
.instance_init = mshv_accel_instance_init,
.class_init = mshv_accel_class_init,
.instance_size = sizeof(MshvState),
};
/* Wire the per-vCPU operations used by QEMU's generic CPU loop. */
static void mshv_accel_ops_class_init(ObjectClass *oc, const void *data)
{
AccelOpsClass *ops = ACCEL_OPS_CLASS(oc);
ops->create_vcpu_thread = mshv_start_vcpu_thread;
ops->synchronize_post_init = mshv_cpu_synchronize_post_init;
ops->synchronize_post_reset = mshv_cpu_synchronize_post_reset;
ops->synchronize_state = mshv_cpu_synchronize;
ops->synchronize_pre_loadvm = mshv_cpu_synchronize_pre_loadvm;
ops->cpus_are_resettable = mshv_cpus_are_resettable;
ops->handle_interrupt = generic_handle_interrupt;
}
/* QOM type registration for the accel-ops class (class-only, abstract). */
static const TypeInfo mshv_accel_ops_type = {
.name = ACCEL_OPS_NAME("mshv"),
.parent = TYPE_ACCEL_OPS,
.class_init = mshv_accel_ops_class_init,
.abstract = true,
};
/* Register both QOM types at module load time. */
static void mshv_type_init(void)
{
type_register_static(&mshv_accel_type);
type_register_static(&mshv_accel_ops_type);
}
type_init(mshv_type_init);

375
accel/mshv/msr.c Normal file
View file

@ -0,0 +1,375 @@
/*
* QEMU MSHV support
*
* Copyright Microsoft, Corp. 2025
*
* Authors: Magnus Kulke <magnuskulke@microsoft.com>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "qemu/osdep.h"
#include "system/mshv.h"
#include "system/mshv_int.h"
#include "hw/hyperv/hvgdk_mini.h"
#include "linux/mshv.h"
#include "qemu/error-report.h"
/*
 * MSRs that mshv_configure_msr() will forward to the hypervisor; all
 * other indices are silently filtered out.  Sorted in place at startup
 * (init_sorted_msr_map) so mshv_is_supported_msr() can bsearch it.
 *
 * Declared with an unsized bound so msr_count always tracks the
 * initializer list; the previous fixed [64] would have silently
 * zero-padded (making MSR 0 "supported") if an entry were removed.
 */
static uint32_t supported_msrs[] = {
    IA32_MSR_TSC,
    IA32_MSR_EFER,
    IA32_MSR_KERNEL_GS_BASE,
    IA32_MSR_APIC_BASE,
    IA32_MSR_PAT,
    IA32_MSR_SYSENTER_CS,
    IA32_MSR_SYSENTER_ESP,
    IA32_MSR_SYSENTER_EIP,
    IA32_MSR_STAR,
    IA32_MSR_LSTAR,
    IA32_MSR_CSTAR,
    IA32_MSR_SFMASK,
    IA32_MSR_MTRR_DEF_TYPE,
    IA32_MSR_MTRR_PHYSBASE0,
    IA32_MSR_MTRR_PHYSMASK0,
    IA32_MSR_MTRR_PHYSBASE1,
    IA32_MSR_MTRR_PHYSMASK1,
    IA32_MSR_MTRR_PHYSBASE2,
    IA32_MSR_MTRR_PHYSMASK2,
    IA32_MSR_MTRR_PHYSBASE3,
    IA32_MSR_MTRR_PHYSMASK3,
    IA32_MSR_MTRR_PHYSBASE4,
    IA32_MSR_MTRR_PHYSMASK4,
    IA32_MSR_MTRR_PHYSBASE5,
    IA32_MSR_MTRR_PHYSMASK5,
    IA32_MSR_MTRR_PHYSBASE6,
    IA32_MSR_MTRR_PHYSMASK6,
    IA32_MSR_MTRR_PHYSBASE7,
    IA32_MSR_MTRR_PHYSMASK7,
    IA32_MSR_MTRR_FIX64K_00000,
    IA32_MSR_MTRR_FIX16K_80000,
    IA32_MSR_MTRR_FIX16K_A0000,
    IA32_MSR_MTRR_FIX4K_C0000,
    IA32_MSR_MTRR_FIX4K_C8000,
    IA32_MSR_MTRR_FIX4K_D0000,
    IA32_MSR_MTRR_FIX4K_D8000,
    IA32_MSR_MTRR_FIX4K_E0000,
    IA32_MSR_MTRR_FIX4K_E8000,
    IA32_MSR_MTRR_FIX4K_F0000,
    IA32_MSR_MTRR_FIX4K_F8000,
    IA32_MSR_TSC_AUX,
    IA32_MSR_DEBUG_CTL,
    HV_X64_MSR_GUEST_OS_ID,
    HV_X64_MSR_SINT0,
    HV_X64_MSR_SINT1,
    HV_X64_MSR_SINT2,
    HV_X64_MSR_SINT3,
    HV_X64_MSR_SINT4,
    HV_X64_MSR_SINT5,
    HV_X64_MSR_SINT6,
    HV_X64_MSR_SINT7,
    HV_X64_MSR_SINT8,
    HV_X64_MSR_SINT9,
    HV_X64_MSR_SINT10,
    HV_X64_MSR_SINT11,
    HV_X64_MSR_SINT12,
    HV_X64_MSR_SINT13,
    HV_X64_MSR_SINT14,
    HV_X64_MSR_SINT15,
    HV_X64_MSR_SCONTROL,
    HV_X64_MSR_SIEFP,
    HV_X64_MSR_SIMP,
    HV_X64_MSR_REFERENCE_TSC,
    HV_X64_MSR_EOM,
};
/* number of entries above (kept in sync automatically by ARRAY_SIZE) */
static const size_t msr_count = ARRAY_SIZE(supported_msrs);
/*
 * qsort/bsearch comparator for uint32_t MSR indices.
 *
 * Bug fix: the previous `*(uint32_t *)a - *(uint32_t *)b` computed an
 * unsigned difference and truncated it to int, yielding the wrong sign
 * for indices >= 0x80000000 (e.g. IA32_MSR_EFER == 0xC0000080), which
 * corrupted both the sort order and subsequent binary searches.
 */
static int compare_msr_index(const void *a, const void *b)
{
    uint32_t lhs = *(const uint32_t *)a;
    uint32_t rhs = *(const uint32_t *)b;

    return (lhs > rhs) - (lhs < rhs);
}
/* Sort the table once at load time so mshv_is_supported_msr can bsearch. */
__attribute__((constructor))
static void init_sorted_msr_map(void)
{
qsort(supported_msrs, msr_count, sizeof(uint32_t), compare_msr_index);
}
/* Return non-zero iff @msr is in the sorted supported_msrs table. */
static int mshv_is_supported_msr(uint32_t msr)
{
return bsearch(&msr, supported_msrs, msr_count, sizeof(uint32_t),
compare_msr_index) != NULL;
}
static int mshv_msr_to_hv_reg_name(uint32_t msr, uint32_t *hv_reg)
{
switch (msr) {
case IA32_MSR_TSC:
*hv_reg = HV_X64_REGISTER_TSC;
return 0;
case IA32_MSR_EFER:
*hv_reg = HV_X64_REGISTER_EFER;
return 0;
case IA32_MSR_KERNEL_GS_BASE:
*hv_reg = HV_X64_REGISTER_KERNEL_GS_BASE;
return 0;
case IA32_MSR_APIC_BASE:
*hv_reg = HV_X64_REGISTER_APIC_BASE;
return 0;
case IA32_MSR_PAT:
*hv_reg = HV_X64_REGISTER_PAT;
return 0;
case IA32_MSR_SYSENTER_CS:
*hv_reg = HV_X64_REGISTER_SYSENTER_CS;
return 0;
case IA32_MSR_SYSENTER_ESP:
*hv_reg = HV_X64_REGISTER_SYSENTER_ESP;
return 0;
case IA32_MSR_SYSENTER_EIP:
*hv_reg = HV_X64_REGISTER_SYSENTER_EIP;
return 0;
case IA32_MSR_STAR:
*hv_reg = HV_X64_REGISTER_STAR;
return 0;
case IA32_MSR_LSTAR:
*hv_reg = HV_X64_REGISTER_LSTAR;
return 0;
case IA32_MSR_CSTAR:
*hv_reg = HV_X64_REGISTER_CSTAR;
return 0;
case IA32_MSR_SFMASK:
*hv_reg = HV_X64_REGISTER_SFMASK;
return 0;
case IA32_MSR_MTRR_CAP:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_CAP;
return 0;
case IA32_MSR_MTRR_DEF_TYPE:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_DEF_TYPE;
return 0;
case IA32_MSR_MTRR_PHYSBASE0:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_BASE0;
return 0;
case IA32_MSR_MTRR_PHYSMASK0:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_MASK0;
return 0;
case IA32_MSR_MTRR_PHYSBASE1:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_BASE1;
return 0;
case IA32_MSR_MTRR_PHYSMASK1:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_MASK1;
return 0;
case IA32_MSR_MTRR_PHYSBASE2:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_BASE2;
return 0;
case IA32_MSR_MTRR_PHYSMASK2:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_MASK2;
return 0;
case IA32_MSR_MTRR_PHYSBASE3:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_BASE3;
return 0;
case IA32_MSR_MTRR_PHYSMASK3:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_MASK3;
return 0;
case IA32_MSR_MTRR_PHYSBASE4:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_BASE4;
return 0;
case IA32_MSR_MTRR_PHYSMASK4:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_MASK4;
return 0;
case IA32_MSR_MTRR_PHYSBASE5:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_BASE5;
return 0;
case IA32_MSR_MTRR_PHYSMASK5:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_MASK5;
return 0;
case IA32_MSR_MTRR_PHYSBASE6:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_BASE6;
return 0;
case IA32_MSR_MTRR_PHYSMASK6:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_MASK6;
return 0;
case IA32_MSR_MTRR_PHYSBASE7:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_BASE7;
return 0;
case IA32_MSR_MTRR_PHYSMASK7:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_MASK7;
return 0;
case IA32_MSR_MTRR_FIX64K_00000:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_FIX64K00000;
return 0;
case IA32_MSR_MTRR_FIX16K_80000:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_FIX16K80000;
return 0;
case IA32_MSR_MTRR_FIX16K_A0000:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_FIX16KA0000;
return 0;
case IA32_MSR_MTRR_FIX4K_C0000:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_FIX4KC0000;
return 0;
case IA32_MSR_MTRR_FIX4K_C8000:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_FIX4KC8000;
return 0;
case IA32_MSR_MTRR_FIX4K_D0000:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_FIX4KD0000;
return 0;
case IA32_MSR_MTRR_FIX4K_D8000:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_FIX4KD8000;
return 0;
case IA32_MSR_MTRR_FIX4K_E0000:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_FIX4KE0000;
return 0;
case IA32_MSR_MTRR_FIX4K_E8000:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_FIX4KE8000;
return 0;
case IA32_MSR_MTRR_FIX4K_F0000:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_FIX4KF0000;
return 0;
case IA32_MSR_MTRR_FIX4K_F8000:
*hv_reg = HV_X64_REGISTER_MSR_MTRR_FIX4KF8000;
return 0;
case IA32_MSR_TSC_AUX:
*hv_reg = HV_X64_REGISTER_TSC_AUX;
return 0;
case IA32_MSR_BNDCFGS:
*hv_reg = HV_X64_REGISTER_BNDCFGS;
return 0;
case IA32_MSR_DEBUG_CTL:
*hv_reg = HV_X64_REGISTER_DEBUG_CTL;
return 0;
case IA32_MSR_TSC_ADJUST:
*hv_reg = HV_X64_REGISTER_TSC_ADJUST;
return 0;
case IA32_MSR_SPEC_CTRL:
*hv_reg = HV_X64_REGISTER_SPEC_CTRL;
return 0;
case HV_X64_MSR_GUEST_OS_ID:
*hv_reg = HV_REGISTER_GUEST_OS_ID;
return 0;
case HV_X64_MSR_SINT0:
*hv_reg = HV_REGISTER_SINT0;
return 0;
case HV_X64_MSR_SINT1:
*hv_reg = HV_REGISTER_SINT1;
return 0;
case HV_X64_MSR_SINT2:
*hv_reg = HV_REGISTER_SINT2;
return 0;
case HV_X64_MSR_SINT3:
*hv_reg = HV_REGISTER_SINT3;
return 0;
case HV_X64_MSR_SINT4:
*hv_reg = HV_REGISTER_SINT4;
return 0;
case HV_X64_MSR_SINT5:
*hv_reg = HV_REGISTER_SINT5;
return 0;
case HV_X64_MSR_SINT6:
*hv_reg = HV_REGISTER_SINT6;
return 0;
case HV_X64_MSR_SINT7:
*hv_reg = HV_REGISTER_SINT7;
return 0;
case HV_X64_MSR_SINT8:
*hv_reg = HV_REGISTER_SINT8;
return 0;
case HV_X64_MSR_SINT9:
*hv_reg = HV_REGISTER_SINT9;
return 0;
case HV_X64_MSR_SINT10:
*hv_reg = HV_REGISTER_SINT10;
return 0;
case HV_X64_MSR_SINT11:
*hv_reg = HV_REGISTER_SINT11;
return 0;
case HV_X64_MSR_SINT12:
*hv_reg = HV_REGISTER_SINT12;
return 0;
case HV_X64_MSR_SINT13:
*hv_reg = HV_REGISTER_SINT13;
return 0;
case HV_X64_MSR_SINT14:
*hv_reg = HV_REGISTER_SINT14;
return 0;
case HV_X64_MSR_SINT15:
*hv_reg = HV_REGISTER_SINT15;
return 0;
case IA32_MSR_MISC_ENABLE:
*hv_reg = HV_X64_REGISTER_MSR_IA32_MISC_ENABLE;
return 0;
case HV_X64_MSR_SCONTROL:
*hv_reg = HV_REGISTER_SCONTROL;
return 0;
case HV_X64_MSR_SIEFP:
*hv_reg = HV_REGISTER_SIEFP;
return 0;
case HV_X64_MSR_SIMP:
*hv_reg = HV_REGISTER_SIMP;
return 0;
case HV_X64_MSR_REFERENCE_TSC:
*hv_reg = HV_REGISTER_REFERENCE_TSC;
return 0;
case HV_X64_MSR_EOM:
*hv_reg = HV_REGISTER_EOM;
return 0;
default:
error_report("failed to map MSR %u to HV register name", msr);
return -1;
}
}
/*
 * Convert a list of MshvMsrEntry into hv_register_assoc pairs and set
 * them in one mshv_set_generic_regs() call.  Returns 0 on success, a
 * negative value on failure.  Frees the assoc array on all paths; the
 * caller owns the list.
 * NOTE(review): &name (enum hv_register_name) is passed where a
 * uint32_t * is expected — presumably the enum is 32 bits wide here;
 * confirm against the hvgdk headers.
 */
static int set_msrs(const CPUState *cpu, GList *msrs)
{
size_t n_msrs;
GList *entries;
MshvMsrEntry *entry;
enum hv_register_name name;
struct hv_register_assoc *assoc;
int ret;
size_t i = 0;
n_msrs = g_list_length(msrs);
hv_register_assoc *assocs = g_new0(hv_register_assoc, n_msrs);
entries = msrs;
for (const GList *elem = entries; elem != NULL; elem = elem->next) {
entry = elem->data;
ret = mshv_msr_to_hv_reg_name(entry->index, &name);
if (ret < 0) {
g_free(assocs);
return ret;
}
assoc = &assocs[i];
assoc->name = name;
/* the union has been initialized to 0 */
assoc->value.reg64 = entry->data;
i++;
}
ret = mshv_set_generic_regs(cpu, assocs, n_msrs);
g_free(assocs);
if (ret < 0) {
error_report("failed to set msrs");
return -1;
}
return 0;
}
/*
 * Filter @msrs down to the indices in supported_msrs and push the
 * survivors to the hypervisor.  Unsupported MSRs are silently dropped.
 * Returns the result of set_msrs() (0 on success, negative on failure).
 */
int mshv_configure_msr(const CPUState *cpu, const MshvMsrEntry *msrs,
size_t n_msrs)
{
GList *valid_msrs = NULL;
uint32_t msr_index;
int ret;
for (size_t i = 0; i < n_msrs; i++) {
msr_index = msrs[i].index;
/* check whether index of msrs is in SUPPORTED_MSRS */
if (mshv_is_supported_msr(msr_index)) {
/* list stores pointers into caller's array; const is cast away */
valid_msrs = g_list_append(valid_msrs, (void *) &msrs[i]);
}
}
ret = set_msrs(cpu, valid_msrs);
g_list_free(valid_msrs);
return ret;
}

33
accel/mshv/trace-events Normal file
View file

@ -0,0 +1,33 @@
# Authors: Ziqiao Zhou <ziqiaozhou@microsoft.com>
# Magnus Kulke <magnuskulke@microsoft.com>
#
# SPDX-License-Identifier: GPL-2.0-or-later
mshv_start_vcpu_thread(const char* thread, uint32_t cpu) "thread=%s cpu_index=%d"
mshv_set_memory(bool add, uint64_t gpa, uint64_t size, uint64_t user_addr, bool readonly, int ret) "add=%d gpa=0x%" PRIx64 " size=0x%" PRIx64 " user=0x%" PRIx64 " readonly=%d result=%d"
mshv_mem_ioeventfd_add(uint64_t addr, uint32_t size, uint32_t data) "addr=0x%" PRIx64 " size=%d data=0x%x"
mshv_mem_ioeventfd_del(uint64_t addr, uint32_t size, uint32_t data) "addr=0x%" PRIx64 " size=%d data=0x%x"
mshv_hvcall_args(const char* hvcall, uint16_t code, uint16_t in_sz) "built args for '%s' code: %d in_sz: %d"
mshv_handle_interrupt(uint32_t cpu, int mask) "cpu_index=%d mask=0x%x"
mshv_set_msi_routing(uint32_t gsi, uint64_t addr, uint32_t data) "gsi=%d addr=0x%" PRIx64 " data=0x%x"
mshv_remove_msi_routing(uint32_t gsi) "gsi=%d"
mshv_add_msi_routing(uint64_t addr, uint32_t data) "addr=0x%" PRIx64 " data=0x%x"
mshv_commit_msi_routing_table(int vm_fd, int len) "vm_fd=%d table_size=%d"
mshv_register_irqfd(int vm_fd, int event_fd, uint32_t gsi) "vm_fd=%d event_fd=%d gsi=%d"
mshv_irqchip_update_irqfd_notifier_gsi(int event_fd, int resample_fd, int virq, bool add) "event_fd=%d resample_fd=%d virq=%d add=%d"
mshv_insn_fetch(uint64_t addr, size_t size) "gpa=0x%" PRIx64 " size=%zu"
mshv_mem_write(uint64_t addr, size_t size) "\tgpa=0x%" PRIx64 " size=%zu"
mshv_mem_read(uint64_t addr, size_t size) "\tgpa=0x%" PRIx64 " size=%zu"
mshv_map_memory(uint64_t userspace_addr, uint64_t gpa, uint64_t size) "\tu_a=0x%" PRIx64 " gpa=0x%010" PRIx64 " size=0x%08" PRIx64
mshv_unmap_memory(uint64_t userspace_addr, uint64_t gpa, uint64_t size) "\tu_a=0x%" PRIx64 " gpa=0x%010" PRIx64 " size=0x%08" PRIx64
mshv_set_phys_mem(bool add, const char *name, uint64_t gpa) "\tadd=%d name=%s gpa=0x%010" PRIx64
mshv_handle_mmio(uint64_t gva, uint64_t gpa, uint64_t size, uint8_t access_type) "\tgva=0x%" PRIx64 " gpa=0x%010" PRIx64 " size=0x%" PRIx64 " access_type=%d"
mshv_found_slot(uint64_t userspace_addr, uint64_t gpa, uint64_t size) "\tu_a=0x%" PRIx64 " gpa=0x%010" PRIx64 " size=0x%08" PRIx64
mshv_skip_unset_mem(uint64_t userspace_addr, uint64_t gpa, uint64_t size) "\tu_a=0x%" PRIx64 " gpa=0x%010" PRIx64 " size=0x%08" PRIx64
mshv_remap_attempt(uint64_t userspace_addr, uint64_t gpa, uint64_t size) "\tu_a=0x%" PRIx64 " gpa=0x%010" PRIx64 " size=0x%08" PRIx64
mshv_find_slot_by_gpa(uint64_t gpa) "\tgpa=0x%010" PRIx64

14
accel/mshv/trace.h Normal file
View file

@ -0,0 +1,14 @@
/*
* QEMU MSHV support
*
* Copyright Microsoft, Corp. 2025
*
* Authors:
* Ziqiao Zhou <ziqiaozhou@microsoft.com>
* Magnus Kulke <magnuskulke@microsoft.com>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*
*/
#include "trace/trace-accel_mshv.h"

View file

@ -5,5 +5,6 @@ system_stubs_ss.add(when: 'CONFIG_TCG', if_false: files('tcg-stub.c'))
system_stubs_ss.add(when: 'CONFIG_HVF', if_false: files('hvf-stub.c')) system_stubs_ss.add(when: 'CONFIG_HVF', if_false: files('hvf-stub.c'))
system_stubs_ss.add(when: 'CONFIG_NVMM', if_false: files('nvmm-stub.c')) system_stubs_ss.add(when: 'CONFIG_NVMM', if_false: files('nvmm-stub.c'))
system_stubs_ss.add(when: 'CONFIG_WHPX', if_false: files('whpx-stub.c')) system_stubs_ss.add(when: 'CONFIG_WHPX', if_false: files('whpx-stub.c'))
system_stubs_ss.add(when: 'CONFIG_MSHV', if_false: files('mshv-stub.c'))
specific_ss.add_all(when: ['CONFIG_SYSTEM_ONLY'], if_true: system_stubs_ss) specific_ss.add_all(when: ['CONFIG_SYSTEM_ONLY'], if_true: system_stubs_ss)

44
accel/stubs/mshv-stub.c Normal file
View file

@ -0,0 +1,44 @@
/*
* QEMU MSHV stub
*
* Copyright Red Hat, Inc. 2025
*
* Author: Paolo Bonzini <pbonzini@redhat.com>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "qemu/osdep.h"
#include "hw/pci/msi.h"
#include "system/mshv.h"
bool mshv_allowed;
int mshv_irqchip_add_msi_route(int vector, PCIDevice *dev)
{
return -ENOSYS;
}
void mshv_irqchip_release_virq(int virq)
{
}
int mshv_irqchip_update_msi_route(int virq, MSIMessage msg, PCIDevice *dev)
{
return -ENOSYS;
}
void mshv_irqchip_commit_routes(void)
{
}
int mshv_irqchip_add_irqfd_notifier_gsi(const EventNotifier *n,
const EventNotifier *rn, int virq)
{
return -ENOSYS;
}
int mshv_irqchip_remove_irqfd_notifier_gsi(const EventNotifier *n, int virq)
{
return -ENOSYS;
}

13
configure vendored
View file

@ -1216,8 +1216,9 @@ fi
if test "$rust" != disabled && test -z "$rust_target_triple"; then if test "$rust" != disabled && test -z "$rust_target_triple"; then
# arch and os generally matches between meson and rust # arch and os generally matches between meson and rust
rust_arch=$host_arch rust_arch=$host_arch
# default to host vendor
rust_vendor=$(echo "$rust_host_triple" | cut -d'-' -f2)
rust_os=$host_os rust_os=$host_os
rust_machine=unknown
rust_osvariant= rust_osvariant=
# tweak rust_os if needed; also, machine and variant depend on the OS # tweak rust_os if needed; also, machine and variant depend on the OS
@ -1225,7 +1226,7 @@ if test "$rust" != disabled && test -z "$rust_target_triple"; then
case "$host_os" in case "$host_os" in
darwin) darwin)
# e.g. aarch64-apple-darwin # e.g. aarch64-apple-darwin
rust_machine=apple rust_vendor=apple
;; ;;
linux) linux)
@ -1273,13 +1274,13 @@ EOF
;; ;;
sunos) sunos)
rust_machine=pc rust_vendor=pc
rust_os=solaris rust_os=solaris
;; ;;
windows) windows)
# e.g. aarch64-pc-windows-gnullvm, x86_64-pc-windows-gnu (MSVC not supported) # e.g. aarch64-pc-windows-gnullvm, x86_64-pc-windows-gnu (MSVC not supported)
rust_machine=pc rust_vendor=pc
if test "$host_arch" = aarch64; then if test "$host_arch" = aarch64; then
rust_osvariant=gnullvm rust_osvariant=gnullvm
else else
@ -1310,7 +1311,7 @@ EOF
sparc64) sparc64)
if test "$rust_os" = solaris; then if test "$rust_os" = solaris; then
rust_arch=sparcv9 rust_arch=sparcv9
rust_machine=sun rust_vendor=sun
fi fi
;; ;;
@ -1324,7 +1325,7 @@ EOF
# e.g. aarch64-linux-android # e.g. aarch64-linux-android
rust_target_triple=$rust_arch-$rust_os-$rust_osvariant rust_target_triple=$rust_arch-$rust_os-$rust_osvariant
else else
rust_target_triple=$rust_arch-$rust_machine-$rust_os${rust_osvariant:+-$rust_osvariant} rust_target_triple=$rust_arch-$rust_vendor-$rust_os${rust_osvariant:+-$rust_osvariant}
fi fi
fi fi

View file

@ -53,7 +53,7 @@ Those hosts are officially supported, with various accelerators:
* - SPARC * - SPARC
- tcg - tcg
* - x86 * - x86
- hvf (64 bit only), kvm, nvmm, tcg, whpx (64 bit only), xen - hvf (64 bit only), mshv (64 bit only), kvm, nvmm, tcg, whpx (64 bit only), xen
Other host architectures are not supported. It is possible to build QEMU system Other host architectures are not supported. It is possible to build QEMU system
emulation on an unsupported host architecture using the configure emulation on an unsupported host architecture using the configure

View file

@ -48,7 +48,7 @@ yet, so sometimes the source code is all you have.
* `accel <https://gitlab.com/qemu-project/qemu/-/tree/master/accel>`_: * `accel <https://gitlab.com/qemu-project/qemu/-/tree/master/accel>`_:
Infrastructure and architecture agnostic code related to the various Infrastructure and architecture agnostic code related to the various
`accelerators <Accelerators>` supported by QEMU `accelerators <Accelerators>` supported by QEMU
(TCG, KVM, hvf, whpx, xen, nvmm). (TCG, KVM, hvf, whpx, xen, nvmm, mshv).
Contains interfaces for operations that will be implemented per Contains interfaces for operations that will be implemented per
`target <https://gitlab.com/qemu-project/qemu/-/tree/master/target>`_. `target <https://gitlab.com/qemu-project/qemu/-/tree/master/target>`_.
* `audio <https://gitlab.com/qemu-project/qemu/-/tree/master/audio>`_: * `audio <https://gitlab.com/qemu-project/qemu/-/tree/master/audio>`_:

View file

@ -12,7 +12,7 @@ Accelerator
A specific API used to accelerate execution of guest instructions. It can be A specific API used to accelerate execution of guest instructions. It can be
hardware-based, through a virtualization API provided by the host OS (kvm, hvf, hardware-based, through a virtualization API provided by the host OS (kvm, hvf,
whpx, ...), or software-based (tcg). See this description of `supported whpx, mshv, ...), or software-based (tcg). See this description of `supported
accelerators<Accelerators>`. accelerators<Accelerators>`.
Board Board
@ -101,9 +101,8 @@ manage a virtual machine. QEMU is a virtualizer, that interacts with various
hypervisors. hypervisors.
In the context of QEMU, an hypervisor is an API, provided by the Host OS, In the context of QEMU, an hypervisor is an API, provided by the Host OS,
allowing to execute virtual machines. Linux implementation is KVM (and supports allowing to execute virtual machines. Linux provides a choice of KVM, Xen
Xen as well). For MacOS, it's HVF. Windows defines WHPX. And NetBSD provides or MSHV; MacOS provides HVF; Windows provides WHPX; NetBSD provides NVMM.
NVMM.
.. _machine: .. _machine:

View file

@ -23,6 +23,9 @@ Tiny Code Generator (TCG) capable of emulating many CPUs.
* - Xen * - Xen
- Linux (as dom0) - Linux (as dom0)
- Arm, x86 - Arm, x86
* - MSHV
- Linux (as dom0)
- x86
* - Hypervisor Framework (hvf) * - Hypervisor Framework (hvf)
- MacOS - MacOS
- x86 (64 bit only), Arm (64 bit only) - x86 (64 bit only), Arm (64 bit only)

View file

@ -307,6 +307,19 @@ SRST
Show KVM information. Show KVM information.
ERST ERST
{
.name = "mshv",
.args_type = "",
.params = "",
.help = "show MSHV information",
.cmd = hmp_info_mshv,
},
SRST
``info mshv``
Show MSHV information.
ERST
{ {
.name = "numa", .name = "numa",
.args_type = "", .args_type = "",

View file

@ -163,6 +163,21 @@ void hmp_info_kvm(Monitor *mon, const QDict *qdict)
qapi_free_KvmInfo(info); qapi_free_KvmInfo(info);
} }
void hmp_info_mshv(Monitor *mon, const QDict *qdict)
{
MshvInfo *info;
info = qmp_query_mshv(NULL);
monitor_printf(mon, "mshv support: ");
if (info->present) {
monitor_printf(mon, "%s\n", info->enabled ? "enabled" : "disabled");
} else {
monitor_printf(mon, "not compiled\n");
}
qapi_free_MshvInfo(info);
}
void hmp_info_uuid(Monitor *mon, const QDict *qdict) void hmp_info_uuid(Monitor *mon, const QDict *qdict)
{ {
UuidInfo *info; UuidInfo *info;

View file

@ -28,6 +28,20 @@
#include "system/runstate.h" #include "system/runstate.h"
#include "system/system.h" #include "system/system.h"
#include "hw/s390x/storage-keys.h" #include "hw/s390x/storage-keys.h"
#include <sys/stat.h>
/*
* QMP query for MSHV
*/
/*
 * QMP handler for query-mshv: report whether the MSHV accelerator is
 * built in ("present") and whether it is the active accelerator
 * ("enabled"). Caller owns and frees the returned MshvInfo.
 */
MshvInfo *qmp_query_mshv(Error **errp)
{
    MshvInfo *info = g_new0(MshvInfo, 1);

    info->present = accel_find("mshv") != NULL;
    info->enabled = mshv_enabled();
    return info;
}
/* /*
* fast means: we NEVER interrupt vCPU threads to retrieve * fast means: we NEVER interrupt vCPU threads to retrieve

View file

@ -87,6 +87,8 @@ const size_t pc_compat_10_1_len = G_N_ELEMENTS(pc_compat_10_1);
GlobalProperty pc_compat_10_0[] = { GlobalProperty pc_compat_10_0[] = {
{ TYPE_X86_CPU, "x-consistent-cache", "false" }, { TYPE_X86_CPU, "x-consistent-cache", "false" },
{ TYPE_X86_CPU, "x-vendor-cpuid-only-v2", "false" }, { TYPE_X86_CPU, "x-vendor-cpuid-only-v2", "false" },
{ TYPE_X86_CPU, "x-arch-cap-always-on", "true" },
{ TYPE_X86_CPU, "x-pdcm-on-even-without-pmu", "true" },
}; };
const size_t pc_compat_10_0_len = G_N_ELEMENTS(pc_compat_10_0); const size_t pc_compat_10_0_len = G_N_ELEMENTS(pc_compat_10_0);

View file

@ -27,6 +27,7 @@
#include "hw/pci/msi.h" #include "hw/pci/msi.h"
#include "qemu/host-utils.h" #include "qemu/host-utils.h"
#include "system/kvm.h" #include "system/kvm.h"
#include "system/mshv.h"
#include "trace.h" #include "trace.h"
#include "hw/i386/apic-msidef.h" #include "hw/i386/apic-msidef.h"
#include "qapi/error.h" #include "qapi/error.h"
@ -932,6 +933,13 @@ static void apic_send_msi(MSIMessage *msi)
uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1; uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7; uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7;
/* XXX: Ignore redirection hint. */ /* XXX: Ignore redirection hint. */
#ifdef CONFIG_MSHV
if (mshv_enabled()) {
mshv_request_interrupt(mshv_state, delivery, vector, dest,
dest_mode, trigger_mode);
return;
}
#endif
apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode); apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
} }

View file

@ -30,12 +30,18 @@
#include "hw/intc/ioapic_internal.h" #include "hw/intc/ioapic_internal.h"
#include "hw/pci/msi.h" #include "hw/pci/msi.h"
#include "hw/qdev-properties.h" #include "hw/qdev-properties.h"
#include "system/accel-irq.h"
#include "system/kvm.h" #include "system/kvm.h"
#include "system/system.h" #include "system/system.h"
#include "hw/i386/apic-msidef.h" #include "hw/i386/apic-msidef.h"
#include "hw/i386/x86-iommu.h" #include "hw/i386/x86-iommu.h"
#include "trace.h" #include "trace.h"
#if defined(CONFIG_KVM) || defined(CONFIG_MSHV)
#define ACCEL_GSI_IRQFD_POSSIBLE
#endif
#define APIC_DELIVERY_MODE_SHIFT 8 #define APIC_DELIVERY_MODE_SHIFT 8
#define APIC_POLARITY_SHIFT 14 #define APIC_POLARITY_SHIFT 14
#define APIC_TRIG_MODE_SHIFT 15 #define APIC_TRIG_MODE_SHIFT 15
@ -191,10 +197,10 @@ static void ioapic_set_irq(void *opaque, int vector, int level)
static void ioapic_update_kvm_routes(IOAPICCommonState *s) static void ioapic_update_kvm_routes(IOAPICCommonState *s)
{ {
#ifdef CONFIG_KVM #ifdef ACCEL_GSI_IRQFD_POSSIBLE
int i; int i;
if (kvm_irqchip_is_split()) { if (accel_irqchip_is_split()) {
for (i = 0; i < IOAPIC_NUM_PINS; i++) { for (i = 0; i < IOAPIC_NUM_PINS; i++) {
MSIMessage msg; MSIMessage msg;
struct ioapic_entry_info info; struct ioapic_entry_info info;
@ -202,15 +208,15 @@ static void ioapic_update_kvm_routes(IOAPICCommonState *s)
if (!info.masked) { if (!info.masked) {
msg.address = info.addr; msg.address = info.addr;
msg.data = info.data; msg.data = info.data;
kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL); accel_irqchip_update_msi_route(i, msg, NULL);
} }
} }
kvm_irqchip_commit_routes(kvm_state); accel_irqchip_commit_routes();
} }
#endif #endif
} }
#ifdef CONFIG_KVM #ifdef ACCEL_KERNEL_GSI_IRQFD_POSSIBLE
static void ioapic_iec_notifier(void *private, bool global, static void ioapic_iec_notifier(void *private, bool global,
uint32_t index, uint32_t mask) uint32_t index, uint32_t mask)
{ {
@ -428,11 +434,11 @@ static const MemoryRegionOps ioapic_io_ops = {
static void ioapic_machine_done_notify(Notifier *notifier, void *data) static void ioapic_machine_done_notify(Notifier *notifier, void *data)
{ {
#ifdef CONFIG_KVM #ifdef ACCEL_KERNEL_GSI_IRQFD_POSSIBLE
IOAPICCommonState *s = container_of(notifier, IOAPICCommonState, IOAPICCommonState *s = container_of(notifier, IOAPICCommonState,
machine_done); machine_done);
if (kvm_irqchip_is_split()) { if (accel_irqchip_is_split()) {
X86IOMMUState *iommu = x86_iommu_get_default(); X86IOMMUState *iommu = x86_iommu_get_default();
if (iommu) { if (iommu) {
/* Register this IOAPIC with IOMMU IEC notifier, so that /* Register this IOAPIC with IOMMU IEC notifier, so that

View file

@ -34,6 +34,7 @@
#include "hw/pci/msi.h" #include "hw/pci/msi.h"
#include "hw/pci/msix.h" #include "hw/pci/msix.h"
#include "hw/loader.h" #include "hw/loader.h"
#include "system/accel-irq.h"
#include "system/kvm.h" #include "system/kvm.h"
#include "hw/virtio/virtio-pci.h" #include "hw/virtio/virtio-pci.h"
#include "qemu/range.h" #include "qemu/range.h"
@ -858,11 +859,11 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
if (irqfd->users == 0) { if (irqfd->users == 0) {
KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state); KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state);
ret = kvm_irqchip_add_msi_route(&c, vector, &proxy->pci_dev); ret = accel_irqchip_add_msi_route(&c, vector, &proxy->pci_dev);
if (ret < 0) { if (ret < 0) {
return ret; return ret;
} }
kvm_irqchip_commit_route_changes(&c); accel_irqchip_commit_route_changes(&c);
irqfd->virq = ret; irqfd->virq = ret;
} }
irqfd->users++; irqfd->users++;
@ -874,7 +875,7 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
{ {
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
if (--irqfd->users == 0) { if (--irqfd->users == 0) {
kvm_irqchip_release_virq(kvm_state, irqfd->virq); accel_irqchip_release_virq(irqfd->virq);
} }
} }
@ -883,7 +884,7 @@ static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
unsigned int vector) unsigned int vector)
{ {
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq); return accel_irqchip_add_irqfd_notifier_gsi(n, NULL, irqfd->virq);
} }
static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy, static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
@ -893,7 +894,7 @@ static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
int ret; int ret;
ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq); ret = accel_irqchip_remove_irqfd_notifier_gsi(n, irqfd->virq);
assert(ret == 0); assert(ret == 0);
} }
static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no,
@ -1028,12 +1029,12 @@ static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy,
if (proxy->vector_irqfd) { if (proxy->vector_irqfd) {
irqfd = &proxy->vector_irqfd[vector]; irqfd = &proxy->vector_irqfd[vector];
if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) { if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) {
ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg, ret = accel_irqchip_update_msi_route(irqfd->virq, msg,
&proxy->pci_dev); &proxy->pci_dev);
if (ret < 0) { if (ret < 0) {
return ret; return ret;
} }
kvm_irqchip_commit_routes(kvm_state); accel_irqchip_commit_routes();
} }
} }
@ -1262,7 +1263,7 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
int r, n; int r, n;
bool with_irqfd = msix_enabled(&proxy->pci_dev) && bool with_irqfd = msix_enabled(&proxy->pci_dev) &&
kvm_msi_via_irqfd_enabled(); accel_msi_via_irqfd_enabled() ;
nvqs = MIN(nvqs, VIRTIO_QUEUE_MAX); nvqs = MIN(nvqs, VIRTIO_QUEUE_MAX);
@ -1466,7 +1467,7 @@ static void virtio_pci_set_vector(VirtIODevice *vdev,
uint16_t new_vector) uint16_t new_vector)
{ {
bool kvm_irqfd = (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) && bool kvm_irqfd = (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) &&
msix_enabled(&proxy->pci_dev) && kvm_msi_via_irqfd_enabled(); msix_enabled(&proxy->pci_dev) && accel_msi_via_irqfd_enabled();
if (new_vector == old_vector) { if (new_vector == old_vector) {
return; return;

View file

@ -169,7 +169,7 @@ struct CPUClass {
vaddr (*gdb_adjust_breakpoint)(CPUState *cpu, vaddr addr); vaddr (*gdb_adjust_breakpoint)(CPUState *cpu, vaddr addr);
const char *gdb_core_xml_file; const char *gdb_core_xml_file;
const gchar * (*gdb_arch_name)(CPUState *cpu); const char * (*gdb_arch_name)(CPUState *cpu);
const char * (*gdb_get_core_xml_file)(CPUState *cpu); const char * (*gdb_get_core_xml_file)(CPUState *cpu);
void (*disas_set_info)(CPUState *cpu, disassemble_info *info); void (*disas_set_info)(CPUState *cpu, disassemble_info *info);

20
include/hw/hyperv/hvgdk.h Normal file
View file

@ -0,0 +1,20 @@
/*
* Type definitions for the mshv guest interface.
*
* Copyright Microsoft, Corp. 2025
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#ifndef HW_HYPERV_HVGDK_H
#define HW_HYPERV_HVGDK_H

/* Version of the hvgdk guest-interface definitions this header tracks. */
#define HVGDK_H_VERSION (25125)

/*
 * Policy for guest accesses to MSRs the hypervisor does not implement:
 * either fault (inject an exception) or ignore writes and read zero.
 */
enum hv_unimplemented_msr_action {
    HV_UNIMPLEMENTED_MSR_ACTION_FAULT = 0,
    HV_UNIMPLEMENTED_MSR_ACTION_IGNORE_WRITE_READ_ZERO = 1,
    HV_UNIMPLEMENTED_MSR_ACTION_COUNT = 2,
};

#endif /* HW_HYPERV_HVGDK_H */

View file

@ -0,0 +1,817 @@
/*
* Userspace interfaces for /dev/mshv* devices and derived fds
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#ifndef HW_HYPERV_HVGDK_MINI_H
#define HW_HYPERV_HVGDK_MINI_H
#define MSHV_IOCTL 0xB8
typedef enum hv_register_name {
/* Pending Interruption Register */
HV_REGISTER_PENDING_INTERRUPTION = 0x00010002,
/* X64 User-Mode Registers */
HV_X64_REGISTER_RAX = 0x00020000,
HV_X64_REGISTER_RCX = 0x00020001,
HV_X64_REGISTER_RDX = 0x00020002,
HV_X64_REGISTER_RBX = 0x00020003,
HV_X64_REGISTER_RSP = 0x00020004,
HV_X64_REGISTER_RBP = 0x00020005,
HV_X64_REGISTER_RSI = 0x00020006,
HV_X64_REGISTER_RDI = 0x00020007,
HV_X64_REGISTER_R8 = 0x00020008,
HV_X64_REGISTER_R9 = 0x00020009,
HV_X64_REGISTER_R10 = 0x0002000A,
HV_X64_REGISTER_R11 = 0x0002000B,
HV_X64_REGISTER_R12 = 0x0002000C,
HV_X64_REGISTER_R13 = 0x0002000D,
HV_X64_REGISTER_R14 = 0x0002000E,
HV_X64_REGISTER_R15 = 0x0002000F,
HV_X64_REGISTER_RIP = 0x00020010,
HV_X64_REGISTER_RFLAGS = 0x00020011,
/* X64 Floating Point and Vector Registers */
HV_X64_REGISTER_XMM0 = 0x00030000,
HV_X64_REGISTER_XMM1 = 0x00030001,
HV_X64_REGISTER_XMM2 = 0x00030002,
HV_X64_REGISTER_XMM3 = 0x00030003,
HV_X64_REGISTER_XMM4 = 0x00030004,
HV_X64_REGISTER_XMM5 = 0x00030005,
HV_X64_REGISTER_XMM6 = 0x00030006,
HV_X64_REGISTER_XMM7 = 0x00030007,
HV_X64_REGISTER_XMM8 = 0x00030008,
HV_X64_REGISTER_XMM9 = 0x00030009,
HV_X64_REGISTER_XMM10 = 0x0003000A,
HV_X64_REGISTER_XMM11 = 0x0003000B,
HV_X64_REGISTER_XMM12 = 0x0003000C,
HV_X64_REGISTER_XMM13 = 0x0003000D,
HV_X64_REGISTER_XMM14 = 0x0003000E,
HV_X64_REGISTER_XMM15 = 0x0003000F,
HV_X64_REGISTER_FP_MMX0 = 0x00030010,
HV_X64_REGISTER_FP_MMX1 = 0x00030011,
HV_X64_REGISTER_FP_MMX2 = 0x00030012,
HV_X64_REGISTER_FP_MMX3 = 0x00030013,
HV_X64_REGISTER_FP_MMX4 = 0x00030014,
HV_X64_REGISTER_FP_MMX5 = 0x00030015,
HV_X64_REGISTER_FP_MMX6 = 0x00030016,
HV_X64_REGISTER_FP_MMX7 = 0x00030017,
HV_X64_REGISTER_FP_CONTROL_STATUS = 0x00030018,
HV_X64_REGISTER_XMM_CONTROL_STATUS = 0x00030019,
/* X64 Control Registers */
HV_X64_REGISTER_CR0 = 0x00040000,
HV_X64_REGISTER_CR2 = 0x00040001,
HV_X64_REGISTER_CR3 = 0x00040002,
HV_X64_REGISTER_CR4 = 0x00040003,
HV_X64_REGISTER_CR8 = 0x00040004,
HV_X64_REGISTER_XFEM = 0x00040005,
/* X64 Segment Registers */
HV_X64_REGISTER_ES = 0x00060000,
HV_X64_REGISTER_CS = 0x00060001,
HV_X64_REGISTER_SS = 0x00060002,
HV_X64_REGISTER_DS = 0x00060003,
HV_X64_REGISTER_FS = 0x00060004,
HV_X64_REGISTER_GS = 0x00060005,
HV_X64_REGISTER_LDTR = 0x00060006,
HV_X64_REGISTER_TR = 0x00060007,
/* X64 Table Registers */
HV_X64_REGISTER_IDTR = 0x00070000,
HV_X64_REGISTER_GDTR = 0x00070001,
/* X64 Virtualized MSRs */
HV_X64_REGISTER_TSC = 0x00080000,
HV_X64_REGISTER_EFER = 0x00080001,
HV_X64_REGISTER_KERNEL_GS_BASE = 0x00080002,
HV_X64_REGISTER_APIC_BASE = 0x00080003,
HV_X64_REGISTER_PAT = 0x00080004,
HV_X64_REGISTER_SYSENTER_CS = 0x00080005,
HV_X64_REGISTER_SYSENTER_EIP = 0x00080006,
HV_X64_REGISTER_SYSENTER_ESP = 0x00080007,
HV_X64_REGISTER_STAR = 0x00080008,
HV_X64_REGISTER_LSTAR = 0x00080009,
HV_X64_REGISTER_CSTAR = 0x0008000A,
HV_X64_REGISTER_SFMASK = 0x0008000B,
HV_X64_REGISTER_INITIAL_APIC_ID = 0x0008000C,
/* X64 Cache control MSRs */
HV_X64_REGISTER_MSR_MTRR_CAP = 0x0008000D,
HV_X64_REGISTER_MSR_MTRR_DEF_TYPE = 0x0008000E,
HV_X64_REGISTER_MSR_MTRR_PHYS_BASE0 = 0x00080010,
HV_X64_REGISTER_MSR_MTRR_PHYS_BASE1 = 0x00080011,
HV_X64_REGISTER_MSR_MTRR_PHYS_BASE2 = 0x00080012,
HV_X64_REGISTER_MSR_MTRR_PHYS_BASE3 = 0x00080013,
HV_X64_REGISTER_MSR_MTRR_PHYS_BASE4 = 0x00080014,
HV_X64_REGISTER_MSR_MTRR_PHYS_BASE5 = 0x00080015,
HV_X64_REGISTER_MSR_MTRR_PHYS_BASE6 = 0x00080016,
HV_X64_REGISTER_MSR_MTRR_PHYS_BASE7 = 0x00080017,
HV_X64_REGISTER_MSR_MTRR_PHYS_BASE8 = 0x00080018,
HV_X64_REGISTER_MSR_MTRR_PHYS_BASE9 = 0x00080019,
HV_X64_REGISTER_MSR_MTRR_PHYS_BASEA = 0x0008001A,
HV_X64_REGISTER_MSR_MTRR_PHYS_BASEB = 0x0008001B,
HV_X64_REGISTER_MSR_MTRR_PHYS_BASEC = 0x0008001C,
HV_X64_REGISTER_MSR_MTRR_PHYS_BASED = 0x0008001D,
HV_X64_REGISTER_MSR_MTRR_PHYS_BASEE = 0x0008001E,
HV_X64_REGISTER_MSR_MTRR_PHYS_BASEF = 0x0008001F,
HV_X64_REGISTER_MSR_MTRR_PHYS_MASK0 = 0x00080040,
HV_X64_REGISTER_MSR_MTRR_PHYS_MASK1 = 0x00080041,
HV_X64_REGISTER_MSR_MTRR_PHYS_MASK2 = 0x00080042,
HV_X64_REGISTER_MSR_MTRR_PHYS_MASK3 = 0x00080043,
HV_X64_REGISTER_MSR_MTRR_PHYS_MASK4 = 0x00080044,
HV_X64_REGISTER_MSR_MTRR_PHYS_MASK5 = 0x00080045,
HV_X64_REGISTER_MSR_MTRR_PHYS_MASK6 = 0x00080046,
HV_X64_REGISTER_MSR_MTRR_PHYS_MASK7 = 0x00080047,
HV_X64_REGISTER_MSR_MTRR_PHYS_MASK8 = 0x00080048,
HV_X64_REGISTER_MSR_MTRR_PHYS_MASK9 = 0x00080049,
HV_X64_REGISTER_MSR_MTRR_PHYS_MASKA = 0x0008004A,
HV_X64_REGISTER_MSR_MTRR_PHYS_MASKB = 0x0008004B,
HV_X64_REGISTER_MSR_MTRR_PHYS_MASKC = 0x0008004C,
HV_X64_REGISTER_MSR_MTRR_PHYS_MASKD = 0x0008004D,
HV_X64_REGISTER_MSR_MTRR_PHYS_MASKE = 0x0008004E,
HV_X64_REGISTER_MSR_MTRR_PHYS_MASKF = 0x0008004F,
HV_X64_REGISTER_MSR_MTRR_FIX64K00000 = 0x00080070,
HV_X64_REGISTER_MSR_MTRR_FIX16K80000 = 0x00080071,
HV_X64_REGISTER_MSR_MTRR_FIX16KA0000 = 0x00080072,
HV_X64_REGISTER_MSR_MTRR_FIX4KC0000 = 0x00080073,
HV_X64_REGISTER_MSR_MTRR_FIX4KC8000 = 0x00080074,
HV_X64_REGISTER_MSR_MTRR_FIX4KD0000 = 0x00080075,
HV_X64_REGISTER_MSR_MTRR_FIX4KD8000 = 0x00080076,
HV_X64_REGISTER_MSR_MTRR_FIX4KE0000 = 0x00080077,
HV_X64_REGISTER_MSR_MTRR_FIX4KE8000 = 0x00080078,
HV_X64_REGISTER_MSR_MTRR_FIX4KF0000 = 0x00080079,
HV_X64_REGISTER_MSR_MTRR_FIX4KF8000 = 0x0008007A,
HV_X64_REGISTER_TSC_AUX = 0x0008007B,
HV_X64_REGISTER_BNDCFGS = 0x0008007C,
HV_X64_REGISTER_DEBUG_CTL = 0x0008007D,
/* Available */
HV_X64_REGISTER_SPEC_CTRL = 0x00080084,
HV_X64_REGISTER_TSC_ADJUST = 0x00080096,
/* Other MSRs */
HV_X64_REGISTER_MSR_IA32_MISC_ENABLE = 0x000800A0,
/* Misc */
HV_REGISTER_GUEST_OS_ID = 0x00090002,
HV_REGISTER_REFERENCE_TSC = 0x00090017,
/* Hypervisor-defined Registers (Synic) */
HV_REGISTER_SINT0 = 0x000A0000,
HV_REGISTER_SINT1 = 0x000A0001,
HV_REGISTER_SINT2 = 0x000A0002,
HV_REGISTER_SINT3 = 0x000A0003,
HV_REGISTER_SINT4 = 0x000A0004,
HV_REGISTER_SINT5 = 0x000A0005,
HV_REGISTER_SINT6 = 0x000A0006,
HV_REGISTER_SINT7 = 0x000A0007,
HV_REGISTER_SINT8 = 0x000A0008,
HV_REGISTER_SINT9 = 0x000A0009,
HV_REGISTER_SINT10 = 0x000A000A,
HV_REGISTER_SINT11 = 0x000A000B,
HV_REGISTER_SINT12 = 0x000A000C,
HV_REGISTER_SINT13 = 0x000A000D,
HV_REGISTER_SINT14 = 0x000A000E,
HV_REGISTER_SINT15 = 0x000A000F,
HV_REGISTER_SCONTROL = 0x000A0010,
HV_REGISTER_SVERSION = 0x000A0011,
HV_REGISTER_SIEFP = 0x000A0012,
HV_REGISTER_SIMP = 0x000A0013,
HV_REGISTER_EOM = 0x000A0014,
HV_REGISTER_SIRBP = 0x000A0015,
} hv_register_name;
/*
 * Guest events a partition can ask the hypervisor to intercept; the
 * value selects which class of guest action causes a VM exit/message.
 */
enum hv_intercept_type {
    HV_INTERCEPT_TYPE_X64_IO_PORT = 0X00000000,
    HV_INTERCEPT_TYPE_X64_MSR = 0X00000001,
    HV_INTERCEPT_TYPE_X64_CPUID = 0X00000002,
    HV_INTERCEPT_TYPE_EXCEPTION = 0X00000003,
    /* Used to be HV_INTERCEPT_TYPE_REGISTER */
    HV_INTERCEPT_TYPE_RESERVED0 = 0X00000004,
    HV_INTERCEPT_TYPE_MMIO = 0X00000005,
    HV_INTERCEPT_TYPE_X64_GLOBAL_CPUID = 0X00000006,
    HV_INTERCEPT_TYPE_X64_APIC_SMI = 0X00000007,
    HV_INTERCEPT_TYPE_HYPERCALL = 0X00000008,
    HV_INTERCEPT_TYPE_X64_APIC_INIT_SIPI = 0X00000009,
    HV_INTERCEPT_MC_UPDATE_PATCH_LEVEL_MSR_READ = 0X0000000A,
    HV_INTERCEPT_TYPE_X64_APIC_WRITE = 0X0000000B,
    HV_INTERCEPT_TYPE_X64_MSR_INDEX = 0X0000000C,
    HV_INTERCEPT_TYPE_MAX,
    HV_INTERCEPT_TYPE_INVALID = 0XFFFFFFFF,
};
/* 128-bit quantity carried as two 64-bit halves (little-endian split). */
struct hv_u128 {
    uint64_t low_part;
    uint64_t high_part;
};
union hv_x64_xmm_control_status_register {
struct hv_u128 as_uint128;
struct {
union {
/* long mode */
uint64_t last_fp_rdp;
/* 32 bit mode */
struct {
uint32_t last_fp_dp;
uint16_t last_fp_ds;
uint16_t padding;
};
};
uint32_t xmm_status_control;
uint32_t xmm_status_control_mask;
};
};
union hv_x64_fp_register {
struct hv_u128 as_uint128;
struct {
uint64_t mantissa;
uint64_t biased_exponent:15;
uint64_t sign:1;
uint64_t reserved:48;
};
};
union hv_x64_pending_exception_event {
uint64_t as_uint64[2];
struct {
uint32_t event_pending:1;
uint32_t event_type:3;
uint32_t reserved0:4;
uint32_t deliver_error_code:1;
uint32_t reserved1:7;
uint32_t vector:16;
uint32_t error_code;
uint64_t exception_parameter;
};
};
union hv_x64_pending_virtualization_fault_event {
uint64_t as_uint64[2];
struct {
uint32_t event_pending:1;
uint32_t event_type:3;
uint32_t reserved0:4;
uint32_t reserved1:8;
uint32_t parameter0:16;
uint32_t code;
uint64_t parameter1;
};
};
union hv_x64_pending_interruption_register {
uint64_t as_uint64;
struct {
uint32_t interruption_pending:1;
uint32_t interruption_type:3;
uint32_t deliver_error_code:1;
uint32_t instruction_length:4;
uint32_t nested_event:1;
uint32_t reserved:6;
uint32_t interruption_vector:16;
uint32_t error_code;
};
};
union hv_x64_register_sev_control {
uint64_t as_uint64;
struct {
uint64_t enable_encrypted_state:1;
uint64_t reserved_z:11;
uint64_t vmsa_gpa_page_number:52;
};
};
union hv_x64_msr_npiep_config_contents {
uint64_t as_uint64;
struct {
/*
* These bits enable instruction execution prevention for
* specific instructions.
*/
uint64_t prevents_gdt:1;
uint64_t prevents_idt:1;
uint64_t prevents_ldt:1;
uint64_t prevents_tr:1;
/* The reserved bits must always be 0. */
uint64_t reserved:60;
};
};
/*
 * Segment register state as exchanged with the hypervisor: base, limit
 * and selector, plus the descriptor attribute bits (accessible either
 * as individual bitfields or as the single 16-bit attributes word).
 */
typedef struct hv_x64_segment_register {
    uint64_t base;
    uint32_t limit;
    uint16_t selector;
    union {
        struct {
            uint16_t segment_type:4;
            uint16_t non_system_segment:1;
            uint16_t descriptor_privilege_level:2;
            uint16_t present:1;
            uint16_t reserved:4;
            uint16_t available:1;
            uint16_t _long:1;     /* 64-bit code segment (L bit) */
            uint16_t _default:1;  /* default operation size (D/B bit) */
            uint16_t granularity:1;
        };
        uint16_t attributes;      /* all attribute bits as one word */
    };
} hv_x64_segment_register;

/*
 * Descriptor-table register (GDTR/IDTR); the leading pad keeps the
 * 16-byte layout expected by the hypervisor register ABI.
 */
typedef struct hv_x64_table_register {
    uint16_t pad[3];
    uint16_t limit;
    uint64_t base;
} hv_x64_table_register;
union hv_x64_fp_control_status_register {
struct hv_u128 as_uint128;
struct {
uint16_t fp_control;
uint16_t fp_status;
uint8_t fp_tag;
uint8_t reserved;
uint16_t last_fp_op;
union {
/* long mode */
uint64_t last_fp_rip;
/* 32 bit mode */
struct {
uint32_t last_fp_eip;
uint16_t last_fp_cs;
uint16_t padding;
};
};
};
};
/* General Hypervisor Register Content Definitions */
union hv_explicit_suspend_register {
uint64_t as_uint64;
struct {
uint64_t suspended:1;
uint64_t reserved:63;
};
};
union hv_internal_activity_register {
uint64_t as_uint64;
struct {
uint64_t startup_suspend:1;
uint64_t halt_suspend:1;
uint64_t idle_suspend:1;
uint64_t rsvd_z:61;
};
};
union hv_x64_interrupt_state_register {
uint64_t as_uint64;
struct {
uint64_t interrupt_shadow:1;
uint64_t nmi_masked:1;
uint64_t reserved:62;
};
};
union hv_intercept_suspend_register {
uint64_t as_uint64;
struct {
uint64_t suspended:1;
uint64_t reserved:63;
};
};
/*
 * Value of a single hypervisor register. Which member applies is
 * implied by the register name in the enclosing hv_register_assoc.
 */
typedef union hv_register_value {
    struct hv_u128 reg128;
    uint64_t reg64;
    uint32_t reg32;
    uint16_t reg16;
    uint8_t reg8;
    union hv_x64_fp_register fp;
    union hv_x64_fp_control_status_register fp_control_status;
    union hv_x64_xmm_control_status_register xmm_control_status;
    struct hv_x64_segment_register segment;
    struct hv_x64_table_register table;
    union hv_explicit_suspend_register explicit_suspend;
    union hv_intercept_suspend_register intercept_suspend;
    union hv_internal_activity_register internal_activity;
    union hv_x64_interrupt_state_register interrupt_state;
    union hv_x64_pending_interruption_register pending_interruption;
    union hv_x64_msr_npiep_config_contents npiep_config;
    union hv_x64_pending_exception_event pending_exception_event;
    union hv_x64_pending_virtualization_fault_event
        pending_virtualization_fault_event;
    union hv_x64_register_sev_control sev_control;
} hv_register_value;

/*
 * (register name, value) pair used by the HVCALL_GET_VP_REGISTERS /
 * HVCALL_SET_VP_REGISTERS hypercall inputs below.
 */
typedef struct hv_register_assoc {
    uint32_t name; /* enum hv_register_name */
    uint32_t reserved1;
    uint64_t reserved2;
    union hv_register_value value;
} hv_register_assoc;
/* Target-VTL selector carried in hypercall inputs. */
union hv_input_vtl {
    uint8_t as_uint8;
    struct {
        uint8_t target_vtl:4;
        uint8_t use_target_vtl:1;
        uint8_t reserved_z:3;
    };
};

/*
 * Input for HVCALL_GET_VP_REGISTERS: the trailing flexible array holds
 * the register names (enum hv_register_name) to read.
 */
typedef struct hv_input_get_vp_registers {
    uint64_t partition_id;
    uint32_t vp_index;
    union hv_input_vtl input_vtl;
    uint8_t rsvd_z8;
    uint16_t rsvd_z16;
    uint32_t names[];
} hv_input_get_vp_registers;

/*
 * Input for HVCALL_SET_VP_REGISTERS: the trailing flexible array holds
 * (name, value) pairs to write.
 */
typedef struct hv_input_set_vp_registers {
    uint64_t partition_id;
    uint32_t vp_index;
    union hv_input_vtl input_vtl;
    uint8_t rsvd_z8;
    uint16_t rsvd_z16;
    struct hv_register_assoc elements[];
} hv_input_set_vp_registers;

#define MSHV_VP_MAX_REGISTERS 128

/* Caller-owned batch of register associations for one get/set call. */
struct mshv_vp_registers {
    int count; /* at most MSHV_VP_MAX_REGISTERS */
    struct hv_register_assoc *regs;
};
/* Delivery attributes for an asserted virtual interrupt. */
union hv_interrupt_control {
    uint64_t as_uint64;
    struct {
        uint32_t interrupt_type; /* enum hv_interrupt type */
        uint32_t level_triggered:1;
        uint32_t logical_dest_mode:1;
        uint32_t rsvd:30;
    };
};

/* Input for HVCALL_ASSERT_VIRTUAL_INTERRUPT. */
struct hv_input_assert_virtual_interrupt {
    uint64_t partition_id;
    union hv_interrupt_control control;
    uint64_t dest_addr; /* cpu's apic id */
    uint32_t vector;
    uint8_t target_vtl;
    uint8_t rsvd_z0;
    uint16_t rsvd_z1;
};
/* /dev/mshv */
#define MSHV_CREATE_PARTITION _IOW(MSHV_IOCTL, 0x00, struct mshv_create_partition)
#define MSHV_CREATE_VP _IOW(MSHV_IOCTL, 0x01, struct mshv_create_vp)
/* Partition fds created with MSHV_CREATE_PARTITION */
#define MSHV_INITIALIZE_PARTITION _IO(MSHV_IOCTL, 0x00)
#define MSHV_SET_GUEST_MEMORY _IOW(MSHV_IOCTL, 0x02, struct mshv_user_mem_region)
#define MSHV_IRQFD _IOW(MSHV_IOCTL, 0x03, struct mshv_user_irqfd)
#define MSHV_IOEVENTFD _IOW(MSHV_IOCTL, 0x04, struct mshv_user_ioeventfd)
#define MSHV_SET_MSI_ROUTING _IOW(MSHV_IOCTL, 0x05, struct mshv_user_irq_table)
/*
********************************
* VP APIs for child partitions *
********************************
*/
struct hv_local_interrupt_controller_state {
/* HV_X64_INTERRUPT_CONTROLLER_STATE */
uint32_t apic_id;
uint32_t apic_version;
uint32_t apic_ldr;
uint32_t apic_dfr;
uint32_t apic_spurious;
uint32_t apic_isr[8];
uint32_t apic_tmr[8];
uint32_t apic_irr[8];
uint32_t apic_esr;
uint32_t apic_icr_high;
uint32_t apic_icr_low;
uint32_t apic_lvt_timer;
uint32_t apic_lvt_thermal;
uint32_t apic_lvt_perfmon;
uint32_t apic_lvt_lint0;
uint32_t apic_lvt_lint1;
uint32_t apic_lvt_error;
uint32_t apic_lvt_cmci;
uint32_t apic_error_status;
uint32_t apic_initial_count;
uint32_t apic_counter_value;
uint32_t apic_divide_configuration;
uint32_t apic_remote_read;
};
/* Generic hypercall */
#define MSHV_ROOT_HVCALL _IOWR(MSHV_IOCTL, 0x07, struct mshv_root_hvcall)
/* From hvgdk_mini.h */
#define HV_X64_MSR_GUEST_OS_ID 0x40000000
#define HV_X64_MSR_SINT0 0x40000090
#define HV_X64_MSR_SINT1 0x40000091
#define HV_X64_MSR_SINT2 0x40000092
#define HV_X64_MSR_SINT3 0x40000093
#define HV_X64_MSR_SINT4 0x40000094
#define HV_X64_MSR_SINT5 0x40000095
#define HV_X64_MSR_SINT6 0x40000096
#define HV_X64_MSR_SINT7 0x40000097
#define HV_X64_MSR_SINT8 0x40000098
#define HV_X64_MSR_SINT9 0x40000099
#define HV_X64_MSR_SINT10 0x4000009A
#define HV_X64_MSR_SINT11 0x4000009B
#define HV_X64_MSR_SINT12 0x4000009C
#define HV_X64_MSR_SINT13 0x4000009D
#define HV_X64_MSR_SINT14 0x4000009E
#define HV_X64_MSR_SINT15 0x4000009F
#define HV_X64_MSR_SCONTROL 0x40000080
#define HV_X64_MSR_SIEFP 0x40000082
#define HV_X64_MSR_SIMP 0x40000083
#define HV_X64_MSR_REFERENCE_TSC 0x40000021
#define HV_X64_MSR_EOM 0x40000084
/* Define port identifier type. */
/* SynIC message port identifier: 24-bit id plus reserved bits. */
union hv_port_id {
    uint32_t asuint32_t;
    struct {
        uint32_t id:24;
        uint32_t reserved:8;
    };
};

/*
 * SynIC message slot geometry: a 256-byte slot minus the 16-byte
 * hv_message_header leaves 240 payload bytes (30 qwords).
 */
#define HV_MESSAGE_SIZE                 (256)
#define HV_MESSAGE_PAYLOAD_BYTE_COUNT   (240)
#define HV_MESSAGE_PAYLOAD_QWORD_COUNT  (30)
/* Define hypervisor message types. */
enum hv_message_type {
HVMSG_NONE = 0x00000000,
/* Memory access messages. */
HVMSG_UNMAPPED_GPA = 0x80000000,
HVMSG_GPA_INTERCEPT = 0x80000001,
HVMSG_UNACCEPTED_GPA = 0x80000003,
HVMSG_GPA_ATTRIBUTE_INTERCEPT = 0x80000004,
/* Timer notification messages. */
HVMSG_TIMER_EXPIRED = 0x80000010,
/* Error messages. */
HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020,
HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021,
HVMSG_UNSUPPORTED_FEATURE = 0x80000022,
/*
* Opaque intercept message. The original intercept message is only
* accessible from the mapped intercept message page.
*/
HVMSG_OPAQUE_INTERCEPT = 0x8000003F,
/* Trace buffer complete messages. */
HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040,
/* Hypercall intercept */
HVMSG_HYPERCALL_INTERCEPT = 0x80000050,
/* SynIC intercepts */
HVMSG_SYNIC_EVENT_INTERCEPT = 0x80000060,
HVMSG_SYNIC_SINT_INTERCEPT = 0x80000061,
HVMSG_SYNIC_SINT_DELIVERABLE = 0x80000062,
/* Async call completion intercept */
HVMSG_ASYNC_CALL_COMPLETION = 0x80000070,
/* Root scheduler messages */
HVMSG_SCHEDULER_VP_SIGNAL_BITSE = 0x80000100,
HVMSG_SCHEDULER_VP_SIGNAL_PAIR = 0x80000101,
/* Platform-specific processor intercept messages. */
HVMSG_X64_IO_PORT_INTERCEPT = 0x80010000,
HVMSG_X64_MSR_INTERCEPT = 0x80010001,
HVMSG_X64_CPUID_INTERCEPT = 0x80010002,
HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003,
HVMSG_X64_APIC_EOI = 0x80010004,
HVMSG_X64_LEGACY_FP_ERROR = 0x80010005,
HVMSG_X64_IOMMU_PRQ = 0x80010006,
HVMSG_X64_HALT = 0x80010007,
HVMSG_X64_INTERRUPTION_DELIVERABLE = 0x80010008,
HVMSG_X64_SIPI_INTERCEPT = 0x80010009,
HVMSG_X64_SEV_VMGEXIT_INTERCEPT = 0x80010013,
};
union hv_x64_vp_execution_state {
uint16_t as_uint16;
struct {
uint16_t cpl:2;
uint16_t cr0_pe:1;
uint16_t cr0_am:1;
uint16_t efer_lma:1;
uint16_t debug_active:1;
uint16_t interruption_pending:1;
uint16_t vtl:4;
uint16_t enclave_mode:1;
uint16_t interrupt_shadow:1;
uint16_t virtualization_fault_active:1;
uint16_t reserved:2;
};
};
/* From openvmm::hvdef */
/* Kind of guest access that triggered an intercept. */
enum hv_x64_intercept_access_type {
    HV_X64_INTERCEPT_ACCESS_TYPE_READ = 0,
    HV_X64_INTERCEPT_ACCESS_TYPE_WRITE = 1,
    HV_X64_INTERCEPT_ACCESS_TYPE_EXECUTE = 2,
};

/*
 * Common prefix of all x64 intercept messages: identifies the VP and
 * captures the execution context (CS, RIP, RFLAGS) at intercept time.
 */
struct hv_x64_intercept_message_header {
    uint32_t vp_index;
    uint8_t instruction_length:4;
    uint8_t cr8:4; /* Only set for exo partitions */
    uint8_t intercept_access_type; /* enum hv_x64_intercept_access_type */
    union hv_x64_vp_execution_state execution_state;
    struct hv_x64_segment_register cs_segment;
    uint64_t rip;
    uint64_t rflags;
};
union hv_x64_io_port_access_info {
uint8_t as_uint8;
struct {
uint8_t access_size:3;
uint8_t string_op:1;
uint8_t rep_prefix:1;
uint8_t reserved:3;
};
};
typedef struct hv_x64_io_port_intercept_message {
struct hv_x64_intercept_message_header header;
uint16_t port_number;
union hv_x64_io_port_access_info access_info;
uint8_t instruction_byte_count;
uint32_t reserved;
uint64_t rax;
uint8_t instruction_bytes[16];
struct hv_x64_segment_register ds_segment;
struct hv_x64_segment_register es_segment;
uint64_t rcx;
uint64_t rsi;
uint64_t rdi;
} hv_x64_io_port_intercept_message;
union hv_x64_memory_access_info {
uint8_t as_uint8;
struct {
uint8_t gva_valid:1;
uint8_t gva_gpa_valid:1;
uint8_t hypercall_output_pending:1;
uint8_t tlb_locked_no_overlay:1;
uint8_t reserved:4;
};
};
struct hv_x64_memory_intercept_message {
struct hv_x64_intercept_message_header header;
uint32_t cache_type; /* enum hv_cache_type */
uint8_t instruction_byte_count;
union hv_x64_memory_access_info memory_access_info;
uint8_t tpr_priority;
uint8_t reserved1;
uint64_t guest_virtual_address;
uint64_t guest_physical_address;
uint8_t instruction_bytes[16];
};
/* Per-message flags; msg_pending signals more messages are queued. */
union hv_message_flags {
    uint8_t asu8;
    struct {
        uint8_t msg_pending:1;
        uint8_t reserved:7;
    };
};

/* 16-byte header common to every SynIC message. */
struct hv_message_header {
    uint32_t message_type; /* enum hv_message_type */
    uint8_t payload_size;  /* valid payload bytes, up to 240 */
    union hv_message_flags message_flags;
    uint8_t reserved[2];
    union {
        uint64_t sender;
        union hv_port_id port;
    };
};

/* One SynIC message slot: header plus qword-aligned payload. */
struct hv_message {
    struct hv_message_header header;
    union {
        uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
    } u;
};
/* From github.com/rust-vmm/mshv-bindings/src/x86_64/regs.rs */
/*
 * One CPUID leaf/subleaf result as exchanged with the MSHV driver
 * (layout from github.com/rust-vmm/mshv-bindings x86_64 regs.rs).
 */
struct hv_cpuid_entry {
    uint32_t function; /* CPUID leaf (EAX input) */
    uint32_t index;    /* CPUID subleaf (ECX input) */
    uint32_t flags;
    uint32_t eax;
    uint32_t ebx;
    uint32_t ecx;
    uint32_t edx;
    uint32_t padding[3];
};

/*
 * Variable-length CPUID table: nent entries follow the 8-byte header.
 * Use a C99 flexible array member rather than the GNU zero-length
 * array extension; the layout (sizeof == 8, entries at offset 8) is
 * unchanged.
 */
struct hv_cpuid {
    uint32_t nent;
    uint32_t padding;
    struct hv_cpuid_entry entries[];
};
#define IA32_MSR_TSC 0x00000010
#define IA32_MSR_EFER 0xC0000080
#define IA32_MSR_KERNEL_GS_BASE 0xC0000102
#define IA32_MSR_APIC_BASE 0x0000001B
#define IA32_MSR_PAT 0x0277
#define IA32_MSR_SYSENTER_CS 0x00000174
#define IA32_MSR_SYSENTER_ESP 0x00000175
#define IA32_MSR_SYSENTER_EIP 0x00000176
#define IA32_MSR_STAR 0xC0000081
#define IA32_MSR_LSTAR 0xC0000082
#define IA32_MSR_CSTAR 0xC0000083
#define IA32_MSR_SFMASK 0xC0000084
#define IA32_MSR_MTRR_CAP 0x00FE
#define IA32_MSR_MTRR_DEF_TYPE 0x02FF
#define IA32_MSR_MTRR_PHYSBASE0 0x0200
#define IA32_MSR_MTRR_PHYSMASK0 0x0201
#define IA32_MSR_MTRR_PHYSBASE1 0x0202
#define IA32_MSR_MTRR_PHYSMASK1 0x0203
#define IA32_MSR_MTRR_PHYSBASE2 0x0204
#define IA32_MSR_MTRR_PHYSMASK2 0x0205
#define IA32_MSR_MTRR_PHYSBASE3 0x0206
#define IA32_MSR_MTRR_PHYSMASK3 0x0207
#define IA32_MSR_MTRR_PHYSBASE4 0x0208
#define IA32_MSR_MTRR_PHYSMASK4 0x0209
#define IA32_MSR_MTRR_PHYSBASE5 0x020A
#define IA32_MSR_MTRR_PHYSMASK5 0x020B
#define IA32_MSR_MTRR_PHYSBASE6 0x020C
#define IA32_MSR_MTRR_PHYSMASK6 0x020D
#define IA32_MSR_MTRR_PHYSBASE7 0x020E
#define IA32_MSR_MTRR_PHYSMASK7 0x020F
#define IA32_MSR_MTRR_FIX64K_00000 0x0250
#define IA32_MSR_MTRR_FIX16K_80000 0x0258
#define IA32_MSR_MTRR_FIX16K_A0000 0x0259
#define IA32_MSR_MTRR_FIX4K_C0000 0x0268
#define IA32_MSR_MTRR_FIX4K_C8000 0x0269
#define IA32_MSR_MTRR_FIX4K_D0000 0x026A
#define IA32_MSR_MTRR_FIX4K_D8000 0x026B
#define IA32_MSR_MTRR_FIX4K_E0000 0x026C
#define IA32_MSR_MTRR_FIX4K_E8000 0x026D
#define IA32_MSR_MTRR_FIX4K_F0000 0x026E
#define IA32_MSR_MTRR_FIX4K_F8000 0x026F
#define IA32_MSR_TSC_AUX 0xC0000103
#define IA32_MSR_BNDCFGS 0x00000d90
#define IA32_MSR_DEBUG_CTL 0x1D9
#define IA32_MSR_SPEC_CTRL 0x00000048
#define IA32_MSR_TSC_ADJUST 0x0000003b
#define IA32_MSR_MISC_ENABLE 0x000001a0
#define HV_TRANSLATE_GVA_VALIDATE_READ (0x0001)
#define HV_TRANSLATE_GVA_VALIDATE_WRITE (0x0002)
#define HV_TRANSLATE_GVA_VALIDATE_EXECUTE (0x0004)
#define HV_HYP_PAGE_SHIFT 12
#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT)
#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1))
#define HVCALL_GET_PARTITION_PROPERTY 0x0044
#define HVCALL_SET_PARTITION_PROPERTY 0x0045
#define HVCALL_GET_VP_REGISTERS 0x0050
#define HVCALL_SET_VP_REGISTERS 0x0051
#define HVCALL_TRANSLATE_VIRTUAL_ADDRESS 0x0052
#define HVCALL_REGISTER_INTERCEPT_RESULT 0x0091
#define HVCALL_ASSERT_VIRTUAL_INTERRUPT 0x0094
#endif /* HW_HYPERV_HVGDK_MINI_H */

249
include/hw/hyperv/hvhdk.h Normal file
View file

@ -0,0 +1,249 @@
/*
* Type definitions for the mshv host.
*
* Copyright Microsoft, Corp. 2025
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#ifndef HW_HYPERV_HVHDK_H
#define HW_HYPERV_HVHDK_H
#define HV_PARTITION_SYNTHETIC_PROCESSOR_FEATURES_BANKS 1
struct hv_input_set_partition_property {
uint64_t partition_id;
uint32_t property_code; /* enum hv_partition_property_code */
uint32_t padding;
uint64_t property_value;
};
/*
 * Synthetic processor features/privileges granted to a partition, one
 * enlightenment per bit. Used as the value for
 * HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES. This is a hypervisor
 * ABI bitfield layout; do not reorder or repack members.
 */
union hv_partition_synthetic_processor_features {
uint64_t as_uint64[HV_PARTITION_SYNTHETIC_PROCESSOR_FEATURES_BANKS];
struct {
/*
* Report a hypervisor is present. CPUID leaves
* 0x40000000 and 0x40000001 are supported.
*/
uint64_t hypervisor_present:1;
/*
* Features associated with HV#1:
*/
/* Report support for Hv1 (CPUID leaves 0x40000000 - 0x40000006). */
uint64_t hv1:1;
/*
* Access to HV_X64_MSR_VP_RUNTIME.
* Corresponds to access_vp_run_time_reg privilege.
*/
uint64_t access_vp_run_time_reg:1;
/*
* Access to HV_X64_MSR_TIME_REF_COUNT.
* Corresponds to access_partition_reference_counter privilege.
*/
uint64_t access_partition_reference_counter:1;
/*
* Access to SINT-related registers (HV_X64_MSR_SCONTROL through
* HV_X64_MSR_EOM and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15).
* Corresponds to access_synic_regs privilege.
*/
uint64_t access_synic_regs:1;
/*
* Access to synthetic timers and associated MSRs
* (HV_X64_MSR_STIMER0_CONFIG through HV_X64_MSR_STIMER3_COUNT).
* Corresponds to access_synthetic_timer_regs privilege.
*/
uint64_t access_synthetic_timer_regs:1;
/*
* Access to APIC MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and
* HV_X64_MSR_TPR) as well as the VP assist page.
* Corresponds to access_intr_ctrl_regs privilege.
*/
uint64_t access_intr_ctrl_regs:1;
/*
* Access to registers associated with hypercalls
* (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL).
* Corresponds to access_hypercall_msrs privilege.
*/
uint64_t access_hypercall_regs:1;
/* VP index can be queried. corresponds to access_vp_index privilege. */
uint64_t access_vp_index:1;
/*
* Access to the reference TSC. Corresponds to
* access_partition_reference_tsc privilege.
*/
uint64_t access_partition_reference_tsc:1;
/*
* Partition has access to the guest idle reg. Corresponds to
* access_guest_idle_reg privilege.
*/
uint64_t access_guest_idle_reg:1;
/*
* Partition has access to frequency regs. corresponds to
* access_frequency_regs privilege.
*/
uint64_t access_frequency_regs:1;
uint64_t reserved_z12:1; /* Reserved for access_reenlightenment_controls */
uint64_t reserved_z13:1; /* Reserved for access_root_scheduler_reg */
uint64_t reserved_z14:1; /* Reserved for access_tsc_invariant_controls */
/*
* Extended GVA ranges for HvCallFlushVirtualAddressList hypercall.
* Corresponds to privilege.
*/
uint64_t enable_extended_gva_ranges_for_flush_virtual_address_list:1;
uint64_t reserved_z16:1; /* Reserved for access_vsm. */
uint64_t reserved_z17:1; /* Reserved for access_vp_registers. */
/* Use fast hypercall output. Corresponds to privilege. */
uint64_t fast_hypercall_output:1;
uint64_t reserved_z19:1; /* Reserved for enable_extended_hypercalls. */
/*
* HvStartVirtualProcessor can be used to start virtual processors.
* Corresponds to privilege.
*/
uint64_t start_virtual_processor:1;
uint64_t reserved_z21:1; /* Reserved for Isolation. */
/* Synthetic timers in direct mode. */
uint64_t direct_synthetic_timers:1;
uint64_t reserved_z23:1; /* Reserved for synthetic time unhalted timer */
/* Use extended processor masks. */
uint64_t extended_processor_masks:1;
/*
* HvCallFlushVirtualAddressSpace / HvCallFlushVirtualAddressList are
* supported.
*/
uint64_t tb_flush_hypercalls:1;
/* HvCallSendSyntheticClusterIpi is supported. */
uint64_t synthetic_cluster_ipi:1;
/* HvCallNotifyLongSpinWait is supported. */
uint64_t notify_long_spin_wait:1;
/* HvCallQueryNumaDistance is supported. */
uint64_t query_numa_distance:1;
/* HvCallSignalEvent is supported. Corresponds to privilege. */
uint64_t signal_events:1;
/* HvCallRetargetDeviceInterrupt is supported. */
uint64_t retarget_device_interrupt:1;
/* HvCallRestorePartitionTime is supported. */
uint64_t restore_time:1;
/* EnlightenedVmcs nested enlightenment is supported. */
uint64_t enlightened_vmcs:1;
/* Pad the bank out to 64 bits. */
uint64_t reserved:30;
};
};
/* Result codes returned by HVCALL_TRANSLATE_VIRTUAL_ADDRESS (0x0052) */
enum hv_translate_gva_result_code {
HV_TRANSLATE_GVA_SUCCESS = 0,
/* Translation failures. */
HV_TRANSLATE_GVA_PAGE_NOT_PRESENT = 1,
HV_TRANSLATE_GVA_PRIVILEGE_VIOLATION = 2,
/*
 * NOTE(review): "INVALIDE" spelling appears intentional — it matches
 * the equivalent Linux kernel hvhdk.h definition; confirm before
 * renaming, since this header mirrors the hypervisor interface.
 */
HV_TRANSLATE_GVA_INVALIDE_PAGE_TABLE_FLAGS = 3,
/* GPA access failures. */
HV_TRANSLATE_GVA_GPA_UNMAPPED = 4,
HV_TRANSLATE_GVA_GPA_NO_READ_ACCESS = 5,
HV_TRANSLATE_GVA_GPA_NO_WRITE_ACCESS = 6,
HV_TRANSLATE_GVA_GPA_ILLEGAL_OVERLAY_ACCESS = 7,
/*
* Intercept for memory access by either
* - a higher VTL
* - a nested hypervisor (due to a violation of the nested page table)
*/
HV_TRANSLATE_GVA_INTERCEPT = 8,
HV_TRANSLATE_GVA_GPA_UNACCEPTED = 9,
};
/* Packed result word of a GVA translation (see hv_translate_gva_result_code) */
union hv_translate_gva_result {
uint64_t as_uint64;
struct {
uint32_t result_code; /* enum hv_translate_gva_result_code */
uint32_t cache_type:8;
uint32_t overlay_page:1;
uint32_t reserved:23;
};
};
/* Hypercall input for HVCALL_TRANSLATE_VIRTUAL_ADDRESS */
typedef struct hv_input_translate_virtual_address {
uint64_t partition_id;
uint32_t vp_index;
uint32_t padding; /* MBZ */
uint64_t control_flags; /* HV_TRANSLATE_GVA_VALIDATE_* */
uint64_t gva_page; /* guest virtual page number to translate */
} hv_input_translate_virtual_address;
/* Hypercall output for HVCALL_TRANSLATE_VIRTUAL_ADDRESS */
typedef struct hv_output_translate_virtual_address {
union hv_translate_gva_result translation_result;
uint64_t gpa_page; /* resulting guest physical page number */
} hv_output_translate_virtual_address;
/*
 * Parameters for registering a canned CPUID intercept result
 * (HVCALL_REGISTER_INTERCEPT_RESULT, 0x0091): the hypervisor answers
 * matching CPUID intercepts itself using the masked values below.
 */
typedef struct hv_register_x64_cpuid_result_parameters {
struct {
uint32_t eax;
uint32_t ecx;
uint8_t subleaf_specific;
uint8_t always_override;
uint16_t padding;
} input;
struct {
/* Each *_mask selects which result bits the override applies to */
uint32_t eax;
uint32_t eax_mask;
uint32_t ebx;
uint32_t ebx_mask;
uint32_t ecx;
uint32_t ecx_mask;
uint32_t edx;
uint32_t edx_mask;
} result;
} hv_register_x64_cpuid_result_parameters;
/* Parameters for registering an MSR intercept result */
typedef struct hv_register_x64_msr_result_parameters {
uint32_t msr_index;
uint32_t access_type;
uint32_t action; /* enum hv_unimplemented_msr_action */
} hv_register_x64_msr_result_parameters;
union hv_register_intercept_result_parameters {
struct hv_register_x64_cpuid_result_parameters cpuid;
struct hv_register_x64_msr_result_parameters msr;
};
/* Hypercall input for HVCALL_REGISTER_INTERCEPT_RESULT */
typedef struct hv_input_register_intercept_result {
uint64_t partition_id;
uint32_t vp_index;
uint32_t intercept_type; /* enum hv_intercept_type */
union hv_register_intercept_result_parameters parameters;
} hv_input_register_intercept_result;
#endif /* HW_HYPERV_HVHDK_H */

View file

@ -0,0 +1,102 @@
/*
* Type definitions for the mshv host interface.
*
* Copyright Microsoft, Corp. 2025
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#ifndef HW_HYPERV_HVHDK_MINI_H
#define HW_HYPERV_HVHDK_MINI_H
/*
 * NOTE(review): "HVHVK" looks like a transposition of "HVHDK" — confirm
 * against the upstream header before renaming, as this mirrors the
 * hypervisor interface definitions.
 */
#define HVHVK_MINI_VERSION (25294)
/* Each generic set contains 64 elements */
#define HV_GENERIC_SET_SHIFT (6)
#define HV_GENERIC_SET_MASK (63)
/* Encoding formats for hypervisor generic (e.g. processor) sets */
enum hv_generic_set_format {
HV_GENERIC_SET_SPARSE_4K,
HV_GENERIC_SET_ALL,
};
/*
 * Partition property codes for HVCALL_GET/SET_PARTITION_PROPERTY.
 * Grouped by property class (upper 16 bits of the code).
 */
enum hv_partition_property_code {
/* Privilege properties */
HV_PARTITION_PROPERTY_PRIVILEGE_FLAGS = 0x00010000,
HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES = 0x00010001,
/* Scheduling properties */
HV_PARTITION_PROPERTY_SUSPEND = 0x00020000,
HV_PARTITION_PROPERTY_CPU_RESERVE = 0x00020001,
HV_PARTITION_PROPERTY_CPU_CAP = 0x00020002,
HV_PARTITION_PROPERTY_CPU_WEIGHT = 0x00020003,
HV_PARTITION_PROPERTY_CPU_GROUP_ID = 0x00020004,
/* Time properties */
HV_PARTITION_PROPERTY_TIME_FREEZE = 0x00030003,
HV_PARTITION_PROPERTY_REFERENCE_TIME = 0x00030005,
/* Debugging properties */
HV_PARTITION_PROPERTY_DEBUG_CHANNEL_ID = 0x00040000,
/* Resource properties */
HV_PARTITION_PROPERTY_VIRTUAL_TLB_PAGE_COUNT = 0x00050000,
HV_PARTITION_PROPERTY_VSM_CONFIG = 0x00050001,
HV_PARTITION_PROPERTY_ZERO_MEMORY_ON_RESET = 0x00050002,
HV_PARTITION_PROPERTY_PROCESSORS_PER_SOCKET = 0x00050003,
HV_PARTITION_PROPERTY_NESTED_TLB_SIZE = 0x00050004,
HV_PARTITION_PROPERTY_GPA_PAGE_ACCESS_TRACKING = 0x00050005,
HV_PARTITION_PROPERTY_VSM_PERMISSIONS_DIRTY_SINCE_LAST_QUERY = 0x00050006,
HV_PARTITION_PROPERTY_SGX_LAUNCH_CONTROL_CONFIG = 0x00050007,
HV_PARTITION_PROPERTY_DEFAULT_SGX_LAUNCH_CONTROL0 = 0x00050008,
HV_PARTITION_PROPERTY_DEFAULT_SGX_LAUNCH_CONTROL1 = 0x00050009,
HV_PARTITION_PROPERTY_DEFAULT_SGX_LAUNCH_CONTROL2 = 0x0005000a,
HV_PARTITION_PROPERTY_DEFAULT_SGX_LAUNCH_CONTROL3 = 0x0005000b,
HV_PARTITION_PROPERTY_ISOLATION_STATE = 0x0005000c,
HV_PARTITION_PROPERTY_ISOLATION_CONTROL = 0x0005000d,
HV_PARTITION_PROPERTY_ALLOCATION_ID = 0x0005000e,
HV_PARTITION_PROPERTY_MONITORING_ID = 0x0005000f,
HV_PARTITION_PROPERTY_IMPLEMENTED_PHYSICAL_ADDRESS_BITS = 0x00050010,
HV_PARTITION_PROPERTY_NON_ARCHITECTURAL_CORE_SHARING = 0x00050011,
HV_PARTITION_PROPERTY_HYPERCALL_DOORBELL_PAGE = 0x00050012,
HV_PARTITION_PROPERTY_ISOLATION_POLICY = 0x00050014,
HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION = 0x00050017,
HV_PARTITION_PROPERTY_SEV_VMGEXIT_OFFLOADS = 0x00050022,
/* Compatibility properties */
HV_PARTITION_PROPERTY_PROCESSOR_VENDOR = 0x00060000,
HV_PARTITION_PROPERTY_PROCESSOR_FEATURES_DEPRECATED = 0x00060001,
HV_PARTITION_PROPERTY_PROCESSOR_XSAVE_FEATURES = 0x00060002,
HV_PARTITION_PROPERTY_PROCESSOR_CL_FLUSH_SIZE = 0x00060003,
HV_PARTITION_PROPERTY_ENLIGHTENMENT_MODIFICATIONS = 0x00060004,
HV_PARTITION_PROPERTY_COMPATIBILITY_VERSION = 0x00060005,
HV_PARTITION_PROPERTY_PHYSICAL_ADDRESS_WIDTH = 0x00060006,
HV_PARTITION_PROPERTY_XSAVE_STATES = 0x00060007,
HV_PARTITION_PROPERTY_MAX_XSAVE_DATA_SIZE = 0x00060008,
HV_PARTITION_PROPERTY_PROCESSOR_CLOCK_FREQUENCY = 0x00060009,
HV_PARTITION_PROPERTY_PROCESSOR_FEATURES0 = 0x0006000a,
HV_PARTITION_PROPERTY_PROCESSOR_FEATURES1 = 0x0006000b,
/* Guest software properties */
HV_PARTITION_PROPERTY_GUEST_OS_ID = 0x00070000,
/* Nested virtualization properties */
HV_PARTITION_PROPERTY_PROCESSOR_VIRTUALIZATION_FEATURES = 0x00080000,
};
/* HV Map GPA (Guest Physical Address) Flags */
#define HV_MAP_GPA_PERMISSIONS_NONE 0x0
#define HV_MAP_GPA_READABLE 0x1
#define HV_MAP_GPA_WRITABLE 0x2
#define HV_MAP_GPA_KERNEL_EXECUTABLE 0x4
#define HV_MAP_GPA_USER_EXECUTABLE 0x8
/* Kernel- plus user-executable */
#define HV_MAP_GPA_EXECUTABLE 0xC
#define HV_MAP_GPA_PERMISSIONS_MASK 0xF
#define HV_MAP_GPA_ADJUSTABLE 0x8000
#define HV_MAP_GPA_NO_ACCESS 0x10000
#define HV_MAP_GPA_NOT_CACHED 0x200000
#define HV_MAP_GPA_LARGE_PAGE 0x80000000
#define HV_PFN_RNG_PAGEBITS 24 /* HV_SPA_PAGE_RANGE_ADDITIONAL_PAGES_BITS */
#endif /* HW_HYPERV_HVHDK_MINI_H */

View file

@ -24,6 +24,7 @@ strList *hmp_split_at_comma(const char *str);
void hmp_info_name(Monitor *mon, const QDict *qdict); void hmp_info_name(Monitor *mon, const QDict *qdict);
void hmp_info_version(Monitor *mon, const QDict *qdict); void hmp_info_version(Monitor *mon, const QDict *qdict);
void hmp_info_kvm(Monitor *mon, const QDict *qdict); void hmp_info_kvm(Monitor *mon, const QDict *qdict);
void hmp_info_mshv(Monitor *mon, const QDict *qdict);
void hmp_info_status(Monitor *mon, const QDict *qdict); void hmp_info_status(Monitor *mon, const QDict *qdict);
void hmp_info_uuid(Monitor *mon, const QDict *qdict); void hmp_info_uuid(Monitor *mon, const QDict *qdict);
void hmp_info_chardev(Monitor *mon, const QDict *qdict); void hmp_info_chardev(Monitor *mon, const QDict *qdict);

View file

@ -0,0 +1,37 @@
/*
* Accelerated irqchip abstraction
*
* Copyright Microsoft, Corp. 2025
*
* Authors: Ziqiao Zhou <ziqiaozhou@microsoft.com>
* Magnus Kulke <magnuskulke@microsoft.com>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#ifndef SYSTEM_ACCEL_IRQ_H
#define SYSTEM_ACCEL_IRQ_H
#include "hw/pci/msi.h"
#include "qemu/osdep.h"
#include "system/kvm.h"
#include "system/mshv.h"
static inline bool accel_msi_via_irqfd_enabled(void)
{
return mshv_msi_via_irqfd_enabled() || kvm_msi_via_irqfd_enabled();
}
static inline bool accel_irqchip_is_split(void)
{
return mshv_msi_via_irqfd_enabled() || kvm_irqchip_is_split();
}
/*
 * Accelerator-agnostic irqchip entry points; each dispatches to the
 * MSHV or KVM implementation depending on the active accelerator.
 * The KVMRouteChange transaction type is reused for both backends.
 */
int accel_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev);
int accel_irqchip_update_msi_route(int vector, MSIMessage msg, PCIDevice *dev);
void accel_irqchip_commit_route_changes(KVMRouteChange *c);
void accel_irqchip_commit_routes(void);
void accel_irqchip_release_virq(int virq);
/* Returns a negative value on failure; see backend implementations. */
int accel_irqchip_add_irqfd_notifier_gsi(EventNotifier *n, EventNotifier *rn,
int virq);
int accel_irqchip_remove_irqfd_notifier_gsi(EventNotifier *n, int virq);
#endif

View file

@ -14,6 +14,7 @@
#include "hw/core/cpu.h" #include "hw/core/cpu.h"
#include "system/kvm.h" #include "system/kvm.h"
#include "system/hvf.h" #include "system/hvf.h"
#include "system/mshv.h"
#include "system/whpx.h" #include "system/whpx.h"
#include "system/nvmm.h" #include "system/nvmm.h"

64
include/system/mshv.h Normal file
View file

@ -0,0 +1,64 @@
/*
* QEMU MSHV support
*
* Copyright Microsoft, Corp. 2025
*
* Authors: Ziqiao Zhou <ziqiaozhou@microsoft.com>
* Magnus Kulke <magnuskulke@microsoft.com>
* Jinank Jain <jinankjain@microsoft.com>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*
*/
#ifndef QEMU_MSHV_H
#define QEMU_MSHV_H
#include "qemu/osdep.h"
#include "qemu/accel.h"
#include "hw/hyperv/hyperv-proto.h"
#include "hw/hyperv/hvhdk.h"
#include "hw/hyperv/hvgdk_mini.h"
#include "qapi/qapi-types-common.h"
#include "system/memory.h"
#include "accel/accel-ops.h"
/*
 * CONFIG_MSHV_IS_POSSIBLE: defined when the mshv accelerator could be
 * compiled in. In per-target code it tracks CONFIG_MSHV; in common code
 * it is always defined so the stubs/macros below stay visible.
 */
#ifdef COMPILING_PER_TARGET
#ifdef CONFIG_MSHV
#include <linux/mshv.h>
#define CONFIG_MSHV_IS_POSSIBLE
#endif
#else
#define CONFIG_MSHV_IS_POSSIBLE
#endif
/* Capacity of the MSI routing table (see mshv_irqchip_add_msi_route) */
#define MSHV_MAX_MSI_ROUTES 4096
#define MSHV_PAGE_SHIFT 12
#ifdef CONFIG_MSHV_IS_POSSIBLE
/* Set when the user selected the mshv accelerator */
extern bool mshv_allowed;
#define mshv_enabled() (mshv_allowed)
#define mshv_msi_via_irqfd_enabled() mshv_enabled()
#else /* CONFIG_MSHV_IS_POSSIBLE */
/* mshv compiled out: all predicates collapse to false */
#define mshv_enabled() false
#define mshv_msi_via_irqfd_enabled() mshv_enabled()
#endif
typedef struct MshvState MshvState;
/* Global accelerator state, set up at accelerator init */
extern MshvState *mshv_state;

/* interrupt */
/*
 * Request injection of an interrupt into the partition.
 *
 * The parameter was renamed from "mshv_state" to "s" so it no longer
 * reuses the name of the global declared above; prototype parameter
 * names are not part of the ABI, so callers are unaffected.
 */
int mshv_request_interrupt(MshvState *s, uint32_t interrupt_type,
                           uint32_t vector, uint32_t vp_index,
                           bool logical_destination_mode,
                           bool level_triggered);
/* MSI routing; mirrors the kvm_irqchip_* API (returns virq or < 0) */
int mshv_irqchip_add_msi_route(int vector, PCIDevice *dev);
int mshv_irqchip_update_msi_route(int virq, MSIMessage msg, PCIDevice *dev);
void mshv_irqchip_commit_routes(void);
void mshv_irqchip_release_virq(int virq);
int mshv_irqchip_add_irqfd_notifier_gsi(const EventNotifier *n,
                                        const EventNotifier *rn, int virq);
int mshv_irqchip_remove_irqfd_notifier_gsi(const EventNotifier *n, int virq);
#endif

155
include/system/mshv_int.h Normal file
View file

@ -0,0 +1,155 @@
/*
* QEMU MSHV support
*
* Copyright Microsoft, Corp. 2025
*
* Authors: Ziqiao Zhou <ziqiaozhou@microsoft.com>
* Magnus Kulke <magnuskulke@microsoft.com>
* Jinank Jain <jinankjain@microsoft.com>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*
*/
#ifndef QEMU_MSHV_INT_H
#define QEMU_MSHV_INT_H
/* Capacity of a MshvMsrEntries batch (see mshv_configure_msr) */
#define MSHV_MSR_ENTRIES_COUNT 64
#define MSHV_MAX_MEM_SLOTS 32
typedef struct hyperv_message hv_message;
/* Hypercall argument pages mapped for a vCPU (input/output buffers) */
typedef struct MshvHvCallArgs {
void *base;
void *input_page;
void *output_page;
} MshvHvCallArgs;
/* Per-vCPU accelerator state, reached via cpu->accel (see mshv_vcpufd) */
struct AccelCPUState {
int cpufd;
/* registers need to be written back before next vCPU run */
bool dirty;
MshvHvCallArgs hvcall_args;
};
/* MemoryListener wrapper carrying the address-space index it watches */
typedef struct MshvMemoryListener {
MemoryListener listener;
int as_id;
} MshvMemoryListener;
/* Pairs an AddressSpace with the listener registered on it */
typedef struct MshvAddressSpace {
MshvMemoryListener *ml;
AddressSpace *as;
} MshvAddressSpace;
/* Tracks guest memory slots; mutex guards concurrent slot updates */
typedef struct MshvMemorySlotManager {
size_t n_slots;
GList *slots;
QemuMutex mutex;
} MshvMemorySlotManager;
/* Global accelerator state (see mshv_state in system/mshv.h) */
struct MshvState {
AccelState parent_obj;
/* partition (VM) file descriptor */
int vm;
MshvMemoryListener memory_listener;
/* number of listeners */
int nr_as;
MshvAddressSpace *as;
int fd;
MshvMemorySlotManager msm;
};
/* MSI route table state; gsi_routes maps GSI to route entries */
typedef struct MshvMsiControl {
bool updated;
GHashTable *gsi_routes;
} MshvMsiControl;
/*
 * Per-vCPU MSHV file descriptor. The macro argument is parenthesized so
 * the macro expands safely for any expression argument (the original
 * `cpu->accel->cpufd` would mis-bind for e.g. a cast or comma operand).
 */
#define mshv_vcpufd(cpu) ((cpu)->accel->cpufd)
/* cpu */
/*
 * FPU/SSE state blob exchanged with the hypervisor.
 * NOTE(review): layout resembles an FXSAVE-style area (8 x87 regs,
 * 16 XMM regs, fcw/fsw/mxcsr) — confirm against the mshv ABI docs.
 */
typedef struct MshvFPU {
uint8_t fpr[8][16];
uint16_t fcw;
uint16_t fsw;
uint8_t ftwx;
uint8_t pad1;
uint16_t last_opcode;
uint64_t last_ip;
uint64_t last_dp;
uint8_t xmm[16][16];
uint32_t mxcsr;
uint32_t pad2;
} MshvFPU;
/* Coarse classification of a vCPU exit, returned by mshv_run_vcpu() */
typedef enum MshvVmExit {
MshvVmExitIgnore = 0,
MshvVmExitShutdown = 1,
MshvVmExitSpecial = 2,
} MshvVmExit;
/* Outcome of mshv_remap_overlap_region() */
typedef enum MshvRemapResult {
MshvRemapOk = 0,
MshvRemapNoMapping = 1,
MshvRemapNoOverlap = 2,
} MshvRemapResult;
void mshv_init_mmio_emu(void);
/* vCPU lifecycle: create/destroy against the partition fd */
int mshv_create_vcpu(int vm_fd, uint8_t vp_index, int *cpu_fd);
void mshv_remove_vcpu(int vm_fd, int cpu_fd);
int mshv_configure_vcpu(const CPUState *cpu, const MshvFPU *fpu, uint64_t xcr0);
/* Register access; "standard" = GPRs/rip/rflags, "special" = segments etc. */
int mshv_get_standard_regs(CPUState *cpu);
int mshv_get_special_regs(CPUState *cpu);
/* Run the vCPU until the next intercept; *exit classifies the result */
int mshv_run_vcpu(int vm_fd, CPUState *cpu, hv_message *msg, MshvVmExit *exit);
int mshv_load_regs(CPUState *cpu);
int mshv_store_regs(CPUState *cpu);
int mshv_set_generic_regs(const CPUState *cpu, const hv_register_assoc *assocs,
size_t n_regs);
/* Arch-specific hooks implemented under target/i386/mshv */
int mshv_arch_put_registers(const CPUState *cpu);
void mshv_arch_init_vcpu(CPUState *cpu);
void mshv_arch_destroy_vcpu(CPUState *cpu);
void mshv_arch_amend_proc_features(
union hv_partition_synthetic_processor_features *features);
int mshv_arch_post_init_vm(int vm_fd);
#if defined COMPILING_PER_TARGET && defined CONFIG_MSHV_IS_POSSIBLE
/* Issue a raw root-partition hypercall via the MSHV_ROOT_HVCALL ioctl */
int mshv_hvcall(int fd, const struct mshv_root_hvcall *args);
#endif
/* memory */
/* One guest-physical mapping; "mapped" tracks whether it is installed */
typedef struct MshvMemorySlot {
uint64_t guest_phys_addr;
uint64_t memory_size;
uint64_t userspace_addr;
bool readonly;
bool mapped;
} MshvMemorySlot;
MshvRemapResult mshv_remap_overlap_region(int vm_fd, uint64_t gpa);
int mshv_guest_mem_read(uint64_t gpa, uint8_t *data, uintptr_t size,
bool is_secure_mode, bool instruction_fetch);
int mshv_guest_mem_write(uint64_t gpa, const uint8_t *data, uintptr_t size,
bool is_secure_mode);
void mshv_set_phys_mem(MshvMemoryListener *mml, MemoryRegionSection *section,
bool add);
void mshv_init_memory_slot_manager(MshvState *mshv_state);
/* msr */
typedef struct MshvMsrEntry {
uint32_t index;
uint32_t reserved; /* MBZ; pads data to 8-byte alignment */
uint64_t data;
} MshvMsrEntry;
/* Fixed-capacity MSR batch; nmsrs is the number of valid entries */
typedef struct MshvMsrEntries {
MshvMsrEntry entries[MSHV_MSR_ENTRIES_COUNT];
uint32_t nmsrs;
} MshvMsrEntries;
int mshv_configure_msr(const CPUState *cpu, const MshvMsrEntry *msrs,
size_t n_msrs);
/* interrupt */
void mshv_init_msicontrol(void);
int mshv_reserve_ioapic_msi_routes(int vm_fd);
#endif

291
linux-headers/linux/mshv.h Normal file
View file

@ -0,0 +1,291 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Userspace interfaces for /dev/mshv* devices and derived fds
*
* This file is divided into sections containing data structures and IOCTLs for
* a particular set of related devices or derived file descriptors.
*
* The IOCTL definitions are at the end of each section. They are grouped by
* device/fd, so that new IOCTLs can easily be added with a monotonically
* increasing number.
*/
#ifndef _LINUX_MSHV_H
#define _LINUX_MSHV_H
/*
 * NOTE(review): files under linux-headers/ are normally imported
 * verbatim from the kernel uapi headers via the update script; avoid
 * local modifications that would be lost on the next sync.
 */
#include <linux/types.h>
#define MSHV_IOCTL 0xB8
/*
*******************************************
* Entry point to main VMM APIs: /dev/mshv *
*******************************************
*/
/* Bit positions for mshv_create_partition.pt_flags */
enum {
MSHV_PT_BIT_LAPIC,
MSHV_PT_BIT_X2APIC,
MSHV_PT_BIT_GPA_SUPER_PAGES,
MSHV_PT_BIT_COUNT,
};
#define MSHV_PT_FLAGS_MASK ((1 << MSHV_PT_BIT_COUNT) - 1)
enum {
MSHV_PT_ISOLATION_NONE,
MSHV_PT_ISOLATION_COUNT,
};
/**
* struct mshv_create_partition - arguments for MSHV_CREATE_PARTITION
* @pt_flags: Bitmask of 1 << MSHV_PT_BIT_*
* @pt_isolation: MSHV_PT_ISOLATION_*
*
* Returns a file descriptor to act as a handle to a guest partition.
* At this point the partition is not yet initialized in the hypervisor.
* Some operations must be done with the partition in this state, e.g. setting
* so-called "early" partition properties. The partition can then be
* initialized with MSHV_INITIALIZE_PARTITION.
*/
struct mshv_create_partition {
__u64 pt_flags;
__u64 pt_isolation;
};
/* /dev/mshv */
#define MSHV_CREATE_PARTITION _IOW(MSHV_IOCTL, 0x00, struct mshv_create_partition)
/*
************************
* Child partition APIs *
************************
*/
struct mshv_create_vp {
__u32 vp_index;
};
/* Bit positions for mshv_user_mem_region.flags */
enum {
MSHV_SET_MEM_BIT_WRITABLE,
MSHV_SET_MEM_BIT_EXECUTABLE,
MSHV_SET_MEM_BIT_UNMAP,
MSHV_SET_MEM_BIT_COUNT
};
#define MSHV_SET_MEM_FLAGS_MASK ((1 << MSHV_SET_MEM_BIT_COUNT) - 1)
/* The hypervisor's "native" page size */
#define MSHV_HV_PAGE_SIZE 0x1000
/**
* struct mshv_user_mem_region - arguments for MSHV_SET_GUEST_MEMORY
* @size: Size of the memory region (bytes). Must be aligned to
* MSHV_HV_PAGE_SIZE
* @guest_pfn: Base guest page number to map
* @userspace_addr: Base address of userspace memory. Must be aligned to
* MSHV_HV_PAGE_SIZE
* @flags: Bitmask of 1 << MSHV_SET_MEM_BIT_*. If (1 << MSHV_SET_MEM_BIT_UNMAP)
* is set, ignore other bits.
* @rsvd: MBZ
*
* Map or unmap a region of userspace memory to Guest Physical Addresses (GPA).
* Mappings can't overlap in GPA space or userspace.
* To unmap, these fields must match an existing mapping.
*/
struct mshv_user_mem_region {
__u64 size;
__u64 guest_pfn;
__u64 userspace_addr;
__u8 flags;
__u8 rsvd[7];
};
/* Bit positions for mshv_user_irqfd.flags */
enum {
MSHV_IRQFD_BIT_DEASSIGN,
MSHV_IRQFD_BIT_RESAMPLE,
MSHV_IRQFD_BIT_COUNT,
};
#define MSHV_IRQFD_FLAGS_MASK ((1 << MSHV_IRQFD_BIT_COUNT) - 1)
struct mshv_user_irqfd {
__s32 fd;
__s32 resamplefd;
__u32 gsi;
__u32 flags;
};
/* Bit positions for mshv_user_ioeventfd.flags */
enum {
MSHV_IOEVENTFD_BIT_DATAMATCH,
MSHV_IOEVENTFD_BIT_PIO,
MSHV_IOEVENTFD_BIT_DEASSIGN,
MSHV_IOEVENTFD_BIT_COUNT,
};
#define MSHV_IOEVENTFD_FLAGS_MASK ((1 << MSHV_IOEVENTFD_BIT_COUNT) - 1)
struct mshv_user_ioeventfd {
__u64 datamatch;
__u64 addr; /* legal pio/mmio address */
__u32 len; /* 1, 2, 4, or 8 bytes */
__s32 fd;
__u32 flags;
__u8 rsvd[4];
};
/* One MSI route entry for MSHV_SET_MSI_ROUTING */
struct mshv_user_irq_entry {
__u32 gsi;
__u32 address_lo;
__u32 address_hi;
__u32 data;
};
struct mshv_user_irq_table {
__u32 nr;
__u32 rsvd; /* MBZ */
struct mshv_user_irq_entry entries[];
};
/* Access-state bitmaps (dirty/accessed tracking) */
enum {
MSHV_GPAP_ACCESS_TYPE_ACCESSED,
MSHV_GPAP_ACCESS_TYPE_DIRTY,
MSHV_GPAP_ACCESS_TYPE_COUNT /* Count of enum members */
};
enum {
MSHV_GPAP_ACCESS_OP_NOOP,
MSHV_GPAP_ACCESS_OP_CLEAR,
MSHV_GPAP_ACCESS_OP_SET,
MSHV_GPAP_ACCESS_OP_COUNT /* Count of enum members */
};
/**
* struct mshv_gpap_access_bitmap - arguments for MSHV_GET_GPAP_ACCESS_BITMAP
* @access_type: MSHV_GPAP_ACCESS_TYPE_* - The type of access to record in the
* bitmap
* @access_op: MSHV_GPAP_ACCESS_OP_* - Allows an optional clear or set of all
* the access states in the range, after retrieving the current
* states.
* @rsvd: MBZ
* @page_count: Number of pages
* @gpap_base: Base gpa page number
* @bitmap_ptr: Output buffer for bitmap, at least (page_count + 7) / 8 bytes
*
* Retrieve a bitmap of either ACCESSED or DIRTY bits for a given range of guest
* memory, and optionally clear or set the bits.
*/
struct mshv_gpap_access_bitmap {
__u8 access_type;
__u8 access_op;
__u8 rsvd[6];
__u64 page_count;
__u64 gpap_base;
__u64 bitmap_ptr;
};
/**
* struct mshv_root_hvcall - arguments for MSHV_ROOT_HVCALL
* @code: Hypercall code (HVCALL_*)
* @reps: in: Rep count ('repcount')
* out: Reps completed ('repcomp'). MBZ unless rep hvcall
* @in_sz: Size of input incl rep data. <= MSHV_HV_PAGE_SIZE
* @out_sz: Size of output buffer. <= MSHV_HV_PAGE_SIZE. MBZ if out_ptr is 0
* @status: in: MBZ
* out: HV_STATUS_* from hypercall
* @rsvd: MBZ
* @in_ptr: Input data buffer (struct hv_input_*). If used with partition or
* vp fd, partition id field is populated by kernel.
* @out_ptr: Output data buffer (optional)
*/
struct mshv_root_hvcall {
__u16 code;
__u16 reps;
__u16 in_sz;
__u16 out_sz;
__u16 status;
__u8 rsvd[6];
__u64 in_ptr;
__u64 out_ptr;
};
/* Partition fds created with MSHV_CREATE_PARTITION */
#define MSHV_INITIALIZE_PARTITION _IO(MSHV_IOCTL, 0x00)
#define MSHV_CREATE_VP _IOW(MSHV_IOCTL, 0x01, struct mshv_create_vp)
#define MSHV_SET_GUEST_MEMORY _IOW(MSHV_IOCTL, 0x02, struct mshv_user_mem_region)
#define MSHV_IRQFD _IOW(MSHV_IOCTL, 0x03, struct mshv_user_irqfd)
#define MSHV_IOEVENTFD _IOW(MSHV_IOCTL, 0x04, struct mshv_user_ioeventfd)
#define MSHV_SET_MSI_ROUTING _IOW(MSHV_IOCTL, 0x05, struct mshv_user_irq_table)
#define MSHV_GET_GPAP_ACCESS_BITMAP _IOWR(MSHV_IOCTL, 0x06, struct mshv_gpap_access_bitmap)
/* Generic hypercall */
#define MSHV_ROOT_HVCALL _IOWR(MSHV_IOCTL, 0x07, struct mshv_root_hvcall)
/*
********************************
* VP APIs for child partitions *
********************************
*/
#define MSHV_RUN_VP_BUF_SZ 256
/*
* VP state pages may be mapped to userspace via mmap().
* To specify which state page, use MSHV_VP_MMAP_OFFSET_ values multiplied by
* the system page size.
* e.g.
* long page_size = sysconf(_SC_PAGE_SIZE);
* void *reg_page = mmap(NULL, MSHV_HV_PAGE_SIZE, PROT_READ|PROT_WRITE,
* MAP_SHARED, vp_fd,
* MSHV_VP_MMAP_OFFSET_REGISTERS * page_size);
*/
enum {
MSHV_VP_MMAP_OFFSET_REGISTERS,
MSHV_VP_MMAP_OFFSET_INTERCEPT_MESSAGE,
MSHV_VP_MMAP_OFFSET_GHCB,
MSHV_VP_MMAP_OFFSET_COUNT
};
/**
* struct mshv_run_vp - argument for MSHV_RUN_VP
* @msg_buf: On success, the intercept message is copied here. It can be
* interpreted using the relevant hypervisor definitions.
*/
struct mshv_run_vp {
__u8 msg_buf[MSHV_RUN_VP_BUF_SZ];
};
/* State classes for MSHV_[GET,SET]_VP_STATE */
enum {
MSHV_VP_STATE_LAPIC, /* Local interrupt controller state (either arch) */
MSHV_VP_STATE_XSAVE, /* XSAVE data in compacted form (x86_64) */
MSHV_VP_STATE_SIMP,
MSHV_VP_STATE_SIEFP,
MSHV_VP_STATE_SYNTHETIC_TIMERS,
MSHV_VP_STATE_COUNT,
};
/**
* struct mshv_get_set_vp_state - arguments for MSHV_[GET,SET]_VP_STATE
* @type: MSHV_VP_STATE_*
* @rsvd: MBZ
* @buf_sz: in: 4k page-aligned size of buffer
* out: Actual size of data (on EINVAL, check this to see if buffer
* was too small)
* @buf_ptr: 4k page-aligned data buffer
*/
struct mshv_get_set_vp_state {
__u8 type;
__u8 rsvd[3];
__u32 buf_sz;
__u64 buf_ptr;
};
/* VP fds created with MSHV_CREATE_VP */
#define MSHV_RUN_VP _IOR(MSHV_IOCTL, 0x00, struct mshv_run_vp)
#define MSHV_GET_VP_STATE _IOWR(MSHV_IOCTL, 0x01, struct mshv_get_set_vp_state)
#define MSHV_SET_VP_STATE _IOWR(MSHV_IOCTL, 0x02, struct mshv_get_set_vp_state)
/*
* Generic hypercall
* Defined above in partition IOCTLs, avoid redefining it here
* #define MSHV_ROOT_HVCALL _IOWR(MSHV_IOCTL, 0x07, struct mshv_root_hvcall)
*/
#endif

View file

@ -334,6 +334,7 @@ elif cpu == 'x86_64'
'CONFIG_HVF': ['x86_64-softmmu'], 'CONFIG_HVF': ['x86_64-softmmu'],
'CONFIG_NVMM': ['i386-softmmu', 'x86_64-softmmu'], 'CONFIG_NVMM': ['i386-softmmu', 'x86_64-softmmu'],
'CONFIG_WHPX': ['i386-softmmu', 'x86_64-softmmu'], 'CONFIG_WHPX': ['i386-softmmu', 'x86_64-softmmu'],
'CONFIG_MSHV': ['x86_64-softmmu'],
} }
endif endif
@ -883,6 +884,14 @@ accelerators = []
if get_option('kvm').allowed() and host_os == 'linux' if get_option('kvm').allowed() and host_os == 'linux'
accelerators += 'CONFIG_KVM' accelerators += 'CONFIG_KVM'
endif endif
if get_option('mshv').allowed() and host_os == 'linux'
if get_option('mshv').enabled() and host_machine.cpu() != 'x86_64'
error('mshv accelerator requires x86_64 host')
endif
accelerators += 'CONFIG_MSHV'
endif
if get_option('whpx').allowed() and host_os == 'windows' if get_option('whpx').allowed() and host_os == 'windows'
if get_option('whpx').enabled() and host_machine.cpu() != 'x86_64' if get_option('whpx').enabled() and host_machine.cpu() != 'x86_64'
error('WHPX requires 64-bit host') error('WHPX requires 64-bit host')
@ -952,6 +961,9 @@ endif
if 'CONFIG_WHPX' not in accelerators and get_option('whpx').enabled() if 'CONFIG_WHPX' not in accelerators and get_option('whpx').enabled()
error('WHPX not available on this platform') error('WHPX not available on this platform')
endif endif
if 'CONFIG_MSHV' not in accelerators and get_option('mshv').enabled()
error('mshv not available on this platform')
endif
xen = not_found xen = not_found
if get_option('xen').enabled() or (get_option('xen').auto() and have_system) if get_option('xen').enabled() or (get_option('xen').auto() and have_system)
@ -3656,6 +3668,7 @@ if have_system
trace_events_subdirs += [ trace_events_subdirs += [
'accel/hvf', 'accel/hvf',
'accel/kvm', 'accel/kvm',
'accel/mshv',
'audio', 'audio',
'backends', 'backends',
'backends/tpm', 'backends/tpm',
@ -4239,6 +4252,7 @@ if have_rust
'--no-prepend-enum-name', '--no-prepend-enum-name',
'--allowlist-file', meson.project_source_root() + '/include/.*', '--allowlist-file', meson.project_source_root() + '/include/.*',
'--allowlist-file', meson.project_build_root() + '/.*', '--allowlist-file', meson.project_build_root() + '/.*',
'--blocklist-file', glib_pc.get_variable('includedir') + '/glib-2.0/.*',
] ]
if not rustfmt.found() if not rustfmt.found()
if bindgen.version().version_compare('<0.65.0') if bindgen.version().version_compare('<0.65.0')
@ -4826,6 +4840,7 @@ if have_system
summary_info += {'HVF support': config_all_accel.has_key('CONFIG_HVF')} summary_info += {'HVF support': config_all_accel.has_key('CONFIG_HVF')}
summary_info += {'WHPX support': config_all_accel.has_key('CONFIG_WHPX')} summary_info += {'WHPX support': config_all_accel.has_key('CONFIG_WHPX')}
summary_info += {'NVMM support': config_all_accel.has_key('CONFIG_NVMM')} summary_info += {'NVMM support': config_all_accel.has_key('CONFIG_NVMM')}
summary_info += {'MSHV support': config_all_accel.has_key('CONFIG_MSHV')}
summary_info += {'Xen support': xen.found()} summary_info += {'Xen support': xen.found()}
if xen.found() if xen.found()
summary_info += {'xen ctrl version': xen.version()} summary_info += {'xen ctrl version': xen.version()}

View file

@ -73,6 +73,8 @@ option('malloc', type : 'combo', choices : ['system', 'tcmalloc', 'jemalloc'],
option('kvm', type: 'feature', value: 'auto', option('kvm', type: 'feature', value: 'auto',
description: 'KVM acceleration support') description: 'KVM acceleration support')
option('mshv', type: 'feature', value: 'auto',
description: 'MSHV acceleration support')
option('whpx', type: 'feature', value: 'auto', option('whpx', type: 'feature', value: 'auto',
description: 'WHPX acceleration support') description: 'WHPX acceleration support')
option('hvf', type: 'feature', value: 'auto', option('hvf', type: 'feature', value: 'auto',

View file

@ -54,3 +54,32 @@
{ 'command': 'x-accel-stats', { 'command': 'x-accel-stats',
'returns': 'HumanReadableText', 'returns': 'HumanReadableText',
'features': [ 'unstable' ] } 'features': [ 'unstable' ] }
##
# @MshvInfo:
#
# Information about support for MSHV acceleration
#
# @enabled: true if MSHV acceleration is active
#
# @present: true if MSHV acceleration is built into this executable
#
# Since: 10.2.0
##
{ 'struct': 'MshvInfo', 'data': {'enabled': 'bool', 'present': 'bool'} }
##
# @query-mshv:
#
# Return information about MSHV acceleration
#
# Returns: @MshvInfo
#
# Since: 10.0.92
#
# .. qmp-example::
#
# -> { "execute": "query-mshv" }
# <- { "return": { "enabled": true, "present": true } }
##
{ 'command': 'query-mshv', 'returns': 'MshvInfo' }

View file

@ -28,7 +28,7 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
"-machine [type=]name[,prop[=value][,...]]\n" "-machine [type=]name[,prop[=value][,...]]\n"
" selects emulated machine ('-machine help' for list)\n" " selects emulated machine ('-machine help' for list)\n"
" property accel=accel1[:accel2[:...]] selects accelerator\n" " property accel=accel1[:accel2[:...]] selects accelerator\n"
" supported accelerators are kvm, xen, hvf, nvmm, whpx or tcg (default: tcg)\n" " supported accelerators are kvm, xen, hvf, nvmm, whpx, mshv or tcg (default: tcg)\n"
" vmport=on|off|auto controls emulation of vmport (default: auto)\n" " vmport=on|off|auto controls emulation of vmport (default: auto)\n"
" dump-guest-core=on|off include guest memory in a core dump (default=on)\n" " dump-guest-core=on|off include guest memory in a core dump (default=on)\n"
" mem-merge=on|off controls memory merge support (default: on)\n" " mem-merge=on|off controls memory merge support (default: on)\n"
@ -66,10 +66,10 @@ SRST
``accel=accels1[:accels2[:...]]`` ``accel=accels1[:accels2[:...]]``
This is used to enable an accelerator. Depending on the target This is used to enable an accelerator. Depending on the target
architecture, kvm, xen, hvf, nvmm, whpx or tcg can be available. architecture, kvm, xen, hvf, nvmm, whpx, mshv or tcg can be
By default, tcg is used. If there is more than one accelerator available. By default, tcg is used. If there is more than one
specified, the next one is used if the previous one fails to accelerator specified, the next one is used if the previous one
initialize. fails to initialize.
``vmport=on|off|auto`` ``vmport=on|off|auto``
Enables emulation of VMWare IO port, for vmmouse etc. auto says Enables emulation of VMWare IO port, for vmmouse etc. auto says
@ -226,7 +226,7 @@ ERST
DEF("accel", HAS_ARG, QEMU_OPTION_accel, DEF("accel", HAS_ARG, QEMU_OPTION_accel,
"-accel [accel=]accelerator[,prop[=value][,...]]\n" "-accel [accel=]accelerator[,prop[=value][,...]]\n"
" select accelerator (kvm, xen, hvf, nvmm, whpx or tcg; use 'help' for a list)\n" " select accelerator (kvm, xen, hvf, nvmm, whpx, mshv or tcg; use 'help' for a list)\n"
" igd-passthru=on|off (enable Xen integrated Intel graphics passthrough, default=off)\n" " igd-passthru=on|off (enable Xen integrated Intel graphics passthrough, default=off)\n"
" kernel-irqchip=on|off|split controls accelerated irqchip support (default=on)\n" " kernel-irqchip=on|off|split controls accelerated irqchip support (default=on)\n"
" kvm-shadow-mem=size of KVM shadow MMU in bytes\n" " kvm-shadow-mem=size of KVM shadow MMU in bytes\n"
@ -241,8 +241,8 @@ DEF("accel", HAS_ARG, QEMU_OPTION_accel,
SRST SRST
``-accel name[,prop=value[,...]]`` ``-accel name[,prop=value[,...]]``
This is used to enable an accelerator. Depending on the target This is used to enable an accelerator. Depending on the target
architecture, kvm, xen, hvf, nvmm, whpx or tcg can be available. By architecture, kvm, xen, hvf, nvmm, whpx, mshv or tcg can be available.
default, tcg is used. If there is more than one accelerator By default, tcg is used. If there is more than one accelerator
specified, the next one is used if the previous one fails to specified, the next one is used if the previous one fails to
initialize. initialize.

180
rust/Cargo.lock generated
View file

@ -58,15 +58,27 @@ dependencies = [
name = "bql" name = "bql"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"glib-sys",
"migration", "migration",
] ]
[[package]]
name = "cfg-expr"
version = "0.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a2c5f3bf25ec225351aa1c8e230d04d880d3bd89dea133537dafad4ae291e5c"
dependencies = [
"smallvec",
"target-lexicon",
]
[[package]] [[package]]
name = "chardev" name = "chardev"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"bql", "bql",
"common", "common",
"glib-sys",
"migration", "migration",
"qom", "qom",
"util", "util",
@ -86,6 +98,12 @@ version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b"
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]] [[package]]
name = "foreign" name = "foreign"
version = "0.3.1" version = "0.3.1"
@ -95,6 +113,28 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "glib-sys"
version = "0.21.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d09d3d0fddf7239521674e57b0465dfbd844632fec54f059f7f56112e3f927e1"
dependencies = [
"libc",
"system-deps",
]
[[package]]
name = "hashbrown"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d"
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]] [[package]]
name = "hpet" name = "hpet"
version = "0.1.0" version = "0.1.0"
@ -115,6 +155,7 @@ dependencies = [
"bql", "bql",
"chardev", "chardev",
"common", "common",
"glib-sys",
"migration", "migration",
"qemu_macros", "qemu_macros",
"qom", "qom",
@ -122,6 +163,16 @@ dependencies = [
"util", "util",
] ]
[[package]]
name = "indexmap"
version = "2.11.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b0f83760fb341a774ed326568e19f5a863af4a952def8c39f9ab92fd95b88e5"
dependencies = [
"equivalent",
"hashbrown",
]
[[package]] [[package]]
name = "itertools" name = "itertools"
version = "0.11.0" version = "0.11.0"
@ -137,14 +188,27 @@ version = "0.2.162"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398" checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398"
[[package]]
name = "memchr"
version = "2.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
[[package]] [[package]]
name = "migration" name = "migration"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"common", "common",
"glib-sys",
"util", "util",
] ]
[[package]]
name = "pkg-config"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]] [[package]]
name = "pl011" name = "pl011"
version = "0.1.0" version = "0.1.0"
@ -155,6 +219,7 @@ dependencies = [
"bql", "bql",
"chardev", "chardev",
"common", "common",
"glib-sys",
"hwcore", "hwcore",
"migration", "migration",
"qom", "qom",
@ -211,6 +276,7 @@ version = "0.1.0"
dependencies = [ dependencies = [
"bql", "bql",
"common", "common",
"glib-sys",
"migration", "migration",
"qemu_macros", "qemu_macros",
"util", "util",
@ -225,6 +291,50 @@ dependencies = [
"proc-macro2", "proc-macro2",
] ]
[[package]]
name = "serde"
version = "1.0.226"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0dca6411025b24b60bfa7ec1fe1f8e710ac09782dca409ee8237ba74b51295fd"
dependencies = [
"serde_core",
]
[[package]]
name = "serde_core"
version = "1.0.226"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba2ba63999edb9dac981fb34b3e5c0d111a69b0924e253ed29d83f7c99e966a4"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.226"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8db53ae22f34573731bafa1db20f04027b2d25e02d8205921b569171699cdb33"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_spanned"
version = "0.6.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3"
dependencies = [
"serde",
]
[[package]]
name = "smallvec"
version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]] [[package]]
name = "syn" name = "syn"
version = "2.0.104" version = "2.0.104"
@ -241,10 +351,30 @@ name = "system"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"common", "common",
"glib-sys",
"qom", "qom",
"util", "util",
] ]
[[package]]
name = "system-deps"
version = "7.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4be53aa0cba896d2dc615bd42bbc130acdcffa239e0a2d965ea5b3b2a86ffdb"
dependencies = [
"cfg-expr",
"heck",
"pkg-config",
"toml",
"version-compare",
]
[[package]]
name = "target-lexicon"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a"
[[package]] [[package]]
name = "tests" name = "tests"
version = "0.1.0" version = "0.1.0"
@ -259,6 +389,40 @@ dependencies = [
"util", "util",
] ]
[[package]]
name = "toml"
version = "0.8.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362"
dependencies = [
"serde",
"serde_spanned",
"toml_datetime",
"toml_edit",
]
[[package]]
name = "toml_datetime"
version = "0.6.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
dependencies = [
"serde",
]
[[package]]
name = "toml_edit"
version = "0.22.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
dependencies = [
"indexmap",
"serde",
"serde_spanned",
"toml_datetime",
"winnow",
]
[[package]] [[package]]
name = "trace" name = "trace"
version = "0.1.0" version = "0.1.0"
@ -279,11 +443,27 @@ dependencies = [
"anyhow", "anyhow",
"common", "common",
"foreign", "foreign",
"glib-sys",
"libc", "libc",
] ]
[[package]]
name = "version-compare"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "852e951cb7832cb45cb1169900d19760cfa39b82bc0ea9c0e5a14ae88411c98b"
[[package]] [[package]]
name = "version_check" name = "version_check"
version = "0.9.4" version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "winnow"
version = "0.7.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf"
dependencies = [
"memchr",
]

View file

@ -29,6 +29,7 @@ authors = ["The QEMU Project Developers <qemu-devel@nongnu.org>"]
anyhow = "~1.0" anyhow = "~1.0"
foreign = "~0.3.1" foreign = "~0.3.1"
libc = "0.2.162" libc = "0.2.162"
glib-sys = { version = "0.21.2", features = ["v2_66"] }
[workspace.lints.rust] [workspace.lints.rust]
unexpected_cfgs = { level = "deny", check-cfg = ['cfg(MESON)'] } unexpected_cfgs = { level = "deny", check-cfg = ['cfg(MESON)'] }

View file

@ -14,6 +14,7 @@ rust-version.workspace = true
[dependencies] [dependencies]
migration = { path = "../migration" } migration = { path = "../migration" }
glib-sys.workspace = true
[features] [features]
default = ["debug_cell"] default = ["debug_cell"]

View file

@ -38,6 +38,7 @@ _bql_rs = static_library(
rust_abi: 'rust', rust_abi: 'rust',
rust_args: _bql_cfg, rust_args: _bql_cfg,
link_with: [_migration_rs], link_with: [_migration_rs],
dependencies: [glib_sys_rs],
) )
bql_rs = declare_dependency(link_with: [_bql_rs], bql_rs = declare_dependency(link_with: [_bql_rs],

View file

@ -18,6 +18,10 @@
clippy::too_many_arguments clippy::too_many_arguments
)] )]
use glib_sys::{
guint, GArray, GHashTable, GHashTableIter, GList, GPollFD, GPtrArray, GQueue, GSList, GSource,
};
#[cfg(MESON)] #[cfg(MESON)]
include!("bindings.inc.rs"); include!("bindings.inc.rs");

View file

@ -13,6 +13,7 @@ repository.workspace = true
rust-version.workspace = true rust-version.workspace = true
[dependencies] [dependencies]
glib-sys = { workspace = true }
common = { path = "../common" } common = { path = "../common" }
bql = { path = "../bql" } bql = { path = "../bql" }
migration = { path = "../migration" } migration = { path = "../migration" }

View file

@ -36,7 +36,7 @@ _chardev_rs = static_library(
override_options: ['rust_std=2021', 'build.rust_std=2021'], override_options: ['rust_std=2021', 'build.rust_std=2021'],
rust_abi: 'rust', rust_abi: 'rust',
link_with: [_bql_rs, _migration_rs, _qom_rs, _util_rs], link_with: [_bql_rs, _migration_rs, _qom_rs, _util_rs],
dependencies: [common_rs, qemu_macros], dependencies: [glib_sys_rs, common_rs, qemu_macros],
) )
chardev_rs = declare_dependency(link_with: [_chardev_rs], dependencies: [chardev, qemuutil]) chardev_rs = declare_dependency(link_with: [_chardev_rs], dependencies: [chardev, qemuutil])

View file

@ -19,6 +19,10 @@
)] )]
use common::Zeroable; use common::Zeroable;
use glib_sys::{
gboolean, guint, GArray, GHashTable, GHashTableIter, GIOCondition, GList, GMainContext,
GPollFD, GPtrArray, GQueue, GSList, GSource, GSourceFunc,
};
#[cfg(MESON)] #[cfg(MESON)]
include!("bindings.inc.rs"); include!("bindings.inc.rs");

View file

@ -13,6 +13,7 @@ repository.workspace = true
rust-version.workspace = true rust-version.workspace = true
[dependencies] [dependencies]
glib-sys.workspace = true
bilge = { version = "0.2.0" } bilge = { version = "0.2.0" }
bilge-impl = { version = "0.2.0" } bilge-impl = { version = "0.2.0" }
bits = { path = "../../../bits" } bits = { path = "../../../bits" }

View file

@ -33,6 +33,7 @@ _libpl011_rs = static_library(
bilge_impl_rs, bilge_impl_rs,
bits_rs, bits_rs,
common_rs, common_rs,
glib_sys_rs,
util_rs, util_rs,
migration_rs, migration_rs,
bql_rs, bql_rs,

View file

@ -20,6 +20,11 @@
//! `bindgen`-generated declarations. //! `bindgen`-generated declarations.
use glib_sys::{
gboolean, guint, GArray, GByteArray, GHashTable, GHashTableIter, GIOCondition, GList,
GMainContext, GPollFD, GPtrArray, GQueue, GSList, GSource, GSourceFunc, GString,
};
#[cfg(MESON)] #[cfg(MESON)]
include!("bindings.inc.rs"); include!("bindings.inc.rs");

View file

@ -13,6 +13,7 @@ repository.workspace = true
rust-version.workspace = true rust-version.workspace = true
[dependencies] [dependencies]
glib-sys.workspace = true
qemu_macros = { path = "../../qemu-macros" } qemu_macros = { path = "../../qemu-macros" }
common = { path = "../../common" } common = { path = "../../common" }
bql = { path = "../../bql" } bql = { path = "../../bql" }

View file

@ -59,7 +59,7 @@ _hwcore_rs = static_library(
override_options: ['rust_std=2021', 'build.rust_std=2021'], override_options: ['rust_std=2021', 'build.rust_std=2021'],
rust_abi: 'rust', rust_abi: 'rust',
link_with: [_bql_rs, _chardev_rs, _migration_rs, _qom_rs, _system_rs, _util_rs], link_with: [_bql_rs, _chardev_rs, _migration_rs, _qom_rs, _system_rs, _util_rs],
dependencies: [qemu_macros, common_rs], dependencies: [glib_sys_rs, qemu_macros, common_rs],
) )
hwcore_rs = declare_dependency(link_with: [_hwcore_rs], hwcore_rs = declare_dependency(link_with: [_hwcore_rs],

View file

@ -20,6 +20,9 @@
use chardev::bindings::Chardev; use chardev::bindings::Chardev;
use common::Zeroable; use common::Zeroable;
use glib_sys::{
GArray, GByteArray, GHashTable, GHashTableIter, GList, GPtrArray, GQueue, GSList, GString,
};
use migration::bindings::VMStateDescription; use migration::bindings::VMStateDescription;
use qom::bindings::ObjectClass; use qom::bindings::ObjectClass;
use system::bindings::MemoryRegion; use system::bindings::MemoryRegion;

View file

@ -2,12 +2,14 @@ subproject('anyhow-1-rs', required: true)
subproject('bilge-0.2-rs', required: true) subproject('bilge-0.2-rs', required: true)
subproject('bilge-impl-0.2-rs', required: true) subproject('bilge-impl-0.2-rs', required: true)
subproject('foreign-0.3-rs', required: true) subproject('foreign-0.3-rs', required: true)
subproject('glib-sys-0.21-rs', required: true)
subproject('libc-0.2-rs', required: true) subproject('libc-0.2-rs', required: true)
anyhow_rs = dependency('anyhow-1-rs') anyhow_rs = dependency('anyhow-1-rs')
bilge_rs = dependency('bilge-0.2-rs') bilge_rs = dependency('bilge-0.2-rs')
bilge_impl_rs = dependency('bilge-impl-0.2-rs') bilge_impl_rs = dependency('bilge-impl-0.2-rs')
foreign_rs = dependency('foreign-0.3-rs') foreign_rs = dependency('foreign-0.3-rs')
glib_sys_rs = dependency('glib-sys-0.21-rs')
libc_rs = dependency('libc-0.2-rs') libc_rs = dependency('libc-0.2-rs')
subproject('proc-macro2-1-rs', required: true) subproject('proc-macro2-1-rs', required: true)

View file

@ -15,6 +15,7 @@ rust-version.workspace = true
[dependencies] [dependencies]
common = { path = "../common" } common = { path = "../common" }
util = { path = "../util" } util = { path = "../util" }
glib-sys.workspace = true
[lints] [lints]
workspace = true workspace = true

View file

@ -38,7 +38,7 @@ _migration_rs = static_library(
override_options: ['rust_std=2021', 'build.rust_std=2021'], override_options: ['rust_std=2021', 'build.rust_std=2021'],
rust_abi: 'rust', rust_abi: 'rust',
link_with: [_util_rs], link_with: [_util_rs],
dependencies: [common_rs], dependencies: [common_rs, glib_sys_rs],
) )
migration_rs = declare_dependency(link_with: [_migration_rs], migration_rs = declare_dependency(link_with: [_migration_rs],

View file

@ -19,6 +19,7 @@
)] )]
use common::Zeroable; use common::Zeroable;
use glib_sys::{GHashTable, GHashTableIter, GList, GPtrArray, GQueue, GSList};
#[cfg(MESON)] #[cfg(MESON)]
include!("bindings.inc.rs"); include!("bindings.inc.rs");

View file

@ -18,6 +18,7 @@ bql = { path = "../bql" }
migration = { path = "../migration" } migration = { path = "../migration" }
qemu_macros = { path = "../qemu-macros" } qemu_macros = { path = "../qemu-macros" }
util = { path = "../util" } util = { path = "../util" }
glib-sys.workspace = true
[lints] [lints]
workspace = true workspace = true

View file

@ -29,7 +29,7 @@ _qom_rs = static_library(
override_options: ['rust_std=2021', 'build.rust_std=2021'], override_options: ['rust_std=2021', 'build.rust_std=2021'],
rust_abi: 'rust', rust_abi: 'rust',
link_with: [_bql_rs, _migration_rs], link_with: [_bql_rs, _migration_rs],
dependencies: [common_rs, qemu_macros], dependencies: [common_rs, glib_sys_rs, qemu_macros],
) )
qom_rs = declare_dependency(link_with: [_qom_rs], dependencies: [qemu_macros, qom]) qom_rs = declare_dependency(link_with: [_qom_rs], dependencies: [qemu_macros, qom])

View file

@ -18,6 +18,8 @@
clippy::too_many_arguments clippy::too_many_arguments
)] )]
use glib_sys::{GHashTable, GHashTableIter, GList, GPtrArray, GQueue, GSList};
#[cfg(MESON)] #[cfg(MESON)]
include!("bindings.inc.rs"); include!("bindings.inc.rs");

View file

@ -16,6 +16,7 @@ rust-version.workspace = true
common = { path = "../common" } common = { path = "../common" }
qom = { path = "../qom" } qom = { path = "../qom" }
util = { path = "../util" } util = { path = "../util" }
glib-sys.workspace = true
[lints] [lints]
workspace = true workspace = true

View file

@ -36,7 +36,7 @@ _system_rs = static_library(
override_options: ['rust_std=2021', 'build.rust_std=2021'], override_options: ['rust_std=2021', 'build.rust_std=2021'],
rust_abi: 'rust', rust_abi: 'rust',
link_with: [_bql_rs, _migration_rs, _qom_rs, _util_rs], link_with: [_bql_rs, _migration_rs, _qom_rs, _util_rs],
dependencies: [common_rs, qemu_macros], dependencies: [glib_sys_rs, common_rs, qemu_macros],
) )
system_rs = declare_dependency(link_with: [_system_rs], system_rs = declare_dependency(link_with: [_system_rs],

View file

@ -19,6 +19,10 @@
)] )]
use common::Zeroable; use common::Zeroable;
use glib_sys::{
guint, GArray, GByteArray, GHashTable, GHashTableIter, GList, GPollFD, GPtrArray, GQueue,
GSList, GString,
};
#[cfg(MESON)] #[cfg(MESON)]
include!("bindings.inc.rs"); include!("bindings.inc.rs");

View file

@ -15,6 +15,7 @@ rust-version.workspace = true
[dependencies] [dependencies]
anyhow = { workspace = true } anyhow = { workspace = true }
foreign = { workspace = true } foreign = { workspace = true }
glib-sys = { workspace = true }
libc = { workspace = true } libc = { workspace = true }
common = { path = "../common" } common = { path = "../common" }

View file

@ -40,7 +40,7 @@ _util_rs = static_library(
), ),
override_options: ['rust_std=2021', 'build.rust_std=2021'], override_options: ['rust_std=2021', 'build.rust_std=2021'],
rust_abi: 'rust', rust_abi: 'rust',
dependencies: [anyhow_rs, libc_rs, foreign_rs, common_rs, qom, qemuutil], dependencies: [anyhow_rs, libc_rs, foreign_rs, glib_sys_rs, common_rs, qom, qemuutil],
) )
util_rs = declare_dependency(link_with: [_util_rs], dependencies: [qemuutil, qom]) util_rs = declare_dependency(link_with: [_util_rs], dependencies: [qemuutil, qom])

View file

@ -18,6 +18,8 @@
clippy::too_many_arguments clippy::too_many_arguments
)] )]
use glib_sys::{guint, GList, GPollFD, GQueue, GSList, GString};
#[cfg(MESON)] #[cfg(MESON)]
include!("bindings.inc.rs"); include!("bindings.inc.rs");

View file

@ -36,6 +36,7 @@ subprojects=(
bilge-impl-0.2-rs bilge-impl-0.2-rs
either-1-rs either-1-rs
foreign-0.3-rs foreign-0.3-rs
glib-sys-0.21-rs
itertools-0.11-rs itertools-0.11-rs
keycodemapdb keycodemapdb
libc-0.2-rs libc-0.2-rs

View file

@ -155,6 +155,7 @@ meson_options_help() {
printf "%s\n" ' membarrier membarrier system call (for Linux 4.14+ or Windows' printf "%s\n" ' membarrier membarrier system call (for Linux 4.14+ or Windows'
printf "%s\n" ' modules modules support (non Windows)' printf "%s\n" ' modules modules support (non Windows)'
printf "%s\n" ' mpath Multipath persistent reservation passthrough' printf "%s\n" ' mpath Multipath persistent reservation passthrough'
printf "%s\n" ' mshv MSHV acceleration support'
printf "%s\n" ' multiprocess Out of process device emulation support' printf "%s\n" ' multiprocess Out of process device emulation support'
printf "%s\n" ' netmap netmap network backend support' printf "%s\n" ' netmap netmap network backend support'
printf "%s\n" ' nettle nettle cryptography support' printf "%s\n" ' nettle nettle cryptography support'
@ -410,6 +411,8 @@ _meson_option_parse() {
--disable-modules) printf "%s" -Dmodules=disabled ;; --disable-modules) printf "%s" -Dmodules=disabled ;;
--enable-mpath) printf "%s" -Dmpath=enabled ;; --enable-mpath) printf "%s" -Dmpath=enabled ;;
--disable-mpath) printf "%s" -Dmpath=disabled ;; --disable-mpath) printf "%s" -Dmpath=disabled ;;
--enable-mshv) printf "%s" -Dmshv=enabled ;;
--disable-mshv) printf "%s" -Dmshv=disabled ;;
--enable-multiprocess) printf "%s" -Dmultiprocess=enabled ;; --enable-multiprocess) printf "%s" -Dmultiprocess=enabled ;;
--disable-multiprocess) printf "%s" -Dmultiprocess=disabled ;; --disable-multiprocess) printf "%s" -Dmultiprocess=disabled ;;
--enable-netmap) printf "%s" -Dnetmap=enabled ;; --enable-netmap) printf "%s" -Dnetmap=enabled ;;

View file

@ -4,7 +4,7 @@ set -eu
cat <<EOF cat <<EOF
// @generated // @generated
// This file is autogenerated by scripts/rust_root_crate.sh // This file is autogenerated by scripts/rust/rust_root_crate.sh
EOF EOF

View file

@ -196,7 +196,7 @@ rm -rf "$output/linux-headers/linux"
mkdir -p "$output/linux-headers/linux" mkdir -p "$output/linux-headers/linux"
for header in const.h stddef.h kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \ for header in const.h stddef.h kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \
psci.h psp-sev.h userfaultfd.h memfd.h mman.h nvme_ioctl.h \ psci.h psp-sev.h userfaultfd.h memfd.h mman.h nvme_ioctl.h \
vduse.h iommufd.h bits.h; do vduse.h iommufd.h bits.h mshv.h; do
cp "$hdrdir/include/linux/$header" "$output/linux-headers/linux" cp "$hdrdir/include/linux/$header" "$output/linux-headers/linux"
done done

View file

@ -6,21 +6,22 @@
/keycodemapdb /keycodemapdb
/libvfio-user /libvfio-user
/slirp /slirp
/anyhow-1.0.98 /anyhow-*
/arbitrary-int-1.2.7 /arbitrary-int-*
/attrs-0.2.9 /attrs-*
/bilge-0.2.0 /bilge-*
/bilge-impl-0.2.0 /bilge-impl-*
/either-1.12.0 /either-*
/foreign-0.3.1 /foreign-*
/itertools-0.11.0 /glib-sys-*
/libc-0.2.162 /itertools-*
/proc-macro-error-1.0.4 /libc-*
/proc-macro-error-attr-1.0.4 /proc-macro-error-*
/proc-macro2-1.0.95 /proc-macro-error-attr-*
/quote-1.0.36 /proc-macro*
/syn-2.0.66 /quote-*
/unicode-ident-1.0.12 /syn-*
/unicode-ident-*
# Workaround for Meson v1.9.0 https://github.com/mesonbuild/meson/issues/14948 # Workaround for Meson v1.9.0 https://github.com/mesonbuild/meson/issues/14948
/.wraplock /.wraplock

View file

@ -0,0 +1,7 @@
[wrap-file]
directory = glib-sys-0.21.2
source_url = https://crates.io/api/v1/crates/glib-sys/0.21.2/download
source_filename = glib-sys-0.21.2.tar.gz
source_hash = d09d3d0fddf7239521674e57b0465dfbd844632fec54f059f7f56112e3f927e1
#method = cargo
patch_directory = glib-sys-0.21-rs

View file

@ -0,0 +1,33 @@
project('glib-sys-0.21-rs', 'rust',
meson_version: '>=1.5.0',
version: '0.21.2',
license: 'MIT',
default_options: [])
subproject('libc-0.2-rs', required: true)
libc_rs = dependency('libc-0.2-rs')
_glib_sys_rs = static_library(
'glib_sys',
files('src/lib.rs'),
gnu_symbol_visibility: 'hidden',
override_options: ['rust_std=2021', 'build.rust_std=2021'],
rust_abi: 'rust',
rust_args: [
'--cap-lints', 'allow',
'--cfg', 'feature="v2_66"',
'--cfg', 'feature="v2_64"',
'--cfg', 'feature="v2_62"',
'--cfg', 'feature="v2_60"',
'--cfg', 'feature="v2_58"',
],
# should also link with glib; don't bother doing it here since all
# QEMU targets have it
dependencies: [libc_rs],
)
glib_sys_dep = declare_dependency(
link_with: _glib_sys_rs,
)
meson.override_dependency('glib-sys-0.21-rs', glib_sys_dep)

View file

@ -7539,6 +7539,20 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w)
#endif #endif
break; break;
case FEAT_7_0_EDX:
/*
* Windows does not like ARCH_CAPABILITIES on AMD machines at all.
* Do not show the fake ARCH_CAPABILITIES MSR that KVM sets up,
* except if needed for migration.
*
* When arch_cap_always_on is removed, this tweak can move to
* kvm_arch_get_supported_cpuid.
*/
if (cpu && IS_AMD_CPU(&cpu->env) && !cpu->arch_cap_always_on) {
unavail = CPUID_7_0_EDX_ARCH_CAPABILITIES;
}
break;
default: default:
break; break;
} }
@ -7894,6 +7908,11 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
/* Fixup overflow: max value for bits 23-16 is 255. */ /* Fixup overflow: max value for bits 23-16 is 255. */
*ebx |= MIN(num, 255) << 16; *ebx |= MIN(num, 255) << 16;
} }
if (cpu->pdcm_on_even_without_pmu) {
if (!cpu->enable_pmu) {
*ecx &= ~CPUID_EXT_PDCM;
}
}
break; break;
case 2: { /* cache info: needed for Pentium Pro compatibility */ case 2: { /* cache info: needed for Pentium Pro compatibility */
const CPUCaches *caches; const CPUCaches *caches;
@ -8944,9 +8963,11 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
} }
} }
/* PDCM is fixed1 bit for TDX */ if (!cpu->pdcm_on_even_without_pmu) {
if (!cpu->enable_pmu && !is_tdx_vm()) { /* PDCM is fixed1 bit for TDX */
env->features[FEAT_1_ECX] &= ~CPUID_EXT_PDCM; if (!cpu->enable_pmu && !is_tdx_vm()) {
env->features[FEAT_1_ECX] &= ~CPUID_EXT_PDCM;
}
} }
for (i = 0; i < ARRAY_SIZE(feature_dependencies); i++) { for (i = 0; i < ARRAY_SIZE(feature_dependencies); i++) {
@ -10004,6 +10025,11 @@ static const Property x86_cpu_properties[] = {
true), true),
DEFINE_PROP_BOOL("x-l1-cache-per-thread", X86CPU, l1_cache_per_core, true), DEFINE_PROP_BOOL("x-l1-cache-per-thread", X86CPU, l1_cache_per_core, true),
DEFINE_PROP_BOOL("x-force-cpuid-0x1f", X86CPU, force_cpuid_0x1f, false), DEFINE_PROP_BOOL("x-force-cpuid-0x1f", X86CPU, force_cpuid_0x1f, false),
DEFINE_PROP_BOOL("x-arch-cap-always-on", X86CPU,
arch_cap_always_on, false),
DEFINE_PROP_BOOL("x-pdcm-on-even-without-pmu", X86CPU,
pdcm_on_even_without_pmu, false),
}; };
#ifndef CONFIG_USER_ONLY #ifndef CONFIG_USER_ONLY

View file

@ -435,9 +435,11 @@ typedef enum X86Seg {
#define MSR_SMI_COUNT 0x34 #define MSR_SMI_COUNT 0x34
#define MSR_CORE_THREAD_COUNT 0x35 #define MSR_CORE_THREAD_COUNT 0x35
#define MSR_MTRRcap 0xfe #define MSR_MTRRcap 0xfe
#define MSR_MTRR_MEM_TYPE_WB 0x06
#define MSR_MTRRcap_VCNT 8 #define MSR_MTRRcap_VCNT 8
#define MSR_MTRRcap_FIXRANGE_SUPPORT (1 << 8) #define MSR_MTRRcap_FIXRANGE_SUPPORT (1 << 8)
#define MSR_MTRRcap_WC_SUPPORTED (1 << 10) #define MSR_MTRRcap_WC_SUPPORTED (1 << 10)
#define MSR_MTRR_ENABLE (1 << 11)
#define MSR_IA32_SYSENTER_CS 0x174 #define MSR_IA32_SYSENTER_CS 0x174
#define MSR_IA32_SYSENTER_ESP 0x175 #define MSR_IA32_SYSENTER_ESP 0x175
@ -2126,7 +2128,7 @@ typedef struct CPUArchState {
QEMUTimer *xen_periodic_timer; QEMUTimer *xen_periodic_timer;
QemuMutex xen_timers_lock; QemuMutex xen_timers_lock;
#endif #endif
#if defined(CONFIG_HVF) #if defined(CONFIG_HVF) || defined(CONFIG_MSHV)
void *emu_mmio_buf; void *emu_mmio_buf;
#endif #endif
@ -2314,6 +2316,18 @@ struct ArchCPU {
/* Forcefully disable KVM PV features not exposed in guest CPUIDs */ /* Forcefully disable KVM PV features not exposed in guest CPUIDs */
bool kvm_pv_enforce_cpuid; bool kvm_pv_enforce_cpuid;
/*
* Expose arch-capabilities unconditionally even on AMD models, for backwards
* compatibility with QEMU <10.1.
*/
bool arch_cap_always_on;
/*
* Backwards compatibility with QEMU <10.1. The PDCM feature is now disabled when
* PMU is not available, but prior to 10.1 it was enabled even if PMU is off.
*/
bool pdcm_on_even_without_pmu;
/* Number of physical address bits supported */ /* Number of physical address bits supported */
uint32_t phys_bits; uint32_t phys_bits;

View file

@ -1,5 +1,8 @@
i386_system_ss.add(when: [hvf, 'CONFIG_HVF'], if_true: files( emulator_files = files(
'x86_decode.c', 'x86_decode.c',
'x86_emu.c', 'x86_emu.c',
'x86_flags.c', 'x86_flags.c',
)) )
i386_system_ss.add(when: [hvf, 'CONFIG_HVF'], if_true: emulator_files)
i386_system_ss.add(when: 'CONFIG_MSHV', if_true: emulator_files)

View file

@ -71,10 +71,16 @@ static inline uint64_t decode_bytes(CPUX86State *env, struct x86_decode *decode,
VM_PANIC_EX("%s invalid size %d\n", __func__, size); VM_PANIC_EX("%s invalid size %d\n", __func__, size);
break; break;
} }
target_ulong va = linear_rip(env_cpu(env), env->eip) + decode->len;
emul_ops->read_mem(env_cpu(env), &val, va, size); /* copy the bytes from the instruction stream, if available */
if (decode->stream && decode->len + size <= decode->stream->len) {
memcpy(&val, decode->stream->bytes + decode->len, size);
} else {
target_ulong va = linear_rip(env_cpu(env), env->eip) + decode->len;
emul_ops->fetch_instruction(env_cpu(env), &val, va, size);
}
decode->len += size; decode->len += size;
return val; return val;
} }
@ -2076,9 +2082,10 @@ static void decode_opcodes(CPUX86State *env, struct x86_decode *decode)
} }
} }
uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode) static uint32_t decode_opcode(CPUX86State *env, struct x86_decode *decode)
{ {
memset(decode, 0, sizeof(*decode)); memset(decode, 0, sizeof(*decode));
decode_prefix(env, decode); decode_prefix(env, decode);
set_addressing_size(env, decode); set_addressing_size(env, decode);
set_operand_size(env, decode); set_operand_size(env, decode);
@ -2088,6 +2095,18 @@ uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode)
return decode->len; return decode->len;
} }
uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode)
{
return decode_opcode(env, decode);
}
uint32_t decode_instruction_stream(CPUX86State *env, struct x86_decode *decode,
struct x86_insn_stream *stream)
{
decode->stream = stream;
return decode_opcode(env, decode);
}
void init_decoder(void) void init_decoder(void)
{ {
int i; int i;

View file

@ -272,6 +272,11 @@ typedef struct x86_decode_op {
}; };
} x86_decode_op; } x86_decode_op;
typedef struct x86_insn_stream {
const uint8_t *bytes;
size_t len;
} x86_insn_stream;
typedef struct x86_decode { typedef struct x86_decode {
int len; int len;
uint8_t opcode[4]; uint8_t opcode[4];
@ -298,11 +303,15 @@ typedef struct x86_decode {
struct x86_modrm modrm; struct x86_modrm modrm;
struct x86_decode_op op[4]; struct x86_decode_op op[4];
bool is_fpu; bool is_fpu;
x86_insn_stream *stream;
} x86_decode; } x86_decode;
uint64_t sign(uint64_t val, int size); uint64_t sign(uint64_t val, int size);
uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode); uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode);
uint32_t decode_instruction_stream(CPUX86State *env, struct x86_decode *decode,
struct x86_insn_stream *stream);
void *get_reg_ref(CPUX86State *env, int reg, int rex_present, void *get_reg_ref(CPUX86State *env, int reg, int rex_present,
int is_extended, int size); int is_extended, int size);

View file

@ -1246,7 +1246,8 @@ static void init_cmd_handler(void)
bool exec_instruction(CPUX86State *env, struct x86_decode *ins) bool exec_instruction(CPUX86State *env, struct x86_decode *ins)
{ {
if (!_cmd_handler[ins->cmd].handler) { if (!_cmd_handler[ins->cmd].handler) {
printf("Unimplemented handler (" TARGET_FMT_lx ") for %d (%x %x) \n", env->eip, printf("Unimplemented handler (" TARGET_FMT_lx ") for %d (%x %x)\n",
env->eip,
ins->cmd, ins->opcode[0], ins->cmd, ins->opcode[0],
ins->opcode_len > 1 ? ins->opcode[1] : 0); ins->opcode_len > 1 ? ins->opcode[1] : 0);
env->eip += ins->len; env->eip += ins->len;

View file

@ -24,6 +24,8 @@
#include "cpu.h" #include "cpu.h"
struct x86_emul_ops { struct x86_emul_ops {
void (*fetch_instruction)(CPUState *cpu, void *data, target_ulong addr,
int bytes);
void (*read_mem)(CPUState *cpu, void *data, target_ulong addr, int bytes); void (*read_mem)(CPUState *cpu, void *data, target_ulong addr, int bytes);
void (*write_mem)(CPUState *cpu, void *data, target_ulong addr, int bytes); void (*write_mem)(CPUState *cpu, void *data, target_ulong addr, int bytes);
void (*read_segment_descriptor)(CPUState *cpu, struct x86_segment_descriptor *desc, void (*read_segment_descriptor)(CPUState *cpu, struct x86_segment_descriptor *desc,

View file

@ -503,12 +503,8 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
* Linux v4.17-v4.20 incorrectly return ARCH_CAPABILITIES on SVM hosts. * Linux v4.17-v4.20 incorrectly return ARCH_CAPABILITIES on SVM hosts.
* We can detect the bug by checking if MSR_IA32_ARCH_CAPABILITIES is * We can detect the bug by checking if MSR_IA32_ARCH_CAPABILITIES is
* returned by KVM_GET_MSR_INDEX_LIST. * returned by KVM_GET_MSR_INDEX_LIST.
*
* But also, because Windows does not like ARCH_CAPABILITIES on AMD
* mcahines at all, do not show the fake ARCH_CAPABILITIES MSR that
* KVM sets up.
*/ */
if (!has_msr_arch_capabs || !(edx & CPUID_7_0_EDX_ARCH_CAPABILITIES)) { if (!has_msr_arch_capabs) {
ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES; ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES;
} }
} else if (function == 7 && index == 1 && reg == R_EAX) { } else if (function == 7 && index == 1 && reg == R_EAX) {

View file

@ -13,6 +13,7 @@ i386_ss.add(when: 'CONFIG_KVM', if_true: files('host-cpu.c'))
i386_ss.add(when: 'CONFIG_HVF', if_true: files('host-cpu.c')) i386_ss.add(when: 'CONFIG_HVF', if_true: files('host-cpu.c'))
i386_ss.add(when: 'CONFIG_WHPX', if_true: files('host-cpu.c')) i386_ss.add(when: 'CONFIG_WHPX', if_true: files('host-cpu.c'))
i386_ss.add(when: 'CONFIG_NVMM', if_true: files('host-cpu.c')) i386_ss.add(when: 'CONFIG_NVMM', if_true: files('host-cpu.c'))
i386_ss.add(when: 'CONFIG_MSHV', if_true: files('host-cpu.c'))
i386_system_ss = ss.source_set() i386_system_ss = ss.source_set()
i386_system_ss.add(files( i386_system_ss.add(files(
@ -34,6 +35,7 @@ subdir('nvmm')
subdir('hvf') subdir('hvf')
subdir('tcg') subdir('tcg')
subdir('emulate') subdir('emulate')
subdir('mshv')
target_arch += {'i386': i386_ss} target_arch += {'i386': i386_ss}
target_system_arch += {'i386': i386_system_ss} target_system_arch += {'i386': i386_system_ss}

View file

@ -0,0 +1,8 @@
i386_mshv_ss = ss.source_set()
i386_mshv_ss.add(files(
'mshv-cpu.c',
'x86.c',
))
i386_system_ss.add_all(when: 'CONFIG_MSHV', if_true: i386_mshv_ss)

1763
target/i386/mshv/mshv-cpu.c Normal file

File diff suppressed because it is too large Load diff

297
target/i386/mshv/x86.c Normal file
View file

@ -0,0 +1,297 @@
/*
* QEMU MSHV support
*
* Copyright Microsoft, Corp. 2025
*
* Authors: Magnus Kulke <magnuskulke@microsoft.com>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "qemu/osdep.h"
#include "cpu.h"
#include "emulate/x86_decode.h"
#include "emulate/x86_emu.h"
#include "qemu/typedefs.h"
#include "qemu/error-report.h"
#include "system/mshv.h"
/* RW or Exec segment */
static const uint8_t RWRX_SEGMENT_TYPE = 0x2;
/* Descriptor type bit 3: set for code segments, clear for data segments. */
static const uint8_t CODE_SEGMENT_TYPE = 0x8;
/* Descriptor type bit 2: expand-down flag (data segments only). */
static const uint8_t EXPAND_DOWN_SEGMENT_TYPE = 0x4;

/* Coarse execution mode of the vCPU, derived from CR0.PE and EFER.LME/LMA. */
typedef enum CpuMode {
    REAL_MODE,
    PROTECTED_MODE,
    LONG_MODE,
} CpuMode;
/*
 * Classify the vCPU's current execution mode.
 * Long mode implies protected mode, so it takes precedence.
 */
static CpuMode cpu_mode(CPUState *cpu)
{
    if (!x86_is_protected(cpu)) {
        return REAL_MODE;
    }
    return x86_is_long_mode(cpu) ? LONG_MODE : PROTECTED_MODE;
}
/* True if the descriptor type denotes a read-only data segment. */
static bool segment_type_ro(const SegmentCache *seg)
{
    uint32_t desc_type = (seg->flags >> DESC_TYPE_SHIFT) & 15;

    /* only the writable/readable bit may be set for an RO data segment */
    return !(desc_type & ~RWRX_SEGMENT_TYPE);
}
/* True if the descriptor type denotes a code segment (type bit 3 set). */
static bool segment_type_code(const SegmentCache *seg)
{
    uint32_t desc_type = (seg->flags >> DESC_TYPE_SHIFT) & 15;

    return (desc_type & CODE_SEGMENT_TYPE) != 0;
}
/*
 * True if the segment is an expand-down data segment.
 * The expand-down bit only has that meaning for data segments;
 * for code segments the same bit encodes "conforming".
 */
static bool segment_expands_down(const SegmentCache *seg)
{
    uint32_t desc_type = (seg->flags >> DESC_TYPE_SHIFT) & 15;

    return !segment_type_code(seg) &&
           (desc_type & EXPAND_DOWN_SEGMENT_TYPE) != 0;
}
/*
 * Effective segment limit in bytes.  With the granularity bit set the
 * stored limit counts 4 KiB pages, so scale it and fill the low 12 bits.
 */
static uint32_t segment_limit(const SegmentCache *seg)
{
    uint32_t byte_limit = seg->limit;

    if (seg->flags & DESC_G_MASK) {
        byte_limit = (byte_limit << 12) | 0xFFF;
    }
    return byte_limit;
}
/* D/B flag of the segment: 1 for a 32-bit segment, 0 for 16-bit. */
static uint8_t segment_db(const SegmentCache *seg)
{
    uint8_t db_bit = (seg->flags >> DESC_B_SHIFT) & 1;

    return db_bit;
}
/*
 * Upper bound of an expand-down segment: 4 GiB - 1 for 32-bit segments
 * (D/B set), 64 KiB - 1 otherwise.
 */
static uint32_t segment_max_limit(const SegmentCache *seg)
{
    return segment_db(seg) ? 0xFFFFFFFF : 0xFFFF;
}
/*
 * Translate a logical (segment-relative) address into a linear address,
 * applying the segment base and, outside long mode, the segment limit
 * and access checks.
 *
 * Returns 0 on success and stores the result in *linear_addr; returns -1
 * (after reporting the reason) when a check fails.
 */
static int linearize(CPUState *cpu,
                     target_ulong logical_addr, target_ulong *linear_addr,
                     X86Seg seg_idx)
{
    enum CpuMode mode;
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    SegmentCache *seg = &env->segs[seg_idx];
    target_ulong base = seg->base;
    target_ulong logical_addr_32b;
    uint32_t limit;
    /* TODO: the emulator will not pass us "write" indicator yet */
    bool write = false;

    mode = cpu_mode(cpu);
    switch (mode) {
    case LONG_MODE:
        /* no limit checking in long mode; only detect address wraparound */
        if (__builtin_add_overflow(logical_addr, base, linear_addr)) {
            error_report("Address overflow");
            return -1;
        }
        break;
    case PROTECTED_MODE:
    case REAL_MODE:
        if (segment_type_ro(seg) && write) {
            error_report("Cannot write to read-only segment");
            return -1;
        }

        logical_addr_32b = logical_addr & 0xFFFFFFFF;
        limit = segment_limit(seg);

        if (segment_expands_down(seg)) {
            /*
             * Expand-down segments are valid in the range
             * (limit, max_limit] (Intel SDM Vol 3A, "Limit Checking"),
             * i.e. an offset at or below the limit faults.
             */
            if (logical_addr_32b <= limit) {
                error_report("Address exceeds limit (expands down)");
                return -1;
            }
            limit = segment_max_limit(seg);
        }

        if (logical_addr_32b > limit) {
            error_report("Address exceeds limit %u", limit);
            return -1;
        }
        *linear_addr = logical_addr_32b + base;
        break;
    default:
        error_report("Unknown cpu mode: %d", mode);
        return -1;
    }

    return 0;
}
/*
 * Fetch the segment descriptor referenced by selector sel from the GDT
 * or LDT into *desc.  Returns false when the selector is the GDT null
 * selector or the descriptor lies beyond the table limit.
 */
bool x86_read_segment_descriptor(CPUState *cpu,
                                 struct x86_segment_descriptor *desc,
                                 x86_segment_selector sel)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    target_ulong table_base;
    uint32_t table_limit;
    target_ulong gva;

    memset(desc, 0, sizeof(*desc));

    /* valid gdt descriptors start from index 1 */
    if (!sel.index && GDT_SEL == sel.ti) {
        return false;
    }

    if (sel.ti == GDT_SEL) {
        table_base = env->gdt.base;
        table_limit = env->gdt.limit;
    } else {
        table_base = env->ldt.base;
        table_limit = env->ldt.limit;
    }

    if (sel.index * 8 >= table_limit) {
        return false;
    }

    gva = table_base + sel.index * 8;
    emul_ops->read_mem(cpu, desc, gva, sizeof(*desc));
    return true;
}
/*
 * Fetch the IDT entry for vector 'gate' into *idt_desc.
 * Returns false when the entry lies beyond the IDT limit.
 */
bool x86_read_call_gate(CPUState *cpu, struct x86_call_gate *idt_desc,
                        int gate)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    target_ulong base = env->idt.base;
    uint32_t limit = env->idt.limit;
    target_ulong gva;

    memset(idt_desc, 0, sizeof(*idt_desc));
    if (gate * 8 >= limit) {
        /*
         * Use error_report() like the rest of this file; perror() would
         * append strerror(errno), but no errno is set on this path.
         */
        error_report("call gate exceeds idt limit");
        return false;
    }

    gva = base + gate * 8;
    emul_ops->read_mem(cpu, idt_desc, gva, sizeof(*idt_desc));
    return true;
}
/* True if CR0.PE is set, i.e. the vCPU left real mode. */
bool x86_is_protected(CPUState *cpu)
{
    CPUX86State *env = &X86_CPU(cpu)->env;

    return (env->cr[0] & CR0_PE_MASK) != 0;
}
/* Real mode is simply the absence of protected mode. */
bool x86_is_real(CPUState *cpu)
{
    return x86_is_protected(cpu) ? false : true;
}
/* Virtual-8086 mode: protected mode with EFLAGS.VM set. */
bool x86_is_v8086(CPUState *cpu)
{
    CPUX86State *env = &X86_CPU(cpu)->env;

    return x86_is_protected(cpu) && (env->eflags & VM_MASK);
}
/* Long mode is active only when both EFER.LME and EFER.LMA are set. */
bool x86_is_long_mode(CPUState *cpu)
{
    CPUX86State *env = &X86_CPU(cpu)->env;
    const uint64_t lme_lma = MSR_EFER_LME | MSR_EFER_LMA;

    return (env->efer & lme_lma) == lme_lma;
}
/*
 * Not implemented for the mshv emulation path; aborts unconditionally.
 * (Would distinguish 64-bit sub-mode from compatibility sub-mode of
 * long mode via CS.L — presumably; confirm before implementing.)
 */
bool x86_is_long64_mode(CPUState *cpu)
{
    error_report("unimplemented: is_long64_mode()");
    abort();
}
/* True if CR0.PG is set, i.e. paging is enabled. */
bool x86_is_paging_mode(CPUState *cpu)
{
    CPUX86State *env = &X86_CPU(cpu)->env;

    return (env->cr[0] & CR0_PG_MASK) != 0;
}
/* True if CR4.PAE is set, i.e. physical address extension is enabled. */
bool x86_is_pae_enabled(CPUState *cpu)
{
    CPUX86State *env = &X86_CPU(cpu)->env;

    return (env->cr[4] & CR4_PAE_MASK) != 0;
}
/*
 * Linearize addr relative to segment seg, aborting on failure.
 * Callers that cannot tolerate an abort must call linearize() directly.
 */
target_ulong linear_addr(CPUState *cpu, target_ulong addr, X86Seg seg)
{
    target_ulong result;

    if (linearize(cpu, addr, &result, seg) < 0) {
        error_report("failed to linearize address");
        abort();
    }
    return result;
}
/*
 * Like linear_addr(), but first truncates the offset to the given
 * address size (2 or 4 bytes); other sizes leave the offset untouched.
 */
target_ulong linear_addr_size(CPUState *cpu, target_ulong addr, int size,
                              X86Seg seg)
{
    if (size == 2) {
        addr = (uint16_t)addr;
    } else if (size == 4) {
        addr = (uint32_t)addr;
    }
    return linear_addr(cpu, addr, seg);
}
/* Linearize an instruction pointer, which is implicitly CS-relative. */
target_ulong linear_rip(CPUState *cpu, target_ulong rip)
{
    return linear_addr(cpu, rip, R_CS);
}

View file

@ -53,8 +53,8 @@ configure_qemu()
config_opts="--enable-werror \ config_opts="--enable-werror \
${TARGET_LIST:+--target-list=${TARGET_LIST}} \ ${TARGET_LIST:+--target-list=${TARGET_LIST}} \
--prefix=$INSTALL_DIR \ --prefix=$INSTALL_DIR \
$QEMU_CONFIGURE_OPTS $EXTRA_CONFIGURE_OPTS \
$enable_rust \ $enable_rust \
$QEMU_CONFIGURE_OPTS $EXTRA_CONFIGURE_OPTS \
$@" $@"
echo "Configure options:" echo "Configure options:"
echo $config_opts echo $config_opts