target/i386/mshv: Implement mshv_vcpu_run()
Add the main vCPU execution loop for MSHV using the MSHV_RUN_VP ioctl. The execution loop handles guest entry and VM exits. There are handlers for memory r/w, PIO and MMIO to which the exit events are dispatched. In case of MMIO the i386 instruction decoder/emulator is invoked to perform the operation in user space. Signed-off-by: Magnus Kulke <magnuskulke@linux.microsoft.com> Link: https://lore.kernel.org/r/20250916164847.77883-23-magnuskulke@linux.microsoft.com Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
64118f452c
commit
6dec60528c
1 changed files with 442 additions and 2 deletions
|
|
@ -1082,10 +1082,450 @@ void mshv_arch_amend_proc_features(
|
|||
features->access_guest_idle_reg = 1;
|
||||
}
|
||||
|
||||
static int set_memory_info(const struct hyperv_message *msg,
|
||||
struct hv_x64_memory_intercept_message *info)
|
||||
{
|
||||
if (msg->header.message_type != HVMSG_GPA_INTERCEPT
|
||||
&& msg->header.message_type != HVMSG_UNMAPPED_GPA
|
||||
&& msg->header.message_type != HVMSG_UNACCEPTED_GPA) {
|
||||
error_report("invalid message type");
|
||||
return -1;
|
||||
}
|
||||
memcpy(info, msg->payload, sizeof(*info));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int emulate_instruction(CPUState *cpu,
|
||||
const uint8_t *insn_bytes, size_t insn_len,
|
||||
uint64_t gva, uint64_t gpa)
|
||||
{
|
||||
X86CPU *x86_cpu = X86_CPU(cpu);
|
||||
CPUX86State *env = &x86_cpu->env;
|
||||
struct x86_decode decode = { 0 };
|
||||
int ret;
|
||||
x86_insn_stream stream = { .bytes = insn_bytes, .len = insn_len };
|
||||
|
||||
ret = mshv_load_regs(cpu);
|
||||
if (ret < 0) {
|
||||
error_report("failed to load registers");
|
||||
return -1;
|
||||
}
|
||||
|
||||
decode_instruction_stream(env, &decode, &stream);
|
||||
exec_instruction(env, &decode);
|
||||
|
||||
ret = mshv_store_regs(cpu);
|
||||
if (ret < 0) {
|
||||
error_report("failed to store registers");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int handle_mmio(CPUState *cpu, const struct hyperv_message *msg,
|
||||
MshvVmExit *exit_reason)
|
||||
{
|
||||
struct hv_x64_memory_intercept_message info = { 0 };
|
||||
size_t insn_len;
|
||||
uint8_t access_type;
|
||||
uint8_t *instruction_bytes;
|
||||
int ret;
|
||||
|
||||
ret = set_memory_info(msg, &info);
|
||||
if (ret < 0) {
|
||||
error_report("failed to convert message to memory info");
|
||||
return -1;
|
||||
}
|
||||
insn_len = info.instruction_byte_count;
|
||||
access_type = info.header.intercept_access_type;
|
||||
|
||||
if (access_type == HV_X64_INTERCEPT_ACCESS_TYPE_EXECUTE) {
|
||||
error_report("invalid intercept access type: execute");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (insn_len > 16) {
|
||||
error_report("invalid mmio instruction length: %zu", insn_len);
|
||||
return -1;
|
||||
}
|
||||
|
||||
trace_mshv_handle_mmio(info.guest_virtual_address,
|
||||
info.guest_physical_address,
|
||||
info.instruction_byte_count, access_type);
|
||||
|
||||
instruction_bytes = info.instruction_bytes;
|
||||
|
||||
ret = emulate_instruction(cpu, instruction_bytes, insn_len,
|
||||
info.guest_virtual_address,
|
||||
info.guest_physical_address);
|
||||
if (ret < 0) {
|
||||
error_report("failed to emulate mmio");
|
||||
return -1;
|
||||
}
|
||||
|
||||
*exit_reason = MshvVmExitIgnore;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int set_ioport_info(const struct hyperv_message *msg,
|
||||
hv_x64_io_port_intercept_message *info)
|
||||
{
|
||||
if (msg->header.message_type != HVMSG_X64_IO_PORT_INTERCEPT) {
|
||||
error_report("Invalid message type");
|
||||
return -1;
|
||||
}
|
||||
memcpy(info, msg->payload, sizeof(*info));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int set_x64_registers(const CPUState *cpu, const uint32_t *names,
|
||||
const uint64_t *values)
|
||||
{
|
||||
|
||||
hv_register_assoc assocs[2];
|
||||
int ret;
|
||||
|
||||
for (size_t i = 0; i < ARRAY_SIZE(assocs); i++) {
|
||||
assocs[i].name = names[i];
|
||||
assocs[i].value.reg64 = values[i];
|
||||
}
|
||||
|
||||
ret = mshv_set_generic_regs(cpu, assocs, ARRAY_SIZE(assocs));
|
||||
if (ret < 0) {
|
||||
error_report("failed to set x64 registers");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline MemTxAttrs get_mem_attrs(bool is_secure_mode)
|
||||
{
|
||||
MemTxAttrs memattr = {0};
|
||||
memattr.secure = is_secure_mode;
|
||||
return memattr;
|
||||
}
|
||||
|
||||
static void pio_read(uint64_t port, uint8_t *data, uintptr_t size,
|
||||
bool is_secure_mode)
|
||||
{
|
||||
int ret = 0;
|
||||
MemTxAttrs memattr = get_mem_attrs(is_secure_mode);
|
||||
ret = address_space_rw(&address_space_io, port, memattr, (void *)data, size,
|
||||
false);
|
||||
if (ret != MEMTX_OK) {
|
||||
error_report("Failed to read from port %lx: %d", port, ret);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
static int pio_write(uint64_t port, const uint8_t *data, uintptr_t size,
|
||||
bool is_secure_mode)
|
||||
{
|
||||
int ret = 0;
|
||||
MemTxAttrs memattr = get_mem_attrs(is_secure_mode);
|
||||
ret = address_space_rw(&address_space_io, port, memattr, (void *)data, size,
|
||||
true);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int handle_pio_non_str(const CPUState *cpu,
|
||||
hv_x64_io_port_intercept_message *info)
|
||||
{
|
||||
size_t len = info->access_info.access_size;
|
||||
uint8_t access_type = info->header.intercept_access_type;
|
||||
int ret;
|
||||
uint32_t val, eax;
|
||||
const uint32_t eax_mask = 0xffffffffu >> (32 - len * 8);
|
||||
size_t insn_len;
|
||||
uint64_t rip, rax;
|
||||
uint32_t reg_names[2];
|
||||
uint64_t reg_values[2];
|
||||
uint16_t port = info->port_number;
|
||||
|
||||
if (access_type == HV_X64_INTERCEPT_ACCESS_TYPE_WRITE) {
|
||||
union {
|
||||
uint32_t u32;
|
||||
uint8_t bytes[4];
|
||||
} conv;
|
||||
|
||||
/* convert the first 4 bytes of rax to bytes */
|
||||
conv.u32 = (uint32_t)info->rax;
|
||||
/* secure mode is set to false */
|
||||
ret = pio_write(port, conv.bytes, len, false);
|
||||
if (ret < 0) {
|
||||
error_report("Failed to write to io port");
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
uint8_t data[4] = { 0 };
|
||||
/* secure mode is set to false */
|
||||
pio_read(info->port_number, data, len, false);
|
||||
|
||||
/* Preserve high bits in EAX, but clear out high bits in RAX */
|
||||
val = *(uint32_t *)data;
|
||||
eax = (((uint32_t)info->rax) & ~eax_mask) | (val & eax_mask);
|
||||
info->rax = (uint64_t)eax;
|
||||
}
|
||||
|
||||
insn_len = info->header.instruction_length;
|
||||
|
||||
/* Advance RIP and update RAX */
|
||||
rip = info->header.rip + insn_len;
|
||||
rax = info->rax;
|
||||
|
||||
reg_names[0] = HV_X64_REGISTER_RIP;
|
||||
reg_values[0] = rip;
|
||||
reg_names[1] = HV_X64_REGISTER_RAX;
|
||||
reg_values[1] = rax;
|
||||
|
||||
ret = set_x64_registers(cpu, reg_names, reg_values);
|
||||
if (ret < 0) {
|
||||
error_report("Failed to set x64 registers");
|
||||
return -1;
|
||||
}
|
||||
|
||||
cpu->accel->dirty = false;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int fetch_guest_state(CPUState *cpu)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = mshv_get_standard_regs(cpu);
|
||||
if (ret < 0) {
|
||||
error_report("Failed to get standard registers");
|
||||
return -1;
|
||||
}
|
||||
|
||||
ret = mshv_get_special_regs(cpu);
|
||||
if (ret < 0) {
|
||||
error_report("Failed to get special registers");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int read_memory(const CPUState *cpu, uint64_t initial_gva,
|
||||
uint64_t initial_gpa, uint64_t gva, uint8_t *data,
|
||||
size_t len)
|
||||
{
|
||||
int ret;
|
||||
uint64_t gpa, flags;
|
||||
|
||||
if (gva == initial_gva) {
|
||||
gpa = initial_gpa;
|
||||
} else {
|
||||
flags = HV_TRANSLATE_GVA_VALIDATE_READ;
|
||||
ret = translate_gva(cpu, gva, &gpa, flags);
|
||||
if (ret < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
ret = mshv_guest_mem_read(gpa, data, len, false, false);
|
||||
if (ret < 0) {
|
||||
error_report("failed to read guest mem");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int write_memory(const CPUState *cpu, uint64_t initial_gva,
|
||||
uint64_t initial_gpa, uint64_t gva, const uint8_t *data,
|
||||
size_t len)
|
||||
{
|
||||
int ret;
|
||||
uint64_t gpa, flags;
|
||||
|
||||
if (gva == initial_gva) {
|
||||
gpa = initial_gpa;
|
||||
} else {
|
||||
flags = HV_TRANSLATE_GVA_VALIDATE_WRITE;
|
||||
ret = translate_gva(cpu, gva, &gpa, flags);
|
||||
if (ret < 0) {
|
||||
error_report("failed to translate gva to gpa");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
ret = mshv_guest_mem_write(gpa, data, len, false);
|
||||
if (ret != MEMTX_OK) {
|
||||
error_report("failed to write to mmio");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int handle_pio_str_write(CPUState *cpu,
|
||||
hv_x64_io_port_intercept_message *info,
|
||||
size_t repeat, uint16_t port,
|
||||
bool direction_flag)
|
||||
{
|
||||
int ret;
|
||||
uint64_t src;
|
||||
uint8_t data[4] = { 0 };
|
||||
size_t len = info->access_info.access_size;
|
||||
|
||||
src = linear_addr(cpu, info->rsi, R_DS);
|
||||
|
||||
for (size_t i = 0; i < repeat; i++) {
|
||||
ret = read_memory(cpu, 0, 0, src, data, len);
|
||||
if (ret < 0) {
|
||||
error_report("Failed to read memory");
|
||||
return -1;
|
||||
}
|
||||
ret = pio_write(port, data, len, false);
|
||||
if (ret < 0) {
|
||||
error_report("Failed to write to io port");
|
||||
return -1;
|
||||
}
|
||||
src += direction_flag ? -len : len;
|
||||
info->rsi += direction_flag ? -len : len;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int handle_pio_str_read(CPUState *cpu,
|
||||
hv_x64_io_port_intercept_message *info,
|
||||
size_t repeat, uint16_t port,
|
||||
bool direction_flag)
|
||||
{
|
||||
int ret;
|
||||
uint64_t dst;
|
||||
size_t len = info->access_info.access_size;
|
||||
uint8_t data[4] = { 0 };
|
||||
|
||||
dst = linear_addr(cpu, info->rdi, R_ES);
|
||||
|
||||
for (size_t i = 0; i < repeat; i++) {
|
||||
pio_read(port, data, len, false);
|
||||
|
||||
ret = write_memory(cpu, 0, 0, dst, data, len);
|
||||
if (ret < 0) {
|
||||
error_report("Failed to write memory");
|
||||
return -1;
|
||||
}
|
||||
dst += direction_flag ? -len : len;
|
||||
info->rdi += direction_flag ? -len : len;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int handle_pio_str(CPUState *cpu, hv_x64_io_port_intercept_message *info)
|
||||
{
|
||||
uint8_t access_type = info->header.intercept_access_type;
|
||||
uint16_t port = info->port_number;
|
||||
bool repop = info->access_info.rep_prefix == 1;
|
||||
size_t repeat = repop ? info->rcx : 1;
|
||||
size_t insn_len = info->header.instruction_length;
|
||||
bool direction_flag;
|
||||
uint32_t reg_names[3];
|
||||
uint64_t reg_values[3];
|
||||
int ret;
|
||||
X86CPU *x86_cpu = X86_CPU(cpu);
|
||||
CPUX86State *env = &x86_cpu->env;
|
||||
|
||||
ret = fetch_guest_state(cpu);
|
||||
if (ret < 0) {
|
||||
error_report("Failed to fetch guest state");
|
||||
return -1;
|
||||
}
|
||||
|
||||
direction_flag = (env->eflags & DESC_E_MASK) != 0;
|
||||
|
||||
if (access_type == HV_X64_INTERCEPT_ACCESS_TYPE_WRITE) {
|
||||
ret = handle_pio_str_write(cpu, info, repeat, port, direction_flag);
|
||||
if (ret < 0) {
|
||||
error_report("Failed to handle pio str write");
|
||||
return -1;
|
||||
}
|
||||
reg_names[0] = HV_X64_REGISTER_RSI;
|
||||
reg_values[0] = info->rsi;
|
||||
} else {
|
||||
ret = handle_pio_str_read(cpu, info, repeat, port, direction_flag);
|
||||
reg_names[0] = HV_X64_REGISTER_RDI;
|
||||
reg_values[0] = info->rdi;
|
||||
}
|
||||
|
||||
reg_names[1] = HV_X64_REGISTER_RIP;
|
||||
reg_values[1] = info->header.rip + insn_len;
|
||||
reg_names[2] = HV_X64_REGISTER_RAX;
|
||||
reg_values[2] = info->rax;
|
||||
|
||||
ret = set_x64_registers(cpu, reg_names, reg_values);
|
||||
if (ret < 0) {
|
||||
error_report("Failed to set x64 registers");
|
||||
return -1;
|
||||
}
|
||||
|
||||
cpu->accel->dirty = false;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int handle_pio(CPUState *cpu, const struct hyperv_message *msg)
|
||||
{
|
||||
struct hv_x64_io_port_intercept_message info = { 0 };
|
||||
int ret;
|
||||
|
||||
ret = set_ioport_info(msg, &info);
|
||||
if (ret < 0) {
|
||||
error_report("Failed to convert message to ioport info");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (info.access_info.string_op) {
|
||||
return handle_pio_str(cpu, &info);
|
||||
}
|
||||
|
||||
return handle_pio_non_str(cpu, &info);
|
||||
}
|
||||
|
||||
int mshv_run_vcpu(int vm_fd, CPUState *cpu, hv_message *msg, MshvVmExit *exit)
|
||||
{
|
||||
error_report("unimplemented");
|
||||
abort();
|
||||
int ret;
|
||||
enum MshvVmExit exit_reason;
|
||||
int cpu_fd = mshv_vcpufd(cpu);
|
||||
|
||||
ret = ioctl(cpu_fd, MSHV_RUN_VP, msg);
|
||||
if (ret < 0) {
|
||||
return MshvVmExitShutdown;
|
||||
}
|
||||
|
||||
switch (msg->header.message_type) {
|
||||
case HVMSG_UNRECOVERABLE_EXCEPTION:
|
||||
return MshvVmExitShutdown;
|
||||
case HVMSG_UNMAPPED_GPA:
|
||||
case HVMSG_GPA_INTERCEPT:
|
||||
ret = handle_mmio(cpu, msg, &exit_reason);
|
||||
if (ret < 0) {
|
||||
error_report("failed to handle mmio");
|
||||
return -1;
|
||||
}
|
||||
return exit_reason;
|
||||
case HVMSG_X64_IO_PORT_INTERCEPT:
|
||||
ret = handle_pio(cpu, msg);
|
||||
if (ret < 0) {
|
||||
return MshvVmExitSpecial;
|
||||
}
|
||||
return MshvVmExitIgnore;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
*exit = MshvVmExitIgnore;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void mshv_remove_vcpu(int vm_fd, int cpu_fd)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue