From 4fb54de823e9d5b88b7f708516a2a9bf997d15c2 Mon Sep 17 00:00:00 2001 From: Pierrick Bouvier Date: Wed, 21 May 2025 15:34:08 -0700 Subject: [PATCH 01/77] meson: build target libraries with common dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As mentioned in [1], dependencies were missing when compiling per target libraries, thus breaking compilation on certain host systems. We now explicitly add common dependencies to those libraries, so it solves the problem. [1] https://lore.kernel.org/qemu-devel/20250513115637.184940-1-thuth@redhat.com/ Tested-by: Thomas Huth Fixes: 6f4e8a92bbd ("hw/arm: make most of the compilation units common") Signed-off-by: Pierrick Bouvier Tested-by: Philippe Mathieu-Daudé Link: https://lore.kernel.org/r/20250521223414.248276-2-pierrick.bouvier@linaro.org Signed-off-by: Paolo Bonzini --- meson.build | 75 ++++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/meson.build b/meson.build index ad2053f968..cbb22f60d1 100644 --- a/meson.build +++ b/meson.build @@ -3242,6 +3242,7 @@ config_devices_mak_list = [] config_devices_h = {} config_target_h = {} config_target_mak = {} +config_base_arch_mak = {} disassemblers = { 'alpha' : ['CONFIG_ALPHA_DIS'], @@ -3433,6 +3434,11 @@ foreach target : target_dirs config_all_devices += config_devices endif config_target_mak += {target: config_target} + + # build a merged config for all targets with the same TARGET_BASE_ARCH + target_base_arch = config_target['TARGET_BASE_ARCH'] + config_base_arch = config_base_arch_mak.get(target_base_arch, {}) + config_target + config_base_arch_mak += {target_base_arch: config_base_arch} endforeach target_dirs = actual_target_dirs @@ -4111,57 +4117,56 @@ common_all = static_library('common', hw_common_arch_libs = {} target_common_arch_libs = {} target_common_system_arch_libs = {} -foreach target : target_dirs +foreach target_base_arch, config_base_arch : config_base_arch_mak config_target = config_target_mak[target] - target_base_arch = config_target['TARGET_BASE_ARCH'] target_inc = [include_directories('target' / target_base_arch)] inc = [common_user_inc + target_inc] + target_common = common_ss.apply(config_target, strict: false) + common_deps = [] + foreach dep: target_common.dependencies() + common_deps += dep.partial_dependency(compile_args: true, includes: true) + endforeach + # prevent common code to access cpu compile time definition, # but still allow access to cpu.h target_c_args = ['-DCPU_DEFS_H'] target_system_c_args = target_c_args + ['-DCOMPILING_SYSTEM_VS_USER', '-DCONFIG_SOFTMMU'] if target_base_arch in hw_common_arch - if target_base_arch not in hw_common_arch_libs - src = hw_common_arch[target_base_arch] - lib = static_library( - 'hw_' + target_base_arch, - build_by_default: false, - sources: src.all_sources() + genh, - include_directories: inc, - c_args: target_system_c_args, - dependencies: src.all_dependencies()) - hw_common_arch_libs += {target_base_arch: lib} - endif + src = hw_common_arch[target_base_arch] + lib = static_library( + 'hw_' + target_base_arch, + build_by_default: false, + sources: src.all_sources() + genh, + include_directories: inc, + c_args: target_system_c_args, + dependencies: src.all_dependencies() + common_deps) + hw_common_arch_libs += {target_base_arch: lib} endif if target_base_arch in target_common_arch - if target_base_arch not in target_common_arch_libs - src = target_common_arch[target_base_arch] - lib = static_library( - 'target_' + target_base_arch, - build_by_default: false, - sources: src.all_sources() + genh, - include_directories: inc, - c_args: target_c_args, - dependencies: src.all_dependencies()) - target_common_arch_libs += {target_base_arch: lib} - endif + src = target_common_arch[target_base_arch] + lib = static_library( + 'target_' + target_base_arch, + build_by_default: false, + sources: src.all_sources() + genh, + include_directories: inc, + c_args: target_c_args, + dependencies: src.all_dependencies() + common_deps) + target_common_arch_libs += {target_base_arch: lib} endif if target_base_arch in target_common_system_arch - if target_base_arch not in target_common_system_arch_libs - src = target_common_system_arch[target_base_arch] - lib = static_library( - 'target_system_' + target_base_arch, - build_by_default: false, - sources: src.all_sources() + genh, - include_directories: inc, - c_args: target_system_c_args, - dependencies: src.all_dependencies()) - target_common_system_arch_libs += {target_base_arch: lib} - endif + src = target_common_system_arch[target_base_arch] + lib = static_library( + 'target_system_' + target_base_arch, + build_by_default: false, + sources: src.all_sources() + genh, + include_directories: inc, + c_args: target_system_c_args, + dependencies: src.all_dependencies() + common_deps) + target_common_system_arch_libs += {target_base_arch: lib} endif endforeach From 0ca26a51791f2601238129c6c2724cc4c604392c Mon Sep 17 00:00:00 2001 From: Pierrick Bouvier Date: Wed, 21 May 2025 15:34:09 -0700 Subject: [PATCH 02/77] hw/arm: remove explicit dependencies listed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierrick Bouvier Reviewed-by: Thomas Huth Tested-by: Philippe Mathieu-Daudé Link: https://lore.kernel.org/r/20250521223414.248276-3-pierrick.bouvier@linaro.org Signed-off-by: Paolo Bonzini --- hw/arm/meson.build | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/arm/meson.build b/hw/arm/meson.build index 5098795f61..d90be8f4c9 100644 --- a/hw/arm/meson.build +++ b/hw/arm/meson.build @@ -8,7 +8,7 @@ arm_common_ss.add(when: 'CONFIG_HIGHBANK', if_true: files('highbank.c')) arm_common_ss.add(when: 'CONFIG_INTEGRATOR', if_true: files('integratorcp.c')) arm_common_ss.add(when: 'CONFIG_MICROBIT', if_true: files('microbit.c')) arm_common_ss.add(when: 'CONFIG_MPS3R', if_true: files('mps3r.c')) -arm_common_ss.add(when: 'CONFIG_MUSICPAL', if_true: [pixman, files('musicpal.c')]) +arm_common_ss.add(when: 'CONFIG_MUSICPAL', if_true: [files('musicpal.c')]) arm_common_ss.add(when: 'CONFIG_NETDUINOPLUS2', if_true: files('netduinoplus2.c')) arm_common_ss.add(when: 'CONFIG_OLIMEX_STM32_H405', if_true: files('olimex-stm32-h405.c')) arm_common_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx.c', 'npcm7xx_boards.c')) @@ -79,7 +79,7 @@ arm_common_ss.add(when: 'CONFIG_SX1', if_true: files('omap_sx1.c')) arm_common_ss.add(when: 'CONFIG_VERSATILE', if_true: files('versatilepb.c')) arm_common_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) -arm_common_ss.add(fdt, files('boot.c')) +arm_common_ss.add(files('boot.c')) hw_arch += {'arm': arm_ss} hw_common_arch += {'arm': arm_common_ss} From 598a0ba8e6df8c113c77e69ee18c3872fda7b6e9 Mon Sep 17 00:00:00 2001 From: Pierrick Bouvier Date: Wed, 21 May 2025 15:34:10 -0700 Subject: [PATCH 03/77] target/arm: remove explicit dependencies listed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierrick Bouvier Reviewed-by: Thomas Huth Tested-by: Philippe Mathieu-Daudé Link: https://lore.kernel.org/r/20250521223414.248276-4-pierrick.bouvier@linaro.org Signed-off-by: Paolo Bonzini --- target/arm/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/arm/meson.build b/target/arm/meson.build index b404fa5486..2ff7ed6e98 100644 --- a/target/arm/meson.build +++ b/target/arm/meson.build @@ -28,7 +28,7 @@ arm_user_ss.add(files( 'vfp_fpscr.c', )) -arm_common_system_ss.add(files('cpu.c'), capstone) +arm_common_system_ss.add(files('cpu.c')) arm_common_system_ss.add(when: 'TARGET_AARCH64', if_false: files( 'cpu32-stubs.c')) arm_common_system_ss.add(when: 'CONFIG_KVM', if_false: files('kvm-stub.c')) From b17b51d325130bc9d0f1189461a9a681fbd554e5 Mon Sep 17 00:00:00 2001 From: Pierrick Bouvier Date: Wed, 21 May 2025 15:34:11 -0700 Subject: [PATCH 04/77] meson: apply target config for picking files from lib{system, user} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit semihosting code needs to be included only if CONFIG_SEMIHOSTING is set. However, this is a target configuration, so we need to apply it to the lib{system, user}_ss. As well, this prepares merging lib{system, user}_ss with {system, user}_ss. Acked-by: Richard Henderson Signed-off-by: Pierrick Bouvier Tested-by: Philippe Mathieu-Daudé Link: https://lore.kernel.org/r/20250521223414.248276-5-pierrick.bouvier@linaro.org Signed-off-by: Paolo Bonzini --- meson.build | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/meson.build b/meson.build index cbb22f60d1..79d123c50e 100644 --- a/meson.build +++ b/meson.build @@ -4081,27 +4081,19 @@ common_ss.add(qom, qemuutil) common_ss.add_all(when: 'CONFIG_SYSTEM_ONLY', if_true: [system_ss]) common_ss.add_all(when: 'CONFIG_USER_ONLY', if_true: user_ss) -libuser_ss = libuser_ss.apply({}) libuser = static_library('user', - libuser_ss.sources() + genh, + libuser_ss.all_sources() + genh, c_args: ['-DCONFIG_USER_ONLY', '-DCOMPILING_SYSTEM_VS_USER'], - dependencies: libuser_ss.dependencies(), + dependencies: libuser_ss.all_dependencies(), build_by_default: false) -libuser = declare_dependency(objects: libuser.extract_all_objects(recursive: false), - dependencies: libuser_ss.dependencies()) -common_ss.add(when: 'CONFIG_USER_ONLY', if_true: libuser) -libsystem_ss = libsystem_ss.apply({}) libsystem = static_library('system', - libsystem_ss.sources() + genh, + libsystem_ss.all_sources() + genh, c_args: ['-DCONFIG_SOFTMMU', '-DCOMPILING_SYSTEM_VS_USER'], - dependencies: libsystem_ss.dependencies(), + dependencies: libsystem_ss.all_dependencies(), build_by_default: false) -libsystem = declare_dependency(objects: libsystem.extract_all_objects(recursive: false), - dependencies: libsystem_ss.dependencies()) -common_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_true: libsystem) # Note that this library is never used directly (only through extract_objects) # and is not built by default; therefore, source files not used by the build @@ -4343,6 +4335,16 @@ foreach target : target_dirs objects += lib.extract_objects(src.sources()) arch_deps += src.dependencies() endif + if target_type == 'system' + src = libsystem_ss.apply(config_target, strict: false) + objects += libsystem.extract_objects(src.sources()) + arch_deps += src.dependencies() + endif + if target_type == 'user' + src = libuser_ss.apply(config_target, strict: false) + objects += libuser.extract_objects(src.sources()) + arch_deps += src.dependencies() + endif if target_type == 'system' and target_base_arch in hw_common_arch_libs src = hw_common_arch[target_base_arch].apply(config_target, strict: false) lib = hw_common_arch_libs[target_base_arch] From 7ca433244c6dde497bb36184f70b442b399fda3e Mon Sep 17 00:00:00 2001 From: Pierrick Bouvier Date: Wed, 21 May 2025 15:34:12 -0700 Subject: [PATCH 05/77] meson: merge lib{system, user}_ss with {system, user}_ss MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that target configuration can be applied to lib{system, user}_ss, there is no reason to keep that separate from the existing {system, user}_ss. The only difference is that we'll now compile those files with -DCOMPILING_SYSTEM_VS_USER, which removes poison for CONFIG_USER_ONLY and CONFIG_SOFTMMU, without any other side effect. We extract existing system/user code common common libraries to lib{system, user}. To not break existing meson files, we alias libsystem_ss to system_ss and libuser_ss to user_ss, so we can do the cleanup in next commit. Signed-off-by: Pierrick Bouvier Tested-by: Philippe Mathieu-Daudé Link: https://lore.kernel.org/r/20250521223414.248276-6-pierrick.bouvier@linaro.org Signed-off-by: Paolo Bonzini --- meson.build | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/meson.build b/meson.build index 79d123c50e..ea54d15f4a 100644 --- a/meson.build +++ b/meson.build @@ -3694,14 +3694,14 @@ io_ss = ss.source_set() qmp_ss = ss.source_set() qom_ss = ss.source_set() system_ss = ss.source_set() -libsystem_ss = ss.source_set() +libsystem_ss = system_ss specific_fuzz_ss = ss.source_set() specific_ss = ss.source_set() rust_devices_ss = ss.source_set() stub_ss = ss.source_set() trace_ss = ss.source_set() user_ss = ss.source_set() -libuser_ss = ss.source_set() +libuser_ss = user_ss util_ss = ss.source_set() # accel modules @@ -4078,21 +4078,19 @@ common_ss.add(hwcore) system_ss.add(authz, blockdev, chardev, crypto, io, qmp) common_ss.add(qom, qemuutil) -common_ss.add_all(when: 'CONFIG_SYSTEM_ONLY', if_true: [system_ss]) -common_ss.add_all(when: 'CONFIG_USER_ONLY', if_true: user_ss) - libuser = static_library('user', - libuser_ss.all_sources() + genh, + user_ss.all_sources() + genh, c_args: ['-DCONFIG_USER_ONLY', '-DCOMPILING_SYSTEM_VS_USER'], - dependencies: libuser_ss.all_dependencies(), + include_directories: common_user_inc, + dependencies: user_ss.all_dependencies(), build_by_default: false) libsystem = static_library('system', - libsystem_ss.all_sources() + genh, + system_ss.all_sources() + genh, c_args: ['-DCONFIG_SOFTMMU', '-DCOMPILING_SYSTEM_VS_USER'], - dependencies: libsystem_ss.all_dependencies(), + dependencies: system_ss.all_dependencies(), build_by_default: false) # Note that this library is never used directly (only through extract_objects) @@ -4101,7 +4099,6 @@ libsystem = static_library('system', common_all = static_library('common', build_by_default: false, sources: common_ss.all_sources() + genh, - include_directories: common_user_inc, implicit_include_directories: false, dependencies: common_ss.all_dependencies()) @@ -4115,10 +4112,20 @@ foreach target_base_arch, config_base_arch : config_base_arch_mak inc = [common_user_inc + target_inc] target_common = common_ss.apply(config_target, strict: false) + target_system = system_ss.apply(config_target, strict: false) + target_user = user_ss.apply(config_target, strict: false) common_deps = [] + system_deps = [] + user_deps = [] foreach dep: target_common.dependencies() common_deps += dep.partial_dependency(compile_args: true, includes: true) endforeach + foreach dep: target_system.dependencies() + system_deps += dep.partial_dependency(compile_args: true, includes: true) + endforeach + foreach dep: target_user.dependencies() + user_deps += dep.partial_dependency(compile_args: true, includes: true) + endforeach # prevent common code to access cpu compile time definition, # but still allow access to cpu.h @@ -4133,7 +4140,7 @@ foreach target_base_arch, config_base_arch : config_base_arch_mak sources: src.all_sources() + genh, include_directories: inc, c_args: target_system_c_args, - dependencies: src.all_dependencies() + common_deps) + dependencies: src.all_dependencies() + common_deps + system_deps) hw_common_arch_libs += {target_base_arch: lib} endif @@ -4145,7 +4152,8 @@ foreach target_base_arch, config_base_arch : config_base_arch_mak sources: src.all_sources() + genh, include_directories: inc, c_args: target_c_args, - dependencies: src.all_dependencies() + common_deps) + dependencies: src.all_dependencies() + common_deps + + system_deps + user_deps) target_common_arch_libs += {target_base_arch: lib} endif @@ -4157,7 +4165,7 @@ foreach target_base_arch, config_base_arch : config_base_arch_mak sources: src.all_sources() + genh, include_directories: inc, c_args: target_system_c_args, - dependencies: src.all_dependencies() + common_deps) + dependencies: src.all_dependencies() + common_deps + system_deps) target_common_system_arch_libs += {target_base_arch: lib} endif endforeach @@ -4336,12 +4344,12 @@ foreach target : target_dirs arch_deps += src.dependencies() endif if target_type == 'system' - src = libsystem_ss.apply(config_target, strict: false) + src = system_ss.apply(config_target, strict: false) objects += libsystem.extract_objects(src.sources()) arch_deps += src.dependencies() endif if target_type == 'user' - src = libuser_ss.apply(config_target, strict: false) + src = user_ss.apply(config_target, strict: false) objects += libuser.extract_objects(src.sources()) arch_deps += src.dependencies() endif From d33717d7fca6a599aa991f771e3ea34b15978cee Mon Sep 17 00:00:00 2001 From: Pierrick Bouvier Date: Wed, 21 May 2025 15:34:13 -0700 Subject: [PATCH 06/77] meson: remove lib{system, user}_ss aliases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierrick Bouvier Tested-by: Philippe Mathieu-Daudé Link: https://lore.kernel.org/r/20250521223414.248276-7-pierrick.bouvier@linaro.org Signed-off-by: Paolo Bonzini --- accel/tcg/meson.build | 8 ++++---- gdbstub/meson.build | 4 ++-- hw/core/meson.build | 4 ++-- meson.build | 2 -- plugins/meson.build | 4 ++-- system/meson.build | 2 +- tcg/meson.build | 4 ++-- 7 files changed, 13 insertions(+), 15 deletions(-) diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build index 97d5e5a711..575e92bb9e 100644 --- a/accel/tcg/meson.build +++ b/accel/tcg/meson.build @@ -18,15 +18,15 @@ if get_option('plugins') tcg_ss.add(files('plugin-gen.c')) endif -libuser_ss.add_all(tcg_ss) -libsystem_ss.add_all(tcg_ss) +user_ss.add_all(tcg_ss) +system_ss.add_all(tcg_ss) -libuser_ss.add(files( +user_ss.add(files( 'user-exec.c', 'user-exec-stub.c', )) -libsystem_ss.add(files( +system_ss.add(files( 'cputlb.c', 'icount-common.c', 'monitor.c', diff --git a/gdbstub/meson.build b/gdbstub/meson.build index b25db86767..15c666f575 100644 --- a/gdbstub/meson.build +++ b/gdbstub/meson.build @@ -5,13 +5,13 @@ # # We build two versions of gdbstub, one for each mode -libuser_ss.add(files( +user_ss.add(files( 'gdbstub.c', 'syscalls.c', 'user.c' )) -libsystem_ss.add(files( +system_ss.add(files( 'gdbstub.c', 'syscalls.c', 'system.c' diff --git a/hw/core/meson.build b/hw/core/meson.build index 547de6527c..b5a545a0ed 100644 --- a/hw/core/meson.build +++ b/hw/core/meson.build @@ -26,7 +26,7 @@ system_ss.add(when: 'CONFIG_XILINX_AXI', if_true: files('stream.c')) system_ss.add(when: 'CONFIG_PLATFORM_BUS', if_true: files('sysbus-fdt.c')) system_ss.add(when: 'CONFIG_EIF', if_true: [files('eif.c'), zlib, libcbor, gnutls]) -libsystem_ss.add(files( +system_ss.add(files( 'cpu-system.c', 'fw-path-provider.c', 'gpio.c', @@ -46,7 +46,7 @@ libsystem_ss.add(files( 'vm-change-state-handler.c', 'clock-vmstate.c', )) -libuser_ss.add(files( +user_ss.add(files( 'cpu-user.c', 'qdev-user.c', )) diff --git a/meson.build b/meson.build index ea54d15f4a..1c9f1aa91e 100644 --- a/meson.build +++ b/meson.build @@ -3694,14 +3694,12 @@ io_ss = ss.source_set() qmp_ss = ss.source_set() qom_ss = ss.source_set() system_ss = ss.source_set() -libsystem_ss = system_ss specific_fuzz_ss = ss.source_set() specific_ss = ss.source_set() rust_devices_ss = ss.source_set() stub_ss = ss.source_set() trace_ss = ss.source_set() user_ss = ss.source_set() -libuser_ss = user_ss util_ss = ss.source_set() # accel modules diff --git a/plugins/meson.build b/plugins/meson.build index 5383c7b88b..b20edfbabc 100644 --- a/plugins/meson.build +++ b/plugins/meson.build @@ -61,8 +61,8 @@ endif user_ss.add(files('user.c', 'api-user.c')) system_ss.add(files('system.c', 'api-system.c')) -libuser_ss.add(files('api.c', 'core.c')) -libsystem_ss.add(files('api.c', 'core.c')) +user_ss.add(files('api.c', 'core.c')) +system_ss.add(files('api.c', 'core.c')) common_ss.add(files('loader.c')) diff --git a/system/meson.build b/system/meson.build index c2f0082766..7514bf3455 100644 --- a/system/meson.build +++ b/system/meson.build @@ -7,7 +7,7 @@ system_ss.add(files( 'vl.c', ), sdl, libpmem, libdaxctl) -libsystem_ss.add(files( +system_ss.add(files( 'balloon.c', 'bootdevice.c', 'cpus.c', diff --git a/tcg/meson.build b/tcg/meson.build index bd2821e4b5..706a6eb260 100644 --- a/tcg/meson.build +++ b/tcg/meson.build @@ -27,5 +27,5 @@ if host_os == 'linux' tcg_ss.add(files('perf.c')) endif -libuser_ss.add_all(tcg_ss) -libsystem_ss.add_all(tcg_ss) +user_ss.add_all(tcg_ss) +system_ss.add_all(tcg_ss) From ddc25eb40444f4bbcfea276d22beee6494c3f18e Mon Sep 17 00:00:00 2001 From: Pierrick Bouvier Date: Wed, 21 May 2025 15:34:14 -0700 Subject: [PATCH 07/77] meson: merge hw_common_arch in target_common_system_arch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to keep two different libraries, as both are compiled with exact same flags. As well, rename target common libraries to common_{arch} and system_{arch}, to follow what exists for common and system libraries. Signed-off-by: Pierrick Bouvier Tested-by: Philippe Mathieu-Daudé Link: https://lore.kernel.org/r/20250521223414.248276-8-pierrick.bouvier@linaro.org Signed-off-by: Paolo Bonzini --- meson.build | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/meson.build b/meson.build index 1c9f1aa91e..f614d11219 100644 --- a/meson.build +++ b/meson.build @@ -4101,7 +4101,6 @@ common_all = static_library('common', dependencies: common_ss.all_dependencies()) # construct common libraries per base architecture -hw_common_arch_libs = {} target_common_arch_libs = {} target_common_system_arch_libs = {} foreach target_base_arch, config_base_arch : config_base_arch_mak @@ -4130,22 +4129,10 @@ foreach target_base_arch, config_base_arch : config_base_arch_mak target_c_args = ['-DCPU_DEFS_H'] target_system_c_args = target_c_args + ['-DCOMPILING_SYSTEM_VS_USER', '-DCONFIG_SOFTMMU'] - if target_base_arch in hw_common_arch - src = hw_common_arch[target_base_arch] - lib = static_library( - 'hw_' + target_base_arch, - build_by_default: false, - sources: src.all_sources() + genh, - include_directories: inc, - c_args: target_system_c_args, - dependencies: src.all_dependencies() + common_deps + system_deps) - hw_common_arch_libs += {target_base_arch: lib} - endif - if target_base_arch in target_common_arch src = target_common_arch[target_base_arch] lib = static_library( - 'target_' + target_base_arch, + 'common_' + target_base_arch, build_by_default: false, sources: src.all_sources() + genh, include_directories: inc, @@ -4155,10 +4142,20 @@ foreach target_base_arch, config_base_arch : config_base_arch_mak target_common_arch_libs += {target_base_arch: lib} endif + # merge hw_common_arch in target_common_system_arch + if target_base_arch in hw_common_arch + hw_src = hw_common_arch[target_base_arch] + if target_base_arch in target_common_system_arch + target_common_system_arch[target_base_arch].add_all(hw_src) + else + target_common_system_arch += {target_base_arch: hw_src} + endif + endif + if target_base_arch in target_common_system_arch src = target_common_system_arch[target_base_arch] lib = static_library( - 'target_system_' + target_base_arch, + 'system_' + target_base_arch, build_by_default: false, sources: src.all_sources() + genh, include_directories: inc, @@ -4351,12 +4348,6 @@ foreach target : target_dirs objects += libuser.extract_objects(src.sources()) arch_deps += src.dependencies() endif - if target_type == 'system' and target_base_arch in hw_common_arch_libs - src = hw_common_arch[target_base_arch].apply(config_target, strict: false) - lib = hw_common_arch_libs[target_base_arch] - objects += lib.extract_objects(src.sources()) - arch_deps += src.dependencies() - endif if target_type == 'system' and target_base_arch in target_common_system_arch_libs src = target_common_system_arch[target_base_arch].apply(config_target, strict: false) lib = target_common_system_arch_libs[target_base_arch] From d74169e09e1d424aaca138966f460520a0d4dd0d Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Tue, 20 May 2025 23:27:46 +0800 Subject: [PATCH 08/77] hw/timer/hpet: Reorganize register decoding For Rust HPET, since the commit 519088b7cf6d ("rust: hpet: decode HPET registers into enums"), it decodes register address by checking if the register belongs to global register space. And for C HPET, it checks timer register space first. While both approaches are fine, it's best to be as consistent as possible. Synchronize changes from the rust side to C side. Signed-off-by: Zhao Liu Link: https://lore.kernel.org/r/20250520152750.2542612-2-zhao1.liu@intel.com Signed-off-by: Paolo Bonzini --- hw/timer/hpet.c | 166 ++++++++++++++++++++++++------------------------ 1 file changed, 84 insertions(+), 82 deletions(-) diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c index d1b7bc52b7..0fd1337a15 100644 --- a/hw/timer/hpet.c +++ b/hw/timer/hpet.c @@ -426,30 +426,11 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr, uint64_t cur_tick; trace_hpet_ram_read(addr); + addr &= ~4; - /*address range of all TN regs*/ - if (addr >= 0x100 && addr <= 0x3ff) { - uint8_t timer_id = (addr - 0x100) / 0x20; - HPETTimer *timer = &s->timer[timer_id]; - - if (timer_id > s->num_timers) { - trace_hpet_timer_id_out_of_range(timer_id); - return 0; - } - - switch (addr & 0x18) { - case HPET_TN_CFG: // including interrupt capabilities - return timer->config >> shift; - case HPET_TN_CMP: // comparator register - return timer->cmp >> shift; - case HPET_TN_ROUTE: - return timer->fsb >> shift; - default: - trace_hpet_ram_read_invalid(); - break; - } - } else { - switch (addr & ~4) { + /*address range of all global regs*/ + if (addr <= 0xff) { + switch (addr) { case HPET_ID: // including HPET_PERIOD return s->capability >> shift; case HPET_CFG: @@ -468,6 +449,26 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr, trace_hpet_ram_read_invalid(); break; } + } else { + uint8_t timer_id = (addr - 0x100) / 0x20; + HPETTimer *timer = &s->timer[timer_id]; + + if (timer_id > s->num_timers) { + trace_hpet_timer_id_out_of_range(timer_id); + return 0; + } + + switch (addr & 0x1f) { + case HPET_TN_CFG: // including interrupt capabilities + return timer->config >> shift; + case HPET_TN_CMP: // comparator register + return timer->cmp >> shift; + case HPET_TN_ROUTE: + return timer->fsb >> shift; + default: + trace_hpet_ram_read_invalid(); + break; + } } return 0; } @@ -482,9 +483,67 @@ static void hpet_ram_write(void *opaque, hwaddr addr, uint64_t old_val, new_val, cleared; trace_hpet_ram_write(addr, value); + addr &= ~4; - /*address range of all TN regs*/ - if (addr >= 0x100 && addr <= 0x3ff) { + /*address range of all global regs*/ + if (addr <= 0xff) { + switch (addr) { + case HPET_ID: + return; + case HPET_CFG: + old_val = s->config; + new_val = deposit64(old_val, shift, len, value); + new_val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK); + s->config = new_val; + if (activating_bit(old_val, new_val, HPET_CFG_ENABLE)) { + /* Enable main counter and interrupt generation. */ + s->hpet_offset = + ticks_to_ns(s->hpet_counter) - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + for (i = 0; i < s->num_timers; i++) { + if (timer_enabled(&s->timer[i]) && (s->isr & (1 << i))) { + update_irq(&s->timer[i], 1); + } + hpet_set_timer(&s->timer[i]); + } + } else if (deactivating_bit(old_val, new_val, HPET_CFG_ENABLE)) { + /* Halt main counter and disable interrupt generation. */ + s->hpet_counter = hpet_get_ticks(s); + for (i = 0; i < s->num_timers; i++) { + hpet_del_timer(&s->timer[i]); + } + } + /* i8254 and RTC output pins are disabled + * when HPET is in legacy mode */ + if (activating_bit(old_val, new_val, HPET_CFG_LEGACY)) { + qemu_set_irq(s->pit_enabled, 0); + qemu_irq_lower(s->irqs[0]); + qemu_irq_lower(s->irqs[RTC_ISA_IRQ]); + } else if (deactivating_bit(old_val, new_val, HPET_CFG_LEGACY)) { + qemu_irq_lower(s->irqs[0]); + qemu_set_irq(s->pit_enabled, 1); + qemu_set_irq(s->irqs[RTC_ISA_IRQ], s->rtc_irq_level); + } + break; + case HPET_STATUS: + new_val = value << shift; + cleared = new_val & s->isr; + for (i = 0; i < s->num_timers; i++) { + if (cleared & (1 << i)) { + update_irq(&s->timer[i], 0); + } + } + break; + case HPET_COUNTER: + if (hpet_enabled(s)) { + trace_hpet_ram_write_counter_write_while_enabled(); + } + s->hpet_counter = deposit64(s->hpet_counter, shift, len, value); + break; + default: + trace_hpet_ram_write_invalid(); + break; + } + } else { uint8_t timer_id = (addr - 0x100) / 0x20; HPETTimer *timer = &s->timer[timer_id]; @@ -550,63 +609,6 @@ static void hpet_ram_write(void *opaque, hwaddr addr, break; } return; - } else { - switch (addr & ~4) { - case HPET_ID: - return; - case HPET_CFG: - old_val = s->config; - new_val = deposit64(old_val, shift, len, value); - new_val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK); - s->config = new_val; - if (activating_bit(old_val, new_val, HPET_CFG_ENABLE)) { - /* Enable main counter and interrupt generation. */ - s->hpet_offset = - ticks_to_ns(s->hpet_counter) - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - for (i = 0; i < s->num_timers; i++) { - if (timer_enabled(&s->timer[i]) && (s->isr & (1 << i))) { - update_irq(&s->timer[i], 1); - } - hpet_set_timer(&s->timer[i]); - } - } else if (deactivating_bit(old_val, new_val, HPET_CFG_ENABLE)) { - /* Halt main counter and disable interrupt generation. */ - s->hpet_counter = hpet_get_ticks(s); - for (i = 0; i < s->num_timers; i++) { - hpet_del_timer(&s->timer[i]); - } - } - /* i8254 and RTC output pins are disabled - * when HPET is in legacy mode */ - if (activating_bit(old_val, new_val, HPET_CFG_LEGACY)) { - qemu_set_irq(s->pit_enabled, 0); - qemu_irq_lower(s->irqs[0]); - qemu_irq_lower(s->irqs[RTC_ISA_IRQ]); - } else if (deactivating_bit(old_val, new_val, HPET_CFG_LEGACY)) { - qemu_irq_lower(s->irqs[0]); - qemu_set_irq(s->pit_enabled, 1); - qemu_set_irq(s->irqs[RTC_ISA_IRQ], s->rtc_irq_level); - } - break; - case HPET_STATUS: - new_val = value << shift; - cleared = new_val & s->isr; - for (i = 0; i < s->num_timers; i++) { - if (cleared & (1 << i)) { - update_irq(&s->timer[i], 0); - } - } - break; - case HPET_COUNTER: - if (hpet_enabled(s)) { - trace_hpet_ram_write_counter_write_while_enabled(); - } - s->hpet_counter = deposit64(s->hpet_counter, shift, len, value); - break; - default: - trace_hpet_ram_write_invalid(); - break; - } } } From 86c54a3a418e462e67444ac4db25b2757fd62079 Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Tue, 20 May 2025 23:27:49 +0800 Subject: [PATCH 09/77] rust: Fix Zhao's email address No one could find Zhao Liu via zhai1.liu@intel.com. Signed-off-by: Zhao Liu Link: https://lore.kernel.org/r/20250520152750.2542612-5-zhao1.liu@intel.com Signed-off-by: Paolo Bonzini --- rust/hw/timer/hpet/src/fw_cfg.rs | 2 +- rust/hw/timer/hpet/src/hpet.rs | 2 +- rust/hw/timer/hpet/src/lib.rs | 2 +- rust/qemu-api/src/bitops.rs | 2 +- rust/qemu-api/src/timer.rs | 2 +- rust/qemu-api/tests/vmstate_tests.rs | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/rust/hw/timer/hpet/src/fw_cfg.rs b/rust/hw/timer/hpet/src/fw_cfg.rs index aa08d28351..6c10316104 100644 --- a/rust/hw/timer/hpet/src/fw_cfg.rs +++ b/rust/hw/timer/hpet/src/fw_cfg.rs @@ -1,5 +1,5 @@ // Copyright (C) 2024 Intel Corporation. -// Author(s): Zhao Liu +// Author(s): Zhao Liu // SPDX-License-Identifier: GPL-2.0-or-later use std::ptr::addr_of_mut; diff --git a/rust/hw/timer/hpet/src/hpet.rs b/rust/hw/timer/hpet/src/hpet.rs index 779681d650..e3ba62b287 100644 --- a/rust/hw/timer/hpet/src/hpet.rs +++ b/rust/hw/timer/hpet/src/hpet.rs @@ -1,5 +1,5 @@ // Copyright (C) 2024 Intel Corporation. -// Author(s): Zhao Liu +// Author(s): Zhao Liu // SPDX-License-Identifier: GPL-2.0-or-later use std::{ diff --git a/rust/hw/timer/hpet/src/lib.rs b/rust/hw/timer/hpet/src/lib.rs index 1954584a87..141aae229d 100644 --- a/rust/hw/timer/hpet/src/lib.rs +++ b/rust/hw/timer/hpet/src/lib.rs @@ -1,5 +1,5 @@ // Copyright (C) 2024 Intel Corporation. -// Author(s): Zhao Liu +// Author(s): Zhao Liu // SPDX-License-Identifier: GPL-2.0-or-later //! # HPET QEMU Device Model diff --git a/rust/qemu-api/src/bitops.rs b/rust/qemu-api/src/bitops.rs index 023ec1a998..b1e3a530ab 100644 --- a/rust/qemu-api/src/bitops.rs +++ b/rust/qemu-api/src/bitops.rs @@ -1,5 +1,5 @@ // Copyright (C) 2024 Intel Corporation. -// Author(s): Zhao Liu +// Author(s): Zhao Liu // SPDX-License-Identifier: GPL-2.0-or-later //! This module provides bit operation extensions to integer types. diff --git a/rust/qemu-api/src/timer.rs b/rust/qemu-api/src/timer.rs index 868bd88575..0a2d111d49 100644 --- a/rust/qemu-api/src/timer.rs +++ b/rust/qemu-api/src/timer.rs @@ -1,5 +1,5 @@ // Copyright (C) 2024 Intel Corporation. -// Author(s): Zhao Liu +// Author(s): Zhao Liu // SPDX-License-Identifier: GPL-2.0-or-later use std::{ diff --git a/rust/qemu-api/tests/vmstate_tests.rs b/rust/qemu-api/tests/vmstate_tests.rs index ad0fc5cd5d..bded836eb6 100644 --- a/rust/qemu-api/tests/vmstate_tests.rs +++ b/rust/qemu-api/tests/vmstate_tests.rs @@ -1,5 +1,5 @@ // Copyright (C) 2025 Intel Corporation. -// Author(s): Zhao Liu +// Author(s): Zhao Liu // SPDX-License-Identifier: GPL-2.0-or-later use std::{ From aef5ac8624c7b826ae2adde48bc6997286ee1303 Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Tue, 20 May 2025 23:27:50 +0800 Subject: [PATCH 10/77] rust: Fix the typos in doc These typos are found by "cargo spellcheck". Though it outputs a lot of noise and false positives, there still are some real typos. Signed-off-by: Zhao Liu Link: https://lore.kernel.org/r/20250520152750.2542612-6-zhao1.liu@intel.com Signed-off-by: Paolo Bonzini --- rust/hw/char/pl011/src/device.rs | 4 ++-- rust/qemu-api/src/qom.rs | 4 ++-- rust/qemu-api/src/vmstate.rs | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/rust/hw/char/pl011/src/device.rs b/rust/hw/char/pl011/src/device.rs index bde3be65c5..bd5cee0464 100644 --- a/rust/hw/char/pl011/src/device.rs +++ b/rust/hw/char/pl011/src/device.rs @@ -480,13 +480,13 @@ impl PL011Registers { } impl PL011State { - /// Initializes a pre-allocated, unitialized instance of `PL011State`. + /// Initializes a pre-allocated, uninitialized instance of `PL011State`. /// /// # Safety /// /// `self` must point to a correctly sized and aligned location for the /// `PL011State` type. It must not be called more than once on the same - /// location/instance. All its fields are expected to hold unitialized + /// location/instance. All its fields are expected to hold uninitialized /// values with the sole exception of `parent_obj`. unsafe fn init(&mut self) { static PL011_OPS: MemoryRegionOps = MemoryRegionOpsBuilder::::new() diff --git a/rust/qemu-api/src/qom.rs b/rust/qemu-api/src/qom.rs index 41e5a5e29a..14f98fee60 100644 --- a/rust/qemu-api/src/qom.rs +++ b/rust/qemu-api/src/qom.rs @@ -291,7 +291,7 @@ pub unsafe trait ObjectType: Sized { } /// Return the receiver as a const raw pointer to Object. - /// This is preferrable to `as_object_mut_ptr()` if a C + /// This is preferable to `as_object_mut_ptr()` if a C /// function only needs a `const Object *`. fn as_object_ptr(&self) -> *const bindings::Object { self.as_object().as_ptr() @@ -485,7 +485,7 @@ pub trait ObjectImpl: ObjectType + IsA { /// `INSTANCE_INIT` functions have been called. const INSTANCE_POST_INIT: Option = None; - /// Called on descendent classes after all parent class initialization + /// Called on descendant classes after all parent class initialization /// has occurred, but before the class itself is initialized. This /// is only useful if a class is not a leaf, and can be used to undo /// the effects of copying the contents of the parent's class struct diff --git a/rust/qemu-api/src/vmstate.rs b/rust/qemu-api/src/vmstate.rs index 9c8b2398e9..812f390d78 100644 --- a/rust/qemu-api/src/vmstate.rs +++ b/rust/qemu-api/src/vmstate.rs @@ -9,7 +9,7 @@ //! * [`vmstate_unused!`](crate::vmstate_unused) and //! [`vmstate_of!`](crate::vmstate_of), which are used to express the //! migration format for a struct. This is based on the [`VMState`] trait, -//! which is defined by all migrateable types. +//! which is defined by all migratable types. //! //! * [`impl_vmstate_forward`](crate::impl_vmstate_forward) and //! [`impl_vmstate_bitsized`](crate::impl_vmstate_bitsized), which help with From 734a1e9eeed2c791c8906d0ee08ad5c9b1f41fa0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 13 May 2025 12:18:12 +0200 Subject: [PATCH 11/77] rust: hpet: rename hpet module to "device" Follow a similar convention as pl011. Reviewed-by: Zhao Liu Signed-off-by: Paolo Bonzini --- rust/hw/timer/hpet/src/{hpet.rs => device.rs} | 0 rust/hw/timer/hpet/src/lib.rs | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename rust/hw/timer/hpet/src/{hpet.rs => device.rs} (100%) diff --git a/rust/hw/timer/hpet/src/hpet.rs b/rust/hw/timer/hpet/src/device.rs similarity index 100% rename from rust/hw/timer/hpet/src/hpet.rs rename to rust/hw/timer/hpet/src/device.rs diff --git a/rust/hw/timer/hpet/src/lib.rs b/rust/hw/timer/hpet/src/lib.rs index 141aae229d..a95cf14ac9 100644 --- a/rust/hw/timer/hpet/src/lib.rs +++ b/rust/hw/timer/hpet/src/lib.rs @@ -7,7 +7,7 @@ //! This library implements a device model for the IA-PC HPET (High //! Precision Event Timers) device in QEMU. +pub mod device; pub mod fw_cfg; -pub mod hpet; pub const TYPE_HPET: &::std::ffi::CStr = c"hpet"; From 341ed3eae4179273788d2f74579862a10e18cf81 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 20 May 2025 14:53:29 +0200 Subject: [PATCH 12/77] target/i386/emulate: more lflags cleanups Signed-off-by: Paolo Bonzini --- target/i386/emulate/x86_flags.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/target/i386/emulate/x86_flags.c b/target/i386/emulate/x86_flags.c index 47bc19778c..cc138c7749 100644 --- a/target/i386/emulate/x86_flags.c +++ b/target/i386/emulate/x86_flags.c @@ -255,19 +255,19 @@ void lflags_to_rflags(CPUX86State *env) void rflags_to_lflags(CPUX86State *env) { - target_ulong cf_xor_of; + target_ulong cf_af, cf_xor_of; + /* Leave the low byte zero so that parity is always even... */ + env->cc_dst = !(env->eflags & CC_Z) << 8; + + /* ... and therefore cc_src always uses opposite polarity. */ env->cc_src = CC_P; env->cc_src ^= env->eflags & (CC_S | CC_P); /* rotate right by one to move CF and AF into the carry-out positions */ - env->cc_src |= ( - (env->eflags >> 1) | - (env->eflags << (TARGET_LONG_BITS - 1))) & (CC_C | CC_A); + cf_af = env->eflags & (CC_C | CC_A); + env->cc_src |= ((cf_af >> 1) | (cf_af << (TARGET_LONG_BITS - 1))); - cf_xor_of = (env->eflags & (CC_C | CC_O)) + (CC_O - CC_C); + cf_xor_of = ((env->eflags & (CC_C | CC_O)) + (CC_O - CC_C)) & CC_O; env->cc_src |= -cf_xor_of & LF_MASK_PO; - - /* Leave the low byte zero so that parity is not affected. */ - env->cc_dst = !(env->eflags & CC_Z) << 8; } From 5150004ccf5fe72c35b3263fbed6f4d06ed3cc6a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 May 2025 11:20:13 +0200 Subject: [PATCH 13/77] rocker: do not pollute the namespace Do not leave the __le* macros defined, in fact do not use them at all. Fixes a build failure on Alpine with the TDX patches: In file included from ../hw/net/rocker/rocker_of_dpa.c:25: ../hw/net/rocker/rocker_hw.h:14:16: error: conflicting types for 'uint64_t'; have '__u64' {aka 'long long unsigned int'} 14 | #define __le64 uint64_t | ^~~~~~~~ In file included from /usr/include/stdint.h:20, from ../include/qemu/osdep.h:111, from ../hw/net/rocker/rocker_of_dpa.c:17: /usr/include/bits/alltypes.h:136:25: note: previous declaration of 'uint64_t' with type 'uint64_t' {aka 'long unsigned int'} 136 | typedef unsigned _Int64 uint64_t; | ^~~~~~~~ because the Linux headers include a typedef of __leNN. Signed-off-by: Paolo Bonzini --- hw/net/rocker/rocker.h | 14 +++--------- hw/net/rocker/rocker_hw.h | 20 +++++++----------- hw/net/rocker/rocker_of_dpa.c | 40 +++++++++++++++++------------------ 3 files changed, 31 insertions(+), 43 deletions(-) diff --git a/hw/net/rocker/rocker.h b/hw/net/rocker/rocker.h index 6e0962f47a..ae06c1c72a 100644 --- a/hw/net/rocker/rocker.h +++ b/hw/net/rocker/rocker.h @@ -36,15 +36,7 @@ static inline G_GNUC_PRINTF(1, 2) int DPRINTF(const char *fmt, ...) } #endif -#define __le16 uint16_t -#define __le32 uint32_t -#define __le64 uint64_t - -#define __be16 uint16_t -#define __be32 uint32_t -#define __be64 uint64_t - -static inline bool ipv4_addr_is_multicast(__be32 addr) +static inline bool ipv4_addr_is_multicast(uint32_t addr) { return (addr & htonl(0xf0000000)) == htonl(0xe0000000); } @@ -52,8 +44,8 @@ static inline bool ipv4_addr_is_multicast(__be32 addr) typedef struct ipv6_addr { union { uint8_t addr8[16]; - __be16 addr16[8]; - __be32 addr32[4]; + uint16_t addr16[8]; + uint32_t addr32[4]; }; } Ipv6Addr; diff --git a/hw/net/rocker/rocker_hw.h b/hw/net/rocker/rocker_hw.h index 1786323fa4..7ec6bfbcb9 100644 --- a/hw/net/rocker/rocker_hw.h +++ b/hw/net/rocker/rocker_hw.h @@ -9,10 +9,6 @@ #ifndef ROCKER_HW_H #define ROCKER_HW_H -#define __le16 uint16_t -#define __le32 uint32_t -#define __le64 uint64_t - /* * Return codes */ @@ -124,12 +120,12 @@ enum { */ typedef struct rocker_desc { - __le64 buf_addr; + uint64_t buf_addr; uint64_t cookie; - __le16 buf_size; - __le16 tlv_size; - __le16 rsvd[5]; /* pad to 32 bytes */ - __le16 comp_err; + uint16_t buf_size; + uint16_t tlv_size; + uint16_t rsvd[5]; /* pad to 32 bytes */ + uint16_t comp_err; } __attribute__((packed, aligned(8))) RockerDesc; /* @@ -137,9 +133,9 @@ typedef struct rocker_desc { */ typedef struct rocker_tlv { - __le32 type; - __le16 len; - __le16 rsvd; + uint32_t type; + uint16_t len; + uint16_t rsvd; } __attribute__((packed, aligned(8))) RockerTlv; /* cmd msg */ diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c index 3378f63110..4aed178756 100644 --- a/hw/net/rocker/rocker_of_dpa.c +++ b/hw/net/rocker/rocker_of_dpa.c @@ -52,10 +52,10 @@ typedef struct of_dpa_flow_key { uint32_t tunnel_id; /* overlay tunnel id */ uint32_t tbl_id; /* table id */ struct { - __be16 vlan_id; /* 0 if no VLAN */ + uint16_t vlan_id; /* 0 if no VLAN */ MACAddr src; /* ethernet source address */ MACAddr dst; /* ethernet destination address */ - __be16 type; /* ethernet frame type */ + uint16_t type; /* ethernet frame type */ } eth; struct { uint8_t proto; /* IP protocol or ARP opcode */ @@ -66,14 +66,14 @@ typedef struct of_dpa_flow_key { union { struct { struct { - __be32 src; /* IP source address */ - __be32 dst; /* IP destination address */ + uint32_t src; /* IP source address */ + uint32_t dst; /* IP destination address */ } addr; union { struct { - __be16 src; /* TCP/UDP/SCTP source port */ - __be16 dst; /* TCP/UDP/SCTP destination port */ - __be16 flags; /* TCP flags */ + uint16_t src; /* TCP/UDP/SCTP source port */ + uint16_t dst; /* TCP/UDP/SCTP destination port */ + uint16_t flags; /* TCP flags */ } tp; struct { MACAddr sha; /* ARP source hardware address */ @@ -86,11 +86,11 @@ typedef struct of_dpa_flow_key { Ipv6Addr src; /* IPv6 source address */ Ipv6Addr dst; /* IPv6 destination address */ } addr; - __be32 label; /* IPv6 flow label */ + uint32_t label; /* IPv6 flow label */ struct { - __be16 src; /* TCP/UDP/SCTP source port */ - __be16 dst; /* TCP/UDP/SCTP destination port */ - __be16 flags; /* TCP flags */ + uint16_t src; /* TCP/UDP/SCTP source port */ + uint16_t dst; /* TCP/UDP/SCTP destination port */ + uint16_t flags; /* TCP flags */ } tp; struct { Ipv6Addr target; /* ND target address */ @@ -112,13 +112,13 @@ typedef struct of_dpa_flow_action { struct { uint32_t group_id; uint32_t tun_log_lport; - __be16 vlan_id; + uint16_t vlan_id; } write; struct { - __be16 new_vlan_id; + uint16_t new_vlan_id; uint32_t out_pport; uint8_t copy_to_cpu; - __be16 vlan_id; + uint16_t vlan_id; } apply; } OfDpaFlowAction; @@ -143,7 +143,7 @@ typedef struct of_dpa_flow { typedef struct of_dpa_flow_pkt_fields { uint32_t tunnel_id; struct eth_header *ethhdr; - __be16 *h_proto; + uint16_t *h_proto; struct vlan_header *vlanhdr; struct ip_header *ipv4hdr; struct ip6_header *ipv6hdr; @@ -180,7 +180,7 @@ typedef struct of_dpa_group { uint32_t group_id; MACAddr src_mac; MACAddr dst_mac; - __be16 vlan_id; + uint16_t vlan_id; } l2_rewrite; struct { uint16_t group_count; @@ -190,13 +190,13 @@ typedef struct of_dpa_group { uint32_t group_id; MACAddr src_mac; MACAddr dst_mac; - __be16 vlan_id; + uint16_t vlan_id; uint8_t ttl_check; } l3_unicast; }; } OfDpaGroup; -static int of_dpa_mask2prefix(__be32 mask) +static int of_dpa_mask2prefix(uint32_t mask) { int i; int count = 32; @@ -451,7 +451,7 @@ static void of_dpa_flow_pkt_parse(OfDpaFlowContext *fc, fc->iovcnt = iovcnt + 2; } -static void of_dpa_flow_pkt_insert_vlan(OfDpaFlowContext *fc, __be16 vlan_id) +static void of_dpa_flow_pkt_insert_vlan(OfDpaFlowContext *fc, uint16_t vlan_id) { OfDpaFlowPktFields *fields = &fc->fields; uint16_t h_proto = fields->ethhdr->h_proto; @@ -486,7 +486,7 @@ static void of_dpa_flow_pkt_strip_vlan(OfDpaFlowContext *fc) static void of_dpa_flow_pkt_hdr_rewrite(OfDpaFlowContext *fc, uint8_t *src_mac, uint8_t *dst_mac, - __be16 vlan_id) + uint16_t vlan_id) { OfDpaFlowPktFields *fields = &fc->fields; From 756e12e791771034ac105a5d2c9887bbbb6b7c73 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:08 -0400 Subject: [PATCH 14/77] i386: Introduce tdx-guest object MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce tdx-guest object which inherits X86_CONFIDENTIAL_GUEST, and will be used to create TDX VMs (TDs) by qemu -machine ...,confidential-guest-support=tdx0 \ -object tdx-guest,id=tdx0 It has one QAPI member 'attributes' defined, which allows user to set TD's attributes directly. Signed-off-by: Xiaoyao Li Acked-by: Gerd Hoffmann Acked-by: Markus Armbruster Reviewed-by: Daniel P. Berrangé Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-3-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- configs/devices/i386-softmmu/default.mak | 1 + hw/i386/Kconfig | 5 +++ qapi/qom.json | 15 +++++++++ target/i386/kvm/meson.build | 2 ++ target/i386/kvm/tdx.c | 43 ++++++++++++++++++++++++ target/i386/kvm/tdx.h | 21 ++++++++++++ 6 files changed, 87 insertions(+) create mode 100644 target/i386/kvm/tdx.c create mode 100644 target/i386/kvm/tdx.h diff --git a/configs/devices/i386-softmmu/default.mak b/configs/devices/i386-softmmu/default.mak index 4faf2f0315..bc0479a7e0 100644 --- a/configs/devices/i386-softmmu/default.mak +++ b/configs/devices/i386-softmmu/default.mak @@ -18,6 +18,7 @@ #CONFIG_QXL=n #CONFIG_SEV=n #CONFIG_SGA=n +#CONFIG_TDX=n #CONFIG_TEST_DEVICES=n #CONFIG_TPM_CRB=n #CONFIG_TPM_TIS_ISA=n diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index d34ce07b21..cce9521ba9 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -10,6 +10,10 @@ config SGX bool depends on KVM +config TDX + bool + depends on KVM + config PC bool imply APPLESMC @@ -26,6 +30,7 @@ config PC imply QXL imply SEV imply SGX + imply TDX imply TEST_DEVICES imply TPM_CRB imply TPM_TIS_ISA diff --git a/qapi/qom.json b/qapi/qom.json index 04c118e4d6..3d7e11efc3 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -1047,6 +1047,19 @@ '*host-data': 'str', '*vcek-disabled': 'bool' } } +## +# @TdxGuestProperties: +# +# Properties for tdx-guest objects. +# +# @attributes: The 'attributes' of a TD guest that is passed to +# KVM_TDX_INIT_VM +# +# Since: 10.1 +## +{ 'struct': 'TdxGuestProperties', + 'data': { '*attributes': 'uint64' } } + ## # @ThreadContextProperties: # @@ -1132,6 +1145,7 @@ 'sev-snp-guest', 'thread-context', 's390-pv-guest', + 'tdx-guest', 'throttle-group', 'tls-creds-anon', 'tls-creds-psk', @@ -1204,6 +1218,7 @@ 'if': 'CONFIG_SECRET_KEYRING' }, 'sev-guest': 'SevGuestProperties', 'sev-snp-guest': 'SevSnpGuestProperties', + 'tdx-guest': 'TdxGuestProperties', 'thread-context': 'ThreadContextProperties', 'throttle-group': 'ThrottleGroupProperties', 'tls-creds-anon': 'TlsCredsAnonProperties', diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build index 3996cafaf2..466bccb9cb 100644 --- a/target/i386/kvm/meson.build +++ b/target/i386/kvm/meson.build @@ -8,6 +8,8 @@ i386_kvm_ss.add(files( i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c')) +i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c')) + i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c')) i386_system_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c new file mode 100644 index 0000000000..ab70566c7d --- /dev/null +++ b/target/i386/kvm/tdx.c @@ -0,0 +1,43 @@ +/* + * QEMU TDX support + * + * Copyright (c) 2025 Intel Corporation + * + * Author: + * Xiaoyao Li + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qom/object_interfaces.h" + +#include "tdx.h" + +/* tdx guest */ +OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest, + tdx_guest, + TDX_GUEST, + X86_CONFIDENTIAL_GUEST, + { TYPE_USER_CREATABLE }, + { NULL }) + +static void tdx_guest_init(Object *obj) +{ + ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj); + TdxGuest *tdx = TDX_GUEST(obj); + + cgs->require_guest_memfd = true; + tdx->attributes = 0; + + object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes, + OBJ_PROP_FLAG_READWRITE); +} + +static void tdx_guest_finalize(Object *obj) +{ +} + +static void tdx_guest_class_init(ObjectClass *oc, const void *data) +{ +} diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h new file mode 100644 index 0000000000..f3b7253361 --- /dev/null +++ b/target/i386/kvm/tdx.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef QEMU_I386_TDX_H +#define QEMU_I386_TDX_H + +#include "confidential-guest.h" + +#define TYPE_TDX_GUEST "tdx-guest" +#define TDX_GUEST(obj) OBJECT_CHECK(TdxGuest, (obj), TYPE_TDX_GUEST) + +typedef struct TdxGuestClass { + X86ConfidentialGuestClass parent_class; +} TdxGuestClass; + +typedef struct TdxGuest { + X86ConfidentialGuest parent_obj; + + uint64_t attributes; /* TD attributes */ +} TdxGuest; + +#endif /* QEMU_I386_TDX_H */ From b455880e5515a9fc2b923bfc6c60bb54519b51d3 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:09 -0400 Subject: [PATCH 15/77] i386/tdx: Implement tdx_kvm_type() for TDX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TDX VM requires VM type to be KVM_X86_TDX_VM. Implement tdx_kvm_type() as X86ConfidentialGuestClass->kvm_type. Signed-off-by: Xiaoyao Li Reviewed-by: Daniel P. Berrangé Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-4-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/kvm.c | 1 + target/i386/kvm/tdx.c | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index c9a3c02e3e..653a8f46c2 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -192,6 +192,7 @@ static const char *vm_type_name[] = { [KVM_X86_SEV_VM] = "SEV", [KVM_X86_SEV_ES_VM] = "SEV-ES", [KVM_X86_SNP_VM] = "SEV-SNP", + [KVM_X86_TDX_VM] = "TDX", }; bool kvm_is_vm_type_supported(int type) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index ab70566c7d..21a4f87756 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -12,8 +12,17 @@ #include "qemu/osdep.h" #include "qom/object_interfaces.h" +#include "kvm_i386.h" #include "tdx.h" +static int tdx_kvm_type(X86ConfidentialGuest *cg) +{ + /* Do the object check */ + TDX_GUEST(cg); + + return KVM_X86_TDX_VM; +} + /* tdx guest */ OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest, tdx_guest, @@ -40,4 +49,7 @@ static void tdx_guest_finalize(Object *obj) static void tdx_guest_class_init(ObjectClass *oc, const void *data) { + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + x86_klass->kvm_type = tdx_kvm_type; } From 631a2ac5a4beab740b342367550562cd659b4c4a Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:10 -0400 Subject: [PATCH 16/77] i386/tdx: Implement tdx_kvm_init() to initialize TDX VM context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement TDX specific ConfidentialGuestSupportClass::kvm_init() callback, tdx_kvm_init(). Mark guest state is proctected for TDX VM. More TDX specific initialization will be added later. Signed-off-by: Xiaoyao Li Reviewed-by: Daniel P. Berrangé Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-5-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/kvm.c | 11 +---------- target/i386/kvm/tdx.c | 10 ++++++++++ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 653a8f46c2..d29376c599 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -3207,16 +3207,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) Error *local_err = NULL; /* - * Initialize SEV context, if required - * - * If no memory encryption is requested (ms->cgs == NULL) this is - * a no-op. - * - * It's also a no-op if a non-SEV confidential guest support - * mechanism is selected. SEV is the only mechanism available to - * select on x86 at present, so this doesn't arise, but if new - * mechanisms are supported in future (e.g. TDX), they'll need - * their own initialization either here or elsewhere. + * Initialize confidential guest (SEV/TDX) context, if required */ if (ms->cgs) { ret = confidential_guest_kvm_init(ms->cgs, &local_err); diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 21a4f87756..2b060fb52b 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -12,9 +12,17 @@ #include "qemu/osdep.h" #include "qom/object_interfaces.h" +#include "hw/i386/x86.h" #include "kvm_i386.h" #include "tdx.h" +static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +{ + kvm_mark_guest_state_protected(); + + return 0; +} + static int tdx_kvm_type(X86ConfidentialGuest *cg) { /* Do the object check */ @@ -49,7 +57,9 @@ static void tdx_guest_finalize(Object *obj) static void tdx_guest_class_init(ObjectClass *oc, const void *data) { + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + klass->kvm_init = tdx_kvm_init; x86_klass->kvm_type = tdx_kvm_type; } From 8eddedc3701d2190db976a05155a8263c8ec175b Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:11 -0400 Subject: [PATCH 17/77] i386/tdx: Get tdx_capabilities via KVM_TDX_CAPABILITIES KVM provides TDX capabilities via sub command KVM_TDX_CAPABILITIES of IOCTL(KVM_MEMORY_ENCRYPT_OP). Get the capabilities when initializing TDX context. It will be used to validate user's setting later. Since there is no interface reporting how many cpuid configs contains in KVM_TDX_CAPABILITIES, QEMU chooses to try starting with a known number and abort when it exceeds KVM_MAX_CPUID_ENTRIES. Besides, introduce the interfaces to invoke TDX "ioctls" at VCPU scope in preparation. Signed-off-by: Xiaoyao Li Link: https://lore.kernel.org/r/20250508150002.689633-6-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/kvm.c | 2 - target/i386/kvm/kvm_i386.h | 2 + target/i386/kvm/tdx.c | 109 ++++++++++++++++++++++++++++++++++++- 3 files changed, 109 insertions(+), 4 deletions(-) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index d29376c599..6d88495d47 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -1780,8 +1780,6 @@ static int hyperv_init_vcpu(X86CPU *cpu) static Error *invtsc_mig_blocker; -#define KVM_MAX_CPUID_ENTRIES 100 - static void kvm_init_xsave(CPUX86State *env) { if (has_xsave2) { diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h index 88565e8dba..ed1e61fb8b 100644 --- a/target/i386/kvm/kvm_i386.h +++ b/target/i386/kvm/kvm_i386.h @@ -13,6 +13,8 @@ #include "system/kvm.h" +#define KVM_MAX_CPUID_ENTRIES 100 + /* always false if !CONFIG_KVM */ #define kvm_pit_in_kernel() \ (kvm_irqchip_in_kernel() && !kvm_irqchip_is_split()) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 2b060fb52b..f8ec4fa217 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -10,19 +10,124 @@ */ #include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/error.h" #include "qom/object_interfaces.h" #include "hw/i386/x86.h" #include "kvm_i386.h" #include "tdx.h" -static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +static struct kvm_tdx_capabilities *tdx_caps; + +enum tdx_ioctl_level { + TDX_VM_IOCTL, + TDX_VCPU_IOCTL, +}; + +static int tdx_ioctl_internal(enum tdx_ioctl_level level, void *state, + int cmd_id, __u32 flags, void *data, + Error **errp) { - kvm_mark_guest_state_protected(); + struct kvm_tdx_cmd tdx_cmd = {}; + int r; + + const char *tdx_ioctl_name[] = { + [KVM_TDX_CAPABILITIES] = "KVM_TDX_CAPABILITIES", + [KVM_TDX_INIT_VM] = "KVM_TDX_INIT_VM", + [KVM_TDX_INIT_VCPU] = "KVM_TDX_INIT_VCPU", + [KVM_TDX_INIT_MEM_REGION] = "KVM_TDX_INIT_MEM_REGION", + [KVM_TDX_FINALIZE_VM] = "KVM_TDX_FINALIZE_VM", + [KVM_TDX_GET_CPUID] = "KVM_TDX_GET_CPUID", + }; + + tdx_cmd.id = cmd_id; + tdx_cmd.flags = flags; + tdx_cmd.data = (__u64)(unsigned long)data; + + switch (level) { + case TDX_VM_IOCTL: + r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd); + break; + case TDX_VCPU_IOCTL: + r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd); + break; + default: + error_setg(errp, "Invalid tdx_ioctl_level %d", level); + return -EINVAL; + } + + if (r < 0) { + error_setg_errno(errp, -r, "TDX ioctl %s failed, hw_errors: 0x%llx", + tdx_ioctl_name[cmd_id], tdx_cmd.hw_error); + } + return r; +} + +static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data, + Error **errp) +{ + return tdx_ioctl_internal(TDX_VM_IOCTL, NULL, cmd_id, flags, data, errp); +} + +static inline int tdx_vcpu_ioctl(CPUState *cpu, int cmd_id, __u32 flags, + void *data, Error **errp) +{ + return tdx_ioctl_internal(TDX_VCPU_IOCTL, cpu, cmd_id, flags, data, errp); +} + +static int get_tdx_capabilities(Error **errp) +{ + struct kvm_tdx_capabilities *caps; + /* 1st generation of TDX reports 6 cpuid configs */ + int nr_cpuid_configs = 6; + size_t size; + int r; + + do { + Error *local_err = NULL; + size = sizeof(struct kvm_tdx_capabilities) + + nr_cpuid_configs * sizeof(struct kvm_cpuid_entry2); + caps = g_malloc0(size); + caps->cpuid.nent = nr_cpuid_configs; + + r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps, &local_err); + if (r == -E2BIG) { + g_free(caps); + nr_cpuid_configs *= 2; + if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) { + error_report("KVM TDX seems broken that number of CPUID entries" + " in kvm_tdx_capabilities exceeds limit: %d", + KVM_MAX_CPUID_ENTRIES); + error_propagate(errp, local_err); + return r; + } + error_free(local_err); + } else if (r < 0) { + g_free(caps); + error_propagate(errp, local_err); + return r; + } + } while (r == -E2BIG); + + tdx_caps = caps; return 0; } +static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +{ + int r = 0; + + kvm_mark_guest_state_protected(); + + if (!tdx_caps) { + r = get_tdx_capabilities(errp); + } + + return r; +} + static int tdx_kvm_type(X86ConfidentialGuest *cg) { /* Do the object check */ From 1619d0e45be0d1e48a46d80963b4e77dc1b000a2 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:12 -0400 Subject: [PATCH 18/77] i386/tdx: Introduce is_tdx_vm() helper and cache tdx_guest object It will need special handling for TDX VMs all around the QEMU. Introduce is_tdx_vm() helper to query if it's a TDX VM. Cache tdx_guest object thus no need to cast from ms->cgs every time. Signed-off-by: Xiaoyao Li Acked-by: Gerd Hoffmann Reviewed-by: Isaku Yamahata Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-7-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/tdx.c | 15 ++++++++++++++- target/i386/kvm/tdx.h | 10 ++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index f8ec4fa217..3750889453 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -18,8 +18,16 @@ #include "kvm_i386.h" #include "tdx.h" +static TdxGuest *tdx_guest; + static struct kvm_tdx_capabilities *tdx_caps; +/* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */ +bool is_tdx_vm(void) +{ + return !!tdx_guest; +} + enum tdx_ioctl_level { TDX_VM_IOCTL, TDX_VCPU_IOCTL, @@ -117,15 +125,20 @@ static int get_tdx_capabilities(Error **errp) static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { + TdxGuest *tdx = TDX_GUEST(cgs); int r = 0; kvm_mark_guest_state_protected(); if (!tdx_caps) { r = get_tdx_capabilities(errp); + if (r) { + return r; + } } - return r; + tdx_guest = tdx; + return 0; } static int tdx_kvm_type(X86ConfidentialGuest *cg) diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h index f3b7253361..de8ae91961 100644 --- a/target/i386/kvm/tdx.h +++ b/target/i386/kvm/tdx.h @@ -3,6 +3,10 @@ #ifndef QEMU_I386_TDX_H #define QEMU_I386_TDX_H +#ifndef CONFIG_USER_ONLY +#include CONFIG_DEVICES /* CONFIG_TDX */ +#endif + #include "confidential-guest.h" #define TYPE_TDX_GUEST "tdx-guest" @@ -18,4 +22,10 @@ typedef struct TdxGuest { uint64_t attributes; /* TD attributes */ } TdxGuest; +#ifdef CONFIG_TDX +bool is_tdx_vm(void); +#else +#define is_tdx_vm() 0 +#endif /* CONFIG_TDX */ + #endif /* QEMU_I386_TDX_H */ From a668268dc08f7f4d30cecd513054bb38ce48c0d6 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:13 -0400 Subject: [PATCH 19/77] kvm: Introduce kvm_arch_pre_create_vcpu() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce kvm_arch_pre_create_vcpu(), to perform arch-dependent work prior to create any vcpu. This is for i386 TDX because it needs call TDX_INIT_VM before creating any vcpu. The specific implementation for i386 will be added in the future patch. Signed-off-by: Xiaoyao Li Acked-by: Gerd Hoffmann Reviewed-by: Daniel P. Berrangé Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-8-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-all.c | 5 +++++ include/system/kvm.h | 1 + target/arm/kvm.c | 5 +++++ target/i386/kvm/kvm.c | 5 +++++ target/loongarch/kvm/kvm.c | 4 ++++ target/mips/kvm.c | 5 +++++ target/ppc/kvm.c | 5 +++++ target/riscv/kvm/kvm-cpu.c | 5 +++++ target/s390x/kvm/kvm.c | 5 +++++ 9 files changed, 40 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 278a50690c..42d239cf8f 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -545,6 +545,11 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); + ret = kvm_arch_pre_create_vcpu(cpu, errp); + if (ret < 0) { + goto err; + } + ret = kvm_create_vcpu(cpu); if (ret < 0) { error_setg_errno(errp, -ret, diff --git a/include/system/kvm.h b/include/system/kvm.h index b690dda137..62ec131d4d 100644 --- a/include/system/kvm.h +++ b/include/system/kvm.h @@ -376,6 +376,7 @@ int kvm_arch_get_default_type(MachineState *ms); int kvm_arch_init(MachineState *ms, KVMState *s); +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp); int kvm_arch_init_vcpu(CPUState *cpu); int kvm_arch_destroy_vcpu(CPUState *cpu); diff --git a/target/arm/kvm.c b/target/arm/kvm.c index a2791aa866..74fda8b809 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -1846,6 +1846,11 @@ static int kvm_arm_sve_set_vls(ARMCPU *cpu) #define ARM_CPU_ID_MPIDR 3, 0, 0, 0, 5 +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { int ret; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 6d88495d47..446f0600d4 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -2051,6 +2051,11 @@ full: abort(); } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { struct { diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c index 1bda570482..c66bdd5302 100644 --- a/target/loongarch/kvm/kvm.c +++ b/target/loongarch/kvm/kvm.c @@ -1071,7 +1071,11 @@ static int kvm_cpu_check_pv_features(CPUState *cs, Error **errp) env->pv_features |= BIT(KVM_FEATURE_VIRT_EXTIOI); } } + return 0; +} +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ return 0; } diff --git a/target/mips/kvm.c b/target/mips/kvm.c index d67b7c1a8e..ec53acb51a 100644 --- a/target/mips/kvm.c +++ b/target/mips/kvm.c @@ -61,6 +61,11 @@ int kvm_arch_irqchip_create(KVMState *s) return 0; } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { CPUMIPSState *env = cpu_env(cs); diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 8a957c3c7d..015658049e 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -479,6 +479,11 @@ static void kvmppc_hw_debug_points_init(CPUPPCState *cenv) } } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index efb41fac53..e1a04be20f 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -1472,6 +1472,11 @@ static int kvm_vcpu_enable_sbi_dbcn(RISCVCPU *cpu, CPUState *cs) return kvm_set_one_reg(cs, kvm_sbi_dbcn.kvm_reg_id, ®); } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { int ret = 0; diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index 6cd2ebc5f1..67d9a1977c 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -398,6 +398,11 @@ unsigned long kvm_arch_vcpu_id(CPUState *cpu) return cpu->cpu_index; } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { unsigned int max_cpus = MACHINE(qdev_get_machine())->smp.max_cpus; From f15898b0f50609d66465326221aa54b6699da674 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:14 -0400 Subject: [PATCH 20/77] i386/tdx: Initialize TDX before creating TD vcpus Invoke KVM_TDX_INIT_VM in kvm_arch_pre_create_vcpu() that KVM_TDX_INIT_VM configures global TD configurations, e.g. the canonical CPUID config, and must be executed prior to creating vCPUs. Use kvm_x86_arch_cpuid() to setup the CPUID settings for TDX VM. Note, this doesn't address the fact that QEMU may change the CPUID configuration when creating vCPUs, i.e. punts on refactoring QEMU to provide a stable CPUID config prior to kvm_arch_init(). Signed-off-by: Xiaoyao Li Acked-by: Gerd Hoffmann Acked-by: Markus Armbruster Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-9-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/kvm.c | 16 +++--- target/i386/kvm/kvm_i386.h | 5 ++ target/i386/kvm/meson.build | 2 +- target/i386/kvm/tdx-stub.c | 10 ++++ target/i386/kvm/tdx.c | 105 ++++++++++++++++++++++++++++++++++++ target/i386/kvm/tdx.h | 6 +++ 6 files changed, 137 insertions(+), 7 deletions(-) create mode 100644 target/i386/kvm/tdx-stub.c diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 446f0600d4..e98f1ee26a 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -38,6 +38,7 @@ #include "kvm_i386.h" #include "../confidential-guest.h" #include "sev.h" +#include "tdx.h" #include "xen-emu.h" #include "hyperv.h" #include "hyperv-proto.h" @@ -415,9 +416,9 @@ static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg) /* Find matching entry for function/index on kvm_cpuid2 struct */ -static struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, - uint32_t function, - uint32_t index) +struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, + uint32_t function, + uint32_t index) { int i; for (i = 0; i < cpuid->nent; ++i) { @@ -1822,9 +1823,8 @@ static void kvm_init_nested_state(CPUX86State *env) } } -static uint32_t kvm_x86_build_cpuid(CPUX86State *env, - struct kvm_cpuid_entry2 *entries, - uint32_t cpuid_i) +uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries, + uint32_t cpuid_i) { uint32_t limit, i, j; uint32_t unused; @@ -2053,6 +2053,10 @@ full: int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) { + if (is_tdx_vm()) { + return tdx_pre_create_vcpu(cpu, errp); + } + return 0; } diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h index ed1e61fb8b..dc696cb723 100644 --- a/target/i386/kvm/kvm_i386.h +++ b/target/i386/kvm/kvm_i386.h @@ -59,6 +59,11 @@ uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address); void kvm_update_msi_routes_all(void *private, bool global, uint32_t index, uint32_t mask); +struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, + uint32_t function, + uint32_t index); +uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries, + uint32_t cpuid_i); #endif /* CONFIG_KVM */ void kvm_pc_setup_irq_routing(bool pci_enabled); diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build index 466bccb9cb..3f44cdedb7 100644 --- a/target/i386/kvm/meson.build +++ b/target/i386/kvm/meson.build @@ -8,7 +8,7 @@ i386_kvm_ss.add(files( i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c')) -i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c')) +i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c'), if_false: files('tdx-stub.c')) i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c')) diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c new file mode 100644 index 0000000000..2344433594 --- /dev/null +++ b/target/i386/kvm/tdx-stub.c @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "qemu/osdep.h" + +#include "tdx.h" + +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return -EINVAL; +} diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 3750889453..2d2d48c083 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -149,6 +149,109 @@ static int tdx_kvm_type(X86ConfidentialGuest *cg) return KVM_X86_TDX_VM; } +static int setup_td_xfam(X86CPU *x86cpu, Error **errp) +{ + CPUX86State *env = &x86cpu->env; + uint64_t xfam; + + xfam = env->features[FEAT_XSAVE_XCR0_LO] | + env->features[FEAT_XSAVE_XCR0_HI] | + env->features[FEAT_XSAVE_XSS_LO] | + env->features[FEAT_XSAVE_XSS_HI]; + + if (xfam & ~tdx_caps->supported_xfam) { + error_setg(errp, "Invalid XFAM 0x%lx for TDX VM (supported: 0x%llx))", + xfam, tdx_caps->supported_xfam); + return -1; + } + + tdx_guest->xfam = xfam; + return 0; +} + +static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids) +{ + int i, dest_cnt = 0; + struct kvm_cpuid_entry2 *src, *dest, *conf; + + for (i = 0; i < cpuids->nent; i++) { + src = cpuids->entries + i; + conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index); + if (!conf) { + continue; + } + dest = cpuids->entries + dest_cnt; + + dest->function = src->function; + dest->index = src->index; + dest->flags = src->flags; + dest->eax = src->eax & conf->eax; + dest->ebx = src->ebx & conf->ebx; + dest->ecx = src->ecx & conf->ecx; + dest->edx = src->edx & conf->edx; + + dest_cnt++; + } + cpuids->nent = dest_cnt++; +} + +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + X86CPU *x86cpu = X86_CPU(cpu); + CPUX86State *env = &x86cpu->env; + g_autofree struct kvm_tdx_init_vm *init_vm = NULL; + Error *local_err = NULL; + int retry = 10000; + int r = 0; + + QEMU_LOCK_GUARD(&tdx_guest->lock); + if (tdx_guest->initialized) { + return r; + } + + init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) + + sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); + + r = setup_td_xfam(x86cpu, errp); + if (r) { + return r; + } + + init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0); + tdx_filter_cpuid(&init_vm->cpuid); + + init_vm->attributes = tdx_guest->attributes; + init_vm->xfam = tdx_guest->xfam; + + /* + * KVM_TDX_INIT_VM gets -EAGAIN when KVM side SEAMCALL(TDH_MNG_CREATE) + * gets TDX_RND_NO_ENTROPY due to Random number generation (e.g., RDRAND or + * RDSEED) is busy. + * + * Retry for the case. + */ + do { + error_free(local_err); + local_err = NULL; + r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm, &local_err); + } while (r == -EAGAIN && --retry); + + if (r < 0) { + if (!retry) { + error_append_hint(&local_err, "Hardware RNG (Random Number " + "Generator) is busy occupied by someone (via RDRAND/RDSEED) " + "maliciously, which leads to KVM_TDX_INIT_VM keeping failure " + "due to lack of entropy.\n"); + } + error_propagate(errp, local_err); + return r; + } + + tdx_guest->initialized = true; + + return 0; +} + /* tdx guest */ OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest, tdx_guest, @@ -162,6 +265,8 @@ static void tdx_guest_init(Object *obj) ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj); TdxGuest *tdx = TDX_GUEST(obj); + qemu_mutex_init(&tdx->lock); + cgs->require_guest_memfd = true; tdx->attributes = 0; diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h index de8ae91961..4e2b5c61ff 100644 --- a/target/i386/kvm/tdx.h +++ b/target/i386/kvm/tdx.h @@ -19,7 +19,11 @@ typedef struct TdxGuestClass { typedef struct TdxGuest { X86ConfidentialGuest parent_obj; + QemuMutex lock; + + bool initialized; uint64_t attributes; /* TD attributes */ + uint64_t xfam; } TdxGuest; #ifdef CONFIG_TDX @@ -28,4 +32,6 @@ bool is_tdx_vm(void); #define is_tdx_vm() 0 #endif /* CONFIG_TDX */ +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp); + #endif /* QEMU_I386_TDX_H */ From 6016e2972d94c90307b6caf55a8e3aee5424c09b Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:15 -0400 Subject: [PATCH 21/77] i386/tdx: Add property sept-ve-disable for tdx-guest object MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bit 28 of TD attribute, named SEPT_VE_DISABLE. When set to 1, it disables EPT violation conversion to #VE on guest TD access of PENDING pages. Some guest OS (e.g., Linux TD guest) may require this bit as 1. Otherwise refuse to boot. Add sept-ve-disable property for tdx-guest object, for user to configure this bit. Signed-off-by: Xiaoyao Li Acked-by: Gerd Hoffmann Acked-by: Markus Armbruster Reviewed-by: Daniel P. Berrangé Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-10-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- qapi/qom.json | 8 +++++++- target/i386/kvm/tdx.c | 23 +++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/qapi/qom.json b/qapi/qom.json index 3d7e11efc3..5a88d36423 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -1055,10 +1055,16 @@ # @attributes: The 'attributes' of a TD guest that is passed to # KVM_TDX_INIT_VM # +# @sept-ve-disable: toggle bit 28 of TD attributes to control disabling +# of EPT violation conversion to #VE on guest TD access of PENDING +# pages. Some guest OS (e.g., Linux TD guest) may require this to +# be set, otherwise they refuse to boot. +# # Since: 10.1 ## { 'struct': 'TdxGuestProperties', - 'data': { '*attributes': 'uint64' } } + 'data': { '*attributes': 'uint64', + '*sept-ve-disable': 'bool' } } ## # @ThreadContextProperties: diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 2d2d48c083..32ba3982ff 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -18,6 +18,8 @@ #include "kvm_i386.h" #include "tdx.h" +#define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE BIT_ULL(28) + static TdxGuest *tdx_guest; static struct kvm_tdx_capabilities *tdx_caps; @@ -252,6 +254,24 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) return 0; } +static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return !!(tdx->attributes & TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE); +} + +static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + if (value) { + tdx->attributes |= TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; + } else { + tdx->attributes &= ~TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; + } +} + /* tdx guest */ OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest, tdx_guest, @@ -272,6 +292,9 @@ static void tdx_guest_init(Object *obj) object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes, OBJ_PROP_FLAG_READWRITE); + object_property_add_bool(obj, "sept-ve-disable", + tdx_guest_get_sept_ve_disable, + tdx_guest_set_sept_ve_disable); } static void tdx_guest_finalize(Object *obj) From 714af52276e74a1829674d180ef26ecb6261834c Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Thu, 8 May 2025 10:59:16 -0400 Subject: [PATCH 22/77] i386/tdx: Make sept_ve_disable set by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For TDX KVM use case, Linux guest is the most major one. It requires sept_ve_disable set. Make it default for the main use case. For other use case, it can be enabled/disabled via qemu command line. Signed-off-by: Isaku Yamahata Signed-off-by: Xiaoyao Li Reviewed-by: Daniel P. Berrangé Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-11-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/tdx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 32ba3982ff..a30731b1a3 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -288,7 +288,7 @@ static void tdx_guest_init(Object *obj) qemu_mutex_init(&tdx->lock); cgs->require_guest_memfd = true; - tdx->attributes = 0; + tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes, OBJ_PROP_FLAG_READWRITE); From bb3be394cf80d68251e5b89e823dddc679b6e644 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:17 -0400 Subject: [PATCH 23/77] i386/tdx: Wire CPU features up with attributes of TD guest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For QEMU VMs, - PKS is configured via CPUID_7_0_ECX_PKS, e.g., -cpu xxx,+pks and - PMU is configured by x86cpu->enable_pmu, e.g., -cpu xxx,pmu=on While the bit 30 (PKS) and bit 63 (PERFMON) of TD's attributes are also used to configure the PKS and PERFMON/PMU of TD, reuse the existing configuration interfaces of 'cpu' for TD's attributes. Signed-off-by: Xiaoyao Li Acked-by: Gerd Hoffmann Reviewed-by: Daniel P. Berrangé Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-12-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/tdx.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index a30731b1a3..22d66bdb14 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -19,6 +19,8 @@ #include "tdx.h" #define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE BIT_ULL(28) +#define TDX_TD_ATTRIBUTES_PKS BIT_ULL(30) +#define TDX_TD_ATTRIBUTES_PERFMON BIT_ULL(63) static TdxGuest *tdx_guest; @@ -151,6 +153,15 @@ static int tdx_kvm_type(X86ConfidentialGuest *cg) return KVM_X86_TDX_VM; } +static void setup_td_guest_attributes(X86CPU *x86cpu) +{ + CPUX86State *env = &x86cpu->env; + + tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ? + TDX_TD_ATTRIBUTES_PKS : 0; + tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0; +} + static int setup_td_xfam(X86CPU *x86cpu, Error **errp) { CPUX86State *env = &x86cpu->env; @@ -214,6 +225,8 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) + sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); + setup_td_guest_attributes(x86cpu); + r = setup_td_xfam(x86cpu, errp); if (r) { return r; From 53b6f406b4f1a215fb3ec60e56ddba2e019a45ef Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:18 -0400 Subject: [PATCH 24/77] i386/tdx: Validate TD attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Validate TD attributes with tdx_caps that only supported bits are allowed by KVM. Besides, sanity check the attribute bits that have not been supported by QEMU yet. e.g., debug bit, it will be allowed in the future when debug TD support lands in QEMU. Signed-off-by: Xiaoyao Li Acked-by: Gerd Hoffmann Reviewed-by: Zhao Liu Reviewed-by: Daniel P. Berrangé Link: https://lore.kernel.org/r/20250508150002.689633-13-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/tdx.c | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 22d66bdb14..c78a0e8b5e 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -18,10 +18,15 @@ #include "kvm_i386.h" #include "tdx.h" +#define TDX_TD_ATTRIBUTES_DEBUG BIT_ULL(0) #define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE BIT_ULL(28) #define TDX_TD_ATTRIBUTES_PKS BIT_ULL(30) #define TDX_TD_ATTRIBUTES_PERFMON BIT_ULL(63) +#define TDX_SUPPORTED_TD_ATTRS (TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE |\ + TDX_TD_ATTRIBUTES_PKS | \ + TDX_TD_ATTRIBUTES_PERFMON) + static TdxGuest *tdx_guest; static struct kvm_tdx_capabilities *tdx_caps; @@ -153,13 +158,34 @@ static int tdx_kvm_type(X86ConfidentialGuest *cg) return KVM_X86_TDX_VM; } -static void setup_td_guest_attributes(X86CPU *x86cpu) +static int tdx_validate_attributes(TdxGuest *tdx, Error **errp) +{ + if ((tdx->attributes & ~tdx_caps->supported_attrs)) { + error_setg(errp, "Invalid attributes 0x%lx for TDX VM " + "(KVM supported: 0x%llx)", tdx->attributes, + tdx_caps->supported_attrs); + return -1; + } + + if (tdx->attributes & ~TDX_SUPPORTED_TD_ATTRS) { + error_setg(errp, "Some QEMU unsupported TD attribute bits being " + "requested: 0x%lx (QEMU supported: 0x%llx)", + tdx->attributes, TDX_SUPPORTED_TD_ATTRS); + return -1; + } + + return 0; +} + +static int setup_td_guest_attributes(X86CPU *x86cpu, Error **errp) { CPUX86State *env = &x86cpu->env; tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ? TDX_TD_ATTRIBUTES_PKS : 0; tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0; + + return tdx_validate_attributes(tdx_guest, errp); } static int setup_td_xfam(X86CPU *x86cpu, Error **errp) @@ -225,7 +251,10 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) + sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); - setup_td_guest_attributes(x86cpu); + r = setup_td_guest_attributes(x86cpu, errp); + if (r) { + return r; + } r = setup_td_xfam(x86cpu, errp); if (r) { From d05a0858cf876f79b57a622716fbad07f5b2ea08 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Thu, 8 May 2025 10:59:19 -0400 Subject: [PATCH 25/77] i386/tdx: Support user configurable mrconfigid/mrowner/mrownerconfig Three sha384 hash values, mrconfigid, mrowner and mrownerconfig, of a TD can be provided for TDX attestation. Detailed meaning of them can be found: https://lore.kernel.org/qemu-devel/31d6dbc1-f453-4cef-ab08-4813f4e0ff92@intel.com/ Allow user to specify those values via property mrconfigid, mrowner and mrownerconfig. They are all in base64 format. example -object tdx-guest, \ mrconfigid=ASNFZ4mrze8BI0VniavN7wEjRWeJq83vASNFZ4mrze8BI0VniavN7wEjRWeJq83v,\ mrowner=ASNFZ4mrze8BI0VniavN7wEjRWeJq83vASNFZ4mrze8BI0VniavN7wEjRWeJq83v,\ mrownerconfig=ASNFZ4mrze8BI0VniavN7wEjRWeJq83vASNFZ4mrze8BI0VniavN7wEjRWeJq83v Signed-off-by: Isaku Yamahata Co-developed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Acked-by: Markus Armbruster Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-14-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- qapi/qom.json | 16 +++++++- target/i386/kvm/tdx.c | 95 +++++++++++++++++++++++++++++++++++++++++++ target/i386/kvm/tdx.h | 3 ++ 3 files changed, 113 insertions(+), 1 deletion(-) diff --git a/qapi/qom.json b/qapi/qom.json index 5a88d36423..45cd47508b 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -1060,11 +1060,25 @@ # pages. Some guest OS (e.g., Linux TD guest) may require this to # be set, otherwise they refuse to boot. # +# @mrconfigid: ID for non-owner-defined configuration of the guest TD, +# e.g., run-time or OS configuration (base64 encoded SHA384 digest). +# Defaults to all zeros. +# +# @mrowner: ID for the guest TD’s owner (base64 encoded SHA384 digest). +# Defaults to all zeros. +# +# @mrownerconfig: ID for owner-defined configuration of the guest TD, +# e.g., specific to the workload rather than the run-time or OS +# (base64 encoded SHA384 digest). Defaults to all zeros. +# # Since: 10.1 ## { 'struct': 'TdxGuestProperties', 'data': { '*attributes': 'uint64', - '*sept-ve-disable': 'bool' } } + '*sept-ve-disable': 'bool', + '*mrconfigid': 'str', + '*mrowner': 'str', + '*mrownerconfig': 'str' } } ## # @ThreadContextProperties: diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index c78a0e8b5e..671f23d910 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -11,8 +11,10 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" +#include "qemu/base64.h" #include "qapi/error.h" #include "qom/object_interfaces.h" +#include "crypto/hash.h" #include "hw/i386/x86.h" #include "kvm_i386.h" @@ -240,6 +242,7 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) CPUX86State *env = &x86cpu->env; g_autofree struct kvm_tdx_init_vm *init_vm = NULL; Error *local_err = NULL; + size_t data_len; int retry = 10000; int r = 0; @@ -251,6 +254,45 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) + sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); + if (tdx_guest->mrconfigid) { + g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid, + strlen(tdx_guest->mrconfigid), &data_len, errp); + if (!data) { + return -1; + } + if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { + error_setg(errp, "TDX: failed to decode mrconfigid"); + return -1; + } + memcpy(init_vm->mrconfigid, data, data_len); + } + + if (tdx_guest->mrowner) { + g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrowner, + strlen(tdx_guest->mrowner), &data_len, errp); + if (!data) { + return -1; + } + if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { + error_setg(errp, "TDX: failed to decode mrowner"); + return -1; + } + memcpy(init_vm->mrowner, data, data_len); + } + + if (tdx_guest->mrownerconfig) { + g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrownerconfig, + strlen(tdx_guest->mrownerconfig), &data_len, errp); + if (!data) { + return -1; + } + if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { + error_setg(errp, "TDX: failed to decode mrownerconfig"); + return -1; + } + memcpy(init_vm->mrownerconfig, data, data_len); + } + r = setup_td_guest_attributes(x86cpu, errp); if (r) { return r; @@ -314,6 +356,51 @@ static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp) } } +static char *tdx_guest_get_mrconfigid(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return g_strdup(tdx->mrconfigid); +} + +static void tdx_guest_set_mrconfigid(Object *obj, const char *value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + g_free(tdx->mrconfigid); + tdx->mrconfigid = g_strdup(value); +} + +static char *tdx_guest_get_mrowner(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return g_strdup(tdx->mrowner); +} + +static void tdx_guest_set_mrowner(Object *obj, const char *value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + g_free(tdx->mrowner); + tdx->mrowner = g_strdup(value); +} + +static char *tdx_guest_get_mrownerconfig(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return g_strdup(tdx->mrownerconfig); +} + +static void tdx_guest_set_mrownerconfig(Object *obj, const char *value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + g_free(tdx->mrownerconfig); + tdx->mrownerconfig = g_strdup(value); +} + /* tdx guest */ OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest, tdx_guest, @@ -337,6 +424,14 @@ static void tdx_guest_init(Object *obj) object_property_add_bool(obj, "sept-ve-disable", tdx_guest_get_sept_ve_disable, tdx_guest_set_sept_ve_disable); + object_property_add_str(obj, "mrconfigid", + tdx_guest_get_mrconfigid, + tdx_guest_set_mrconfigid); + object_property_add_str(obj, "mrowner", + tdx_guest_get_mrowner, tdx_guest_set_mrowner); + object_property_add_str(obj, "mrownerconfig", + tdx_guest_get_mrownerconfig, + tdx_guest_set_mrownerconfig); } static void tdx_guest_finalize(Object *obj) diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h index 4e2b5c61ff..e472b11fb0 100644 --- a/target/i386/kvm/tdx.h +++ b/target/i386/kvm/tdx.h @@ -24,6 +24,9 @@ typedef struct TdxGuest { bool initialized; uint64_t attributes; /* TD attributes */ uint64_t xfam; + char *mrconfigid; /* base64 encoded sha348 digest */ + char *mrowner; /* base64 encoded sha348 digest */ + char *mrownerconfig; /* base64 encoded sha348 digest */ } TdxGuest; #ifdef CONFIG_TDX From d529a2ac5ef4620173439942f78ec668f9165fc1 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:20 -0400 Subject: [PATCH 26/77] i386/tdx: Set APIC bus rate to match with what TDX module enforces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TDX advertises core crystal clock with cpuid[0x15] as 25MHz for TD guests and it's unchangeable from VMM. As a result, TDX guest reads the APIC timer at the same frequency, 25MHz. While KVM's default emulated frequency for APIC bus is 1GHz, set the APIC bus rate to match with TDX explicitly to ensure KVM provide correct emulated APIC timer for TD guest. Signed-off-by: Xiaoyao Li Reviewed-by: Daniel P. Berrangé Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-15-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/tdx.c | 13 +++++++++++++ target/i386/kvm/tdx.h | 3 +++ 2 files changed, 16 insertions(+) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 671f23d910..58983edd80 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -254,6 +254,19 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) + sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); + if (!kvm_check_extension(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS)) { + error_setg(errp, "KVM doesn't support KVM_CAP_X86_APIC_BUS_CYCLES_NS"); + return -EOPNOTSUPP; + } + + r = kvm_vm_enable_cap(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS, + 0, TDX_APIC_BUS_CYCLES_NS); + if (r < 0) { + error_setg_errno(errp, -r, + "Unable to set core crystal clock frequency to 25MHz"); + return r; + } + if (tdx_guest->mrconfigid) { g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid, strlen(tdx_guest->mrconfigid), &data_len, errp); diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h index e472b11fb0..d39e733d9f 100644 --- a/target/i386/kvm/tdx.h +++ b/target/i386/kvm/tdx.h @@ -16,6 +16,9 @@ typedef struct TdxGuestClass { X86ConfidentialGuestClass parent_class; } TdxGuestClass; +/* TDX requires bus frequency 25MHz */ +#define TDX_APIC_BUS_CYCLES_NS 40 + typedef struct TdxGuest { X86ConfidentialGuest parent_obj; From 0e73b843616e52882940ab89e1b0e86e22be2162 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:21 -0400 Subject: [PATCH 27/77] i386/tdx: Implement user specified tsc frequency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reuse "-cpu,tsc-frequency=" to get user wanted tsc frequency and call VM scope VM_SET_TSC_KHZ to set the tsc frequency of TD before KVM_TDX_INIT_VM. Besides, sanity check the tsc frequency to be in the legal range and legal granularity (required by TDX module). Signed-off-by: Xiaoyao Li Acked-by: Gerd Hoffmann Reviewed-by: Daniel P. Berrangé Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-16-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/kvm.c | 9 +++++++++ target/i386/kvm/tdx.c | 25 +++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index e98f1ee26a..fd1817fc5e 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -870,6 +870,15 @@ static int kvm_arch_set_tsc_khz(CPUState *cs) int r, cur_freq; bool set_ioctl = false; + /* + * TSC of TD vcpu is immutable, it cannot be set/changed via vcpu scope + * VM_SET_TSC_KHZ, but only be initialized via VM scope VM_SET_TSC_KHZ + * before ioctl KVM_TDX_INIT_VM in tdx_pre_create_vcpu() + */ + if (is_tdx_vm()) { + return 0; + } + if (!env->tsc_khz) { return 0; } diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 58983edd80..93a16a1aaa 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -20,6 +20,9 @@ #include "kvm_i386.h" #include "tdx.h" +#define TDX_MIN_TSC_FREQUENCY_KHZ (100 * 1000) +#define TDX_MAX_TSC_FREQUENCY_KHZ (10 * 1000 * 1000) + #define TDX_TD_ATTRIBUTES_DEBUG BIT_ULL(0) #define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE BIT_ULL(28) #define TDX_TD_ATTRIBUTES_PKS BIT_ULL(30) @@ -267,6 +270,28 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) return r; } + if (env->tsc_khz && (env->tsc_khz < TDX_MIN_TSC_FREQUENCY_KHZ || + env->tsc_khz > TDX_MAX_TSC_FREQUENCY_KHZ)) { + error_setg(errp, "Invalid TSC %ld KHz, must specify cpu_frequency " + "between [%d, %d] kHz", env->tsc_khz, + TDX_MIN_TSC_FREQUENCY_KHZ, TDX_MAX_TSC_FREQUENCY_KHZ); + return -EINVAL; + } + + if (env->tsc_khz % (25 * 1000)) { + error_setg(errp, "Invalid TSC %ld KHz, it must be multiple of 25MHz", + env->tsc_khz); + return -EINVAL; + } + + /* it's safe even env->tsc_khz is 0. KVM uses host's tsc_khz in this case */ + r = kvm_vm_ioctl(kvm_state, KVM_SET_TSC_KHZ, env->tsc_khz); + if (r < 0) { + error_setg_errno(errp, -r, "Unable to set TSC frequency to %ld kHz", + env->tsc_khz); + return r; + } + if (tdx_guest->mrconfigid) { g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid, strlen(tdx_guest->mrconfigid), &data_len, errp); From 0dd5fe5ebeabefc7b3d7f043991b1edfe6b8eda9 Mon Sep 17 00:00:00 2001 From: Chao Peng Date: Thu, 8 May 2025 10:59:22 -0400 Subject: [PATCH 28/77] i386/tdx: load TDVF for TD guest TDVF(OVMF) needs to run at private memory for TD guest. TDX cannot support pflash device since it doesn't support read-only private memory. Thus load TDVF(OVMF) with -bios option for TDs. Use memory_region_init_ram_guest_memfd() to allocate the MemoryRegion for TDVF because it needs to be located at private memory. Also store the MemoryRegion pointer of TDVF since the shared ramblock of it can be discared after it gets copied to private ramblock. Signed-off-by: Chao Peng Co-developed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-17-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- hw/i386/x86-common.c | 6 +++++- target/i386/kvm/tdx.c | 6 ++++++ target/i386/kvm/tdx.h | 3 +++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c index 1b0671c523..b1b5f11e73 100644 --- a/hw/i386/x86-common.c +++ b/hw/i386/x86-common.c @@ -44,6 +44,7 @@ #include "standard-headers/asm-x86/bootparam.h" #include CONFIG_DEVICES #include "kvm/kvm_i386.h" +#include "kvm/tdx.h" #ifdef CONFIG_XEN_EMU #include "hw/xen/xen.h" @@ -1035,11 +1036,14 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, if (machine_require_guest_memfd(MACHINE(x86ms))) { memory_region_init_ram_guest_memfd(&x86ms->bios, NULL, "pc.bios", bios_size, &error_fatal); + if (is_tdx_vm()) { + tdx_set_tdvf_region(&x86ms->bios); + } } else { memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, &error_fatal); } - if (sev_enabled()) { + if (sev_enabled() || is_tdx_vm()) { /* * The concept of a "reset" simply doesn't exist for * confidential computing guests, we have to destroy and diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 93a16a1aaa..0f5acbf980 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -137,6 +137,12 @@ static int get_tdx_capabilities(Error **errp) return 0; } +void tdx_set_tdvf_region(MemoryRegion *tdvf_mr) +{ + assert(!tdx_guest->tdvf_mr); + tdx_guest->tdvf_mr = tdvf_mr; +} + static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { TdxGuest *tdx = TDX_GUEST(cgs); diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h index d39e733d9f..b73461b8d8 100644 --- a/target/i386/kvm/tdx.h +++ b/target/i386/kvm/tdx.h @@ -30,6 +30,8 @@ typedef struct TdxGuest { char *mrconfigid; /* base64 encoded sha348 digest */ char *mrowner; /* base64 encoded sha348 digest */ char *mrownerconfig; /* base64 encoded sha348 digest */ + + MemoryRegion *tdvf_mr; } TdxGuest; #ifdef CONFIG_TDX @@ -39,5 +41,6 @@ bool is_tdx_vm(void); #endif /* CONFIG_TDX */ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp); +void tdx_set_tdvf_region(MemoryRegion *tdvf_mr); #endif /* QEMU_I386_TDX_H */ From b65a6011d16c4f7cb2eb227ab1bc735850475288 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Thu, 8 May 2025 10:59:23 -0400 Subject: [PATCH 29/77] i386/tdvf: Introduce function to parse TDVF metadata TDX VM needs to boot with its specialized firmware, Trusted Domain Virtual Firmware (TDVF). QEMU needs to parse TDVF and map it in TD guest memory prior to running the TDX VM. A TDVF Metadata in TDVF image describes the structure of firmware. QEMU refers to it to setup memory for TDVF. Introduce function tdvf_parse_metadata() to parse the metadata from TDVF image and store the info of each TDVF section. TDX metadata is located by a TDX metadata offset block, which is a GUID-ed structure. The data portion of the GUID structure contains only an 4-byte field that is the offset of TDX metadata to the end of firmware file. Select X86_FW_OVMF when TDX is enable to leverage existing functions to parse and search OVMF's GUID-ed structures. Signed-off-by: Isaku Yamahata Co-developed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Acked-by: Gerd Hoffmann Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-18-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- hw/i386/Kconfig | 1 + hw/i386/meson.build | 1 + hw/i386/tdvf.c | 188 +++++++++++++++++++++++++++++++++++++++++ include/hw/i386/tdvf.h | 38 +++++++++ 4 files changed, 228 insertions(+) create mode 100644 hw/i386/tdvf.c create mode 100644 include/hw/i386/tdvf.h diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index cce9521ba9..eb65bda6e0 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -12,6 +12,7 @@ config SGX config TDX bool + select X86_FW_OVMF depends on KVM config PC diff --git a/hw/i386/meson.build b/hw/i386/meson.build index 10bdfde27c..3bc1da2b6e 100644 --- a/hw/i386/meson.build +++ b/hw/i386/meson.build @@ -32,6 +32,7 @@ i386_ss.add(when: 'CONFIG_PC', if_true: files( 'port92.c')) i386_ss.add(when: 'CONFIG_X86_FW_OVMF', if_true: files('pc_sysfw_ovmf.c'), if_false: files('pc_sysfw_ovmf-stubs.c')) +i386_ss.add(when: 'CONFIG_TDX', if_true: files('tdvf.c')) subdir('kvm') subdir('xen') diff --git a/hw/i386/tdvf.c b/hw/i386/tdvf.c new file mode 100644 index 0000000000..e2d486946a --- /dev/null +++ b/hw/i386/tdvf.c @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2025 Intel Corporation + * Author: Isaku Yamahata + * + * Xiaoyao Li + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" + +#include "hw/i386/pc.h" +#include "hw/i386/tdvf.h" +#include "system/kvm.h" + +#define TDX_METADATA_OFFSET_GUID "e47a6535-984a-4798-865e-4685a7bf8ec2" +#define TDX_METADATA_VERSION 1 +#define TDVF_SIGNATURE 0x46564454 /* TDVF as little endian */ +#define TDVF_ALIGNMENT 4096 + +/* + * the raw structs read from TDVF keeps the name convention in + * TDVF Design Guide spec. + */ +typedef struct { + uint32_t DataOffset; + uint32_t RawDataSize; + uint64_t MemoryAddress; + uint64_t MemoryDataSize; + uint32_t Type; + uint32_t Attributes; +} TdvfSectionEntry; + +typedef struct { + uint32_t Signature; + uint32_t Length; + uint32_t Version; + uint32_t NumberOfSectionEntries; + TdvfSectionEntry SectionEntries[]; +} TdvfMetadata; + +struct tdx_metadata_offset { + uint32_t offset; +}; + +static TdvfMetadata *tdvf_get_metadata(void *flash_ptr, int size) +{ + TdvfMetadata *metadata; + uint32_t offset = 0; + uint8_t *data; + + if ((uint32_t) size != size) { + return NULL; + } + + if (pc_system_ovmf_table_find(TDX_METADATA_OFFSET_GUID, &data, NULL)) { + offset = size - le32_to_cpu(((struct tdx_metadata_offset *)data)->offset); + + if (offset + sizeof(*metadata) > size) { + return NULL; + } + } else { + error_report("Cannot find TDX_METADATA_OFFSET_GUID"); + return NULL; + } + + metadata = flash_ptr + offset; + + /* Finally, verify the signature to determine if this is a TDVF image. */ + metadata->Signature = le32_to_cpu(metadata->Signature); + if (metadata->Signature != TDVF_SIGNATURE) { + error_report("Invalid TDVF signature in metadata!"); + return NULL; + } + + /* Sanity check that the TDVF doesn't overlap its own metadata. */ + metadata->Length = le32_to_cpu(metadata->Length); + if (offset + metadata->Length > size) { + return NULL; + } + + /* Only version 1 is supported/defined. */ + metadata->Version = le32_to_cpu(metadata->Version); + if (metadata->Version != TDX_METADATA_VERSION) { + return NULL; + } + + return metadata; +} + +static int tdvf_parse_and_check_section_entry(const TdvfSectionEntry *src, + TdxFirmwareEntry *entry) +{ + entry->data_offset = le32_to_cpu(src->DataOffset); + entry->data_len = le32_to_cpu(src->RawDataSize); + entry->address = le64_to_cpu(src->MemoryAddress); + entry->size = le64_to_cpu(src->MemoryDataSize); + entry->type = le32_to_cpu(src->Type); + entry->attributes = le32_to_cpu(src->Attributes); + + /* sanity check */ + if (entry->size < entry->data_len) { + error_report("Broken metadata RawDataSize 0x%x MemoryDataSize 0x%lx", + entry->data_len, entry->size); + return -1; + } + if (!QEMU_IS_ALIGNED(entry->address, TDVF_ALIGNMENT)) { + error_report("MemoryAddress 0x%lx not page aligned", entry->address); + return -1; + } + if (!QEMU_IS_ALIGNED(entry->size, TDVF_ALIGNMENT)) { + error_report("MemoryDataSize 0x%lx not page aligned", entry->size); + return -1; + } + + switch (entry->type) { + case TDVF_SECTION_TYPE_BFV: + case TDVF_SECTION_TYPE_CFV: + /* The sections that must be copied from firmware image to TD memory */ + if (entry->data_len == 0) { + error_report("%d section with RawDataSize == 0", entry->type); + return -1; + } + break; + case TDVF_SECTION_TYPE_TD_HOB: + case TDVF_SECTION_TYPE_TEMP_MEM: + /* The sections that no need to be copied from firmware image */ + if (entry->data_len != 0) { + error_report("%d section with RawDataSize 0x%x != 0", + entry->type, entry->data_len); + return -1; + } + break; + default: + error_report("TDVF contains unsupported section type %d", entry->type); + return -1; + } + + return 0; +} + +int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size) +{ + g_autofree TdvfSectionEntry *sections = NULL; + TdvfMetadata *metadata; + ssize_t entries_size; + int i; + + metadata = tdvf_get_metadata(flash_ptr, size); + if (!metadata) { + return -EINVAL; + } + + /* load and parse metadata entries */ + fw->nr_entries = le32_to_cpu(metadata->NumberOfSectionEntries); + if (fw->nr_entries < 2) { + error_report("Invalid number of fw entries (%u) in TDVF Metadata", + fw->nr_entries); + return -EINVAL; + } + + entries_size = fw->nr_entries * sizeof(TdvfSectionEntry); + if (metadata->Length != sizeof(*metadata) + entries_size) { + error_report("TDVF metadata len (0x%x) mismatch, expected (0x%x)", + metadata->Length, + (uint32_t)(sizeof(*metadata) + entries_size)); + return -EINVAL; + } + + fw->entries = g_new(TdxFirmwareEntry, fw->nr_entries); + sections = g_new(TdvfSectionEntry, fw->nr_entries); + + memcpy(sections, (void *)metadata + sizeof(*metadata), entries_size); + + for (i = 0; i < fw->nr_entries; i++) { + if (tdvf_parse_and_check_section_entry(§ions[i], &fw->entries[i])) { + goto err; + } + } + + return 0; + +err: + fw->entries = 0; + g_free(fw->entries); + return -EINVAL; +} diff --git a/include/hw/i386/tdvf.h b/include/hw/i386/tdvf.h new file mode 100644 index 0000000000..7ebcac42a3 --- /dev/null +++ b/include/hw/i386/tdvf.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2025 Intel Corporation + * Author: Isaku Yamahata + * + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_I386_TDVF_H +#define HW_I386_TDVF_H + +#include "qemu/osdep.h" + +#define TDVF_SECTION_TYPE_BFV 0 +#define TDVF_SECTION_TYPE_CFV 1 +#define TDVF_SECTION_TYPE_TD_HOB 2 +#define TDVF_SECTION_TYPE_TEMP_MEM 3 + +#define TDVF_SECTION_ATTRIBUTES_MR_EXTEND (1U << 0) +#define TDVF_SECTION_ATTRIBUTES_PAGE_AUG (1U << 1) + +typedef struct TdxFirmwareEntry { + uint32_t data_offset; + uint32_t data_len; + uint64_t address; + uint64_t size; + uint32_t type; + uint32_t attributes; +} TdxFirmwareEntry; + +typedef struct TdxFirmware { + uint32_t nr_entries; + TdxFirmwareEntry *entries; +} TdxFirmware; + +int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size); + +#endif /* HW_I386_TDVF_H */ From cb5d65a854e58abeb705a2ce14cc3eb28973c606 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:24 -0400 Subject: [PATCH 30/77] i386/tdx: Parse TDVF metadata for TDX VM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After TDVF is loaded to bios MemoryRegion, it needs parse TDVF metadata. Signed-off-by: Xiaoyao Li Acked-by: Gerd Hoffmann Reviewed-by: Daniel P. Berrangé Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-19-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- hw/i386/pc_sysfw.c | 7 +++++++ target/i386/kvm/tdx-stub.c | 5 +++++ target/i386/kvm/tdx.c | 5 +++++ target/i386/kvm/tdx.h | 3 +++ 4 files changed, 20 insertions(+) diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index 1eeb58ab37..821396c16e 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -37,6 +37,7 @@ #include "hw/block/flash.h" #include "system/kvm.h" #include "target/i386/sev.h" +#include "kvm/tdx.h" #define FLASH_SECTOR_SIZE 4096 @@ -280,5 +281,11 @@ void x86_firmware_configure(hwaddr gpa, void *ptr, int size) } sev_encrypt_flash(gpa, ptr, size, &error_fatal); + } else if (is_tdx_vm()) { + ret = tdx_parse_tdvf(ptr, size); + if (ret) { + error_report("failed to parse TDVF for TDX VM"); + exit(1); + } } } diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c index 2344433594..7748b6d0a4 100644 --- a/target/i386/kvm/tdx-stub.c +++ b/target/i386/kvm/tdx-stub.c @@ -8,3 +8,8 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) { return -EINVAL; } + +int tdx_parse_tdvf(void *flash_ptr, int size) +{ + return -EINVAL; +} diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 0f5acbf980..18beba2f5c 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -382,6 +382,11 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) return 0; } +int tdx_parse_tdvf(void *flash_ptr, int size) +{ + return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size); +} + static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp) { TdxGuest *tdx = TDX_GUEST(obj); diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h index b73461b8d8..28a03c2a7b 100644 --- a/target/i386/kvm/tdx.h +++ b/target/i386/kvm/tdx.h @@ -8,6 +8,7 @@ #endif #include "confidential-guest.h" +#include "hw/i386/tdvf.h" #define TYPE_TDX_GUEST "tdx-guest" #define TDX_GUEST(obj) OBJECT_CHECK(TdxGuest, (obj), TYPE_TDX_GUEST) @@ -32,6 +33,7 @@ typedef struct TdxGuest { char *mrownerconfig; /* base64 encoded sha348 digest */ MemoryRegion *tdvf_mr; + TdxFirmware tdvf; } TdxGuest; #ifdef CONFIG_TDX @@ -42,5 +44,6 @@ bool is_tdx_vm(void); int tdx_pre_create_vcpu(CPUState *cpu, Error **errp); void tdx_set_tdvf_region(MemoryRegion *tdvf_mr); +int tdx_parse_tdvf(void *flash_ptr, int size); #endif /* QEMU_I386_TDX_H */ From 49b1f0f812372129736c1df0421c8f67d86d362b Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:25 -0400 Subject: [PATCH 31/77] i386/tdx: Don't initialize pc.rom for TDX VMs For TDX, the address below 1MB are entirely general RAM. No need to initialize pc.rom memory region for TDs. Signed-off-by: Xiaoyao Li Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-20-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- hw/i386/pc.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 70656157ca..a403987a64 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -44,6 +44,7 @@ #include "system/xen.h" #include "system/reset.h" #include "kvm/kvm_i386.h" +#include "kvm/tdx.h" #include "hw/xen/xen.h" #include "qobject/qlist.h" #include "qemu/error-report.h" @@ -976,21 +977,23 @@ void pc_memory_init(PCMachineState *pcms, /* Initialize PC system firmware */ pc_system_firmware_init(pcms, rom_memory); - option_rom_mr = g_malloc(sizeof(*option_rom_mr)); - if (machine_require_guest_memfd(machine)) { - memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom", - PC_ROM_SIZE, &error_fatal); - } else { - memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, - &error_fatal); - if (pcmc->pci_enabled) { - memory_region_set_readonly(option_rom_mr, true); + if (!is_tdx_vm()) { + option_rom_mr = g_malloc(sizeof(*option_rom_mr)); + if (machine_require_guest_memfd(machine)) { + memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom", + PC_ROM_SIZE, &error_fatal); + } else { + memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, + &error_fatal); + if (pcmc->pci_enabled) { + memory_region_set_readonly(option_rom_mr, true); + } } + memory_region_add_subregion_overlap(rom_memory, + PC_ROM_MIN_VGA, + option_rom_mr, + 1); } - memory_region_add_subregion_overlap(rom_memory, - PC_ROM_MIN_VGA, - option_rom_mr, - 1); fw_cfg = fw_cfg_arch_create(machine, x86ms->boot_cpus, x86ms->apic_id_limit); From 4420ba0ebbf014acc68f78669e0767e288313ed6 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:26 -0400 Subject: [PATCH 32/77] i386/tdx: Track mem_ptr for each firmware entry of TDVF For each TDVF sections, QEMU needs to copy the content to guest private memory via KVM API (KVM_TDX_INIT_MEM_REGION). Introduce a field @mem_ptr for TdxFirmwareEntry to track the memory pointer of each TDVF sections. So that QEMU can add/copy them to guest private memory later. TDVF sections can be classified into two groups: - Firmware itself, e.g., TDVF BFV and CFV, that located separately from guest RAM. Its memory pointer is the bios pointer. - Sections located at guest RAM, e.g., TEMP_MEM and TD_HOB. mmap a new memory range for them. Register a machine_init_done callback to do the stuff. Signed-off-by: Xiaoyao Li Acked-by: Gerd Hoffmann Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-21-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- hw/i386/tdvf.c | 1 + include/hw/i386/tdvf.h | 7 +++++++ target/i386/kvm/tdx.c | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/hw/i386/tdvf.c b/hw/i386/tdvf.c index e2d486946a..bd993ea2f0 100644 --- a/hw/i386/tdvf.c +++ b/hw/i386/tdvf.c @@ -179,6 +179,7 @@ int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size) } } + fw->mem_ptr = flash_ptr; return 0; err: diff --git a/include/hw/i386/tdvf.h b/include/hw/i386/tdvf.h index 7ebcac42a3..e75c8d1acc 100644 --- a/include/hw/i386/tdvf.h +++ b/include/hw/i386/tdvf.h @@ -26,13 +26,20 @@ typedef struct TdxFirmwareEntry { uint64_t size; uint32_t type; uint32_t attributes; + + void *mem_ptr; } TdxFirmwareEntry; typedef struct TdxFirmware { + void *mem_ptr; + uint32_t nr_entries; TdxFirmwareEntry *entries; } TdxFirmware; +#define for_each_tdx_fw_entry(fw, e) \ + for (e = (fw)->entries; e != (fw)->entries + (fw)->nr_entries; e++) + int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size); #endif /* HW_I386_TDVF_H */ diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 18beba2f5c..bfdae4f1c0 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -12,10 +12,13 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" #include "qemu/base64.h" +#include "qemu/mmap-alloc.h" #include "qapi/error.h" #include "qom/object_interfaces.h" #include "crypto/hash.h" +#include "system/system.h" +#include "hw/i386/tdvf.h" #include "hw/i386/x86.h" #include "kvm_i386.h" #include "tdx.h" @@ -143,6 +146,38 @@ void tdx_set_tdvf_region(MemoryRegion *tdvf_mr) tdx_guest->tdvf_mr = tdvf_mr; } +static void tdx_finalize_vm(Notifier *notifier, void *unused) +{ + TdxFirmware *tdvf = &tdx_guest->tdvf; + TdxFirmwareEntry *entry; + + for_each_tdx_fw_entry(tdvf, entry) { + switch (entry->type) { + case TDVF_SECTION_TYPE_BFV: + case TDVF_SECTION_TYPE_CFV: + entry->mem_ptr = tdvf->mem_ptr + entry->data_offset; + break; + case TDVF_SECTION_TYPE_TD_HOB: + case TDVF_SECTION_TYPE_TEMP_MEM: + entry->mem_ptr = qemu_ram_mmap(-1, entry->size, + qemu_real_host_page_size(), 0, 0); + if (entry->mem_ptr == MAP_FAILED) { + error_report("Failed to mmap memory for TDVF section %d", + entry->type); + exit(1); + } + break; + default: + error_report("Unsupported TDVF section %d", entry->type); + exit(1); + } + } +} + +static Notifier tdx_machine_done_notify = { + .notify = tdx_finalize_vm, +}; + static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { TdxGuest *tdx = TDX_GUEST(cgs); @@ -157,6 +192,8 @@ static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) } } + qemu_add_machine_init_done_notifier(&tdx_machine_done_notify); + tdx_guest = tdx; return 0; } From f18672e4cf91feed4b91ef85a264a500935a2865 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:27 -0400 Subject: [PATCH 33/77] i386/tdx: Track RAM entries for TDX VM The RAM of TDX VM can be classified into two types: - TDX_RAM_UNACCEPTED: default type of TDX memory, which needs to be accepted by TDX guest before it can be used and will be all-zeros after being accepted. - TDX_RAM_ADDED: the RAM that is ADD'ed to TD guest before running, and can be used directly. E.g., TD HOB and TEMP MEM that needed by TDVF. Maintain TdxRamEntries[] which grabs the initial RAM info from e820 table and mark each RAM range as default type TDX_RAM_UNACCEPTED. Then turn the range of TD HOB and TEMP MEM to TDX_RAM_ADDED since these ranges will be ADD'ed before TD runs and no need to be accepted runtime. The TdxRamEntries[] are later used to setup the memory TD resource HOB that passes memory info from QEMU to TDVF. Signed-off-by: Xiaoyao Li Acked-by: Gerd Hoffmann Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-22-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/tdx.c | 109 ++++++++++++++++++++++++++++++++++++++++++ target/i386/kvm/tdx.h | 14 ++++++ 2 files changed, 123 insertions(+) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index bfdae4f1c0..e06f5d0bd4 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -18,6 +18,7 @@ #include "crypto/hash.h" #include "system/system.h" +#include "hw/i386/e820_memory_layout.h" #include "hw/i386/tdvf.h" #include "hw/i386/x86.h" #include "kvm_i386.h" @@ -146,11 +147,110 @@ void tdx_set_tdvf_region(MemoryRegion *tdvf_mr) tdx_guest->tdvf_mr = tdvf_mr; } +static void tdx_add_ram_entry(uint64_t address, uint64_t length, + enum TdxRamType type) +{ + uint32_t nr_entries = tdx_guest->nr_ram_entries; + tdx_guest->ram_entries = g_renew(TdxRamEntry, tdx_guest->ram_entries, + nr_entries + 1); + + tdx_guest->ram_entries[nr_entries].address = address; + tdx_guest->ram_entries[nr_entries].length = length; + tdx_guest->ram_entries[nr_entries].type = type; + tdx_guest->nr_ram_entries++; +} + +static int tdx_accept_ram_range(uint64_t address, uint64_t length) +{ + uint64_t head_start, tail_start, head_length, tail_length; + uint64_t tmp_address, tmp_length; + TdxRamEntry *e; + int i = 0; + + do { + if (i == tdx_guest->nr_ram_entries) { + return -1; + } + + e = &tdx_guest->ram_entries[i++]; + } while (address + length <= e->address || address >= e->address + e->length); + + /* + * The to-be-accepted ram range must be fully contained by one + * RAM entry. + */ + if (e->address > address || + e->address + e->length < address + length) { + return -1; + } + + if (e->type == TDX_RAM_ADDED) { + return 0; + } + + tmp_address = e->address; + tmp_length = e->length; + + e->address = address; + e->length = length; + e->type = TDX_RAM_ADDED; + + head_length = address - tmp_address; + if (head_length > 0) { + head_start = tmp_address; + tdx_add_ram_entry(head_start, head_length, TDX_RAM_UNACCEPTED); + } + + tail_start = address + length; + if (tail_start < tmp_address + tmp_length) { + tail_length = tmp_address + tmp_length - tail_start; + tdx_add_ram_entry(tail_start, tail_length, TDX_RAM_UNACCEPTED); + } + + return 0; +} + +static int tdx_ram_entry_compare(const void *lhs_, const void* rhs_) +{ + const TdxRamEntry *lhs = lhs_; + const TdxRamEntry *rhs = rhs_; + + if (lhs->address == rhs->address) { + return 0; + } + if (le64_to_cpu(lhs->address) > le64_to_cpu(rhs->address)) { + return 1; + } + return -1; +} + +static void tdx_init_ram_entries(void) +{ + unsigned i, j, nr_e820_entries; + + nr_e820_entries = e820_get_table(NULL); + tdx_guest->ram_entries = g_new(TdxRamEntry, nr_e820_entries); + + for (i = 0, j = 0; i < nr_e820_entries; i++) { + uint64_t addr, len; + + if (e820_get_entry(i, E820_RAM, &addr, &len)) { + tdx_guest->ram_entries[j].address = addr; + tdx_guest->ram_entries[j].length = len; + tdx_guest->ram_entries[j].type = TDX_RAM_UNACCEPTED; + j++; + } + } + tdx_guest->nr_ram_entries = j; +} + static void tdx_finalize_vm(Notifier *notifier, void *unused) { TdxFirmware *tdvf = &tdx_guest->tdvf; TdxFirmwareEntry *entry; + tdx_init_ram_entries(); + for_each_tdx_fw_entry(tdvf, entry) { switch (entry->type) { case TDVF_SECTION_TYPE_BFV: @@ -166,12 +266,21 @@ static void tdx_finalize_vm(Notifier *notifier, void *unused) entry->type); exit(1); } + if (tdx_accept_ram_range(entry->address, entry->size)) { + error_report("Failed to accept memory for TDVF section %d", + entry->type); + qemu_ram_munmap(-1, entry->mem_ptr, entry->size); + exit(1); + } break; default: error_report("Unsupported TDVF section %d", entry->type); exit(1); } } + + qsort(tdx_guest->ram_entries, tdx_guest->nr_ram_entries, + sizeof(TdxRamEntry), &tdx_ram_entry_compare); } static Notifier tdx_machine_done_notify = { diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h index 28a03c2a7b..36a7400e74 100644 --- a/target/i386/kvm/tdx.h +++ b/target/i386/kvm/tdx.h @@ -20,6 +20,17 @@ typedef struct TdxGuestClass { /* TDX requires bus frequency 25MHz */ #define TDX_APIC_BUS_CYCLES_NS 40 +enum TdxRamType { + TDX_RAM_UNACCEPTED, + TDX_RAM_ADDED, +}; + +typedef struct TdxRamEntry { + uint64_t address; + uint64_t length; + enum TdxRamType type; +} TdxRamEntry; + typedef struct TdxGuest { X86ConfidentialGuest parent_obj; @@ -34,6 +45,9 @@ typedef struct TdxGuest { MemoryRegion *tdvf_mr; TdxFirmware tdvf; + + uint32_t nr_ram_entries; + TdxRamEntry *ram_entries; } TdxGuest; #ifdef CONFIG_TDX From 88aa6576e4ab40b538f543852128cb17fce37f87 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:28 -0400 Subject: [PATCH 34/77] headers: Add definitions from UEFI spec for volumes, resources, etc... Add UEFI definitions for literals, enums, structs, GUIDs, etc... that will be used by TDX to build the UEFI Hand-Off Block (HOB) that is passed to the Trusted Domain Virtual Firmware (TDVF). All values come from the UEFI specification [1], PI spec [2] and TDVF design guide[3]. [1] UEFI Specification v2.1.0 https://uefi.org/sites/default/files/resources/UEFI_Spec_2_10_Aug29.pdf [2] UEFI PI spec v1.8 https://uefi.org/sites/default/files/resources/UEFI_PI_Spec_1_8_March3.pdf [3] https://software.intel.com/content/dam/develop/external/us/en/documents/tdx-virtual-firmware-design-guide-rev-1.pdf Signed-off-by: Xiaoyao Li Acked-by: Gerd Hoffmann Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-23-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- include/standard-headers/uefi/uefi.h | 187 +++++++++++++++++++++++++++ 1 file changed, 187 insertions(+) create mode 100644 include/standard-headers/uefi/uefi.h diff --git a/include/standard-headers/uefi/uefi.h b/include/standard-headers/uefi/uefi.h new file mode 100644 index 0000000000..5256349ec0 --- /dev/null +++ b/include/standard-headers/uefi/uefi.h @@ -0,0 +1,187 @@ +/* + * Copyright (C) 2025 Intel Corporation + * + * Author: Isaku Yamahata + * + * Xiaoyao Li + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_I386_UEFI_H +#define HW_I386_UEFI_H + +/***************************************************************************/ +/* + * basic EFI definitions + * supplemented with UEFI Specification Version 2.8 (Errata A) + * released February 2020 + */ +/* UEFI integer is little endian */ + +typedef struct { + uint32_t Data1; + uint16_t Data2; + uint16_t Data3; + uint8_t Data4[8]; +} EFI_GUID; + +typedef enum { + EfiReservedMemoryType, + EfiLoaderCode, + EfiLoaderData, + EfiBootServicesCode, + EfiBootServicesData, + EfiRuntimeServicesCode, + EfiRuntimeServicesData, + EfiConventionalMemory, + EfiUnusableMemory, + EfiACPIReclaimMemory, + EfiACPIMemoryNVS, + EfiMemoryMappedIO, + EfiMemoryMappedIOPortSpace, + EfiPalCode, + EfiPersistentMemory, + EfiUnacceptedMemoryType, + EfiMaxMemoryType +} EFI_MEMORY_TYPE; + +#define EFI_HOB_HANDOFF_TABLE_VERSION 0x0009 + +#define EFI_HOB_TYPE_HANDOFF 0x0001 +#define EFI_HOB_TYPE_MEMORY_ALLOCATION 0x0002 +#define EFI_HOB_TYPE_RESOURCE_DESCRIPTOR 0x0003 +#define EFI_HOB_TYPE_GUID_EXTENSION 0x0004 +#define EFI_HOB_TYPE_FV 0x0005 +#define EFI_HOB_TYPE_CPU 0x0006 +#define EFI_HOB_TYPE_MEMORY_POOL 0x0007 +#define EFI_HOB_TYPE_FV2 0x0009 +#define EFI_HOB_TYPE_LOAD_PEIM_UNUSED 0x000A +#define EFI_HOB_TYPE_UEFI_CAPSULE 0x000B +#define EFI_HOB_TYPE_FV3 0x000C +#define EFI_HOB_TYPE_UNUSED 0xFFFE +#define EFI_HOB_TYPE_END_OF_HOB_LIST 0xFFFF + +typedef struct { + uint16_t HobType; + uint16_t HobLength; + uint32_t Reserved; +} EFI_HOB_GENERIC_HEADER; + +typedef uint64_t EFI_PHYSICAL_ADDRESS; +typedef uint32_t EFI_BOOT_MODE; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + uint32_t Version; + EFI_BOOT_MODE BootMode; + EFI_PHYSICAL_ADDRESS EfiMemoryTop; + EFI_PHYSICAL_ADDRESS EfiMemoryBottom; + EFI_PHYSICAL_ADDRESS EfiFreeMemoryTop; + EFI_PHYSICAL_ADDRESS EfiFreeMemoryBottom; + EFI_PHYSICAL_ADDRESS EfiEndOfHobList; +} EFI_HOB_HANDOFF_INFO_TABLE; + +#define EFI_RESOURCE_SYSTEM_MEMORY 0x00000000 +#define EFI_RESOURCE_MEMORY_MAPPED_IO 0x00000001 +#define EFI_RESOURCE_IO 0x00000002 +#define EFI_RESOURCE_FIRMWARE_DEVICE 0x00000003 +#define EFI_RESOURCE_MEMORY_MAPPED_IO_PORT 0x00000004 +#define EFI_RESOURCE_MEMORY_RESERVED 0x00000005 +#define EFI_RESOURCE_IO_RESERVED 0x00000006 +#define EFI_RESOURCE_MEMORY_UNACCEPTED 0x00000007 +#define EFI_RESOURCE_MAX_MEMORY_TYPE 0x00000008 + +#define EFI_RESOURCE_ATTRIBUTE_PRESENT 0x00000001 +#define EFI_RESOURCE_ATTRIBUTE_INITIALIZED 0x00000002 +#define EFI_RESOURCE_ATTRIBUTE_TESTED 0x00000004 +#define EFI_RESOURCE_ATTRIBUTE_SINGLE_BIT_ECC 0x00000008 +#define EFI_RESOURCE_ATTRIBUTE_MULTIPLE_BIT_ECC 0x00000010 +#define EFI_RESOURCE_ATTRIBUTE_ECC_RESERVED_1 0x00000020 +#define EFI_RESOURCE_ATTRIBUTE_ECC_RESERVED_2 0x00000040 +#define EFI_RESOURCE_ATTRIBUTE_READ_PROTECTED 0x00000080 +#define EFI_RESOURCE_ATTRIBUTE_WRITE_PROTECTED 0x00000100 +#define EFI_RESOURCE_ATTRIBUTE_EXECUTION_PROTECTED 0x00000200 +#define EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE 0x00000400 +#define EFI_RESOURCE_ATTRIBUTE_WRITE_COMBINEABLE 0x00000800 +#define EFI_RESOURCE_ATTRIBUTE_WRITE_THROUGH_CACHEABLE 0x00001000 +#define EFI_RESOURCE_ATTRIBUTE_WRITE_BACK_CACHEABLE 0x00002000 +#define EFI_RESOURCE_ATTRIBUTE_16_BIT_IO 0x00004000 +#define EFI_RESOURCE_ATTRIBUTE_32_BIT_IO 0x00008000 +#define EFI_RESOURCE_ATTRIBUTE_64_BIT_IO 0x00010000 +#define EFI_RESOURCE_ATTRIBUTE_UNCACHED_EXPORTED 0x00020000 +#define EFI_RESOURCE_ATTRIBUTE_READ_ONLY_PROTECTED 0x00040000 +#define EFI_RESOURCE_ATTRIBUTE_READ_ONLY_PROTECTABLE 0x00080000 +#define EFI_RESOURCE_ATTRIBUTE_READ_PROTECTABLE 0x00100000 +#define EFI_RESOURCE_ATTRIBUTE_WRITE_PROTECTABLE 0x00200000 +#define EFI_RESOURCE_ATTRIBUTE_EXECUTION_PROTECTABLE 0x00400000 +#define EFI_RESOURCE_ATTRIBUTE_PERSISTENT 0x00800000 +#define EFI_RESOURCE_ATTRIBUTE_PERSISTABLE 0x01000000 +#define EFI_RESOURCE_ATTRIBUTE_MORE_RELIABLE 0x02000000 + +typedef uint32_t EFI_RESOURCE_TYPE; +typedef uint32_t EFI_RESOURCE_ATTRIBUTE_TYPE; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + EFI_GUID Owner; + EFI_RESOURCE_TYPE ResourceType; + EFI_RESOURCE_ATTRIBUTE_TYPE ResourceAttribute; + EFI_PHYSICAL_ADDRESS PhysicalStart; + uint64_t ResourceLength; +} EFI_HOB_RESOURCE_DESCRIPTOR; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + EFI_GUID Name; + + /* guid specific data follows */ +} EFI_HOB_GUID_TYPE; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + EFI_PHYSICAL_ADDRESS BaseAddress; + uint64_t Length; +} EFI_HOB_FIRMWARE_VOLUME; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + EFI_PHYSICAL_ADDRESS BaseAddress; + uint64_t Length; + EFI_GUID FvName; + EFI_GUID FileName; +} EFI_HOB_FIRMWARE_VOLUME2; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + EFI_PHYSICAL_ADDRESS BaseAddress; + uint64_t Length; + uint32_t AuthenticationStatus; + bool ExtractedFv; + EFI_GUID FvName; + EFI_GUID FileName; +} EFI_HOB_FIRMWARE_VOLUME3; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + uint8_t SizeOfMemorySpace; + uint8_t SizeOfIoSpace; + uint8_t Reserved[6]; +} EFI_HOB_CPU; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; +} EFI_HOB_MEMORY_POOL; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + + EFI_PHYSICAL_ADDRESS BaseAddress; + uint64_t Length; +} EFI_HOB_UEFI_CAPSULE; + +#define EFI_HOB_OWNER_ZERO \ + ((EFI_GUID){ 0x00000000, 0x0000, 0x0000, \ + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }) + +#endif From a731425980a4d3f8bb96fc41893b6437672875ee Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:29 -0400 Subject: [PATCH 35/77] i386/tdx: Setup the TD HOB list The TD HOB list is used to pass the information from VMM to TDVF. The TD HOB must include PHIT HOB and Resource Descriptor HOB. More details can be found in TDVF specification and PI specification. Build the TD HOB in TDX's machine_init_done callback. Co-developed-by: Isaku Yamahata Signed-off-by: Isaku Yamahata Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Signed-off-by: Xiaoyao Li Acked-by: Gerd Hoffmann Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-24-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- hw/i386/meson.build | 2 +- hw/i386/tdvf-hob.c | 130 ++++++++++++++++++++++++++++++++++++++++++ hw/i386/tdvf-hob.h | 26 +++++++++ target/i386/kvm/tdx.c | 16 ++++++ 4 files changed, 173 insertions(+), 1 deletion(-) create mode 100644 hw/i386/tdvf-hob.c create mode 100644 hw/i386/tdvf-hob.h diff --git a/hw/i386/meson.build b/hw/i386/meson.build index 3bc1da2b6e..7896f348cf 100644 --- a/hw/i386/meson.build +++ b/hw/i386/meson.build @@ -32,7 +32,7 @@ i386_ss.add(when: 'CONFIG_PC', if_true: files( 'port92.c')) i386_ss.add(when: 'CONFIG_X86_FW_OVMF', if_true: files('pc_sysfw_ovmf.c'), if_false: files('pc_sysfw_ovmf-stubs.c')) -i386_ss.add(when: 'CONFIG_TDX', if_true: files('tdvf.c')) +i386_ss.add(when: 'CONFIG_TDX', if_true: files('tdvf.c', 'tdvf-hob.c')) subdir('kvm') subdir('xen') diff --git a/hw/i386/tdvf-hob.c b/hw/i386/tdvf-hob.c new file mode 100644 index 0000000000..782b3d1578 --- /dev/null +++ b/hw/i386/tdvf-hob.c @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2025 Intel Corporation + * Author: Isaku Yamahata + * + * Xiaoyao Li + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "standard-headers/uefi/uefi.h" +#include "hw/pci/pcie_host.h" +#include "tdvf-hob.h" + +typedef struct TdvfHob { + hwaddr hob_addr; + void *ptr; + int size; + + /* working area */ + void *current; + void *end; +} TdvfHob; + +static uint64_t tdvf_current_guest_addr(const TdvfHob *hob) +{ + return hob->hob_addr + (hob->current - hob->ptr); +} + +static void tdvf_align(TdvfHob *hob, size_t align) +{ + hob->current = QEMU_ALIGN_PTR_UP(hob->current, align); +} + +static void *tdvf_get_area(TdvfHob *hob, uint64_t size) +{ + void *ret; + + if (hob->current + size > hob->end) { + error_report("TD_HOB overrun, size = 0x%" PRIx64, size); + exit(1); + } + + ret = hob->current; + hob->current += size; + tdvf_align(hob, 8); + return ret; +} + +static void tdvf_hob_add_memory_resources(TdxGuest *tdx, TdvfHob *hob) +{ + EFI_HOB_RESOURCE_DESCRIPTOR *region; + EFI_RESOURCE_ATTRIBUTE_TYPE attr; + EFI_RESOURCE_TYPE resource_type; + + TdxRamEntry *e; + int i; + + for (i = 0; i < tdx->nr_ram_entries; i++) { + e = &tdx->ram_entries[i]; + + if (e->type == TDX_RAM_UNACCEPTED) { + resource_type = EFI_RESOURCE_MEMORY_UNACCEPTED; + attr = EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED; + } else if (e->type == TDX_RAM_ADDED) { + resource_type = EFI_RESOURCE_SYSTEM_MEMORY; + attr = EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE; + } else { + error_report("unknown TDX_RAM_ENTRY type %d", e->type); + exit(1); + } + + region = tdvf_get_area(hob, sizeof(*region)); + *region = (EFI_HOB_RESOURCE_DESCRIPTOR) { + .Header = { + .HobType = EFI_HOB_TYPE_RESOURCE_DESCRIPTOR, + .HobLength = cpu_to_le16(sizeof(*region)), + .Reserved = cpu_to_le32(0), + }, + .Owner = EFI_HOB_OWNER_ZERO, + .ResourceType = cpu_to_le32(resource_type), + .ResourceAttribute = cpu_to_le32(attr), + .PhysicalStart = cpu_to_le64(e->address), + .ResourceLength = cpu_to_le64(e->length), + }; + } +} + +void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob) +{ + TdvfHob hob = { + .hob_addr = td_hob->address, + .size = td_hob->size, + .ptr = td_hob->mem_ptr, + + .current = td_hob->mem_ptr, + .end = td_hob->mem_ptr + td_hob->size, + }; + + EFI_HOB_GENERIC_HEADER *last_hob; + EFI_HOB_HANDOFF_INFO_TABLE *hit; + + /* Note, Efi{Free}Memory{Bottom,Top} are ignored, leave 'em zeroed. */ + hit = tdvf_get_area(&hob, sizeof(*hit)); + *hit = (EFI_HOB_HANDOFF_INFO_TABLE) { + .Header = { + .HobType = EFI_HOB_TYPE_HANDOFF, + .HobLength = cpu_to_le16(sizeof(*hit)), + .Reserved = cpu_to_le32(0), + }, + .Version = cpu_to_le32(EFI_HOB_HANDOFF_TABLE_VERSION), + .BootMode = cpu_to_le32(0), + .EfiMemoryTop = cpu_to_le64(0), + .EfiMemoryBottom = cpu_to_le64(0), + .EfiFreeMemoryTop = cpu_to_le64(0), + .EfiFreeMemoryBottom = cpu_to_le64(0), + .EfiEndOfHobList = cpu_to_le64(0), /* initialized later */ + }; + + tdvf_hob_add_memory_resources(tdx, &hob); + + last_hob = tdvf_get_area(&hob, sizeof(*last_hob)); + *last_hob = (EFI_HOB_GENERIC_HEADER) { + .HobType = EFI_HOB_TYPE_END_OF_HOB_LIST, + .HobLength = cpu_to_le16(sizeof(*last_hob)), + .Reserved = cpu_to_le32(0), + }; + hit->EfiEndOfHobList = tdvf_current_guest_addr(&hob); +} diff --git a/hw/i386/tdvf-hob.h b/hw/i386/tdvf-hob.h new file mode 100644 index 0000000000..4fc6a3740a --- /dev/null +++ b/hw/i386/tdvf-hob.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef HW_I386_TD_HOB_H +#define HW_I386_TD_HOB_H + +#include "hw/i386/tdvf.h" +#include "target/i386/kvm/tdx.h" + +void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob); + +#define EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE \ + (EFI_RESOURCE_ATTRIBUTE_PRESENT | \ + EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \ + EFI_RESOURCE_ATTRIBUTE_TESTED) + +#define EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED \ + (EFI_RESOURCE_ATTRIBUTE_PRESENT | \ + EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \ + EFI_RESOURCE_ATTRIBUTE_TESTED) + +#define EFI_RESOURCE_ATTRIBUTE_TDVF_MMIO \ + (EFI_RESOURCE_ATTRIBUTE_PRESENT | \ + EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \ + EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE) + +#endif diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index e06f5d0bd4..e20ffee955 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -21,6 +21,7 @@ #include "hw/i386/e820_memory_layout.h" #include "hw/i386/tdvf.h" #include "hw/i386/x86.h" +#include "hw/i386/tdvf-hob.h" #include "kvm_i386.h" #include "tdx.h" @@ -147,6 +148,19 @@ void tdx_set_tdvf_region(MemoryRegion *tdvf_mr) tdx_guest->tdvf_mr = tdvf_mr; } +static TdxFirmwareEntry *tdx_get_hob_entry(TdxGuest *tdx) +{ + TdxFirmwareEntry *entry; + + for_each_tdx_fw_entry(&tdx->tdvf, entry) { + if (entry->type == TDVF_SECTION_TYPE_TD_HOB) { + return entry; + } + } + error_report("TDVF metadata doesn't specify TD_HOB location."); + exit(1); +} + static void tdx_add_ram_entry(uint64_t address, uint64_t length, enum TdxRamType type) { @@ -281,6 +295,8 @@ static void tdx_finalize_vm(Notifier *notifier, void *unused) qsort(tdx_guest->ram_entries, tdx_guest->nr_ram_entries, sizeof(TdxRamEntry), &tdx_ram_entry_compare); + + tdvf_hob_create(tdx_guest, tdx_get_hob_entry(tdx_guest)); } static Notifier tdx_machine_done_notify = { From ebc2d2b497c59414ac3c91de32bc546d27940e74 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Thu, 8 May 2025 10:59:30 -0400 Subject: [PATCH 36/77] i386/tdx: Add TDVF memory via KVM_TDX_INIT_MEM_REGION TDVF firmware (CODE and VARS) needs to be copied to TD's private memory via KVM_TDX_INIT_MEM_REGION, as well as TD HOB and TEMP memory. If the TDVF section has TDVF_SECTION_ATTRIBUTES_MR_EXTEND set in the flag, calling KVM_TDX_EXTEND_MEMORY to extend the measurement. After populating the TDVF memory, the original image located in shared ramblock can be discarded. Signed-off-by: Isaku Yamahata Signed-off-by: Xiaoyao Li Acked-by: Gerd Hoffmann Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-25-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/tdx.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index e20ffee955..43529a9e0e 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -17,6 +17,7 @@ #include "qom/object_interfaces.h" #include "crypto/hash.h" #include "system/system.h" +#include "system/ramblock.h" #include "hw/i386/e820_memory_layout.h" #include "hw/i386/tdvf.h" @@ -262,6 +263,9 @@ static void tdx_finalize_vm(Notifier *notifier, void *unused) { TdxFirmware *tdvf = &tdx_guest->tdvf; TdxFirmwareEntry *entry; + RAMBlock *ram_block; + Error *local_err = NULL; + int r; tdx_init_ram_entries(); @@ -297,6 +301,44 @@ static void tdx_finalize_vm(Notifier *notifier, void *unused) sizeof(TdxRamEntry), &tdx_ram_entry_compare); tdvf_hob_create(tdx_guest, tdx_get_hob_entry(tdx_guest)); + + for_each_tdx_fw_entry(tdvf, entry) { + struct kvm_tdx_init_mem_region region; + uint32_t flags; + + region = (struct kvm_tdx_init_mem_region) { + .source_addr = (uint64_t)entry->mem_ptr, + .gpa = entry->address, + .nr_pages = entry->size >> 12, + }; + + flags = entry->attributes & TDVF_SECTION_ATTRIBUTES_MR_EXTEND ? + KVM_TDX_MEASURE_MEMORY_REGION : 0; + + do { + error_free(local_err); + local_err = NULL; + r = tdx_vcpu_ioctl(first_cpu, KVM_TDX_INIT_MEM_REGION, flags, + ®ion, &local_err); + } while (r == -EAGAIN || r == -EINTR); + if (r < 0) { + error_report_err(local_err); + exit(1); + } + + if (entry->type == TDVF_SECTION_TYPE_TD_HOB || + entry->type == TDVF_SECTION_TYPE_TEMP_MEM) { + qemu_ram_munmap(-1, entry->mem_ptr, entry->size); + entry->mem_ptr = NULL; + } + } + + /* + * TDVF image has been copied into private region above via + * KVM_MEMORY_MAPPING. It becomes useless. + */ + ram_block = tdx_guest->tdvf_mr->ram_block; + ram_block_discard_range(ram_block, 0, ram_block->max_length); } static Notifier tdx_machine_done_notify = { From 41f7fd22073561a23229c0479d9d708dee9d3a1e Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:31 -0400 Subject: [PATCH 37/77] i386/tdx: Call KVM_TDX_INIT_VCPU to initialize TDX vcpu TDX vcpu needs to be initialized by SEAMCALL(TDH.VP.INIT) and KVM provides vcpu level IOCTL KVM_TDX_INIT_VCPU for it. KVM_TDX_INIT_VCPU needs the address of the HOB as input. Invoke it for each vcpu after HOB list is created. Signed-off-by: Xiaoyao Li Acked-by: Gerd Hoffmann Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-26-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/tdx.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 43529a9e0e..99d13bd844 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -259,6 +259,18 @@ static void tdx_init_ram_entries(void) tdx_guest->nr_ram_entries = j; } +static void tdx_post_init_vcpus(void) +{ + TdxFirmwareEntry *hob; + CPUState *cpu; + + hob = tdx_get_hob_entry(tdx_guest); + CPU_FOREACH(cpu) { + tdx_vcpu_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)hob->address, + &error_fatal); + } +} + static void tdx_finalize_vm(Notifier *notifier, void *unused) { TdxFirmware *tdvf = &tdx_guest->tdvf; @@ -302,6 +314,8 @@ static void tdx_finalize_vm(Notifier *notifier, void *unused) tdvf_hob_create(tdx_guest, tdx_get_hob_entry(tdx_guest)); + tdx_post_init_vcpus(); + for_each_tdx_fw_entry(tdvf, entry) { struct kvm_tdx_init_mem_region region; uint32_t flags; From ae60ff4e9f9e5790f79abf866ec67270c28ca477 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:32 -0400 Subject: [PATCH 38/77] i386/tdx: Finalize TDX VM Invoke KVM_TDX_FINALIZE_VM to finalize the TD's measurement and make the TD vCPUs runnable once machine initialization is complete. Signed-off-by: Xiaoyao Li Acked-by: Gerd Hoffmann Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-27-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/tdx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 99d13bd844..287bf7147e 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -353,6 +353,9 @@ static void tdx_finalize_vm(Notifier *notifier, void *unused) */ ram_block = tdx_guest->tdvf_mr->ram_block; ram_block_discard_range(ram_block, 0, ram_block->max_length); + + tdx_vm_ioctl(KVM_TDX_FINALIZE_VM, 0, NULL, &error_fatal); + CONFIDENTIAL_GUEST_SUPPORT(tdx_guest)->ready = true; } static Notifier tdx_machine_done_notify = { From 1ff5048d74e661943260c33e864c4118acb37ab4 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:33 -0400 Subject: [PATCH 39/77] i386/tdx: Enable user exit on KVM_HC_MAP_GPA_RANGE KVM translates TDG.VP.VMCALL to KVM_HC_MAP_GPA_RANGE, and QEMU needs to enable user exit on KVM_HC_MAP_GPA_RANGE in order to handle the memory conversion requested by TD guest. Signed-off-by: Xiaoyao Li Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-28-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/tdx.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 287bf7147e..971f4becfa 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -19,6 +19,8 @@ #include "system/system.h" #include "system/ramblock.h" +#include + #include "hw/i386/e820_memory_layout.h" #include "hw/i386/tdvf.h" #include "hw/i386/x86.h" @@ -376,6 +378,11 @@ static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) } } + /* TDX relies on KVM_HC_MAP_GPA_RANGE to handle TDG.VP.VMCALL */ + if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) { + return -EOPNOTSUPP; + } + qemu_add_machine_init_done_notifier(&tdx_machine_done_notify); tdx_guest = tdx; From 98dbfd6849f117de02ac6f513f2a1f95563e60ae Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:34 -0400 Subject: [PATCH 40/77] i386/tdx: Handle KVM_SYSTEM_EVENT_TDX_FATAL TD guest can use TDG.VP.VMCALL to request termination. KVM translates such request into KVM_EXIT_SYSTEM_EVENT with type of KVM_SYSTEM_EVENT_TDX_FATAL. Add hanlder for such exit. Parse and print the error message, and terminate the TD guest in the handler. Signed-off-by: Xiaoyao Li Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-29-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/kvm.c | 10 +++++++++ target/i386/kvm/tdx-stub.c | 5 +++++ target/i386/kvm/tdx.c | 46 ++++++++++++++++++++++++++++++++++++++ target/i386/kvm/tdx.h | 2 ++ 4 files changed, 63 insertions(+) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index fd1817fc5e..c5c692a034 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -6129,6 +6129,16 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) case KVM_EXIT_HYPERCALL: ret = kvm_handle_hypercall(run); break; + case KVM_EXIT_SYSTEM_EVENT: + switch (run->system_event.type) { + case KVM_SYSTEM_EVENT_TDX_FATAL: + ret = tdx_handle_report_fatal_error(cpu, run); + break; + default: + ret = -1; + break; + } + break; default: fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); ret = -1; diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c index 7748b6d0a4..720a4ff046 100644 --- a/target/i386/kvm/tdx-stub.c +++ b/target/i386/kvm/tdx-stub.c @@ -13,3 +13,8 @@ int tdx_parse_tdvf(void *flash_ptr, int size) { return -EINVAL; } + +int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run) +{ + return -EINVAL; +} diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 971f4becfa..16ad1af3c5 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -615,6 +615,52 @@ int tdx_parse_tdvf(void *flash_ptr, int size) return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size); } +/* + * Only 8 registers can contain valid ASCII byte stream to form the fatal + * message, and their sequence is: R14, R15, RBX, RDI, RSI, R8, R9, RDX + */ +#define TDX_FATAL_MESSAGE_MAX 64 + +int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run) +{ + uint64_t error_code = run->system_event.data[R_R12]; + uint64_t reg_mask = run->system_event.data[R_ECX]; + char *message = NULL; + uint64_t *tmp; + + if (error_code & 0xffff) { + error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%lx", + error_code); + return -1; + } + + if (reg_mask) { + message = g_malloc0(TDX_FATAL_MESSAGE_MAX + 1); + tmp = (uint64_t *)message; + +#define COPY_REG(REG) \ + do { \ + if (reg_mask & BIT_ULL(REG)) { \ + *(tmp++) = run->system_event.data[REG]; \ + } \ + } while (0) + + COPY_REG(R_R14); + COPY_REG(R_R15); + COPY_REG(R_EBX); + COPY_REG(R_EDI); + COPY_REG(R_ESI); + COPY_REG(R_R8); + COPY_REG(R_R9); + COPY_REG(R_EDX); + *((char *)tmp) = '\0'; + } +#undef COPY_REG + + error_report("TD guest reports fatal error. %s", message ? : ""); + return -1; +} + static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp) { TdxGuest *tdx = TDX_GUEST(obj); diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h index 36a7400e74..04b5afe199 100644 --- a/target/i386/kvm/tdx.h +++ b/target/i386/kvm/tdx.h @@ -8,6 +8,7 @@ #endif #include "confidential-guest.h" +#include "cpu.h" #include "hw/i386/tdvf.h" #define TYPE_TDX_GUEST "tdx-guest" @@ -59,5 +60,6 @@ bool is_tdx_vm(void); int tdx_pre_create_vcpu(CPUState *cpu, Error **errp); void tdx_set_tdvf_region(MemoryRegion *tdvf_mr); int tdx_parse_tdvf(void *flash_ptr, int size); +int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run); #endif /* QEMU_I386_TDX_H */ From 6e250463b08b4028123f201343ee72099ef81e68 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:35 -0400 Subject: [PATCH 41/77] i386/tdx: Wire TDX_REPORT_FATAL_ERROR with GuestPanic facility Integrate TDX's TDX_REPORT_FATAL_ERROR into QEMU GuestPanic facility Originated-from: Isaku Yamahata Signed-off-by: Xiaoyao Li Acked-by: Markus Armbruster Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-30-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- qapi/run-state.json | 31 +++++++++++++++++++-- system/runstate.c | 65 +++++++++++++++++++++++++++++++++++++++++++ target/i386/kvm/tdx.c | 25 ++++++++++++++++- 3 files changed, 118 insertions(+), 3 deletions(-) diff --git a/qapi/run-state.json b/qapi/run-state.json index ce95cfa46b..ee11adc508 100644 --- a/qapi/run-state.json +++ b/qapi/run-state.json @@ -501,10 +501,12 @@ # # @s390: s390 guest panic information type (Since: 2.12) # +# @tdx: tdx guest panic information type (Since: 10.1) +# # Since: 2.9 ## { 'enum': 'GuestPanicInformationType', - 'data': [ 'hyper-v', 's390' ] } + 'data': [ 'hyper-v', 's390', 'tdx' ] } ## # @GuestPanicInformation: @@ -519,7 +521,8 @@ 'base': {'type': 'GuestPanicInformationType'}, 'discriminator': 'type', 'data': {'hyper-v': 'GuestPanicInformationHyperV', - 's390': 'GuestPanicInformationS390'}} + 's390': 'GuestPanicInformationS390', + 'tdx' : 'GuestPanicInformationTdx'}} ## # @GuestPanicInformationHyperV: @@ -598,6 +601,30 @@ 'psw-addr': 'uint64', 'reason': 'S390CrashReason'}} +## +# @GuestPanicInformationTdx: +# +# TDX Guest panic information specific to TDX, as specified in the +# "Guest-Hypervisor Communication Interface (GHCI) Specification", +# section TDG.VP.VMCALL. +# +# @error-code: TD-specific error code +# +# @message: Human-readable error message provided by the guest. Not +# to be trusted. +# +# @gpa: guest-physical address of a page that contains more verbose +# error information, as zero-terminated string. Present when the +# "GPA valid" bit (bit 63) is set in @error-code. +# +# +# Since: 10.1 +## +{'struct': 'GuestPanicInformationTdx', + 'data': {'error-code': 'uint32', + 'message': 'str', + '*gpa': 'uint64'}} + ## # @MEMORY_FAILURE: # diff --git a/system/runstate.c b/system/runstate.c index de74d962bc..38900c935a 100644 --- a/system/runstate.c +++ b/system/runstate.c @@ -590,6 +590,58 @@ static void qemu_system_wakeup(void) } } +static char *tdx_parse_panic_message(char *message) +{ + bool printable = false; + char *buf = NULL; + int len = 0, i; + + /* + * Although message is defined as a json string, we shouldn't + * unconditionally treat it as is because the guest generated it and + * it's not necessarily trustable. + */ + if (message) { + /* The caller guarantees the NULL-terminated string. */ + len = strlen(message); + + printable = len > 0; + for (i = 0; i < len; i++) { + if (!(0x20 <= message[i] && message[i] <= 0x7e)) { + printable = false; + break; + } + } + } + + if (len == 0) { + buf = g_malloc(1); + buf[0] = '\0'; + } else { + if (!printable) { + /* 3 = length of "%02x " */ + buf = g_malloc(len * 3); + for (i = 0; i < len; i++) { + if (message[i] == '\0') { + break; + } else { + sprintf(buf + 3 * i, "%02x ", message[i]); + } + } + if (i > 0) { + /* replace the last ' '(space) to NULL */ + buf[i * 3 - 1] = '\0'; + } else { + buf[0] = '\0'; + } + } else { + buf = g_strdup(message); + } + } + + return buf; +} + void qemu_system_guest_panicked(GuestPanicInformation *info) { qemu_log_mask(LOG_GUEST_ERROR, "Guest crashed"); @@ -631,7 +683,20 @@ void qemu_system_guest_panicked(GuestPanicInformation *info) S390CrashReason_str(info->u.s390.reason), info->u.s390.psw_mask, info->u.s390.psw_addr); + } else if (info->type == GUEST_PANIC_INFORMATION_TYPE_TDX) { + char *message = tdx_parse_panic_message(info->u.tdx.message); + qemu_log_mask(LOG_GUEST_ERROR, + "\nTDX guest reports fatal error." + " error code: 0x%" PRIx32 " error message:\"%s\"\n", + info->u.tdx.error_code, message); + g_free(message); + if (info->u.tdx.gpa != -1ull) { + qemu_log_mask(LOG_GUEST_ERROR, "Additional error information " + "can be found at gpa page: 0x%" PRIx64 "\n", + info->u.tdx.gpa); + } } + qapi_free_GuestPanicInformation(info); } } diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 16ad1af3c5..ae3740a230 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -16,6 +16,7 @@ #include "qapi/error.h" #include "qom/object_interfaces.h" #include "crypto/hash.h" +#include "system/runstate.h" #include "system/system.h" #include "system/ramblock.h" @@ -615,18 +616,35 @@ int tdx_parse_tdvf(void *flash_ptr, int size) return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size); } +static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code, + char *message, uint64_t gpa) +{ + GuestPanicInformation *panic_info; + + panic_info = g_new0(GuestPanicInformation, 1); + panic_info->type = GUEST_PANIC_INFORMATION_TYPE_TDX; + panic_info->u.tdx.error_code = (uint32_t) error_code; + panic_info->u.tdx.message = message; + panic_info->u.tdx.gpa = gpa; + + qemu_system_guest_panicked(panic_info); +} + /* * Only 8 registers can contain valid ASCII byte stream to form the fatal * message, and their sequence is: R14, R15, RBX, RDI, RSI, R8, R9, RDX */ #define TDX_FATAL_MESSAGE_MAX 64 +#define TDX_REPORT_FATAL_ERROR_GPA_VALID BIT_ULL(63) + int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run) { uint64_t error_code = run->system_event.data[R_R12]; uint64_t reg_mask = run->system_event.data[R_ECX]; char *message = NULL; uint64_t *tmp; + uint64_t gpa = -1ull; if (error_code & 0xffff) { error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%lx", @@ -657,7 +675,12 @@ int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run) } #undef COPY_REG - error_report("TD guest reports fatal error. %s", message ? : ""); + if (error_code & TDX_REPORT_FATAL_ERROR_GPA_VALID) { + gpa = run->system_event.data[R_R13]; + } + + tdx_panicked_on_fatal_error(cpu, error_code, message, gpa); + return -1; } From 77b5403a0298a5460554f768a2098fd21588e555 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:36 -0400 Subject: [PATCH 42/77] kvm: Check KVM_CAP_MAX_VCPUS at vm level KVM with TDX support starts to report different KVM_CAP_MAX_VCPUS per different VM types. So switch to check the KVM_CAP_MAX_VCPUS at vm level. KVM still returns the global KVM_CAP_MAX_VCPUS when the KVM is old that doesn't report different value at vm level. Signed-off-by: Xiaoyao Li Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-31-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-all.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 42d239cf8f..71e6060458 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -2431,7 +2431,7 @@ static int kvm_recommended_vcpus(KVMState *s) static int kvm_max_vcpus(KVMState *s) { - int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPUS); + int ret = kvm_vm_check_extension(s, KVM_CAP_MAX_VCPUS); return (ret) ? ret : kvm_recommended_vcpus(s); } From 8583c53e2b619b1b9569d3f2d3f3cb2904a573ad Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:37 -0400 Subject: [PATCH 43/77] i386/cpu: introduce x86_confidential_guest_cpu_instance_init() To allow execute confidential guest specific cpu init operations. Signed-off-by: Xiaoyao Li Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-32-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/confidential-guest.h | 11 +++++++++++ target/i386/cpu.c | 8 ++++++++ 2 files changed, 19 insertions(+) diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h index 164be7633a..a86c42a475 100644 --- a/target/i386/confidential-guest.h +++ b/target/i386/confidential-guest.h @@ -39,6 +39,7 @@ struct X86ConfidentialGuestClass { /* */ int (*kvm_type)(X86ConfidentialGuest *cg); + void (*cpu_instance_init)(X86ConfidentialGuest *cg, CPUState *cpu); uint32_t (*mask_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, int reg, uint32_t value); }; @@ -59,6 +60,16 @@ static inline int x86_confidential_guest_kvm_type(X86ConfidentialGuest *cg) } } +static inline void x86_confidential_guest_cpu_instance_init(X86ConfidentialGuest *cg, + CPUState *cpu) +{ + X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); + + if (klass->cpu_instance_init) { + klass->cpu_instance_init(cg, cpu); + } +} + /** * x86_confidential_guest_mask_cpuid_features: * diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 9689f6374e..4a7c319bb9 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -37,6 +37,7 @@ #include "hw/i386/topology.h" #include "exec/watchpoint.h" #ifndef CONFIG_USER_ONLY +#include "confidential-guest.h" #include "system/reset.h" #include "qapi/qapi-commands-machine-target.h" #include "system/address-spaces.h" @@ -8543,6 +8544,13 @@ static void x86_cpu_post_initfn(Object *obj) } accel_cpu_instance_init(CPU(obj)); + +#ifndef CONFIG_USER_ONLY + if (current_machine && current_machine->cgs) { + x86_confidential_guest_cpu_instance_init( + X86_CONFIDENTIAL_GUEST(current_machine->cgs), (CPU(obj))); + } +#endif } static void x86_cpu_init_default_topo(X86CPU *cpu) From 7c615242671dbe65e198c20889dcaa9b4b9a1624 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:38 -0400 Subject: [PATCH 44/77] i386/tdx: implement tdx_cpu_instance_init() Currently, pmu is not supported for TDX by KVM. Signed-off-by: Xiaoyao Li Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-33-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/tdx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index ae3740a230..7c5e59c559 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -398,6 +398,11 @@ static int tdx_kvm_type(X86ConfidentialGuest *cg) return KVM_X86_TDX_VM; } +static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu) +{ + object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort); +} + static int tdx_validate_attributes(TdxGuest *tdx, Error **errp) { if ((tdx->attributes & ~tdx_caps->supported_attrs)) { @@ -791,4 +796,5 @@ static void tdx_guest_class_init(ObjectClass *oc, const void *data) klass->kvm_init = tdx_kvm_init; x86_klass->kvm_type = tdx_kvm_type; + x86_klass->cpu_instance_init = tdx_cpu_instance_init; } From ab8bd85adf75900edc2764d0ebe8b53867cc54aa Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:39 -0400 Subject: [PATCH 45/77] i386/cpu: Introduce enable_cpuid_0x1f to force exposing CPUID 0x1f Currently, QEMU exposes CPUID 0x1f to guest only when necessary, i.e., when topology level that cannot be enumerated by leaf 0xB, e.g., die or module level, are configured for the guest, e.g., -smp xx,dies=2. However, TDX architecture forces to require CPUID 0x1f to configure CPU topology. Introduce a bool flag, enable_cpuid_0x1f, in CPU for the case that requires CPUID leaf 0x1f to be exposed to guest. Introduce a new function x86_has_cpuid_0x1f(), which is the wrapper of cpu->enable_cpuid_0x1f and x86_has_extended_topo() to check if it needs to enable cpuid leaf 0x1f for the guest. Signed-off-by: Xiaoyao Li Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-34-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 4 ++-- target/i386/cpu.h | 9 +++++++++ target/i386/kvm/kvm.c | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 4a7c319bb9..6a97d7549e 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -7045,7 +7045,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, break; case 0x1F: /* V2 Extended Topology Enumeration Leaf */ - if (!x86_has_extended_topo(env->avail_cpu_topo)) { + if (!x86_has_cpuid_0x1f(cpu)) { *eax = *ebx = *ecx = *edx = 0; break; } @@ -7909,7 +7909,7 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) * cpu->vendor_cpuid_only has been unset for compatibility with older * machine types. */ - if (x86_has_extended_topo(env->avail_cpu_topo) && + if (x86_has_cpuid_0x1f(cpu) && (IS_INTEL_CPU(env) || !cpu->vendor_cpuid_only)) { x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x1F); } diff --git a/target/i386/cpu.h b/target/i386/cpu.h index c51e0a43d0..ad0e3d8cdd 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -2239,6 +2239,9 @@ struct ArchCPU { /* Compatibility bits for old machine types: */ bool enable_cpuid_0xb; + /* Force to enable cpuid 0x1f */ + bool enable_cpuid_0x1f; + /* Enable auto level-increase for all CPUID leaves */ bool full_cpuid_auto_level; @@ -2500,6 +2503,12 @@ void host_cpuid(uint32_t function, uint32_t count, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); bool cpu_has_x2apic_feature(CPUX86State *env); +static inline bool x86_has_cpuid_0x1f(X86CPU *cpu) +{ + return cpu->enable_cpuid_0x1f || + x86_has_extended_topo(cpu->env.avail_cpu_topo); +} + /* helper.c */ void x86_cpu_set_a20(X86CPU *cpu, int a20_state); void cpu_sync_avx_hflag(CPUX86State *env); diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index c5c692a034..ee33797fdd 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -1872,7 +1872,7 @@ uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries, break; } case 0x1f: - if (!x86_has_extended_topo(env->avail_cpu_topo)) { + if (!x86_has_cpuid_0x1f(env_archcpu(env))) { cpuid_i--; break; } From 9002494f80b751a7655045c5f46bf90bc1d3bbd0 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:40 -0400 Subject: [PATCH 46/77] i386/tdx: Force exposing CPUID 0x1f TDX uses CPUID 0x1f to configure TD guest's CPU topology. So set enable_cpuid_0x1f for TDs. Signed-off-by: Xiaoyao Li Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-35-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/tdx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 7c5e59c559..accaefb401 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -400,7 +400,11 @@ static int tdx_kvm_type(X86ConfidentialGuest *cg) static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu) { + X86CPU *x86cpu = X86_CPU(cpu); + object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort); + + x86cpu->enable_cpuid_0x1f = true; } static int tdx_validate_attributes(TdxGuest *tdx, Error **errp) From da6728658bf63d6a3989f1587a33566b3e54bed8 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:41 -0400 Subject: [PATCH 47/77] i386/tdx: Set kvm_readonly_mem_enabled to false for TDX VM TDX only supports readonly for shared memory but not for private memory. In the view of QEMU, it has no idea whether a memslot is used as shared memory of private. Thus just mark kvm_readonly_mem_enabled to false to TDX VM for simplicity. Signed-off-by: Xiaoyao Li Acked-by: Gerd Hoffmann Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-36-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/tdx.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index accaefb401..344e560b4b 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -384,6 +384,15 @@ static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) return -EOPNOTSUPP; } + /* + * Set kvm_readonly_mem_allowed to false, because TDX only supports readonly + * memory for shared memory but not for private memory. Besides, whether a + * memslot is private or shared is not determined by QEMU. + * + * Thus, just mark readonly memory not supported for simplicity. + */ + kvm_readonly_mem_allowed = false; + qemu_add_machine_init_done_notifier(&tdx_machine_done_notify); tdx_guest = tdx; From 810d4e83d07ca0d072205453a42c324a51d5a5fa Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:42 -0400 Subject: [PATCH 48/77] i386/tdx: Disable SMM for TDX VMs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TDX doesn't support SMM and VMM cannot emulate SMM for TDX VMs because VMM cannot manipulate TDX VM's memory. Disable SMM for TDX VMs and error out if user requests to enable SMM. Signed-off-by: Xiaoyao Li Acked-by: Gerd Hoffmann Reviewed-by: Daniel P. Berrangé Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-37-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/tdx.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 344e560b4b..87c5bf0496 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -367,11 +367,20 @@ static Notifier tdx_machine_done_notify = { static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { + MachineState *ms = MACHINE(qdev_get_machine()); + X86MachineState *x86ms = X86_MACHINE(ms); TdxGuest *tdx = TDX_GUEST(cgs); int r = 0; kvm_mark_guest_state_protected(); + if (x86ms->smm == ON_OFF_AUTO_AUTO) { + x86ms->smm = ON_OFF_AUTO_OFF; + } else if (x86ms->smm == ON_OFF_AUTO_ON) { + error_setg(errp, "TDX VM doesn't support SMM"); + return -EINVAL; + } + if (!tdx_caps) { r = get_tdx_capabilities(errp); if (r) { From e7ef60892c80a9ce5b8504ceb13a81f4e0d4b3f7 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:43 -0400 Subject: [PATCH 49/77] i386/tdx: Disable PIC for TDX VMs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Legacy PIC (8259) cannot be supported for TDX VMs since TDX module doesn't allow directly interrupt injection. Using posted interrupts for the PIC is not a viable option as the guest BIOS/kernel will not do EOI for PIC IRQs, i.e. will leave the vIRR bit set. Hence disable PIC for TDX VMs and error out if user wants PIC. Signed-off-by: Xiaoyao Li Acked-by: Gerd Hoffmann Reviewed-by: Daniel P. Berrangé Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-38-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/tdx.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 87c5bf0496..32c3f3795d 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -381,6 +381,13 @@ static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) return -EINVAL; } + if (x86ms->pic == ON_OFF_AUTO_AUTO) { + x86ms->pic = ON_OFF_AUTO_OFF; + } else if (x86ms->pic == ON_OFF_AUTO_ON) { + error_setg(errp, "TDX VM doesn't support PIC"); + return -EINVAL; + } + if (!tdx_caps) { r = get_tdx_capabilities(errp); if (r) { From bb45580d842530d78b58179eaf80b6331b15324e Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:44 -0400 Subject: [PATCH 50/77] i386/tdx: Set and check kernel_irqchip mode for TDX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KVM mandates kernel_irqchip to be split mode. Set it to split mode automatically when users don't provide an explicit value, otherwise check it to be the split mode. Suggested-by: Daniel P. Berrangé Signed-off-by: Xiaoyao Li Reviewed-by: Daniel P. Berrangé Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-39-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/tdx.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 32c3f3795d..68ed3b9f98 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -16,6 +16,7 @@ #include "qapi/error.h" #include "qom/object_interfaces.h" #include "crypto/hash.h" +#include "system/kvm_int.h" #include "system/runstate.h" #include "system/system.h" #include "system/ramblock.h" @@ -388,6 +389,13 @@ static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) return -EINVAL; } + if (kvm_state->kernel_irqchip_split == ON_OFF_AUTO_AUTO) { + kvm_state->kernel_irqchip_split = ON_OFF_AUTO_ON; + } else if (kvm_state->kernel_irqchip_split != ON_OFF_AUTO_ON) { + error_setg(errp, "TDX VM requires kernel_irqchip to be split"); + return -EINVAL; + } + if (!tdx_caps) { r = get_tdx_capabilities(errp); if (r) { From 0ed55865b49b703af93e160d48935812a7114e07 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Thu, 8 May 2025 10:59:45 -0400 Subject: [PATCH 51/77] i386/tdx: Don't synchronize guest tsc for TDs TSC of TDs is not accessible and KVM doesn't allow access of MSR_IA32_TSC for TDs. To avoid the assert() in kvm_get_tsc, make kvm_synchronize_all_tsc() noop for TDs, Signed-off-by: Isaku Yamahata Reviewed-by: Connor Kuehl Signed-off-by: Xiaoyao Li Acked-by: Gerd Hoffmann Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-40-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index ee33797fdd..4fc37cc370 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -328,7 +328,7 @@ void kvm_synchronize_all_tsc(void) { CPUState *cpu; - if (kvm_enabled()) { + if (kvm_enabled() && !is_tdx_vm()) { CPU_FOREACH(cpu) { run_on_cpu(cpu, do_kvm_synchronize_tsc, RUN_ON_CPU_NULL); } From f9aaad3362a5886d78e7d4d50d563ac16c6acdde Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:46 -0400 Subject: [PATCH 52/77] i386/tdx: Only configure MSR_IA32_UCODE_REV in kvm_init_msrs() for TDs For TDs, only MSR_IA32_UCODE_REV in kvm_init_msrs() can be configured by VMM, while the features enumerated/controlled by other MSRs except MSR_IA32_UCODE_REV in kvm_init_msrs() are not under control of VMM. Only configure MSR_IA32_UCODE_REV for TDs. Signed-off-by: Xiaoyao Li Acked-by: Gerd Hoffmann Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-41-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/kvm.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 4fc37cc370..90a0dac4a1 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -3864,32 +3864,34 @@ static void kvm_init_msrs(X86CPU *cpu) CPUX86State *env = &cpu->env; kvm_msr_buf_reset(cpu); - if (has_msr_arch_capabs) { - kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, - env->features[FEAT_ARCH_CAPABILITIES]); - } - if (has_msr_core_capabs) { - kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, - env->features[FEAT_CORE_CAPABILITY]); - } + if (!is_tdx_vm()) { + if (has_msr_arch_capabs) { + kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, + env->features[FEAT_ARCH_CAPABILITIES]); + } - if (has_msr_perf_capabs && cpu->enable_pmu) { - kvm_msr_entry_add_perf(cpu, env->features); + if (has_msr_core_capabs) { + kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, + env->features[FEAT_CORE_CAPABILITY]); + } + + if (has_msr_perf_capabs && cpu->enable_pmu) { + kvm_msr_entry_add_perf(cpu, env->features); + } + + /* + * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but + * all kernels with MSR features should have them. + */ + if (kvm_feature_msrs && cpu_has_vmx(env)) { + kvm_msr_entry_add_vmx(cpu, env->features); + } } if (has_msr_ucode_rev) { kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); } - - /* - * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but - * all kernels with MSR features should have them. - */ - if (kvm_feature_msrs && cpu_has_vmx(env)) { - kvm_msr_entry_add_vmx(cpu, env->features); - } - assert(kvm_buf_set_msrs(cpu) == 0); } From 62a1a8b89d90cd3fbee0e6d38e6a4c0d833e978a Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:47 -0400 Subject: [PATCH 53/77] i386/apic: Skip kvm_apic_put() for TDX KVM neithers allow writing to MSR_IA32_APICBASE for TDs, nor allow for KVM_SET_LAPIC[*]. Note, KVM_GET_LAPIC is also disallowed for TDX. It is called in the path do_kvm_cpu_synchronize_state() -> kvm_arch_get_registers() -> kvm_get_apic() and it's already disllowed for confidential guest through guest_state_protected. [*] https://lore.kernel.org/all/Z3w4Ku4Jq0CrtXne@google.com/ Signed-off-by: Xiaoyao Li Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-42-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- hw/i386/kvm/apic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c index 39035db042..1be9bfe36e 100644 --- a/hw/i386/kvm/apic.c +++ b/hw/i386/kvm/apic.c @@ -17,6 +17,7 @@ #include "system/hw_accel.h" #include "system/kvm.h" #include "kvm/kvm_i386.h" +#include "kvm/tdx.h" static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic, int reg_id, uint32_t val) @@ -141,6 +142,10 @@ static void kvm_apic_put(CPUState *cs, run_on_cpu_data data) struct kvm_lapic_state kapic; int ret; + if (is_tdx_vm()) { + return; + } + kvm_put_apicbase(s->cpu, s->apicbase); kvm_put_apic_state(s, &kapic); From b4b7fb5a773e1d2215c2aaa99789eca51914b78f Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:48 -0400 Subject: [PATCH 54/77] cpu: Don't set vcpu_dirty when guest_state_protected QEMU calls kvm_arch_put_registers() when vcpu_dirty is true in kvm_vcpu_exec(). However, for confidential guest, like TDX, putting registers is disallowed due to guest state is protected. Only set vcpu_dirty to true with guest state is not protected when creating the vcpu. Signed-off-by: Xiaoyao Li Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-43-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-all.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 71e6060458..51526d301b 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -471,7 +471,9 @@ int kvm_create_vcpu(CPUState *cpu) cpu->kvm_fd = kvm_fd; cpu->kvm_state = s; - cpu->vcpu_dirty = true; + if (!s->guest_state_protected) { + cpu->vcpu_dirty = true; + } cpu->dirty_pages = 0; cpu->throttle_us_per_full = 0; From 695bfaee7153153708228946aa26c6d879599c04 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:49 -0400 Subject: [PATCH 55/77] i386/cgs: Rename *mask_cpuid_features() to *adjust_cpuid_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because for TDX case, there are also fixed-1 bits that enforced by TDX module. Signed-off-by: Xiaoyao Li Reviewed-by: Daniel P. Berrangé Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-44-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/confidential-guest.h | 20 ++++++++++---------- target/i386/kvm/kvm.c | 2 +- target/i386/sev.c | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h index a86c42a475..777d43cc96 100644 --- a/target/i386/confidential-guest.h +++ b/target/i386/confidential-guest.h @@ -40,8 +40,8 @@ struct X86ConfidentialGuestClass { /* */ int (*kvm_type)(X86ConfidentialGuest *cg); void (*cpu_instance_init)(X86ConfidentialGuest *cg, CPUState *cpu); - uint32_t (*mask_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, - int reg, uint32_t value); + uint32_t (*adjust_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature, + uint32_t index, int reg, uint32_t value); }; /** @@ -71,21 +71,21 @@ static inline void x86_confidential_guest_cpu_instance_init(X86ConfidentialGuest } /** - * x86_confidential_guest_mask_cpuid_features: + * x86_confidential_guest_adjust_cpuid_features: * - * Removes unsupported features from a confidential guest's CPUID values, returns - * the value with the bits removed. The bits removed should be those that KVM - * provides independent of host-supported CPUID features, but are not supported by - * the confidential computing firmware. + * Adjust the supported features from a confidential guest's CPUID values, + * returns the adjusted value. There are bits being removed that are not + * supported by the confidential computing firmware or bits being added that + * are forcibly exposed to guest by the confidential computing firmware. */ -static inline int x86_confidential_guest_mask_cpuid_features(X86ConfidentialGuest *cg, +static inline int x86_confidential_guest_adjust_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, int reg, uint32_t value) { X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); - if (klass->mask_cpuid_features) { - return klass->mask_cpuid_features(cg, feature, index, reg, value); + if (klass->adjust_cpuid_features) { + return klass->adjust_cpuid_features(cg, feature, index, reg, value); } else { return value; } diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 90a0dac4a1..0d47463431 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -574,7 +574,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, } if (current_machine->cgs) { - ret = x86_confidential_guest_mask_cpuid_features( + ret = x86_confidential_guest_adjust_cpuid_features( X86_CONFIDENTIAL_GUEST(current_machine->cgs), function, index, reg, ret); } diff --git a/target/i386/sev.c b/target/i386/sev.c index 7ee700d6a3..8b87b7cdec 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -947,7 +947,7 @@ out: } static uint32_t -sev_snp_mask_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, +sev_snp_adjust_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, int reg, uint32_t value) { switch (feature) { @@ -2405,7 +2405,7 @@ sev_snp_guest_class_init(ObjectClass *oc, const void *data) klass->launch_finish = sev_snp_launch_finish; klass->launch_update_data = sev_snp_launch_update_data; klass->kvm_init = sev_snp_kvm_init; - x86_klass->mask_cpuid_features = sev_snp_mask_cpuid_features; + x86_klass->adjust_cpuid_features = sev_snp_adjust_cpuid_features; x86_klass->kvm_type = sev_snp_kvm_type; object_class_property_add(oc, "policy", "uint64", From 75ec6189f5c65cab210dd9f16cf4eef368038d45 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:50 -0400 Subject: [PATCH 56/77] i386/tdx: Implement adjust_cpuid_features() for TDX Maintain a TDX specific supported CPUID set, and use it to mask the common supported CPUID value of KVM. It can avoid newly added supported features (reported via KVM_GET_SUPPORTED_CPUID) for common VMs being falsely reported as supported for TDX. As the first step, initialize the TDX supported CPUID set with all the configurable CPUID bits. It's not complete because there are other CPUID bits are supported for TDX but not reported as directly configurable. E.g. the XFAM related bits, attribute related bits and fixed-1 bits. They will be handled in the future. Also, what matters are the CPUID bits related to QEMU's feature word. Only mask the CPUID leafs which are feature word leaf. Signed-off-by: Xiaoyao Li Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-45-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 16 ++++++++++++++++ target/i386/cpu.h | 1 + target/i386/kvm/kvm.c | 2 +- target/i386/kvm/kvm_i386.h | 1 + target/i386/kvm/tdx.c | 34 ++++++++++++++++++++++++++++++++++ 5 files changed, 53 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 6a97d7549e..5aacb62f08 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1678,6 +1678,22 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, }; +bool is_feature_word_cpuid(uint32_t feature, uint32_t index, int reg) +{ + FeatureWordInfo *wi; + FeatureWord w; + + for (w = 0; w < FEATURE_WORDS; w++) { + wi = &feature_word_info[w]; + if (wi->type == CPUID_FEATURE_WORD && wi->cpuid.eax == feature && + (!wi->cpuid.needs_ecx || wi->cpuid.ecx == index) && + wi->cpuid.reg == reg) { + return true; + } + } + return false; +} + typedef struct FeatureMask { FeatureWord index; uint64_t mask; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index ad0e3d8cdd..7ffcf91b01 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -2502,6 +2502,7 @@ void cpu_set_apic_feature(CPUX86State *env); void host_cpuid(uint32_t function, uint32_t count, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); bool cpu_has_x2apic_feature(CPUX86State *env); +bool is_feature_word_cpuid(uint32_t feature, uint32_t index, int reg); static inline bool x86_has_cpuid_0x1f(X86CPU *cpu) { diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 0d47463431..cd87f5502a 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -394,7 +394,7 @@ static bool host_tsx_broken(void) /* Returns the value for a specific register on the cpuid entry */ -static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg) +uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg) { uint32_t ret = 0; switch (reg) { diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h index dc696cb723..484a1de84d 100644 --- a/target/i386/kvm/kvm_i386.h +++ b/target/i386/kvm/kvm_i386.h @@ -62,6 +62,7 @@ void kvm_update_msi_routes_all(void *private, bool global, struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, uint32_t function, uint32_t index); +uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg); uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries, uint32_t cpuid_i); #endif /* CONFIG_KVM */ diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 68ed3b9f98..e3b7ad6d14 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -45,6 +45,7 @@ static TdxGuest *tdx_guest; static struct kvm_tdx_capabilities *tdx_caps; +static struct kvm_cpuid2 *tdx_supported_cpuid; /* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */ bool is_tdx_vm(void) @@ -366,6 +367,20 @@ static Notifier tdx_machine_done_notify = { .notify = tdx_finalize_vm, }; +static void tdx_setup_supported_cpuid(void) +{ + if (tdx_supported_cpuid) { + return; + } + + tdx_supported_cpuid = g_malloc0(sizeof(*tdx_supported_cpuid) + + KVM_MAX_CPUID_ENTRIES * sizeof(struct kvm_cpuid_entry2)); + + memcpy(tdx_supported_cpuid->entries, tdx_caps->cpuid.entries, + tdx_caps->cpuid.nent * sizeof(struct kvm_cpuid_entry2)); + tdx_supported_cpuid->nent = tdx_caps->cpuid.nent; +} + static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { MachineState *ms = MACHINE(qdev_get_machine()); @@ -403,6 +418,8 @@ static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) } } + tdx_setup_supported_cpuid(); + /* TDX relies on KVM_HC_MAP_GPA_RANGE to handle TDG.VP.VMCALL */ if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) { return -EOPNOTSUPP; @@ -440,6 +457,22 @@ static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu) x86cpu->enable_cpuid_0x1f = true; } +static uint32_t tdx_adjust_cpuid_features(X86ConfidentialGuest *cg, + uint32_t feature, uint32_t index, + int reg, uint32_t value) +{ + struct kvm_cpuid_entry2 *e; + + if (is_feature_word_cpuid(feature, index, reg)) { + e = cpuid_find_entry(tdx_supported_cpuid, feature, index); + if (e) { + value &= cpuid_entry_get_reg(e, reg); + } + } + + return value; +} + static int tdx_validate_attributes(TdxGuest *tdx, Error **errp) { if ((tdx->attributes & ~tdx_caps->supported_attrs)) { @@ -834,4 +867,5 @@ static void tdx_guest_class_init(ObjectClass *oc, const void *data) klass->kvm_init = tdx_kvm_init; x86_klass->kvm_type = tdx_kvm_type; x86_klass->cpu_instance_init = tdx_cpu_instance_init; + x86_klass->adjust_cpuid_features = tdx_adjust_cpuid_features; } From 0ba06e46d09b84a2cb97a268da5576aaca3a24ca Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:51 -0400 Subject: [PATCH 57/77] i386/tdx: Add TDX fixed1 bits to supported CPUIDs TDX architecture forcibly sets some CPUID bits for TD guest that VMM cannot disable it. They are fixed1 bits. Fixed1 bits are not covered by tdx_caps.cpuid (which only contains the directly configurable bits), while fixed1 bits are supported for TD guest obviously. Add fixed1 bits to tdx_supported_cpuid. Besides, set all the fixed1 bits to the initial set of KVM's support since KVM might not report them as supported. Signed-off-by: Xiaoyao Li Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-46-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.h | 2 + target/i386/kvm/kvm_i386.h | 7 ++ target/i386/kvm/tdx.c | 134 +++++++++++++++++++++++++++++++++++++ target/i386/sev.c | 8 --- 4 files changed, 143 insertions(+), 8 deletions(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 7ffcf91b01..342e4f2a57 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -920,6 +920,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_0_EDX_FSRM (1U << 4) /* AVX512 Vector Pair Intersection to a Pair of Mask Registers */ #define CPUID_7_0_EDX_AVX512_VP2INTERSECT (1U << 8) + /* "md_clear" VERW clears CPU buffers */ +#define CPUID_7_0_EDX_MD_CLEAR (1U << 10) /* SERIALIZE instruction */ #define CPUID_7_0_EDX_SERIALIZE (1U << 14) /* TSX Suspend Load Address Tracking instruction */ diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h index 484a1de84d..5f83e8850a 100644 --- a/target/i386/kvm/kvm_i386.h +++ b/target/i386/kvm/kvm_i386.h @@ -44,6 +44,13 @@ void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask); #ifdef CONFIG_KVM +#include + +typedef struct KvmCpuidInfo { + struct kvm_cpuid2 cpuid; + struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; +} KvmCpuidInfo; + bool kvm_is_vm_type_supported(int type); bool kvm_has_adjust_clock_stable(void); bool kvm_has_exception_payload(void); diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index e3b7ad6d14..9d92ff1484 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -367,6 +367,133 @@ static Notifier tdx_machine_done_notify = { .notify = tdx_finalize_vm, }; +/* + * Some CPUID bits change from fixed1 to configurable bits when TDX module + * supports TDX_FEATURES0.VE_REDUCTION. e.g., MCA/MCE/MTRR/CORE_CAPABILITY. + * + * To make QEMU work with all the versions of TDX module, keep the fixed1 bits + * here if they are ever fixed1 bits in any of the version though not fixed1 in + * the latest version. Otherwise, with the older version of TDX module, QEMU may + * treat the fixed1 bit as unsupported. + * + * For newer TDX module, it does no harm to keep them in tdx_fixed1_bits even + * though they changed to configurable bits. Because tdx_fixed1_bits is used to + * setup the supported bits. + */ +KvmCpuidInfo tdx_fixed1_bits = { + .cpuid.nent = 8, + .entries[0] = { + .function = 0x1, + .index = 0, + .ecx = CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_DTES64 | + CPUID_EXT_DSCPL | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | + CPUID_EXT_PDCM | CPUID_EXT_PCID | CPUID_EXT_SSE41 | + CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | + CPUID_EXT_POPCNT | CPUID_EXT_AES | CPUID_EXT_XSAVE | + CPUID_EXT_RDRAND | CPUID_EXT_HYPERVISOR, + .edx = CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC | + CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | + CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | + CPUID_PAT | CPUID_CLFLUSH | CPUID_DTS | CPUID_MMX | CPUID_FXSR | + CPUID_SSE | CPUID_SSE2, + }, + .entries[1] = { + .function = 0x6, + .index = 0, + .eax = CPUID_6_EAX_ARAT, + }, + .entries[2] = { + .function = 0x7, + .index = 0, + .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, + .ebx = CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_FDP_EXCPTN_ONLY | + CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_INVPCID | + CPUID_7_0_EBX_ZERO_FCS_FDS | CPUID_7_0_EBX_RDSEED | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | + CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_SHA_NI, + .ecx = CPUID_7_0_ECX_BUS_LOCK_DETECT | CPUID_7_0_ECX_MOVDIRI | + CPUID_7_0_ECX_MOVDIR64B, + .edx = CPUID_7_0_EDX_MD_CLEAR | CPUID_7_0_EDX_SPEC_CTRL | + CPUID_7_0_EDX_STIBP | CPUID_7_0_EDX_FLUSH_L1D | + CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_CORE_CAPABILITY | + CPUID_7_0_EDX_SPEC_CTRL_SSBD, + }, + .entries[3] = { + .function = 0x7, + .index = 2, + .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, + .edx = CPUID_7_2_EDX_PSFD | CPUID_7_2_EDX_IPRED_CTRL | + CPUID_7_2_EDX_RRSBA_CTRL | CPUID_7_2_EDX_BHI_CTRL, + }, + .entries[4] = { + .function = 0xD, + .index = 0, + .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, + .eax = XSTATE_FP_MASK | XSTATE_SSE_MASK, + }, + .entries[5] = { + .function = 0xD, + .index = 1, + .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, + .eax = CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC| + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, + }, + .entries[6] = { + .function = 0x80000001, + .index = 0, + .ecx = CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH, + /* + * Strictly speaking, SYSCALL is not fixed1 bit since it depends on + * the CPU to be in 64-bit mode. But here fixed1 is used to serve the + * purpose of supported bits for TDX. In this sense, SYACALL is always + * supported. + */ + .edx = CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_RDTSCP | CPUID_EXT2_LM, + }, + .entries[7] = { + .function = 0x80000007, + .index = 0, + .edx = CPUID_APM_INVTSC, + }, +}; + +static struct kvm_cpuid_entry2 *find_in_supported_entry(uint32_t function, + uint32_t index) +{ + struct kvm_cpuid_entry2 *e; + + e = cpuid_find_entry(tdx_supported_cpuid, function, index); + if (!e) { + if (tdx_supported_cpuid->nent >= KVM_MAX_CPUID_ENTRIES) { + error_report("tdx_supported_cpuid requries more space than %d entries", + KVM_MAX_CPUID_ENTRIES); + exit(1); + } + e = &tdx_supported_cpuid->entries[tdx_supported_cpuid->nent++]; + e->function = function; + e->index = index; + } + + return e; +} + +static void tdx_add_supported_cpuid_by_fixed1_bits(void) +{ + struct kvm_cpuid_entry2 *e, *e1; + int i; + + for (i = 0; i < tdx_fixed1_bits.cpuid.nent; i++) { + e = &tdx_fixed1_bits.entries[i]; + + e1 = find_in_supported_entry(e->function, e->index); + e1->eax |= e->eax; + e1->ebx |= e->ebx; + e1->ecx |= e->ecx; + e1->edx |= e->edx; + } +} + static void tdx_setup_supported_cpuid(void) { if (tdx_supported_cpuid) { @@ -379,6 +506,8 @@ static void tdx_setup_supported_cpuid(void) memcpy(tdx_supported_cpuid->entries, tdx_caps->cpuid.entries, tdx_caps->cpuid.nent * sizeof(struct kvm_cpuid_entry2)); tdx_supported_cpuid->nent = tdx_caps->cpuid.nent; + + tdx_add_supported_cpuid_by_fixed1_bits(); } static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) @@ -463,6 +592,11 @@ static uint32_t tdx_adjust_cpuid_features(X86ConfidentialGuest *cg, { struct kvm_cpuid_entry2 *e; + e = cpuid_find_entry(&tdx_fixed1_bits.cpuid, feature, index); + if (e) { + value |= cpuid_entry_get_reg(e, reg); + } + if (is_feature_word_cpuid(feature, index, reg)) { e = cpuid_find_entry(tdx_supported_cpuid, feature, index); if (e) { diff --git a/target/i386/sev.c b/target/i386/sev.c index 8b87b7cdec..adf787797e 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -212,14 +212,6 @@ static const char *const sev_fw_errlist[] = { #define SEV_FW_MAX_ERROR ARRAY_SIZE(sev_fw_errlist) -/* doesn't expose this, so re-use the max from kvm.c */ -#define KVM_MAX_CPUID_ENTRIES 100 - -typedef struct KvmCpuidInfo { - struct kvm_cpuid2 cpuid; - struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; -} KvmCpuidInfo; - #define SNP_CPUID_FUNCTION_MAXCOUNT 64 #define SNP_CPUID_FUNCTION_UNKNOWN 0xFFFFFFFF From 31df29c532a9ef473c6efd497950a620099bf1da Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:52 -0400 Subject: [PATCH 58/77] i386/tdx: Add supported CPUID bits related to TD Attributes For TDX, some CPUID feature bit is configured via TD attributes. They are not covered by tdx_caps.cpuid (which only contians the directly configurable CPUID bits), but they are actually supported when the related attributre bit is supported. Note, LASS and KeyLocker are not supported by KVM for TDX, nor does QEMU support it (see TDX_SUPPORTED_TD_ATTRS). They are defined in tdx_attrs_maps[] for the completeness of the existing TD Attribute bits that are related with CPUID features. Signed-off-by: Xiaoyao Li Link: https://lore.kernel.org/r/20250508150002.689633-47-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.h | 4 +++ target/i386/kvm/tdx.c | 60 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 342e4f2a57..e50c57264d 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -899,6 +899,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_0_ECX_LA57 (1U << 16) /* Read Processor ID */ #define CPUID_7_0_ECX_RDPID (1U << 22) +/* KeyLocker */ +#define CPUID_7_0_ECX_KeyLocker (1U << 23) /* Bus Lock Debug Exception */ #define CPUID_7_0_ECX_BUS_LOCK_DETECT (1U << 24) /* Cache Line Demote Instruction */ @@ -959,6 +961,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_1_EAX_AVX_VNNI (1U << 4) /* AVX512 BFloat16 Instruction */ #define CPUID_7_1_EAX_AVX512_BF16 (1U << 5) +/* Linear address space separation */ +#define CPUID_7_1_EAX_LASS (1U << 6) /* CMPCCXADD Instructions */ #define CPUID_7_1_EAX_CMPCCXADD (1U << 7) /* Fast Zero REP MOVS */ diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 9d92ff1484..fa161661fa 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -458,6 +458,34 @@ KvmCpuidInfo tdx_fixed1_bits = { }, }; +typedef struct TdxAttrsMap { + uint32_t attr_index; + uint32_t cpuid_leaf; + uint32_t cpuid_subleaf; + int cpuid_reg; + uint32_t feat_mask; +} TdxAttrsMap; + +static TdxAttrsMap tdx_attrs_maps[] = { + {.attr_index = 27, + .cpuid_leaf = 7, + .cpuid_subleaf = 1, + .cpuid_reg = R_EAX, + .feat_mask = CPUID_7_1_EAX_LASS,}, + + {.attr_index = 30, + .cpuid_leaf = 7, + .cpuid_subleaf = 0, + .cpuid_reg = R_ECX, + .feat_mask = CPUID_7_0_ECX_PKS,}, + + {.attr_index = 31, + .cpuid_leaf = 7, + .cpuid_subleaf = 0, + .cpuid_reg = R_ECX, + .feat_mask = CPUID_7_0_ECX_KeyLocker,}, +}; + static struct kvm_cpuid_entry2 *find_in_supported_entry(uint32_t function, uint32_t index) { @@ -494,6 +522,37 @@ static void tdx_add_supported_cpuid_by_fixed1_bits(void) } } +static void tdx_add_supported_cpuid_by_attrs(void) +{ + struct kvm_cpuid_entry2 *e; + TdxAttrsMap *map; + int i; + + for (i = 0; i < ARRAY_SIZE(tdx_attrs_maps); i++) { + map = &tdx_attrs_maps[i]; + if (!((1ULL << map->attr_index) & tdx_caps->supported_attrs)) { + continue; + } + + e = find_in_supported_entry(map->cpuid_leaf, map->cpuid_subleaf); + + switch(map->cpuid_reg) { + case R_EAX: + e->eax |= map->feat_mask; + break; + case R_EBX: + e->ebx |= map->feat_mask; + break; + case R_ECX: + e->ecx |= map->feat_mask; + break; + case R_EDX: + e->edx |= map->feat_mask; + break; + } + } +} + static void tdx_setup_supported_cpuid(void) { if (tdx_supported_cpuid) { @@ -508,6 +567,7 @@ static void tdx_setup_supported_cpuid(void) tdx_supported_cpuid->nent = tdx_caps->cpuid.nent; tdx_add_supported_cpuid_by_fixed1_bits(); + tdx_add_supported_cpuid_by_attrs(); } static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) From 8c94c84cb9e0140b48acc9c9d404525ca7ef7457 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:53 -0400 Subject: [PATCH 59/77] i386/tdx: Add supported CPUID bits relates to XFAM Some CPUID bits are controlled by XFAM. They are not covered by tdx_caps.cpuid (which only contians the directly configurable bits), but they are actually supported when the related XFAM bit is supported. Add these XFAM controlled bits to TDX supported CPUID bits based on the supported_xfam. Besides, incorporate the supported_xfam into the supported CPUID leaf of 0xD. Signed-off-by: Xiaoyao Li Link: https://lore.kernel.org/r/20250508150002.689633-48-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 12 ------- target/i386/cpu.h | 16 ++++++++++ target/i386/kvm/tdx.c | 73 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 89 insertions(+), 12 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 5aacb62f08..383c0b35d4 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1694,15 +1694,6 @@ bool is_feature_word_cpuid(uint32_t feature, uint32_t index, int reg) return false; } -typedef struct FeatureMask { - FeatureWord index; - uint64_t mask; -} FeatureMask; - -typedef struct FeatureDep { - FeatureMask from, to; -} FeatureDep; - static FeatureDep feature_dependencies[] = { { .from = { FEAT_7_0_EDX, CPUID_7_0_EDX_ARCH_CAPABILITIES }, @@ -1871,9 +1862,6 @@ static const X86RegisterInfo32 x86_reg_info_32[CPU_NB_REGS32] = { }; #undef REGISTER -/* CPUID feature bits available in XSS */ -#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK) - ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = { [XSTATE_FP_BIT] = { /* x87 FP state component is always enabled if XSAVE is supported */ diff --git a/target/i386/cpu.h b/target/i386/cpu.h index e50c57264d..b38e691f1a 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -584,6 +584,7 @@ typedef enum X86Seg { #define XSTATE_OPMASK_BIT 5 #define XSTATE_ZMM_Hi256_BIT 6 #define XSTATE_Hi16_ZMM_BIT 7 +#define XSTATE_PT_BIT 8 #define XSTATE_PKRU_BIT 9 #define XSTATE_ARCH_LBR_BIT 15 #define XSTATE_XTILE_CFG_BIT 17 @@ -597,6 +598,7 @@ typedef enum X86Seg { #define XSTATE_OPMASK_MASK (1ULL << XSTATE_OPMASK_BIT) #define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT) #define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT) +#define XSTATE_PT_MASK (1ULL << XSTATE_PT_BIT) #define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT) #define XSTATE_ARCH_LBR_MASK (1ULL << XSTATE_ARCH_LBR_BIT) #define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT) @@ -619,6 +621,11 @@ typedef enum X86Seg { XSTATE_Hi16_ZMM_MASK | XSTATE_PKRU_MASK | \ XSTATE_XTILE_CFG_MASK | XSTATE_XTILE_DATA_MASK) +/* CPUID feature bits available in XSS */ +#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK) + +#define CPUID_XSTATE_MASK (CPUID_XSTATE_XCR0_MASK | CPUID_XSTATE_XSS_MASK) + /* CPUID feature words */ typedef enum FeatureWord { FEAT_1_EDX, /* CPUID[1].EDX */ @@ -667,6 +674,15 @@ typedef enum FeatureWord { FEATURE_WORDS, } FeatureWord; +typedef struct FeatureMask { + FeatureWord index; + uint64_t mask; +} FeatureMask; + +typedef struct FeatureDep { + FeatureMask from, to; +} FeatureDep; + typedef uint64_t FeatureWordArray[FEATURE_WORDS]; uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index fa161661fa..188c2242d5 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -23,6 +23,8 @@ #include +#include "cpu.h" +#include "cpu-internal.h" #include "hw/i386/e820_memory_layout.h" #include "hw/i386/tdvf.h" #include "hw/i386/x86.h" @@ -486,6 +488,32 @@ static TdxAttrsMap tdx_attrs_maps[] = { .feat_mask = CPUID_7_0_ECX_KeyLocker,}, }; +typedef struct TdxXFAMDep { + int xfam_bit; + FeatureMask feat_mask; +} TdxXFAMDep; + +/* + * Note, only the CPUID bits whose virtualization type are "XFAM & Native" are + * defiend here. + * + * For those whose virtualization type are "XFAM & Configured & Native", they + * are reported as configurable bits. And they are not supported if not in the + * configureable bits list from KVM even if the corresponding XFAM bit is + * supported. + */ +TdxXFAMDep tdx_xfam_deps[] = { + { XSTATE_YMM_BIT, { FEAT_1_ECX, CPUID_EXT_FMA }}, + { XSTATE_YMM_BIT, { FEAT_7_0_EBX, CPUID_7_0_EBX_AVX2 }}, + { XSTATE_OPMASK_BIT, { FEAT_7_0_ECX, CPUID_7_0_ECX_AVX512_VBMI}}, + { XSTATE_OPMASK_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AVX512_FP16}}, + { XSTATE_PT_BIT, { FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT}}, + { XSTATE_PKRU_BIT, { FEAT_7_0_ECX, CPUID_7_0_ECX_PKU}}, + { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_BF16 }}, + { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_TILE }}, + { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_INT8 }}, +}; + static struct kvm_cpuid_entry2 *find_in_supported_entry(uint32_t function, uint32_t index) { @@ -553,6 +581,50 @@ static void tdx_add_supported_cpuid_by_attrs(void) } } +static void tdx_add_supported_cpuid_by_xfam(void) +{ + struct kvm_cpuid_entry2 *e; + int i; + + const TdxXFAMDep *xfam_dep; + const FeatureWordInfo *f; + for (i = 0; i < ARRAY_SIZE(tdx_xfam_deps); i++) { + xfam_dep = &tdx_xfam_deps[i]; + if (!((1ULL << xfam_dep->xfam_bit) & tdx_caps->supported_xfam)) { + continue; + } + + f = &feature_word_info[xfam_dep->feat_mask.index]; + if (f->type != CPUID_FEATURE_WORD) { + continue; + } + + e = find_in_supported_entry(f->cpuid.eax, f->cpuid.ecx); + switch(f->cpuid.reg) { + case R_EAX: + e->eax |= xfam_dep->feat_mask.mask; + break; + case R_EBX: + e->ebx |= xfam_dep->feat_mask.mask; + break; + case R_ECX: + e->ecx |= xfam_dep->feat_mask.mask; + break; + case R_EDX: + e->edx |= xfam_dep->feat_mask.mask; + break; + } + } + + e = find_in_supported_entry(0xd, 0); + e->eax |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK); + e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK) >> 32; + + e = find_in_supported_entry(0xd, 1); + e->ecx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK); + e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK) >> 32; +} + static void tdx_setup_supported_cpuid(void) { if (tdx_supported_cpuid) { @@ -568,6 +640,7 @@ static void tdx_setup_supported_cpuid(void) tdx_add_supported_cpuid_by_fixed1_bits(); tdx_add_supported_cpuid_by_attrs(); + tdx_add_supported_cpuid_by_xfam(); } static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) From 9f5771c57dbe92d46361afd992a5851c846d0322 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:54 -0400 Subject: [PATCH 60/77] i386/tdx: Add XFD to supported bit of TDX Just mark XFD as always supported for TDX. This simple solution relies on the fact KVM will report XFD as 0 when it's not supported by the hardware. Signed-off-by: Xiaoyao Li Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-49-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.h | 1 + target/i386/kvm/tdx.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index b38e691f1a..8a4b4217d0 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1122,6 +1122,7 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_XSAVE_XSAVEC (1U << 1) #define CPUID_XSAVE_XGETBV1 (1U << 2) #define CPUID_XSAVE_XSAVES (1U << 3) +#define CPUID_XSAVE_XFD (1U << 4) #define CPUID_6_EAX_ARAT (1U << 2) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 188c2242d5..0f7f47c6da 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -621,6 +621,12 @@ static void tdx_add_supported_cpuid_by_xfam(void) e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK) >> 32; e = find_in_supported_entry(0xd, 1); + /* + * Mark XFD always support for TDX, it will be cleared finally in + * tdx_adjust_cpuid_features() if XFD is unavailable on the hardware + * because in this case the original data has it as 0. + */ + e->eax |= CPUID_XSAVE_XFD; e->ecx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK); e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK) >> 32; } From 4d6e288a350a977b0fb0613db952087928ccd93e Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:55 -0400 Subject: [PATCH 61/77] i386/tdx: Define supported KVM features for TDX For TDX, only limited KVM PV features are supported. Signed-off-by: Xiaoyao Li Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-50-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/tdx.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 0f7f47c6da..e35983ad9b 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -32,6 +32,8 @@ #include "kvm_i386.h" #include "tdx.h" +#include "standard-headers/asm-x86/kvm_para.h" + #define TDX_MIN_TSC_FREQUENCY_KHZ (100 * 1000) #define TDX_MAX_TSC_FREQUENCY_KHZ (10 * 1000 * 1000) @@ -44,6 +46,14 @@ TDX_TD_ATTRIBUTES_PKS | \ TDX_TD_ATTRIBUTES_PERFMON) +#define TDX_SUPPORTED_KVM_FEATURES ((1U << KVM_FEATURE_NOP_IO_DELAY) | \ + (1U << KVM_FEATURE_PV_UNHALT) | \ + (1U << KVM_FEATURE_PV_TLB_FLUSH) | \ + (1U << KVM_FEATURE_PV_SEND_IPI) | \ + (1U << KVM_FEATURE_POLL_CONTROL) | \ + (1U << KVM_FEATURE_PV_SCHED_YIELD) | \ + (1U << KVM_FEATURE_MSI_EXT_DEST_ID)) + static TdxGuest *tdx_guest; static struct kvm_tdx_capabilities *tdx_caps; @@ -631,6 +641,14 @@ static void tdx_add_supported_cpuid_by_xfam(void) e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK) >> 32; } +static void tdx_add_supported_kvm_features(void) +{ + struct kvm_cpuid_entry2 *e; + + e = find_in_supported_entry(0x40000001, 0); + e->eax = TDX_SUPPORTED_KVM_FEATURES; +} + static void tdx_setup_supported_cpuid(void) { if (tdx_supported_cpuid) { @@ -647,6 +665,8 @@ static void tdx_setup_supported_cpuid(void) tdx_add_supported_cpuid_by_fixed1_bits(); tdx_add_supported_cpuid_by_attrs(); tdx_add_supported_cpuid_by_xfam(); + + tdx_add_supported_kvm_features(); } static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) From dc0b08b303ad34983b43936a4c978672e0f9a9d8 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:56 -0400 Subject: [PATCH 62/77] i386/cgs: Introduce x86_confidential_guest_check_features() To do cgs specific feature checking. Note the feature checking in x86_cpu_filter_features() is valid for non-cgs VMs. For cgs VMs like TDX, what features can be supported has more restrictions. Signed-off-by: Xiaoyao Li Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-51-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/confidential-guest.h | 13 +++++++++++++ target/i386/kvm/kvm.c | 8 ++++++++ 2 files changed, 21 insertions(+) diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h index 777d43cc96..48b88dbd31 100644 --- a/target/i386/confidential-guest.h +++ b/target/i386/confidential-guest.h @@ -42,6 +42,7 @@ struct X86ConfidentialGuestClass { void (*cpu_instance_init)(X86ConfidentialGuest *cg, CPUState *cpu); uint32_t (*adjust_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, int reg, uint32_t value); + int (*check_features)(X86ConfidentialGuest *cg, CPUState *cs); }; /** @@ -91,4 +92,16 @@ static inline int x86_confidential_guest_adjust_cpuid_features(X86ConfidentialGu } } +static inline int x86_confidential_guest_check_features(X86ConfidentialGuest *cg, + CPUState *cs) +{ + X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); + + if (klass->check_features) { + return klass->check_features(cg, cs); + } + + return 0; +} + #endif diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index cd87f5502a..a6bc089d02 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -2093,6 +2093,14 @@ int kvm_arch_init_vcpu(CPUState *cs) int r; Error *local_err = NULL; + if (current_machine->cgs) { + r = x86_confidential_guest_check_features( + X86_CONFIDENTIAL_GUEST(current_machine->cgs), cs); + if (r < 0) { + return r; + } + } + memset(&cpuid_data, 0, sizeof(cpuid_data)); cpuid_i = 0; From 4a2fb19669bb41eee5b2fb8e5b5ba30e1daaeaf5 Mon Sep 17 00:00:00 2001 From: Lei Wang Date: Tue, 17 Dec 2024 07:39:31 -0500 Subject: [PATCH 63/77] i386: Remove unused parameter "uint32_t bit" in feature_word_description() Parameter "uint32_t bit" is not used in function feature_word_description(), so remove it. Signed-off-by: Lei Wang Reviewed-by: Igor Mammedov Reviewed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20241217123932.948789-2-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 383c0b35d4..6258027ab1 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5771,7 +5771,7 @@ static const TypeInfo max_x86_cpu_type_info = { .class_init = max_x86_cpu_class_init, }; -static char *feature_word_description(FeatureWordInfo *f, uint32_t bit) +static char *feature_word_description(FeatureWordInfo *f) { assert(f->type == CPUID_FEATURE_WORD || f->type == MSR_FEATURE_WORD); @@ -5810,6 +5810,7 @@ static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, CPUX86State *env = &cpu->env; FeatureWordInfo *f = &feature_word_info[w]; int i; + g_autofree char *feat_word_str = feature_word_description(f); if (!cpu->force_features) { env->features[w] &= ~mask; @@ -5822,7 +5823,6 @@ static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, for (i = 0; i < 64; ++i) { if ((1ULL << i) & mask) { - g_autofree char *feat_word_str = feature_word_description(f, i); warn_report("%s: %s%s%s [bit %d]", verbose_prefix, feat_word_str, From adf25ad70f2f989e63c2cd3e9de4e38152d05e84 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Tue, 17 Dec 2024 07:39:32 -0500 Subject: [PATCH 64/77] target/i386: Print CPUID subleaf info for unsupported feature Some CPUID leaves have meaningful subleaf index. Print the subleaf info in feature_word_description for CPUID features. Signed-off-by: Xiaoyao Li Reviewed-by: Eduardo Habkost Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20241217123932.948789-3-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 6258027ab1..be3812973f 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5780,11 +5780,15 @@ static char *feature_word_description(FeatureWordInfo *f) { const char *reg = get_register_name_32(f->cpuid.reg); assert(reg); - return g_strdup_printf("CPUID.%02XH:%s", - f->cpuid.eax, reg); + if (!f->cpuid.needs_ecx) { + return g_strdup_printf("CPUID[eax=%02Xh].%s", f->cpuid.eax, reg); + } else { + return g_strdup_printf("CPUID[eax=%02Xh,ecx=%02Xh].%s", + f->cpuid.eax, f->cpuid.ecx, reg); + } } case MSR_FEATURE_WORD: - return g_strdup_printf("MSR(%02XH)", + return g_strdup_printf("MSR(%02Xh)", f->msr.index); } From e3d1a4a6d1d61cf5fbd0e4b389cfb3976093739f Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:57 -0400 Subject: [PATCH 65/77] i386/tdx: Fetch and validate CPUID of TD guest Use KVM_TDX_GET_CPUID to get the CPUIDs that are managed and enfored by TDX module for TD guest. Check QEMU's configuration against the fetched data. Print wanring message when 1. a feature is not supported but requested by QEMU or 2. QEMU doesn't want to expose a feature while it is enforced enabled. - If cpu->enforced_cpuid is not set, prints the warning message of both 1) and 2) and tweak QEMU's configuration. - If cpu->enforced_cpuid is set, quit if any case of 1) or 2). Signed-off-by: Xiaoyao Li Link: https://lore.kernel.org/r/20250508150002.689633-52-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 33 +++++++++++++- target/i386/cpu.h | 7 +++ target/i386/kvm/tdx.c | 101 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 139 insertions(+), 2 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index be3812973f..34364cf96a 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5808,8 +5808,8 @@ static bool x86_cpu_have_filtered_features(X86CPU *cpu) return false; } -static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, - const char *verbose_prefix) +void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix) { CPUX86State *env = &cpu->env; FeatureWordInfo *f = &feature_word_info[w]; @@ -5836,6 +5836,35 @@ static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, } } +void mark_forced_on_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix) +{ + CPUX86State *env = &cpu->env; + FeatureWordInfo *f = &feature_word_info[w]; + int i; + + if (!cpu->force_features) { + env->features[w] |= mask; + } + + cpu->forced_on_features[w] |= mask; + + if (!verbose_prefix) { + return; + } + + for (i = 0; i < 64; ++i) { + if ((1ULL << i) & mask) { + g_autofree char *feat_word_str = feature_word_description(f); + warn_report("%s: %s%s%s [bit %d]", + verbose_prefix, + feat_word_str, + f->feat_names[i] ? "." : "", + f->feat_names[i] ? f->feat_names[i] : "", i); + } + } +} + static void x86_cpuid_version_get_family(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 8a4b4217d0..22e82444ae 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -2215,6 +2215,9 @@ struct ArchCPU { /* Features that were filtered out because of missing host capabilities */ FeatureWordArray filtered_features; + /* Features that are forced enabled by underlying hypervisor, e.g., TDX */ + FeatureWordArray forced_on_features; + /* Enable PMU CPUID bits. This can't be enabled by default yet because * it doesn't have ABI stability guarantees, as it passes all PMU CPUID * bits returned by GET_SUPPORTED_CPUID (that depend on host CPU and kernel @@ -2526,6 +2529,10 @@ void host_cpuid(uint32_t function, uint32_t count, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); bool cpu_has_x2apic_feature(CPUX86State *env); bool is_feature_word_cpuid(uint32_t feature, uint32_t index, int reg); +void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix); +void mark_forced_on_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix); static inline bool x86_has_cpuid_0x1f(X86CPU *cpu) { diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index e35983ad9b..e474abf3a6 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -766,6 +766,106 @@ static uint32_t tdx_adjust_cpuid_features(X86ConfidentialGuest *cg, return value; } +static struct kvm_cpuid2 *tdx_fetch_cpuid(CPUState *cpu, int *ret) +{ + struct kvm_cpuid2 *fetch_cpuid; + int size = KVM_MAX_CPUID_ENTRIES; + Error *local_err = NULL; + int r; + + do { + error_free(local_err); + local_err = NULL; + + fetch_cpuid = g_malloc0(sizeof(*fetch_cpuid) + + sizeof(struct kvm_cpuid_entry2) * size); + fetch_cpuid->nent = size; + r = tdx_vcpu_ioctl(cpu, KVM_TDX_GET_CPUID, 0, fetch_cpuid, &local_err); + if (r == -E2BIG) { + g_free(fetch_cpuid); + size = fetch_cpuid->nent; + } + } while (r == -E2BIG); + + if (r < 0) { + error_report_err(local_err); + *ret = r; + return NULL; + } + + return fetch_cpuid; +} + +static int tdx_check_features(X86ConfidentialGuest *cg, CPUState *cs) +{ + uint64_t actual, requested, unavailable, forced_on; + g_autofree struct kvm_cpuid2 *fetch_cpuid; + const char *forced_on_prefix = NULL; + const char *unav_prefix = NULL; + struct kvm_cpuid_entry2 *entry; + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + FeatureWordInfo *wi; + FeatureWord w; + bool mismatch = false; + int r; + + fetch_cpuid = tdx_fetch_cpuid(cs, &r); + if (!fetch_cpuid) { + return r; + } + + if (cpu->check_cpuid || cpu->enforce_cpuid) { + unav_prefix = "TDX doesn't support requested feature"; + forced_on_prefix = "TDX forcibly sets the feature"; + } + + for (w = 0; w < FEATURE_WORDS; w++) { + wi = &feature_word_info[w]; + actual = 0; + + switch (wi->type) { + case CPUID_FEATURE_WORD: + entry = cpuid_find_entry(fetch_cpuid, wi->cpuid.eax, wi->cpuid.ecx); + if (!entry) { + /* + * If KVM doesn't report it means it's totally configurable + * by QEMU + */ + continue; + } + + actual = cpuid_entry_get_reg(entry, wi->cpuid.reg); + break; + case MSR_FEATURE_WORD: + /* + * TODO: + * validate MSR features when KVM has interface report them. + */ + continue; + } + + requested = env->features[w]; + unavailable = requested & ~actual; + mark_unavailable_features(cpu, w, unavailable, unav_prefix); + if (unavailable) { + mismatch = true; + } + + forced_on = actual & ~requested; + mark_forced_on_features(cpu, w, forced_on, forced_on_prefix); + if (forced_on) { + mismatch = true; + } + } + + if (cpu->enforce_cpuid && mismatch) { + return -EINVAL; + } + + return 0; +} + static int tdx_validate_attributes(TdxGuest *tdx, Error **errp) { if ((tdx->attributes & ~tdx_caps->supported_attrs)) { @@ -1161,4 +1261,5 @@ static void tdx_guest_class_init(ObjectClass *oc, const void *data) x86_klass->kvm_type = tdx_kvm_type; x86_klass->cpu_instance_init = tdx_cpu_instance_init; x86_klass->adjust_cpuid_features = tdx_adjust_cpuid_features; + x86_klass->check_features = tdx_check_features; } From deb9db6fb789cfe80527b75983e86137589227a4 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:58 -0400 Subject: [PATCH 66/77] i386/tdx: Don't treat SYSCALL as unavailable On Intel CPU, the value of CPUID_EXT2_SYSCALL depends on the mode of the vcpu. It's 0 outside 64-bit mode and 1 in 64-bit mode. The initial state of TDX vcpu is 32-bit protected mode. At the time of calling KVM_TDX_GET_CPUID, vcpu hasn't started running so the value read is 0. In reality, 64-bit mode should always be supported. So mark CPUID_EXT2_SYSCALL always supported to avoid false warning. Signed-off-by: Xiaoyao Li Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-53-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/tdx.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index e474abf3a6..7629302991 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -845,6 +845,19 @@ static int tdx_check_features(X86ConfidentialGuest *cg, CPUState *cs) continue; } + /* Fixup for special cases */ + switch (w) { + case FEAT_8000_0001_EDX: + /* + * Intel enumerates SYSCALL bit as 1 only when processor in 64-bit + * mode and before vcpu running it's not in 64-bit mode. + */ + actual |= CPUID_EXT2_SYSCALL; + break; + default: + break; + } + requested = env->features[w]; unavailable = requested & ~actual; mark_unavailable_features(cpu, w, unavailable, unav_prefix); From ea4867b911fc2f6d4c8bd50ec62f0dc0fa190fab Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 10:59:59 -0400 Subject: [PATCH 67/77] i386/tdx: Make invtsc default on Because it's fixed1 bit that enforced by TDX module. Signed-off-by: Xiaoyao Li Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-54-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/tdx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 7629302991..a55ab436ad 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -742,6 +742,9 @@ static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu) object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort); + /* invtsc is fixed1 for TD guest */ + object_property_set_bool(OBJECT(cpu), "invtsc", true, &error_abort); + x86cpu->enable_cpuid_0x1f = true; } From 907ee7b67e50a7eea2768c66e3ad67c9aa4ffd3c Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 11:00:00 -0400 Subject: [PATCH 68/77] i386/tdx: Validate phys_bits against host value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For TDX guest, the phys_bits is not configurable and can only be host/native value. Validate phys_bits inside tdx_check_features(). Signed-off-by: Xiaoyao Li Reviewed-by: Daniel P. Berrangé Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250508150002.689633-55-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/host-cpu.c | 2 +- target/i386/host-cpu.h | 1 + target/i386/kvm/tdx.c | 8 ++++++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/target/i386/host-cpu.c b/target/i386/host-cpu.c index a2d3830f5b..7512567298 100644 --- a/target/i386/host-cpu.c +++ b/target/i386/host-cpu.c @@ -15,7 +15,7 @@ #include "system/system.h" /* Note: Only safe for use on x86(-64) hosts */ -static uint32_t host_cpu_phys_bits(void) +uint32_t host_cpu_phys_bits(void) { uint32_t eax; uint32_t host_phys_bits; diff --git a/target/i386/host-cpu.h b/target/i386/host-cpu.h index 6a9bc918ba..b97ec01c9b 100644 --- a/target/i386/host-cpu.h +++ b/target/i386/host-cpu.h @@ -10,6 +10,7 @@ #ifndef HOST_CPU_H #define HOST_CPU_H +uint32_t host_cpu_phys_bits(void); void host_cpu_instance_init(X86CPU *cpu); void host_cpu_max_instance_init(X86CPU *cpu); bool host_cpu_realizefn(CPUState *cs, Error **errp); diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index a55ab436ad..0a21ae555c 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -25,6 +25,7 @@ #include "cpu.h" #include "cpu-internal.h" +#include "host-cpu.h" #include "hw/i386/e820_memory_layout.h" #include "hw/i386/tdvf.h" #include "hw/i386/x86.h" @@ -879,6 +880,13 @@ static int tdx_check_features(X86ConfidentialGuest *cg, CPUState *cs) return -EINVAL; } + if (cpu->phys_bits != host_cpu_phys_bits()) { + error_report("TDX requires guest CPU physical bits (%u) " + "to match host CPU physical bits (%u)", + cpu->phys_bits, host_cpu_phys_bits()); + return -EINVAL; + } + return 0; } From dc1424319311f86449c6825ceec2364ee645a363 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 8 May 2025 11:00:01 -0400 Subject: [PATCH 69/77] docs: Add TDX documentation Add docs/system/i386/tdx.rst for TDX support, and add tdx in confidential-guest-support.rst Signed-off-by: Xiaoyao Li Link: https://lore.kernel.org/r/20250508150002.689633-56-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- docs/system/confidential-guest-support.rst | 1 + docs/system/i386/tdx.rst | 161 +++++++++++++++++++++ docs/system/target-i386.rst | 1 + 3 files changed, 163 insertions(+) create mode 100644 docs/system/i386/tdx.rst diff --git a/docs/system/confidential-guest-support.rst b/docs/system/confidential-guest-support.rst index 0c490dbda2..66129fbab6 100644 --- a/docs/system/confidential-guest-support.rst +++ b/docs/system/confidential-guest-support.rst @@ -38,6 +38,7 @@ Supported mechanisms Currently supported confidential guest mechanisms are: * AMD Secure Encrypted Virtualization (SEV) (see :doc:`i386/amd-memory-encryption`) +* Intel Trust Domain Extension (TDX) (see :doc:`i386/tdx`) * POWER Protected Execution Facility (PEF) (see :ref:`power-papr-protected-execution-facility-pef`) * s390x Protected Virtualization (PV) (see :doc:`s390x/protvirt`) diff --git a/docs/system/i386/tdx.rst b/docs/system/i386/tdx.rst new file mode 100644 index 0000000000..8131750b64 --- /dev/null +++ b/docs/system/i386/tdx.rst @@ -0,0 +1,161 @@ +Intel Trusted Domain eXtension (TDX) +==================================== + +Intel Trusted Domain eXtensions (TDX) refers to an Intel technology that extends +Virtual Machine Extensions (VMX) and Multi-Key Total Memory Encryption (MKTME) +with a new kind of virtual machine guest called a Trust Domain (TD). A TD runs +in a CPU mode that is designed to protect the confidentiality of its memory +contents and its CPU state from any other software, including the hosting +Virtual Machine Monitor (VMM), unless explicitly shared by the TD itself. + +Prerequisites +------------- + +To run TD, the physical machine needs to have TDX module loaded and initialized +while KVM hypervisor has TDX support and has TDX enabled. If those requirements +are met, the ``KVM_CAP_VM_TYPES`` will report the support of ``KVM_X86_TDX_VM``. + +Trust Domain Virtual Firmware (TDVF) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Trust Domain Virtual Firmware (TDVF) is required to provide TD services to boot +TD Guest OS. TDVF needs to be copied to guest private memory and measured before +the TD boots. + +KVM vcpu ioctl ``KVM_TDX_INIT_MEM_REGION`` can be used to populate the TDVF +content into its private memory. + +Since TDX doesn't support readonly memslot, TDVF cannot be mapped as pflash +device and it actually works as RAM. "-bios" option is chosen to load TDVF. + +OVMF is the opensource firmware that implements the TDVF support. Thus the +command line to specify and load TDVF is ``-bios OVMF.fd`` + +Feature Configuration +--------------------- + +Unlike non-TDX VM, the CPU features (enumerated by CPU or MSR) of a TD are not +under full control of VMM. VMM can only configure part of features of a TD on +``KVM_TDX_INIT_VM`` command of VM scope ``MEMORY_ENCRYPT_OP`` ioctl. + +The configurable features have three types: + +- Attributes: + - PKS (bit 30) controls whether Supervisor Protection Keys is exposed to TD, + which determines related CPUID bit and CR4 bit; + - PERFMON (bit 63) controls whether PMU is exposed to TD. + +- XSAVE related features (XFAM): + XFAM is a 64b mask, which has the same format as XCR0 or IA32_XSS MSR. It + determines the set of extended features available for use by the guest TD. + +- CPUID features: + Only some bits of some CPUID leaves are directly configurable by VMM. + +What features can be configured is reported via TDX capabilities. + +TDX capabilities +~~~~~~~~~~~~~~~~ + +The VM scope ``MEMORY_ENCRYPT_OP`` ioctl provides command ``KVM_TDX_CAPABILITIES`` +to get the TDX capabilities from KVM. It returns a data structure of +``struct kvm_tdx_capabilities``, which tells the supported configuration of +attributes, XFAM and CPUIDs. + +TD attributes +~~~~~~~~~~~~~ + +QEMU supports configuring raw 64-bit TD attributes directly via "attributes" +property of "tdx-guest" object. Note, it's users' responsibility to provide a +valid value because some bits may not supported by current QEMU or KVM yet. + +QEMU also supports the configuration of individual attribute bits that are +supported by it, via properties of "tdx-guest" object. +E.g., "sept-ve-disable" (bit 28). + +MSR based features +~~~~~~~~~~~~~~~~~~ + +Current KVM doesn't support MSR based feature (e.g., MSR_IA32_ARCH_CAPABILITIES) +configuration for TDX, and it's a future work to enable it in QEMU when KVM adds +support of it. + +Feature check +~~~~~~~~~~~~~ + +QEMU checks if the final (CPU) features, determined by given cpu model and +explicit feature adjustment of "+featureA/-featureB", can be supported or not. +It can produce feature not supported warning like + + "warning: host doesn't support requested feature: CPUID.07H:EBX.intel-pt [bit 25]" + +It can also produce warning like + + "warning: TDX forcibly sets the feature: CPUID.80000007H:EDX.invtsc [bit 8]" + +if the fixed-1 feature is requested to be disabled explicitly. This is newly +added to QEMU for TDX because TDX has fixed-1 features that are forcibly enabled +by TDX module and VMM cannot disable them. + +Launching a TD (TDX VM) +----------------------- + +To launch a TD, the necessary command line options are tdx-guest object and +split kernel-irqchip, as below: + +.. parsed-literal:: + + |qemu_system_x86| \\ + -accel kvm \\ + -cpu host \\ + -object tdx-guest,id=tdx0 \\ + -machine ...,confidential-guest-support=tdx0 \\ + -bios OVMF.fd \\ + +Restrictions +------------ + + - kernel-irqchip must be split; + + This is set by default for TDX guest if kernel-irqchip is left on its default + 'auto' setting. + + - No readonly support for private memory; + + - No SMM support: SMM support requires manipulating the guest register states + which is not allowed; + +Debugging +--------- + +Bit 0 of TD attributes, is DEBUG bit, which decides if the TD runs in off-TD +debug mode. When in off-TD debug mode, TD's VCPU state and private memory are +accessible via given SEAMCALLs. This requires KVM to expose APIs to invoke those +SEAMCALLs and corresonponding QEMU change. + +It's targeted as future work. + +TD attestation +-------------- + +In TD guest, the attestation process is used to verify the TDX guest +trustworthiness to other entities before provisioning secrets to the guest. + +TD attestation is initiated first by calling TDG.MR.REPORT inside TD to get the +REPORT. Then the REPORT data needs to be converted into a remotely verifiable +Quote by SGX Quoting Enclave (QE). + +It's a future work in QEMU to add support of TD attestation since it lacks +support in current KVM. + +Live Migration +-------------- + +Future work. + +References +---------- + +- `TDX Homepage `__ + +- `SGX QE `__ diff --git a/docs/system/target-i386.rst b/docs/system/target-i386.rst index ab7af1a75d..43b09c79d6 100644 --- a/docs/system/target-i386.rst +++ b/docs/system/target-i386.rst @@ -31,6 +31,7 @@ Architectural features i386/kvm-pv i386/sgx i386/amd-memory-encryption + i386/tdx OS requirements ~~~~~~~~~~~~~~~ From 1297b285cc3ffbd06dc3208fbecdb2d582c535dc Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 26 May 2025 09:22:20 +0200 Subject: [PATCH 70/77] rust: make declaration of dependent crates more consistent Crates like "bilge" and "libc" can be shared by more than one directory, so declare them directly in rust/meson.build. While at it, make their variable names end with "_rs" and always add a subproject() statement (as that pinpoints the error better if the subproject is missing and cannot be downloaded). Reviewed-by: Zhao Liu Signed-off-by: Paolo Bonzini --- rust/hw/char/pl011/meson.build | 12 +++--------- rust/meson.build | 16 ++++++++++++++++ rust/qemu-api-macros/meson.build | 14 +++----------- rust/qemu-api/meson.build | 4 +--- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/rust/hw/char/pl011/meson.build b/rust/hw/char/pl011/meson.build index 547cca5a96..494b6c123c 100644 --- a/rust/hw/char/pl011/meson.build +++ b/rust/hw/char/pl011/meson.build @@ -1,17 +1,11 @@ -subproject('bilge-0.2-rs', required: true) -subproject('bilge-impl-0.2-rs', required: true) - -bilge_dep = dependency('bilge-0.2-rs') -bilge_impl_dep = dependency('bilge-impl-0.2-rs') - _libpl011_rs = static_library( 'pl011', files('src/lib.rs'), override_options: ['rust_std=2021', 'build.rust_std=2021'], rust_abi: 'rust', dependencies: [ - bilge_dep, - bilge_impl_dep, + bilge_rs, + bilge_impl_rs, qemu_api, qemu_api_macros, ], @@ -21,6 +15,6 @@ rust_devices_ss.add(when: 'CONFIG_X_PL011_RUST', if_true: [declare_dependency( link_whole: [_libpl011_rs], # Putting proc macro crates in `dependencies` is necessary for Meson to find # them when compiling the root per-target static rust lib. - dependencies: [bilge_impl_dep, qemu_api_macros], + dependencies: [bilge_impl_rs, qemu_api_macros], variables: {'crate': 'pl011'}, )]) diff --git a/rust/meson.build b/rust/meson.build index 91e52b8fb8..1f0dcce7d0 100644 --- a/rust/meson.build +++ b/rust/meson.build @@ -1,3 +1,19 @@ +subproject('bilge-0.2-rs', required: true) +subproject('bilge-impl-0.2-rs', required: true) +subproject('libc-0.2-rs', required: true) + +bilge_rs = dependency('bilge-0.2-rs') +bilge_impl_rs = dependency('bilge-impl-0.2-rs') +libc_rs = dependency('libc-0.2-rs') + +subproject('proc-macro2-1-rs', required: true) +subproject('quote-1-rs', required: true) +subproject('syn-2-rs', required: true) + +quote_rs_native = dependency('quote-1-rs', native: true) +syn_rs_native = dependency('syn-2-rs', native: true) +proc_macro2_rs_native = dependency('proc-macro2-1-rs', native: true) + subdir('qemu-api-macros') subdir('qemu-api') diff --git a/rust/qemu-api-macros/meson.build b/rust/qemu-api-macros/meson.build index 6f94a4bb3c..8610ce1c84 100644 --- a/rust/qemu-api-macros/meson.build +++ b/rust/qemu-api-macros/meson.build @@ -1,11 +1,3 @@ -subproject('proc-macro2-1-rs', required: true) -subproject('quote-1-rs', required: true) -subproject('syn-2-rs', required: true) - -quote_dep = dependency('quote-1-rs', native: true) -syn_dep = dependency('syn-2-rs', native: true) -proc_macro2_dep = dependency('proc-macro2-1-rs', native: true) - _qemu_api_macros_rs = rust.proc_macro( 'qemu_api_macros', files('src/lib.rs'), @@ -16,9 +8,9 @@ _qemu_api_macros_rs = rust.proc_macro( '--cfg', 'feature="proc-macro"', ], dependencies: [ - proc_macro2_dep, - quote_dep, - syn_dep, + proc_macro2_rs_native, + quote_rs_native, + syn_rs_native, ], ) diff --git a/rust/qemu-api/meson.build b/rust/qemu-api/meson.build index 1696df705b..1ea86b8bbf 100644 --- a/rust/qemu-api/meson.build +++ b/rust/qemu-api/meson.build @@ -2,8 +2,6 @@ _qemu_api_cfg = run_command(rustc_args, '--config-headers', config_host_h, '--features', files('Cargo.toml'), capture: true, check: true).stdout().strip().splitlines() -libc_dep = dependency('libc-0.2-rs') - # _qemu_api_cfg += ['--cfg', 'feature="allocator"'] if get_option('debug_mutex') _qemu_api_cfg += ['--cfg', 'feature="debug_cell"'] @@ -37,7 +35,7 @@ _qemu_api_rs = static_library( override_options: ['rust_std=2021', 'build.rust_std=2021'], rust_abi: 'rust', rust_args: _qemu_api_cfg, - dependencies: [libc_dep, qemu_api_macros], + dependencies: [libc_rs, qemu_api_macros], ) rust.test('rust-qemu-api-tests', _qemu_api_rs, From 397db937e85d7b9f5a6f0b30764786cef09d1ff3 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 8 May 2025 14:57:59 -0500 Subject: [PATCH 71/77] target/i386: Update EPYC CPU model for Cache property, RAS, SVM feature bits Found that some of the cache properties are not set correctly for EPYC models. l1d_cache.no_invd_sharing should not be true. l1i_cache.no_invd_sharing should not be true. L2.self_init should be true. L2.inclusive should be true. L3.inclusive should not be true. L3.no_invd_sharing should be true. Fix the cache properties. Also add the missing RAS and SVM features bits on AMD EPYC CPU models. The SVM feature bits are used in nested guests. succor : Software uncorrectable error containment and recovery capability. overflow-recov : MCA overflow recovery support. lbrv : LBR virtualization tsc-scale : MSR based TSC rate control vmcb-clean : VMCB clean bits flushbyasid : Flush by ASID pause-filter : Pause intercept filter pfthreshold : PAUSE filter threshold v-vmsave-vmload : Virtualized VMLOAD and VMSAVE vgif : Virtualized GIF Signed-off-by: Babu Moger Reviewed-by: Maksim Davydov Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/515941861700d7066186c9600bc5d96a1741ef0c.1746734284.git.babu.moger@amd.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 34364cf96a..b6c63b892e 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -2211,6 +2211,60 @@ static CPUCaches epyc_v4_cache_info = { }, }; +static CPUCaches epyc_v5_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 64 * KiB, + .line_size = 64, + .associativity = 4, + .partitions = 1, + .sets = 256, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 8 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 8192, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + static const CPUCaches epyc_rome_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -5238,6 +5292,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, .cache_info = &epyc_v4_cache_info }, + { + .version = 5, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "model-id", + "AMD EPYC-v5 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_v5_cache_info + }, { /* end of list */ } } }, From 83d940e9700527ff080416ce2fa52ee1f4771d72 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 8 May 2025 14:58:00 -0500 Subject: [PATCH 72/77] target/i386: Update EPYC-Rome CPU model for Cache property, RAS, SVM feature bits Found that some of the cache properties are not set correctly for EPYC models. l1d_cache.no_invd_sharing should not be true. l1i_cache.no_invd_sharing should not be true. L2.self_init should be true. L2.inclusive should be true. L3.inclusive should not be true. L3.no_invd_sharing should be true. Fix these cache properties. Also add the missing RAS and SVM features bits on AMD EPYC-Rome. The SVM feature bits are used in nested guests. succor : Software uncorrectable error containment and recovery capability. overflow-recov : MCA overflow recovery support. lbrv : LBR virtualization tsc-scale : MSR based TSC rate control vmcb-clean : VMCB clean bits flushbyasid : Flush by ASID pause-filter : Pause intercept filter pfthreshold : PAUSE filter threshold v-vmsave-vmload : Virtualized VMLOAD and VMSAVE vgif : Virtualized GIF Signed-off-by: Babu Moger Reviewed-by: Maksim Davydov Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/8265af72057b84c99ac3a02a5487e32759cc69b1.1746734284.git.babu.moger@amd.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index b6c63b892e..5b5324a35c 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -2373,6 +2373,60 @@ static const CPUCaches epyc_rome_v3_cache_info = { }, }; +static const CPUCaches epyc_rome_v5_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 16 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 16384, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + static const CPUCaches epyc_milan_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -5449,6 +5503,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } }, }, + { + .version = 5, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "model-id", + "AMD EPYC-Rome-v5 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_rome_v5_cache_info + }, { /* end of list */ } } }, From fc014d9ba5b26b27401e0e88a4e1ef827c68fe64 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 8 May 2025 14:58:01 -0500 Subject: [PATCH 73/77] target/i386: Update EPYC-Milan CPU model for Cache property, RAS, SVM feature bits Found that some of the cache properties are not set correctly for EPYC models. l1d_cache.no_invd_sharing should not be true. l1i_cache.no_invd_sharing should not be true. L2.self_init should be true. L2.inclusive should be true. L3.inclusive should not be true. L3.no_invd_sharing should be true. Fix these cache properties. Also add the missing RAS and SVM features bits on AMD EPYC-Milan model. The SVM feature bits are used in nested guests. succor : Software uncorrectable error containment and recovery capability. overflow-recov : MCA overflow recovery support. lbrv : LBR virtualization tsc-scale : MSR based TSC rate control vmcb-clean : VMCB clean bits flushbyasid : Flush by ASID pause-filter : Pause intercept filter pfthreshold : PAUSE filter threshold v-vmsave-vmload : Virtualized VMLOAD and VMSAVE vgif : Virtualized GIF Signed-off-by: Babu Moger Reviewed-by: Maksim Davydov Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/c619c0e09a9d5d496819ed48d69181d65f416891.1746734284.git.babu.moger@amd.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 5b5324a35c..d01a808e3a 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -2535,6 +2535,60 @@ static const CPUCaches epyc_milan_v2_cache_info = { }, }; +static const CPUCaches epyc_milan_v3_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + static const CPUCaches epyc_genoa_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -5597,6 +5651,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, .cache_info = &epyc_milan_v2_cache_info }, + { + .version = 3, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "model-id", + "AMD EPYC-Milan-v3 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_milan_v3_cache_info + }, { /* end of list */ } } }, From dfd5b456108a75588ab094358ba5754787146d3d Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 8 May 2025 14:58:02 -0500 Subject: [PATCH 74/77] target/i386: Add couple of feature bits in CPUID_Fn80000021_EAX Add CPUID bit indicates that a WRMSR to MSR_FS_BASE, MSR_GS_BASE, or MSR_KERNEL_GS_BASE is non-serializing amd PREFETCHI that the indicates support for IC prefetch. CPUID_Fn80000021_EAX Bit Feature description 20 Indicates support for IC prefetch. 1 FsGsKernelGsBaseNonSerializing. WRMSR to FS_BASE, GS_BASE and KernelGSbase are non-serializing. Link: https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/programmer-references/57238.zip Signed-off-by: Babu Moger Reviewed-by: Maksim Davydov Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/a5f6283a59579b09ac345b3f21ecb3b3b2d92451.1746734284.git.babu.moger@amd.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 4 ++-- target/i386/cpu.h | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index d01a808e3a..0d1b907778 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1253,12 +1253,12 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { [FEAT_8000_0021_EAX] = { .type = CPUID_FEATURE_WORD, .feat_names = { - "no-nested-data-bp", NULL, "lfence-always-serializing", NULL, + "no-nested-data-bp", "fs-gs-base-ns", "lfence-always-serializing", NULL, NULL, NULL, "null-sel-clr-base", NULL, "auto-ibrs", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + "prefetchi", NULL, NULL, NULL, "eraps", NULL, NULL, "sbpb", "ibpb-brtype", "srso-no", "srso-user-kernel-no", NULL, }, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 22e82444ae..1146465c8c 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1092,12 +1092,16 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); /* Processor ignores nested data breakpoints */ #define CPUID_8000_0021_EAX_NO_NESTED_DATA_BP (1U << 0) +/* WRMSR to FS_BASE, GS_BASE, or KERNEL_GS_BASE is non-serializing */ +#define CPUID_8000_0021_EAX_FS_GS_BASE_NS (1U << 1) /* LFENCE is always serializing */ #define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) /* Null Selector Clears Base */ #define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) /* Automatic IBRS */ #define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8) +/* Indicates support for IC prefetch */ +#define CPUID_8000_0021_EAX_PREFETCHI (1U << 20) /* Enhanced Return Address Predictor Scurity */ #define CPUID_8000_0021_EAX_ERAPS (1U << 24) /* Selective Branch Predictor Barrier */ From abc92cc8488b5dbcc403b5be24d8092180605101 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 8 May 2025 14:58:03 -0500 Subject: [PATCH 75/77] target/i386: Update EPYC-Genoa for Cache property, perfmon-v2, RAS and SVM feature bits Found that some of the cache properties are not set correctly for EPYC models. l1d_cache.no_invd_sharing should not be true. l1i_cache.no_invd_sharing should not be true. L2.self_init should be true. L2.inclusive should be true. L3.inclusive should not be true. L3.no_invd_sharing should be true. Fix these cache properties. Also add the missing RAS and SVM features bits on AMD EPYC-Genoa model. The SVM feature bits are used in nested guests. perfmon-v2 : Allow guests to make use of the PerfMonV2 features. succor : Software uncorrectable error containment and recovery capability. overflow-recov : MCA overflow recovery support. lbrv : LBR virtualization tsc-scale : MSR based TSC rate control vmcb-clean : VMCB clean bits flushbyasid : Flush by ASID pause-filter : Pause intercept filter pfthreshold : PAUSE filter threshold v-vmsave-vmload: Virtualized VMLOAD and VMSAVE vgif : Virtualized GIF fs-gs-base-ns : WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing The feature details are available in APM listed below [1]. [1] AMD64 Architecture Programmer's Manual Volume 2: System Programming Publication # 24593 Revision 3.41. Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 Signed-off-by: Babu Moger Reviewed-by: Maksim Davydov Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/afe3f05d4116124fd5795f28fc23d7b396140313.1746734284.git.babu.moger@amd.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 0d1b907778..a656b3c664 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -2643,6 +2643,59 @@ static const CPUCaches epyc_genoa_cache_info = { }, }; +static const CPUCaches epyc_genoa_v2_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 1 * MiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 2048, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; /* The following VMX features are not supported by KVM and are left out in the * CPU definitions: * @@ -5744,6 +5797,31 @@ static const X86CPUDefinition builtin_x86_defs[] = { .xlevel = 0x80000022, .model_id = "AMD EPYC-Genoa Processor", .cache_info = &epyc_genoa_cache_info, + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "fs-gs-base-ns", "on" }, + { "perfmon-v2", "on" }, + { "model-id", + "AMD EPYC-Genoa-v2 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_genoa_v2_cache_info + }, + { /* end of list */ } + } }, { .name = "YongFeng", From 3771a4daa273ba17cb27309984413790d1df5651 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 8 May 2025 14:58:04 -0500 Subject: [PATCH 76/77] target/i386: Add support for EPYC-Turin model Add the support for AMD EPYC zen 5 processors (EPYC-Turin). Add the following new feature bits on top of the feature bits from the previous generation EPYC models. movdiri : Move Doubleword as Direct Store Instruction movdir64b : Move 64 Bytes as Direct Store Instruction avx512-vp2intersect : AVX512 Vector Pair Intersection to a Pair of Mask Register avx-vnni : AVX VNNI Instruction prefetchi : Indicates support for IC prefetch sbpb : Selective Branch Predictor Barrier ibpb-brtype : IBPB includes branch type prediction flushing srso-user-kernel-no : Not vulnerable to SRSO at the user-kernel boundary Link: https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/programmer-references/57238.zip Link: https://www.amd.com/content/dam/amd/en/documents/corporate/cr/speculative-return-stack-overflow-whitepaper.pdf Signed-off-by: Babu Moger Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/b4fa7708a0e1453d2e9b8ec3dc881feb92eeca0b.1746734284.git.babu.moger@amd.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 138 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index a656b3c664..15439fdfa8 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -2696,6 +2696,61 @@ static const CPUCaches epyc_genoa_v2_cache_info = { .share_level = CPU_TOPOLOGY_LEVEL_DIE, }, }; + +static const CPUCaches epyc_turin_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 48 * KiB, + .line_size = 64, + .associativity = 12, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 1 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + /* The following VMX features are not supported by KVM and are left out in the * CPU definitions: * @@ -5959,6 +6014,89 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .name = "EPYC-Turin", + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, + .family = 26, + .model = 0, + .stepping = 0, + .features[FEAT_1_ECX] = + CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | + CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | + CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | + CPUID_EXT_PCID | CPUID_EXT_CX16 | CPUID_EXT_FMA | + CPUID_EXT_SSSE3 | CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | + CPUID_EXT_SSE3, + .features[FEAT_1_EDX] = + CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | + CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | + CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | + CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | + CPUID_VME | CPUID_FP87, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, + .features[FEAT_7_0_EBX] = + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | + CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | + CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_AVX512F | + CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_AVX512IFMA | + CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI | + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | + CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI | + CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | + CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | + CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 | + CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_MOVDIRI | + CPUID_7_0_ECX_MOVDIR64B, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_AVX512_VP2INTERSECT, + .features[FEAT_7_1_EAX] = + CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | + CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | + CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | + CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | + CPUID_EXT2_SYSCALL, + .features[FEAT_8000_0007_EBX] = + CPUID_8000_0007_EBX_OVERFLOW_RECOV | CPUID_8000_0007_EBX_SUCCOR, + .features[FEAT_8000_0008_EBX] = + CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | + CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | + CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | + CPUID_8000_0008_EBX_STIBP_ALWAYS_ON | + CPUID_8000_0008_EBX_AMD_SSBD | CPUID_8000_0008_EBX_AMD_PSFD, + .features[FEAT_8000_0021_EAX] = + CPUID_8000_0021_EAX_NO_NESTED_DATA_BP | + CPUID_8000_0021_EAX_FS_GS_BASE_NS | + CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING | + CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE | + CPUID_8000_0021_EAX_AUTO_IBRS | CPUID_8000_0021_EAX_PREFETCHI | + CPUID_8000_0021_EAX_SBPB | CPUID_8000_0021_EAX_IBPB_BRTYPE | + CPUID_8000_0021_EAX_SRSO_USER_KERNEL_NO, + .features[FEAT_8000_0022_EAX] = + CPUID_8000_0022_EAX_PERFMON_V2, + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, + .features[FEAT_SVM] = + CPUID_SVM_NPT | CPUID_SVM_LBRV | CPUID_SVM_NRIPSAVE | + CPUID_SVM_TSCSCALE | CPUID_SVM_VMCBCLEAN | CPUID_SVM_FLUSHASID | + CPUID_SVM_PAUSEFILTER | CPUID_SVM_PFTHRESHOLD | + CPUID_SVM_V_VMSAVE_VMLOAD | CPUID_SVM_VGIF | + CPUID_SVM_VNMI | CPUID_SVM_SVME_ADDR_CHK, + .xlevel = 0x80000022, + .model_id = "AMD EPYC-Turin Processor", + .cache_info = &epyc_turin_cache_info, + }, }; /* From 9bd24d8d2756a0771b6677b02c7f9b603ef6afe9 Mon Sep 17 00:00:00 2001 From: Fiona Ebner Date: Mon, 26 May 2025 13:44:47 +0200 Subject: [PATCH 77/77] target/i386/tcg/helper-tcg: fix file references in comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 32cad1ffb8 ("include: Rename sysemu/ -> system/") renamed target/i386/tcg/sysemu => target/i386/tcg/system. Signed-off-by: Fiona Ebner Reviewed-by: Philippe Mathieu-Daudé Link: https://lore.kernel.org/r/20250526114447.1243840-1-f.ebner@proxmox.com Signed-off-by: Paolo Bonzini --- target/i386/tcg/helper-tcg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h index 6b3f19855f..be011b06b7 100644 --- a/target/i386/tcg/helper-tcg.h +++ b/target/i386/tcg/helper-tcg.h @@ -97,7 +97,7 @@ static inline unsigned int compute_pf(uint8_t x) /* misc_helper.c */ void cpu_load_eflags(CPUX86State *env, int eflags, int update_mask); -/* sysemu/svm_helper.c */ +/* system/svm_helper.c */ #ifndef CONFIG_USER_ONLY G_NORETURN void cpu_vmexit(CPUX86State *nenv, uint32_t exit_code, uint64_t exit_info_1, uintptr_t retaddr); @@ -115,7 +115,7 @@ int exception_has_error_code(int intno); /* smm_helper.c */ void do_smm_enter(X86CPU *cpu); -/* sysemu/bpt_helper.c */ +/* system/bpt_helper.c */ bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update); /*