From 9a02932b059613f7c4bea273677811561a9237ff Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 9 Jun 2025 12:58:54 +0200 Subject: [PATCH 01/24] meson: cleanup win32 library detection As pointed out by Akihiko Odaki, all Win32 libraries in MinGW have lowercase names. This means that on (case-insensitive) Windows you can use the mixed-case names suggested by Microsoft or all-lowercase names, while on Linux you need to make them lowercase. QEMU was already using lowercase names, so there is no need to test the mixed-case name version of libSynchronization. Remove the unnecessary test and while at it make all the tests use "required: true". Signed-off-by: Paolo Bonzini --- meson.build | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/meson.build b/meson.build index 34729c2a3d..ed60be2a2d 100644 --- a/meson.build +++ b/meson.build @@ -843,15 +843,12 @@ host_dsosuf = '.so' if host_os == 'windows' midl = find_program('midl', required: false) widl = find_program('widl', required: false) - pathcch = cc.find_library('pathcch') - synchronization = cc.find_library('Synchronization', required: false) - if not synchronization.found() - # The library name is lowercase on mingw - synchronization = cc.find_library('synchronization', required: true) - endif - socket = cc.find_library('ws2_32') - winmm = cc.find_library('winmm') + # MinGW uses lowercase for library names + pathcch = cc.find_library('pathcch', required: true) + synchronization = cc.find_library('synchronization', required: true) + socket = cc.find_library('ws2_32', required: true) + winmm = cc.find_library('winmm', required: true) win = import('windows') version_res = win.compile_resources('version.rc', From 0f1d6606c28d0ae81a1b311972c5c54e5e867bf0 Mon Sep 17 00:00:00 2001 From: Mark Cave-Ayland Date: Wed, 11 Jun 2025 14:03:15 +0100 Subject: [PATCH 02/24] target/i386: fix TB exit logic in gen_movl_seg() when writing to SS Before commit e54ef98c8a ("target/i386: do not trigger IRQ shadow for LSS"), any write to SS in gen_movl_seg() would cause a TB exit. The changes introduced by this commit were intended to restrict the DISAS_EOB_INHIBIT_IRQ exit to the case where inhibit_irq is true, but missed that a DISAS_EOB_NEXT exit can still be required when writing to SS and inhibit_irq is false. Comparing the PE(s) && !VM86(s) section with the logic in x86_update_hflags(), we can see that the DISAS_EOB_NEXT exit is still required for the !CODE32 case when writing to SS in gen_movl_seg() because any change to the SS flags can affect hflags. Similarly we can see that the existing CODE32 case is still correct since a change to any of DS, ES and SS can affect hflags. Finally for the gen_op_movl_seg_real() case an explicit TB exit is not needed because the segment register selector does not affect hflags. Update the logic in gen_movl_seg() so that a write to SS with inhibit_irq set to false where PE(s) && !VM86(s) will generate a DISAS_EOB_NEXT exit along with the inline comment. This has the effect of allowing Win98SE to boot in QEMU once again. Signed-off-by: Mark Cave-Ayland Fixes: e54ef98c8a ("target/i386: do not trigger IRQ shadow for LSS") Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2987 Link: https://lore.kernel.org/r/20250611130315.383151-1-mark.cave-ayland@ilande.co.uk Reviewed-by: Peter Maydell Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 0fcddc2ec0..0cb87d0201 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2033,8 +2033,11 @@ static void gen_movl_seg(DisasContext *s, X86Seg seg_reg, TCGv src, bool inhibit tcg_gen_trunc_tl_i32(sel, src); gen_helper_load_seg(tcg_env, tcg_constant_i32(seg_reg), sel); - /* For move to DS/ES/SS, the addseg or ss32 flags may change. */ - if (CODE32(s) && seg_reg < R_FS) { + /* + * For moves to SS, the SS32 flag may change. For CODE32 only, changes + * to SS, DS and ES may change the ADDSEG flags. + */ + if (seg_reg == R_SS || (CODE32(s) && seg_reg < R_FS)) { s->base.is_jmp = DISAS_EOB_NEXT; } } else { From 5d353cce65142440251c014331c2f25f3af5e1b2 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Tue, 10 Jun 2025 22:41:27 +0200 Subject: [PATCH 03/24] hw: Fix type constant for DTB files Commit fcb1ad456c58 ("system/datadir: Add new type constant for DTB files") introduced a new type constant for DTB files and converted the boards with bundled device trees to use it. Convert the other boards for consistency. Fixes: fcb1ad456c58 ("system/datadir: Add new type constant for DTB files") Signed-off-by: Bernhard Beschow Link: https://lore.kernel.org/r/20250610204131.2862-2-shentey@gmail.com Signed-off-by: Paolo Bonzini --- hw/arm/boot.c | 2 +- hw/ppc/e500.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/arm/boot.c b/hw/arm/boot.c index 79afb51b8a..64040504a1 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -527,7 +527,7 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo, if (binfo->dtb_filename) { char *filename; - filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, binfo->dtb_filename); + filename = qemu_find_file(QEMU_FILE_TYPE_DTB, binfo->dtb_filename); if (!filename) { fprintf(stderr, "Couldn't open dtb file %s\n", binfo->dtb_filename); goto fail; diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index 6899802bed..723c97fad2 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -408,7 +408,7 @@ static int ppce500_load_device_tree(PPCE500MachineState *pms, if (dtb_file) { char *filename; - filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, dtb_file); + filename = qemu_find_file(QEMU_FILE_TYPE_DTB, dtb_file); if (!filename) { goto out; } From 6c2888dd0f9b7129943d53cacd6a7b7143a65cfb Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Tue, 10 Jun 2025 22:41:28 +0200 Subject: [PATCH 04/24] pc-bios/dtb/meson: Prefer target name to be outfile, not infile Makes this custom_target() usage consistent with other ones in QEMU. Fixes: 6e0dc9d2a88a ("meson: compile bundled device trees") Signed-off-by: Bernhard Beschow Link: https://lore.kernel.org/r/20250610204131.2862-3-shentey@gmail.com Signed-off-by: Paolo Bonzini --- pc-bios/dtb/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pc-bios/dtb/meson.build b/pc-bios/dtb/meson.build index 7a71835bca..993032949f 100644 --- a/pc-bios/dtb/meson.build +++ b/pc-bios/dtb/meson.build @@ -9,7 +9,7 @@ dtc = find_program('dtc', required: false) if dtc.found() foreach out : dtbs f = fs.replace_suffix(out, '.dts') - custom_target(f, + custom_target(out, build_by_default: have_system, input: files(f), output: out, From abf18324240a3c8f3feafbe5a96d4b83cd044615 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Feb 2025 09:41:42 +0100 Subject: [PATCH 05/24] rust: qemu_api: introduce MaybeUninit field projection Add a macro that makes it possible to convert a MaybeUninit<> into another MaybeUninit<> for a single field within it. Furthermore, it is possible to use the resulting MaybeUninitField<> in APIs that take the parent object, such as memory_region_init_io(). This allows removing some of the undefined behavior from instance_init() functions, though this may not be the definitive implementation. Reviewed-by: Zhao Liu Signed-off-by: Paolo Bonzini --- rust/qemu-api/meson.build | 1 + rust/qemu-api/src/lib.rs | 1 + rust/qemu-api/src/uninit.rs | 85 +++++++++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+) create mode 100644 rust/qemu-api/src/uninit.rs diff --git a/rust/qemu-api/meson.build b/rust/qemu-api/meson.build index cac8595a14..33653b4a28 100644 --- a/rust/qemu-api/meson.build +++ b/rust/qemu-api/meson.build @@ -28,6 +28,7 @@ _qemu_api_rs = static_library( 'src/qom.rs', 'src/sysbus.rs', 'src/timer.rs', + 'src/uninit.rs', 'src/vmstate.rs', 'src/zeroable.rs', ], diff --git a/rust/qemu-api/src/lib.rs b/rust/qemu-api/src/lib.rs index 93902fc94b..c78198f0f4 100644 --- a/rust/qemu-api/src/lib.rs +++ b/rust/qemu-api/src/lib.rs @@ -27,6 +27,7 @@ pub mod qdev; pub mod qom; pub mod sysbus; pub mod timer; +pub mod uninit; pub mod vmstate; pub mod zeroable; diff --git a/rust/qemu-api/src/uninit.rs b/rust/qemu-api/src/uninit.rs new file mode 100644 index 0000000000..04123b4ae9 --- /dev/null +++ b/rust/qemu-api/src/uninit.rs @@ -0,0 +1,85 @@ +//! Access fields of a [`MaybeUninit`] + +use std::{ + mem::MaybeUninit, + ops::{Deref, DerefMut}, +}; + +pub struct MaybeUninitField<'a, T, U> { + parent: &'a mut MaybeUninit, + child: *mut U, +} + +impl<'a, T, U> MaybeUninitField<'a, T, U> { + #[doc(hidden)] + pub fn new(parent: &'a mut MaybeUninit, child: *mut U) -> Self { + MaybeUninitField { parent, child } + } + + /// Return a constant pointer to the containing object of the field. + /// + /// Because the `MaybeUninitField` remembers the containing object, + /// it is possible to use it in foreign APIs that initialize the + /// child. + pub fn parent(f: &Self) -> *const T { + f.parent.as_ptr() + } + + /// Return a mutable pointer to the containing object. + /// + /// Because the `MaybeUninitField` remembers the containing object, + /// it is possible to use it in foreign APIs that initialize the + /// child. + pub fn parent_mut(f: &mut Self) -> *mut T { + f.parent.as_mut_ptr() + } +} + +impl<'a, T, U> Deref for MaybeUninitField<'a, T, U> { + type Target = MaybeUninit; + + fn deref(&self) -> &MaybeUninit { + // SAFETY: self.child was obtained by dereferencing a valid mutable + // reference; the content of the memory may be invalid or uninitialized + // but MaybeUninit<_> makes no assumption on it + unsafe { &*(self.child.cast()) } + } +} + +impl<'a, T, U> DerefMut for MaybeUninitField<'a, T, U> { + fn deref_mut(&mut self) -> &mut MaybeUninit { + // SAFETY: self.child was obtained by dereferencing a valid mutable + // reference; the content of the memory may be invalid or uninitialized + // but MaybeUninit<_> makes no assumption on it + unsafe { &mut *(self.child.cast()) } + } +} + +/// ``` +/// #[derive(Debug)] +/// struct S { +/// x: u32, +/// y: u32, +/// } +/// +/// # use std::mem::MaybeUninit; +/// # use qemu_api::{assert_match, uninit_field_mut}; +/// +/// let mut s: MaybeUninit = MaybeUninit::zeroed(); +/// uninit_field_mut!(s, x).write(5); +/// let s = unsafe { s.assume_init() }; +/// assert_match!(s, S { x: 5, y: 0 }); +/// ``` +#[macro_export] +macro_rules! uninit_field_mut { + ($container:expr, $($field:tt)+) => {{ + let container__: &mut ::std::mem::MaybeUninit<_> = &mut $container; + let container_ptr__ = container__.as_mut_ptr(); + + // SAFETY: the container is not used directly, only through a MaybeUninit<>, + // so the safety is delegated to the caller and to final invocation of + // assume_init() + let target__ = unsafe { std::ptr::addr_of_mut!((*container_ptr__).$($field)+) }; + $crate::uninit::MaybeUninitField::new(container__, target__) + }}; +} From eb64a0c6ae492f8ef37f31c9d4994bfc69f02e3d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 15 Apr 2025 13:13:19 +0200 Subject: [PATCH 06/24] rust: hpet: fully initialize object during instance_init The array of BqlRefCell is not initialized yet at the end of instance_init. In particular, the "state" field is NonNull and therefore it is invalid to have it as zero bytes. Note that MaybeUninit is necessary because assigning to self.timers[index] would trigger Drop of the old value. Reviewed-by: Zhao Liu Signed-off-by: Paolo Bonzini --- rust/hw/timer/hpet/src/device.rs | 42 +++++++++++++++++++------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/rust/hw/timer/hpet/src/device.rs b/rust/hw/timer/hpet/src/device.rs index 735b2fbef7..340ca1d355 100644 --- a/rust/hw/timer/hpet/src/device.rs +++ b/rust/hw/timer/hpet/src/device.rs @@ -4,6 +4,7 @@ use std::{ ffi::{c_int, c_void, CStr}, + mem::MaybeUninit, pin::Pin, ptr::{addr_of_mut, null_mut, NonNull}, slice::from_ref, @@ -25,6 +26,7 @@ use qemu_api::{ qom_isa, sysbus::{SysBusDevice, SysBusDeviceImpl}, timer::{Timer, CLOCK_VIRTUAL, NANOSECONDS_PER_SECOND}, + uninit_field_mut, vmstate::VMStateDescription, vmstate_fields, vmstate_of, vmstate_struct, vmstate_subsections, vmstate_validate, zeroable::Zeroable, @@ -212,13 +214,13 @@ pub struct HPETTimer { } impl HPETTimer { - fn init(&mut self, index: u8, state: &HPETState) { - *self = HPETTimer { + fn new(index: u8, state: *const HPETState) -> HPETTimer { + HPETTimer { index, // SAFETY: the HPETTimer will only be used after the timer // is initialized below. qemu_timer: unsafe { Timer::new() }, - state: NonNull::new((state as *const HPETState).cast_mut()).unwrap(), + state: NonNull::new(state.cast_mut()).unwrap(), config: 0, cmp: 0, fsb: 0, @@ -226,19 +228,15 @@ impl HPETTimer { period: 0, wrap_flag: 0, last: 0, - }; + } + } + fn init_timer_with_cell(cell: &BqlRefCell) { + let mut timer = cell.borrow_mut(); // SAFETY: HPETTimer is only used as part of HPETState, which is // always pinned. - let qemu_timer = unsafe { Pin::new_unchecked(&mut self.qemu_timer) }; - qemu_timer.init_full( - None, - CLOCK_VIRTUAL, - Timer::NS, - 0, - timer_handler, - &state.timers[self.index as usize], - ) + let qemu_timer = unsafe { Pin::new_unchecked(&mut timer.qemu_timer) }; + qemu_timer.init_full(None, CLOCK_VIRTUAL, Timer::NS, 0, timer_handler, cell); } fn get_state(&self) -> &HPETState { @@ -607,9 +605,18 @@ impl HPETState { } } - fn init_timer(&self) { - for (index, timer) in self.timers.iter().enumerate() { - timer.borrow_mut().init(index.try_into().unwrap(), self); + fn init_timers(this: &mut MaybeUninit) { + let state = this.as_ptr(); + for index in 0..HPET_MAX_TIMERS { + let mut timer = uninit_field_mut!(*this, timers[index]); + + // Initialize in two steps, to avoid calling Timer::init_full on a + // temporary that can be moved. + let timer = timer.write(BqlRefCell::new(HPETTimer::new( + index.try_into().unwrap(), + state, + ))); + HPETTimer::init_timer_with_cell(timer); } } @@ -710,6 +717,8 @@ impl HPETState { "hpet", HPET_REG_SPACE_LEN, ); + + Self::init_timers(unsafe { &mut *((self as *mut Self).cast::>()) }); } fn post_init(&self) { @@ -731,7 +740,6 @@ impl HPETState { self.hpet_id.set(HPETFwConfig::assign_hpet_id()?); - self.init_timer(); // 64-bit General Capabilities and ID Register; LegacyReplacementRoute. self.capability.set( HPET_CAP_REV_ID_VALUE << HPET_CAP_REV_ID_SHIFT | From a44122258328a33aaa776c21277e6d4f98ab3d1f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Feb 2025 09:40:30 +0100 Subject: [PATCH 07/24] rust: qom: introduce ParentInit This is a smart pointer for MaybeUninit; it can be upcasted to the already-initialized parent classes, or dereferenced to a MaybeUninit for the class that is being initialized. Reviewed-by: Zhao Liu Signed-off-by: Paolo Bonzini --- rust/qemu-api/src/qom.rs | 96 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) diff --git a/rust/qemu-api/src/qom.rs b/rust/qemu-api/src/qom.rs index 14f98fee60..ef966e570c 100644 --- a/rust/qemu-api/src/qom.rs +++ b/rust/qemu-api/src/qom.rs @@ -95,7 +95,7 @@ use std::{ ffi::{c_void, CStr}, fmt, - mem::ManuallyDrop, + mem::{ManuallyDrop, MaybeUninit}, ops::{Deref, DerefMut}, ptr::NonNull, }; @@ -206,6 +206,100 @@ impl fmt::Display for ParentField { } } +/// This struct knows that the superclasses of the object have already been +/// initialized. +pub struct ParentInit<'a, T>(&'a mut MaybeUninit); + +impl<'a, T> ParentInit<'a, T> { + #[inline] + pub fn with(obj: &'a mut MaybeUninit, f: impl FnOnce(ParentInit<'a, T>)) { + let parent_init = ParentInit(obj); + f(parent_init) + } +} + +impl ParentInit<'_, T> { + /// Return the receiver as a mutable raw pointer to Object. + /// + /// # Safety + /// + /// Fields beyond `Object` could be uninitialized and it's your + /// responsibility to avoid that they're used when the pointer is + /// dereferenced, either directly or through a cast. + pub fn as_object_mut_ptr(&self) -> *mut bindings::Object { + self.as_object_ptr().cast_mut() + } + + /// Return the receiver as a mutable raw pointer to Object. + /// + /// # Safety + /// + /// Fields beyond `Object` could be uninitialized and it's your + /// responsibility to avoid that they're used when the pointer is + /// dereferenced, either directly or through a cast. + pub fn as_object_ptr(&self) -> *const bindings::Object { + self.0.as_ptr().cast() + } +} + +impl<'a, T: ObjectImpl> ParentInit<'a, T> { + /// Convert from a derived type to one of its parent types, which + /// have already been initialized. + /// + /// # Safety + /// + /// Structurally this is always a safe operation; the [`IsA`] trait + /// provides static verification trait that `Self` dereferences to `U` or + /// a child of `U`, and only parent types of `T` are allowed. + /// + /// However, while the fields of the resulting reference are initialized, + /// calls might use uninitialized fields of the subclass. It is your + /// responsibility to avoid this. + pub unsafe fn upcast(&self) -> &'a U + where + T::ParentType: IsA, + { + // SAFETY: soundness is declared via IsA, which is an unsafe trait; + // the parent has been initialized before `instance_init `is called + unsafe { &*(self.0.as_ptr().cast::()) } + } + + /// Convert from a derived type to one of its parent types, which + /// have already been initialized. + /// + /// # Safety + /// + /// Structurally this is always a safe operation; the [`IsA`] trait + /// provides static verification trait that `Self` dereferences to `U` or + /// a child of `U`, and only parent types of `T` are allowed. + /// + /// However, while the fields of the resulting reference are initialized, + /// calls might use uninitialized fields of the subclass. It is your + /// responsibility to avoid this. + pub unsafe fn upcast_mut(&mut self) -> &'a mut U + where + T::ParentType: IsA, + { + // SAFETY: soundness is declared via IsA, which is an unsafe trait; + // the parent has been initialized before `instance_init `is called + unsafe { &mut *(self.0.as_mut_ptr().cast::()) } + } +} + +impl Deref for ParentInit<'_, T> { + type Target = MaybeUninit; + + fn deref(&self) -> &Self::Target { + self.0 + } +} + +impl DerefMut for ParentInit<'_, T> { + fn deref_mut(&mut self) -> &mut Self::Target { + self.0 + } +} + unsafe extern "C" fn rust_instance_init(obj: *mut bindings::Object) { let mut state = NonNull::new(obj).unwrap().cast::(); // SAFETY: obj is an instance of T, since rust_instance_init From 8d394f6cf0b50a82758b651e81a18dac13e70e7d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Feb 2025 10:20:48 +0100 Subject: [PATCH 08/24] rust: qom: make ParentInit lifetime-invariant This is the trick that allows the parent-field initializer to be used only for the object that it's meant to be initialized. This way, the owner of a MemoryRegion must be the object that embeds it. More information is in the comments; it's best explained with a simplified example. Reviewed-by: Zhao Liu Signed-off-by: Paolo Bonzini --- rust/qemu-api/src/qom.rs | 89 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 85 insertions(+), 4 deletions(-) diff --git a/rust/qemu-api/src/qom.rs b/rust/qemu-api/src/qom.rs index ef966e570c..04d102591d 100644 --- a/rust/qemu-api/src/qom.rs +++ b/rust/qemu-api/src/qom.rs @@ -95,6 +95,7 @@ use std::{ ffi::{c_void, CStr}, fmt, + marker::PhantomData, mem::{ManuallyDrop, MaybeUninit}, ops::{Deref, DerefMut}, ptr::NonNull, @@ -208,12 +209,92 @@ impl fmt::Display for ParentField { /// This struct knows that the superclasses of the object have already been /// initialized. -pub struct ParentInit<'a, T>(&'a mut MaybeUninit); +/// +/// The declaration of `ParentInit` is.. *"a kind of magic"*. It uses a +/// technique that is found in several crates, the main ones probably being +/// `ghost-cell` (in fact it was introduced by the [`GhostCell` paper](https://plv.mpi-sws.org/rustbelt/ghostcell/)) +/// and `generativity`. +/// +/// The `PhantomData` makes the `ParentInit` type *invariant* with respect to +/// the lifetime argument `'init`. This, together with the `for<'...>` in +/// `[ParentInit::with]`, block any attempt of the compiler to be creative when +/// operating on types of type `ParentInit` and to extend their lifetimes. In +/// particular, it ensures that the `ParentInit` cannot be made to outlive the +/// `rust_instance_init()` function that creates it, and therefore that the +/// `&'init T` reference is valid. +/// +/// This implementation of the same concept, without the QOM baggage, can help +/// understanding the effect: +/// +/// ``` +/// use std::marker::PhantomData; +/// +/// #[derive(PartialEq, Eq)] +/// pub struct Jail<'closure, T: Copy>(&'closure T, PhantomData &'closure ()>); +/// +/// impl<'closure, T: Copy> Jail<'closure, T> { +/// fn get(&self) -> T { +/// *self.0 +/// } +/// +/// #[inline] +/// fn with(v: T, f: impl for<'id> FnOnce(Jail<'id, T>) -> U) -> U { +/// let parent_init = Jail(&v, PhantomData); +/// f(parent_init) +/// } +/// } +/// ``` +/// +/// It's impossible to escape the `Jail`; `token1` cannot be moved out of the +/// closure: +/// +/// ```ignore +/// let x = 42; +/// let escape = Jail::with(&x, |token1| { +/// println!("{}", token1.get()); +/// // fails to compile... +/// token1 +/// }); +/// // ... so you cannot do this: +/// println!("{}", escape.get()); +/// ``` +/// +/// Likewise, in the QOM case the `ParentInit` cannot be moved out of +/// `instance_init()`. Without this trick it would be possible to stash a +/// `ParentInit` and use it later to access uninitialized memory. +/// +/// Here is another example, showing how separately-created "identities" stay +/// isolated: +/// +/// ```ignore +/// impl<'closure, T: Copy> Clone for Jail<'closure, T> { +/// fn clone(&self) -> Jail<'closure, T> { +/// Jail(self.0, PhantomData) +/// } +/// } +/// +/// fn main() { +/// Jail::with(42, |token1| { +/// // this works and returns true: the clone has the same "identity" +/// println!("{}", token1 == token1.clone()); +/// Jail::with(42, |token2| { +/// // here the outer token remains accessible... +/// println!("{}", token1.get()); +/// // ... but the two are separate: this fails to compile: +/// println!("{}", token1 == token2); +/// }); +/// }); +/// } +/// ``` +pub struct ParentInit<'init, T>( + &'init mut MaybeUninit, + PhantomData &'init ()>, +); -impl<'a, T> ParentInit<'a, T> { +impl<'init, T> ParentInit<'init, T> { #[inline] - pub fn with(obj: &'a mut MaybeUninit, f: impl FnOnce(ParentInit<'a, T>)) { - let parent_init = ParentInit(obj); + pub fn with(obj: &'init mut MaybeUninit, f: impl for<'id> FnOnce(ParentInit<'id, T>)) { + let parent_init = ParentInit(obj, PhantomData); f(parent_init) } } From 345bef46a1b6765185bfe1450cc147f5feb5d0e7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 4 Mar 2025 20:48:05 +0100 Subject: [PATCH 09/24] rust: qom: change instance_init to take a ParentInit<> This removes undefined behavior associated to writing to uninitialized fields, and makes it possible to remove "unsafe" from the instance_init implementation. However, the init function itself is still unsafe, because it must promise (as a sort as MaybeUninit::assume_init) that all fields have been initialized. Reviewed-by: Zhao Liu Signed-off-by: Paolo Bonzini --- rust/hw/char/pl011/src/device.rs | 34 ++++++++++------------ rust/hw/timer/hpet/src/device.rs | 16 ++++------- rust/qemu-api/src/memory.rs | 12 ++++---- rust/qemu-api/src/qdev.rs | 49 +++++++++++++++++++------------- rust/qemu-api/src/qom.rs | 9 ++++-- 5 files changed, 63 insertions(+), 57 deletions(-) diff --git a/rust/hw/char/pl011/src/device.rs b/rust/hw/char/pl011/src/device.rs index be8387f6f2..2d416cd9a3 100644 --- a/rust/hw/char/pl011/src/device.rs +++ b/rust/hw/char/pl011/src/device.rs @@ -2,7 +2,7 @@ // Author(s): Manos Pitsidianakis // SPDX-License-Identifier: GPL-2.0-or-later -use std::{ffi::CStr, mem::size_of, ptr::addr_of_mut}; +use std::{ffi::CStr, mem::size_of}; use qemu_api::{ chardev::{CharBackend, Chardev, Event}, @@ -11,9 +11,10 @@ use qemu_api::{ memory::{hwaddr, MemoryRegion, MemoryRegionOps, MemoryRegionOpsBuilder}, prelude::*, qdev::{Clock, ClockEvent, DeviceImpl, DeviceState, Property, ResetType, ResettablePhasesImpl}, - qom::{ObjectImpl, Owned, ParentField}, + qom::{ObjectImpl, Owned, ParentField, ParentInit}, static_assert, sysbus::{SysBusDevice, SysBusDeviceImpl}, + uninit_field_mut, vmstate::VMStateDescription, }; @@ -163,7 +164,7 @@ impl PL011Impl for PL011State { impl ObjectImpl for PL011State { type ParentType = SysBusDevice; - const INSTANCE_INIT: Option = Some(Self::init); + const INSTANCE_INIT: Option)> = Some(Self::init); const INSTANCE_POST_INIT: Option = Some(Self::post_init); const CLASS_INIT: fn(&mut Self::Class) = Self::Class::class_init::; } @@ -488,7 +489,7 @@ impl PL011State { /// `PL011State` type. It must not be called more than once on the same /// location/instance. All its fields are expected to hold uninitialized /// values with the sole exception of `parent_obj`. - unsafe fn init(&mut self) { + unsafe fn init(mut this: ParentInit) { static PL011_OPS: MemoryRegionOps = MemoryRegionOpsBuilder::::new() .read(&PL011State::read) .write(&PL011State::write) @@ -496,28 +497,23 @@ impl PL011State { .impl_sizes(4, 4) .build(); - // SAFETY: - // - // self and self.iomem are guaranteed to be valid at this point since callers - // must make sure the `self` reference is valid. + // SAFETY: this and this.iomem are guaranteed to be valid at this point MemoryRegion::init_io( - unsafe { &mut *addr_of_mut!(self.iomem) }, - addr_of_mut!(*self), + &mut uninit_field_mut!(*this, iomem), &PL011_OPS, "pl011", 0x1000, ); - self.regs = Default::default(); + uninit_field_mut!(*this, regs).write(Default::default()); - // SAFETY: - // - // self.clock is not initialized at this point; but since `Owned<_>` is - // not Drop, we can overwrite the undefined value without side effects; - // it's not sound but, because for all PL011State instances are created - // by QOM code which calls this function to initialize the fields, at - // leastno code is able to access an invalid self.clock value. - self.clock = self.init_clock_in("clk", &Self::clock_update, ClockEvent::ClockUpdate); + let clock = DeviceState::init_clock_in( + &mut this, + "clk", + &Self::clock_update, + ClockEvent::ClockUpdate, + ); + uninit_field_mut!(*this, clock).write(clock); } const fn clock_update(&self, _event: ClockEvent) { diff --git a/rust/hw/timer/hpet/src/device.rs b/rust/hw/timer/hpet/src/device.rs index 340ca1d355..a281927781 100644 --- a/rust/hw/timer/hpet/src/device.rs +++ b/rust/hw/timer/hpet/src/device.rs @@ -21,8 +21,8 @@ use qemu_api::{ hwaddr, MemoryRegion, MemoryRegionOps, MemoryRegionOpsBuilder, MEMTXATTRS_UNSPECIFIED, }, prelude::*, - qdev::{DeviceImpl, DeviceMethods, DeviceState, Property, ResetType, ResettablePhasesImpl}, - qom::{ObjectImpl, ObjectType, ParentField}, + qdev::{DeviceImpl, DeviceState, Property, ResetType, ResettablePhasesImpl}, + qom::{ObjectImpl, ObjectType, ParentField, ParentInit}, qom_isa, sysbus::{SysBusDevice, SysBusDeviceImpl}, timer::{Timer, CLOCK_VIRTUAL, NANOSECONDS_PER_SECOND}, @@ -697,7 +697,7 @@ impl HPETState { .set(self.counter.get().deposit(shift, len, val)); } - unsafe fn init(&mut self) { + unsafe fn init(mut this: ParentInit) { static HPET_RAM_OPS: MemoryRegionOps = MemoryRegionOpsBuilder::::new() .read(&HPETState::read) @@ -707,18 +707,14 @@ impl HPETState { .impl_sizes(4, 8) .build(); - // SAFETY: - // self and self.iomem are guaranteed to be valid at this point since callers - // must make sure the `self` reference is valid. MemoryRegion::init_io( - unsafe { &mut *addr_of_mut!(self.iomem) }, - addr_of_mut!(*self), + &mut uninit_field_mut!(*this, iomem), &HPET_RAM_OPS, "hpet", HPET_REG_SPACE_LEN, ); - Self::init_timers(unsafe { &mut *((self as *mut Self).cast::>()) }); + Self::init_timers(&mut this); } fn post_init(&self) { @@ -900,7 +896,7 @@ unsafe impl ObjectType for HPETState { impl ObjectImpl for HPETState { type ParentType = SysBusDevice; - const INSTANCE_INIT: Option = Some(Self::init); + const INSTANCE_INIT: Option)> = Some(Self::init); const INSTANCE_POST_INIT: Option = Some(Self::post_init); const CLASS_INIT: fn(&mut Self::Class) = Self::Class::class_init::; } diff --git a/rust/qemu-api/src/memory.rs b/rust/qemu-api/src/memory.rs index 9ef2694bd6..e40fad6cf1 100644 --- a/rust/qemu-api/src/memory.rs +++ b/rust/qemu-api/src/memory.rs @@ -16,6 +16,7 @@ use crate::{ callbacks::FnCall, cell::Opaque, prelude::*, + uninit::MaybeUninitField, zeroable::Zeroable, }; @@ -147,7 +148,7 @@ impl MemoryRegion { #[inline(always)] unsafe fn do_init_io( slot: *mut bindings::MemoryRegion, - owner: *mut Object, + owner: *mut bindings::Object, ops: &'static bindings::MemoryRegionOps, name: &'static str, size: u64, @@ -156,7 +157,7 @@ impl MemoryRegion { let cstr = CString::new(name).unwrap(); memory_region_init_io( slot, - owner.cast::(), + owner, ops, owner.cast::(), cstr.as_ptr(), @@ -166,16 +167,15 @@ impl MemoryRegion { } pub fn init_io>( - &mut self, - owner: *mut T, + this: &mut MaybeUninitField<'_, T, Self>, ops: &'static MemoryRegionOps, name: &'static str, size: u64, ) { unsafe { Self::do_init_io( - self.0.as_mut_ptr(), - owner.cast::(), + this.as_mut_ptr().cast(), + MaybeUninitField::parent_mut(this).cast(), &ops.0, name, size, diff --git a/rust/qemu-api/src/qdev.rs b/rust/qemu-api/src/qdev.rs index 0610959f46..36f02fb57d 100644 --- a/rust/qemu-api/src/qdev.rs +++ b/rust/qemu-api/src/qdev.rs @@ -19,7 +19,7 @@ use crate::{ error::{Error, Result}, irq::InterruptSource, prelude::*, - qom::{ObjectClass, ObjectImpl, Owned}, + qom::{ObjectClass, ObjectImpl, Owned, ParentInit}, vmstate::VMStateDescription, }; @@ -247,15 +247,9 @@ unsafe impl ObjectType for DeviceState { } qom_isa!(DeviceState: Object); -/// Trait for methods exposed by the [`DeviceState`] class. The methods can be -/// called on all objects that have the trait `IsA`. -/// -/// The trait should only be used through the blanket implementation, -/// which guarantees safety via `IsA`. -pub trait DeviceMethods: ObjectDeref -where - Self::Target: IsA, -{ +/// Initialization methods take a [`ParentInit`] and can be called as +/// associated functions. +impl DeviceState { /// Add an input clock named `name`. Invoke the callback with /// `self` as the first parameter for the events that are requested. /// @@ -266,12 +260,15 @@ where /// which Rust code has a reference to a child object) it would be /// possible for this function to return a `&Clock` too. #[inline] - fn init_clock_in FnCall<(&'a Self::Target, ClockEvent)>>( - &self, + pub fn init_clock_in FnCall<(&'a T, ClockEvent)>>( + this: &mut ParentInit, name: &str, _cb: &F, events: ClockEvent, - ) -> Owned { + ) -> Owned + where + T::ParentType: IsA, + { fn do_init_clock_in( dev: &DeviceState, name: &str, @@ -287,10 +284,10 @@ where unsafe { let cstr = CString::new(name).unwrap(); let clk = bindings::qdev_init_clock_in( - dev.as_mut_ptr(), + dev.0.as_mut_ptr(), cstr.as_ptr(), cb, - dev.as_void_ptr(), + dev.0.as_void_ptr(), events.0, ); @@ -307,12 +304,12 @@ where // SAFETY: the opaque is "this", which is indeed a pointer to T F::call((unsafe { &*(opaque.cast::()) }, event)) } - Some(rust_clock_cb::) + Some(rust_clock_cb::) } else { None }; - do_init_clock_in(self.upcast(), name, cb, events) + do_init_clock_in(unsafe { this.upcast_mut() }, name, cb, events) } /// Add an output clock named `name`. @@ -324,16 +321,30 @@ where /// which Rust code has a reference to a child object) it would be /// possible for this function to return a `&Clock` too. #[inline] - fn init_clock_out(&self, name: &str) -> Owned { + pub fn init_clock_out(this: &mut ParentInit, name: &str) -> Owned + where + T::ParentType: IsA, + { unsafe { let cstr = CString::new(name).unwrap(); - let clk = bindings::qdev_init_clock_out(self.upcast().as_mut_ptr(), cstr.as_ptr()); + let dev: &mut DeviceState = this.upcast_mut(); + let clk = bindings::qdev_init_clock_out(dev.0.as_mut_ptr(), cstr.as_ptr()); let clk: &Clock = Clock::from_raw(clk); Owned::from(clk) } } +} +/// Trait for methods exposed by the [`DeviceState`] class. The methods can be +/// called on all objects that have the trait `IsA`. +/// +/// The trait should only be used through the blanket implementation, +/// which guarantees safety via `IsA`. +pub trait DeviceMethods: ObjectDeref +where + Self::Target: IsA, +{ fn prop_set_chr(&self, propname: &str, chr: &Owned) { assert!(bql_locked()); let c_propname = CString::new(propname).unwrap(); diff --git a/rust/qemu-api/src/qom.rs b/rust/qemu-api/src/qom.rs index 04d102591d..e20ee014cb 100644 --- a/rust/qemu-api/src/qom.rs +++ b/rust/qemu-api/src/qom.rs @@ -382,12 +382,15 @@ impl DerefMut for ParentInit<'_, T> { } unsafe extern "C" fn rust_instance_init(obj: *mut bindings::Object) { - let mut state = NonNull::new(obj).unwrap().cast::(); + let mut state = NonNull::new(obj).unwrap().cast::>(); + // SAFETY: obj is an instance of T, since rust_instance_init // is called from QOM core as the instance_init function // for class T unsafe { - T::INSTANCE_INIT.unwrap()(state.as_mut()); + ParentInit::with(state.as_mut(), |parent_init| { + T::INSTANCE_INIT.unwrap()(parent_init); + }); } } @@ -654,7 +657,7 @@ pub trait ObjectImpl: ObjectType + IsA { /// /// FIXME: The argument is not really a valid reference. `&mut /// MaybeUninit` would be a better description. - const INSTANCE_INIT: Option = None; + const INSTANCE_INIT: Option)> = None; /// Function that is called to finish initialization of an object, once /// `INSTANCE_INIT` functions have been called. From 1548c5cdf010f6c89d577402c56eca7169936f48 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 13 Jun 2025 14:51:54 +0200 Subject: [PATCH 10/24] rust: prepare variable definitions for multiple bindgen invocations When splitting the QEMU Rust bindings into multiple crates, the bindgen-generated structs also have to be split so that it's possible to add "impl" blocks (e.g. for Sync/Send or Default, or even for utility methods in cases such as VMStateFlags). Tweak various variable definitions in meson.build, to avoid naming conflicts once there will be multiple bindgen invocations. Reviewed-by: Zhao Liu Signed-off-by: Paolo Bonzini --- meson.build | 21 ++++++++++++--------- rust/meson.build | 2 +- rust/qemu-api/meson.build | 2 +- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/meson.build b/meson.build index ed60be2a2d..19ffa9cb34 100644 --- a/meson.build +++ b/meson.build @@ -4200,10 +4200,11 @@ foreach target_base_arch, config_base_arch : config_base_arch_mak endforeach if have_rust + bindings_incdir = include_directories('.', 'include') # We would like to use --generate-cstr, but it is only available # starting with bindgen 0.66.0. The oldest supported versions # is 0.60.x (Debian 12 has 0.60.1) which introduces --allowlist-file. - bindgen_args = [ + bindgen_args_common = [ '--disable-header-comment', '--raw-line', '// @generated', '--ctypes-prefix', 'std::os::raw', @@ -4219,20 +4220,22 @@ if have_rust ] if not rustfmt.found() if bindgen.version().version_compare('<0.65.0') - bindgen_args += ['--no-rustfmt-bindings'] + bindgen_args_common += ['--no-rustfmt-bindings'] else - bindgen_args += ['--formatter', 'none'] + bindgen_args_common += ['--formatter', 'none'] endif endif if bindgen.version().version_compare('>=0.66.0') - bindgen_args += ['--rust-target', '1.59'] + bindgen_args_common += ['--rust-target', '1.59'] endif if bindgen.version().version_compare('<0.61.0') # default in 0.61+ - bindgen_args += ['--size_t-is-usize'] + bindgen_args_common += ['--size_t-is-usize'] else - bindgen_args += ['--merge-extern-blocks'] + bindgen_args_common += ['--merge-extern-blocks'] endif + + bindgen_args = [] c_enums = [ 'DeviceCategory', 'GpioPolarity', @@ -4264,13 +4267,13 @@ if have_rust # this case you must pass the path to `clang` and `libclang` to your build # command invocation using the environment variables CLANG_PATH and # LIBCLANG_PATH - bindings_rs = rust.bindgen( + _qemu_api_bindings_inc_rs = rust.bindgen( input: 'rust/wrapper.h', dependencies: common_ss.all_dependencies(), output: 'bindings.inc.rs', - include_directories: include_directories('.', 'include'), + include_directories: bindings_incdir, bindgen_version: ['>=0.60.0'], - args: bindgen_args, + args: bindgen_args_common + bindgen_args, ) subdir('rust') endif diff --git a/rust/meson.build b/rust/meson.build index 99ae7956cd..e9f0879e29 100644 --- a/rust/meson.build +++ b/rust/meson.build @@ -33,5 +33,5 @@ if cargo.found() command: [config_host['MESON'], 'devenv', '--workdir', '@CURRENT_SOURCE_DIR@', cargo, 'fmt'], - depends: bindings_rs) + depends: _qemu_api_bindings_inc_rs) endif diff --git a/rust/qemu-api/meson.build b/rust/qemu-api/meson.build index 33653b4a28..64c04dfd74 100644 --- a/rust/qemu-api/meson.build +++ b/rust/qemu-api/meson.build @@ -32,7 +32,7 @@ _qemu_api_rs = static_library( 'src/vmstate.rs', 'src/zeroable.rs', ], - {'.' : bindings_rs}, + {'.' : _qemu_api_bindings_inc_rs}, ), override_options: ['rust_std=2021', 'build.rust_std=2021'], rust_abi: 'rust', From 1ae4ca0463d737ead1162c35b267ef5882d42883 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 13 Jun 2025 14:49:27 +0200 Subject: [PATCH 11/24] rust: move rust.bindgen to qemu-api crate Once qemu-api is split in multiple crates, each of them will have its own invocation of bindgen. There cannot be only one, because there are occasional "impl" blocks for the bindgen-generated structs (e.g. VMStateFlags or QOM classes) that have to reside in the same crate as the bindgen-generated code. For now, prepare for this new organization by invoking bindgen within the qemu-api crate's build definitions; it's also a much better place to list enums that need specific treatment from bindgen. Reviewed-by: Zhao Liu Signed-off-by: Paolo Bonzini --- meson.build | 41 ----------------------------------- rust/meson.build | 4 +++- rust/qemu-api/build.rs | 2 +- rust/qemu-api/meson.build | 41 +++++++++++++++++++++++++++++++++++ rust/{ => qemu-api}/wrapper.h | 0 5 files changed, 45 insertions(+), 43 deletions(-) rename rust/{ => qemu-api}/wrapper.h (100%) diff --git a/meson.build b/meson.build index 19ffa9cb34..4676908dbb 100644 --- a/meson.build +++ b/meson.build @@ -4234,47 +4234,6 @@ if have_rust else bindgen_args_common += ['--merge-extern-blocks'] endif - - bindgen_args = [] - c_enums = [ - 'DeviceCategory', - 'GpioPolarity', - 'MachineInitPhase', - 'MemoryDeviceInfoKind', - 'MigrationPolicy', - 'MigrationPriority', - 'QEMUChrEvent', - 'QEMUClockType', - 'ResetType', - 'device_endian', - 'module_init_type', - ] - foreach enum : c_enums - bindgen_args += ['--rustified-enum', enum] - endforeach - c_bitfields = [ - 'ClockEvent', - 'VMStateFlags', - ] - foreach enum : c_bitfields - bindgen_args += ['--bitfield-enum', enum] - endforeach - - # TODO: Remove this comment when the clang/libclang mismatch issue is solved. - # - # Rust bindings generation with `bindgen` might fail in some cases where the - # detected `libclang` does not match the expected `clang` version/target. In - # this case you must pass the path to `clang` and `libclang` to your build - # command invocation using the environment variables CLANG_PATH and - # LIBCLANG_PATH - _qemu_api_bindings_inc_rs = rust.bindgen( - input: 'rust/wrapper.h', - dependencies: common_ss.all_dependencies(), - output: 'bindings.inc.rs', - include_directories: bindings_incdir, - bindgen_version: ['>=0.60.0'], - args: bindgen_args_common + bindgen_args, - ) subdir('rust') endif diff --git a/rust/meson.build b/rust/meson.build index e9f0879e29..331f11b7e7 100644 --- a/rust/meson.build +++ b/rust/meson.build @@ -20,6 +20,8 @@ proc_macro2_rs_native = dependency('proc-macro2-1-rs', native: true) qemuutil_rs = qemuutil.partial_dependency(link_args: true, links: true) +genrs = [] + subdir('qemu-api-macros') subdir('bits') subdir('qemu-api') @@ -33,5 +35,5 @@ if cargo.found() command: [config_host['MESON'], 'devenv', '--workdir', '@CURRENT_SOURCE_DIR@', cargo, 'fmt'], - depends: _qemu_api_bindings_inc_rs) + depends: genrs) endif diff --git a/rust/qemu-api/build.rs b/rust/qemu-api/build.rs index 1e720641d2..7849486c1b 100644 --- a/rust/qemu-api/build.rs +++ b/rust/qemu-api/build.rs @@ -14,7 +14,7 @@ fn main() -> Result<()> { let path = env::var("MESON_BUILD_ROOT") .unwrap_or_else(|_| format!("{}/src", env!("CARGO_MANIFEST_DIR"))); - let file = format!("{path}/bindings.inc.rs"); + let file = format!("{path}/rust/qemu-api/bindings.inc.rs"); let file = Path::new(&file); if !Path::new(&file).exists() { panic!(concat!( diff --git a/rust/qemu-api/meson.build b/rust/qemu-api/meson.build index 64c04dfd74..5b8c7e5e8d 100644 --- a/rust/qemu-api/meson.build +++ b/rust/qemu-api/meson.build @@ -7,6 +7,47 @@ if get_option('debug_mutex') _qemu_api_cfg += ['--cfg', 'feature="debug_cell"'] endif +c_enums = [ + 'DeviceCategory', + 'GpioPolarity', + 'MachineInitPhase', + 'MemoryDeviceInfoKind', + 'MigrationPolicy', + 'MigrationPriority', + 'QEMUChrEvent', + 'QEMUClockType', + 'ResetType', + 'device_endian', + 'module_init_type', +] +_qemu_api_bindgen_args = [] +foreach enum : c_enums + _qemu_api_bindgen_args += ['--rustified-enum', enum] +endforeach +c_bitfields = [ + 'ClockEvent', + 'VMStateFlags', +] +foreach enum : c_bitfields + _qemu_api_bindgen_args += ['--bitfield-enum', enum] +endforeach + +# TODO: Remove this comment when the clang/libclang mismatch issue is solved. +# +# Rust bindings generation with `bindgen` might fail in some cases where the +# detected `libclang` does not match the expected `clang` version/target. In +# this case you must pass the path to `clang` and `libclang` to your build +# command invocation using the environment variables CLANG_PATH and +# LIBCLANG_PATH +_qemu_api_bindings_inc_rs = rust.bindgen( + input: 'wrapper.h', + dependencies: common_ss.all_dependencies(), + output: 'bindings.inc.rs', + include_directories: bindings_incdir, + bindgen_version: ['>=0.60.0'], + args: bindgen_args_common + _qemu_api_bindgen_args, + ) + _qemu_api_rs = static_library( 'qemu_api', structured_sources( diff --git a/rust/wrapper.h b/rust/qemu-api/wrapper.h similarity index 100% rename from rust/wrapper.h rename to rust/qemu-api/wrapper.h From ab81002252af101068fca94bc82b31ba59ff154b Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sun, 15 Jun 2025 13:20:34 +0200 Subject: [PATCH 12/24] rust/qemu-api: Add initial logging support based on C API A log_mask_ln!() macro is provided which expects similar arguments as the C version. However, the formatting works as one would expect from Rust. To maximize code reuse the macro is just a thin wrapper around qemu_log(). Also, just the bare minimum of logging masks is provided which should suffice for the current use case of Rust in QEMU. Signed-off-by: Bernhard Beschow Link: https://lore.kernel.org/r/20250615112037.11992-2-shentey@gmail.com Signed-off-by: Paolo Bonzini --- docs/devel/rust.rst | 1 + rust/qemu-api/meson.build | 1 + rust/qemu-api/src/lib.rs | 1 + rust/qemu-api/src/log.rs | 73 ++++++++++++++++++++++++++++++++++++ rust/qemu-api/src/prelude.rs | 2 + rust/qemu-api/wrapper.h | 2 + 6 files changed, 80 insertions(+) create mode 100644 rust/qemu-api/src/log.rs diff --git a/docs/devel/rust.rst b/docs/devel/rust.rst index 47e9677fcb..dc8c44109e 100644 --- a/docs/devel/rust.rst +++ b/docs/devel/rust.rst @@ -162,6 +162,7 @@ module status ``errno`` complete ``error`` stable ``irq`` complete +``log`` proof of concept ``memory`` stable ``module`` complete ``qdev`` stable diff --git a/rust/qemu-api/meson.build b/rust/qemu-api/meson.build index 5b8c7e5e8d..a090297c45 100644 --- a/rust/qemu-api/meson.build +++ b/rust/qemu-api/meson.build @@ -62,6 +62,7 @@ _qemu_api_rs = static_library( 'src/errno.rs', 'src/error.rs', 'src/irq.rs', + 'src/log.rs', 'src/memory.rs', 'src/module.rs', 'src/prelude.rs', diff --git a/rust/qemu-api/src/lib.rs b/rust/qemu-api/src/lib.rs index c78198f0f4..86dcd8ef17 100644 --- a/rust/qemu-api/src/lib.rs +++ b/rust/qemu-api/src/lib.rs @@ -21,6 +21,7 @@ pub mod chardev; pub mod errno; pub mod error; pub mod irq; +pub mod log; pub mod memory; pub mod module; pub mod qdev; diff --git a/rust/qemu-api/src/log.rs b/rust/qemu-api/src/log.rs new file mode 100644 index 0000000000..d6c3d6c1b6 --- /dev/null +++ b/rust/qemu-api/src/log.rs @@ -0,0 +1,73 @@ +// Copyright 2025 Bernhard Beschow +// SPDX-License-Identifier: GPL-2.0-or-later + +//! Bindings for QEMU's logging infrastructure + +#[repr(u32)] +/// Represents specific error categories within QEMU's logging system. +/// +/// The `Log` enum provides a Rust abstraction for logging errors, corresponding +/// to a subset of the error categories defined in the C implementation. +pub enum Log { + /// Log invalid access caused by the guest. + /// Corresponds to `LOG_GUEST_ERROR` in the C implementation. + GuestError = crate::bindings::LOG_GUEST_ERROR, + + /// Log guest access of unimplemented functionality. + /// Corresponds to `LOG_UNIMP` in the C implementation. + Unimp = crate::bindings::LOG_UNIMP, +} + +/// A macro to log messages conditionally based on a provided mask. +/// +/// The `log_mask_ln` macro checks whether the given mask matches the current +/// log level and, if so, formats and logs the message. It is the Rust +/// counterpart of the `qemu_log_mask()` macro in the C implementation. +/// +/// # Parameters +/// +/// - `$mask`: A log level mask. This should be a variant of the `Log` enum. +/// - `$fmt`: A format string following the syntax and rules of the `format!` +/// macro. It specifies the structure of the log message. +/// - `$args`: Optional arguments to be interpolated into the format string. +/// +/// # Example +/// +/// ``` +/// use qemu_api::{log::Log, log_mask_ln}; +/// +/// let error_address = 0xbad; +/// log_mask_ln!(Log::GuestError, "Address 0x{error_address:x} out of range"); +/// ``` +/// +/// It is also possible to use printf-style formatting, as well as having a +/// trailing `,`: +/// +/// ``` +/// use qemu_api::{log::Log, log_mask_ln}; +/// +/// let error_address = 0xbad; +/// log_mask_ln!( +/// Log::GuestError, +/// "Address 0x{:x} out of range", +/// error_address, +/// ); +/// ``` +#[macro_export] +macro_rules! log_mask_ln { + ($mask:expr, $fmt:tt $($args:tt)*) => {{ + // Type assertion to enforce type `Log` for $mask + let _: Log = $mask; + + if unsafe { + (::qemu_api::bindings::qemu_loglevel & ($mask as std::os::raw::c_int)) != 0 + } { + let formatted_string = format!("{}\n", format_args!($fmt $($args)*)); + let c_string = std::ffi::CString::new(formatted_string).unwrap(); + + unsafe { + ::qemu_api::bindings::qemu_log(c_string.as_ptr()); + } + } + }}; +} diff --git a/rust/qemu-api/src/prelude.rs b/rust/qemu-api/src/prelude.rs index 43bfcd5fca..8f9e23ee2c 100644 --- a/rust/qemu-api/src/prelude.rs +++ b/rust/qemu-api/src/prelude.rs @@ -11,6 +11,8 @@ pub use crate::cell::BqlRefCell; pub use crate::errno; +pub use crate::log_mask_ln; + pub use crate::qdev::DeviceMethods; pub use crate::qom::InterfaceType; diff --git a/rust/qemu-api/wrapper.h b/rust/qemu-api/wrapper.h index 6060d3ba1a..15a1b19847 100644 --- a/rust/qemu-api/wrapper.h +++ b/rust/qemu-api/wrapper.h @@ -48,6 +48,8 @@ typedef enum memory_order { #endif /* __CLANG_STDATOMIC_H */ #include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/log-for-trace.h" #include "qemu/module.h" #include "qemu-io.h" #include "system/system.h" From 1563f287dc9c4bc6a50d380095e966ac039ac24a Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sun, 15 Jun 2025 13:20:35 +0200 Subject: [PATCH 13/24] rust: pl011: Implement logging Now that there is logging support in Rust for QEMU, use it in the pl011 device. Signed-off-by: Bernhard Beschow Link: https://lore.kernel.org/r/20250615112037.11992-3-shentey@gmail.com Signed-off-by: Paolo Bonzini --- rust/hw/char/pl011/src/device.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/rust/hw/char/pl011/src/device.rs b/rust/hw/char/pl011/src/device.rs index 2d416cd9a3..92dc295540 100644 --- a/rust/hw/char/pl011/src/device.rs +++ b/rust/hw/char/pl011/src/device.rs @@ -8,6 +8,8 @@ use qemu_api::{ chardev::{CharBackend, Chardev, Event}, impl_vmstate_forward, irq::{IRQState, InterruptSource}, + log::Log, + log_mask_ln, memory::{hwaddr, MemoryRegion, MemoryRegionOps, MemoryRegionOpsBuilder}, prelude::*, qdev::{Clock, ClockEvent, DeviceImpl, DeviceState, Property, ResetType, ResettablePhasesImpl}, @@ -276,8 +278,7 @@ impl PL011Registers { DMACR => { self.dmacr = value; if value & 3 > 0 { - // qemu_log_mask(LOG_UNIMP, "pl011: DMA not implemented\n"); - eprintln!("pl011: DMA not implemented"); + log_mask_ln!(Log::Unimp, "pl011: DMA not implemented"); } } } @@ -534,7 +535,7 @@ impl PL011State { u64::from(device_id[(offset - 0xfe0) >> 2]) } Err(_) => { - // qemu_log_mask(LOG_GUEST_ERROR, "pl011_read: Bad offset 0x%x\n", (int)offset); + log_mask_ln!(Log::GuestError, "PL011State::read: Bad offset {offset}"); 0 } Ok(field) => { @@ -566,7 +567,10 @@ impl PL011State { .borrow_mut() .write(field, value as u32, &self.char_backend); } else { - eprintln!("write bad offset {offset} value {value}"); + log_mask_ln!( + Log::GuestError, + "PL011State::write: Bad offset {offset} value {value}" + ); } if update_irq { self.update(); From b783601d1a3b3fd468035baf0ce2ead166e0abdc Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sun, 15 Jun 2025 13:20:36 +0200 Subject: [PATCH 14/24] rust: pl011: Add missing logging to match C version Co-developed-by: Paolo Bonzini Signed-off-by: Bernhard Beschow Link: https://lore.kernel.org/r/20250615112037.11992-4-shentey@gmail.com Signed-off-by: Paolo Bonzini --- rust/hw/char/pl011/src/device.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/rust/hw/char/pl011/src/device.rs b/rust/hw/char/pl011/src/device.rs index 92dc295540..5b53f2649f 100644 --- a/rust/hw/char/pl011/src/device.rs +++ b/rust/hw/char/pl011/src/device.rs @@ -305,6 +305,12 @@ impl PL011Registers { } fn write_data_register(&mut self, value: u32) -> bool { + if !self.control.enable_uart() { + log_mask_ln!(Log::GuestError, "PL011 data written to disabled UART"); + } + if !self.control.enable_transmit() { + log_mask_ln!(Log::GuestError, "PL011 data written to disabled TX UART"); + } // interrupts always checked let _ = self.loopback_tx(value.into()); self.int_level |= Interrupt::TX; From 6b3fad084fc4e13901e252fe6c2a2c46ecea999b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 16 Jun 2025 18:56:49 +0200 Subject: [PATCH 15/24] rust: hpet: fix new warning Nightly rustc complains that HPETAddrDecode has a lifetime but it is not clearly noted that it comes from &self. Apply the compiler's suggestion to shut it up. Reviewed-by: Zhao Liu Reviewed-by: Stefan Hajnoczi Signed-off-by: Paolo Bonzini --- rust/hw/timer/hpet/src/device.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/hw/timer/hpet/src/device.rs b/rust/hw/timer/hpet/src/device.rs index a281927781..acf7251029 100644 --- a/rust/hw/timer/hpet/src/device.rs +++ b/rust/hw/timer/hpet/src/device.rs @@ -771,7 +771,7 @@ impl HPETState { self.rtc_irq_level.set(0); } - fn decode(&self, mut addr: hwaddr, size: u32) -> HPETAddrDecode { + fn decode(&self, mut addr: hwaddr, size: u32) -> HPETAddrDecode<'_> { let shift = ((addr & 4) * 8) as u32; let len = std::cmp::min(size * 8, 64 - shift); From e68ec2980901c8e7f948f3305770962806c53f0b Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Tue, 4 Mar 2025 00:24:49 -0500 Subject: [PATCH 16/24] i386/cpu: Move adjustment of CPUID_EXT_PDCM before feature_dependencies[] check There is one entry relates to CPUID_EXT_PDCM in feature_dependencies[]. So it needs to get correct value of CPUID_EXT_PDCM before using feature_dependencies[] to apply dependencies. Besides, it also ensures CPUID_EXT_PDCM value is tracked in env->features[FEAT_1_ECX]. Signed-off-by: Xiaoyao Li Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250304052450.465445-2-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 40aefb38f6..29bce67c3a 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -7349,9 +7349,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, if (threads_per_pkg > 1) { *ebx |= threads_per_pkg << 16; } - if (!cpu->enable_pmu) { - *ecx &= ~CPUID_EXT_PDCM; - } break; case 2: /* cache info: needed for Pentium Pro compatibility */ @@ -8341,6 +8338,10 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) } } + if (!cpu->enable_pmu) { + env->features[FEAT_1_ECX] &= ~CPUID_EXT_PDCM; + } + for (i = 0; i < ARRAY_SIZE(feature_dependencies); i++) { FeatureDep *d = &feature_dependencies[i]; if (!(env->features[d->from.index] & d->from.mask)) { From 00268e00027459abede448662f8794d78eb4b0a4 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Tue, 4 Mar 2025 00:24:50 -0500 Subject: [PATCH 17/24] i386/cpu: Warn about why CPUID_EXT_PDCM is not available When user requests PDCM explicitly via "+pdcm" without PMU enabled, emit a warning to inform the user. Signed-off-by: Xiaoyao Li Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20250304052450.465445-3-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 29bce67c3a..0d35e95430 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -8339,6 +8339,9 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) } if (!cpu->enable_pmu) { + mark_unavailable_features(cpu, FEAT_1_ECX, + env->user_features[FEAT_1_ECX] & CPUID_EXT_PDCM, + "This feature is not available due to PMU being disabled"); env->features[FEAT_1_ECX] &= ~CPUID_EXT_PDCM; } From 750560f8a832361cf5cc4cd7bc4f56e1e76206f6 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 12 Jun 2025 09:38:01 -0400 Subject: [PATCH 18/24] i386/tdx: Error and exit when named cpu model is requested Currently, it gets below error when requesting any named cpu model with "-cpu" to boot a TDX VM: qemu-system-x86_64: KVM_TDX_INIT_VM failed: Invalid argument It misleads people to think it's the bug of KVM or QEMU. It is just that current QEMU doesn't support named cpu model for TDX. To support named cpu models for TDX guest, there are opens to be finalized and needs a mount of additional work. For now, explicitly check the case when named cpu model is requested. Error report a hint and exit. Signed-off-by: Xiaoyao Li Link: https://lore.kernel.org/r/20250612133801.2238342-1-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/tdx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 820ca3614e..2b52de9d71 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -739,8 +739,14 @@ static int tdx_kvm_type(X86ConfidentialGuest *cg) static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu) { + X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); X86CPU *x86cpu = X86_CPU(cpu); + if (xcc->model) { + error_report("Named cpu model is not supported for TDX yet!"); + exit(1); + } + object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort); /* invtsc is fixed1 for TD guest */ From 90d2bbd1f6edfa22a056070ee62ded55099cd56d Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Tue, 3 Jun 2025 01:03:03 -0400 Subject: [PATCH 19/24] i386/cpu: Rename enable_cpuid_0x1f to force_cpuid_0x1f MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The name of "enable_cpuid_0x1f" isn't right to its behavior because the leaf 0x1f can be enabled even when "enable_cpuid_0x1f" is false. Rename it to "force_cpuid_0x1f" to better reflect its behavior. Suggested-by: Igor Mammedov Signed-off-by: Xiaoyao Li Reviewed-by: Daniel P. Berrangé Reviewed-by: Igor Mammedov Link: https://lore.kernel.org/r/20250603050305.1704586-2-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.h | 4 ++-- target/i386/kvm/tdx.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 5910dcf74d..51e10139df 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -2269,7 +2269,7 @@ struct ArchCPU { bool enable_cpuid_0xb; /* Force to enable cpuid 0x1f */ - bool enable_cpuid_0x1f; + bool force_cpuid_0x1f; /* Enable auto level-increase for all CPUID leaves */ bool full_cpuid_auto_level; @@ -2539,7 +2539,7 @@ void mark_forced_on_features(X86CPU *cpu, FeatureWord w, uint64_t mask, static inline bool x86_has_cpuid_0x1f(X86CPU *cpu) { - return cpu->enable_cpuid_0x1f || + return cpu->force_cpuid_0x1f || x86_has_extended_topo(cpu->env.avail_cpu_topo); } diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 2b52de9d71..acbe749754 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -752,7 +752,7 @@ static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu) /* invtsc is fixed1 for TD guest */ object_property_set_bool(OBJECT(cpu), "invtsc", true, &error_abort); - x86cpu->enable_cpuid_0x1f = true; + x86cpu->force_cpuid_0x1f = true; } static uint32_t tdx_adjust_cpuid_features(X86ConfidentialGuest *cg, From a38da9f4876bb17d7ed9c6e24964b12b61877d38 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Tue, 3 Jun 2025 01:03:04 -0400 Subject: [PATCH 20/24] i386/tdx: Fix the typo of the comment of struct TdxGuest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change sha348 to sha384. Signed-off-by: Xiaoyao Li Reviewed-by: Daniel P. Berrangé Reviewed-by: Igor Mammedov Link: https://lore.kernel.org/r/20250603050305.1704586-3-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/tdx.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h index 04b5afe199..8dd66e9014 100644 --- a/target/i386/kvm/tdx.h +++ b/target/i386/kvm/tdx.h @@ -40,9 +40,9 @@ typedef struct TdxGuest { bool initialized; uint64_t attributes; /* TD attributes */ uint64_t xfam; - char *mrconfigid; /* base64 encoded sha348 digest */ - char *mrowner; /* base64 encoded sha348 digest */ - char *mrownerconfig; /* base64 encoded sha348 digest */ + char *mrconfigid; /* base64 encoded sha384 digest */ + char *mrowner; /* base64 encoded sha384 digest */ + char *mrownerconfig; /* base64 encoded sha384 digest */ MemoryRegion *tdvf_mr; TdxFirmware tdvf; From 41cd354d350d3c64915be9c5decbf20abd84e486 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Tue, 3 Jun 2025 01:03:05 -0400 Subject: [PATCH 21/24] i386/tdx: Clarify the error message of mrconfigid/mrowner/mrownerconfig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The error message is misleading - we successfully decoded the data, the decoded data was simply with the wrong length. Change the error message to show it is an length check failure with both the received and expected values. Suggested-by: Daniel P. Berrangé Signed-off-by: Xiaoyao Li Reviewed-by: Daniel P. Berrangé Reviewed-by: Igor Mammedov Link: https://lore.kernel.org/r/20250603050305.1704586-4-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/tdx.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index acbe749754..2284167141 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -1032,7 +1032,9 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) return -1; } if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { - error_setg(errp, "TDX: failed to decode mrconfigid"); + error_setg(errp, "TDX 'mrconfigid' sha384 digest was %ld bytes, " + "expected %d bytes", data_len, + QCRYPTO_HASH_DIGEST_LEN_SHA384); return -1; } memcpy(init_vm->mrconfigid, data, data_len); @@ -1045,7 +1047,9 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) return -1; } if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { - error_setg(errp, "TDX: failed to decode mrowner"); + error_setg(errp, "TDX 'mrowner' sha384 digest was %ld bytes, " + "expected %d bytes", data_len, + QCRYPTO_HASH_DIGEST_LEN_SHA384); return -1; } memcpy(init_vm->mrowner, data, data_len); @@ -1058,7 +1062,9 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) return -1; } if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { - error_setg(errp, "TDX: failed to decode mrownerconfig"); + error_setg(errp, "TDX 'mrownerconfig' sha384 digest was %ld bytes, " + "expected %d bytes", data_len, + QCRYPTO_HASH_DIGEST_LEN_SHA384); return -1; } memcpy(init_vm->mrownerconfig, data, data_len); From 688b0756ad2fdbe8effdb66f724a1129f62be7a2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 19 Jun 2025 20:09:19 +0200 Subject: [PATCH 22/24] update Linux headers to v6.16-rc3 Signed-off-by: Paolo Bonzini --- include/standard-headers/asm-x86/setup_data.h | 13 +- include/standard-headers/drm/drm_fourcc.h | 45 +++++++ include/standard-headers/linux/ethtool.h | 124 +++++++++--------- include/standard-headers/linux/fuse.h | 6 +- .../linux/input-event-codes.h | 3 +- include/standard-headers/linux/pci_regs.h | 12 +- include/standard-headers/linux/virtio_gpu.h | 3 +- include/standard-headers/linux/virtio_pci.h | 1 + linux-headers/asm-arm64/kvm.h | 9 +- linux-headers/asm-x86/kvm.h | 1 + linux-headers/linux/bits.h | 4 +- linux-headers/linux/kvm.h | 25 ++++ linux-headers/linux/vhost.h | 4 +- 13 files changed, 177 insertions(+), 73 deletions(-) diff --git a/include/standard-headers/asm-x86/setup_data.h b/include/standard-headers/asm-x86/setup_data.h index a483d72f42..2e446c1d85 100644 --- a/include/standard-headers/asm-x86/setup_data.h +++ b/include/standard-headers/asm-x86/setup_data.h @@ -13,7 +13,8 @@ #define SETUP_CC_BLOB 7 #define SETUP_IMA 8 #define SETUP_RNG_SEED 9 -#define SETUP_ENUM_MAX SETUP_RNG_SEED +#define SETUP_KEXEC_KHO 10 +#define SETUP_ENUM_MAX SETUP_KEXEC_KHO #define SETUP_INDIRECT (1<<31) #define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) @@ -78,6 +79,16 @@ struct ima_setup_data { uint64_t size; } QEMU_PACKED; +/* + * Locations of kexec handover metadata + */ +struct kho_data { + uint64_t fdt_addr; + uint64_t fdt_size; + uint64_t scratch_addr; + uint64_t scratch_size; +} QEMU_PACKED; + #endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_SETUP_DATA_H */ diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h index a8b759dcbc..c8309d378b 100644 --- a/include/standard-headers/drm/drm_fourcc.h +++ b/include/standard-headers/drm/drm_fourcc.h @@ -421,6 +421,7 @@ extern "C" { #define DRM_FORMAT_MOD_VENDOR_ALLWINNER 0x09 #define DRM_FORMAT_MOD_VENDOR_AMLOGIC 0x0a #define DRM_FORMAT_MOD_VENDOR_MTK 0x0b +#define DRM_FORMAT_MOD_VENDOR_APPLE 0x0c /* add more to the end as needed */ @@ -1493,6 +1494,50 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier) /* alias for the most common tiling format */ #define DRM_FORMAT_MOD_MTK_16L_32S_TILE DRM_FORMAT_MOD_MTK(MTK_FMT_MOD_TILE_16L32S) +/* + * Apple GPU-tiled layouts. + * + * Apple GPUs support nonlinear tilings with optional lossless compression. + * + * GPU-tiled images are divided into 16KiB tiles: + * + * Bytes per pixel Tile size + * --------------- --------- + * 1 128x128 + * 2 128x64 + * 4 64x64 + * 8 64x32 + * 16 32x32 + * + * Tiles are raster-order. Pixels within a tile are interleaved (Morton order). + * + * Compressed images pad the body to 128-bytes and are immediately followed by a + * metadata section. The metadata section rounds the image dimensions to + * powers-of-two and contains 8 bytes for each 16x16 compression subtile. + * Subtiles are interleaved (Morton order). + * + * All images are 128-byte aligned. + * + * These layouts fundamentally do not have meaningful strides. No matter how we + * specify strides for these layouts, userspace unaware of Apple image layouts + * will be unable to use correctly the specified stride for any purpose. + * Userspace aware of the image layouts do not use strides. The most "correct" + * convention would be setting the image stride to 0. Unfortunately, some + * software assumes the stride is at least (width * bytes per pixel). We + * therefore require that stride equals (width * bytes per pixel). Since the + * stride is arbitrary here, we pick the simplest convention. + * + * Although containing two sections, compressed image layouts are treated in + * software as a single plane. This is modelled after AFBC, a similar + * scheme. Attempting to separate the sections to be "explicit" in DRM would + * only generate more confusion, as software does not treat the image this way. + * + * For detailed information on the hardware image layouts, see + * https://docs.mesa3d.org/drivers/asahi.html#image-layouts + */ +#define DRM_FORMAT_MOD_APPLE_GPU_TILED fourcc_mod_code(APPLE, 1) +#define DRM_FORMAT_MOD_APPLE_GPU_TILED_COMPRESSED fourcc_mod_code(APPLE, 2) + /* * AMD modifiers * diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h index 5d1ad5fdea..cef0d207a6 100644 --- a/include/standard-headers/linux/ethtool.h +++ b/include/standard-headers/linux/ethtool.h @@ -2295,71 +2295,75 @@ static inline int ethtool_validate_duplex(uint8_t duplex) #define RXH_XFRM_SYM_OR_XOR (1 << 1) #define RXH_XFRM_NO_CHANGE 0xff -/* L2-L4 network traffic flow types */ -#define TCP_V4_FLOW 0x01 /* hash or spec (tcp_ip4_spec) */ -#define UDP_V4_FLOW 0x02 /* hash or spec (udp_ip4_spec) */ -#define SCTP_V4_FLOW 0x03 /* hash or spec (sctp_ip4_spec) */ -#define AH_ESP_V4_FLOW 0x04 /* hash only */ -#define TCP_V6_FLOW 0x05 /* hash or spec (tcp_ip6_spec; nfc only) */ -#define UDP_V6_FLOW 0x06 /* hash or spec (udp_ip6_spec; nfc only) */ -#define SCTP_V6_FLOW 0x07 /* hash or spec (sctp_ip6_spec; nfc only) */ -#define AH_ESP_V6_FLOW 0x08 /* hash only */ -#define AH_V4_FLOW 0x09 /* hash or spec (ah_ip4_spec) */ -#define ESP_V4_FLOW 0x0a /* hash or spec (esp_ip4_spec) */ -#define AH_V6_FLOW 0x0b /* hash or spec (ah_ip6_spec; nfc only) */ -#define ESP_V6_FLOW 0x0c /* hash or spec (esp_ip6_spec; nfc only) */ -#define IPV4_USER_FLOW 0x0d /* spec only (usr_ip4_spec) */ -#define IP_USER_FLOW IPV4_USER_FLOW -#define IPV6_USER_FLOW 0x0e /* spec only (usr_ip6_spec; nfc only) */ -#define IPV4_FLOW 0x10 /* hash only */ -#define IPV6_FLOW 0x11 /* hash only */ -#define ETHER_FLOW 0x12 /* spec only (ether_spec) */ +enum { + /* L2-L4 network traffic flow types */ + TCP_V4_FLOW = 0x01, /* hash or spec (tcp_ip4_spec) */ + UDP_V4_FLOW = 0x02, /* hash or spec (udp_ip4_spec) */ + SCTP_V4_FLOW = 0x03, /* hash or spec (sctp_ip4_spec) */ + AH_ESP_V4_FLOW = 0x04, /* hash only */ + TCP_V6_FLOW = 0x05, /* hash or spec (tcp_ip6_spec; nfc only) */ + UDP_V6_FLOW = 0x06, /* hash or spec (udp_ip6_spec; nfc only) */ + SCTP_V6_FLOW = 0x07, /* hash or spec (sctp_ip6_spec; nfc only) */ + AH_ESP_V6_FLOW = 0x08, /* hash only */ + AH_V4_FLOW = 0x09, /* hash or spec (ah_ip4_spec) */ + ESP_V4_FLOW = 0x0a, /* hash or spec (esp_ip4_spec) */ + AH_V6_FLOW = 0x0b, /* hash or spec (ah_ip6_spec; nfc only) */ + ESP_V6_FLOW = 0x0c, /* hash or spec (esp_ip6_spec; nfc only) */ + IPV4_USER_FLOW = 0x0d, /* spec only (usr_ip4_spec) */ + IP_USER_FLOW = IPV4_USER_FLOW, + IPV6_USER_FLOW = 0x0e, /* spec only (usr_ip6_spec; nfc only) */ + IPV4_FLOW = 0x10, /* hash only */ + IPV6_FLOW = 0x11, /* hash only */ + ETHER_FLOW = 0x12, /* spec only (ether_spec) */ -/* Used for GTP-U IPv4 and IPv6. - * The format of GTP packets only includes - * elements such as TEID and GTP version. - * It is primarily intended for data communication of the UE. - */ -#define GTPU_V4_FLOW 0x13 /* hash only */ -#define GTPU_V6_FLOW 0x14 /* hash only */ + /* Used for GTP-U IPv4 and IPv6. + * The format of GTP packets only includes + * elements such as TEID and GTP version. + * It is primarily intended for data communication of the UE. + */ + GTPU_V4_FLOW = 0x13, /* hash only */ + GTPU_V6_FLOW = 0x14, /* hash only */ -/* Use for GTP-C IPv4 and v6. - * The format of these GTP packets does not include TEID. - * Primarily expected to be used for communication - * to create sessions for UE data communication, - * commonly referred to as CSR (Create Session Request). - */ -#define GTPC_V4_FLOW 0x15 /* hash only */ -#define GTPC_V6_FLOW 0x16 /* hash only */ + /* Use for GTP-C IPv4 and v6. + * The format of these GTP packets does not include TEID. + * Primarily expected to be used for communication + * to create sessions for UE data communication, + * commonly referred to as CSR (Create Session Request). + */ + GTPC_V4_FLOW = 0x15, /* hash only */ + GTPC_V6_FLOW = 0x16, /* hash only */ -/* Use for GTP-C IPv4 and v6. - * Unlike GTPC_V4_FLOW, the format of these GTP packets includes TEID. - * After session creation, it becomes this packet. - * This is mainly used for requests to realize UE handover. - */ -#define GTPC_TEID_V4_FLOW 0x17 /* hash only */ -#define GTPC_TEID_V6_FLOW 0x18 /* hash only */ + /* Use for GTP-C IPv4 and v6. + * Unlike GTPC_V4_FLOW, the format of these GTP packets includes TEID. + * After session creation, it becomes this packet. + * This is mainly used for requests to realize UE handover. + */ + GTPC_TEID_V4_FLOW = 0x17, /* hash only */ + GTPC_TEID_V6_FLOW = 0x18, /* hash only */ -/* Use for GTP-U and extended headers for the PSC (PDU Session Container). - * The format of these GTP packets includes TEID and QFI. - * In 5G communication using UPF (User Plane Function), - * data communication with this extended header is performed. - */ -#define GTPU_EH_V4_FLOW 0x19 /* hash only */ -#define GTPU_EH_V6_FLOW 0x1a /* hash only */ + /* Use for GTP-U and extended headers for the PSC (PDU Session Container). + * The format of these GTP packets includes TEID and QFI. + * In 5G communication using UPF (User Plane Function), + * data communication with this extended header is performed. + */ + GTPU_EH_V4_FLOW = 0x19, /* hash only */ + GTPU_EH_V6_FLOW = 0x1a, /* hash only */ -/* Use for GTP-U IPv4 and v6 PSC (PDU Session Container) extended headers. - * This differs from GTPU_EH_V(4|6)_FLOW in that it is distinguished by - * UL/DL included in the PSC. - * There are differences in the data included based on Downlink/Uplink, - * and can be used to distinguish packets. - * The functions described so far are useful when you want to - * handle communication from the mobile network in UPF, PGW, etc. - */ -#define GTPU_UL_V4_FLOW 0x1b /* hash only */ -#define GTPU_UL_V6_FLOW 0x1c /* hash only */ -#define GTPU_DL_V4_FLOW 0x1d /* hash only */ -#define GTPU_DL_V6_FLOW 0x1e /* hash only */ + /* Use for GTP-U IPv4 and v6 PSC (PDU Session Container) extended headers. + * This differs from GTPU_EH_V(4|6)_FLOW in that it is distinguished by + * UL/DL included in the PSC. + * There are differences in the data included based on Downlink/Uplink, + * and can be used to distinguish packets. + * The functions described so far are useful when you want to + * handle communication from the mobile network in UPF, PGW, etc. + */ + GTPU_UL_V4_FLOW = 0x1b, /* hash only */ + GTPU_UL_V6_FLOW = 0x1c, /* hash only */ + GTPU_DL_V4_FLOW = 0x1d, /* hash only */ + GTPU_DL_V6_FLOW = 0x1e, /* hash only */ + + __FLOW_TYPE_COUNT, +}; /* Flag to enable additional fields in struct ethtool_rx_flow_spec */ #define FLOW_EXT 0x80000000 diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h index a2b5815d89..d8b2fd67e1 100644 --- a/include/standard-headers/linux/fuse.h +++ b/include/standard-headers/linux/fuse.h @@ -232,6 +232,9 @@ * * 7.43 * - add FUSE_REQUEST_TIMEOUT + * + * 7.44 + * - add FUSE_NOTIFY_INC_EPOCH */ #ifndef _LINUX_FUSE_H @@ -263,7 +266,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 43 +#define FUSE_KERNEL_MINOR_VERSION 44 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -667,6 +670,7 @@ enum fuse_notify_code { FUSE_NOTIFY_RETRIEVE = 5, FUSE_NOTIFY_DELETE = 6, FUSE_NOTIFY_RESEND = 7, + FUSE_NOTIFY_INC_EPOCH = 8, FUSE_NOTIFY_CODE_MAX, }; diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h index 09ba0ad878..a82ff795e0 100644 --- a/include/standard-headers/linux/input-event-codes.h +++ b/include/standard-headers/linux/input-event-codes.h @@ -925,7 +925,8 @@ #define SW_MUTE_DEVICE 0x0e /* set = device disabled */ #define SW_PEN_INSERTED 0x0f /* set = pen inserted */ #define SW_MACHINE_COVER 0x10 /* set = cover closed */ -#define SW_MAX_ 0x10 +#define SW_USB_INSERT 0x11 /* set = USB audio device connected */ +#define SW_MAX_ 0x11 #define SW_CNT (SW_MAX_+1) /* diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h index ba326710f9..a3a3e942de 100644 --- a/include/standard-headers/linux/pci_regs.h +++ b/include/standard-headers/linux/pci_regs.h @@ -750,7 +750,8 @@ #define PCI_EXT_CAP_ID_NPEM 0x29 /* Native PCIe Enclosure Management */ #define PCI_EXT_CAP_ID_PL_32GT 0x2A /* Physical Layer 32.0 GT/s */ #define PCI_EXT_CAP_ID_DOE 0x2E /* Data Object Exchange */ -#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_DOE +#define PCI_EXT_CAP_ID_PL_64GT 0x31 /* Physical Layer 64.0 GT/s */ +#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PL_64GT #define PCI_EXT_CAP_DSN_SIZEOF 12 #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40 @@ -1144,12 +1145,21 @@ #define PCI_DLF_CAP 0x04 /* Capabilities Register */ #define PCI_DLF_EXCHANGE_ENABLE 0x80000000 /* Data Link Feature Exchange Enable */ +/* Secondary PCIe Capability 8.0 GT/s */ +#define PCI_SECPCI_LE_CTRL 0x0c /* Lane Equalization Control Register */ + /* Physical Layer 16.0 GT/s */ #define PCI_PL_16GT_LE_CTRL 0x20 /* Lane Equalization Control Register */ #define PCI_PL_16GT_LE_CTRL_DSP_TX_PRESET_MASK 0x0000000F #define PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_MASK 0x000000F0 #define PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_SHIFT 4 +/* Physical Layer 32.0 GT/s */ +#define PCI_PL_32GT_LE_CTRL 0x20 /* Lane Equalization Control Register */ + +/* Physical Layer 64.0 GT/s */ +#define PCI_PL_64GT_LE_CTRL 0x20 /* Lane Equalization Control Register */ + /* Native PCIe Enclosure Management */ #define PCI_NPEM_CAP 0x04 /* NPEM capability register */ #define PCI_NPEM_CAP_CAPABLE 0x00000001 /* NPEM Capable */ diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h index 6459fdb9fb..00cd3f04af 100644 --- a/include/standard-headers/linux/virtio_gpu.h +++ b/include/standard-headers/linux/virtio_gpu.h @@ -309,8 +309,9 @@ struct virtio_gpu_cmd_submit { #define VIRTIO_GPU_CAPSET_VIRGL 1 #define VIRTIO_GPU_CAPSET_VIRGL2 2 -/* 3 is reserved for gfxstream */ +#define VIRTIO_GPU_CAPSET_GFXSTREAM_VULKAN 3 #define VIRTIO_GPU_CAPSET_VENUS 4 +#define VIRTIO_GPU_CAPSET_CROSS_DOMAIN 5 #define VIRTIO_GPU_CAPSET_DRM 6 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ diff --git a/include/standard-headers/linux/virtio_pci.h b/include/standard-headers/linux/virtio_pci.h index 91fec6f502..09e964e6ee 100644 --- a/include/standard-headers/linux/virtio_pci.h +++ b/include/standard-headers/linux/virtio_pci.h @@ -246,6 +246,7 @@ struct virtio_pci_cfg_cap { #define VIRTIO_ADMIN_CMD_LIST_USE 0x1 /* Admin command group type. */ +#define VIRTIO_ADMIN_GROUP_TYPE_SELF 0x0 #define VIRTIO_ADMIN_GROUP_TYPE_SRIOV 0x1 /* Transitional device admin command. */ diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index 4e6aff08df..f4d9baafa1 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -419,10 +419,11 @@ enum { /* Device Control API on vcpu fd */ #define KVM_ARM_VCPU_PMU_V3_CTRL 0 -#define KVM_ARM_VCPU_PMU_V3_IRQ 0 -#define KVM_ARM_VCPU_PMU_V3_INIT 1 -#define KVM_ARM_VCPU_PMU_V3_FILTER 2 -#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 +#define KVM_ARM_VCPU_PMU_V3_IRQ 0 +#define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_PMU_V3_FILTER 2 +#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 +#define KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS 4 #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index 7fb57ccb2a..cd275ae76d 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -843,6 +843,7 @@ struct kvm_sev_snp_launch_start { }; /* Kept in sync with firmware values for simplicity. */ +#define KVM_SEV_PAGE_TYPE_INVALID 0x0 #define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1 #define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3 #define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4 diff --git a/linux-headers/linux/bits.h b/linux-headers/linux/bits.h index 58596d18f4..9243f38975 100644 --- a/linux-headers/linux/bits.h +++ b/linux-headers/linux/bits.h @@ -4,9 +4,9 @@ #ifndef _LINUX_BITS_H #define _LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 99cc82a275..0690743944 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -178,6 +178,7 @@ struct kvm_xen_exit { #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 #define KVM_EXIT_MEMORY_FAULT 39 +#define KVM_EXIT_TDX 40 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -439,6 +440,27 @@ struct kvm_run { __u64 gpa; __u64 size; } memory_fault; + /* KVM_EXIT_TDX */ + struct { + __u64 flags; + __u64 nr; + union { + struct { + __u64 ret; + __u64 data[5]; + } unknown; + struct { + __u64 ret; + __u64 gpa; + __u64 size; + } get_quote; + struct { + __u64 ret; + __u64 leaf; + __u64 r11, r12, r13, r14; + } get_tdvmcall_info; + }; + } tdx; /* Fix the size of the union. */ char padding[256]; }; @@ -923,6 +945,9 @@ struct kvm_enable_cap { #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 #define KVM_CAP_X86_GUEST_MODE 238 #define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 +#define KVM_CAP_ARM_EL2 240 +#define KVM_CAP_ARM_EL2_E2H0 241 +#define KVM_CAP_RISCV_MP_STATE_RESET 242 struct kvm_irq_routing_irqchip { __u32 irqchip; diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index b95dd84eef..d4b3e2ae13 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -28,10 +28,10 @@ /* Set current process as the (exclusive) owner of this file descriptor. This * must be called before any other vhost command. Further calls to - * VHOST_OWNER_SET fail until VHOST_OWNER_RESET is called. */ + * VHOST_SET_OWNER fail until VHOST_RESET_OWNER is called. */ #define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) /* Give up ownership, and reset the device to default values. - * Allows subsequent call to VHOST_OWNER_SET to succeed. */ + * Allows subsequent call to VHOST_SET_OWNER to succeed. */ #define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) /* Set up/modify memory layout */ From 427b8cf47a6959cd8b0db12bcf66e9009afa2c07 Mon Sep 17 00:00:00 2001 From: Binbin Wu Date: Wed, 30 Apr 2025 08:53:14 +0800 Subject: [PATCH 23/24] i386/tdx: handle TDG.VP.VMCALL Signed-off-by: Binbin Wu Signed-off-by: Paolo Bonzini --- target/i386/kvm/kvm.c | 12 ++++++++++++ target/i386/kvm/tdx-stub.c | 4 ++++ target/i386/kvm/tdx.c | 12 ++++++++++++ target/i386/kvm/tdx.h | 9 +++++++++ 4 files changed, 37 insertions(+) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 56a6b9b638..8ef29fc1fb 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -6170,6 +6170,18 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) break; } break; + case KVM_EXIT_TDX: + /* + * run->tdx is already set up for the case where userspace + * does not handle the TDVMCALL. + */ + switch (run->tdx.nr) { + case TDVMCALL_GET_TD_VM_CALL_INFO: + tdx_handle_get_tdvmcall_info(cpu, run); + break; + } + ret = 0; + break; default: fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); ret = -1; diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c index 720a4ff046..62a12a0677 100644 --- a/target/i386/kvm/tdx-stub.c +++ b/target/i386/kvm/tdx-stub.c @@ -18,3 +18,7 @@ int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run) { return -EINVAL; } + +void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run) +{ +} diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 2284167141..ef10a19347 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -1120,6 +1120,18 @@ int tdx_parse_tdvf(void *flash_ptr, int size) return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size); } +void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run) +{ + if (run->tdx.get_tdvmcall_info.leaf != 1) { + return; + } + + run->tdx.get_tdvmcall_info.r11 = 0; + run->tdx.get_tdvmcall_info.r12 = 0; + run->tdx.get_tdvmcall_info.r13 = 0; + run->tdx.get_tdvmcall_info.r14 = 0; +} + static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code, char *message, uint64_t gpa) { diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h index 8dd66e9014..0dd41d5811 100644 --- a/target/i386/kvm/tdx.h +++ b/target/i386/kvm/tdx.h @@ -21,6 +21,14 @@ typedef struct TdxGuestClass { /* TDX requires bus frequency 25MHz */ #define TDX_APIC_BUS_CYCLES_NS 40 +#define TDVMCALL_GET_TD_VM_CALL_INFO 0x10000 + +#define TDG_VP_VMCALL_SUCCESS 0x0000000000000000ULL +#define TDG_VP_VMCALL_RETRY 0x0000000000000001ULL +#define TDG_VP_VMCALL_INVALID_OPERAND 0x8000000000000000ULL +#define TDG_VP_VMCALL_GPA_INUSE 0x8000000000000001ULL +#define TDG_VP_VMCALL_ALIGN_ERROR 0x8000000000000002ULL + enum TdxRamType { TDX_RAM_UNACCEPTED, TDX_RAM_ADDED, @@ -61,5 +69,6 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp); void tdx_set_tdvf_region(MemoryRegion *tdvf_mr); int tdx_parse_tdvf(void *flash_ptr, int size); int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run); +void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run); #endif /* QEMU_I386_TDX_H */ From 40da501d8989913935660dc24953ece02c9e98b8 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 28 Nov 2022 17:43:52 +0800 Subject: [PATCH 24/24] i386/tdx: handle TDG.VP.VMCALL Add property "quote-generation-socket" to tdx-guest, which is a property of type SocketAddress to specify Quote Generation Service(QGS). On request of GetQuote, it connects to the QGS socket, read request data from shared guest memory, send the request data to the QGS, and store the response into shared guest memory, at last notify TD guest by interrupt. command line example: qemu-system-x86_64 \ -object '{"qom-type":"tdx-guest","id":"tdx0","quote-generation-socket":{"type":"unix", "path":"/var/run/tdx-qgs/qgs.socket"}}' \ -machine confidential-guest-support=tdx0 Note, above example uses the unix socket. It can be other types, like vsock, which depends on the implementation of QGS. To avoid no response from QGS server, setup a timer for the transaction. If timeout, make it an error and interrupt guest. Define the threshold of time to 30s at present, maybe change to other value if not appropriate. Signed-off-by: Isaku Yamahata Co-developed-by: Chenyi Qiang Signed-off-by: Chenyi Qiang Co-developed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Tested-by: Xiaoyao Li Signed-off-by: Paolo Bonzini --- qapi/qom.json | 8 +- target/i386/kvm/kvm.c | 3 + target/i386/kvm/meson.build | 2 +- target/i386/kvm/tdx-quote-generator.c | 300 ++++++++++++++++++++++++++ target/i386/kvm/tdx-quote-generator.h | 82 +++++++ target/i386/kvm/tdx-stub.c | 4 + target/i386/kvm/tdx.c | 176 ++++++++++++++- target/i386/kvm/tdx.h | 10 + 8 files changed, 582 insertions(+), 3 deletions(-) create mode 100644 target/i386/kvm/tdx-quote-generator.c create mode 100644 target/i386/kvm/tdx-quote-generator.h diff --git a/qapi/qom.json b/qapi/qom.json index 3e8debf78c..b133b06447 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -1071,6 +1071,11 @@ # e.g., specific to the workload rather than the run-time or OS # (base64 encoded SHA384 digest). Defaults to all zeros. # +# @quote-generation-socket: socket address for Quote Generation +# Service (QGS). QGS is a daemon running on the host. Without +# it, the guest will not be able to get a TD quote for +# attestation. +# # Since: 10.1 ## { 'struct': 'TdxGuestProperties', @@ -1078,7 +1083,8 @@ '*sept-ve-disable': 'bool', '*mrconfigid': 'str', '*mrowner': 'str', - '*mrownerconfig': 'str' } } + '*mrownerconfig': 'str', + '*quote-generation-socket': 'SocketAddress' } } ## # @ThreadContextProperties: diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 8ef29fc1fb..234878c613 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -6176,6 +6176,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) * does not handle the TDVMCALL. */ switch (run->tdx.nr) { + case TDVMCALL_GET_QUOTE: + tdx_handle_get_quote(cpu, run); + break; case TDVMCALL_GET_TD_VM_CALL_INFO: tdx_handle_get_tdvmcall_info(cpu, run); break; diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build index 3f44cdedb7..2675bf8902 100644 --- a/target/i386/kvm/meson.build +++ b/target/i386/kvm/meson.build @@ -8,7 +8,7 @@ i386_kvm_ss.add(files( i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c')) -i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c'), if_false: files('tdx-stub.c')) +i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c', 'tdx-quote-generator.c'), if_false: files('tdx-stub.c')) i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c')) diff --git a/target/i386/kvm/tdx-quote-generator.c b/target/i386/kvm/tdx-quote-generator.c new file mode 100644 index 0000000000..f59715f617 --- /dev/null +++ b/target/i386/kvm/tdx-quote-generator.c @@ -0,0 +1,300 @@ +/* + * QEMU TDX Quote Generation Support + * + * Copyright (c) 2025 Intel Corporation + * + * Author: + * Xiaoyao Li + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qapi/qapi-visit-sockets.h" + +#include "tdx-quote-generator.h" + +#define QGS_MSG_LIB_MAJOR_VER 1 +#define QGS_MSG_LIB_MINOR_VER 1 + +typedef enum _qgs_msg_type_t { + GET_QUOTE_REQ = 0, + GET_QUOTE_RESP = 1, + GET_COLLATERAL_REQ = 2, + GET_COLLATERAL_RESP = 3, + GET_PLATFORM_INFO_REQ = 4, + GET_PLATFORM_INFO_RESP = 5, + QGS_MSG_TYPE_MAX +} qgs_msg_type_t; + +typedef struct _qgs_msg_header_t { + uint16_t major_version; + uint16_t minor_version; + uint32_t type; + uint32_t size; // size of the whole message, include this header, in byte + uint32_t error_code; // used in response only +} qgs_msg_header_t; + +typedef struct _qgs_msg_get_quote_req_t { + qgs_msg_header_t header; // header.type = GET_QUOTE_REQ + uint32_t report_size; // cannot be 0 + uint32_t id_list_size; // length of id_list, in byte, can be 0 +} qgs_msg_get_quote_req_t; + +typedef struct _qgs_msg_get_quote_resp_s { + qgs_msg_header_t header; // header.type = GET_QUOTE_RESP + uint32_t selected_id_size; // can be 0 in case only one id is sent in request + uint32_t quote_size; // length of quote_data, in byte + uint8_t id_quote[]; // selected id followed by quote +} qgs_msg_get_quote_resp_t; + +#define HEADER_SIZE 4 + +static uint32_t decode_header(const char *buf, size_t len) { + if (len < HEADER_SIZE) { + return 0; + } + uint32_t msg_size = 0; + for (uint32_t i = 0; i < HEADER_SIZE; ++i) { + msg_size = msg_size * 256 + (buf[i] & 0xFF); + } + return msg_size; +} + +static void encode_header(char *buf, size_t len, uint32_t size) { + assert(len >= HEADER_SIZE); + buf[0] = ((size >> 24) & 0xFF); + buf[1] = ((size >> 16) & 0xFF); + buf[2] = ((size >> 8) & 0xFF); + buf[3] = (size & 0xFF); +} + +static void tdx_generate_quote_cleanup(TdxGenerateQuoteTask *task) +{ + timer_del(&task->timer); + + g_source_remove(task->watch); + qio_channel_close(QIO_CHANNEL(task->sioc), NULL); + object_unref(OBJECT(task->sioc)); + + task->completion(task); +} + +static gboolean tdx_get_quote_read(QIOChannel *ioc, GIOCondition condition, + gpointer opaque) +{ + TdxGenerateQuoteTask *task = opaque; + Error *err = NULL; + int ret; + + ret = qio_channel_read(ioc, task->receive_buf + task->receive_buf_received, + task->payload_len - task->receive_buf_received, &err); + if (ret < 0) { + if (ret == QIO_CHANNEL_ERR_BLOCK) { + return G_SOURCE_CONTINUE; + } else { + error_report_err(err); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + } + + if (ret == 0) { + error_report("End of file before reply received"); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + + task->receive_buf_received += ret; + if (task->receive_buf_received >= HEADER_SIZE) { + uint32_t len = decode_header(task->receive_buf, + task->receive_buf_received); + if (len == 0 || + len > (task->payload_len - HEADER_SIZE)) { + error_report("Message len %u must be non-zero & less than %zu", + len, (task->payload_len - HEADER_SIZE)); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + + /* Now we know the size, shrink to fit */ + task->payload_len = HEADER_SIZE + len; + task->receive_buf = g_renew(char, + task->receive_buf, + task->payload_len); + } + + if (task->receive_buf_received >= (sizeof(qgs_msg_header_t) + HEADER_SIZE)) { + qgs_msg_header_t *hdr = (qgs_msg_header_t *)(task->receive_buf + HEADER_SIZE); + if (hdr->major_version != QGS_MSG_LIB_MAJOR_VER || + hdr->minor_version != QGS_MSG_LIB_MINOR_VER) { + error_report("Invalid QGS message header version %d.%d", + hdr->major_version, + hdr->minor_version); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + if (hdr->type != GET_QUOTE_RESP) { + error_report("Invalid QGS message type %d", + hdr->type); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + if (hdr->size > (task->payload_len - HEADER_SIZE)) { + error_report("QGS message size %d exceeds payload capacity %zu", + hdr->size, task->payload_len); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + if (hdr->error_code != 0) { + error_report("QGS message error code %d", + hdr->error_code); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + } + if (task->receive_buf_received >= (sizeof(qgs_msg_get_quote_resp_t) + HEADER_SIZE)) { + qgs_msg_get_quote_resp_t *msg = (qgs_msg_get_quote_resp_t *)(task->receive_buf + HEADER_SIZE); + if (msg->selected_id_size != 0) { + error_report("QGS message selected ID was %d not 0", + msg->selected_id_size); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + + if ((task->payload_len - HEADER_SIZE - sizeof(qgs_msg_get_quote_resp_t)) != + msg->quote_size) { + error_report("QGS quote size %d should be %zu", + msg->quote_size, + (task->payload_len - sizeof(qgs_msg_get_quote_resp_t))); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + } + + if (task->receive_buf_received == task->payload_len) { + size_t strip = HEADER_SIZE + sizeof(qgs_msg_get_quote_resp_t); + memmove(task->receive_buf, + task->receive_buf + strip, + task->receive_buf_received - strip); + task->receive_buf_received -= strip; + task->status_code = TDX_VP_GET_QUOTE_SUCCESS; + goto end; + } + + return G_SOURCE_CONTINUE; + +end: + tdx_generate_quote_cleanup(task); + return G_SOURCE_REMOVE; +} + +static gboolean tdx_send_report(QIOChannel *ioc, GIOCondition condition, + gpointer opaque) +{ + TdxGenerateQuoteTask *task = opaque; + Error *err = NULL; + int ret; + + ret = qio_channel_write(ioc, task->send_data + task->send_data_sent, + task->send_data_size - task->send_data_sent, &err); + if (ret < 0) { + if (ret == QIO_CHANNEL_ERR_BLOCK) { + ret = 0; + } else { + error_report_err(err); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + tdx_generate_quote_cleanup(task); + goto end; + } + } + task->send_data_sent += ret; + + if (task->send_data_sent == task->send_data_size) { + task->watch = qio_channel_add_watch(QIO_CHANNEL(task->sioc), G_IO_IN, + tdx_get_quote_read, task, NULL); + goto end; + } + + return G_SOURCE_CONTINUE; + +end: + return G_SOURCE_REMOVE; +} + +static void tdx_quote_generator_connected(QIOTask *qio_task, gpointer opaque) +{ + TdxGenerateQuoteTask *task = opaque; + Error *err = NULL; + int ret; + + ret = qio_task_propagate_error(qio_task, &err); + if (ret) { + error_report_err(err); + task->status_code = TDX_VP_GET_QUOTE_QGS_UNAVAILABLE; + tdx_generate_quote_cleanup(task); + return; + } + + task->watch = qio_channel_add_watch(QIO_CHANNEL(task->sioc), G_IO_OUT, + tdx_send_report, task, NULL); +} + +#define TRANSACTION_TIMEOUT 30000 + +static void getquote_expired(void *opaque) +{ + TdxGenerateQuoteTask *task = opaque; + + task->status_code = TDX_VP_GET_QUOTE_ERROR; + tdx_generate_quote_cleanup(task); +} + +static void setup_get_quote_timer(TdxGenerateQuoteTask *task) +{ + int64_t time; + + timer_init_ms(&task->timer, QEMU_CLOCK_VIRTUAL, getquote_expired, task); + time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + timer_mod(&task->timer, time + TRANSACTION_TIMEOUT); +} + +void tdx_generate_quote(TdxGenerateQuoteTask *task, + SocketAddress *qg_sock_addr) +{ + QIOChannelSocket *sioc; + qgs_msg_get_quote_req_t msg; + + /* Prepare a QGS message prelude */ + msg.header.major_version = QGS_MSG_LIB_MAJOR_VER; + msg.header.minor_version = QGS_MSG_LIB_MINOR_VER; + msg.header.type = GET_QUOTE_REQ; + msg.header.size = sizeof(msg) + task->send_data_size; + msg.header.error_code = 0; + msg.report_size = task->send_data_size; + msg.id_list_size = 0; + + /* Make room to add the QGS message prelude */ + task->send_data = g_renew(char, + task->send_data, + task->send_data_size + sizeof(msg) + HEADER_SIZE); + memmove(task->send_data + sizeof(msg) + HEADER_SIZE, + task->send_data, + task->send_data_size); + memcpy(task->send_data + HEADER_SIZE, + &msg, + sizeof(msg)); + encode_header(task->send_data, HEADER_SIZE, task->send_data_size + sizeof(msg)); + task->send_data_size += sizeof(msg) + HEADER_SIZE; + + sioc = qio_channel_socket_new(); + task->sioc = sioc; + + setup_get_quote_timer(task); + + qio_channel_socket_connect_async(sioc, qg_sock_addr, + tdx_quote_generator_connected, task, + NULL, NULL); +} diff --git a/target/i386/kvm/tdx-quote-generator.h b/target/i386/kvm/tdx-quote-generator.h new file mode 100644 index 0000000000..3bd9b8ef33 --- /dev/null +++ b/target/i386/kvm/tdx-quote-generator.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef QEMU_I386_TDX_QUOTE_GENERATOR_H +#define QEMU_I386_TDX_QUOTE_GENERATOR_H + +#include "qom/object_interfaces.h" +#include "io/channel-socket.h" +#include "exec/hwaddr.h" + +#define TDX_GET_QUOTE_STRUCTURE_VERSION 1ULL + +#define TDX_VP_GET_QUOTE_SUCCESS 0ULL +#define TDX_VP_GET_QUOTE_IN_FLIGHT (-1ULL) +#define TDX_VP_GET_QUOTE_ERROR 0x8000000000000000ULL +#define TDX_VP_GET_QUOTE_QGS_UNAVAILABLE 0x8000000000000001ULL + +/* Limit to avoid resource starvation. */ +#define TDX_GET_QUOTE_MAX_BUF_LEN (128 * 1024) +#define TDX_MAX_GET_QUOTE_REQUEST 16 + +#define TDX_GET_QUOTE_HDR_SIZE 24 + +/* Format of pages shared with guest. */ +struct tdx_get_quote_header { + /* Format version: must be 1 in little endian. */ + uint64_t structure_version; + + /* + * GetQuote status code in little endian: + * Guest must set error_code to 0 to avoid information leak. + * Qemu sets this before interrupting guest. + */ + uint64_t error_code; + + /* + * in-message size in little endian: The message will follow this header. + * The in-message will be send to QGS. + */ + uint32_t in_len; + + /* + * out-message size in little endian: + * On request, out_len must be zero to avoid information leak. + * On return, message size from QGS. Qemu overwrites this field. + * The message will follows this header. The in-message is overwritten. + */ + uint32_t out_len; + + /* + * Message buffer follows. + * Guest sets message that will be send to QGS. If out_len > in_len, guest + * should zero remaining buffer to avoid information leak. + * Qemu overwrites this buffer with a message returned from QGS. + */ +}; + +typedef struct TdxGenerateQuoteTask { + hwaddr buf_gpa; + hwaddr payload_gpa; + uint64_t payload_len; + + char *send_data; + uint64_t send_data_size; + uint64_t send_data_sent; + + char *receive_buf; + uint64_t receive_buf_received; + + uint64_t status_code; + struct tdx_get_quote_header hdr; + + QIOChannelSocket *sioc; + guint watch; + QEMUTimer timer; + + void (*completion)(struct TdxGenerateQuoteTask *task); + void *opaque; +} TdxGenerateQuoteTask; + +void tdx_generate_quote(TdxGenerateQuoteTask *task, SocketAddress *qg_sock_addr); + +#endif /* QEMU_I386_TDX_QUOTE_GENERATOR_H */ diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c index 62a12a0677..76fee49eff 100644 --- a/target/i386/kvm/tdx-stub.c +++ b/target/i386/kvm/tdx-stub.c @@ -19,6 +19,10 @@ int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run) return -EINVAL; } +void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run) +{ +} + void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run) { } diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index ef10a19347..e809e4b2df 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -14,12 +14,14 @@ #include "qemu/base64.h" #include "qemu/mmap-alloc.h" #include "qapi/error.h" +#include "qapi/qapi-visit-sockets.h" #include "qom/object_interfaces.h" #include "crypto/hash.h" #include "system/kvm_int.h" #include "system/runstate.h" #include "system/system.h" #include "system/ramblock.h" +#include "system/address-spaces.h" #include @@ -32,6 +34,7 @@ #include "hw/i386/tdvf-hob.h" #include "kvm_i386.h" #include "tdx.h" +#include "tdx-quote-generator.h" #include "standard-headers/asm-x86/kvm_para.h" @@ -1120,13 +1123,146 @@ int tdx_parse_tdvf(void *flash_ptr, int size) return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size); } +static void tdx_get_quote_completion(TdxGenerateQuoteTask *task) +{ + TdxGuest *tdx = task->opaque; + int ret; + + /* Maintain the number of in-flight requests. */ + qemu_mutex_lock(&tdx->lock); + tdx->num--; + qemu_mutex_unlock(&tdx->lock); + + if (task->status_code == TDX_VP_GET_QUOTE_SUCCESS) { + ret = address_space_write(&address_space_memory, task->payload_gpa, + MEMTXATTRS_UNSPECIFIED, task->receive_buf, + task->receive_buf_received); + if (ret != MEMTX_OK) { + error_report("TDX: get-quote: failed to write quote data."); + } else { + task->hdr.out_len = cpu_to_le64(task->receive_buf_received); + } + } + task->hdr.error_code = cpu_to_le64(task->status_code); + + /* Publish the response contents before marking this request completed. */ + smp_wmb(); + ret = address_space_write(&address_space_memory, task->buf_gpa, + MEMTXATTRS_UNSPECIFIED, &task->hdr, + TDX_GET_QUOTE_HDR_SIZE); + if (ret != MEMTX_OK) { + error_report("TDX: get-quote: failed to update GetQuote header."); + } + + g_free(task->send_data); + g_free(task->receive_buf); + g_free(task); + object_unref(tdx); +} + +void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run) +{ + TdxGenerateQuoteTask *task; + struct tdx_get_quote_header hdr; + hwaddr buf_gpa = run->tdx.get_quote.gpa; + uint64_t buf_len = run->tdx.get_quote.size; + + QEMU_BUILD_BUG_ON(sizeof(struct tdx_get_quote_header) != TDX_GET_QUOTE_HDR_SIZE); + + run->tdx.get_quote.ret = TDG_VP_VMCALL_INVALID_OPERAND; + + if (buf_len == 0) { + return; + } + + if (!QEMU_IS_ALIGNED(buf_gpa, 4096) || !QEMU_IS_ALIGNED(buf_len, 4096)) { + run->tdx.get_quote.ret = TDG_VP_VMCALL_ALIGN_ERROR; + return; + } + + if (address_space_read(&address_space_memory, buf_gpa, MEMTXATTRS_UNSPECIFIED, + &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) { + error_report("TDX: get-quote: failed to read GetQuote header."); + return; + } + + if (le64_to_cpu(hdr.structure_version) != TDX_GET_QUOTE_STRUCTURE_VERSION) { + return; + } + + /* Only safe-guard check to avoid too large buffer size. */ + if (buf_len > TDX_GET_QUOTE_MAX_BUF_LEN || + le32_to_cpu(hdr.in_len) > buf_len - TDX_GET_QUOTE_HDR_SIZE) { + return; + } + + if (!tdx_guest->qg_sock_addr) { + hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE); + if (address_space_write(&address_space_memory, buf_gpa, + MEMTXATTRS_UNSPECIFIED, + &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) { + error_report("TDX: failed to update GetQuote header."); + return; + } + run->tdx.get_quote.ret = TDG_VP_VMCALL_SUCCESS; + return; + } + + qemu_mutex_lock(&tdx_guest->lock); + if (tdx_guest->num >= TDX_MAX_GET_QUOTE_REQUEST) { + qemu_mutex_unlock(&tdx_guest->lock); + run->tdx.get_quote.ret = TDG_VP_VMCALL_RETRY; + return; + } + tdx_guest->num++; + qemu_mutex_unlock(&tdx_guest->lock); + + task = g_new(TdxGenerateQuoteTask, 1); + task->buf_gpa = buf_gpa; + task->payload_gpa = buf_gpa + TDX_GET_QUOTE_HDR_SIZE; + task->payload_len = buf_len - TDX_GET_QUOTE_HDR_SIZE; + task->hdr = hdr; + task->completion = tdx_get_quote_completion; + + task->send_data_size = le32_to_cpu(hdr.in_len); + task->send_data = g_malloc(task->send_data_size); + task->send_data_sent = 0; + + if (address_space_read(&address_space_memory, task->payload_gpa, + MEMTXATTRS_UNSPECIFIED, task->send_data, + task->send_data_size) != MEMTX_OK) { + goto out_free; + } + + /* Mark the buffer in-flight. */ + hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_IN_FLIGHT); + if (address_space_write(&address_space_memory, buf_gpa, + MEMTXATTRS_UNSPECIFIED, + &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) { + goto out_free; + } + + task->receive_buf = g_malloc0(task->payload_len); + task->receive_buf_received = 0; + task->opaque = tdx_guest; + + object_ref(tdx_guest); + tdx_generate_quote(task, tdx_guest->qg_sock_addr); + run->tdx.get_quote.ret = TDG_VP_VMCALL_SUCCESS; + return; + +out_free: + g_free(task->send_data); + g_free(task); +} + void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run) { if (run->tdx.get_tdvmcall_info.leaf != 1) { return; } - run->tdx.get_tdvmcall_info.r11 = 0; + run->tdx.get_tdvmcall_info.r11 = TDG_VP_VMCALL_SUBFUNC_GET_QUOTE; run->tdx.get_tdvmcall_info.r12 = 0; run->tdx.get_tdvmcall_info.r13 = 0; run->tdx.get_tdvmcall_info.r14 = 0; @@ -1263,6 +1399,37 @@ static void tdx_guest_set_mrownerconfig(Object *obj, const char *value, Error ** tdx->mrownerconfig = g_strdup(value); } +static void tdx_guest_get_qgs(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + if (!tdx->qg_sock_addr) { + error_setg(errp, "quote-generation-socket is not set"); + return; + } + visit_type_SocketAddress(v, name, &tdx->qg_sock_addr, errp); +} + +static void tdx_guest_set_qgs(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + SocketAddress *sock = NULL; + + if (!visit_type_SocketAddress(v, name, &sock, errp)) { + return; + } + + if (tdx->qg_sock_addr) { + qapi_free_SocketAddress(tdx->qg_sock_addr); + } + + tdx->qg_sock_addr = sock; +} + /* tdx guest */ OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest, tdx_guest, @@ -1294,6 +1461,13 @@ static void tdx_guest_init(Object *obj) object_property_add_str(obj, "mrownerconfig", tdx_guest_get_mrownerconfig, tdx_guest_set_mrownerconfig); + + object_property_add(obj, "quote-generation-socket", "SocketAddress", + tdx_guest_get_qgs, + tdx_guest_set_qgs, + NULL, NULL); + + qemu_mutex_init(&tdx->lock); } static void tdx_guest_finalize(Object *obj) diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h index 0dd41d5811..35a09c19c5 100644 --- a/target/i386/kvm/tdx.h +++ b/target/i386/kvm/tdx.h @@ -11,6 +11,8 @@ #include "cpu.h" #include "hw/i386/tdvf.h" +#include "tdx-quote-generator.h" + #define TYPE_TDX_GUEST "tdx-guest" #define TDX_GUEST(obj) OBJECT_CHECK(TdxGuest, (obj), TYPE_TDX_GUEST) @@ -22,6 +24,7 @@ typedef struct TdxGuestClass { #define TDX_APIC_BUS_CYCLES_NS 40 #define TDVMCALL_GET_TD_VM_CALL_INFO 0x10000 +#define TDVMCALL_GET_QUOTE 0x10002 #define TDG_VP_VMCALL_SUCCESS 0x0000000000000000ULL #define TDG_VP_VMCALL_RETRY 0x0000000000000001ULL @@ -29,6 +32,8 @@ typedef struct TdxGuestClass { #define TDG_VP_VMCALL_GPA_INUSE 0x8000000000000001ULL #define TDG_VP_VMCALL_ALIGN_ERROR 0x8000000000000002ULL +#define TDG_VP_VMCALL_SUBFUNC_GET_QUOTE 0x0000000000000001ULL + enum TdxRamType { TDX_RAM_UNACCEPTED, TDX_RAM_ADDED, @@ -57,6 +62,10 @@ typedef struct TdxGuest { uint32_t nr_ram_entries; TdxRamEntry *ram_entries; + + /* GetQuote */ + SocketAddress *qg_sock_addr; + int num; } TdxGuest; #ifdef CONFIG_TDX @@ -69,6 +78,7 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp); void tdx_set_tdvf_region(MemoryRegion *tdvf_mr); int tdx_parse_tdvf(void *flash_ptr, int size); int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run); +void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run); void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run); #endif /* QEMU_I386_TDX_H */