ppc queue for 10.2

* FADUMP Support for pSeries
 * Pegasos II cleanup and Pegasos I emulation
 * Deprecation of pseries 3.0 up till 4.2
 * Coverity fix for amigaone (CID: 1641398)
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEa4EM1tK+EPOIPSFCRUTplPnWj7sFAmj6G24ACgkQRUTplPnW
 j7uSmA/+MECjSD6TTVaRFdE/+Cd9LVMp8HmlkteaT+Fp/MhJKUGAxF89eBrZKBsJ
 Ukecklx+6x1d6grmBIAaI3sGW3qJW42CcZL4Q1xuL+zsny8sZ3vpp3q/Haxn68WV
 j53LC46rqCUueBffWWRf0q3wRovjY1MiO3LmQn+sEPLcMGLgghvD4kl2WD2uI/Gy
 iYeZJq+FjroVp606xVXQHhymuQeXd/4txGEzU2x7+FpLtdhuoxKa7FD+YtJ4PP3a
 qgZtZy3y4Bs41Gm6uEjdUnuKINjfADOhjqSDzqY5JZF7tgYh5hK0ibd1MI+opvCt
 39YTpDoN18ljtO2g509sBuywxkz+y/EDsI8pry42MpkXxys2bj8mXoAV45jBOp5s
 n/GHfJ0d//dm2gpDxGhOKFK5qvFldo8tX5msgbMAXirbXzke5PVHbMr/YXmM7kkW
 4DaKnMjyRxPIDeyqPfjKspC8VnmF6Z6LzDSMYGYejHO/OKlNE7ZQqaYh5itNTaqE
 xkRC+WISGm98cGYpu57VvD/wE0VmygBZ2l+j5gYS5tDVniZW3B4dUAOAqo7JCbIO
 uMfRgmHaR0Jc2Xj5wf8J/D1ZzDok2OV2siabfwew5pvhJl7HiCoH/k+qzgovC5hO
 L/5P4n+7ZRJwLhP45ewXkEoOK5tWowofdq/5SsFjie7n8d3U5cU=
 =usd9
 -----END PGP SIGNATURE-----

Merge tag 'pull-ppc-for-10.2-d2-20251023-1' of https://gitlab.com/harshpb/qemu into staging

ppc queue for 10.2

* FADUMP Support for pSeries
* Pegasos II cleanup and Pegasos I emulation
* Deprecation of pseries 3.0 up till 4.2
* Coverity fix for amigaone (CID: 1641398)

# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCAAdFiEEa4EM1tK+EPOIPSFCRUTplPnWj7sFAmj6G24ACgkQRUTplPnW
# j7uSmA/+MECjSD6TTVaRFdE/+Cd9LVMp8HmlkteaT+Fp/MhJKUGAxF89eBrZKBsJ
# Ukecklx+6x1d6grmBIAaI3sGW3qJW42CcZL4Q1xuL+zsny8sZ3vpp3q/Haxn68WV
# j53LC46rqCUueBffWWRf0q3wRovjY1MiO3LmQn+sEPLcMGLgghvD4kl2WD2uI/Gy
# iYeZJq+FjroVp606xVXQHhymuQeXd/4txGEzU2x7+FpLtdhuoxKa7FD+YtJ4PP3a
# qgZtZy3y4Bs41Gm6uEjdUnuKINjfADOhjqSDzqY5JZF7tgYh5hK0ibd1MI+opvCt
# 39YTpDoN18ljtO2g509sBuywxkz+y/EDsI8pry42MpkXxys2bj8mXoAV45jBOp5s
# n/GHfJ0d//dm2gpDxGhOKFK5qvFldo8tX5msgbMAXirbXzke5PVHbMr/YXmM7kkW
# 4DaKnMjyRxPIDeyqPfjKspC8VnmF6Z6LzDSMYGYejHO/OKlNE7ZQqaYh5itNTaqE
# xkRC+WISGm98cGYpu57VvD/wE0VmygBZ2l+j5gYS5tDVniZW3B4dUAOAqo7JCbIO
# uMfRgmHaR0Jc2Xj5wf8J/D1ZzDok2OV2siabfwew5pvhJl7HiCoH/k+qzgovC5hO
# L/5P4n+7ZRJwLhP45ewXkEoOK5tWowofdq/5SsFjie7n8d3U5cU=
# =usd9
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 23 Oct 2025 07:11:26 AM CDT
# gpg:                using RSA key 6B810CD6D2BE10F3883D21424544E994F9D68FBB
# gpg: Good signature from "Harsh Prateek Bora <harsh.prateek.bora@gmail.com>" [undefined]
# gpg:                 aka "Harsh Prateek Bora <harshpb@linux.ibm.com>" [undefined]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 6B81 0CD6 D2BE 10F3 883D  2142 4544 E994 F9D6 8FBB

* tag 'pull-ppc-for-10.2-d2-20251023-1' of https://gitlab.com/harshpb/qemu: (32 commits)
  MAINTAINERS: Add entry for FADump (pSeries)
  tests/functional: Add test for fadump in PSeries
  hw/ppc: Enable fadump for PSeries
  hw/ppc: Pass dump-sizes property for fadump in device tree
  hw/ppc: Implement saving CPU state in Fadump
  hw/ppc: Preserve memory regions registered for fadump
  hw/ppc: Trigger Fadump boot if fadump is registered
  hw/ppc: Implement fadump register command
  hw/ppc/pegasos2: Add VOF support for pegasos1
  hw/ppc/pegasos2: Add Pegasos I emulation
  hw/ppc/pegasos2: Add bus frequency to machine state
  hw/ppc/pegasos2: Introduce abstract superclass
  hw/ppc/pegasos2: Move hardware specific parts out of machine reset
  hw/ppc/pegasos2: Move PCI IRQ routing setup to a function
  hw/ppc/pegasos2: Add south bridge pointer in the machine state
  hw/ppc/pegasos2: Rename mv field in machine state
  hw/ppc/pegasos2: Remove fdt pointer from machine state
  hw/ppc/pegasos2: Change device tree generation
  hw/ppc/pegasos2: Remove explicit name properties from device tree
  ppc/vof: Make nextprop behave more like Open Firmware
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2025-10-23 13:16:45 -05:00
commit d2cc2b1dfc
27 changed files with 2049 additions and 796 deletions

View file

@ -1652,6 +1652,7 @@ F: hw/ppc/pegasos2.c
F: hw/pci-host/mv64361.c
F: hw/pci-host/mv643xx.h
F: include/hw/pci-host/mv64361.h
F: pc-bios/dtb/pegasos[12].dt[sb]
amigaone
M: BALATON Zoltan <balaton@eik.bme.hu>
@ -3214,6 +3215,14 @@ F: scripts/coccinelle/remove_local_err.cocci
F: scripts/coccinelle/use-error_fatal.cocci
F: scripts/coccinelle/errp-guard.cocci
Firmware Assisted Dump (fadump) for sPAPR (pseries)
M: Aditya Gupta <adityag@linux.ibm.com>
R: Sourabh Jain <sourabhjain@linux.ibm.com>
S: Maintained
F: include/hw/ppc/spapr_fadump.h
F: hw/ppc/spapr_fadump.c
F: tests/functional/ppc64/test_fadump.py
GDB stub
M: Alex Bennée <alex.bennee@linaro.org>
R: Philippe Mathieu-Daudé <philmd@linaro.org>

View file

@ -221,7 +221,7 @@ struct bd_info {
static void create_bd_info(hwaddr addr, ram_addr_t ram_size)
{
struct bd_info *bd = g_new0(struct bd_info, 1);
g_autofree struct bd_info *bd = g_new0(struct bd_info, 1);
bd->bi_memsize = cpu_to_be32(ram_size);
bd->bi_flashstart = cpu_to_be32(PROM_ADDR);

View file

@ -26,6 +26,7 @@ ppc_ss.add(when: 'CONFIG_PSERIES', if_true: files(
'spapr_nvdimm.c',
'spapr_rtas_ddw.c',
'spapr_numa.c',
'spapr_fadump.c',
'pef.c',
))
ppc_ss.add(when: ['CONFIG_PSERIES', 'CONFIG_TCG'], if_true: files(

File diff suppressed because it is too large Load diff

View file

@ -900,6 +900,73 @@ static int spapr_dt_rng(void *fdt)
return ret ? -1 : 0;
}
static void spapr_dt_rtas_fadump(SpaprMachineState *spapr, void *fdt, int rtas)
{
MachineState *ms = MACHINE(spapr);
MachineClass *mc = MACHINE_GET_CLASS(ms);
FadumpMemStruct *fdm = &spapr->registered_fdm;
uint16_t dump_status_flag;
uint32_t max_possible_cpus = mc->possible_cpu_arch_ids(ms)->len;
uint64_t fadump_cpu_state_size = 0;
uint16_t fadump_versions[2] = {
FADUMP_VERSION /* min supported version */,
FADUMP_VERSION /* max supported version */
};
uint32_t fadump_rgn_sizes[2][3] = {
{
cpu_to_be32(FADUMP_CPU_STATE_DATA),
0, 0 /* Calculated later */
},
{
cpu_to_be32(FADUMP_HPTE_REGION),
0, 0 /* HPTE region not implemented */
}
};
/*
* CPU State Data contains multiple fields such as header, num_cpus and
* register entries
*
* Calculate the maximum CPU State Data size, according to maximum
* possible CPUs the QEMU VM can have
*
* This calculation must match the 'cpu_state_len' calculation done in
* 'populate_cpu_state_data' in spapr_fadump.c
*/
fadump_cpu_state_size += sizeof(struct FadumpRegSaveAreaHeader);
fadump_cpu_state_size += 0xc; /* padding as in PAPR */
fadump_cpu_state_size += sizeof(uint32_t); /* num_cpus */
fadump_cpu_state_size += max_possible_cpus * /* reg entries */
FADUMP_PER_CPU_REG_ENTRIES *
sizeof(struct FadumpRegEntry);
/* Set maximum size for CPU state data region */
assert(fadump_rgn_sizes[0][0] == cpu_to_be32(FADUMP_CPU_STATE_DATA));
/* Upper 32 bits of size, usually 0 */
fadump_rgn_sizes[0][1] = cpu_to_be32(fadump_cpu_state_size >> 32);
/* Lower 32 bits of size */
fadump_rgn_sizes[0][2] = cpu_to_be32(fadump_cpu_state_size & 0xffffffff);
/* Add device tree properties required from platform for fadump */
_FDT((fdt_setprop(fdt, rtas, "ibm,configure-kernel-dump-version",
fadump_versions, sizeof(fadump_versions))));
_FDT((fdt_setprop(fdt, rtas, "ibm,configure-kernel-dump-sizes",
fadump_rgn_sizes, sizeof(fadump_rgn_sizes))));
dump_status_flag = be16_to_cpu(fdm->header.dump_status_flag);
if (dump_status_flag & FADUMP_STATUS_DUMP_TRIGGERED) {
uint64_t fdm_size =
sizeof(struct FadumpSectionHeader) +
(be16_to_cpu(fdm->header.dump_num_sections) *
sizeof(struct FadumpSection));
_FDT((fdt_setprop(fdt, rtas, "ibm,kernel-dump", fdm, fdm_size)));
}
}
static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
{
MachineState *ms = MACHINE(spapr);
@ -1015,6 +1082,8 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
_FDT(fdt_setprop(fdt, rtas, "ibm,lrdr-capacity",
lrdr_capacity, sizeof(lrdr_capacity)));
spapr_dt_rtas_fadump(spapr, fdt, rtas);
spapr_dt_rtas_tokens(fdt, rtas);
}
@ -1072,7 +1141,6 @@ static void spapr_dt_ov5_platform_support(SpaprMachineState *spapr, void *fdt,
static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt, bool reset)
{
MachineState *machine = MACHINE(spapr);
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
int chosen;
_FDT(chosen = fdt_add_subnode(fdt, 0, "chosen"));
@ -1143,9 +1211,7 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt, bool reset)
* We can deal with BAR reallocation just fine, advertise it
* to the guest
*/
if (smc->linux_pci_probe) {
_FDT(fdt_setprop_cell(fdt, chosen, "linux,pci-probe-only", 0));
}
_FDT(fdt_setprop_cell(fdt, chosen, "linux,pci-probe-only", 0));
spapr_dt_ov5_platform_support(spapr, fdt, chosen);
}
@ -1182,7 +1248,6 @@ void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space)
{
MachineState *machine = MACHINE(spapr);
MachineClass *mc = MACHINE_GET_CLASS(machine);
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
uint32_t root_drc_type_mask = 0;
int ret;
void *fdt;
@ -1213,16 +1278,10 @@ void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space)
/* Host Model & Serial Number */
if (spapr->host_model) {
_FDT(fdt_setprop_string(fdt, 0, "host-model", spapr->host_model));
} else if (smc->broken_host_serial_model && kvmppc_get_host_model(&buf)) {
_FDT(fdt_setprop_string(fdt, 0, "host-model", buf));
g_free(buf);
}
if (spapr->host_serial) {
_FDT(fdt_setprop_string(fdt, 0, "host-serial", spapr->host_serial));
} else if (smc->broken_host_serial_model && kvmppc_get_host_serial(&buf)) {
_FDT(fdt_setprop_string(fdt, 0, "host-serial", buf));
g_free(buf);
}
_FDT(fdt_setprop_cell(fdt, 0, "#address-cells", 2));
@ -1260,9 +1319,8 @@ void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space)
/* ibm,drc-indexes and friends */
root_drc_type_mask |= SPAPR_DR_CONNECTOR_TYPE_LMB;
if (smc->dr_phb_enabled) {
root_drc_type_mask |= SPAPR_DR_CONNECTOR_TYPE_PHB;
}
root_drc_type_mask |= SPAPR_DR_CONNECTOR_TYPE_PHB;
if (mc->nvdimm_supported) {
root_drc_type_mask |= SPAPR_DR_CONNECTOR_TYPE_PMEM;
}
@ -2061,13 +2119,6 @@ static const VMStateDescription vmstate_spapr_irq_map = {
},
};
static bool spapr_dtb_needed(void *opaque)
{
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(opaque);
return smc->update_dt_enabled;
}
static int spapr_dtb_pre_load(void *opaque)
{
SpaprMachineState *spapr = (SpaprMachineState *)opaque;
@ -2083,7 +2134,6 @@ static const VMStateDescription vmstate_spapr_dtb = {
.name = "spapr_dtb",
.version_id = 1,
.minimum_version_id = 1,
.needed = spapr_dtb_needed,
.pre_load = spapr_dtb_pre_load,
.fields = (const VMStateField[]) {
VMSTATE_UINT32(fdt_initial_size, SpaprMachineState),
@ -2605,7 +2655,6 @@ static CPUArchId *spapr_find_cpu_slot(MachineState *ms, uint32_t id, int *idx)
static void spapr_set_vsmt_mode(SpaprMachineState *spapr, Error **errp)
{
MachineState *ms = MACHINE(spapr);
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
Error *local_err = NULL;
bool vsmt_user = !!spapr->vsmt;
int kvm_smt = kvmppc_smt_threads();
@ -2641,15 +2690,6 @@ static void spapr_set_vsmt_mode(SpaprMachineState *spapr, Error **errp)
return;
}
/* In this case, spapr->vsmt has been set by the command line */
} else if (!smc->smp_threads_vsmt) {
/*
* Default VSMT value is tricky, because we need it to be as
* consistent as possible (for migration), but this requires
* changing it for at least some existing cases. We pick 8 as
* the value that we'd get with KVM on POWER8, the
* overwhelmingly common case in production systems.
*/
spapr->vsmt = MAX(8, smp_threads);
} else {
spapr->vsmt = smp_threads;
}
@ -2758,7 +2798,6 @@ static PCIHostState *spapr_create_default_phb(void)
static hwaddr spapr_rma_size(SpaprMachineState *spapr, Error **errp)
{
MachineState *machine = MACHINE(spapr);
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
hwaddr rma_size = machine->ram_size;
hwaddr node0_size = spapr_node0_size(machine);
@ -2771,15 +2810,6 @@ static hwaddr spapr_rma_size(SpaprMachineState *spapr, Error **errp)
*/
rma_size = MIN(rma_size, 1 * TiB);
/*
* Clamp the RMA size based on machine type. This is for
* migration compatibility with older qemu versions, which limited
* the RMA size for complicated and mostly bad reasons.
*/
if (smc->rma_limit) {
rma_size = MIN(rma_size, smc->rma_limit);
}
if (rma_size < MIN_RMA_SLOF) {
error_setg(errp,
"pSeries SLOF firmware requires >= %" HWADDR_PRIx
@ -3009,10 +3039,8 @@ static void spapr_machine_init(MachineState *machine)
* connectors for a PHBs PCI slots) are added as needed during their
* parent's realization.
*/
if (smc->dr_phb_enabled) {
for (i = 0; i < SPAPR_MAX_PHBS; i++) {
spapr_dr_connector_new(OBJECT(machine), TYPE_SPAPR_DRC_PHB, i);
}
for (i = 0; i < SPAPR_MAX_PHBS; i++) {
spapr_dr_connector_new(OBJECT(machine), TYPE_SPAPR_DRC_PHB, i);
}
/* Set up PCI */
@ -3347,9 +3375,7 @@ static char *spapr_get_ic_mode(Object *obj, Error **errp)
{
SpaprMachineState *spapr = SPAPR_MACHINE(obj);
if (spapr->irq == &spapr_irq_xics_legacy) {
return g_strdup("legacy");
} else if (spapr->irq == &spapr_irq_xics) {
if (spapr->irq == &spapr_irq_xics) {
return g_strdup("xics");
} else if (spapr->irq == &spapr_irq_xive) {
return g_strdup("xive");
@ -3363,11 +3389,6 @@ static void spapr_set_ic_mode(Object *obj, const char *value, Error **errp)
{
SpaprMachineState *spapr = SPAPR_MACHINE(obj);
if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
error_setg(errp, "This machine only uses the legacy XICS backend, don't pass ic-mode");
return;
}
/* The legacy IRQ backend can not be set */
if (strcmp(value, "xics") == 0) {
spapr->irq = &spapr_irq_xics;
@ -4093,20 +4114,65 @@ int spapr_phb_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
return 0;
}
static bool spapr_phb_placement(SpaprMachineState *spapr, uint32_t index,
uint64_t *buid, hwaddr *pio,
hwaddr *mmio32, hwaddr *mmio64,
unsigned n_dma, uint32_t *liobns, Error **errp)
{
/*
* New-style PHB window placement.
*
* Goals: Gives large (1TiB), naturally aligned 64-bit MMIO window
* for each PHB, in addition to 2GiB 32-bit MMIO and 64kiB PIO
* windows.
*
* Some guest kernels can't work with MMIO windows above 1<<46
* (64TiB), so we place up to 31 PHBs in the area 32TiB..64TiB
*
* 32TiB..(33TiB+1984kiB) contains the 64kiB PIO windows for each
* PHB stacked together. (32TiB+2GiB)..(32TiB+64GiB) contains the
* 2GiB 32-bit MMIO windows for each PHB. Then 33..64TiB has the
* 1TiB 64-bit MMIO windows for each PHB.
*/
const uint64_t base_buid = 0x800000020000000ULL;
int i;
/* Sanity check natural alignments */
QEMU_BUILD_BUG_ON((SPAPR_PCI_BASE % SPAPR_PCI_MEM64_WIN_SIZE) != 0);
QEMU_BUILD_BUG_ON((SPAPR_PCI_LIMIT % SPAPR_PCI_MEM64_WIN_SIZE) != 0);
QEMU_BUILD_BUG_ON((SPAPR_PCI_MEM64_WIN_SIZE % SPAPR_PCI_MEM32_WIN_SIZE) != 0);
QEMU_BUILD_BUG_ON((SPAPR_PCI_MEM32_WIN_SIZE % SPAPR_PCI_IO_WIN_SIZE) != 0);
/* Sanity check bounds */
QEMU_BUILD_BUG_ON((SPAPR_MAX_PHBS * SPAPR_PCI_IO_WIN_SIZE) >
SPAPR_PCI_MEM32_WIN_SIZE);
QEMU_BUILD_BUG_ON((SPAPR_MAX_PHBS * SPAPR_PCI_MEM32_WIN_SIZE) >
SPAPR_PCI_MEM64_WIN_SIZE);
if (index >= SPAPR_MAX_PHBS) {
error_setg(errp, "\"index\" for PAPR PHB is too large (max %llu)",
SPAPR_MAX_PHBS - 1);
return false;
}
*buid = base_buid + index;
for (i = 0; i < n_dma; ++i) {
liobns[i] = SPAPR_PCI_LIOBN(index, i);
}
*pio = SPAPR_PCI_BASE + index * SPAPR_PCI_IO_WIN_SIZE;
*mmio32 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM32_WIN_SIZE;
*mmio64 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM64_WIN_SIZE;
return true;
}
static bool spapr_phb_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp)
{
SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev));
SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(dev);
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
const unsigned windows_supported = spapr_phb_windows_supported(sphb);
SpaprDrc *drc;
if (dev->hotplugged && !smc->dr_phb_enabled) {
error_setg(errp, "PHB hotplug not supported for this machine");
return false;
}
if (sphb->index == (uint32_t)-1) {
error_setg(errp, "\"index\" for PAPR PHB is mandatory");
return false;
@ -4122,26 +4188,18 @@ static bool spapr_phb_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
* This will check that sphb->index doesn't exceed the maximum number of
* PHBs for the current machine type.
*/
return
smc->phb_placement(spapr, sphb->index,
&sphb->buid, &sphb->io_win_addr,
&sphb->mem_win_addr, &sphb->mem64_win_addr,
windows_supported, sphb->dma_liobn,
errp);
return spapr_phb_placement(spapr, sphb->index,
&sphb->buid, &sphb->io_win_addr,
&sphb->mem_win_addr, &sphb->mem64_win_addr,
windows_supported, sphb->dma_liobn, errp);
}
static void spapr_phb_plug(HotplugHandler *hotplug_dev, DeviceState *dev)
{
SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev));
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(dev);
SpaprDrc *drc;
bool hotplugged = spapr_drc_hotplugged(dev);
if (!smc->dr_phb_enabled) {
return;
}
drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, sphb->index);
/* hotplug hooks should check it's enabled before getting this far */
assert(drc);
@ -4267,7 +4325,6 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
{
SpaprMachineState *sms = SPAPR_MACHINE(OBJECT(hotplug_dev));
MachineClass *mc = MACHINE_GET_CLASS(sms);
SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
if (spapr_memory_hot_unplug_supported(sms)) {
@ -4282,10 +4339,6 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
}
spapr_core_unplug_request(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE)) {
if (!smc->dr_phb_enabled) {
error_setg(errp, "PHB hot unplug not supported on this machine");
return;
}
spapr_phb_unplug_request(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_TPM_PROXY)) {
spapr_tpm_proxy_unplug(hotplug_dev, dev);
@ -4386,57 +4439,6 @@ static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine)
return machine->possible_cpus;
}
static bool spapr_phb_placement(SpaprMachineState *spapr, uint32_t index,
uint64_t *buid, hwaddr *pio,
hwaddr *mmio32, hwaddr *mmio64,
unsigned n_dma, uint32_t *liobns, Error **errp)
{
/*
* New-style PHB window placement.
*
* Goals: Gives large (1TiB), naturally aligned 64-bit MMIO window
* for each PHB, in addition to 2GiB 32-bit MMIO and 64kiB PIO
* windows.
*
* Some guest kernels can't work with MMIO windows above 1<<46
* (64TiB), so we place up to 31 PHBs in the area 32TiB..64TiB
*
* 32TiB..(33TiB+1984kiB) contains the 64kiB PIO windows for each
* PHB stacked together. (32TiB+2GiB)..(32TiB+64GiB) contains the
* 2GiB 32-bit MMIO windows for each PHB. Then 33..64TiB has the
* 1TiB 64-bit MMIO windows for each PHB.
*/
const uint64_t base_buid = 0x800000020000000ULL;
int i;
/* Sanity check natural alignments */
QEMU_BUILD_BUG_ON((SPAPR_PCI_BASE % SPAPR_PCI_MEM64_WIN_SIZE) != 0);
QEMU_BUILD_BUG_ON((SPAPR_PCI_LIMIT % SPAPR_PCI_MEM64_WIN_SIZE) != 0);
QEMU_BUILD_BUG_ON((SPAPR_PCI_MEM64_WIN_SIZE % SPAPR_PCI_MEM32_WIN_SIZE) != 0);
QEMU_BUILD_BUG_ON((SPAPR_PCI_MEM32_WIN_SIZE % SPAPR_PCI_IO_WIN_SIZE) != 0);
/* Sanity check bounds */
QEMU_BUILD_BUG_ON((SPAPR_MAX_PHBS * SPAPR_PCI_IO_WIN_SIZE) >
SPAPR_PCI_MEM32_WIN_SIZE);
QEMU_BUILD_BUG_ON((SPAPR_MAX_PHBS * SPAPR_PCI_MEM32_WIN_SIZE) >
SPAPR_PCI_MEM64_WIN_SIZE);
if (index >= SPAPR_MAX_PHBS) {
error_setg(errp, "\"index\" for PAPR PHB is too large (max %llu)",
SPAPR_MAX_PHBS - 1);
return false;
}
*buid = base_buid + index;
for (i = 0; i < n_dma; ++i) {
liobns[i] = SPAPR_PCI_LIOBN(index, i);
}
*pio = SPAPR_PCI_BASE + index * SPAPR_PCI_IO_WIN_SIZE;
*mmio32 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM32_WIN_SIZE;
*mmio64 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM64_WIN_SIZE;
return true;
}
static ICSState *spapr_ics_get(XICSFabric *dev, int irq)
{
SpaprMachineState *spapr = SPAPR_MACHINE(dev);
@ -4641,14 +4643,12 @@ static void spapr_machine_class_init(ObjectClass *oc, const void *data)
hc->unplug_request = spapr_machine_device_unplug_request;
hc->unplug = spapr_machine_device_unplug;
smc->update_dt_enabled = true;
mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power10_v2.0");
mc->has_hotpluggable_cpus = true;
mc->nvdimm_supported = true;
smc->resize_hpt_default = SPAPR_RESIZE_HPT_ENABLED;
fwc->get_dev_path = spapr_get_fw_dev_path;
nc->nmi_monitor_handler = spapr_nmi;
smc->phb_placement = spapr_phb_placement;
vhc->cpu_in_nested = spapr_cpu_in_nested;
vhc->deliver_hv_excp = spapr_exit_nested;
vhc->hypercall = emulate_spapr_hypercall;
@ -4695,10 +4695,6 @@ static void spapr_machine_class_init(ObjectClass *oc, const void *data)
smc->default_caps.caps[SPAPR_CAP_AIL_MODE_3] = SPAPR_CAP_ON;
spapr_caps_add_properties(smc);
smc->irq = &spapr_irq_dual;
smc->dr_phb_enabled = true;
smc->linux_pci_probe = true;
smc->smp_threads_vsmt = true;
smc->nr_xirqs = SPAPR_NR_XIRQS;
xfc->match_nvt = spapr_match_nvt;
vmc->client_architecture_support = spapr_vof_client_architecture_support;
vmc->quiesce = spapr_vof_quiesce;
@ -4976,110 +4972,6 @@ static void spapr_machine_5_0_class_options(MachineClass *mc)
DEFINE_SPAPR_MACHINE(5, 0);
/*
* pseries-4.2
*/
static void spapr_machine_4_2_class_options(MachineClass *mc)
{
SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
spapr_machine_5_0_class_options(mc);
compat_props_add(mc->compat_props, hw_compat_4_2, hw_compat_4_2_len);
smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_OFF;
smc->rma_limit = 16 * GiB;
mc->nvdimm_supported = false;
}
DEFINE_SPAPR_MACHINE(4, 2);
/*
* pseries-4.1
*/
static void spapr_machine_4_1_class_options(MachineClass *mc)
{
SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
static GlobalProperty compat[] = {
/* Only allow 4kiB and 64kiB IOMMU pagesizes */
{ TYPE_SPAPR_PCI_HOST_BRIDGE, "pgsz", "0x11000" },
};
spapr_machine_4_2_class_options(mc);
smc->linux_pci_probe = false;
smc->smp_threads_vsmt = false;
compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len);
compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
}
DEFINE_SPAPR_MACHINE(4, 1);
/*
* pseries-4.0
*/
static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index,
uint64_t *buid, hwaddr *pio,
hwaddr *mmio32, hwaddr *mmio64,
unsigned n_dma, uint32_t *liobns, Error **errp)
{
if (!spapr_phb_placement(spapr, index, buid, pio, mmio32, mmio64, n_dma,
liobns, errp)) {
return false;
}
return true;
}
static void spapr_machine_4_0_class_options(MachineClass *mc)
{
SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
spapr_machine_4_1_class_options(mc);
compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len);
smc->phb_placement = phb_placement_4_0;
smc->irq = &spapr_irq_xics;
smc->pre_4_1_migration = true;
}
DEFINE_SPAPR_MACHINE(4, 0);
/*
* pseries-3.1
*/
static void spapr_machine_3_1_class_options(MachineClass *mc)
{
SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
spapr_machine_4_0_class_options(mc);
compat_props_add(mc->compat_props, hw_compat_3_1, hw_compat_3_1_len);
mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0");
smc->update_dt_enabled = false;
smc->dr_phb_enabled = false;
smc->broken_host_serial_model = true;
smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_BROKEN;
smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
}
DEFINE_SPAPR_MACHINE(3, 1);
/*
* pseries-3.0
*/
static void spapr_machine_3_0_class_options(MachineClass *mc)
{
SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
spapr_machine_3_1_class_options(mc);
compat_props_add(mc->compat_props, hw_compat_3_0, hw_compat_3_0_len);
smc->legacy_irq_allocation = true;
smc->nr_xirqs = 0x400;
smc->irq = &spapr_irq_xics_legacy;
}
DEFINE_SPAPR_MACHINE(3, 0);
static void spapr_machine_register_types(void)
{
type_register_static(&spapr_machine_info);

View file

@ -66,7 +66,6 @@ typedef struct SpaprCapabilityInfo {
void (*apply)(SpaprMachineState *spapr, uint8_t val, Error **errp);
void (*cpu_apply)(SpaprMachineState *spapr, PowerPCCPU *cpu,
uint8_t val, Error **errp);
bool (*migrate_needed)(void *opaque);
} SpaprCapabilityInfo;
static void spapr_cap_get_bool(Object *obj, Visitor *v, const char *name,
@ -336,11 +335,6 @@ static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr,
spapr_check_pagesize(spapr, qemu_minrampagesize(), errp);
}
static bool cap_hpt_maxpagesize_migrate_needed(void *opaque)
{
return !SPAPR_MACHINE_GET_CLASS(opaque)->pre_4_1_migration;
}
static bool spapr_pagesize_cb(void *opaque, uint32_t seg_pshift,
uint32_t pshift)
{
@ -793,7 +787,6 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
.type = "int",
.apply = cap_hpt_maxpagesize_apply,
.cpu_apply = cap_hpt_maxpagesize_cpu_apply,
.migrate_needed = cap_hpt_maxpagesize_migrate_needed,
},
[SPAPR_CAP_NESTED_KVM_HV] = {
.name = "nested-hv",
@ -982,11 +975,8 @@ int spapr_caps_post_migration(SpaprMachineState *spapr)
static bool spapr_cap_##sname##_needed(void *opaque) \
{ \
SpaprMachineState *spapr = opaque; \
bool (*needed)(void *opaque) = \
capability_table[cap].migrate_needed; \
\
return needed ? needed(opaque) : true && \
spapr->cmd_line_caps[cap] && \
return spapr->cmd_line_caps[cap] && \
(spapr->eff.caps[cap] != \
spapr->def.caps[cap]); \
} \

View file

@ -1041,20 +1041,14 @@ void spapr_clear_pending_hotplug_events(SpaprMachineState *spapr)
void spapr_events_init(SpaprMachineState *spapr)
{
int epow_irq = SPAPR_IRQ_EPOW;
if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
epow_irq = spapr_irq_findone(spapr, &error_fatal);
}
spapr_irq_claim(spapr, epow_irq, false, &error_fatal);
spapr_irq_claim(spapr, SPAPR_IRQ_EPOW, false, &error_fatal);
QTAILQ_INIT(&spapr->pending_events);
spapr->event_sources = spapr_event_sources_new();
spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_EPOW,
epow_irq);
SPAPR_IRQ_EPOW);
/* NOTE: if machine supports modern/dedicated hotplug event source,
* we add it to the device-tree unconditionally. This means we may
@ -1065,16 +1059,10 @@ void spapr_events_init(SpaprMachineState *spapr)
* checking that it's enabled.
*/
if (spapr->use_hotplug_event_source) {
int hp_irq = SPAPR_IRQ_HOTPLUG;
if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
hp_irq = spapr_irq_findone(spapr, &error_fatal);
}
spapr_irq_claim(spapr, hp_irq, false, &error_fatal);
spapr_irq_claim(spapr, SPAPR_IRQ_HOTPLUG, false, &error_fatal);
spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_HOT_PLUG,
hp_irq);
SPAPR_IRQ_HOTPLUG);
}
spapr->epow_notifier.notify = spapr_powerdown_req;

730
hw/ppc/spapr_fadump.c Normal file
View file

@ -0,0 +1,730 @@
/*
* Firmware Assisted Dump in PSeries
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "qemu/osdep.h"
#include "qemu/log.h"
#include "hw/ppc/spapr.h"
#include "qemu/units.h"
#include "system/cpus.h"
#include "system/hw_accel.h"
#include <math.h>
/*
* Copy the ascii values for first 8 characters from a string into u64
* variable at their respective indexes.
* e.g.
* The string "FADMPINF" will be converted into 0x4641444d50494e46
*/
static uint64_t fadump_str_to_u64(const char *str)
{
uint64_t val = 0;
int i;
for (i = 0; i < sizeof(val); i++) {
val = (*str) ? (val << 8) | *str++ : val << 8;
}
return val;
}
/**
* Get the identifier id for register entries of GPRs
*
* It gives the same id as 'fadump_str_to_u64' when the complete string id
* of the GPR is given, ie.
*
* fadump_str_to_u64("GPR05") == fadump_gpr_id_to_u64(5);
* fadump_str_to_u64("GPR12") == fadump_gpr_id_to_u64(12);
*
* And so on. Hence this can be implemented by creating a dynamic
* string for each GPR, such as "GPR00", "GPR01", ... "GPR31"
* Instead of allocating a string, an observation from the math of
* 'fadump_str_to_u64' or from PAPR tells us that there's a pattern
* in the identifier IDs, such that the first 4 bytes are affected only by
* whether it is GPR0*, GPR1*, GPR2*, GPR3*.
* Upper half of 5th byte is always 0x3. Lower half (nibble) of 5th byte
* is the tens digit of the GPR id, ie. GPR ID / 10.
* Upper half of 6th byte is always 0x3. Lower half (nibble) of 5th byte
* is the ones digit of the GPR id, ie. GPR ID % 10
*
* For example, for GPR 29, the 5th and 6th byte will be 0x32 and 0x39
*/
static uint64_t fadump_gpr_id_to_u64(uint32_t gpr_id)
{
uint64_t val = 0;
/* Valid range of GPR id is only GPR0 to GPR31 */
assert(gpr_id < 32);
/* Below calculations set the 0th to 5th byte */
if (gpr_id <= 9) {
val = fadump_str_to_u64("GPR0");
} else if (gpr_id <= 19) {
val = fadump_str_to_u64("GPR1");
} else if (gpr_id <= 29) {
val = fadump_str_to_u64("GPR2");
} else {
val = fadump_str_to_u64("GPR3");
}
/* Set the 6th byte */
val |= 0x30000000;
val |= ((gpr_id % 10) << 24);
return val;
}
/*
* Handle the "FADUMP_CMD_REGISTER" command in 'ibm,configure-kernel-dump'
*
* Note: Any changes made by the kernel to the fadump memory struct won't
* reflect in QEMU after the 'ibm,configure-kernel-dump' RTAS call has returned,
* as we store the passed fadump memory structure passed during fadump
* registration.
* Kernel has to invalidate & re-register fadump, if it intends to make any
* changes to the fadump memory structure
*
* Returns:
* * RTAS_OUT_SUCCESS: On successful registration
* * RTAS_OUT_PARAM_ERROR: If parameters are not correct, eg. too many
* sections, invalid memory addresses that we are
* unable to read, etc
* * RTAS_OUT_DUMP_ALREADY_REGISTERED: Dump already registered
* * RTAS_OUT_HW_ERROR: Misc issue such as memory access failures
*/
uint32_t do_fadump_register(SpaprMachineState *spapr, target_ulong args)
{
FadumpSectionHeader header;
FadumpSection regions[FADUMP_MAX_SECTIONS] = {0};
target_ulong fdm_addr = rtas_ld(args, 1);
target_ulong fdm_size = rtas_ld(args, 2);
AddressSpace *default_as = &address_space_memory;
MemTxResult io_result;
MemTxAttrs attrs;
uint64_t next_section_addr;
uint16_t dump_num_sections;
/* Mark the memory transaction as privileged memory access */
attrs.user = 0;
attrs.memory = 1;
if (spapr->fadump_registered) {
/* FADump already registered */
return RTAS_OUT_DUMP_ALREADY_REGISTERED;
}
if (spapr->fadump_dump_active) {
return RTAS_OUT_DUMP_ACTIVE;
}
if (fdm_size < sizeof(FadumpSectionHeader)) {
qemu_log_mask(LOG_GUEST_ERROR,
"FADump: Header size is invalid: " TARGET_FMT_lu "\n", fdm_size);
return RTAS_OUT_PARAM_ERROR;
}
/* Ensure fdm_addr points to a valid RMR-memory/RMA-memory buffer */
if ((fdm_addr <= 0) || ((fdm_addr + fdm_size) > spapr->rma_size)) {
qemu_log_mask(LOG_GUEST_ERROR,
"FADump: Invalid fdm address: " TARGET_FMT_lu "\n", fdm_addr);
return RTAS_OUT_PARAM_ERROR;
}
/* Try to read the passed fadump header */
io_result = address_space_read(default_as, fdm_addr, attrs,
&header, sizeof(header));
if (io_result != MEMTX_OK) {
qemu_log_mask(LOG_GUEST_ERROR,
"FADump: Unable to read fdm: " TARGET_FMT_lu "\n", fdm_addr);
return RTAS_OUT_HW_ERROR;
}
/* Verify that we understand the fadump header version */
if (header.dump_format_version != cpu_to_be32(FADUMP_VERSION)) {
qemu_log_mask(LOG_GUEST_ERROR,
"FADump: Unknown fadump header version: 0x%x\n",
header.dump_format_version);
return RTAS_OUT_PARAM_ERROR;
}
/* Reset dump status flags */
header.dump_status_flag = 0;
dump_num_sections = be16_to_cpu(header.dump_num_sections);
if (dump_num_sections > FADUMP_MAX_SECTIONS) {
qemu_log_mask(LOG_GUEST_ERROR,
"FADump: Too many sections: %d sections\n", dump_num_sections);
return RTAS_OUT_PARAM_ERROR;
}
next_section_addr =
fdm_addr +
be32_to_cpu(header.offset_first_dump_section);
for (int i = 0; i < dump_num_sections; ++i) {
/* Read the fadump section from memory */
io_result = address_space_read(default_as, next_section_addr, attrs,
&regions[i], sizeof(regions[i]));
if (io_result != MEMTX_OK) {
qemu_log_mask(LOG_UNIMP,
"FADump: Unable to read fadump %dth section\n", i);
return RTAS_OUT_PARAM_ERROR;
}
next_section_addr += sizeof(regions[i]);
}
spapr->fadump_registered = true;
spapr->fadump_dump_active = false;
/* Store the registered fadump memory struct */
spapr->registered_fdm.header = header;
for (int i = 0; i < dump_num_sections; ++i) {
spapr->registered_fdm.rgn[i] = regions[i];
}
return RTAS_OUT_SUCCESS;
}
/*
 * Copy the source region of given fadump section, to the destination
 * address mentioned in the region
 *
 * The copy is performed in bounded chunks (FADUMP_CHUNK_SIZE) so host
 * memory usage stays constant regardless of the region's length
 *
 * Also set the region's error flag, if the copy fails due to non-existent
 * address (MEMTX_DECODE_ERROR) or permission issues (MEMTX_ACCESS_ERROR)
 *
 * Returns true if successful copy
 *
 * Returns false in case of any other error, being treated as hardware
 * error for fadump purposes
 */
static bool do_preserve_region(FadumpSection *region)
{
    AddressSpace *default_as = &address_space_memory;
    MemTxResult io_result;
    /*
     * Mark the memory transaction as privileged memory access; the
     * designated initializer zeroes all remaining attribute bits
     * (the old code left them uninitialized)
     */
    MemTxAttrs attrs = { .user = 0, .memory = 1 };
    uint64_t src_addr, src_len, dest_addr;
    uint64_t num_chunks;
    g_autofree void *copy_buffer = NULL;

    /* All FadumpSection fields are big-endian on the wire */
    src_addr = be64_to_cpu(region->source_address);
    src_len = be64_to_cpu(region->source_len);
    dest_addr = be64_to_cpu(region->destination_address);

    /*
     * Optimisation: Skip copy if source and destination are same
     * (eg. param area)
     */
    if (src_addr == dest_addr) {
        region->bytes_dumped = cpu_to_be64(src_len);
        return true;
    }

#define FADUMP_CHUNK_SIZE ((size_t)(32 * MiB))
    copy_buffer = g_try_malloc(FADUMP_CHUNK_SIZE);
    if (copy_buffer == NULL) {
        qemu_log_mask(LOG_GUEST_ERROR,
            "FADump: Failed allocating memory (size: %zu) for copying"
            " reserved memory regions\n", FADUMP_CHUNK_SIZE);
        /*
         * Without the bounce buffer no copy is possible: treat as a
         * hardware error instead of dereferencing a NULL buffer below
         * (the original code fell through here)
         */
        return false;
    }

    /*
     * Integer ceiling division; the previous float-based ceil() lost
     * precision for lengths above 2^24 bytes
     */
    num_chunks = (src_len + FADUMP_CHUNK_SIZE - 1) / FADUMP_CHUNK_SIZE;

    for (uint64_t chunk_id = 0; chunk_id < num_chunks; ++chunk_id) {
        /* Take minimum of bytes left to copy, and chunk size */
        uint64_t copy_len = MIN(
                src_len - (chunk_id * FADUMP_CHUNK_SIZE),
                FADUMP_CHUNK_SIZE
            );

        /* Copy the source region to destination */
        io_result = address_space_read(default_as, src_addr, attrs,
                                       copy_buffer, copy_len);
        if ((io_result & MEMTX_DECODE_ERROR) ||
            (io_result & MEMTX_ACCESS_ERROR)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                "FADump: Failed to decode/access address in section: %d\n",
                be16_to_cpu(region->source_data_type));
            /*
             * Invalid source address is not an hardware error, instead
             * wrong parameter from the kernel.
             * Return true to let caller know to continue reading other
             * sections
             */
            region->error_flags =
                cpu_to_be16(FADUMP_ERROR_INVALID_SOURCE_ADDR);
            region->bytes_dumped = 0;
            return true;
        } else if (io_result != MEMTX_OK) {
            qemu_log_mask(LOG_GUEST_ERROR,
                "FADump: Failed to read source region in section: %d\n",
                be16_to_cpu(region->source_data_type));
            return false;
        }

        io_result = address_space_write(default_as, dest_addr, attrs,
                                        copy_buffer, copy_len);
        if ((io_result & MEMTX_DECODE_ERROR) ||
            (io_result & MEMTX_ACCESS_ERROR)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                "FADump: Failed to decode/access address in section: %d\n",
                be16_to_cpu(region->source_data_type));
            /*
             * Invalid destination address is not an hardware error,
             * instead wrong parameter from the kernel.
             * Return true to let caller know to continue reading other
             * sections
             */
            region->error_flags =
                cpu_to_be16(FADUMP_ERROR_INVALID_DEST_ADDR);
            region->bytes_dumped = 0;
            return true;
        } else if (io_result != MEMTX_OK) {
            qemu_log_mask(LOG_GUEST_ERROR,
                "FADump: Failed to write destination in section: %d\n",
                be16_to_cpu(region->source_data_type));
            return false;
        }

        src_addr += FADUMP_CHUNK_SIZE;
        dest_addr += FADUMP_CHUNK_SIZE;
    }
#undef FADUMP_CHUNK_SIZE

    /*
     * Considering address_space_write would have copied the
     * complete region
     */
    region->bytes_dumped = cpu_to_be64(src_len);
    return true;
}
/*
 * Populate the passed CPUs register entries, in the buffer starting at
 * the argument 'curr_reg_entry'
 *
 * The register entries is an array of pair of register id and register
 * value, as described in Table 591/592 in section "H.1 Register Save Area"
 * in PAPR v2.13
 *
 * Each CPU's list is bracketed by a "CPUSTRT" and a "CPUEND" entry whose
 * value field carries the masked vcpu id of the CPU being saved
 *
 * Returns pointer just past this CPU's register entries, which can be used
 * as the start address for next CPU's register entries
 */
static FadumpRegEntry *populate_cpu_reg_entries(CPUState *cpu,
        FadumpRegEntry *curr_reg_entry)
{
    CPUPPCState *env;
    PowerPCCPU *ppc_cpu;
    /* Count of entries between CPUSTRT/CPUEND, checked by assert below */
    uint32_t num_regs_per_cpu = 0;

    ppc_cpu = POWERPC_CPU(cpu);
    env = cpu_env(cpu);
    num_regs_per_cpu = 0;

    /*
     * CPUSTRT and CPUEND register entries follow this format:
     *
     * 8 Bytes Reg ID (BE) | 4 Bytes (0x0) | 4 Bytes Logical CPU ID (BE)
     */
    curr_reg_entry->reg_id =
        cpu_to_be64(fadump_str_to_u64("CPUSTRT"));
    curr_reg_entry->reg_value = cpu_to_be64(
            ppc_cpu->vcpu_id & FADUMP_CPU_ID_MASK);
    ++curr_reg_entry;

    /*
     * Emit one register entry: the register's 8-character ASCII name as
     * the id and its value, both stored big-endian. Also bumps the
     * per-CPU entry counter used for the sanity assert at the end.
     */
#define REG_ENTRY(id, val)                            \
    do {                                              \
        curr_reg_entry->reg_id =                      \
            cpu_to_be64(fadump_str_to_u64(#id));      \
        curr_reg_entry->reg_value = cpu_to_be64(val); \
        ++curr_reg_entry;                             \
        ++num_regs_per_cpu;                           \
    } while (0)

    REG_ENTRY(ACOP, env->spr[SPR_ACOP]);
    REG_ENTRY(AMR, env->spr[SPR_AMR]);
    REG_ENTRY(BESCR, env->spr[SPR_BESCR]);
    REG_ENTRY(CFAR, env->spr[SPR_CFAR]);
    REG_ENTRY(CIABR, env->spr[SPR_CIABR]);

    /* Save the condition register */
    REG_ENTRY(CR, ppc_get_cr(env));

    REG_ENTRY(CTR, env->spr[SPR_CTR]);
    REG_ENTRY(CTRL, env->spr[SPR_CTRL]);
    REG_ENTRY(DABR, env->spr[SPR_DABR]);
    REG_ENTRY(DABRX, env->spr[SPR_DABRX]);
    REG_ENTRY(DAR, env->spr[SPR_DAR]);
    REG_ENTRY(DAWR0, env->spr[SPR_DAWR0]);
    REG_ENTRY(DAWR1, env->spr[SPR_DAWR1]);
    REG_ENTRY(DAWRX0, env->spr[SPR_DAWRX0]);
    REG_ENTRY(DAWRX1, env->spr[SPR_DAWRX1]);
    REG_ENTRY(DPDES, env->spr[SPR_DPDES]);
    REG_ENTRY(DSCR, env->spr[SPR_DSCR]);
    REG_ENTRY(DSISR, env->spr[SPR_DSISR]);
    REG_ENTRY(EBBHR, env->spr[SPR_EBBHR]);
    REG_ENTRY(EBBRR, env->spr[SPR_EBBRR]);
    REG_ENTRY(FPSCR, env->fpscr);
    REG_ENTRY(FSCR, env->spr[SPR_FSCR]);

    /* Save the GPRs (ids formatted by fadump_gpr_id_to_u64, eg. "GPR00") */
    for (int gpr_id = 0; gpr_id < 32; ++gpr_id) {
        curr_reg_entry->reg_id =
            cpu_to_be64(fadump_gpr_id_to_u64(gpr_id));
        curr_reg_entry->reg_value =
            cpu_to_be64(env->gpr[gpr_id]);
        ++curr_reg_entry;
        ++num_regs_per_cpu;
    }

    REG_ENTRY(IAMR, env->spr[SPR_IAMR]);
    REG_ENTRY(IC, env->spr[SPR_IC]);
    REG_ENTRY(LR, env->spr[SPR_LR]);
    REG_ENTRY(MSR, env->msr);
    REG_ENTRY(NIA, env->nip); /* NIA: next instruction address */
    REG_ENTRY(PIR, env->spr[SPR_PIR]);
    REG_ENTRY(PSPB, env->spr[SPR_PSPB]);
    REG_ENTRY(PVR, env->spr[SPR_PVR]);
    REG_ENTRY(RPR, env->spr[SPR_RPR]);
    REG_ENTRY(SPURR, env->spr[SPR_SPURR]);
    REG_ENTRY(SRR0, env->spr[SPR_SRR0]);
    REG_ENTRY(SRR1, env->spr[SPR_SRR1]);
    REG_ENTRY(TAR, env->spr[SPR_TAR]);
    REG_ENTRY(TEXASR, env->spr[SPR_TEXASR]);
    REG_ENTRY(TFHAR, env->spr[SPR_TFHAR]);
    REG_ENTRY(TFIAR, env->spr[SPR_TFIAR]);
    REG_ENTRY(TIR, env->spr[SPR_TIR]);
    REG_ENTRY(UAMOR, env->spr[SPR_UAMOR]);
    REG_ENTRY(VRSAVE, env->spr[SPR_VRSAVE]);
    REG_ENTRY(VSCR, env->vscr);
    REG_ENTRY(VTB, env->spr[SPR_VTB]);
    REG_ENTRY(WORT, env->spr[SPR_WORT]);
    REG_ENTRY(XER, env->spr[SPR_XER]);

    /*
     * Ignoring transaction checkpoint and few other registers
     * mentioned in PAPR as not supported in QEMU
     */
#undef REG_ENTRY

    /* End the registers for this CPU with "CPUEND" reg entry */
    curr_reg_entry->reg_id =
        cpu_to_be64(fadump_str_to_u64("CPUEND"));
    curr_reg_entry->reg_value = cpu_to_be64(
            ppc_cpu->vcpu_id & FADUMP_CPU_ID_MASK);

    /*
     * Ensure number of register entries saved matches the expected
     * 'FADUMP_PER_CPU_REG_ENTRIES' count
     *
     * This will help catch an error if in future a new register entry
     * is added/removed while not modifying FADUMP_PER_CPU_REG_ENTRIES
     */
    assert(FADUMP_PER_CPU_REG_ENTRIES == num_regs_per_cpu + 2 /*CPUSTRT+CPUEND*/);

    ++curr_reg_entry;
    return curr_reg_entry;
}
/*
* Populate the "Register Save Area"/CPU State as mentioned in section "H.1
* Register Save Area" in PAPR v2.13
*
* It allocates the buffer for this region, then populates the register
* entries
*
* Returns the pointer to the buffer (which should be deallocated by the
* callers), and sets the size of this buffer in the argument
* 'cpu_state_len'
*/
static void *get_cpu_state_data(uint64_t *cpu_state_len)
{
FadumpRegSaveAreaHeader reg_save_hdr;
FadumpRegEntry *reg_entries;
FadumpRegEntry *curr_reg_entry;
CPUState *cpu;
uint32_t num_reg_entries;
uint32_t reg_entries_size;
uint32_t num_cpus = 0;
void *cpu_state_buffer = NULL;
uint64_t offset = 0;
CPU_FOREACH(cpu) {
++num_cpus;
}
reg_save_hdr.version = cpu_to_be32(0);
reg_save_hdr.magic_number =
cpu_to_be64(fadump_str_to_u64("REGSAVE"));
/* Reg save area header is immediately followed by num cpus */
reg_save_hdr.num_cpu_offset =
cpu_to_be32(sizeof(FadumpRegSaveAreaHeader));
num_reg_entries = num_cpus * FADUMP_PER_CPU_REG_ENTRIES;
reg_entries_size = num_reg_entries * sizeof(FadumpRegEntry);
reg_entries = g_new(FadumpRegEntry, num_reg_entries);
/* Pointer to current CPU's registers */
curr_reg_entry = reg_entries;
/* Populate register entries for all CPUs */
CPU_FOREACH(cpu) {
cpu_synchronize_state(cpu);
curr_reg_entry = populate_cpu_reg_entries(cpu, curr_reg_entry);
}
*cpu_state_len = 0;
*cpu_state_len += sizeof(reg_save_hdr); /* reg save header */
*cpu_state_len += 0xc; /* padding as in PAPR */
*cpu_state_len += sizeof(num_cpus); /* num_cpus */
*cpu_state_len += reg_entries_size; /* reg entries */
cpu_state_buffer = g_malloc(*cpu_state_len);
memcpy(cpu_state_buffer + offset,
&reg_save_hdr, sizeof(reg_save_hdr));
offset += sizeof(reg_save_hdr);
/* Write num_cpus */
num_cpus = cpu_to_be32(num_cpus);
memcpy(cpu_state_buffer + offset, &num_cpus, sizeof(num_cpus));
offset += sizeof(num_cpus);
/* Write the register entries */
memcpy(cpu_state_buffer + offset, reg_entries, reg_entries_size);
offset += reg_entries_size;
return cpu_state_buffer;
}
/*
 * Save the CPU State Data (aka "Register Save Area") in given region
 *
 * Region argument is expected to be of CPU_STATE_DATA type
 *
 * Returns false only in case of Hardware Error, such as failure to
 * read/write a valid address.
 *
 * Otherwise, even in case of unsuccessful copy of CPU state data for reasons
 * such as invalid destination address or non-fatal error errors likely
 * caused due to invalid parameters, return true and set region->error_flags
 */
static bool do_populate_cpu_state(FadumpSection *region)
{
    uint64_t dest_addr = be64_to_cpu(region->destination_address);
    uint64_t cpu_state_len = 0;
    g_autofree void *cpu_state_buffer = NULL;
    AddressSpace *default_as = &address_space_memory;
    MemTxResult io_result;
    /*
     * Mark the memory transaction as privileged memory access; the
     * designated initializer zeroes all remaining attribute bits
     */
    MemTxAttrs attrs = { .user = 0, .memory = 1 };

    assert(region->source_data_type == cpu_to_be16(FADUMP_CPU_STATE_DATA));

    /* Build the Register Save Area; buffer is auto-freed on return */
    cpu_state_buffer = get_cpu_state_data(&cpu_state_len);

    io_result = address_space_write(default_as, dest_addr, attrs,
                                    cpu_state_buffer, cpu_state_len);
    if ((io_result & MEMTX_DECODE_ERROR) ||
        (io_result & MEMTX_ACCESS_ERROR)) {
        qemu_log_mask(LOG_GUEST_ERROR,
            "FADump: Failed to decode/access address in CPU State Region's"
            " destination address: 0x%016" PRIx64 "\n", dest_addr);
        /*
         * Invalid destination address is not an hardware error, instead
         * wrong parameter from the kernel.
         * Return true to let caller know to continue reading other
         * sections
         *
         * Fix: this is a *write* failure, so flag the destination address
         * (the original code set FADUMP_ERROR_INVALID_SOURCE_ADDR and
         * missed the byte-swap; compare the write path in
         * do_preserve_region and the BE declaration of FadumpSection)
         */
        region->error_flags = cpu_to_be16(FADUMP_ERROR_INVALID_DEST_ADDR);
        region->bytes_dumped = 0;
        return true;
    } else if (io_result != MEMTX_OK) {
        qemu_log_mask(LOG_GUEST_ERROR,
            "FADump: Failed to write CPU state region.\n");
        return false;
    }

    /*
     * Set bytes_dumped in cpu state region, so kernel knows platform have
     * exported it
     */
    region->bytes_dumped = cpu_to_be64(cpu_state_len);

    if (region->source_len != region->bytes_dumped) {
        /*
         * Log the error, but don't fail the dump collection here, let
         * kernel handle the mismatch
         */
        qemu_log_mask(LOG_GUEST_ERROR,
            "FADump: Mismatch in CPU State region's length exported:"
            " Kernel expected: 0x%" PRIx64 " bytes,"
            " QEMU exported: 0x%" PRIx64 " bytes\n",
            be64_to_cpu(region->source_len),
            be64_to_cpu(region->bytes_dumped));
    }

    return true;
}
/*
 * Preserve the memory locations registered for fadump
 *
 * Iterates over every registered section, dispatching on its data type;
 * sections the kernel did not mark with FADUMP_REQUEST_FLAG are skipped.
 *
 * Returns false only in case of RTAS_OUT_HW_ERROR, otherwise true
 */
static bool fadump_preserve_mem(SpaprMachineState *spapr)
{
    FadumpMemStruct *fdm = &spapr->registered_fdm;
    uint16_t dump_num_sections, data_type;

    /* Caller only reaches here after a successful fadump registration */
    assert(spapr->fadump_registered);

    /*
     * Handle all sections
     *
     * CPU State Data and HPTE regions are handled in their own cases
     *
     * RMR regions and any custom OS reserved regions such as parameter
     * save area, are handled by simply copying the source region to
     * destination address
     */
    dump_num_sections = be16_to_cpu(fdm->header.dump_num_sections);
    for (int i = 0; i < dump_num_sections; ++i) {
        data_type = be16_to_cpu(fdm->rgn[i].source_data_type);

        /* Reset error_flags & bytes_dumped for now */
        fdm->rgn[i].error_flags = 0;
        fdm->rgn[i].bytes_dumped = 0;

        /* If kernel did not request for the memory region, then skip it */
        if (be32_to_cpu(fdm->rgn[i].request_flag) != FADUMP_REQUEST_FLAG) {
            qemu_log_mask(LOG_UNIMP,
                "FADump: Skipping copying region as not requested\n");
            continue;
        }

        switch (data_type) {
        case FADUMP_CPU_STATE_DATA:
            if (!do_populate_cpu_state(&fdm->rgn[i])) {
                /* Fix: log message was missing its trailing newline */
                qemu_log_mask(LOG_GUEST_ERROR,
                    "FADump: Failed to store CPU State Data\n");
                fdm->header.dump_status_flag |=
                    cpu_to_be16(FADUMP_STATUS_DUMP_ERROR);
                return false;
            }
            break;
        case FADUMP_HPTE_REGION:
            /* TODO: Add hpte state data */
            break;
        case FADUMP_REAL_MODE_REGION:
        case FADUMP_PARAM_AREA:
            /* Copy the memory region from region's source to its destination */
            if (!do_preserve_region(&fdm->rgn[i])) {
                /*
                 * Mark the dump as errored but keep going so remaining
                 * sections are still attempted
                 */
                qemu_log_mask(LOG_GUEST_ERROR,
                    "FADump: Failed to preserve dump section: %d\n",
                    be16_to_cpu(fdm->rgn[i].source_data_type));
                fdm->header.dump_status_flag |=
                    cpu_to_be16(FADUMP_STATUS_DUMP_ERROR);
            }
            break;
        default:
            qemu_log_mask(LOG_GUEST_ERROR,
                "FADump: Skipping unknown source data type: %d\n", data_type);
            fdm->rgn[i].error_flags =
                cpu_to_be16(FADUMP_ERROR_INVALID_DATA_TYPE);
        }
    }

    return true;
}
/*
 * Switch the machine into a fadump (crashkernel) boot: the next boot will
 * run with a dump active.
 *
 * Invoked from the ibm,os-term RTAS call when fadump has been registered.
 *
 * Preserves the registered memory regions, then records
 * 'FADUMP_STATUS_DUMP_TRIGGERED' in the dump status so later boot code can
 * add the "ibm,kernel-dump" device tree node — presence of that flag is
 * what marks the next boot as a fadump boot here.
 */
void trigger_fadump_boot(SpaprMachineState *spapr, target_ulong spapr_retcode)
{
    FadumpSectionHeader *fdm_hdr = &spapr->registered_fdm.header;

    pause_all_vcpus();

    /* Preserve the memory locations registered for fadump */
    if (!fadump_preserve_mem(spapr)) {
        /* Preservation failed: report hardware error and force a reboot */
        rtas_st(spapr_retcode, 0, RTAS_OUT_HW_ERROR);
        qemu_system_guest_panicked(NULL);
        return;
    }

    /*
     * Mark next boot as fadump boot.
     *
     * This relies on some interpretation, since PAPR does not specify how
     * the dump status flags are consumed, and the kernel does not read
     * them either. Per Table 136 in PAPR v2.13:
     *
     * FADUMP_STATUS_DUMP_TRIGGERED
     *   = dump was triggered by the previous system boot (PAPR)
     *   = next boot will be a fadump boot (our assumption)
     *
     * FADUMP_STATUS_DUMP_PERFORMED
     *   = dump performed; cleared to 0 by the caller of
     *     ibm,configure-kernel-dump (PAPR)
     *   = firmware has copied the requested regions, i.e. a dump is
     *     active for the next boot (our assumption)
     */
    fdm_hdr->dump_status_flag = cpu_to_be16(
        FADUMP_STATUS_DUMP_TRIGGERED | /* Next boot will be fadump boot */
        FADUMP_STATUS_DUMP_PERFORMED   /* Dump is active */
    );

    /* Reset fadump_registered for next boot */
    spapr->fadump_registered = false;
    spapr->fadump_dump_active = true;

    /*
     * Then do a guest reset.
     *
     * Requirement: GUEST_RESET is expected to NOT clear the memory, as is
     * the case when this is merged.
     */
    qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);

    rtas_st(spapr_retcode, 0, RTAS_OUT_SUCCESS);
}

View file

@ -1475,16 +1475,11 @@ static target_ulong h_update_dt(PowerPCCPU *cpu, SpaprMachineState *spapr,
target_ulong dt = ppc64_phys_to_real(args[0]);
struct fdt_header hdr = { 0 };
unsigned cb;
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
void *fdt;
cpu_physical_memory_read(dt, &hdr, sizeof(hdr));
cb = fdt32_to_cpu(hdr.totalsize);
if (!smc->update_dt_enabled) {
return H_SUCCESS;
}
/* Check that the fdt did not grow out of proportion */
if (cb > spapr->fdt_initial_size * 2) {
trace_spapr_update_dt_failed_size(spapr->fdt_initial_size, cb,

View file

@ -33,11 +33,6 @@ static const TypeInfo spapr_intc_info = {
static void spapr_irq_msi_init(SpaprMachineState *spapr)
{
if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
/* Legacy mode doesn't use this allocator */
return;
}
spapr->irq_map_nr = spapr_irq_nr_msis(spapr);
spapr->irq_map = bitmap_new(spapr->irq_map_nr);
}
@ -284,19 +279,11 @@ void spapr_irq_dt(SpaprMachineState *spapr, uint32_t nr_servers,
uint32_t spapr_irq_nr_msis(SpaprMachineState *spapr)
{
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
if (smc->legacy_irq_allocation) {
return smc->nr_xirqs;
} else {
return SPAPR_XIRQ_BASE + smc->nr_xirqs - SPAPR_IRQ_MSI;
}
return SPAPR_NR_XIRQS + SPAPR_XIRQ_BASE - SPAPR_IRQ_MSI;
}
void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
{
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
if (kvm_enabled() && kvm_kernel_irqchip_split()) {
error_setg(errp, "kernel_irqchip split mode not supported on pseries");
return;
@ -317,7 +304,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
object_property_add_child(OBJECT(spapr), "ics", obj);
object_property_set_link(obj, ICS_PROP_XICS, OBJECT(spapr),
&error_abort);
object_property_set_int(obj, "nr-irqs", smc->nr_xirqs, &error_abort);
object_property_set_int(obj, "nr-irqs", SPAPR_NR_XIRQS, &error_abort);
if (!qdev_realize(DEVICE(obj), NULL, errp)) {
return;
}
@ -331,7 +318,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
int i;
dev = qdev_new(TYPE_SPAPR_XIVE);
qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + SPAPR_IRQ_NR_IPIS);
qdev_prop_set_uint32(dev, "nr-irqs", SPAPR_NR_XIRQS + SPAPR_IRQ_NR_IPIS);
/*
* 8 XIVE END structures per CPU. One for each available
* priority
@ -358,7 +345,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
}
spapr->qirqs = qemu_allocate_irqs(spapr_set_irq, spapr,
smc->nr_xirqs + SPAPR_IRQ_NR_IPIS);
SPAPR_NR_XIRQS + SPAPR_IRQ_NR_IPIS);
/*
* Mostly we don't actually need this until reset, except that not
@ -373,11 +360,10 @@ int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp)
{
SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
int i;
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
int rc;
assert(irq >= SPAPR_XIRQ_BASE);
assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE));
assert(irq < (SPAPR_NR_XIRQS + SPAPR_XIRQ_BASE));
for (i = 0; i < ARRAY_SIZE(intcs); i++) {
SpaprInterruptController *intc = intcs[i];
@ -397,10 +383,9 @@ void spapr_irq_free(SpaprMachineState *spapr, int irq, int num)
{
SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
int i, j;
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
assert(irq >= SPAPR_XIRQ_BASE);
assert((irq + num) <= (smc->nr_xirqs + SPAPR_XIRQ_BASE));
assert((irq + num) <= (SPAPR_NR_XIRQS + SPAPR_XIRQ_BASE));
for (i = irq; i < (irq + num); i++) {
for (j = 0; j < ARRAY_SIZE(intcs); j++) {
@ -417,8 +402,6 @@ void spapr_irq_free(SpaprMachineState *spapr, int irq, int num)
qemu_irq spapr_qirq(SpaprMachineState *spapr, int irq)
{
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
/*
* This interface is basically for VIO and PHB devices to find the
* right qemu_irq to manipulate, so we only allow access to the
@ -427,7 +410,7 @@ qemu_irq spapr_qirq(SpaprMachineState *spapr, int irq)
* interfaces, we can change this if we need to in future.
*/
assert(irq >= SPAPR_XIRQ_BASE);
assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE));
assert(irq < (SPAPR_NR_XIRQS + SPAPR_XIRQ_BASE));
if (spapr->ics) {
assert(ics_valid_irq(spapr->ics, irq));
@ -588,11 +571,6 @@ int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp)
return first + ics->offset;
}
SpaprIrq spapr_irq_xics_legacy = {
.xics = true,
.xive = false,
};
static void spapr_irq_register_types(void)
{
type_register_static(&spapr_intc_info);

View file

@ -268,7 +268,6 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, SpaprMachineState *spapr,
target_ulong args, uint32_t nret,
target_ulong rets)
{
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
uint32_t config_addr = rtas_ld(args, 0);
uint64_t buid = rtas_ldq(args, 1);
unsigned int func = rtas_ld(args, 3);
@ -373,13 +372,8 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, SpaprMachineState *spapr,
}
/* Allocate MSIs */
if (smc->legacy_irq_allocation) {
irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI,
&err);
} else {
irq = spapr_irq_msi_alloc(spapr, req_num,
ret_intr_type == RTAS_TYPE_MSI, &err);
}
irq = spapr_irq_msi_alloc(spapr, req_num,
ret_intr_type == RTAS_TYPE_MSI, &err);
if (err) {
error_reportf_err(err, "Can't allocate MSIs for device %x: ",
config_addr);
@ -393,9 +387,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, SpaprMachineState *spapr,
if (i) {
spapr_irq_free(spapr, irq, i);
}
if (!smc->legacy_irq_allocation) {
spapr_irq_msi_free(spapr, irq, req_num);
}
spapr_irq_msi_free(spapr, irq, req_num);
error_reportf_err(err, "Can't allocate MSIs for device %x: ",
config_addr);
rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
@ -1789,12 +1781,9 @@ static void spapr_phb_unrealize(DeviceState *dev)
static void spapr_phb_destroy_msi(gpointer opaque)
{
SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
SpaprPciMsi *msi = opaque;
if (!smc->legacy_irq_allocation) {
spapr_irq_msi_free(spapr, msi->first_irq, msi->num);
}
spapr_irq_msi_free(spapr, msi->first_irq, msi->num);
spapr_irq_free(spapr, msi->first_irq, msi->num);
g_free(msi);
}
@ -1808,7 +1797,6 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
SpaprMachineState *spapr =
(SpaprMachineState *) object_dynamic_cast(qdev_get_machine(),
TYPE_SPAPR_MACHINE);
SpaprMachineClass *smc = spapr ? SPAPR_MACHINE_GET_CLASS(spapr) : NULL;
SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(sbd);
PCIHostState *phb = PCI_HOST_BRIDGE(sbd);
@ -1956,18 +1944,6 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
for (i = 0; i < PCI_NUM_PINS; i++) {
int irq = SPAPR_IRQ_PCI_LSI + sphb->index * PCI_NUM_PINS + i;
if (smc->legacy_irq_allocation) {
irq = spapr_irq_findone(spapr, errp);
if (irq < 0) {
error_prepend(errp, "can't allocate LSIs: ");
/*
* Older machines will never support PHB hotplug, ie, this is an
* init only path and QEMU will terminate. No need to rollback.
*/
return;
}
}
if (spapr_irq_claim(spapr, irq, true, errp) < 0) {
error_prepend(errp, "can't allocate LSIs: ");
goto unrealize;

View file

@ -344,6 +344,73 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu,
rtas_st(rets, 0, ret);
}
/* Papr Section 7.4.9 ibm,configure-kernel-dump RTAS call */
static void rtas_configure_kernel_dump(PowerPCCPU *cpu,
                                       SpaprMachineState *spapr,
                                       uint32_t token, uint32_t nargs,
                                       target_ulong args,
                                       uint32_t nret, target_ulong rets)
{
    target_ulong cmd = rtas_ld(args, 0);
    uint32_t ret_val;

    /* Number of outputs has to be 1 */
    if (nret != 1) {
        qemu_log_mask(LOG_GUEST_ERROR,
            "FADump: ibm,configure-kernel-dump called with nret != 1.\n");
        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
        return;
    }

    /* Number of inputs has to be 3 */
    if (nargs != 3) {
        qemu_log_mask(LOG_GUEST_ERROR,
            "FADump: ibm,configure-kernel-dump called with nargs != 3.\n");
        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
        return;
    }

    switch (cmd) {
    case FADUMP_CMD_REGISTER:
        ret_val = do_fadump_register(spapr, args);
        if (ret_val != RTAS_OUT_SUCCESS) {
            rtas_st(rets, 0, ret_val);
            return;
        }
        break;
    case FADUMP_CMD_UNREGISTER:
        /* Can't unregister while a dump is active */
        if (spapr->fadump_dump_active) {
            rtas_st(rets, 0, RTAS_OUT_DUMP_ACTIVE);
            return;
        }

        spapr->fadump_registered = false;
        spapr->fadump_dump_active = false;
        memset(&spapr->registered_fdm, 0, sizeof(spapr->registered_fdm));
        break;
    case FADUMP_CMD_INVALIDATE:
        if (!spapr->fadump_dump_active) {
            qemu_log_mask(LOG_GUEST_ERROR,
                "FADump: Nothing to invalidate, no dump active\n");
            rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
            /*
             * Fix: without this return the error status fell through and
             * was overwritten by RTAS_OUT_SUCCESS below
             */
            return;
        }

        spapr->fadump_registered = false;
        spapr->fadump_dump_active = false;
        memset(&spapr->registered_fdm, 0, sizeof(spapr->registered_fdm));
        break;
    default:
        qemu_log_mask(LOG_GUEST_ERROR,
            "FADump: Unknown command: " TARGET_FMT_lu "\n", cmd);
        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
        return;
    }

    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
}
static void rtas_ibm_os_term(PowerPCCPU *cpu,
SpaprMachineState *spapr,
uint32_t token, uint32_t nargs,
@ -353,6 +420,11 @@ static void rtas_ibm_os_term(PowerPCCPU *cpu,
target_ulong msgaddr = rtas_ld(args, 0);
char msg[512];
if (spapr->fadump_registered) {
/* If fadump boot works, control won't come back here */
return trigger_fadump_boot(spapr, rets);
}
cpu_physical_memory_read(msgaddr, msg, sizeof(msg) - 1);
msg[sizeof(msg) - 1] = 0;
@ -659,6 +731,10 @@ static void core_rtas_register_types(void)
spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
rtas_ibm_nmi_interlock);
/* Register fadump rtas call */
spapr_rtas_register(RTAS_CONFIGURE_KERNEL_DUMP, "ibm,configure-kernel-dump",
rtas_configure_kernel_dump);
qtest_set_command_cb(spapr_qtest_callback);
}

View file

@ -507,15 +507,6 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
dev->irq = spapr_vio_reg_to_irq(dev->reg);
if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
int irq = spapr_irq_findone(spapr, errp);
if (irq < 0) {
return;
}
dev->irq = irq;
}
if (spapr_irq_claim(spapr, dev->irq, false, errp) < 0) {
return;
}

View file

@ -353,34 +353,50 @@ static uint32_t vof_nextprop(const void *fdt, uint32_t phandle,
{
int offset, nodeoff = fdt_node_offset_by_phandle(fdt, phandle);
char prev[OF_PROPNAME_LEN_MAX + 1];
const char *tmp;
const char *tmp = NULL;
bool match = false;
if (readstr(prevaddr, prev, sizeof(prev))) {
return PROM_ERROR;
}
fdt_for_each_property_offset(offset, fdt, nodeoff) {
if (!fdt_getprop_by_offset(fdt, offset, &tmp, NULL)) {
return 0;
/*
* "name" may or may not be present in fdt but we should still return it.
* Do that first and then skip it if seen later.
*/
if (prev[0] == '\0') {
tmp = "name";
} else {
if (strcmp(prev, "name") == 0) {
prev[0] = '\0';
}
if (prev[0] == '\0' || strcmp(prev, tmp) == 0) {
if (prev[0] != '\0') {
offset = fdt_next_property_offset(fdt, offset);
if (offset < 0) {
return 0;
}
}
fdt_for_each_property_offset(offset, fdt, nodeoff) {
if (!fdt_getprop_by_offset(fdt, offset, &tmp, NULL)) {
return 0;
}
if (VOF_MEM_WRITE(nameaddr, tmp, strlen(tmp) + 1) != MEMTX_OK) {
return PROM_ERROR;
if (strcmp(tmp, "name") == 0) {
continue;
}
return 1;
if (match) {
break;
}
if (strcmp(prev, tmp) == 0) {
match = true;
continue;
}
if (prev[0] == '\0') {
break;
}
}
if (offset < 0) {
return 0;
}
}
if (tmp) {
if (VOF_MEM_WRITE(nameaddr, tmp, strlen(tmp) + 1) != MEMTX_OK) {
return PROM_ERROR;
}
return 1;
}
return 0;
}

View file

@ -13,6 +13,7 @@
#include "hw/ppc/xics.h" /* For ICSState */
#include "hw/ppc/spapr_tpm_proxy.h"
#include "hw/ppc/spapr_nested.h" /* For SpaprMachineStateNested */
#include "hw/ppc/spapr_fadump.h" /* For FadumpMemStruct */
struct SpaprVioBus;
struct SpaprPhbState;
@ -139,27 +140,11 @@ struct SpaprCapabilities {
* SpaprMachineClass:
*/
struct SpaprMachineClass {
/*< private >*/
MachineClass parent_class;
/*< public >*/
bool dr_phb_enabled; /* enable dynamic-reconfig/hotplug of PHBs */
bool update_dt_enabled; /* enable KVMPPC_H_UPDATE_DT */
bool legacy_irq_allocation;
uint32_t nr_xirqs;
bool broken_host_serial_model; /* present real host info to the guest */
bool pre_4_1_migration; /* don't migrate hpt-max-page-size */
bool linux_pci_probe;
bool smp_threads_vsmt; /* set VSMT to smp_threads by default */
hwaddr rma_limit; /* clamp the RMA to this size */
bool pre_5_1_assoc_refpoints;
bool pre_5_2_numa_associativity;
bool pre_6_2_numa_affinity;
bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index,
uint64_t *buid, hwaddr *pio,
hwaddr *mmio32, hwaddr *mmio64,
unsigned n_dma, uint32_t *liobns, Error **errp);
SpaprResizeHpt resize_hpt_default;
SpaprCapabilities default_caps;
SpaprIrq *irq;
@ -283,6 +268,11 @@ struct SpaprMachineState {
Error *fwnmi_migration_blocker;
SpaprWatchdog wds[WDT_MAX_WATCHDOGS];
/* Fadump State */
bool fadump_registered;
bool fadump_dump_active;
FadumpMemStruct registered_fdm;
};
#define H_SUCCESS 0
@ -708,6 +698,8 @@ void push_sregs_to_kvm_pr(SpaprMachineState *spapr);
#define RTAS_OUT_PARAM_ERROR -3
#define RTAS_OUT_NOT_SUPPORTED -3
#define RTAS_OUT_NO_SUCH_INDICATOR -3
#define RTAS_OUT_DUMP_ALREADY_REGISTERED -9
#define RTAS_OUT_DUMP_ACTIVE -10
#define RTAS_OUT_NOT_AUTHORIZED -9002
#define RTAS_OUT_SYSPARM_PARAM_ERROR -9999
@ -770,8 +762,9 @@ void push_sregs_to_kvm_pr(SpaprMachineState *spapr);
#define RTAS_IBM_SUSPEND_ME (RTAS_TOKEN_BASE + 0x2A)
#define RTAS_IBM_NMI_REGISTER (RTAS_TOKEN_BASE + 0x2B)
#define RTAS_IBM_NMI_INTERLOCK (RTAS_TOKEN_BASE + 0x2C)
#define RTAS_CONFIGURE_KERNEL_DUMP (RTAS_TOKEN_BASE + 0x2D)
#define RTAS_TOKEN_MAX (RTAS_TOKEN_BASE + 0x2D)
#define RTAS_TOKEN_MAX (RTAS_TOKEN_BASE + 0x2E)
/* RTAS ibm,get-system-parameter token values */
#define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS 20

View file

@ -0,0 +1,124 @@
/*
* Firmware Assisted Dump in PSeries
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#ifndef PPC_SPAPR_FADUMP_H
#define PPC_SPAPR_FADUMP_H
#include "qemu/osdep.h"
#include "cpu.h"
/* Fadump commands */
#define FADUMP_CMD_REGISTER 1
#define FADUMP_CMD_UNREGISTER 2
#define FADUMP_CMD_INVALIDATE 3
#define FADUMP_VERSION 1
/* Firmware provided dump sections */
#define FADUMP_CPU_STATE_DATA 0x0001
#define FADUMP_HPTE_REGION 0x0002
#define FADUMP_REAL_MODE_REGION 0x0011
/* OS defined sections */
#define FADUMP_PARAM_AREA 0x0100
/* Dump request flag */
#define FADUMP_REQUEST_FLAG 0x00000001
/* Dump status flags */
#define FADUMP_STATUS_DUMP_PERFORMED 0x8000
#define FADUMP_STATUS_DUMP_TRIGGERED 0x4000
#define FADUMP_STATUS_DUMP_ERROR 0x2000
/* Region dump error flags */
#define FADUMP_ERROR_INVALID_DATA_TYPE 0x8000
#define FADUMP_ERROR_INVALID_SOURCE_ADDR 0x4000
#define FADUMP_ERROR_LENGTH_EXCEEDS_SOURCE 0x2000
#define FADUMP_ERROR_INVALID_DEST_ADDR 0x1000
#define FAUDMP_ERROR_DEST_TOO_SMALL 0x0800
/*
* The Firmware Assisted Dump Memory structure supports a maximum of 10 sections
* in the dump memory structure. Presently, three sections are used for
* CPU state data, HPTE & Parameters area, while the remaining seven sections
* can be used for boot memory regions.
*/
#define FADUMP_MAX_SECTIONS 10
/* Number of register entries stored per cpu */
#define FADUMP_PER_CPU_REG_ENTRIES (32 /*GPR*/ + 45 /*others*/ + 2 /*STRT & END*/)
/* Mask of CPU ID in CPUSTRT and CPUEND entries */
#define FADUMP_CPU_ID_MASK ((1ULL << 32) - 1)
typedef struct FadumpSection FadumpSection;
typedef struct FadumpSectionHeader FadumpSectionHeader;
typedef struct FadumpMemStruct FadumpMemStruct;
typedef struct FadumpRegSaveAreaHeader FadumpRegSaveAreaHeader;
typedef struct FadumpRegEntry FadumpRegEntry;
struct SpaprMachineState;
/* Kernel Dump section info */
/* All fields are in big-endian */
struct FadumpSection {
uint32_t request_flag;
uint16_t source_data_type;
uint16_t error_flags;
uint64_t source_address;
uint64_t source_len;
uint64_t bytes_dumped;
uint64_t destination_address;
};
/* ibm,configure-kernel-dump header. */
struct FadumpSectionHeader {
uint32_t dump_format_version;
uint16_t dump_num_sections;
uint16_t dump_status_flag;
uint32_t offset_first_dump_section;
/* Fields for disk dump option. */
uint32_t dd_block_size;
uint64_t dd_block_offset;
uint64_t dd_num_blocks;
uint32_t dd_offset_disk_path;
/* Maximum time allowed to prevent an automatic dump-reboot. */
uint32_t max_time_auto;
};
/* Note: All the data in these structures is in big-endian */
struct FadumpMemStruct {
FadumpSectionHeader header;
FadumpSection rgn[FADUMP_MAX_SECTIONS];
};
/*
* The firmware-assisted dump format.
*
* The register save area is an area in the partition's memory used to preserve
* the register contents (CPU state data) for the active CPUs during a firmware
* assisted dump. The dump format contains register save area header followed
* by register entries. Each list of registers for a CPU starts with "CPUSTRT"
* and ends with "CPUEND".
*/
/* Register save area header. */
struct FadumpRegSaveAreaHeader {
uint64_t magic_number;
uint32_t version;
uint32_t num_cpu_offset;
};
/* Register entry. */
struct FadumpRegEntry {
uint64_t reg_id;
uint64_t reg_value;
};
uint32_t do_fadump_register(struct SpaprMachineState *, target_ulong);
void trigger_fadump_boot(struct SpaprMachineState *, target_ulong);
#endif /* PPC_SPAPR_FADUMP_H */

View file

@ -100,7 +100,6 @@ typedef struct SpaprIrq {
} SpaprIrq;
extern SpaprIrq spapr_irq_xics;
extern SpaprIrq spapr_irq_xics_legacy;
extern SpaprIrq spapr_irq_xive;
extern SpaprIrq spapr_irq_dual;

View file

@ -1,6 +1,8 @@
# Prebuilt device tree blobs shipped with QEMU (kept in alphabetical
# order; the pegasos*.dtb entries have matching .dts sources in this
# directory).
dtbs = [
  'bamboo.dtb',
  'canyonlands.dtb',
  'pegasos1.dtb',
  'pegasos2.dtb',
  'petalogix-ml605.dtb',
  'petalogix-s3adsp1800.dtb',
]

BIN
pc-bios/dtb/pegasos1.dtb Normal file

Binary file not shown.

125
pc-bios/dtb/pegasos1.dts Normal file
View file

@ -0,0 +1,125 @@
/*
 * QEMU Pegasos1 Device Tree Source
 *
 * Copyright 2025 BALATON Zoltan
 * SPDX-License-Identifier: GPL-2.0-or-later
 *
 * This is partial source, more info will be filled in by board code.
 */

/dts-v1/;

/ {
    #address-cells = <1>;
    device_type = "chrp";
    model = "Pegasos";
    revision = "1A";
    CODEGEN,vendor = "bplan GmbH";
    CODEGEN,board = "Pegasos";
    CODEGEN,description = "Pegasos CHRP PowerPC System";

    openprom {
        model = "Pegasos,0.1b123";
    };

    chosen {
    };

    /* reg left <0 0>: actual RAM size is filled in by board code */
    memory@0 {
        device_type = "memory";
        reg = <0 0>;
    };

    /* CPU child nodes are added by board code */
    cpus {
        #size-cells = <0>;
        #address-cells = <1>;
        #cpus = <1>;
    };

    failsafe {
        device_type = "serial";
    };

    pci@80000000 {
        device_type = "pci";
        #address-cells = <3>;
        #size-cells = <2>;
        clock-frequency = <33333333>;
        8259-interrupt-acknowledge = <0xfef00000>;
        reg = <0x80000000 0x7f000000>;
        ranges = <0x01000000 0 0x00000000 0xfe000000 0 0x00800000
                  0x02000000 0 0x80000000 0x80000000 0 0x7d000000
                  0x02000000 0 0xfd000000 0xfd000000 0 0x01000000>;
        bus-range = <0 0>;
        /* VIA ISA bridge (vendor 0x1106 = VIA, device 0x8231) at devfn 7 */
        isa@7 {
            vendor-id = <0x1106>;
            device-id = <0x8231>;
            revision-id = <0x10>;
            class-code = <0x60100>;
            /* Pegasos firmware has subsystem-id and */
            /* subsystem-vendor-id swapped */
            subsystem-id = <0x1af4>;
            subsystem-vendor-id = <0x1100>;
            reg = <0x3800 0 0 0 0>;
            device_type = "isa";
            #address-cells = <2>;
            #size-cells = <1>;
            eisa-slots = <0>;
            clock-frequency = <8333333>;
            slot-names = <0>;
            serial@i2f8 {
                device_type = "serial";
                reg = <1 0x2f8 8>;
                interrupts = <3 0>;
                clock-frequency = <0>;
            };
            8042@i60 {
                device_type = "";
                reg = <1 0x60 5>;
                clock-frequency = <0>;
                interrupt-controller = "";
                #address-cells = <1>;
                #size-cells = <0>;
                #interrupt-cells = <2>;
            };
            keyboard@i60 {
                device_type = "keyboard";
                reg = <1 0x60 5>;
                interrupts = <1 0>;
            };
            rtc@i70 {
                device_type = "rtc";
                reg = <1 0x70 2>;
                interrupts = <8 0>;
                clock-frequency = <0>;
                compatible = "ds1385-rtc";
            };
            timer@i40 {
                device_type = "timer";
                reg = <1 0x40 8>;
                clock-frequency = <0>;
            };
            fdc@i3f0 {
                device_type = "fdc";
                reg = <1 0x3f0 8>;
                interrupts = <6 0>;
                clock-frequency = <0>;
            };
            lpt@i3bc {
                device_type = "lpt";
                reg = <1 0x3bc 8>;
                interrupts = <7 0>;
                clock-frequency = <0>;
            };
        };
    };
};

BIN
pc-bios/dtb/pegasos2.dtb Normal file

Binary file not shown.

167
pc-bios/dtb/pegasos2.dts Normal file
View file

@ -0,0 +1,167 @@
/*
 * QEMU Pegasos2 Device Tree Source
 *
 * Copyright 2025 BALATON Zoltan
 * SPDX-License-Identifier: GPL-2.0-or-later
 *
 * This is partial source, more info will be filled in by board code.
 */

/dts-v1/;

/ {
    #address-cells = <1>;
    device_type = "chrp";
    model = "Pegasos2";
    revision = "2B";
    CODEGEN,vendor = "bplan GmbH";
    CODEGEN,board = "Pegasos2";
    CODEGEN,description = "Pegasos CHRP PowerPC System";

    openprom {
        model = "Pegasos2,1.1";
    };

    chosen {
    };

    /* reg left <0 0>: actual RAM size is filled in by board code */
    memory@0 {
        device_type = "memory";
        reg = <0 0>;
    };

    /* CPU child nodes are added by board code */
    cpus {
        #size-cells = <0>;
        #address-cells = <1>;
        #cpus = <1>;
    };

    /* RTAS call tokens advertised to the client OS */
    rtas {
        rtas-version = <1>;
        rtas-size = <20>;
        rtas-display-device = <0>;
        rtas-event-scan-rate = <0>;
        rtas-error-log-max = <0>;
        restart-rtas = <0>;
        nvram-fetch = <1>;
        nvram-store = <2>;
        get-time-of-day = <3>;
        set-time-of-day = <4>;
        event-scan = <6>;
        /* Pegasos2 firmware misspells check-exception */
        check-execption = <7>;
        read-pci-config = <8>;
        write-pci-config = <9>;
        display-character = <10>;
        set-indicator = <11>;
        power-off = <17>;
        suspend = <18>;
        hibernate = <19>;
        system-reboot = <20>;
    };

    failsafe {
        device_type = "serial";
    };

    pci@80000000 {
        device_type = "pci";
        #address-cells = <3>;
        #size-cells = <2>;
        clock-frequency = <33333333>;
        ranges = <0x01000000 0 0x00000000 0xfe000000 0 0x00010000
                  0x02000000 0 0x80000000 0x80000000 0 0x40000000>;
        8259-interrupt-acknowledge = <0xf1000cb4>;
        reg = <0x80000000 0x40000000>;
        pci-bridge-number = <0>;
        bus-range = <0 0>;
        /* VIA ISA bridge (vendor 0x1106 = VIA, device 0x8231) at devfn 0xc */
        isa@c {
            vendor-id = <0x1106>;
            device-id = <0x8231>;
            revision-id = <0x10>;
            class-code = <0x60100>;
            /* Pegasos firmware has subsystem-id and
               subsystem-vendor-id swapped */
            subsystem-id = <0x1af4>;
            subsystem-vendor-id = <0x1100>;
            reg = <0x6000 0 0 0 0>;
            device_type = "isa";
            #address-cells = <2>;
            #size-cells = <1>;
            eisa-slots = <0>;
            clock-frequency = <8333333>;
            slot-names = <0>;
            serial@i2f8 {
                device_type = "serial";
                reg = <1 0x2f8 8>;
                interrupts = <3 0>;
                clock-frequency = <1843200>;
                compatible = "pnpPNP,501";
            };
            8042@i60 {
                device_type = "";
                reg = <1 0x60 5>;
                clock-frequency = <0>;
                compatible = "chrp,8042";
                interrupt-controller = "";
                #address-cells = <1>;
                #size-cells = <0>;
                #interrupt-cells = <2>;
            };
            keyboard@i60 {
                device_type = "keyboard";
                reg = <1 0x60 5>;
                interrupts = <1 0>;
                compatible = "pnpPNP,303";
            };
            rtc@i70 {
                device_type = "rtc";
                reg = <1 0x70 2>;
                interrupts = <8 0>;
                clock-frequency = <0>;
                compatible = "ds1385-rtc";
            };
            timer@i40 {
                device_type = "timer";
                reg = <1 0x40 8>;
                clock-frequency = <0>;
                compatible = "pnpPNP,100";
            };
            fdc@i3f0 {
                device_type = "fdc";
                reg = <1 0x3f0 8>;
                interrupts = <6 0>;
                clock-frequency = <0>;
                compatible = "pnpPNP,700";
            };
            lpt@i3bc {
                device_type = "lpt";
                reg = <1 0x3bc 8>;
                interrupts = <7 0>;
                clock-frequency = <0>;
                compatible = "pnpPNP,400";
            };
        };
    };

    /* Second PCI host bridge; no devices below it in this partial tree */
    pci@c0000000 {
        device_type = "pci";
        #address-cells = <3>;
        #size-cells = <2>;
        clock-frequency = <66666666>;
        ranges = <0x01000000 0 0x00000000 0xf8000000 0 0x00010000
                  0x02000000 0 0xc0000000 0xc0000000 0 0x20000000>;
        reg = <0xc0000000 0x20000000>;
        pci-bridge-number = <1>;
        bus-range = <0 0>;
    };
};

View file

@ -1864,17 +1864,6 @@ uint32_t kvmppc_get_tbfreq(void)
return cached_tbfreq;
}
/*
 * Read the host's serial number from the device tree "system-id" node.
 * On success returns true and stores a newly allocated, NUL-terminated
 * string in *value (caller frees with g_free(), per g_file_get_contents()).
 */
bool kvmppc_get_host_serial(char **value)
{
    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
                               NULL);
}
/*
 * Read the host's model string from the device tree "model" node.
 * On success returns true and stores a newly allocated string in *value
 * (caller frees with g_free(), per g_file_get_contents()).
 */
bool kvmppc_get_host_model(char **value)
{
    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
}
/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{

View file

@ -21,8 +21,6 @@
uint32_t kvmppc_get_tbfreq(void);
uint64_t kvmppc_get_clockfreq(void);
bool kvmppc_get_host_model(char **buf);
bool kvmppc_get_host_serial(char **buf);
int kvmppc_get_hasidle(CPUPPCState *env);
int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len);
int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level);
@ -129,16 +127,6 @@ static inline uint32_t kvmppc_get_tbfreq(void)
return 0;
}
/*
 * Always reports failure (*buf untouched).
 * NOTE(review): appears to be the stub used when KVM support is compiled
 * out — confirm the surrounding #ifdef in the full header.
 */
static inline bool kvmppc_get_host_model(char **buf)
{
    return false;
}
/*
 * Always reports failure (*buf untouched).
 * NOTE(review): appears to be the stub used when KVM support is compiled
 * out — confirm the surrounding #ifdef in the full header.
 */
static inline bool kvmppc_get_host_serial(char **buf)
{
    return false;
}
static inline uint64_t kvmppc_get_clockfreq(void)
{
return 0;

View file

@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-or-later
test_ppc64_timeouts = {
'fadump' : 480,
'hv' : 1000,
'mac99' : 120,
'powernv' : 480,
@ -16,6 +17,7 @@ tests_ppc64_system_quick = [
tests_ppc64_system_thorough = [
'e500',
'fadump',
'hv',
'mac99',
'powernv',

View file

@ -0,0 +1,182 @@
#!/usr/bin/env python3
#
# SPDX-License-Identifier: GPL-2.0-or-later
from unittest import skip
from qemu_test import Asset
from qemu_test import wait_for_console_pattern
from qemu_test import LinuxKernelTest
from qemu_test import exec_command, exec_command_and_wait_for_pattern
class QEMUFadump(LinuxKernelTest):
    """
    Functional test to verify Fadump is working in following scenarios:

    1. test_fadump_pseries: PSeries
    2. test_fadump_pseries_kvm: PSeries + KVM
    """
    # Per-test timeout in seconds (used by the test framework)
    timeout = 90

    KERNEL_COMMON_COMMAND_LINE = 'console=hvc0 fadump=on '

    # Console messages checked for during the test
    msg_panic = 'Kernel panic - not syncing'
    msg_not_supported = 'Firmware-Assisted Dump is not supported on this hardware'
    # NOTE(review): the three attributes below are empty placeholders and
    # are never read by the tests in this class — confirm whether they can
    # be removed or should be filled in
    msg_registered_success = ''
    msg_registered_failed = ''
    msg_dump_active = ''

    # Kernel image for the PowerNV variant (currently skipped below)
    ASSET_EPAPR_KERNEL = Asset(
        ('https://github.com/open-power/op-build/releases/download/v2.7/'
         'zImage.epapr'),
        '0ab237df661727e5392cee97460e8674057a883c5f74381a128fa772588d45cd')

    # Fedora 39 ppc64le kernel used for the pseries tests
    ASSET_VMLINUZ_KERNEL = Asset(
        ('https://archives.fedoraproject.org/pub/archive/fedora-secondary/'
         'releases/39/Everything/ppc64le/os/ppc/ppc64/vmlinuz'),
        ('81e5541d243b50c8f9568906c6918dda22239744d637bb9a7b22d23c3d661226'
         '8d5302beb2ca5c06f93bdbc9736c414ef5120756c8bf496ff488ad07d116d67f')
    )

    # Matching Fedora 39 initrd (provides the /bin/sh used as rdinit)
    ASSET_FEDORA_INITRD = Asset(
        ('https://archives.fedoraproject.org/pub/archive/fedora-secondary/'
         'releases/39/Everything/ppc64le/os/ppc/ppc64/initrd.img'),
        'e7f24b44cb2aaa67d30e551db6ac8d29cc57c934b158dabca6b7f885f2cfdd9b')

    def do_test_fadump(self, is_kvm=False, is_powernv=False):
        """
        Helper Function for Fadump tests below

        It boots the VM with fadump enabled, checks if fadump is correctly
        registered.
        Then crashes the system causing a QEMU_SYSTEM_RESET, after which
        dump should be available in the kernel.

        Finally it checks the filesize of the exported /proc/vmcore in 2nd
        kernel to verify it's same as the VM's memory size

        :param is_kvm: run with the KVM accelerator instead of TCG
        :param is_powernv: use the powernv10 machine instead of pseries
        """
        if is_kvm:
            self.require_accelerator("kvm")
            self.vm.add_args("-accel", "kvm")
        else:
            self.require_accelerator("tcg")

        if is_powernv:
            self.set_machine("powernv10")
        else:
            # SLOF takes upto >20s in startup time, use VOF
            self.set_machine("pseries")
            self.vm.add_args("-machine", "x-vof=on")

        self.vm.add_args("-m", "6G")
        self.vm.set_console()

        kernel_path = None
        if is_powernv:
            kernel_path = self.ASSET_EPAPR_KERNEL.fetch()
        else:
            kernel_path = self.ASSET_VMLINUZ_KERNEL.fetch()
        initrd_path = self.ASSET_FEDORA_INITRD.fetch()

        self.vm.add_args('-kernel', kernel_path)
        self.vm.add_args('-initrd', initrd_path)
        # NOTE(review): '-nodefaults -serial mon:stdio' look like QEMU
        # options embedded inside the *kernel* command line — confirm
        # this is intentional
        self.vm.add_args('-append', "fadump=on"
                         " -nodefaults -serial mon:stdio crashkernel=2G"
                         " rdinit=/bin/sh ")
        self.vm.launch()

        # If kernel detects fadump support, and "fadump=on" is in command
        # line which we add above, it will print something like:
        #
        #   fadump: Reserved 1024MB of memory at 0x00000040000000 ...
        #
        # Else, if the kernel doesn't detect fadump support, it prints:
        #
        #   fadump: Firmware-Assisted Dump is not supported on this hardware
        #
        # Timeout after 20s if kernel doesn't print any fadump logs, this
        # can happen due to fadump being disabled in the kernel
        self.wait_for_regex_console_pattern(
            success_pattern="fadump: Reserved ",
            failure_pattern=r"fadump: (Firmware-Assisted Dump is not"
                            " supported on this hardware|Failed to find memory chunk for"
                            " reservation!)",
            timeout=20
        )

        # Ensure fadump is registered successfully, if registration
        # succeeds, we get a log from rtas fadump:
        #
        #   rtas fadump: Registration is successful!
        self.wait_for_console_pattern(
            "rtas fadump: Registration is successful!"
        )

        # Wait for the shell
        self.wait_for_console_pattern("#")

        # Mount /proc since not available in the initrd used
        exec_command(self, command="mount -t proc proc /proc")

        # Crash the kernel
        exec_command(self, command="echo c > /proc/sysrq-trigger")

        # Check for the kernel panic message, setting timeout to 20s as it
        # should occur almost immediately after previous echo c
        self.wait_for_regex_console_pattern(
            success_pattern="Kernel panic - not syncing: sysrq"
                            " triggered crash",
            timeout=20
        )

        # Check if fadump is active
        # If the kernel shows that fadump is active, that implies it's a
        # crashkernel boot
        # Else if the kernel shows "fadump: Reserved ..." then it's
        # treating this as the first kernel boot, this is likely the case
        # that qemu didn't pass the 'ibm,kernel-dump' device tree node
        wait_for_console_pattern(
            test=self,
            success_message="rtas fadump: Firmware-assisted dump is active",
            failure_message="fadump: Reserved "
        )

        # In a successful fadump boot, we get these logs:
        #
        #   [    0.000000] fadump: Firmware-assisted dump is active.
        #   [    0.000000] fadump: Reserving <>MB of memory at <> for preserving crash data
        #
        # Check if these logs are present in the fadump boot
        self.wait_for_console_pattern("preserving crash data")

        # Wait for prompt
        self.wait_for_console_pattern("sh-5.2#")

        # Mount /proc since not available in the initrd used
        exec_command_and_wait_for_pattern(self,
            command="mount -t proc proc /proc",
            success_message="#"
        )

        # Check if vmcore exists
        exec_command_and_wait_for_pattern(self,
            command="stat /proc/vmcore",
            success_message="File: /proc/vmcore",
            failure_message="No such file or directory"
        )

    def test_fadump_pseries(self):
        # Fadump on pseries with the TCG accelerator
        return self.do_test_fadump(is_kvm=False, is_powernv=False)

    @skip("PowerNV Fadump not supported yet")
    def test_fadump_powernv(self):
        return

    def test_fadump_pseries_kvm(self):
        """
        Test Fadump in PSeries with KVM accel
        """
        self.do_test_fadump(is_kvm=True, is_powernv=False)


if __name__ == '__main__':
    QEMUFadump.main()

View file

@ -5,6 +5,9 @@
import hashlib
import urllib.request
import logging
import re
import time
from .cmd import wait_for_console_pattern, exec_command_and_wait_for_pattern
from .testcase import QemuSystemTest
@ -19,6 +22,62 @@ class LinuxKernelTest(QemuSystemTest):
failure_message='Kernel panic - not syncing',
vm=vm)
def wait_for_regex_console_pattern(self, success_pattern,
failure_pattern=None,
timeout=None):
"""
Similar to 'wait_for_console_pattern', but supports regex patterns,
hence multiple failure/success patterns can be detected at a time.
Args:
success_pattern (str | re.Pattern): A regex pattern that indicates
a successful event. If found, the method exits normally.
failure_pattern (str | re.Pattern, optional): A regex pattern that
indicates a failure event. If found, the test fails
timeout (int, optional): The maximum time (in seconds) to wait for
a match.
If exceeded, the test fails.
"""
console = self.vm.console_file
console_logger = logging.getLogger('console')
self.log.debug(
f"Console interaction: success_msg='{success_pattern}' " +
f"failure_msg='{failure_pattern}' timeout='{timeout}s'")
# Only consume console output if waiting for something
if success_pattern is None and failure_pattern is None:
return
start_time = time.time()
while time.time() - start_time < timeout:
try:
msg = console.readline().decode().strip()
except UnicodeDecodeError:
msg = None
if not msg:
continue
console_logger.debug(msg)
if success_pattern is None or re.search(success_pattern, msg):
break
if failure_pattern:
# Find the matching error to print in log
match = re.search(failure_pattern, msg)
if not match:
continue
console.close()
fail = 'Failure message found in console: "%s".' \
' Expected: "%s"' % \
(match.group(), success_pattern)
self.fail(fail)
if time.time() - start_time >= timeout:
fail = f"Timeout ({timeout}s) while trying to search pattern"
self.fail(fail)
def launch_kernel(self, kernel, initrd=None, dtb=None, console_index=0,
wait_for=None):
self.vm.set_console(console_index=console_index)