diff --git a/MAINTAINERS b/MAINTAINERS index 36eef27b41..53dbf915ea 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1652,6 +1652,7 @@ F: hw/ppc/pegasos2.c F: hw/pci-host/mv64361.c F: hw/pci-host/mv643xx.h F: include/hw/pci-host/mv64361.h +F: pc-bios/dtb/pegasos[12].dt[sb] amigaone M: BALATON Zoltan @@ -3214,6 +3215,14 @@ F: scripts/coccinelle/remove_local_err.cocci F: scripts/coccinelle/use-error_fatal.cocci F: scripts/coccinelle/errp-guard.cocci +Firmware Assisted Dump (fadump) for sPAPR (pseries) +M: Aditya Gupta +R: Sourabh Jain +S: Maintained +F: include/hw/ppc/spapr_fadump.h +F: hw/ppc/spapr_fadump.c +F: tests/functional/ppc64/test_fadump.py + GDB stub M: Alex Bennée R: Philippe Mathieu-Daudé diff --git a/hw/ppc/amigaone.c b/hw/ppc/amigaone.c index 12279f42bc..47fb016b4a 100644 --- a/hw/ppc/amigaone.c +++ b/hw/ppc/amigaone.c @@ -221,7 +221,7 @@ struct bd_info { static void create_bd_info(hwaddr addr, ram_addr_t ram_size) { - struct bd_info *bd = g_new0(struct bd_info, 1); + g_autofree struct bd_info *bd = g_new0(struct bd_info, 1); bd->bi_memsize = cpu_to_be32(ram_size); bd->bi_flashstart = cpu_to_be32(PROM_ADDR); diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build index 170b90ae7d..6b7c1f4f49 100644 --- a/hw/ppc/meson.build +++ b/hw/ppc/meson.build @@ -26,6 +26,7 @@ ppc_ss.add(when: 'CONFIG_PSERIES', if_true: files( 'spapr_nvdimm.c', 'spapr_rtas_ddw.c', 'spapr_numa.c', + 'spapr_fadump.c', 'pef.c', )) ppc_ss.add(when: ['CONFIG_PSERIES', 'CONFIG_TCG'], if_true: files( diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c index e15cf96427..93696ed381 100644 --- a/hw/ppc/pegasos2.c +++ b/hw/ppc/pegasos2.c @@ -1,5 +1,5 @@ /* - * QEMU PowerPC CHRP (Genesi/bPlan Pegasos II) hardware System Emulator + * QEMU PowerPC CHRP (Genesi/bPlan Pegasos I/II) hardware System Emulator * * Copyright (c) 2018-2021 BALATON Zoltan * @@ -15,6 +15,7 @@ #include "hw/pci/pci_host.h" #include "hw/irq.h" #include "hw/or-irq.h" +#include "hw/pci-host/articia.h" #include "hw/pci-host/mv64361.h" #include "hw/isa/vt82c686.h" #include "hw/ide/pci.h" @@ -55,34 +56,27 @@ #define H_PRIVILEGE -3 /* Caller not privileged */ #define H_PARAMETER -4 /* Parameter invalid, out-of-range or conflicting */ -#define BUS_FREQ_HZ 133333333 +typedef enum { + PEGASOS1 = 1, + PEGASOS2 = 2, +} PegasosMachineType; -#define PCI0_CFG_ADDR 0xcf8 -#define PCI0_MEM_BASE 0xc0000000 -#define PCI0_MEM_SIZE 0x20000000 -#define PCI0_IO_BASE 0xf8000000 -#define PCI0_IO_SIZE 0x10000 +#define TYPE_PEGASOS_MACHINE MACHINE_TYPE_NAME("pegasos") +OBJECT_DECLARE_SIMPLE_TYPE(PegasosMachineState, PEGASOS_MACHINE) -#define PCI1_CFG_ADDR 0xc78 -#define PCI1_MEM_BASE 0x80000000 -#define PCI1_MEM_SIZE 0x40000000 -#define PCI1_IO_BASE 0xfe000000 -#define PCI1_IO_SIZE 0x10000 - -#define TYPE_PEGASOS2_MACHINE MACHINE_TYPE_NAME("pegasos2") -OBJECT_DECLARE_TYPE(Pegasos2MachineState, MachineClass, PEGASOS2_MACHINE) - -struct Pegasos2MachineState { +struct PegasosMachineState { MachineState parent_obj; + PegasosMachineType type; PowerPCCPU *cpu; - DeviceState *mv; + DeviceState *nb; /* north bridge */ + DeviceState *sb; /* south bridge */ + int bus_freq_hz; IRQState pci_irqs[PCI_NUM_PINS]; OrIRQState orirq[PCI_NUM_PINS]; qemu_irq mv_pirq[PCI_NUM_PINS]; qemu_irq via_pirq[PCI_NUM_PINS]; Vof *vof; - void *fdt_blob; uint64_t kernel_addr; uint64_t kernel_entry; uint64_t kernel_size; @@ -90,44 +84,80 @@ struct Pegasos2MachineState { uint64_t initrd_size; }; -static void *build_fdt(MachineState *machine, int *fdt_size); +static void *pegasos1_build_fdt(PegasosMachineState *pm, int *fdt_size); +static void *pegasos2_build_fdt(PegasosMachineState *pm, int *fdt_size); -static void pegasos2_cpu_reset(void *opaque) +static void pegasos_cpu_reset(void *opaque) { PowerPCCPU *cpu = opaque; - Pegasos2MachineState *pm = PEGASOS2_MACHINE(current_machine); + PegasosMachineState *pm = PEGASOS_MACHINE(current_machine); cpu_reset(CPU(cpu)); cpu->env.spr[SPR_HID1] = 7ULL << 28; if (pm->vof) { cpu->env.gpr[1] = 2 * VOF_STACK_SIZE - 0x20; cpu->env.nip = 0x100; + } else if (pm->type == PEGASOS1) { + cpu->env.nip = 0xfffc0100; } cpu_ppc_tb_reset(&cpu->env); } static void pegasos2_pci_irq(void *opaque, int n, int level) { - Pegasos2MachineState *pm = opaque; + PegasosMachineState *pm = opaque; /* PCI interrupt lines are connected to both MV64361 and VT8231 */ qemu_set_irq(pm->mv_pirq[n], level); qemu_set_irq(pm->via_pirq[n], level); } -static void pegasos2_init(MachineState *machine) +/* Set up PCI interrupt routing: lines from pci.0 and pci.1 are ORed */ +static void pegasos2_setup_pci_irq(PegasosMachineState *pm) { - Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine); + for (int h = 0; h < 2; h++) { + DeviceState *pd; + g_autofree const char *pn = g_strdup_printf("pcihost%d", h); + + pd = DEVICE(object_resolve_path_component(OBJECT(pm->nb), pn)); + assert(pd); + for (int i = 0; i < PCI_NUM_PINS; i++) { + OrIRQState *ori = &pm->orirq[i]; + + if (h == 0) { + g_autofree const char *n = g_strdup_printf("pci-orirq[%d]", i); + + object_initialize_child_with_props(OBJECT(pm), n, + ori, sizeof(*ori), + TYPE_OR_IRQ, &error_fatal, + "num-lines", "2", NULL); + qdev_realize(DEVICE(ori), NULL, &error_fatal); + qemu_init_irq(&pm->pci_irqs[i], pegasos2_pci_irq, pm, i); + qdev_connect_gpio_out(DEVICE(ori), 0, &pm->pci_irqs[i]); + pm->mv_pirq[i] = qdev_get_gpio_in_named(pm->nb, "gpp", 12 + i); + pm->via_pirq[i] = qdev_get_gpio_in_named(pm->sb, "pirq", i); + } + qdev_connect_gpio_out(pd, i, qdev_get_gpio_in(DEVICE(ori), h)); + } + } + qdev_connect_gpio_out_named(pm->sb, "intr", 0, + qdev_get_gpio_in_named(pm->nb, "gpp", 31)); +} + +static void pegasos_init(MachineState *machine) +{ + PegasosMachineState *pm = PEGASOS_MACHINE(machine); CPUPPCState *env; MemoryRegion *rom = g_new(MemoryRegion, 1); - PCIBus *pci_bus; + PCIBus *pci_bus = NULL; Object *via; PCIDevice *dev; I2CBus *i2c_bus; const char *fwname = machine->firmware ?: PROM_FILENAME; char *filename; - int i; + hwaddr prom_addr; ssize_t sz; + int devfn; uint8_t *spd_data; /* init CPU */ @@ -139,8 +169,8 @@ static void pegasos2_init(MachineState *machine) } /* Set time-base frequency */ - cpu_ppc_tb_init(env, BUS_FREQ_HZ / 4); - qemu_register_reset(pegasos2_cpu_reset, pm->cpu); + cpu_ppc_tb_init(env, pm->bus_freq_hz / 4); + qemu_register_reset(pegasos_cpu_reset, pm->cpu); /* RAM */ if (machine->ram_size > 2 * GiB) { @@ -158,12 +188,16 @@ static void pegasos2_init(MachineState *machine) if (!machine->firmware && !pm->vof) { pm->vof = g_malloc0(sizeof(*pm->vof)); } - memory_region_init_rom(rom, NULL, "pegasos2.rom", PROM_SIZE, &error_fatal); - memory_region_add_subregion(get_system_memory(), PROM_ADDR, rom); + prom_addr = PROM_ADDR; + if (pm->type == PEGASOS1) { + prom_addr += PROM_SIZE; + } + memory_region_init_rom(rom, NULL, "rom", PROM_SIZE, &error_fatal); + memory_region_add_subregion(get_system_memory(), prom_addr, rom); sz = load_elf(filename, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ELFDATA2MSB, PPC_ELF_MACHINE, 0, 0); if (sz <= 0) { - sz = load_image_targphys(filename, pm->vof ? 0 : PROM_ADDR, PROM_SIZE); + sz = load_image_targphys(filename, pm->vof ? 0 : prom_addr, PROM_SIZE); } if (sz <= 0 || sz > PROM_SIZE) { error_report("Could not load firmware '%s'", filename); @@ -174,16 +208,38 @@ static void pegasos2_init(MachineState *machine) pm->vof->fw_size = sz; } - /* Marvell Discovery II system controller */ - pm->mv = DEVICE(sysbus_create_simple(TYPE_MV64361, -1, - qdev_get_gpio_in(DEVICE(pm->cpu), PPC6xx_INPUT_INT))); - for (i = 0; i < PCI_NUM_PINS; i++) { - pm->mv_pirq[i] = qdev_get_gpio_in_named(pm->mv, "gpp", 12 + i); + /* north bridge */ + switch (pm->type) { + case PEGASOS1: + { + MemoryRegion *pci_mem, *mr; + + /* Articia S */ + pm->nb = DEVICE(sysbus_create_simple(TYPE_ARTICIA, 0xfe000000, NULL)); + pci_mem = sysbus_mmio_get_region(SYS_BUS_DEVICE(pm->nb), 1); + mr = g_new(MemoryRegion, 1); + memory_region_init_alias(mr, OBJECT(pm->nb), "pci-mem-low", pci_mem, + 0, 0x1000000); + memory_region_add_subregion(get_system_memory(), 0xfd000000, mr); + mr = g_new(MemoryRegion, 1); + memory_region_init_alias(mr, OBJECT(pm->nb), "pci-mem-high", pci_mem, + 0x80000000, 0x7d000000); + memory_region_add_subregion(get_system_memory(), 0x80000000, mr); + pci_bus = PCI_BUS(qdev_get_child_bus(pm->nb, "pci.0")); + break; + } + case PEGASOS2: + /* Marvell Discovery II system controller */ + pm->nb = DEVICE(sysbus_create_simple(TYPE_MV64361, -1, + qdev_get_gpio_in(DEVICE(pm->cpu), PPC6xx_INPUT_INT))); + pci_bus = mv64361_get_pci_bus(pm->nb, 1); + break; } - pci_bus = mv64361_get_pci_bus(pm->mv, 1); /* VIA VT8231 South Bridge (multifunction PCI device) */ - via = OBJECT(pci_new_multifunction(PCI_DEVFN(12, 0), TYPE_VT8231_ISA)); + devfn = PCI_DEVFN(pm->type == PEGASOS1 ? 7 : 12, 0); + pm->sb = DEVICE(pci_new_multifunction(devfn, TYPE_VT8231_ISA)); + via = OBJECT(pm->sb); /* Set properties on individual devices before realizing the south bridge */ if (machine->audiodev) { @@ -192,14 +248,9 @@ static void pegasos2_init(MachineState *machine) } pci_realize_and_unref(PCI_DEVICE(via), pci_bus, &error_abort); - for (i = 0; i < PCI_NUM_PINS; i++) { - pm->via_pirq[i] = qdev_get_gpio_in_named(DEVICE(via), "pirq", i); - } object_property_add_alias(OBJECT(machine), "rtc-time", object_resolve_path_component(via, "rtc"), "date"); - qdev_connect_gpio_out_named(DEVICE(via), "intr", 0, - qdev_get_gpio_in_named(pm->mv, "gpp", 31)); dev = PCI_DEVICE(object_resolve_path_component(via, "ide")); pci_ide_create_devs(dev); @@ -212,29 +263,20 @@ static void pegasos2_init(MachineState *machine) /* other PC hardware */ pci_vga_init(pci_bus); - /* PCI interrupt routing: lines from pci.0 and pci.1 are ORed */ - for (int h = 0; h < 2; h++) { - DeviceState *pd; - g_autofree const char *pn = g_strdup_printf("pcihost%d", h); - - pd = DEVICE(object_resolve_path_component(OBJECT(pm->mv), pn)); - assert(pd); - for (i = 0; i < PCI_NUM_PINS; i++) { - OrIRQState *ori = &pm->orirq[i]; - - if (h == 0) { - g_autofree const char *n = g_strdup_printf("pci-orirq[%d]", i); - - object_initialize_child_with_props(OBJECT(pm), n, - ori, sizeof(*ori), - TYPE_OR_IRQ, &error_fatal, - "num-lines", "2", NULL); - qdev_realize(DEVICE(ori), NULL, &error_fatal); - qemu_init_irq(&pm->pci_irqs[i], pegasos2_pci_irq, pm, i); - qdev_connect_gpio_out(DEVICE(ori), 0, &pm->pci_irqs[i]); - } - qdev_connect_gpio_out(pd, i, qdev_get_gpio_in(DEVICE(ori), h)); + /* pci interrupt routing */ + switch (pm->type) { + case PEGASOS1: + qdev_connect_gpio_out_named(pm->sb, "intr", 0, + qdev_get_gpio_in(DEVICE(pm->cpu), + PPC6xx_INPUT_INT)); + for (int i = 0; i < PCI_NUM_PINS; i++) { + qdev_connect_gpio_out(pm->nb, i, + qdev_get_gpio_in_named(pm->sb, "pirq", i)); } + break; + case PEGASOS2: + pegasos2_setup_pci_irq(pm); + break; } if (machine->kernel_filename) { @@ -273,25 +315,104 @@ static void pegasos2_init(MachineState *machine) } } -static uint32_t pegasos2_mv_reg_read(Pegasos2MachineState *pm, +static void pegasos_superio_write(uint8_t addr, uint8_t val) +{ + cpu_physical_memory_write(0xfe0003f0, &addr, 1); + cpu_physical_memory_write(0xfe0003f1, &val, 1); +} + +static void pegasos1_pci_config_write(PegasosMachineState *pm, int bus, + uint32_t addr, uint32_t len, uint32_t val) +{ + addr |= BIT(31); + cpu_physical_memory_write(0xfec00cf8, &addr, 4); + cpu_physical_memory_write(0xfee00cfc, &val, len); +} + +static void pegasos1_chipset_reset(PegasosMachineState *pm) +{ + uint8_t elcr = 0x2e; + cpu_physical_memory_write(0xfe0004d1, &elcr, sizeof(elcr)); + + pegasos1_pci_config_write(pm, 0, PCI_COMMAND, 2, PCI_COMMAND_IO | + PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + + pegasos1_pci_config_write(pm, 0, (PCI_DEVFN(7, 0) << 8) | + PCI_INTERRUPT_LINE, 2, 0x9); + pegasos1_pci_config_write(pm, 0, (PCI_DEVFN(7, 0) << 8) | + 0x50, 1, 0x6); + pegasos_superio_write(0xf4, 0xbe); + pegasos_superio_write(0xf6, 0xef); + pegasos_superio_write(0xf7, 0xfc); + pegasos_superio_write(0xf2, 0x14); + pegasos1_pci_config_write(pm, 0, (PCI_DEVFN(7, 0) << 8) | + 0x51, 1, 0x3d); + pegasos1_pci_config_write(pm, 0, (PCI_DEVFN(7, 0) << 8) | + 0x55, 1, 0x90); + pegasos1_pci_config_write(pm, 0, (PCI_DEVFN(7, 0) << 8) | + 0x56, 1, 0x99); + pegasos1_pci_config_write(pm, 0, (PCI_DEVFN(7, 0) << 8) | + 0x57, 1, 0x90); + + pegasos1_pci_config_write(pm, 0, (PCI_DEVFN(7, 1) << 8) | + PCI_INTERRUPT_LINE, 2, 0x10e); + pegasos1_pci_config_write(pm, 0, (PCI_DEVFN(7, 1) << 8) | + PCI_CLASS_PROG, 1, 0xf); + pegasos1_pci_config_write(pm, 0, (PCI_DEVFN(7, 1) << 8) | + 0x40, 1, 0xb); + pegasos1_pci_config_write(pm, 0, (PCI_DEVFN(7, 1) << 8) | + 0x50, 4, 0x17171717); + pegasos1_pci_config_write(pm, 0, (PCI_DEVFN(7, 1) << 8) | + PCI_COMMAND, 2, 0x87); + + pegasos1_pci_config_write(pm, 0, (PCI_DEVFN(7, 2) << 8) | + PCI_INTERRUPT_LINE, 2, 0x409); + pegasos1_pci_config_write(pm, 0, (PCI_DEVFN(7, 2) << 8) | + PCI_COMMAND, 2, 0x7); + + pegasos1_pci_config_write(pm, 0, (PCI_DEVFN(7, 3) << 8) | + PCI_INTERRUPT_LINE, 2, 0x409); + pegasos1_pci_config_write(pm, 0, (PCI_DEVFN(7, 3) << 8) | + PCI_COMMAND, 2, 0x7); + + pegasos1_pci_config_write(pm, 0, (PCI_DEVFN(7, 4) << 8) | + PCI_INTERRUPT_LINE, 2, 0x9); + pegasos1_pci_config_write(pm, 0, (PCI_DEVFN(7, 4) << 8) | + 0x48, 4, 0x2001); + pegasos1_pci_config_write(pm, 0, (PCI_DEVFN(7, 4) << 8) | + 0x41, 1, 0); + pegasos1_pci_config_write(pm, 0, (PCI_DEVFN(7, 4) << 8) | + 0x90, 4, 0x1000); + + pegasos1_pci_config_write(pm, 0, (PCI_DEVFN(7, 5) << 8) | + PCI_INTERRUPT_LINE, 2, 0x309); + + pegasos1_pci_config_write(pm, 0, (PCI_DEVFN(7, 6) << 8) | + PCI_INTERRUPT_LINE, 2, 0x309); +} + +static uint32_t pegasos2_mv_reg_read(PegasosMachineState *pm, uint32_t addr, uint32_t len) { - MemoryRegion *r = sysbus_mmio_get_region(SYS_BUS_DEVICE(pm->mv), 0); + MemoryRegion *r = sysbus_mmio_get_region(SYS_BUS_DEVICE(pm->nb), 0); uint64_t val = 0xffffffffULL; memory_region_dispatch_read(r, addr, &val, size_memop(len) | MO_LE, MEMTXATTRS_UNSPECIFIED); return val; } -static void pegasos2_mv_reg_write(Pegasos2MachineState *pm, uint32_t addr, +static void pegasos2_mv_reg_write(PegasosMachineState *pm, uint32_t addr, uint32_t len, uint32_t val) { - MemoryRegion *r = sysbus_mmio_get_region(SYS_BUS_DEVICE(pm->mv), 0); + MemoryRegion *r = sysbus_mmio_get_region(SYS_BUS_DEVICE(pm->nb), 0); memory_region_dispatch_write(r, addr, val, size_memop(len) | MO_LE, MEMTXATTRS_UNSPECIFIED); } -static uint32_t pegasos2_pci_config_read(Pegasos2MachineState *pm, int bus, +#define PCI0_CFG_ADDR 0xcf8 +#define PCI1_CFG_ADDR 0xc78 + +static uint32_t pegasos2_pci_config_read(PegasosMachineState *pm, int bus, uint32_t addr, uint32_t len) { hwaddr pcicfg = bus ? PCI1_CFG_ADDR : PCI0_CFG_ADDR; @@ -304,7 +425,7 @@ static uint32_t pegasos2_pci_config_read(Pegasos2MachineState *pm, int bus, return val; } -static void pegasos2_pci_config_write(Pegasos2MachineState *pm, int bus, +static void pegasos2_pci_config_write(PegasosMachineState *pm, int bus, uint32_t addr, uint32_t len, uint32_t val) { hwaddr pcicfg = bus ? PCI1_CFG_ADDR : PCI0_CFG_ADDR; @@ -313,25 +434,8 @@ static void pegasos2_pci_config_write(Pegasos2MachineState *pm, int bus, pegasos2_mv_reg_write(pm, pcicfg + 4, len, val); } -static void pegasos2_superio_write(uint8_t addr, uint8_t val) +static void pegasos2_chipset_reset(PegasosMachineState *pm) { - cpu_physical_memory_write(PCI1_IO_BASE + 0x3f0, &addr, 1); - cpu_physical_memory_write(PCI1_IO_BASE + 0x3f1, &val, 1); -} - -static void pegasos2_machine_reset(MachineState *machine, ResetType type) -{ - Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine); - void *fdt; - uint64_t d[2]; - int sz; - - qemu_devices_reset(type); - if (!pm->vof) { - return; /* Firmware should set up machine so nothing to do */ - } - - /* Otherwise, set up devices that board firmware would normally do */ pegasos2_mv_reg_write(pm, 0, 4, 0x28020ff); pegasos2_mv_reg_write(pm, 0x278, 4, 0xa31fc); pegasos2_mv_reg_write(pm, 0xf300, 4, 0x11ff0400); @@ -346,10 +450,10 @@ static void pegasos2_machine_reset(MachineState *machine, ResetType type) PCI_INTERRUPT_LINE, 2, 0x9); pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | 0x50, 1, 0x6); - pegasos2_superio_write(0xf4, 0xbe); - pegasos2_superio_write(0xf6, 0xef); - pegasos2_superio_write(0xf7, 0xfc); - pegasos2_superio_write(0xf2, 0x14); + pegasos_superio_write(0xf4, 0xbe); + pegasos_superio_write(0xf6, 0xef); + pegasos_superio_write(0xf7, 0xfc); + pegasos_superio_write(0xf2, 0x14); pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | 0x50, 1, 0x2); pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | @@ -394,6 +498,35 @@ static void pegasos2_machine_reset(MachineState *machine, ResetType type) pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 6) << 8) | PCI_INTERRUPT_LINE, 2, 0x309); +} + +static void pegasos_machine_reset(MachineState *machine, ResetType type) +{ + PegasosMachineState *pm = PEGASOS_MACHINE(machine); + void *fdt = NULL; + uint32_t c[2]; + uint64_t d[2]; + int sz; + + qemu_devices_reset(type); + if (!pm->vof) { + return; /* Firmware should set up machine so nothing to do */ + } + + /* Otherwise, set up devices that board firmware would normally do */ + switch (pm->type) { + case PEGASOS1: + pegasos1_chipset_reset(pm); + fdt = pegasos1_build_fdt(pm, &sz); + break; + case PEGASOS2: + pegasos2_chipset_reset(pm); + fdt = pegasos2_build_fdt(pm, &sz); + break; + } + if (!fdt) { + exit(1); + } /* Device tree and VOF set up */ vof_init(pm->vof, machine->ram_size, &error_fatal); @@ -411,19 +544,31 @@ static void pegasos2_machine_reset(MachineState *machine, ResetType type) error_report("Memory for initrd is in use"); exit(1); } - fdt = build_fdt(machine, &sz); + + /* Set memory size */ + c[0] = 0; + c[1] = cpu_to_be32(machine->ram_size); + qemu_fdt_setprop(fdt, "/memory@0", "reg", c, sizeof(c)); + + /* Boot parameters */ + if (pm->initrd_addr && pm->initrd_size) { + qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-end", + pm->initrd_addr + pm->initrd_size); + qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-start", + pm->initrd_addr); + } + qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", + machine->kernel_cmdline ?: ""); /* FIXME: VOF assumes entry is same as load address */ d[0] = cpu_to_be64(pm->kernel_entry); d[1] = cpu_to_be64(pm->kernel_size - (pm->kernel_entry - pm->kernel_addr)); qemu_fdt_setprop(fdt, "/chosen", "qemu,boot-kernel", d, sizeof(d)); - g_free(pm->fdt_blob); - pm->fdt_blob = fdt; - vof_build_dt(fdt, pm->vof); vof_client_open_store(fdt, pm->vof, "/chosen", "stdout", "/failsafe"); /* Set machine->fdt for 'dumpdtb' QMP/HMP command */ + g_free(machine->fdt); machine->fdt = fdt; pm->cpu->vhyp = PPC_VIRTUAL_HYPERVISOR(machine); @@ -448,7 +593,7 @@ enum pegasos2_rtas_tokens { RTAS_SYSTEM_REBOOT = 20, }; -static target_ulong pegasos2_rtas(PowerPCCPU *cpu, Pegasos2MachineState *pm, +static target_ulong pegasos2_rtas(PowerPCCPU *cpu, PegasosMachineState *pm, target_ulong args_real) { AddressSpace *as = CPU(cpu)->as; @@ -544,14 +689,14 @@ static target_ulong pegasos2_rtas(PowerPCCPU *cpu, Pegasos2MachineState *pm, } } -static bool pegasos2_cpu_in_nested(PowerPCCPU *cpu) +static bool pegasos_cpu_in_nested(PowerPCCPU *cpu) { return false; } -static void pegasos2_hypercall(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu) +static void pegasos_hypercall(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu) { - Pegasos2MachineState *pm = PEGASOS2_MACHINE(vhyp); + PegasosMachineState *pm = PEGASOS_MACHINE(vhyp); CPUPPCState *env = &cpu->env; /* The TCG path should also be holding the BQL at this point */ @@ -560,10 +705,10 @@ static void pegasos2_hypercall(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu) if (FIELD_EX64(env->msr, MSR, PR)) { qemu_log_mask(LOG_GUEST_ERROR, "Hypercall made with MSR[PR]=1\n"); env->gpr[3] = H_PRIVILEGE; - } else if (env->gpr[3] == KVMPPC_H_RTAS) { + } else if (env->gpr[3] == KVMPPC_H_RTAS && pm->type == PEGASOS2) { env->gpr[3] = pegasos2_rtas(cpu, pm, env->gpr[4]); } else if (env->gpr[3] == KVMPPC_H_VOF_CLIENT) { - int ret = vof_client_call(MACHINE(pm), pm->vof, pm->fdt_blob, + int ret = vof_client_call(MACHINE(pm), pm->vof, MACHINE(pm)->fdt, env->gpr[4]); env->gpr[3] = (ret ? H_PARAMETER : H_SUCCESS); } else { @@ -582,56 +727,88 @@ static target_ulong vhyp_encode_hpt_for_kvm_pr(PPCVirtualHypervisor *vhyp) return POWERPC_CPU(current_cpu)->env.spr[SPR_SDR1]; } -static bool pegasos2_setprop(MachineState *ms, const char *path, - const char *propname, void *val, int vallen) +static bool pegasos_setprop(MachineState *ms, const char *path, + const char *propname, void *val, int vallen) { return true; } +static void pegasos_machine_init(MachineClass *mc) +{ + PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_CLASS(mc); + VofMachineIfClass *vmc = VOF_MACHINE_CLASS(mc); + + mc->init = pegasos_init; + mc->reset = pegasos_machine_reset; + mc->block_default_type = IF_IDE; + mc->default_boot_order = "cd"; + mc->default_display = "std"; + mc->default_ram_id = "ram"; + mc->default_ram_size = 512 * MiB; + machine_add_audiodev_property(mc); + + vhc->cpu_in_nested = pegasos_cpu_in_nested; + vhc->hypercall = pegasos_hypercall; + vhc->cpu_exec_enter = vhyp_nop; + vhc->cpu_exec_exit = vhyp_nop; + vhc->encode_hpt_for_kvm_pr = vhyp_encode_hpt_for_kvm_pr; + + vmc->setprop = pegasos_setprop; +} + +static void pegasos1_init(Object *obj) +{ + PegasosMachineState *pm = PEGASOS_MACHINE(obj); + + pm->type = PEGASOS1; + pm->bus_freq_hz = 33000000; +} + +static void pegasos1_machine_class_init(ObjectClass *oc, const void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + + mc->desc = "Genesi/bPlan Pegasos I"; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("750cxe_v3.1b"); +} + +static void pegasos2_init(Object *obj) +{ + PegasosMachineState *pm = PEGASOS_MACHINE(obj); + + pm->type = PEGASOS2; + pm->bus_freq_hz = 133333333; +} + static void pegasos2_machine_class_init(ObjectClass *oc, const void *data) { MachineClass *mc = MACHINE_CLASS(oc); - PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_CLASS(oc); - VofMachineIfClass *vmc = VOF_MACHINE_CLASS(oc); mc->desc = "Genesi/bPlan Pegasos II"; - mc->init = pegasos2_init; - mc->reset = pegasos2_machine_reset; - mc->block_default_type = IF_IDE; - mc->default_boot_order = "cd"; - mc->default_display = "std"; mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("7457_v1.2"); - mc->default_ram_id = "pegasos2.ram"; - mc->default_ram_size = 512 * MiB; - machine_add_audiodev_property(mc); - - vhc->cpu_in_nested = pegasos2_cpu_in_nested; - vhc->hypercall = pegasos2_hypercall; - vhc->cpu_exec_enter = vhyp_nop; - vhc->cpu_exec_exit = vhyp_nop; - vhc->encode_hpt_for_kvm_pr = vhyp_encode_hpt_for_kvm_pr; - - vmc->setprop = pegasos2_setprop; } -static const TypeInfo pegasos2_machine_info = { - .name = TYPE_PEGASOS2_MACHINE, - .parent = TYPE_MACHINE, - .class_init = pegasos2_machine_class_init, - .instance_size = sizeof(Pegasos2MachineState), - .interfaces = (const InterfaceInfo[]) { - { TYPE_PPC_VIRTUAL_HYPERVISOR }, - { TYPE_VOF_MACHINE_IF }, - { } +DEFINE_MACHINE_EXTENDED("pegasos", MACHINE, PegasosMachineState, + pegasos_machine_init, true, (const InterfaceInfo[]) { + { TYPE_PPC_VIRTUAL_HYPERVISOR }, + { TYPE_VOF_MACHINE_IF }, { } }) + +static const TypeInfo pegasos_machine_types[] = { + { + .name = MACHINE_TYPE_NAME("pegasos1"), + .parent = TYPE_PEGASOS_MACHINE, + .class_init = pegasos1_machine_class_init, + .instance_init = pegasos1_init, + }, + { + .name = MACHINE_TYPE_NAME("pegasos2"), + .parent = TYPE_PEGASOS_MACHINE, + .class_init = pegasos2_machine_class_init, + .instance_init = pegasos2_init, }, }; -static void pegasos2_machine_register_types(void) -{ - type_register_static(&pegasos2_machine_info); -} - -type_init(pegasos2_machine_register_types) +DEFINE_TYPES(pegasos_machine_types) /* FDT creation for passing to firmware */ @@ -654,121 +831,14 @@ static void dt_usb(PCIBus *bus, PCIDevice *d, FDTInfo *fi) qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "usb"); } -static void dt_isa(PCIBus *bus, PCIDevice *d, FDTInfo *fi) -{ - GString *name = g_string_sized_new(64); - uint32_t cells[3]; - - qemu_fdt_setprop_cell(fi->fdt, fi->path, "#size-cells", 1); - qemu_fdt_setprop_cell(fi->fdt, fi->path, "#address-cells", 2); - qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "isa"); - qemu_fdt_setprop_string(fi->fdt, fi->path, "name", "isa"); - - /* additional devices */ - g_string_printf(name, "%s/lpt@i3bc", fi->path); - qemu_fdt_add_subnode(fi->fdt, name->str); - qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); - cells[0] = cpu_to_be32(7); - cells[1] = 0; - qemu_fdt_setprop(fi->fdt, name->str, "interrupts", - cells, 2 * sizeof(cells[0])); - cells[0] = cpu_to_be32(1); - cells[1] = cpu_to_be32(0x3bc); - cells[2] = cpu_to_be32(8); - qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); - qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "lpt"); - qemu_fdt_setprop_string(fi->fdt, name->str, "name", "lpt"); - - g_string_printf(name, "%s/fdc@i3f0", fi->path); - qemu_fdt_add_subnode(fi->fdt, name->str); - qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); - cells[0] = cpu_to_be32(6); - cells[1] = 0; - qemu_fdt_setprop(fi->fdt, name->str, "interrupts", - cells, 2 * sizeof(cells[0])); - cells[0] = cpu_to_be32(1); - cells[1] = cpu_to_be32(0x3f0); - cells[2] = cpu_to_be32(8); - qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); - qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "fdc"); - qemu_fdt_setprop_string(fi->fdt, name->str, "name", "fdc"); - - g_string_printf(name, "%s/timer@i40", fi->path); - qemu_fdt_add_subnode(fi->fdt, name->str); - qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); - cells[0] = cpu_to_be32(1); - cells[1] = cpu_to_be32(0x40); - cells[2] = cpu_to_be32(8); - qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); - qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "timer"); - qemu_fdt_setprop_string(fi->fdt, name->str, "name", "timer"); - - g_string_printf(name, "%s/rtc@i70", fi->path); - qemu_fdt_add_subnode(fi->fdt, name->str); - qemu_fdt_setprop_string(fi->fdt, name->str, "compatible", "ds1385-rtc"); - qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); - cells[0] = cpu_to_be32(8); - cells[1] = 0; - qemu_fdt_setprop(fi->fdt, name->str, "interrupts", - cells, 2 * sizeof(cells[0])); - cells[0] = cpu_to_be32(1); - cells[1] = cpu_to_be32(0x70); - cells[2] = cpu_to_be32(2); - qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); - qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "rtc"); - qemu_fdt_setprop_string(fi->fdt, name->str, "name", "rtc"); - - g_string_printf(name, "%s/keyboard@i60", fi->path); - qemu_fdt_add_subnode(fi->fdt, name->str); - cells[0] = cpu_to_be32(1); - cells[1] = 0; - qemu_fdt_setprop(fi->fdt, name->str, "interrupts", - cells, 2 * sizeof(cells[0])); - cells[0] = cpu_to_be32(1); - cells[1] = cpu_to_be32(0x60); - cells[2] = cpu_to_be32(5); - qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); - qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "keyboard"); - qemu_fdt_setprop_string(fi->fdt, name->str, "name", "keyboard"); - - g_string_printf(name, "%s/8042@i60", fi->path); - qemu_fdt_add_subnode(fi->fdt, name->str); - qemu_fdt_setprop_cell(fi->fdt, name->str, "#interrupt-cells", 2); - qemu_fdt_setprop_cell(fi->fdt, name->str, "#size-cells", 0); - qemu_fdt_setprop_cell(fi->fdt, name->str, "#address-cells", 1); - qemu_fdt_setprop_string(fi->fdt, name->str, "interrupt-controller", ""); - qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); - cells[0] = cpu_to_be32(1); - cells[1] = cpu_to_be32(0x60); - cells[2] = cpu_to_be32(5); - qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); - qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", ""); - qemu_fdt_setprop_string(fi->fdt, name->str, "name", "8042"); - - g_string_printf(name, "%s/serial@i2f8", fi->path); - qemu_fdt_add_subnode(fi->fdt, name->str); - qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); - cells[0] = cpu_to_be32(3); - cells[1] = 0; - qemu_fdt_setprop(fi->fdt, name->str, "interrupts", - cells, 2 * sizeof(cells[0])); - cells[0] = cpu_to_be32(1); - cells[1] = cpu_to_be32(0x2f8); - cells[2] = cpu_to_be32(8); - qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); - qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "serial"); - qemu_fdt_setprop_string(fi->fdt, name->str, "name", "serial"); - - g_string_free(name, TRUE); -} - static struct { const char *id; const char *name; void (*dtf)(PCIBus *bus, PCIDevice *d, FDTInfo *fi); } device_map[] = { + { "pci10cc,660", "host", NULL }, + { "pci10cc,661", "host", NULL }, { "pci11ab,6460", "host", NULL }, - { "pci1106,8231", "isa", dt_isa }, { "pci1106,571", "ide", dt_ide }, { "pci1106,3044", "firewire", NULL }, { "pci1106,3038", "usb", dt_usb }, @@ -788,7 +858,10 @@ static void add_pci_device(PCIBus *bus, PCIDevice *d, void *opaque) pci_get_word(&d->config[PCI_VENDOR_ID]), pci_get_word(&d->config[PCI_DEVICE_ID])); - if (pci_get_word(&d->config[PCI_CLASS_DEVICE]) == + if (!strcmp(pn, "pci1106,8231")) { + return; /* ISA bridge and devices are included in dtb */ + } + if (pci_get_word(&d->config[PCI_CLASS_DEVICE]) == PCI_CLASS_NETWORK_ETHERNET) { name = "ethernet"; } else if (pci_get_word(&d->config[PCI_CLASS_DEVICE]) >> 8 == @@ -846,12 +919,11 @@ static void add_pci_device(PCIBus *bus, PCIDevice *d, void *opaque) j += 5; } qemu_fdt_setprop(fi->fdt, node->str, "reg", cells, j * sizeof(cells[0])); - qemu_fdt_setprop_string(fi->fdt, node->str, "name", name ?: pn); if (pci_get_byte(&d->config[PCI_INTERRUPT_PIN])) { qemu_fdt_setprop_cell(fi->fdt, node->str, "interrupts", pci_get_byte(&d->config[PCI_INTERRUPT_PIN])); } - /* Pegasos2 firmware has subsystem-id amd subsystem-vendor-id swapped */ + /* Pegasos firmware has subsystem-id and subsystem-vendor-id swapped */ qemu_fdt_setprop_cell(fi->fdt, node->str, "subsystem-vendor-id", pci_get_word(&d->config[PCI_SUBSYSTEM_ID])); qemu_fdt_setprop_cell(fi->fdt, node->str, "subsystem-id", @@ -867,136 +939,9 @@ static void add_pci_device(PCIBus *bus, PCIDevice *d, void *opaque) g_string_free(node, TRUE); } -static void *build_fdt(MachineState *machine, int *fdt_size) +static void add_cpu_info(void *fdt, PowerPCCPU *cpu, int bus_freq) { - Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine); - PowerPCCPU *cpu = pm->cpu; - PCIBus *pci_bus; - FDTInfo fi; - uint32_t cells[16]; - void *fdt = create_device_tree(fdt_size); - - fi.fdt = fdt; - - /* root node */ - qemu_fdt_setprop_string(fdt, "/", "CODEGEN,description", - "Pegasos CHRP PowerPC System"); - qemu_fdt_setprop_string(fdt, "/", "CODEGEN,board", "Pegasos2"); - qemu_fdt_setprop_string(fdt, "/", "CODEGEN,vendor", "bplan GmbH"); - qemu_fdt_setprop_string(fdt, "/", "revision", "2B"); - qemu_fdt_setprop_string(fdt, "/", "model", "Pegasos2"); - qemu_fdt_setprop_string(fdt, "/", "device_type", "chrp"); - qemu_fdt_setprop_cell(fdt, "/", "#address-cells", 1); - qemu_fdt_setprop_string(fdt, "/", "name", "bplan,Pegasos2"); - - /* pci@c0000000 */ - qemu_fdt_add_subnode(fdt, "/pci@c0000000"); - cells[0] = 0; - cells[1] = 0; - qemu_fdt_setprop(fdt, "/pci@c0000000", "bus-range", - cells, 2 * sizeof(cells[0])); - qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "pci-bridge-number", 1); - cells[0] = cpu_to_be32(PCI0_MEM_BASE); - cells[1] = cpu_to_be32(PCI0_MEM_SIZE); - qemu_fdt_setprop(fdt, "/pci@c0000000", "reg", cells, 2 * sizeof(cells[0])); - cells[0] = cpu_to_be32(0x01000000); - cells[1] = 0; - cells[2] = 0; - cells[3] = cpu_to_be32(PCI0_IO_BASE); - cells[4] = 0; - cells[5] = cpu_to_be32(PCI0_IO_SIZE); - cells[6] = cpu_to_be32(0x02000000); - cells[7] = 0; - cells[8] = cpu_to_be32(PCI0_MEM_BASE); - cells[9] = cpu_to_be32(PCI0_MEM_BASE); - cells[10] = 0; - cells[11] = cpu_to_be32(PCI0_MEM_SIZE); - qemu_fdt_setprop(fdt, "/pci@c0000000", "ranges", - cells, 12 * sizeof(cells[0])); - qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "#size-cells", 2); - qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "#address-cells", 3); - qemu_fdt_setprop_string(fdt, "/pci@c0000000", "device_type", "pci"); - qemu_fdt_setprop_string(fdt, "/pci@c0000000", "name", "pci"); - - fi.path = "/pci@c0000000"; - pci_bus = mv64361_get_pci_bus(pm->mv, 0); - pci_for_each_device_reverse(pci_bus, 0, add_pci_device, &fi); - - /* pci@80000000 */ - qemu_fdt_add_subnode(fdt, "/pci@80000000"); - cells[0] = 0; - cells[1] = 0; - qemu_fdt_setprop(fdt, "/pci@80000000", "bus-range", - cells, 2 * sizeof(cells[0])); - qemu_fdt_setprop_cell(fdt, "/pci@80000000", "pci-bridge-number", 0); - cells[0] = cpu_to_be32(PCI1_MEM_BASE); - cells[1] = cpu_to_be32(PCI1_MEM_SIZE); - qemu_fdt_setprop(fdt, "/pci@80000000", "reg", cells, 2 * sizeof(cells[0])); - qemu_fdt_setprop_cell(fdt, "/pci@80000000", "8259-interrupt-acknowledge", - 0xf1000cb4); - cells[0] = cpu_to_be32(0x01000000); - cells[1] = 0; - cells[2] = 0; - cells[3] = cpu_to_be32(PCI1_IO_BASE); - cells[4] = 0; - cells[5] = cpu_to_be32(PCI1_IO_SIZE); - cells[6] = cpu_to_be32(0x02000000); - cells[7] = 0; - cells[8] = cpu_to_be32(PCI1_MEM_BASE); - cells[9] = cpu_to_be32(PCI1_MEM_BASE); - cells[10] = 0; - cells[11] = cpu_to_be32(PCI1_MEM_SIZE); - qemu_fdt_setprop(fdt, "/pci@80000000", "ranges", - cells, 12 * sizeof(cells[0])); - qemu_fdt_setprop_cell(fdt, "/pci@80000000", "#size-cells", 2); - qemu_fdt_setprop_cell(fdt, "/pci@80000000", "#address-cells", 3); - qemu_fdt_setprop_string(fdt, "/pci@80000000", "device_type", "pci"); - qemu_fdt_setprop_string(fdt, "/pci@80000000", "name", "pci"); - - fi.path = "/pci@80000000"; - pci_bus = mv64361_get_pci_bus(pm->mv, 1); - pci_for_each_device_reverse(pci_bus, 0, add_pci_device, &fi); - - qemu_fdt_add_subnode(fdt, "/failsafe"); - qemu_fdt_setprop_string(fdt, "/failsafe", "device_type", "serial"); - qemu_fdt_setprop_string(fdt, "/failsafe", "name", "failsafe"); - - qemu_fdt_add_subnode(fdt, "/rtas"); - qemu_fdt_setprop_cell(fdt, "/rtas", "system-reboot", RTAS_SYSTEM_REBOOT); - qemu_fdt_setprop_cell(fdt, "/rtas", "hibernate", RTAS_HIBERNATE); - qemu_fdt_setprop_cell(fdt, "/rtas", "suspend", RTAS_SUSPEND); - qemu_fdt_setprop_cell(fdt, "/rtas", "power-off", RTAS_POWER_OFF); - qemu_fdt_setprop_cell(fdt, "/rtas", "set-indicator", RTAS_SET_INDICATOR); - qemu_fdt_setprop_cell(fdt, "/rtas", "display-character", - RTAS_DISPLAY_CHARACTER); - qemu_fdt_setprop_cell(fdt, "/rtas", "write-pci-config", - RTAS_WRITE_PCI_CONFIG); - qemu_fdt_setprop_cell(fdt, "/rtas", "read-pci-config", - RTAS_READ_PCI_CONFIG); - /* Pegasos2 firmware misspells check-exception and guests use that */ - qemu_fdt_setprop_cell(fdt, "/rtas", "check-execption", - RTAS_CHECK_EXCEPTION); - qemu_fdt_setprop_cell(fdt, "/rtas", "event-scan", RTAS_EVENT_SCAN); - qemu_fdt_setprop_cell(fdt, "/rtas", "set-time-of-day", - RTAS_SET_TIME_OF_DAY); - qemu_fdt_setprop_cell(fdt, "/rtas", "get-time-of-day", - RTAS_GET_TIME_OF_DAY); - qemu_fdt_setprop_cell(fdt, "/rtas", "nvram-store", RTAS_NVRAM_STORE); - qemu_fdt_setprop_cell(fdt, "/rtas", "nvram-fetch", RTAS_NVRAM_FETCH); - qemu_fdt_setprop_cell(fdt, "/rtas", "restart-rtas", RTAS_RESTART_RTAS); - qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-error-log-max", 0); - qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-event-scan-rate", 0); - qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-display-device", 0); - qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-size", 20); - qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-version", 1); - qemu_fdt_setprop_string(fdt, "/rtas", "name", "rtas"); - - /* cpus */ - qemu_fdt_add_subnode(fdt, "/cpus"); - qemu_fdt_setprop_cell(fdt, "/cpus", "#cpus", 1); - qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 1); - qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0); - qemu_fdt_setprop_string(fdt, "/cpus", "name", "cpus"); + uint32_t cells[2]; /* FIXME Get CPU name from CPU object */ const char *cp = "/cpus/PowerPC,G4"; @@ -1041,36 +986,72 @@ static void *build_fdt(MachineState *machine, int *fdt_size) qemu_fdt_setprop_cell(fdt, cp, "reservation-granule-size", 4); qemu_fdt_setprop_cell(fdt, cp, "timebase-frequency", cpu->env.tb_env->tb_freq); - qemu_fdt_setprop_cell(fdt, cp, "bus-frequency", BUS_FREQ_HZ); - qemu_fdt_setprop_cell(fdt, cp, "clock-frequency", BUS_FREQ_HZ * 7.5); + qemu_fdt_setprop_cell(fdt, cp, "bus-frequency", bus_freq); + qemu_fdt_setprop_cell(fdt, cp, "clock-frequency", bus_freq * 7.5); qemu_fdt_setprop_cell(fdt, cp, "cpu-version", cpu->env.spr[SPR_PVR]); cells[0] = 0; cells[1] = 0; qemu_fdt_setprop(fdt, cp, "reg", cells, 2 * sizeof(cells[0])); qemu_fdt_setprop_string(fdt, cp, "device_type", "cpu"); - qemu_fdt_setprop_string(fdt, cp, "name", strrchr(cp, '/') + 1); +} - /* memory */ - qemu_fdt_add_subnode(fdt, "/memory@0"); - cells[0] = 0; - cells[1] = cpu_to_be32(machine->ram_size); - qemu_fdt_setprop(fdt, "/memory@0", "reg", cells, 2 * sizeof(cells[0])); - qemu_fdt_setprop_string(fdt, "/memory@0", "device_type", "memory"); - qemu_fdt_setprop_string(fdt, "/memory@0", "name", "memory"); +static void *load_dtb(const char *filename, int *fdt_size) +{ + void *fdt; + g_autofree char *name = qemu_find_file(QEMU_FILE_TYPE_DTB, filename); - qemu_fdt_add_subnode(fdt, "/chosen"); - if (pm->initrd_addr && pm->initrd_size) { - qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-end", - pm->initrd_addr + pm->initrd_size); - qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-start", - pm->initrd_addr); + if (!name) { + error_report("Could not find dtb file '%s'", filename); + return NULL; } - qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", - machine->kernel_cmdline ?: ""); - qemu_fdt_setprop_string(fdt, "/chosen", "name", "chosen"); + fdt = load_device_tree(name, fdt_size); + if (!fdt) { + error_report("Could not load dtb file '%s'", name); + } + return fdt; +} - qemu_fdt_add_subnode(fdt, "/openprom"); - qemu_fdt_setprop_string(fdt, "/openprom", "model", "Pegasos2,1.1"); +static void *pegasos1_build_fdt(PegasosMachineState *pm, int *fdt_size) +{ + FDTInfo fi; + PCIBus *pci_bus; + void *fdt = load_dtb("pegasos1.dtb", fdt_size); + + if (!fdt) { + return NULL; + } + qemu_fdt_setprop_string(fdt, "/", "name", "bplan,Pegasos"); + + add_cpu_info(fdt, pm->cpu, pm->bus_freq_hz); + + fi.fdt = fdt; + fi.path = "/pci@80000000"; + pci_bus = PCI_BUS(qdev_get_child_bus(pm->nb, "pci.0")); + pci_for_each_device_reverse(pci_bus, 0, add_pci_device, &fi); + + return fdt; +} + +static void *pegasos2_build_fdt(PegasosMachineState *pm, int *fdt_size) +{ + FDTInfo fi; + PCIBus *pci_bus; + void *fdt = load_dtb("pegasos2.dtb", fdt_size); + + if (!fdt) { + return NULL; + } + qemu_fdt_setprop_string(fdt, "/", "name", "bplan,Pegasos2"); + + add_cpu_info(fdt, pm->cpu, pm->bus_freq_hz); + + fi.fdt = fdt; + fi.path = "/pci@c0000000"; + pci_bus = mv64361_get_pci_bus(pm->nb, 0); + pci_for_each_device_reverse(pci_bus, 0, add_pci_device, &fi); + fi.path = "/pci@80000000"; + pci_bus = mv64361_get_pci_bus(pm->nb, 1); + pci_for_each_device_reverse(pci_bus, 0, add_pci_device, &fi); return fdt; } diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index e0a2e5a984..6764730beb 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -900,6 +900,73 @@ static int spapr_dt_rng(void *fdt) return ret ? -1 : 0; } +static void spapr_dt_rtas_fadump(SpaprMachineState *spapr, void *fdt, int rtas) +{ + MachineState *ms = MACHINE(spapr); + MachineClass *mc = MACHINE_GET_CLASS(ms); + FadumpMemStruct *fdm = &spapr->registered_fdm; + uint16_t dump_status_flag; + + uint32_t max_possible_cpus = mc->possible_cpu_arch_ids(ms)->len; + uint64_t fadump_cpu_state_size = 0; + uint16_t fadump_versions[2] = { + FADUMP_VERSION /* min supported version */, + FADUMP_VERSION /* max supported version */ + }; + uint32_t fadump_rgn_sizes[2][3] = { + { + cpu_to_be32(FADUMP_CPU_STATE_DATA), + 0, 0 /* Calculated later */ + }, + { + cpu_to_be32(FADUMP_HPTE_REGION), + 0, 0 /* HPTE region not implemented */ + } + }; + + /* + * CPU State Data contains multiple fields such as header, num_cpus and + * register entries + * + * Calculate the maximum CPU State Data size, according to maximum + * possible CPUs the QEMU VM can have + * + * This calculation must match the 'cpu_state_len' calculation done in + * 'populate_cpu_state_data' in spapr_fadump.c + */ + fadump_cpu_state_size += sizeof(struct FadumpRegSaveAreaHeader); + fadump_cpu_state_size += 0xc; /* padding as in PAPR */ + fadump_cpu_state_size += sizeof(uint32_t); /* num_cpus */ + fadump_cpu_state_size += max_possible_cpus * /* reg entries */ + FADUMP_PER_CPU_REG_ENTRIES * + sizeof(struct FadumpRegEntry); + + /* Set maximum size for CPU state data region */ + assert(fadump_rgn_sizes[0][0] == cpu_to_be32(FADUMP_CPU_STATE_DATA)); + + /* Upper 32 bits of size, usually 0 */ + fadump_rgn_sizes[0][1] = cpu_to_be32(fadump_cpu_state_size >> 32); + + /* Lower 32 bits of size */ + fadump_rgn_sizes[0][2] = cpu_to_be32(fadump_cpu_state_size & 0xffffffff); + + /* Add device tree properties required from platform for fadump */ + _FDT((fdt_setprop(fdt, rtas, "ibm,configure-kernel-dump-version", + fadump_versions, sizeof(fadump_versions)))); + _FDT((fdt_setprop(fdt, rtas, "ibm,configure-kernel-dump-sizes", + fadump_rgn_sizes, sizeof(fadump_rgn_sizes)))); + + dump_status_flag = be16_to_cpu(fdm->header.dump_status_flag); + if (dump_status_flag & FADUMP_STATUS_DUMP_TRIGGERED) { + uint64_t fdm_size = + sizeof(struct FadumpSectionHeader) + + (be16_to_cpu(fdm->header.dump_num_sections) * + sizeof(struct FadumpSection)); + + _FDT((fdt_setprop(fdt, rtas, "ibm,kernel-dump", fdm, fdm_size))); + } +} + static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt) { MachineState *ms = MACHINE(spapr); @@ -1015,6 +1082,8 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt) _FDT(fdt_setprop(fdt, rtas, "ibm,lrdr-capacity", lrdr_capacity, sizeof(lrdr_capacity))); + spapr_dt_rtas_fadump(spapr, fdt, rtas); + spapr_dt_rtas_tokens(fdt, rtas); } @@ -1072,7 +1141,6 @@ static void spapr_dt_ov5_platform_support(SpaprMachineState *spapr, void *fdt, static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt, bool reset) { MachineState *machine = MACHINE(spapr); - SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); int chosen; _FDT(chosen = fdt_add_subnode(fdt, 0, "chosen")); @@ -1143,9 +1211,7 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt, bool reset) * We can deal with BAR reallocation just fine, advertise it * to the guest */ - if (smc->linux_pci_probe) { - _FDT(fdt_setprop_cell(fdt, chosen, "linux,pci-probe-only", 0)); - } + _FDT(fdt_setprop_cell(fdt, chosen, "linux,pci-probe-only", 0)); spapr_dt_ov5_platform_support(spapr, fdt, chosen); } @@ -1182,7 +1248,6 @@ void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space) { MachineState *machine = MACHINE(spapr); MachineClass *mc = MACHINE_GET_CLASS(machine); - SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); uint32_t root_drc_type_mask = 0; int ret; void *fdt; @@ -1213,16 +1278,10 @@ void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space) /* Host Model & Serial Number */ if (spapr->host_model) { _FDT(fdt_setprop_string(fdt, 0, "host-model", spapr->host_model)); - } else if (smc->broken_host_serial_model && kvmppc_get_host_model(&buf)) { - _FDT(fdt_setprop_string(fdt, 0, "host-model", buf)); - g_free(buf); } if (spapr->host_serial) { _FDT(fdt_setprop_string(fdt, 0, "host-serial", spapr->host_serial)); - } else if (smc->broken_host_serial_model && kvmppc_get_host_serial(&buf)) { - _FDT(fdt_setprop_string(fdt, 0, "host-serial", buf)); - g_free(buf); } _FDT(fdt_setprop_cell(fdt, 0, "#address-cells", 2)); @@ -1260,9 +1319,8 @@ void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space) /* ibm,drc-indexes and friends */ root_drc_type_mask |= SPAPR_DR_CONNECTOR_TYPE_LMB; - if (smc->dr_phb_enabled) { - root_drc_type_mask |= SPAPR_DR_CONNECTOR_TYPE_PHB; - } + root_drc_type_mask |= SPAPR_DR_CONNECTOR_TYPE_PHB; + if (mc->nvdimm_supported) { root_drc_type_mask |= SPAPR_DR_CONNECTOR_TYPE_PMEM; } @@ -2061,13 +2119,6 @@ static const VMStateDescription vmstate_spapr_irq_map = { }, }; -static bool spapr_dtb_needed(void *opaque) -{ - SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(opaque); - - return smc->update_dt_enabled; -} - static int spapr_dtb_pre_load(void *opaque) { SpaprMachineState *spapr = (SpaprMachineState *)opaque; @@ -2083,7 +2134,6 @@ static const VMStateDescription vmstate_spapr_dtb = { .name = "spapr_dtb", .version_id = 1, .minimum_version_id = 1, - .needed = spapr_dtb_needed, .pre_load = spapr_dtb_pre_load, .fields = (const VMStateField[]) { VMSTATE_UINT32(fdt_initial_size, SpaprMachineState), @@ -2605,7 +2655,6 @@ static CPUArchId *spapr_find_cpu_slot(MachineState *ms, uint32_t id, int *idx) static void spapr_set_vsmt_mode(SpaprMachineState *spapr, Error **errp) { MachineState *ms = MACHINE(spapr); - SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); Error *local_err = NULL; bool vsmt_user = !!spapr->vsmt; int kvm_smt = kvmppc_smt_threads(); @@ -2641,15 +2690,6 @@ static void spapr_set_vsmt_mode(SpaprMachineState *spapr, Error **errp) return; } /* In this case, spapr->vsmt has been set by the command line */ - } else if (!smc->smp_threads_vsmt) { - /* - * Default VSMT value is tricky, because we need it to be as - * consistent as possible (for migration), but this requires - * changing it for at least some existing cases. We pick 8 as - * the value that we'd get with KVM on POWER8, the - * overwhelmingly common case in production systems. - */ - spapr->vsmt = MAX(8, smp_threads); } else { spapr->vsmt = smp_threads; } @@ -2758,7 +2798,6 @@ static PCIHostState *spapr_create_default_phb(void) static hwaddr spapr_rma_size(SpaprMachineState *spapr, Error **errp) { MachineState *machine = MACHINE(spapr); - SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); hwaddr rma_size = machine->ram_size; hwaddr node0_size = spapr_node0_size(machine); @@ -2771,15 +2810,6 @@ static hwaddr spapr_rma_size(SpaprMachineState *spapr, Error **errp) */ rma_size = MIN(rma_size, 1 * TiB); - /* - * Clamp the RMA size based on machine type. This is for - * migration compatibility with older qemu versions, which limited - * the RMA size for complicated and mostly bad reasons. - */ - if (smc->rma_limit) { - rma_size = MIN(rma_size, smc->rma_limit); - } - if (rma_size < MIN_RMA_SLOF) { error_setg(errp, "pSeries SLOF firmware requires >= %" HWADDR_PRIx @@ -3009,10 +3039,8 @@ static void spapr_machine_init(MachineState *machine) * connectors for a PHBs PCI slots) are added as needed during their * parent's realization. */ - if (smc->dr_phb_enabled) { - for (i = 0; i < SPAPR_MAX_PHBS; i++) { - spapr_dr_connector_new(OBJECT(machine), TYPE_SPAPR_DRC_PHB, i); - } + for (i = 0; i < SPAPR_MAX_PHBS; i++) { + spapr_dr_connector_new(OBJECT(machine), TYPE_SPAPR_DRC_PHB, i); } /* Set up PCI */ @@ -3347,9 +3375,7 @@ static char *spapr_get_ic_mode(Object *obj, Error **errp) { SpaprMachineState *spapr = SPAPR_MACHINE(obj); - if (spapr->irq == &spapr_irq_xics_legacy) { - return g_strdup("legacy"); - } else if (spapr->irq == &spapr_irq_xics) { + if (spapr->irq == &spapr_irq_xics) { return g_strdup("xics"); } else if (spapr->irq == &spapr_irq_xive) { return g_strdup("xive"); @@ -3363,11 +3389,6 @@ static void spapr_set_ic_mode(Object *obj, const char *value, Error **errp) { SpaprMachineState *spapr = SPAPR_MACHINE(obj); - if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { - error_setg(errp, "This machine only uses the legacy XICS backend, don't pass ic-mode"); - return; - } - /* The legacy IRQ backend can not be set */ if (strcmp(value, "xics") == 0) { spapr->irq = &spapr_irq_xics; @@ -4093,20 +4114,65 @@ int spapr_phb_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, return 0; } +static bool spapr_phb_placement(SpaprMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, + hwaddr *mmio32, hwaddr *mmio64, + unsigned n_dma, uint32_t *liobns, Error **errp) +{ + /* + * New-style PHB window placement. + * + * Goals: Gives large (1TiB), naturally aligned 64-bit MMIO window + * for each PHB, in addition to 2GiB 32-bit MMIO and 64kiB PIO + * windows. + * + * Some guest kernels can't work with MMIO windows above 1<<46 + * (64TiB), so we place up to 31 PHBs in the area 32TiB..64TiB + * + * 32TiB..(33TiB+1984kiB) contains the 64kiB PIO windows for each + * PHB stacked together. (32TiB+2GiB)..(32TiB+64GiB) contains the + * 2GiB 32-bit MMIO windows for each PHB. Then 33..64TiB has the + * 1TiB 64-bit MMIO windows for each PHB. + */ + const uint64_t base_buid = 0x800000020000000ULL; + int i; + + /* Sanity check natural alignments */ + QEMU_BUILD_BUG_ON((SPAPR_PCI_BASE % SPAPR_PCI_MEM64_WIN_SIZE) != 0); + QEMU_BUILD_BUG_ON((SPAPR_PCI_LIMIT % SPAPR_PCI_MEM64_WIN_SIZE) != 0); + QEMU_BUILD_BUG_ON((SPAPR_PCI_MEM64_WIN_SIZE % SPAPR_PCI_MEM32_WIN_SIZE) != 0); + QEMU_BUILD_BUG_ON((SPAPR_PCI_MEM32_WIN_SIZE % SPAPR_PCI_IO_WIN_SIZE) != 0); + /* Sanity check bounds */ + QEMU_BUILD_BUG_ON((SPAPR_MAX_PHBS * SPAPR_PCI_IO_WIN_SIZE) > + SPAPR_PCI_MEM32_WIN_SIZE); + QEMU_BUILD_BUG_ON((SPAPR_MAX_PHBS * SPAPR_PCI_MEM32_WIN_SIZE) > + SPAPR_PCI_MEM64_WIN_SIZE); + + if (index >= SPAPR_MAX_PHBS) { + error_setg(errp, "\"index\" for PAPR PHB is too large (max %llu)", + SPAPR_MAX_PHBS - 1); + return false; + } + + *buid = base_buid + index; + for (i = 0; i < n_dma; ++i) { + liobns[i] = SPAPR_PCI_LIOBN(index, i); + } + + *pio = SPAPR_PCI_BASE + index * SPAPR_PCI_IO_WIN_SIZE; + *mmio32 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM32_WIN_SIZE; + *mmio64 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM64_WIN_SIZE; + return true; +} + static bool spapr_phb_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(dev); - SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); const unsigned windows_supported = spapr_phb_windows_supported(sphb); SpaprDrc *drc; - if (dev->hotplugged && !smc->dr_phb_enabled) { - error_setg(errp, "PHB hotplug not supported for this machine"); - return false; - } - if (sphb->index == (uint32_t)-1) { error_setg(errp, "\"index\" for PAPR PHB is mandatory"); return false; @@ -4122,26 +4188,18 @@ static bool spapr_phb_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, * This will check that sphb->index doesn't exceed the maximum number of * PHBs for the current machine type. */ - return - smc->phb_placement(spapr, sphb->index, - &sphb->buid, &sphb->io_win_addr, - &sphb->mem_win_addr, &sphb->mem64_win_addr, - windows_supported, sphb->dma_liobn, - errp); + return spapr_phb_placement(spapr, sphb->index, + &sphb->buid, &sphb->io_win_addr, + &sphb->mem_win_addr, &sphb->mem64_win_addr, + windows_supported, sphb->dma_liobn, errp); } static void spapr_phb_plug(HotplugHandler *hotplug_dev, DeviceState *dev) { - SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); - SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(dev); SpaprDrc *drc; bool hotplugged = spapr_drc_hotplugged(dev); - if (!smc->dr_phb_enabled) { - return; - } - drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, sphb->index); /* hotplug hooks should check it's enabled before getting this far */ assert(drc); @@ -4267,7 +4325,6 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, { SpaprMachineState *sms = SPAPR_MACHINE(OBJECT(hotplug_dev)); MachineClass *mc = MACHINE_GET_CLASS(sms); - SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { if (spapr_memory_hot_unplug_supported(sms)) { @@ -4282,10 +4339,6 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, } spapr_core_unplug_request(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE)) { - if (!smc->dr_phb_enabled) { - error_setg(errp, "PHB hot unplug not supported on this machine"); - return; - } spapr_phb_unplug_request(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_TPM_PROXY)) { spapr_tpm_proxy_unplug(hotplug_dev, dev); @@ -4386,57 +4439,6 @@ static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine) return machine->possible_cpus; } -static bool spapr_phb_placement(SpaprMachineState *spapr, uint32_t index, - uint64_t *buid, hwaddr *pio, - hwaddr *mmio32, hwaddr *mmio64, - unsigned n_dma, uint32_t *liobns, Error **errp) -{ - /* - * New-style PHB window placement. - * - * Goals: Gives large (1TiB), naturally aligned 64-bit MMIO window - * for each PHB, in addition to 2GiB 32-bit MMIO and 64kiB PIO - * windows. - * - * Some guest kernels can't work with MMIO windows above 1<<46 - * (64TiB), so we place up to 31 PHBs in the area 32TiB..64TiB - * - * 32TiB..(33TiB+1984kiB) contains the 64kiB PIO windows for each - * PHB stacked together. (32TiB+2GiB)..(32TiB+64GiB) contains the - * 2GiB 32-bit MMIO windows for each PHB. Then 33..64TiB has the - * 1TiB 64-bit MMIO windows for each PHB. - */ - const uint64_t base_buid = 0x800000020000000ULL; - int i; - - /* Sanity check natural alignments */ - QEMU_BUILD_BUG_ON((SPAPR_PCI_BASE % SPAPR_PCI_MEM64_WIN_SIZE) != 0); - QEMU_BUILD_BUG_ON((SPAPR_PCI_LIMIT % SPAPR_PCI_MEM64_WIN_SIZE) != 0); - QEMU_BUILD_BUG_ON((SPAPR_PCI_MEM64_WIN_SIZE % SPAPR_PCI_MEM32_WIN_SIZE) != 0); - QEMU_BUILD_BUG_ON((SPAPR_PCI_MEM32_WIN_SIZE % SPAPR_PCI_IO_WIN_SIZE) != 0); - /* Sanity check bounds */ - QEMU_BUILD_BUG_ON((SPAPR_MAX_PHBS * SPAPR_PCI_IO_WIN_SIZE) > - SPAPR_PCI_MEM32_WIN_SIZE); - QEMU_BUILD_BUG_ON((SPAPR_MAX_PHBS * SPAPR_PCI_MEM32_WIN_SIZE) > - SPAPR_PCI_MEM64_WIN_SIZE); - - if (index >= SPAPR_MAX_PHBS) { - error_setg(errp, "\"index\" for PAPR PHB is too large (max %llu)", - SPAPR_MAX_PHBS - 1); - return false; - } - - *buid = base_buid + index; - for (i = 0; i < n_dma; ++i) { - liobns[i] = SPAPR_PCI_LIOBN(index, i); - } - - *pio = SPAPR_PCI_BASE + index * SPAPR_PCI_IO_WIN_SIZE; - *mmio32 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM32_WIN_SIZE; - *mmio64 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM64_WIN_SIZE; - return true; -} - static ICSState *spapr_ics_get(XICSFabric *dev, int irq) { SpaprMachineState *spapr = SPAPR_MACHINE(dev); @@ -4641,14 +4643,12 @@ static void spapr_machine_class_init(ObjectClass *oc, const void *data) hc->unplug_request = spapr_machine_device_unplug_request; hc->unplug = spapr_machine_device_unplug; - smc->update_dt_enabled = true; mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power10_v2.0"); mc->has_hotpluggable_cpus = true; mc->nvdimm_supported = true; smc->resize_hpt_default = SPAPR_RESIZE_HPT_ENABLED; fwc->get_dev_path = spapr_get_fw_dev_path; nc->nmi_monitor_handler = spapr_nmi; - smc->phb_placement = spapr_phb_placement; vhc->cpu_in_nested = spapr_cpu_in_nested; vhc->deliver_hv_excp = spapr_exit_nested; vhc->hypercall = emulate_spapr_hypercall; @@ -4695,10 +4695,6 @@ static void spapr_machine_class_init(ObjectClass *oc, const void *data) smc->default_caps.caps[SPAPR_CAP_AIL_MODE_3] = SPAPR_CAP_ON; spapr_caps_add_properties(smc); smc->irq = &spapr_irq_dual; - smc->dr_phb_enabled = true; - smc->linux_pci_probe = true; - smc->smp_threads_vsmt = true; - smc->nr_xirqs = SPAPR_NR_XIRQS; xfc->match_nvt = spapr_match_nvt; vmc->client_architecture_support = spapr_vof_client_architecture_support; vmc->quiesce = spapr_vof_quiesce; @@ -4976,110 +4972,6 @@ static void spapr_machine_5_0_class_options(MachineClass *mc) DEFINE_SPAPR_MACHINE(5, 0); -/* - * pseries-4.2 - */ -static void spapr_machine_4_2_class_options(MachineClass *mc) -{ - SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); - - spapr_machine_5_0_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_4_2, hw_compat_4_2_len); - smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; - smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_OFF; - smc->rma_limit = 16 * GiB; - mc->nvdimm_supported = false; -} - -DEFINE_SPAPR_MACHINE(4, 2); - -/* - * pseries-4.1 - */ -static void spapr_machine_4_1_class_options(MachineClass *mc) -{ - SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); - static GlobalProperty compat[] = { - /* Only allow 4kiB and 64kiB IOMMU pagesizes */ - { TYPE_SPAPR_PCI_HOST_BRIDGE, "pgsz", "0x11000" }, - }; - - spapr_machine_4_2_class_options(mc); - smc->linux_pci_probe = false; - smc->smp_threads_vsmt = false; - compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len); - compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); -} - -DEFINE_SPAPR_MACHINE(4, 1); - -/* - * pseries-4.0 - */ -static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, - uint64_t *buid, hwaddr *pio, - hwaddr *mmio32, hwaddr *mmio64, - unsigned n_dma, uint32_t *liobns, Error **errp) -{ - if (!spapr_phb_placement(spapr, index, buid, pio, mmio32, mmio64, n_dma, - liobns, errp)) { - return false; - } - return true; -} -static void spapr_machine_4_0_class_options(MachineClass *mc) -{ - SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); - - spapr_machine_4_1_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len); - smc->phb_placement = phb_placement_4_0; - smc->irq = &spapr_irq_xics; - smc->pre_4_1_migration = true; -} - -DEFINE_SPAPR_MACHINE(4, 0); - -/* - * pseries-3.1 - */ -static void spapr_machine_3_1_class_options(MachineClass *mc) -{ - SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); - - spapr_machine_4_0_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_3_1, hw_compat_3_1_len); - - mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); - smc->update_dt_enabled = false; - smc->dr_phb_enabled = false; - smc->broken_host_serial_model = true; - smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_BROKEN; - smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN; - smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN; - smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF; -} - -DEFINE_SPAPR_MACHINE(3, 1); - -/* - * pseries-3.0 - */ - -static void spapr_machine_3_0_class_options(MachineClass *mc) -{ - SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); - - spapr_machine_3_1_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_3_0, hw_compat_3_0_len); - - smc->legacy_irq_allocation = true; - smc->nr_xirqs = 0x400; - smc->irq = &spapr_irq_xics_legacy; -} - -DEFINE_SPAPR_MACHINE(3, 0); - static void spapr_machine_register_types(void) { type_register_static(&spapr_machine_info); diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c index 0f94c192fd..170795ad6a 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -66,7 +66,6 @@ typedef struct SpaprCapabilityInfo { void (*apply)(SpaprMachineState *spapr, uint8_t val, Error **errp); void (*cpu_apply)(SpaprMachineState *spapr, PowerPCCPU *cpu, uint8_t val, Error **errp); - bool (*migrate_needed)(void *opaque); } SpaprCapabilityInfo; static void spapr_cap_get_bool(Object *obj, Visitor *v, const char *name, @@ -336,11 +335,6 @@ static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); } -static bool cap_hpt_maxpagesize_migrate_needed(void *opaque) -{ - return !SPAPR_MACHINE_GET_CLASS(opaque)->pre_4_1_migration; -} - static bool spapr_pagesize_cb(void *opaque, uint32_t seg_pshift, uint32_t pshift) { @@ -793,7 +787,6 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = { .type = "int", .apply = cap_hpt_maxpagesize_apply, .cpu_apply = cap_hpt_maxpagesize_cpu_apply, - .migrate_needed = cap_hpt_maxpagesize_migrate_needed, }, [SPAPR_CAP_NESTED_KVM_HV] = { .name = "nested-hv", @@ -982,11 +975,8 @@ int spapr_caps_post_migration(SpaprMachineState *spapr) static bool spapr_cap_##sname##_needed(void *opaque) \ { \ SpaprMachineState *spapr = opaque; \ - bool (*needed)(void *opaque) = \ - capability_table[cap].migrate_needed; \ \ - return needed ? needed(opaque) : true && \ - spapr->cmd_line_caps[cap] && \ + return spapr->cmd_line_caps[cap] && \ (spapr->eff.caps[cap] != \ spapr->def.caps[cap]); \ } \ diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 832b0212f3..892ddc7f8f 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -1041,20 +1041,14 @@ void spapr_clear_pending_hotplug_events(SpaprMachineState *spapr) void spapr_events_init(SpaprMachineState *spapr) { - int epow_irq = SPAPR_IRQ_EPOW; - - if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { - epow_irq = spapr_irq_findone(spapr, &error_fatal); - } - - spapr_irq_claim(spapr, epow_irq, false, &error_fatal); + spapr_irq_claim(spapr, SPAPR_IRQ_EPOW, false, &error_fatal); QTAILQ_INIT(&spapr->pending_events); spapr->event_sources = spapr_event_sources_new(); spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_EPOW, - epow_irq); + SPAPR_IRQ_EPOW); /* NOTE: if machine supports modern/dedicated hotplug event source, * we add it to the device-tree unconditionally. This means we may @@ -1065,16 +1059,10 @@ void spapr_events_init(SpaprMachineState *spapr) * checking that it's enabled. */ if (spapr->use_hotplug_event_source) { - int hp_irq = SPAPR_IRQ_HOTPLUG; - - if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { - hp_irq = spapr_irq_findone(spapr, &error_fatal); - } - - spapr_irq_claim(spapr, hp_irq, false, &error_fatal); + spapr_irq_claim(spapr, SPAPR_IRQ_HOTPLUG, false, &error_fatal); spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_HOT_PLUG, - hp_irq); + SPAPR_IRQ_HOTPLUG); } spapr->epow_notifier.notify = spapr_powerdown_req; diff --git a/hw/ppc/spapr_fadump.c b/hw/ppc/spapr_fadump.c new file mode 100644 index 0000000000..fa3aeac94c --- /dev/null +++ b/hw/ppc/spapr_fadump.c @@ -0,0 +1,730 @@ +/* + * Firmware Assisted Dump in PSeries + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "hw/ppc/spapr.h" +#include "qemu/units.h" +#include "system/cpus.h" +#include "system/hw_accel.h" +#include + +/* + * Copy the ascii values for first 8 characters from a string into u64 + * variable at their respective indexes. + * e.g. + * The string "FADMPINF" will be converted into 0x4641444d50494e46 + */ +static uint64_t fadump_str_to_u64(const char *str) +{ + uint64_t val = 0; + int i; + + for (i = 0; i < sizeof(val); i++) { + val = (*str) ? (val << 8) | *str++ : val << 8; + } + return val; +} + +/** + * Get the identifier id for register entries of GPRs + * + * It gives the same id as 'fadump_str_to_u64' when the complete string id + * of the GPR is given, ie. + * + * fadump_str_to_u64("GPR05") == fadump_gpr_id_to_u64(5); + * fadump_str_to_u64("GPR12") == fadump_gpr_id_to_u64(12); + * + * And so on. Hence this can be implemented by creating a dynamic + * string for each GPR, such as "GPR00", "GPR01", ... "GPR31" + * Instead of allocating a string, an observation from the math of + * 'fadump_str_to_u64' or from PAPR tells us that there's a pattern + * in the identifier IDs, such that the first 4 bytes are affected only by + * whether it is GPR0*, GPR1*, GPR2*, GPR3*. + * Upper half of 5th byte is always 0x3. Lower half (nibble) of 5th byte + * is the tens digit of the GPR id, ie. GPR ID / 10. + * Upper half of 6th byte is always 0x3. Lower half (nibble) of 5th byte + * is the ones digit of the GPR id, ie. GPR ID % 10 + * + * For example, for GPR 29, the 5th and 6th byte will be 0x32 and 0x39 + */ +static uint64_t fadump_gpr_id_to_u64(uint32_t gpr_id) +{ + uint64_t val = 0; + + /* Valid range of GPR id is only GPR0 to GPR31 */ + assert(gpr_id < 32); + + /* Below calculations set the 0th to 5th byte */ + if (gpr_id <= 9) { + val = fadump_str_to_u64("GPR0"); + } else if (gpr_id <= 19) { + val = fadump_str_to_u64("GPR1"); + } else if (gpr_id <= 29) { + val = fadump_str_to_u64("GPR2"); + } else { + val = fadump_str_to_u64("GPR3"); + } + + /* Set the 6th byte */ + val |= 0x30000000; + val |= ((gpr_id % 10) << 24); + + return val; +} + +/* + * Handle the "FADUMP_CMD_REGISTER" command in 'ibm,configure-kernel-dump' + * + * Note: Any changes made by the kernel to the fadump memory struct won't + * reflect in QEMU after the 'ibm,configure-kernel-dump' RTAS call has returned, + * as we store the passed fadump memory structure passed during fadump + * registration. + * Kernel has to invalidate & re-register fadump, if it intends to make any + * changes to the fadump memory structure + * + * Returns: + * * RTAS_OUT_SUCCESS: On successful registration + * * RTAS_OUT_PARAM_ERROR: If parameters are not correct, eg. too many + * sections, invalid memory addresses that we are + * unable to read, etc + * * RTAS_OUT_DUMP_ALREADY_REGISTERED: Dump already registered + * * RTAS_OUT_HW_ERROR: Misc issue such as memory access failures + */ +uint32_t do_fadump_register(SpaprMachineState *spapr, target_ulong args) +{ + FadumpSectionHeader header; + FadumpSection regions[FADUMP_MAX_SECTIONS] = {0}; + target_ulong fdm_addr = rtas_ld(args, 1); + target_ulong fdm_size = rtas_ld(args, 2); + AddressSpace *default_as = &address_space_memory; + MemTxResult io_result; + MemTxAttrs attrs; + uint64_t next_section_addr; + uint16_t dump_num_sections; + + /* Mark the memory transaction as privileged memory access */ + attrs.user = 0; + attrs.memory = 1; + + if (spapr->fadump_registered) { + /* FADump already registered */ + return RTAS_OUT_DUMP_ALREADY_REGISTERED; + } + + if (spapr->fadump_dump_active) { + return RTAS_OUT_DUMP_ACTIVE; + } + + if (fdm_size < sizeof(FadumpSectionHeader)) { + qemu_log_mask(LOG_GUEST_ERROR, + "FADump: Header size is invalid: " TARGET_FMT_lu "\n", fdm_size); + return RTAS_OUT_PARAM_ERROR; + } + + /* Ensure fdm_addr points to a valid RMR-memory/RMA-memory buffer */ + if ((fdm_addr <= 0) || ((fdm_addr + fdm_size) > spapr->rma_size)) { + qemu_log_mask(LOG_GUEST_ERROR, + "FADump: Invalid fdm address: " TARGET_FMT_lu "\n", fdm_addr); + return RTAS_OUT_PARAM_ERROR; + } + + /* Try to read the passed fadump header */ + io_result = address_space_read(default_as, fdm_addr, attrs, + &header, sizeof(header)); + if (io_result != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, + "FADump: Unable to read fdm: " TARGET_FMT_lu "\n", fdm_addr); + + return RTAS_OUT_HW_ERROR; + } + + /* Verify that we understand the fadump header version */ + if (header.dump_format_version != cpu_to_be32(FADUMP_VERSION)) { + qemu_log_mask(LOG_GUEST_ERROR, + "FADump: Unknown fadump header version: 0x%x\n", + header.dump_format_version); + return RTAS_OUT_PARAM_ERROR; + } + + /* Reset dump status flags */ + header.dump_status_flag = 0; + + dump_num_sections = be16_to_cpu(header.dump_num_sections); + + if (dump_num_sections > FADUMP_MAX_SECTIONS) { + qemu_log_mask(LOG_GUEST_ERROR, + "FADump: Too many sections: %d sections\n", dump_num_sections); + return RTAS_OUT_PARAM_ERROR; + } + + next_section_addr = + fdm_addr + + be32_to_cpu(header.offset_first_dump_section); + + for (int i = 0; i < dump_num_sections; ++i) { + /* Read the fadump section from memory */ + io_result = address_space_read(default_as, next_section_addr, attrs, + ®ions[i], sizeof(regions[i])); + if (io_result != MEMTX_OK) { + qemu_log_mask(LOG_UNIMP, + "FADump: Unable to read fadump %dth section\n", i); + return RTAS_OUT_PARAM_ERROR; + } + + next_section_addr += sizeof(regions[i]); + } + + spapr->fadump_registered = true; + spapr->fadump_dump_active = false; + + /* Store the registered fadump memory struct */ + spapr->registered_fdm.header = header; + for (int i = 0; i < dump_num_sections; ++i) { + spapr->registered_fdm.rgn[i] = regions[i]; + } + + return RTAS_OUT_SUCCESS; +} + +/* + * Copy the source region of given fadump section, to the destination + * address mentioned in the region + * + * Also set the region's error flag, if the copy fails due to non-existent + * address (MEMTX_DECODE_ERROR) or permission issues (MEMTX_ACCESS_ERROR) + * + * Returns true if successful copy + * + * Returns false in case of any other error, being treated as hardware + * error for fadump purposes + */ +static bool do_preserve_region(FadumpSection *region) +{ + AddressSpace *default_as = &address_space_memory; + MemTxResult io_result; + MemTxAttrs attrs; + uint64_t src_addr, src_len, dest_addr; + uint64_t num_chunks; + g_autofree void *copy_buffer = NULL; + + src_addr = be64_to_cpu(region->source_address); + src_len = be64_to_cpu(region->source_len); + dest_addr = be64_to_cpu(region->destination_address); + + /* Mark the memory transaction as privileged memory access */ + attrs.user = 0; + attrs.memory = 1; + + /* + * Optimisation: Skip copy if source and destination are same + * (eg. param area) + */ + if (src_addr == dest_addr) { + region->bytes_dumped = cpu_to_be64(src_len); + return true; + } + +#define FADUMP_CHUNK_SIZE ((size_t)(32 * MiB)) + copy_buffer = g_try_malloc(FADUMP_CHUNK_SIZE); + if (copy_buffer == NULL) { + qemu_log_mask(LOG_GUEST_ERROR, + "FADump: Failed allocating memory (size: %zu) for copying" + " reserved memory regions\n", FADUMP_CHUNK_SIZE); + } + + num_chunks = ceil((src_len * 1.0f) / FADUMP_CHUNK_SIZE); + for (uint64_t chunk_id = 0; chunk_id < num_chunks; ++chunk_id) { + /* Take minimum of bytes left to copy, and chunk size */ + uint64_t copy_len = MIN( + src_len - (chunk_id * FADUMP_CHUNK_SIZE), + FADUMP_CHUNK_SIZE + ); + + /* Copy the source region to destination */ + io_result = address_space_read(default_as, src_addr, attrs, + copy_buffer, copy_len); + if ((io_result & MEMTX_DECODE_ERROR) || + (io_result & MEMTX_ACCESS_ERROR)) { + qemu_log_mask(LOG_GUEST_ERROR, + "FADump: Failed to decode/access address in section: %d\n", + region->source_data_type); + + /* + * Invalid source address is not an hardware error, instead + * wrong parameter from the kernel. + * Return true to let caller know to continue reading other + * sections + */ + region->error_flags = FADUMP_ERROR_INVALID_SOURCE_ADDR; + region->bytes_dumped = 0; + return true; + } else if (io_result != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, + "FADump: Failed to read source region in section: %d\n", + region->source_data_type); + + return false; + } + + io_result = address_space_write(default_as, dest_addr, attrs, + copy_buffer, copy_len); + if ((io_result & MEMTX_DECODE_ERROR) || + (io_result & MEMTX_ACCESS_ERROR)) { + qemu_log_mask(LOG_GUEST_ERROR, + "FADump: Failed to decode/access address in section: %d\n", + region->source_data_type); + + /* + * Invalid destination address is not an hardware error, + * instead wrong parameter from the kernel. + * Return true to let caller know to continue reading other + * sections + */ + region->error_flags = FADUMP_ERROR_INVALID_DEST_ADDR; + region->bytes_dumped = 0; + return true; + } else if (io_result != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, + "FADump: Failed to write destination in section: %d\n", + region->source_data_type); + + return false; + } + + src_addr += FADUMP_CHUNK_SIZE; + dest_addr += FADUMP_CHUNK_SIZE; + } +#undef FADUMP_CHUNK_SIZE + + /* + * Considering address_space_write would have copied the + * complete region + */ + region->bytes_dumped = cpu_to_be64(src_len); + return true; +} + +/* + * Populate the passed CPUs register entries, in the buffer starting at + * the argument 'curr_reg_entry' + * + * The register entries is an array of pair of register id and register + * value, as described in Table 591/592 in section "H.1 Register Save Area" + * in PAPR v2.13 + * + * Returns pointer just past this CPU's register entries, which can be used + * as the start address for next CPU's register entries + */ +static FadumpRegEntry *populate_cpu_reg_entries(CPUState *cpu, + FadumpRegEntry *curr_reg_entry) +{ + CPUPPCState *env; + PowerPCCPU *ppc_cpu; + uint32_t num_regs_per_cpu = 0; + + ppc_cpu = POWERPC_CPU(cpu); + env = cpu_env(cpu); + num_regs_per_cpu = 0; + + /* + * CPUSTRT and CPUEND register entries follow this format: + * + * 8 Bytes Reg ID (BE) | 4 Bytes (0x0) | 4 Bytes Logical CPU ID (BE) + */ + curr_reg_entry->reg_id = + cpu_to_be64(fadump_str_to_u64("CPUSTRT")); + curr_reg_entry->reg_value = cpu_to_be64( + ppc_cpu->vcpu_id & FADUMP_CPU_ID_MASK); + ++curr_reg_entry; + +#define REG_ENTRY(id, val) \ + do { \ + curr_reg_entry->reg_id = \ + cpu_to_be64(fadump_str_to_u64(#id)); \ + curr_reg_entry->reg_value = cpu_to_be64(val); \ + ++curr_reg_entry; \ + ++num_regs_per_cpu; \ + } while (0) + + REG_ENTRY(ACOP, env->spr[SPR_ACOP]); + REG_ENTRY(AMR, env->spr[SPR_AMR]); + REG_ENTRY(BESCR, env->spr[SPR_BESCR]); + REG_ENTRY(CFAR, env->spr[SPR_CFAR]); + REG_ENTRY(CIABR, env->spr[SPR_CIABR]); + + /* Save the condition register */ + REG_ENTRY(CR, ppc_get_cr(env)); + + REG_ENTRY(CTR, env->spr[SPR_CTR]); + REG_ENTRY(CTRL, env->spr[SPR_CTRL]); + REG_ENTRY(DABR, env->spr[SPR_DABR]); + REG_ENTRY(DABRX, env->spr[SPR_DABRX]); + REG_ENTRY(DAR, env->spr[SPR_DAR]); + REG_ENTRY(DAWR0, env->spr[SPR_DAWR0]); + REG_ENTRY(DAWR1, env->spr[SPR_DAWR1]); + REG_ENTRY(DAWRX0, env->spr[SPR_DAWRX0]); + REG_ENTRY(DAWRX1, env->spr[SPR_DAWRX1]); + REG_ENTRY(DPDES, env->spr[SPR_DPDES]); + REG_ENTRY(DSCR, env->spr[SPR_DSCR]); + REG_ENTRY(DSISR, env->spr[SPR_DSISR]); + REG_ENTRY(EBBHR, env->spr[SPR_EBBHR]); + REG_ENTRY(EBBRR, env->spr[SPR_EBBRR]); + + REG_ENTRY(FPSCR, env->fpscr); + REG_ENTRY(FSCR, env->spr[SPR_FSCR]); + + /* Save the GPRs */ + for (int gpr_id = 0; gpr_id < 32; ++gpr_id) { + curr_reg_entry->reg_id = + cpu_to_be64(fadump_gpr_id_to_u64(gpr_id)); + curr_reg_entry->reg_value = + cpu_to_be64(env->gpr[gpr_id]); + ++curr_reg_entry; + ++num_regs_per_cpu; + } + + REG_ENTRY(IAMR, env->spr[SPR_IAMR]); + REG_ENTRY(IC, env->spr[SPR_IC]); + REG_ENTRY(LR, env->spr[SPR_LR]); + + REG_ENTRY(MSR, env->msr); + REG_ENTRY(NIA, env->nip); /* NIA */ + REG_ENTRY(PIR, env->spr[SPR_PIR]); + REG_ENTRY(PSPB, env->spr[SPR_PSPB]); + REG_ENTRY(PVR, env->spr[SPR_PVR]); + REG_ENTRY(RPR, env->spr[SPR_RPR]); + REG_ENTRY(SPURR, env->spr[SPR_SPURR]); + REG_ENTRY(SRR0, env->spr[SPR_SRR0]); + REG_ENTRY(SRR1, env->spr[SPR_SRR1]); + REG_ENTRY(TAR, env->spr[SPR_TAR]); + REG_ENTRY(TEXASR, env->spr[SPR_TEXASR]); + REG_ENTRY(TFHAR, env->spr[SPR_TFHAR]); + REG_ENTRY(TFIAR, env->spr[SPR_TFIAR]); + REG_ENTRY(TIR, env->spr[SPR_TIR]); + REG_ENTRY(UAMOR, env->spr[SPR_UAMOR]); + REG_ENTRY(VRSAVE, env->spr[SPR_VRSAVE]); + REG_ENTRY(VSCR, env->vscr); + REG_ENTRY(VTB, env->spr[SPR_VTB]); + REG_ENTRY(WORT, env->spr[SPR_WORT]); + REG_ENTRY(XER, env->spr[SPR_XER]); + + /* + * Ignoring transaction checkpoint and few other registers + * mentioned in PAPR as not supported in QEMU + */ +#undef REG_ENTRY + + /* End the registers for this CPU with "CPUEND" reg entry */ + curr_reg_entry->reg_id = + cpu_to_be64(fadump_str_to_u64("CPUEND")); + curr_reg_entry->reg_value = cpu_to_be64( + ppc_cpu->vcpu_id & FADUMP_CPU_ID_MASK); + + /* + * Ensure number of register entries saved matches the expected + * 'FADUMP_PER_CPU_REG_ENTRIES' count + * + * This will help catch an error if in future a new register entry + * is added/removed while not modifying FADUMP_PER_CPU_REG_ENTRIES + */ + assert(FADUMP_PER_CPU_REG_ENTRIES == num_regs_per_cpu + 2 /*CPUSTRT+CPUEND*/); + + ++curr_reg_entry; + + return curr_reg_entry; +} + +/* + * Populate the "Register Save Area"/CPU State as mentioned in section "H.1 + * Register Save Area" in PAPR v2.13 + * + * It allocates the buffer for this region, then populates the register + * entries + * + * Returns the pointer to the buffer (which should be deallocated by the + * callers), and sets the size of this buffer in the argument + * 'cpu_state_len' + */ +static void *get_cpu_state_data(uint64_t *cpu_state_len) +{ + FadumpRegSaveAreaHeader reg_save_hdr; + FadumpRegEntry *reg_entries; + FadumpRegEntry *curr_reg_entry; + CPUState *cpu; + + uint32_t num_reg_entries; + uint32_t reg_entries_size; + uint32_t num_cpus = 0; + + void *cpu_state_buffer = NULL; + uint64_t offset = 0; + + CPU_FOREACH(cpu) { + ++num_cpus; + } + + reg_save_hdr.version = cpu_to_be32(0); + reg_save_hdr.magic_number = + cpu_to_be64(fadump_str_to_u64("REGSAVE")); + + /* Reg save area header is immediately followed by num cpus */ + reg_save_hdr.num_cpu_offset = + cpu_to_be32(sizeof(FadumpRegSaveAreaHeader)); + + num_reg_entries = num_cpus * FADUMP_PER_CPU_REG_ENTRIES; + reg_entries_size = num_reg_entries * sizeof(FadumpRegEntry); + + reg_entries = g_new(FadumpRegEntry, num_reg_entries); + + /* Pointer to current CPU's registers */ + curr_reg_entry = reg_entries; + + /* Populate register entries for all CPUs */ + CPU_FOREACH(cpu) { + cpu_synchronize_state(cpu); + curr_reg_entry = populate_cpu_reg_entries(cpu, curr_reg_entry); + } + + *cpu_state_len = 0; + *cpu_state_len += sizeof(reg_save_hdr); /* reg save header */ + *cpu_state_len += 0xc; /* padding as in PAPR */ + *cpu_state_len += sizeof(num_cpus); /* num_cpus */ + *cpu_state_len += reg_entries_size; /* reg entries */ + + cpu_state_buffer = g_malloc(*cpu_state_len); + + memcpy(cpu_state_buffer + offset, + ®_save_hdr, sizeof(reg_save_hdr)); + offset += sizeof(reg_save_hdr); + + /* Write num_cpus */ + num_cpus = cpu_to_be32(num_cpus); + memcpy(cpu_state_buffer + offset, &num_cpus, sizeof(num_cpus)); + offset += sizeof(num_cpus); + + /* Write the register entries */ + memcpy(cpu_state_buffer + offset, reg_entries, reg_entries_size); + offset += reg_entries_size; + + return cpu_state_buffer; +} + +/* + * Save the CPU State Data (aka "Register Save Area") in given region + * + * Region argument is expected to be of CPU_STATE_DATA type + * + * Returns false only in case of Hardware Error, such as failure to + * read/write a valid address. + * + * Otherwise, even in case of unsuccessful copy of CPU state data for reasons + * such as invalid destination address or non-fatal error errors likely + * caused due to invalid parameters, return true and set region->error_flags + */ +static bool do_populate_cpu_state(FadumpSection *region) +{ + uint64_t dest_addr = be64_to_cpu(region->destination_address); + uint64_t cpu_state_len = 0; + g_autofree void *cpu_state_buffer = NULL; + AddressSpace *default_as = &address_space_memory; + MemTxResult io_result; + MemTxAttrs attrs; + + assert(region->source_data_type == cpu_to_be16(FADUMP_CPU_STATE_DATA)); + + /* Mark the memory transaction as privileged memory access */ + attrs.user = 0; + attrs.memory = 1; + + cpu_state_buffer = get_cpu_state_data(&cpu_state_len); + + io_result = address_space_write(default_as, dest_addr, attrs, + cpu_state_buffer, cpu_state_len); + if ((io_result & MEMTX_DECODE_ERROR) || + (io_result & MEMTX_ACCESS_ERROR)) { + qemu_log_mask(LOG_GUEST_ERROR, + "FADump: Failed to decode/access address in CPU State Region's" + " destination address: 0x%016" PRIx64 "\n", dest_addr); + + /* + * Invalid source address is not an hardware error, instead + * wrong parameter from the kernel. + * Return true to let caller know to continue reading other + * sections + */ + region->error_flags = FADUMP_ERROR_INVALID_SOURCE_ADDR; + region->bytes_dumped = 0; + return true; + } else if (io_result != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, + "FADump: Failed to write CPU state region.\n"); + + return false; + } + + /* + * Set bytes_dumped in cpu state region, so kernel knows platform have + * exported it + */ + region->bytes_dumped = cpu_to_be64(cpu_state_len); + + if (region->source_len != region->bytes_dumped) { + /* + * Log the error, but don't fail the dump collection here, let + * kernel handle the mismatch + */ + qemu_log_mask(LOG_GUEST_ERROR, + "FADump: Mismatch in CPU State region's length exported:" + " Kernel expected: 0x%" PRIx64 " bytes," + " QEMU exported: 0x%" PRIx64 " bytes\n", + be64_to_cpu(region->source_len), + be64_to_cpu(region->bytes_dumped)); + } + + return true; +} + +/* + * Preserve the memory locations registered for fadump + * + * Returns false only in case of RTAS_OUT_HW_ERROR, otherwise true + */ +static bool fadump_preserve_mem(SpaprMachineState *spapr) +{ + FadumpMemStruct *fdm = &spapr->registered_fdm; + uint16_t dump_num_sections, data_type; + + assert(spapr->fadump_registered); + + /* + * Handle all sections + * + * CPU State Data and HPTE regions are handled in their own cases + * + * RMR regions and any custom OS reserved regions such as parameter + * save area, are handled by simply copying the source region to + * destination address + */ + dump_num_sections = be16_to_cpu(fdm->header.dump_num_sections); + for (int i = 0; i < dump_num_sections; ++i) { + data_type = be16_to_cpu(fdm->rgn[i].source_data_type); + + /* Reset error_flags & bytes_dumped for now */ + fdm->rgn[i].error_flags = 0; + fdm->rgn[i].bytes_dumped = 0; + + /* If kernel did not request for the memory region, then skip it */ + if (be32_to_cpu(fdm->rgn[i].request_flag) != FADUMP_REQUEST_FLAG) { + qemu_log_mask(LOG_UNIMP, + "FADump: Skipping copying region as not requested\n"); + continue; + } + + switch (data_type) { + case FADUMP_CPU_STATE_DATA: + if (!do_populate_cpu_state(&fdm->rgn[i])) { + qemu_log_mask(LOG_GUEST_ERROR, + "FADump: Failed to store CPU State Data"); + fdm->header.dump_status_flag |= + cpu_to_be16(FADUMP_STATUS_DUMP_ERROR); + + return false; + } + + break; + case FADUMP_HPTE_REGION: + /* TODO: Add hpte state data */ + break; + case FADUMP_REAL_MODE_REGION: + case FADUMP_PARAM_AREA: + /* Copy the memory region from region's source to its destination */ + if (!do_preserve_region(&fdm->rgn[i])) { + qemu_log_mask(LOG_GUEST_ERROR, + "FADump: Failed to preserve dump section: %d\n", + be16_to_cpu(fdm->rgn[i].source_data_type)); + fdm->header.dump_status_flag |= + cpu_to_be16(FADUMP_STATUS_DUMP_ERROR); + } + + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "FADump: Skipping unknown source data type: %d\n", data_type); + + fdm->rgn[i].error_flags = + cpu_to_be16(FADUMP_ERROR_INVALID_DATA_TYPE); + } + } + + return true; +} + +/* + * Trigger a fadump boot, ie. next boot will be a crashkernel/fadump boot + * with fadump dump active. + * + * This is triggered by ibm,os-term RTAS call, if fadump was registered. + * + * It preserves the memory and sets 'FADUMP_STATUS_DUMP_TRIGGERED' as + * fadump status, which can be used later to add the "ibm,kernel-dump" + * device tree node as presence of 'FADUMP_STATUS_DUMP_TRIGGERED' signifies + * next boot as fadump boot in our case + */ +void trigger_fadump_boot(SpaprMachineState *spapr, target_ulong spapr_retcode) +{ + FadumpSectionHeader *header = &spapr->registered_fdm.header; + + pause_all_vcpus(); + + /* Preserve the memory locations registered for fadump */ + if (!fadump_preserve_mem(spapr)) { + /* Failed to preserve the registered memory regions */ + rtas_st(spapr_retcode, 0, RTAS_OUT_HW_ERROR); + + /* Cause a reboot */ + qemu_system_guest_panicked(NULL); + return; + } + + /* + * Mark next boot as fadump boot + * + * Note: These is some bit of assumption involved here, as PAPR doesn't + * specify any use of the dump status flags, nor does the kernel use it + * + * But from description in Table 136 in PAPR v2.13, it looks like: + * FADUMP_STATUS_DUMP_TRIGGERED + * = Dump was triggered by the previous system boot (PAPR says) + * = Next boot will be a fadump boot (Assumed) + * + * FADUMP_STATUS_DUMP_PERFORMED + * = Dump performed (Set to 0 by caller of the + * ibm,configure-kernel-dump call) (PAPR says) + * = Firmware has performed the copying/dump of requested regions + * (Assumed) + * = Dump is active for the next boot (Assumed) + */ + header->dump_status_flag = cpu_to_be16( + FADUMP_STATUS_DUMP_TRIGGERED | /* Next boot will be fadump boot */ + FADUMP_STATUS_DUMP_PERFORMED /* Dump is active */ + ); + + /* Reset fadump_registered for next boot */ + spapr->fadump_registered = false; + spapr->fadump_dump_active = true; + + /* + * Then do a guest reset + * + * Requirement: + * GUEST_RESET is expected to NOT clear the memory, as is the case when + * this is merged + */ + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); + + rtas_st(spapr_retcode, 0, RTAS_OUT_SUCCESS); +} diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 8c1e0a4817..8f03b3e776 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -1475,16 +1475,11 @@ static target_ulong h_update_dt(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong dt = ppc64_phys_to_real(args[0]); struct fdt_header hdr = { 0 }; unsigned cb; - SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); void *fdt; cpu_physical_memory_read(dt, &hdr, sizeof(hdr)); cb = fdt32_to_cpu(hdr.totalsize); - if (!smc->update_dt_enabled) { - return H_SUCCESS; - } - /* Check that the fdt did not grow out of proportion */ if (cb > spapr->fdt_initial_size * 2) { trace_spapr_update_dt_failed_size(spapr->fdt_initial_size, cb, diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c index d6d368dd08..2ce323457b 100644 --- a/hw/ppc/spapr_irq.c +++ b/hw/ppc/spapr_irq.c @@ -33,11 +33,6 @@ static const TypeInfo spapr_intc_info = { static void spapr_irq_msi_init(SpaprMachineState *spapr) { - if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { - /* Legacy mode doesn't use this allocator */ - return; - } - spapr->irq_map_nr = spapr_irq_nr_msis(spapr); spapr->irq_map = bitmap_new(spapr->irq_map_nr); } @@ -284,19 +279,11 @@ void spapr_irq_dt(SpaprMachineState *spapr, uint32_t nr_servers, uint32_t spapr_irq_nr_msis(SpaprMachineState *spapr) { - SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); - - if (smc->legacy_irq_allocation) { - return smc->nr_xirqs; - } else { - return SPAPR_XIRQ_BASE + smc->nr_xirqs - SPAPR_IRQ_MSI; - } + return SPAPR_NR_XIRQS + SPAPR_XIRQ_BASE - SPAPR_IRQ_MSI; } void spapr_irq_init(SpaprMachineState *spapr, Error **errp) { - SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); - if (kvm_enabled() && kvm_kernel_irqchip_split()) { error_setg(errp, "kernel_irqchip split mode not supported on pseries"); return; @@ -317,7 +304,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp) object_property_add_child(OBJECT(spapr), "ics", obj); object_property_set_link(obj, ICS_PROP_XICS, OBJECT(spapr), &error_abort); - object_property_set_int(obj, "nr-irqs", smc->nr_xirqs, &error_abort); + object_property_set_int(obj, "nr-irqs", SPAPR_NR_XIRQS, &error_abort); if (!qdev_realize(DEVICE(obj), NULL, errp)) { return; } @@ -331,7 +318,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp) int i; dev = qdev_new(TYPE_SPAPR_XIVE); - qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + SPAPR_IRQ_NR_IPIS); + qdev_prop_set_uint32(dev, "nr-irqs", SPAPR_NR_XIRQS + SPAPR_IRQ_NR_IPIS); /* * 8 XIVE END structures per CPU. One for each available * priority @@ -358,7 +345,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp) } spapr->qirqs = qemu_allocate_irqs(spapr_set_irq, spapr, - smc->nr_xirqs + SPAPR_IRQ_NR_IPIS); + SPAPR_NR_XIRQS + SPAPR_IRQ_NR_IPIS); /* * Mostly we don't actually need this until reset, except that not @@ -373,11 +360,10 @@ int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp) { SpaprInterruptController *intcs[] = ALL_INTCS(spapr); int i; - SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); int rc; assert(irq >= SPAPR_XIRQ_BASE); - assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE)); + assert(irq < (SPAPR_NR_XIRQS + SPAPR_XIRQ_BASE)); for (i = 0; i < ARRAY_SIZE(intcs); i++) { SpaprInterruptController *intc = intcs[i]; @@ -397,10 +383,9 @@ void spapr_irq_free(SpaprMachineState *spapr, int irq, int num) { SpaprInterruptController *intcs[] = ALL_INTCS(spapr); int i, j; - SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); assert(irq >= SPAPR_XIRQ_BASE); - assert((irq + num) <= (smc->nr_xirqs + SPAPR_XIRQ_BASE)); + assert((irq + num) <= (SPAPR_NR_XIRQS + SPAPR_XIRQ_BASE)); for (i = irq; i < (irq + num); i++) { for (j = 0; j < ARRAY_SIZE(intcs); j++) { @@ -417,8 +402,6 @@ void spapr_irq_free(SpaprMachineState *spapr, int irq, int num) qemu_irq spapr_qirq(SpaprMachineState *spapr, int irq) { - SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); - /* * This interface is basically for VIO and PHB devices to find the * right qemu_irq to manipulate, so we only allow access to the @@ -427,7 +410,7 @@ qemu_irq spapr_qirq(SpaprMachineState *spapr, int irq) * interfaces, we can change this if we need to in future. */ assert(irq >= SPAPR_XIRQ_BASE); - assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE)); + assert(irq < (SPAPR_NR_XIRQS + SPAPR_XIRQ_BASE)); if (spapr->ics) { assert(ics_valid_irq(spapr->ics, irq)); @@ -588,11 +571,6 @@ int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp) return first + ics->offset; } -SpaprIrq spapr_irq_xics_legacy = { - .xics = true, - .xive = false, -}; - static void spapr_irq_register_types(void) { type_register_static(&spapr_intc_info); diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index f9095552e8..bdec8f0728 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -268,7 +268,6 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong args, uint32_t nret, target_ulong rets) { - SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); uint32_t config_addr = rtas_ld(args, 0); uint64_t buid = rtas_ldq(args, 1); unsigned int func = rtas_ld(args, 3); @@ -373,13 +372,8 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, SpaprMachineState *spapr, } /* Allocate MSIs */ - if (smc->legacy_irq_allocation) { - irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI, - &err); - } else { - irq = spapr_irq_msi_alloc(spapr, req_num, - ret_intr_type == RTAS_TYPE_MSI, &err); - } + irq = spapr_irq_msi_alloc(spapr, req_num, + ret_intr_type == RTAS_TYPE_MSI, &err); if (err) { error_reportf_err(err, "Can't allocate MSIs for device %x: ", config_addr); @@ -393,9 +387,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, SpaprMachineState *spapr, if (i) { spapr_irq_free(spapr, irq, i); } - if (!smc->legacy_irq_allocation) { - spapr_irq_msi_free(spapr, irq, req_num); - } + spapr_irq_msi_free(spapr, irq, req_num); error_reportf_err(err, "Can't allocate MSIs for device %x: ", config_addr); rtas_st(rets, 0, RTAS_OUT_HW_ERROR); @@ -1789,12 +1781,9 @@ static void spapr_phb_unrealize(DeviceState *dev) static void spapr_phb_destroy_msi(gpointer opaque) { SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); - SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); SpaprPciMsi *msi = opaque; - if (!smc->legacy_irq_allocation) { - spapr_irq_msi_free(spapr, msi->first_irq, msi->num); - } + spapr_irq_msi_free(spapr, msi->first_irq, msi->num); spapr_irq_free(spapr, msi->first_irq, msi->num); g_free(msi); } @@ -1808,7 +1797,6 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) SpaprMachineState *spapr = (SpaprMachineState *) object_dynamic_cast(qdev_get_machine(), TYPE_SPAPR_MACHINE); - SpaprMachineClass *smc = spapr ? SPAPR_MACHINE_GET_CLASS(spapr) : NULL; SysBusDevice *sbd = SYS_BUS_DEVICE(dev); SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(sbd); PCIHostState *phb = PCI_HOST_BRIDGE(sbd); @@ -1956,18 +1944,6 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) for (i = 0; i < PCI_NUM_PINS; i++) { int irq = SPAPR_IRQ_PCI_LSI + sphb->index * PCI_NUM_PINS + i; - if (smc->legacy_irq_allocation) { - irq = spapr_irq_findone(spapr, errp); - if (irq < 0) { - error_prepend(errp, "can't allocate LSIs: "); - /* - * Older machines will never support PHB hotplug, ie, this is an - * init only path and QEMU will terminate. No need to rollback. - */ - return; - } - } - if (spapr_irq_claim(spapr, irq, true, errp) < 0) { error_prepend(errp, "can't allocate LSIs: "); goto unrealize; diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 143bc8c379..1f78fe406e 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -344,6 +344,73 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu, rtas_st(rets, 0, ret); } +/* Papr Section 7.4.9 ibm,configure-kernel-dump RTAS call */ +static void rtas_configure_kernel_dump(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + target_ulong cmd = rtas_ld(args, 0); + uint32_t ret_val; + + /* Number of outputs has to be 1 */ + if (nret != 1) { + qemu_log_mask(LOG_GUEST_ERROR, + "FADump: ibm,configure-kernel-dump called with nret != 1.\n"); + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + /* Number of inputs has to be 3 */ + if (nargs != 3) { + qemu_log_mask(LOG_GUEST_ERROR, + "FADump: ibm,configure-kernel-dump called with nargs != 3.\n"); + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + switch (cmd) { + case FADUMP_CMD_REGISTER: + ret_val = do_fadump_register(spapr, args); + if (ret_val != RTAS_OUT_SUCCESS) { + rtas_st(rets, 0, ret_val); + return; + } + break; + case FADUMP_CMD_UNREGISTER: + if (spapr->fadump_dump_active) { + rtas_st(rets, 0, RTAS_OUT_DUMP_ACTIVE); + return; + } + + spapr->fadump_registered = false; + spapr->fadump_dump_active = false; + memset(&spapr->registered_fdm, 0, sizeof(spapr->registered_fdm)); + break; + case FADUMP_CMD_INVALIDATE: + if (!spapr->fadump_dump_active) { + qemu_log_mask(LOG_GUEST_ERROR, + "FADump: Nothing to invalidate, no dump active\n"); + + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + } + + spapr->fadump_registered = false; + spapr->fadump_dump_active = false; + memset(&spapr->registered_fdm, 0, sizeof(spapr->registered_fdm)); + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "FADump: Unknown command: " TARGET_FMT_lu "\n", cmd); + + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + static void rtas_ibm_os_term(PowerPCCPU *cpu, SpaprMachineState *spapr, uint32_t token, uint32_t nargs, @@ -353,6 +420,11 @@ static void rtas_ibm_os_term(PowerPCCPU *cpu, target_ulong msgaddr = rtas_ld(args, 0); char msg[512]; + if (spapr->fadump_registered) { + /* If fadump boot works, control won't come back here */ + return trigger_fadump_boot(spapr, rets); + } + cpu_physical_memory_read(msgaddr, msg, sizeof(msg) - 1); msg[sizeof(msg) - 1] = 0; @@ -659,6 +731,10 @@ static void core_rtas_register_types(void) spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock", rtas_ibm_nmi_interlock); + /* Register fadump rtas call */ + spapr_rtas_register(RTAS_CONFIGURE_KERNEL_DUMP, "ibm,configure-kernel-dump", + rtas_configure_kernel_dump); + qtest_set_command_cb(spapr_qtest_callback); } diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index 7759436a4f..c21a2a3274 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -507,15 +507,6 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp) dev->irq = spapr_vio_reg_to_irq(dev->reg); - if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { - int irq = spapr_irq_findone(spapr, errp); - - if (irq < 0) { - return; - } - dev->irq = irq; - } - if (spapr_irq_claim(spapr, dev->irq, false, errp) < 0) { return; } diff --git a/hw/ppc/vof.c b/hw/ppc/vof.c index f14efa3a7c..5ecfc68910 100644 --- a/hw/ppc/vof.c +++ b/hw/ppc/vof.c @@ -353,34 +353,50 @@ static uint32_t vof_nextprop(const void *fdt, uint32_t phandle, { int offset, nodeoff = fdt_node_offset_by_phandle(fdt, phandle); char prev[OF_PROPNAME_LEN_MAX + 1]; - const char *tmp; + const char *tmp = NULL; + bool match = false; if (readstr(prevaddr, prev, sizeof(prev))) { return PROM_ERROR; } - - fdt_for_each_property_offset(offset, fdt, nodeoff) { - if (!fdt_getprop_by_offset(fdt, offset, &tmp, NULL)) { - return 0; + /* + * "name" may or may not be present in fdt but we should still return it. + * Do that first and then skip it if seen later. + */ + if (prev[0] == '\0') { + tmp = "name"; + } else { + if (strcmp(prev, "name") == 0) { + prev[0] = '\0'; } - if (prev[0] == '\0' || strcmp(prev, tmp) == 0) { - if (prev[0] != '\0') { - offset = fdt_next_property_offset(fdt, offset); - if (offset < 0) { - return 0; - } - } + fdt_for_each_property_offset(offset, fdt, nodeoff) { if (!fdt_getprop_by_offset(fdt, offset, &tmp, NULL)) { return 0; } - - if (VOF_MEM_WRITE(nameaddr, tmp, strlen(tmp) + 1) != MEMTX_OK) { - return PROM_ERROR; + if (strcmp(tmp, "name") == 0) { + continue; } - return 1; + if (match) { + break; + } + if (strcmp(prev, tmp) == 0) { + match = true; + continue; + } + if (prev[0] == '\0') { + break; + } + } + if (offset < 0) { + return 0; } } - + if (tmp) { + if (VOF_MEM_WRITE(nameaddr, tmp, strlen(tmp) + 1) != MEMTX_OK) { + return PROM_ERROR; + } + return 1; + } return 0; } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 39bd5bd5ed..28cf2723d8 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -13,6 +13,7 @@ #include "hw/ppc/xics.h" /* For ICSState */ #include "hw/ppc/spapr_tpm_proxy.h" #include "hw/ppc/spapr_nested.h" /* For SpaprMachineStateNested */ +#include "hw/ppc/spapr_fadump.h" /* For FadumpMemStruct */ struct SpaprVioBus; struct SpaprPhbState; @@ -139,27 +140,11 @@ struct SpaprCapabilities { * SpaprMachineClass: */ struct SpaprMachineClass { - /*< private >*/ MachineClass parent_class; - /*< public >*/ - bool dr_phb_enabled; /* enable dynamic-reconfig/hotplug of PHBs */ - bool update_dt_enabled; /* enable KVMPPC_H_UPDATE_DT */ - bool legacy_irq_allocation; - uint32_t nr_xirqs; - bool broken_host_serial_model; /* present real host info to the guest */ - bool pre_4_1_migration; /* don't migrate hpt-max-page-size */ - bool linux_pci_probe; - bool smp_threads_vsmt; /* set VSMT to smp_threads by default */ - hwaddr rma_limit; /* clamp the RMA to this size */ bool pre_5_1_assoc_refpoints; bool pre_5_2_numa_associativity; bool pre_6_2_numa_affinity; - - bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, - uint64_t *buid, hwaddr *pio, - hwaddr *mmio32, hwaddr *mmio64, - unsigned n_dma, uint32_t *liobns, Error **errp); SpaprResizeHpt resize_hpt_default; SpaprCapabilities default_caps; SpaprIrq *irq; @@ -283,6 +268,11 @@ struct SpaprMachineState { Error *fwnmi_migration_blocker; SpaprWatchdog wds[WDT_MAX_WATCHDOGS]; + + /* Fadump State */ + bool fadump_registered; + bool fadump_dump_active; + FadumpMemStruct registered_fdm; }; #define H_SUCCESS 0 @@ -708,6 +698,8 @@ void push_sregs_to_kvm_pr(SpaprMachineState *spapr); #define RTAS_OUT_PARAM_ERROR -3 #define RTAS_OUT_NOT_SUPPORTED -3 #define RTAS_OUT_NO_SUCH_INDICATOR -3 +#define RTAS_OUT_DUMP_ALREADY_REGISTERED -9 +#define RTAS_OUT_DUMP_ACTIVE -10 #define RTAS_OUT_NOT_AUTHORIZED -9002 #define RTAS_OUT_SYSPARM_PARAM_ERROR -9999 @@ -770,8 +762,9 @@ void push_sregs_to_kvm_pr(SpaprMachineState *spapr); #define RTAS_IBM_SUSPEND_ME (RTAS_TOKEN_BASE + 0x2A) #define RTAS_IBM_NMI_REGISTER (RTAS_TOKEN_BASE + 0x2B) #define RTAS_IBM_NMI_INTERLOCK (RTAS_TOKEN_BASE + 0x2C) +#define RTAS_CONFIGURE_KERNEL_DUMP (RTAS_TOKEN_BASE + 0x2D) -#define RTAS_TOKEN_MAX (RTAS_TOKEN_BASE + 0x2D) +#define RTAS_TOKEN_MAX (RTAS_TOKEN_BASE + 0x2E) /* RTAS ibm,get-system-parameter token values */ #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS 20 diff --git a/include/hw/ppc/spapr_fadump.h b/include/hw/ppc/spapr_fadump.h new file mode 100644 index 0000000000..fde2830e94 --- /dev/null +++ b/include/hw/ppc/spapr_fadump.h @@ -0,0 +1,124 @@ +/* + * Firmware Assisted Dump in PSeries + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef PPC_SPAPR_FADUMP_H +#define PPC_SPAPR_FADUMP_H + +#include "qemu/osdep.h" +#include "cpu.h" + +/* Fadump commands */ +#define FADUMP_CMD_REGISTER 1 +#define FADUMP_CMD_UNREGISTER 2 +#define FADUMP_CMD_INVALIDATE 3 + +#define FADUMP_VERSION 1 + +/* Firmware provided dump sections */ +#define FADUMP_CPU_STATE_DATA 0x0001 +#define FADUMP_HPTE_REGION 0x0002 +#define FADUMP_REAL_MODE_REGION 0x0011 + +/* OS defined sections */ +#define FADUMP_PARAM_AREA 0x0100 + +/* Dump request flag */ +#define FADUMP_REQUEST_FLAG 0x00000001 + +/* Dump status flags */ +#define FADUMP_STATUS_DUMP_PERFORMED 0x8000 +#define FADUMP_STATUS_DUMP_TRIGGERED 0x4000 +#define FADUMP_STATUS_DUMP_ERROR 0x2000 + +/* Region dump error flags */ +#define FADUMP_ERROR_INVALID_DATA_TYPE 0x8000 +#define FADUMP_ERROR_INVALID_SOURCE_ADDR 0x4000 +#define FADUMP_ERROR_LENGTH_EXCEEDS_SOURCE 0x2000 +#define FADUMP_ERROR_INVALID_DEST_ADDR 0x1000 +#define FAUDMP_ERROR_DEST_TOO_SMALL 0x0800 + +/* + * The Firmware Assisted Dump Memory structure supports a maximum of 10 sections + * in the dump memory structure. Presently, three sections are used for + * CPU state data, HPTE & Parameters area, while the remaining seven sections + * can be used for boot memory regions. + */ +#define FADUMP_MAX_SECTIONS 10 + +/* Number of register entries stored per cpu */ +#define FADUMP_PER_CPU_REG_ENTRIES (32 /*GPR*/ + 45 /*others*/ + 2 /*STRT & END*/) + +/* Mask of CPU ID in CPUSTRT and CPUEND entries */ +#define FADUMP_CPU_ID_MASK ((1ULL << 32) - 1) + +typedef struct FadumpSection FadumpSection; +typedef struct FadumpSectionHeader FadumpSectionHeader; +typedef struct FadumpMemStruct FadumpMemStruct; +typedef struct FadumpRegSaveAreaHeader FadumpRegSaveAreaHeader; +typedef struct FadumpRegEntry FadumpRegEntry; + +struct SpaprMachineState; + +/* Kernel Dump section info */ +/* All fields are in big-endian */ +struct FadumpSection { + uint32_t request_flag; + uint16_t source_data_type; + uint16_t error_flags; + uint64_t source_address; + uint64_t source_len; + uint64_t bytes_dumped; + uint64_t destination_address; +}; + +/* ibm,configure-kernel-dump header. */ +struct FadumpSectionHeader { + uint32_t dump_format_version; + uint16_t dump_num_sections; + uint16_t dump_status_flag; + uint32_t offset_first_dump_section; + + /* Fields for disk dump option. */ + uint32_t dd_block_size; + uint64_t dd_block_offset; + uint64_t dd_num_blocks; + uint32_t dd_offset_disk_path; + + /* Maximum time allowed to prevent an automatic dump-reboot. */ + uint32_t max_time_auto; +}; + +/* Note: All the data in these structures is in big-endian */ +struct FadumpMemStruct { + FadumpSectionHeader header; + FadumpSection rgn[FADUMP_MAX_SECTIONS]; +}; + +/* + * The firmware-assisted dump format. + * + * The register save area is an area in the partition's memory used to preserve + * the register contents (CPU state data) for the active CPUs during a firmware + * assisted dump. The dump format contains register save area header followed + * by register entries. Each list of registers for a CPU starts with "CPUSTRT" + * and ends with "CPUEND". + */ + +/* Register save area header. */ +struct FadumpRegSaveAreaHeader { + uint64_t magic_number; + uint32_t version; + uint32_t num_cpu_offset; +}; + +/* Register entry. */ +struct FadumpRegEntry { + uint64_t reg_id; + uint64_t reg_value; +}; + +uint32_t do_fadump_register(struct SpaprMachineState *, target_ulong); +void trigger_fadump_boot(struct SpaprMachineState *, target_ulong); +#endif /* PPC_SPAPR_FADUMP_H */ diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h index cb9a85f657..5ddd1107c3 100644 --- a/include/hw/ppc/spapr_irq.h +++ b/include/hw/ppc/spapr_irq.h @@ -100,7 +100,6 @@ typedef struct SpaprIrq { } SpaprIrq; extern SpaprIrq spapr_irq_xics; -extern SpaprIrq spapr_irq_xics_legacy; extern SpaprIrq spapr_irq_xive; extern SpaprIrq spapr_irq_dual; diff --git a/pc-bios/dtb/meson.build b/pc-bios/dtb/meson.build index 993032949f..81bdd7580e 100644 --- a/pc-bios/dtb/meson.build +++ b/pc-bios/dtb/meson.build @@ -1,6 +1,8 @@ dtbs = [ 'bamboo.dtb', 'canyonlands.dtb', + 'pegasos1.dtb', + 'pegasos2.dtb', 'petalogix-ml605.dtb', 'petalogix-s3adsp1800.dtb', ] diff --git a/pc-bios/dtb/pegasos1.dtb b/pc-bios/dtb/pegasos1.dtb new file mode 100644 index 0000000000..3b863b2528 Binary files /dev/null and b/pc-bios/dtb/pegasos1.dtb differ diff --git a/pc-bios/dtb/pegasos1.dts b/pc-bios/dtb/pegasos1.dts new file mode 100644 index 0000000000..e5ef9db866 --- /dev/null +++ b/pc-bios/dtb/pegasos1.dts @@ -0,0 +1,125 @@ +/* + * QEMU Pegasos1 Device Tree Source + * + * Copyright 2025 BALATON Zoltan + * SPDX-License-Identifier: GPL-2.0-or-later + * + * This is partial source, more info will be filled in by board code. + */ + +/dts-v1/; + +/ { + #address-cells = <1>; + device_type = "chrp"; + model = "Pegasos"; + revision = "1A"; + CODEGEN,vendor = "bplan GmbH"; + CODEGEN,board = "Pegasos"; + CODEGEN,description = "Pegasos CHRP PowerPC System"; + + openprom { + model = "Pegasos,0.1b123"; + }; + + chosen { + }; + + memory@0 { + device_type = "memory"; + reg = <0 0>; + }; + + cpus { + #size-cells = <0>; + #address-cells = <1>; + #cpus = <1>; + }; + + failsafe { + device_type = "serial"; + }; + + pci@80000000 { + device_type = "pci"; + #address-cells = <3>; + #size-cells = <2>; + clock-frequency = <33333333>; + 8259-interrupt-acknowledge = <0xfef00000>; + reg = <0x80000000 0x7f000000>; + ranges = <0x01000000 0 0x00000000 0xfe000000 0 0x00800000 + 0x02000000 0 0x80000000 0x80000000 0 0x7d000000 + 0x02000000 0 0xfd000000 0xfd000000 0 0x01000000>; + bus-range = <0 0>; + + isa@7 { + vendor-id = <0x1106>; + device-id = <0x8231>; + revision-id = <0x10>; + class-code = <0x60100>; + /* Pegasos firmware has subsystem-id and */ + /* subsystem-vendor-id swapped */ + subsystem-id = <0x1af4>; + subsystem-vendor-id = <0x1100>; + reg = <0x3800 0 0 0 0>; + device_type = "isa"; + #address-cells = <2>; + #size-cells = <1>; + eisa-slots = <0>; + clock-frequency = <8333333>; + slot-names = <0>; + + serial@i2f8 { + device_type = "serial"; + reg = <1 0x2f8 8>; + interrupts = <3 0>; + clock-frequency = <0>; + }; + + 8042@i60 { + device_type = ""; + reg = <1 0x60 5>; + clock-frequency = <0>; + interrupt-controller = ""; + #address-cells = <1>; + #size-cells = <0>; + #interrupt-cells = <2>; + + }; + + keyboard@i60 { + device_type = "keyboard"; + reg = <1 0x60 5>; + interrupts = <1 0>; + }; + + rtc@i70 { + device_type = "rtc"; + reg = <1 0x70 2>; + interrupts = <8 0>; + clock-frequency = <0>; + compatible = "ds1385-rtc"; + }; + + timer@i40 { + device_type = "timer"; + reg = <1 0x40 8>; + clock-frequency = <0>; + }; + + fdc@i3f0 { + device_type = "fdc"; + reg = <1 0x3f0 8>; + interrupts = <6 0>; + clock-frequency = <0>; + }; + + lpt@i3bc { + device_type = "lpt"; + reg = <1 0x3bc 8>; + interrupts = <7 0>; + clock-frequency = <0>; + }; + }; + }; +}; diff --git a/pc-bios/dtb/pegasos2.dtb b/pc-bios/dtb/pegasos2.dtb new file mode 100644 index 0000000000..29c35216ec Binary files /dev/null and b/pc-bios/dtb/pegasos2.dtb differ diff --git a/pc-bios/dtb/pegasos2.dts b/pc-bios/dtb/pegasos2.dts new file mode 100644 index 0000000000..0ddb9e3bec --- /dev/null +++ b/pc-bios/dtb/pegasos2.dts @@ -0,0 +1,167 @@ +/* + * QEMU Pegasos2 Device Tree Source + * + * Copyright 2025 BALATON Zoltan + * SPDX-License-Identifier: GPL-2.0-or-later + * + * This is partial source, more info will be filled in by board code. + */ + +/dts-v1/; + +/ { + #address-cells = <1>; + device_type = "chrp"; + model = "Pegasos2"; + revision = "2B"; + CODEGEN,vendor = "bplan GmbH"; + CODEGEN,board = "Pegasos2"; + CODEGEN,description = "Pegasos CHRP PowerPC System"; + + openprom { + model = "Pegasos2,1.1"; + }; + + chosen { + }; + + memory@0 { + device_type = "memory"; + reg = <0 0>; + }; + + cpus { + #size-cells = <0>; + #address-cells = <1>; + #cpus = <1>; + }; + + rtas { + rtas-version = <1>; + rtas-size = <20>; + rtas-display-device = <0>; + rtas-event-scan-rate = <0>; + rtas-error-log-max = <0>; + restart-rtas = <0>; + nvram-fetch = <1>; + nvram-store = <2>; + get-time-of-day = <3>; + set-time-of-day = <4>; + event-scan = <6>; + /* Pegasos2 firmware misspells check-exception */ + check-execption = <7>; + read-pci-config = <8>; + write-pci-config = <9>; + display-character = <10>; + set-indicator = <11>; + power-off = <17>; + suspend = <18>; + hibernate = <19>; + system-reboot = <20>; + }; + + failsafe { + device_type = "serial"; + }; + + pci@80000000 { + device_type = "pci"; + #address-cells = <3>; + #size-cells = <2>; + clock-frequency = <33333333>; + ranges = <0x01000000 0 0x00000000 0xfe000000 0 0x00010000 + 0x02000000 0 0x80000000 0x80000000 0 0x40000000>; + 8259-interrupt-acknowledge = <0xf1000cb4>; + reg = <0x80000000 0x40000000>; + pci-bridge-number = <0>; + bus-range = <0 0>; + + isa@c { + vendor-id = <0x1106>; + device-id = <0x8231>; + revision-id = <0x10>; + class-code = <0x60100>; + /* Pegasos firmware has subsystem-id and + subsystem-vendor-id swapped */ + subsystem-id = <0x1af4>; + subsystem-vendor-id = <0x1100>; + reg = <0x6000 0 0 0 0>; + device_type = "isa"; + #address-cells = <2>; + #size-cells = <1>; + eisa-slots = <0>; + clock-frequency = <8333333>; + slot-names = <0>; + + serial@i2f8 { + device_type = "serial"; + reg = <1 0x2f8 8>; + interrupts = <3 0>; + clock-frequency = <1843200>; + compatible = "pnpPNP,501"; + }; + + 8042@i60 { + device_type = ""; + reg = <1 0x60 5>; + clock-frequency = <0>; + compatible = "chrp,8042"; + interrupt-controller = ""; + #address-cells = <1>; + #size-cells = <0>; + #interrupt-cells = <2>; + + }; + + keyboard@i60 { + device_type = "keyboard"; + reg = <1 0x60 5>; + interrupts = <1 0>; + compatible = "pnpPNP,303"; + }; + + rtc@i70 { + device_type = "rtc"; + reg = <1 0x70 2>; + interrupts = <8 0>; + clock-frequency = <0>; + compatible = "ds1385-rtc"; + }; + + timer@i40 { + device_type = "timer"; + reg = <1 0x40 8>; + clock-frequency = <0>; + compatible = "pnpPNP,100"; + }; + + fdc@i3f0 { + device_type = "fdc"; + reg = <1 0x3f0 8>; + interrupts = <6 0>; + clock-frequency = <0>; + compatible = "pnpPNP,700"; + }; + + lpt@i3bc { + device_type = "lpt"; + reg = <1 0x3bc 8>; + interrupts = <7 0>; + clock-frequency = <0>; + compatible = "pnpPNP,400"; + }; + }; + }; + + pci@c0000000 { + device_type = "pci"; + #address-cells = <3>; + #size-cells = <2>; + clock-frequency = <66666666>; + ranges = <0x01000000 0 0x00000000 0xf8000000 0 0x00010000 + 0x02000000 0 0xc0000000 0xc0000000 0 0x20000000>; + reg = <0xc0000000 0x20000000>; + pci-bridge-number = <1>; + bus-range = <0 0>; + }; +}; diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index cd60893a17..43124bf1c7 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -1864,17 +1864,6 @@ uint32_t kvmppc_get_tbfreq(void) return cached_tbfreq; } -bool kvmppc_get_host_serial(char **value) -{ - return g_file_get_contents("/proc/device-tree/system-id", value, NULL, - NULL); -} - -bool kvmppc_get_host_model(char **value) -{ - return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL); -} - /* Try to find a device tree node for a CPU with clock-frequency property */ static int kvmppc_find_cpu_dt(char *buf, int buf_len) { diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h index a1d9ce9f9a..742881231e 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h @@ -21,8 +21,6 @@ uint32_t kvmppc_get_tbfreq(void); uint64_t kvmppc_get_clockfreq(void); -bool kvmppc_get_host_model(char **buf); -bool kvmppc_get_host_serial(char **buf); int kvmppc_get_hasidle(CPUPPCState *env); int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len); int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level); @@ -129,16 +127,6 @@ static inline uint32_t kvmppc_get_tbfreq(void) return 0; } -static inline bool kvmppc_get_host_model(char **buf) -{ - return false; -} - -static inline bool kvmppc_get_host_serial(char **buf) -{ - return false; -} - static inline uint64_t kvmppc_get_clockfreq(void) { return 0; diff --git a/tests/functional/ppc64/meson.build b/tests/functional/ppc64/meson.build index 1fa0a70f7e..f0f8ab8f61 100644 --- a/tests/functional/ppc64/meson.build +++ b/tests/functional/ppc64/meson.build @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-or-later test_ppc64_timeouts = { + 'fadump' : 480, 'hv' : 1000, 'mac99' : 120, 'powernv' : 480, @@ -16,6 +17,7 @@ tests_ppc64_system_quick = [ tests_ppc64_system_thorough = [ 'e500', + 'fadump', 'hv', 'mac99', 'powernv', diff --git a/tests/functional/ppc64/test_fadump.py b/tests/functional/ppc64/test_fadump.py new file mode 100755 index 0000000000..2d6b8017e8 --- /dev/null +++ b/tests/functional/ppc64/test_fadump.py @@ -0,0 +1,182 @@ +#!/usr/bin/env python3 +# +# SPDX-License-Identifier: GPL-2.0-or-later + +from unittest import skip +from qemu_test import Asset +from qemu_test import wait_for_console_pattern +from qemu_test import LinuxKernelTest +from qemu_test import exec_command, exec_command_and_wait_for_pattern + +class QEMUFadump(LinuxKernelTest): + """ + Functional test to verify Fadump is working in following scenarios: + + 1. test_fadump_pseries: PSeries + 2. test_fadump_pseries_kvm: PSeries + KVM + """ + + timeout = 90 + KERNEL_COMMON_COMMAND_LINE = 'console=hvc0 fadump=on ' + msg_panic = 'Kernel panic - not syncing' + msg_not_supported = 'Firmware-Assisted Dump is not supported on this hardware' + msg_registered_success = '' + msg_registered_failed = '' + msg_dump_active = '' + + ASSET_EPAPR_KERNEL = Asset( + ('https://github.com/open-power/op-build/releases/download/v2.7/' + 'zImage.epapr'), + '0ab237df661727e5392cee97460e8674057a883c5f74381a128fa772588d45cd') + + ASSET_VMLINUZ_KERNEL = Asset( + ('https://archives.fedoraproject.org/pub/archive/fedora-secondary/' + 'releases/39/Everything/ppc64le/os/ppc/ppc64/vmlinuz'), + ('81e5541d243b50c8f9568906c6918dda22239744d637bb9a7b22d23c3d661226' + '8d5302beb2ca5c06f93bdbc9736c414ef5120756c8bf496ff488ad07d116d67f') + ) + + ASSET_FEDORA_INITRD = Asset( + ('https://archives.fedoraproject.org/pub/archive/fedora-secondary/' + 'releases/39/Everything/ppc64le/os/ppc/ppc64/initrd.img'), + 'e7f24b44cb2aaa67d30e551db6ac8d29cc57c934b158dabca6b7f885f2cfdd9b') + + def do_test_fadump(self, is_kvm=False, is_powernv=False): + """ + Helper Function for Fadump tests below + + It boots the VM with fadump enabled, checks if fadump is correctly + registered. + Then crashes the system causing a QEMU_SYSTEM_RESET, after which + dump should be available in the kernel. + Finally it checks the filesize of the exported /proc/vmcore in 2nd + kernel to verify it's same as the VM's memory size + """ + if is_kvm: + self.require_accelerator("kvm") + self.vm.add_args("-accel", "kvm") + else: + self.require_accelerator("tcg") + + if is_powernv: + self.set_machine("powernv10") + else: + # SLOF takes upto >20s in startup time, use VOF + self.set_machine("pseries") + self.vm.add_args("-machine", "x-vof=on") + self.vm.add_args("-m", "6G") + + self.vm.set_console() + + kernel_path = None + + if is_powernv: + kernel_path = self.ASSET_EPAPR_KERNEL.fetch() + else: + kernel_path = self.ASSET_VMLINUZ_KERNEL.fetch() + + initrd_path = self.ASSET_FEDORA_INITRD.fetch() + + self.vm.add_args('-kernel', kernel_path) + self.vm.add_args('-initrd', initrd_path) + self.vm.add_args('-append', "fadump=on"\ + " -nodefaults -serial mon:stdio crashkernel=2G"\ + " rdinit=/bin/sh ") + + self.vm.launch() + + # If kernel detects fadump support, and "fadump=on" is in command + # line which we add above, it will print something like: + # + # fadump: Reserved 1024MB of memory at 0x00000040000000 ... + # + # Else, if the kernel doesn't detect fadump support, it prints: + # + # fadump: Firmware-Assisted Dump is not supported on this hardware + # + # Timeout after 20s if kernel doesn't print any fadump logs, this + # can happen due to fadump being disabled in the kernel + self.wait_for_regex_console_pattern( + success_pattern="fadump: Reserved ", + failure_pattern=r"fadump: (Firmware-Assisted Dump is not"\ + " supported on this hardware|Failed to find memory chunk for"\ + " reservation!)", + timeout=20 + ) + + # Ensure fadump is registered successfully, if registration + # succeeds, we get a log from rtas fadump: + # + # rtas fadump: Registration is successful! + self.wait_for_console_pattern( + "rtas fadump: Registration is successful!" + ) + + # Wait for the shell + self.wait_for_console_pattern("#") + + # Mount /proc since not available in the initrd used + exec_command(self, command="mount -t proc proc /proc") + + # Crash the kernel + exec_command(self, command="echo c > /proc/sysrq-trigger") + + # Check for the kernel panic message, setting timeout to 20s as it + # should occur almost immediately after previous echo c + self.wait_for_regex_console_pattern( + success_pattern="Kernel panic - not syncing: sysrq" \ + " triggered crash", + timeout=20 + ) + + # Check if fadump is active + # If the kernel shows that fadump is active, that implies it's a + # crashkernel boot + # Else if the kernel shows "fadump: Reserved ..." then it's + # treating this as the first kernel boot, this is likely the case + # that qemu didn't pass the 'ibm,kernel-dump' device tree node + wait_for_console_pattern( + test=self, + success_message="rtas fadump: Firmware-assisted dump is active", + failure_message="fadump: Reserved " + ) + + # In a successful fadump boot, we get these logs: + # + # [ 0.000000] fadump: Firmware-assisted dump is active. + # [ 0.000000] fadump: Reserving <>MB of memory at <> for preserving crash data + # + # Check if these logs are present in the fadump boot + self.wait_for_console_pattern("preserving crash data") + + # Wait for prompt + self.wait_for_console_pattern("sh-5.2#") + + # Mount /proc since not available in the initrd used + exec_command_and_wait_for_pattern(self, + command="mount -t proc proc /proc", + success_message="#" + ) + + # Check if vmcore exists + exec_command_and_wait_for_pattern(self, + command="stat /proc/vmcore", + success_message="File: /proc/vmcore", + failure_message="No such file or directory" + ) + + def test_fadump_pseries(self): + return self.do_test_fadump(is_kvm=False, is_powernv=False) + + @skip("PowerNV Fadump not supported yet") + def test_fadump_powernv(self): + return + + def test_fadump_pseries_kvm(self): + """ + Test Fadump in PSeries with KVM accel + """ + self.do_test_fadump(is_kvm=True, is_powernv=False) + +if __name__ == '__main__': + QEMUFadump.main() diff --git a/tests/functional/qemu_test/linuxkernel.py b/tests/functional/qemu_test/linuxkernel.py index 2aca0ee3cd..c4767527da 100644 --- a/tests/functional/qemu_test/linuxkernel.py +++ b/tests/functional/qemu_test/linuxkernel.py @@ -5,6 +5,9 @@ import hashlib import urllib.request +import logging +import re +import time from .cmd import wait_for_console_pattern, exec_command_and_wait_for_pattern from .testcase import QemuSystemTest @@ -19,6 +22,62 @@ class LinuxKernelTest(QemuSystemTest): failure_message='Kernel panic - not syncing', vm=vm) + def wait_for_regex_console_pattern(self, success_pattern, + failure_pattern=None, + timeout=None): + """ + Similar to 'wait_for_console_pattern', but supports regex patterns, + hence multiple failure/success patterns can be detected at a time. + + Args: + success_pattern (str | re.Pattern): A regex pattern that indicates + a successful event. If found, the method exits normally. + failure_pattern (str | re.Pattern, optional): A regex pattern that + indicates a failure event. If found, the test fails + timeout (int, optional): The maximum time (in seconds) to wait for + a match. + If exceeded, the test fails. + """ + + console = self.vm.console_file + console_logger = logging.getLogger('console') + + self.log.debug( + f"Console interaction: success_msg='{success_pattern}' " + + f"failure_msg='{failure_pattern}' timeout='{timeout}s'") + + # Only consume console output if waiting for something + if success_pattern is None and failure_pattern is None: + return + + start_time = time.time() + + while time.time() - start_time < timeout: + try: + msg = console.readline().decode().strip() + except UnicodeDecodeError: + msg = None + if not msg: + continue + console_logger.debug(msg) + if success_pattern is None or re.search(success_pattern, msg): + break + if failure_pattern: + # Find the matching error to print in log + match = re.search(failure_pattern, msg) + if not match: + continue + + console.close() + fail = 'Failure message found in console: "%s".' \ + ' Expected: "%s"' % \ + (match.group(), success_pattern) + self.fail(fail) + + if time.time() - start_time >= timeout: + fail = f"Timeout ({timeout}s) while trying to search pattern" + self.fail(fail) + def launch_kernel(self, kernel, initrd=None, dtb=None, console_index=0, wait_for=None): self.vm.set_console(console_index=console_index)