From 44d691f7d9b6ebab102a31aa87fe59da8f7feff9 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 8 Jul 2016 15:12:07 +0200 Subject: [PATCH 01/14] spapr: fix core unplug crash If the host has 8 threads/core and the guest is started with: -smp cores=1,threads=4,maxcpus=12 It is possible to crash QEMU by doing: (qemu) device_add host-spapr-cpu-core,core-id=16,id=foo (qemu) device_del foo Segmentation fault This happens because spapr_core_unplug() assumes cpu_dt_id == core_id. As long as cpu_dt_id is derived from the non-table cpu_index, this is only true when you plug cores with contiguous ids. It is safer to be consistent: the DR connector was created with an index that is immediately written to cc->core_id, and spapr_core_plug() also relies on cc->core_id. Let's use it also in spapr_core_unplug(). Signed-off-by: Greg Kurz Reviewed-by: Bharata B Rao Signed-off-by: David Gibson --- hw/ppc/spapr_cpu_core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 9347f0741e..bc52b3c289 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -126,11 +126,9 @@ static void spapr_core_release(DeviceState *dev, void *opaque) void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { - sPAPRCPUCore *core = SPAPR_CPU_CORE(OBJECT(dev)); - PowerPCCPU *cpu = POWERPC_CPU(core->threads); - int id = ppc_get_vcpu_dt_id(cpu); + CPUCore *cc = CPU_CORE(dev); sPAPRDRConnector *drc = - spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, id); + spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, cc->core_id); sPAPRDRConnectorClass *drck; Error *local_err = NULL; From ba0b17dd8f00bdc4d55d67046e4300d95ad5f3f2 Mon Sep 17 00:00:00 2001 From: Mark Cave-Ayland Date: Sun, 10 Jul 2016 19:08:53 +0100 Subject: [PATCH 02/14] dbdma: always define DBDMA_DPRINTF and enable debug with DEBUG_DBDMA Enabling DBDMA_DPRINTF unconditionally ensures that any errors in debug statements are picked up immediately. Signed-off-by: Mark Cave-Ayland Acked-by: Benjamin Herrenschmidt Signed-off-by: David Gibson --- hw/misc/macio/mac_dbdma.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c index f116f9c36d..b6639f4f95 100644 --- a/hw/misc/macio/mac_dbdma.c +++ b/hw/misc/macio/mac_dbdma.c @@ -45,14 +45,13 @@ #include "sysemu/dma.h" /* debug DBDMA */ -//#define DEBUG_DBDMA +#define DEBUG_DBDMA 0 -#ifdef DEBUG_DBDMA -#define DBDMA_DPRINTF(fmt, ...) \ - do { printf("DBDMA: " fmt , ## __VA_ARGS__); } while (0) -#else -#define DBDMA_DPRINTF(fmt, ...) -#endif +#define DBDMA_DPRINTF(fmt, ...) do { \ + if (DEBUG_DBDMA) { \ + printf("DBDMA: " fmt , ## __VA_ARGS__); \ + } \ +} while (0); /* */ @@ -62,7 +61,7 @@ static DBDMAState *dbdma_from_ch(DBDMA_channel *ch) return container_of(ch, DBDMAState, channels[ch->channel]); } -#ifdef DEBUG_DBDMA +#if DEBUG_DBDMA static void dump_dbdma_cmd(dbdma_cmd *cmd) { printf("dbdma_cmd %p\n", cmd); From 3e49c43940fa3e61911969dd7b60534d9ec7f00f Mon Sep 17 00:00:00 2001 From: Mark Cave-Ayland Date: Sun, 10 Jul 2016 19:08:54 +0100 Subject: [PATCH 03/14] dbdma: add per-channel debugging enabled via DEBUG_DBDMA_CHANMASK By default large amounts of DBDMA debugging are produced when often it is just 1 or 2 channels that are of interest. Introduce DEBUG_DBDMA_CHANMASK to allow the developer to select the channels of interest at compile time, and then further add the extra channel information to each debug statement where possible. Also clearly mark the start/end of DBDMA_run_bh to allow tracking the bottom half execution. Signed-off-by: Mark Cave-Ayland Acked-by: Benjamin Herrenschmidt Signed-off-by: David Gibson --- hw/misc/macio/mac_dbdma.c | 75 ++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 33 deletions(-) diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c index b6639f4f95..e692312dcc 100644 --- a/hw/misc/macio/mac_dbdma.c +++ b/hw/misc/macio/mac_dbdma.c @@ -46,6 +46,7 @@ /* debug DBDMA */ #define DEBUG_DBDMA 0 +#define DEBUG_DBDMA_CHANMASK ((1ull << DBDMA_CHANNELS) - 1) #define DBDMA_DPRINTF(fmt, ...) do { \ if (DEBUG_DBDMA) { \ @@ -53,6 +54,14 @@ } \ } while (0); +#define DBDMA_DPRINTFCH(ch, fmt, ...) do { \ + if (DEBUG_DBDMA) { \ + if ((1ul << (ch)->channel) & DEBUG_DBDMA_CHANMASK) { \ + printf("DBDMA[%02x]: " fmt , (ch)->channel, ## __VA_ARGS__); \ + } \ + } \ +} while (0); + /* */ @@ -79,26 +88,26 @@ static void dump_dbdma_cmd(dbdma_cmd *cmd) #endif static void dbdma_cmdptr_load(DBDMA_channel *ch) { - DBDMA_DPRINTF("dbdma_cmdptr_load 0x%08x\n", - ch->regs[DBDMA_CMDPTR_LO]); + DBDMA_DPRINTFCH(ch, "dbdma_cmdptr_load 0x%08x\n", + ch->regs[DBDMA_CMDPTR_LO]); dma_memory_read(&address_space_memory, ch->regs[DBDMA_CMDPTR_LO], &ch->current, sizeof(dbdma_cmd)); } static void dbdma_cmdptr_save(DBDMA_channel *ch) { - DBDMA_DPRINTF("dbdma_cmdptr_save 0x%08x\n", - ch->regs[DBDMA_CMDPTR_LO]); - DBDMA_DPRINTF("xfer_status 0x%08x res_count 0x%04x\n", - le16_to_cpu(ch->current.xfer_status), - le16_to_cpu(ch->current.res_count)); + DBDMA_DPRINTFCH(ch, "dbdma_cmdptr_save 0x%08x\n", + ch->regs[DBDMA_CMDPTR_LO]); + DBDMA_DPRINTFCH(ch, "xfer_status 0x%08x res_count 0x%04x\n", + le16_to_cpu(ch->current.xfer_status), + le16_to_cpu(ch->current.res_count)); dma_memory_write(&address_space_memory, ch->regs[DBDMA_CMDPTR_LO], &ch->current, sizeof(dbdma_cmd)); } static void kill_channel(DBDMA_channel *ch) { - DBDMA_DPRINTF("kill_channel\n"); + DBDMA_DPRINTFCH(ch, "kill_channel\n"); ch->regs[DBDMA_STATUS] |= DEAD; ch->regs[DBDMA_STATUS] &= ~ACTIVE; @@ -114,7 +123,7 @@ static void conditional_interrupt(DBDMA_channel *ch) uint32_t status; int cond; - DBDMA_DPRINTF("%s\n", __func__); + DBDMA_DPRINTFCH(ch, "%s\n", __func__); intr = le16_to_cpu(current->command) & INTR_MASK; @@ -123,7 +132,7 @@ static void conditional_interrupt(DBDMA_channel *ch) return; case INTR_ALWAYS: /* always interrupt */ qemu_irq_raise(ch->irq); - DBDMA_DPRINTF("%s: raise\n", __func__); + DBDMA_DPRINTFCH(ch, "%s: raise\n", __func__); return; } @@ -138,13 +147,13 @@ static void conditional_interrupt(DBDMA_channel *ch) case INTR_IFSET: /* intr if condition bit is 1 */ if (cond) { qemu_irq_raise(ch->irq); - DBDMA_DPRINTF("%s: raise\n", __func__); + DBDMA_DPRINTFCH(ch, "%s: raise\n", __func__); } return; case INTR_IFCLR: /* intr if condition bit is 0 */ if (!cond) { qemu_irq_raise(ch->irq); - DBDMA_DPRINTF("%s: raise\n", __func__); + DBDMA_DPRINTFCH(ch, "%s: raise\n", __func__); } return; } @@ -158,7 +167,7 @@ static int conditional_wait(DBDMA_channel *ch) uint32_t status; int cond; - DBDMA_DPRINTF("conditional_wait\n"); + DBDMA_DPRINTFCH(ch, "conditional_wait\n"); wait = le16_to_cpu(current->command) & WAIT_MASK; @@ -217,7 +226,7 @@ static void conditional_branch(DBDMA_channel *ch) uint32_t status; int cond; - DBDMA_DPRINTF("conditional_branch\n"); + DBDMA_DPRINTFCH(ch, "conditional_branch\n"); /* check if we must branch */ @@ -262,7 +271,7 @@ static void dbdma_end(DBDMA_io *io) DBDMA_channel *ch = io->channel; dbdma_cmd *current = &ch->current; - DBDMA_DPRINTF("%s\n", __func__); + DBDMA_DPRINTFCH(ch, "%s\n", __func__); if (conditional_wait(ch)) goto wait; @@ -288,13 +297,13 @@ wait: static void start_output(DBDMA_channel *ch, int key, uint32_t addr, uint16_t req_count, int is_last) { - DBDMA_DPRINTF("start_output\n"); + DBDMA_DPRINTFCH(ch, "start_output\n"); /* KEY_REGS, KEY_DEVICE and KEY_STREAM * are not implemented in the mac-io chip */ - DBDMA_DPRINTF("addr 0x%x key 0x%x\n", addr, key); + DBDMA_DPRINTFCH(ch, "addr 0x%x key 0x%x\n", addr, key); if (!addr || key > KEY_STREAM3) { kill_channel(ch); return; @@ -314,13 +323,13 @@ static void start_output(DBDMA_channel *ch, int key, uint32_t addr, static void start_input(DBDMA_channel *ch, int key, uint32_t addr, uint16_t req_count, int is_last) { - DBDMA_DPRINTF("start_input\n"); + DBDMA_DPRINTFCH(ch, "start_input\n"); /* KEY_REGS, KEY_DEVICE and KEY_STREAM * are not implemented in the mac-io chip */ - DBDMA_DPRINTF("addr 0x%x key 0x%x\n", addr, key); + DBDMA_DPRINTFCH(ch, "addr 0x%x key 0x%x\n", addr, key); if (!addr || key > KEY_STREAM3) { kill_channel(ch); return; @@ -343,7 +352,7 @@ static void load_word(DBDMA_channel *ch, int key, uint32_t addr, dbdma_cmd *current = &ch->current; uint32_t val; - DBDMA_DPRINTF("load_word\n"); + DBDMA_DPRINTFCH(ch, "load_word\n"); /* only implements KEY_SYSTEM */ @@ -382,7 +391,7 @@ static void store_word(DBDMA_channel *ch, int key, uint32_t addr, dbdma_cmd *current = &ch->current; uint32_t val; - DBDMA_DPRINTF("store_word\n"); + DBDMA_DPRINTFCH(ch, "store_word\n"); /* only implements KEY_SYSTEM */ @@ -445,7 +454,7 @@ static void channel_run(DBDMA_channel *ch) uint16_t req_count; uint32_t phy_addr; - DBDMA_DPRINTF("channel_run\n"); + DBDMA_DPRINTFCH(ch, "channel_run\n"); dump_dbdma_cmd(current); /* clear WAKE flag at command fetch */ @@ -539,9 +548,9 @@ static void DBDMA_run_bh(void *opaque) { DBDMAState *s = opaque; - DBDMA_DPRINTF("DBDMA_run_bh\n"); - + DBDMA_DPRINTF("-> DBDMA_run_bh\n"); DBDMA_run(s); + DBDMA_DPRINTF("<- DBDMA_run_bh\n"); } void DBDMA_kick(DBDMAState *dbdma) @@ -556,7 +565,7 @@ void DBDMA_register_channel(void *dbdma, int nchan, qemu_irq irq, DBDMAState *s = dbdma; DBDMA_channel *ch = &s->channels[nchan]; - DBDMA_DPRINTF("DBDMA_register_channel 0x%x\n", nchan); + DBDMA_DPRINTFCH(ch, "DBDMA_register_channel 0x%x\n", nchan); assert(rw); assert(flush); @@ -600,7 +609,7 @@ dbdma_control_write(DBDMA_channel *ch) status &= ~FLUSH; } - DBDMA_DPRINTF(" status 0x%08x\n", status); + DBDMA_DPRINTFCH(ch, " status 0x%08x\n", status); ch->regs[DBDMA_STATUS] = status; @@ -617,10 +626,10 @@ static void dbdma_write(void *opaque, hwaddr addr, DBDMA_channel *ch = &s->channels[channel]; int reg = (addr - (channel << DBDMA_CHANNEL_SHIFT)) >> 2; - DBDMA_DPRINTF("writel 0x" TARGET_FMT_plx " <= 0x%08"PRIx64"\n", - addr, value); - DBDMA_DPRINTF("channel 0x%x reg 0x%x\n", - (uint32_t)addr >> DBDMA_CHANNEL_SHIFT, reg); + DBDMA_DPRINTFCH(ch, "writel 0x" TARGET_FMT_plx " <= 0x%08"PRIx64"\n", + addr, value); + DBDMA_DPRINTFCH(ch, "channel 0x%x reg 0x%x\n", + (uint32_t)addr >> DBDMA_CHANNEL_SHIFT, reg); /* cmdptr cannot be modified if channel is ACTIVE */ @@ -671,9 +680,9 @@ static uint64_t dbdma_read(void *opaque, hwaddr addr, value = ch->regs[reg]; - DBDMA_DPRINTF("readl 0x" TARGET_FMT_plx " => 0x%08x\n", addr, value); - DBDMA_DPRINTF("channel 0x%x reg 0x%x\n", - (uint32_t)addr >> DBDMA_CHANNEL_SHIFT, reg); + DBDMA_DPRINTFCH(ch, "readl 0x" TARGET_FMT_plx " => 0x%08x\n", addr, value); + DBDMA_DPRINTFCH(ch, "channel 0x%x reg 0x%x\n", + (uint32_t)addr >> DBDMA_CHANNEL_SHIFT, reg); switch(reg) { case DBDMA_CONTROL: From 3f0d4128dc641f082c3631d610f843b0cdbb6e61 Mon Sep 17 00:00:00 2001 From: Mark Cave-Ayland Date: Sun, 10 Jul 2016 19:08:55 +0100 Subject: [PATCH 04/14] dbdma: fix endian of DBDMA_CMDPTR_LO during branch The current DBDMA command is stored in little-endian format, so make sure we convert it to match our CPU when updating the DBDMA_CMDPTR_LO register. Signed-off-by: Mark Cave-Ayland Acked-by: Benjamin Herrenschmidt Signed-off-by: David Gibson --- hw/misc/macio/mac_dbdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c index e692312dcc..c4ee38109d 100644 --- a/hw/misc/macio/mac_dbdma.c +++ b/hw/misc/macio/mac_dbdma.c @@ -213,7 +213,7 @@ static void branch(DBDMA_channel *ch) { dbdma_cmd *current = &ch->current; - ch->regs[DBDMA_CMDPTR_LO] = current->cmd_dep; + ch->regs[DBDMA_CMDPTR_LO] = le32_to_cpu(current->cmd_dep); ch->regs[DBDMA_STATUS] |= BT; dbdma_cmdptr_load(ch); } From e12f50b900bcc2079954c40828dcc167e4ace5cb Mon Sep 17 00:00:00 2001 From: Mark Cave-Ayland Date: Sun, 10 Jul 2016 19:08:56 +0100 Subject: [PATCH 05/14] dbdma: fix load_word/store_word value endianness The values to read/write to/from physical memory are copied directly to the physical address with no endian swapping required. Also add some extra information to debugging output while we are here. Signed-off-by: Mark Cave-Ayland Acked-by: Benjamin Herrenschmidt Signed-off-by: David Gibson --- hw/misc/macio/mac_dbdma.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c index c4ee38109d..c5dd0ac16b 100644 --- a/hw/misc/macio/mac_dbdma.c +++ b/hw/misc/macio/mac_dbdma.c @@ -350,9 +350,8 @@ static void load_word(DBDMA_channel *ch, int key, uint32_t addr, uint16_t len) { dbdma_cmd *current = &ch->current; - uint32_t val; - DBDMA_DPRINTFCH(ch, "load_word\n"); + DBDMA_DPRINTFCH(ch, "load_word %d bytes, addr=%08x\n", len, addr); /* only implements KEY_SYSTEM */ @@ -362,14 +361,7 @@ static void load_word(DBDMA_channel *ch, int key, uint32_t addr, return; } - dma_memory_read(&address_space_memory, addr, &val, len); - - if (len == 2) - val = (val << 16) | (current->cmd_dep & 0x0000ffff); - else if (len == 1) - val = (val << 24) | (current->cmd_dep & 0x00ffffff); - - current->cmd_dep = val; + dma_memory_read(&address_space_memory, addr, ¤t->cmd_dep, len); if (conditional_wait(ch)) goto wait; @@ -389,9 +381,9 @@ static void store_word(DBDMA_channel *ch, int key, uint32_t addr, uint16_t len) { dbdma_cmd *current = &ch->current; - uint32_t val; - DBDMA_DPRINTFCH(ch, "store_word\n"); + DBDMA_DPRINTFCH(ch, "store_word %d bytes, addr=%08x pa=%x\n", + len, addr, le32_to_cpu(current->cmd_dep)); /* only implements KEY_SYSTEM */ @@ -401,13 +393,7 @@ static void store_word(DBDMA_channel *ch, int key, uint32_t addr, return; } - val = current->cmd_dep; - if (len == 2) - val >>= 16; - else if (len == 1) - val >>= 24; - - dma_memory_write(&address_space_memory, addr, &val, len); + dma_memory_write(&address_space_memory, addr, ¤t->cmd_dep, len); if (conditional_wait(ch)) goto wait; From 894993905daf9c56fee67e77d8f0f76889dc7b76 Mon Sep 17 00:00:00 2001 From: Mark Cave-Ayland Date: Sun, 10 Jul 2016 19:08:57 +0100 Subject: [PATCH 06/14] dbdma: set FLUSH bit upon reception of flush command for unassigned DBDMA channels This fixes MacOS 9 whereby it continually flushes and polls the status bits until they are set to indicate a successful flush. Signed-off-by: Mark Cave-Ayland Acked-by: Benjamin Herrenschmidt Signed-off-by: David Gibson --- hw/misc/macio/mac_dbdma.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c index c5dd0ac16b..ef5b0a51ca 100644 --- a/hw/misc/macio/mac_dbdma.c +++ b/hw/misc/macio/mac_dbdma.c @@ -783,8 +783,18 @@ static void dbdma_unassigned_rw(DBDMA_io *io) static void dbdma_unassigned_flush(DBDMA_io *io) { DBDMA_channel *ch = io->channel; + dbdma_cmd *current = &ch->current; + uint16_t cmd; qemu_log_mask(LOG_GUEST_ERROR, "%s: use of unassigned channel %d\n", __func__, ch->channel); + + cmd = le16_to_cpu(current->command) & COMMAND_MASK; + if (cmd == OUTPUT_MORE || cmd == OUTPUT_LAST || + cmd == INPUT_MORE || cmd == INPUT_LAST) { + current->xfer_status = cpu_to_le16(ch->regs[DBDMA_STATUS] | FLUSH); + current->res_count = cpu_to_le16(io->len); + dbdma_cmdptr_save(ch); + } } void* DBDMA_init (MemoryRegion **dbdma_mem) From 2df778967b5d27c361c8f1389525d6c7e2dc9d10 Mon Sep 17 00:00:00 2001 From: Mark Cave-Ayland Date: Sun, 10 Jul 2016 19:08:58 +0100 Subject: [PATCH 07/14] dbdma: reset io->processing flag for unassigned DBDMA channel rw accesses Otherwise MacOS 9 hangs upon shutdown. Signed-off-by: Mark Cave-Ayland Acked-by: Benjamin Herrenschmidt Signed-off-by: David Gibson --- hw/misc/macio/mac_dbdma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c index ef5b0a51ca..15452b9a28 100644 --- a/hw/misc/macio/mac_dbdma.c +++ b/hw/misc/macio/mac_dbdma.c @@ -778,6 +778,7 @@ static void dbdma_unassigned_rw(DBDMA_io *io) DBDMA_channel *ch = io->channel; qemu_log_mask(LOG_GUEST_ERROR, "%s: use of unassigned channel %d\n", __func__, ch->channel); + ch->io.processing = false; } static void dbdma_unassigned_flush(DBDMA_io *io) From 36a24df84a4728b1cd7425af24c0d30cd65a51b5 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sat, 9 Jul 2016 13:41:31 +1000 Subject: [PATCH 08/14] ppc: Fix support for odd MSR combinations MacOS uses an architecturally illegal MSR combination that seems nonetheless supported by 32-bit processors, which is to have MSR[PR]=1 and one or more of MSR[DR/IR/EE]=0. This adds support for it. To work properly we need to also properly include support for PR=1,{I,D}R=0 to the MMU index used by the qemu TLB. Signed-off-by: Benjamin Herrenschmidt Tested-by: Mark Cave-Ayland Signed-off-by: David Gibson --- target-ppc/helper_regs.h | 46 +++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h index 8d38828936..3d279f1d8a 100644 --- a/target-ppc/helper_regs.h +++ b/target-ppc/helper_regs.h @@ -41,17 +41,19 @@ static inline void hreg_swap_gpr_tgpr(CPUPPCState *env) static inline void hreg_compute_mem_idx(CPUPPCState *env) { - /* This is our encoding for server processors + /* This is our encoding for server processors. The architecture + * specifies that there is no such thing as userspace with + * translation off, however it appears that MacOS does it and + * some 32-bit CPUs support it. Weird... * * 0 = Guest User space virtual mode * 1 = Guest Kernel space virtual mode - * 2 = Guest Kernel space real mode - * 3 = HV User space virtual mode - * 4 = HV Kernel space virtual mode - * 5 = HV Kernel space real mode - * - * The combination PR=1 IR&DR=0 is invalid, we will treat - * it as IR=DR=1 + * 2 = Guest User space real mode + * 3 = Guest Kernel space real mode + * 4 = HV User space virtual mode + * 5 = HV Kernel space virtual mode + * 6 = HV User space real mode + * 7 = HV Kernel space real mode * * For BookE, we need 8 MMU modes as follow: * @@ -71,20 +73,11 @@ static inline void hreg_compute_mem_idx(CPUPPCState *env) env->immu_idx += msr_gs ? 4 : 0; env->dmmu_idx += msr_gs ? 4 : 0; } else { - /* First calucalte a base value independent of HV */ - if (msr_pr != 0) { - /* User space, ignore IR and DR */ - env->immu_idx = env->dmmu_idx = 0; - } else { - /* Kernel, setup a base I/D value */ - env->immu_idx = msr_ir ? 1 : 2; - env->dmmu_idx = msr_dr ? 1 : 2; - } - /* Then offset it for HV */ - if (msr_hv) { - env->immu_idx += 3; - env->dmmu_idx += 3; - } + env->immu_idx = env->dmmu_idx = msr_pr ? 0 : 1; + env->immu_idx += msr_ir ? 0 : 2; + env->dmmu_idx += msr_dr ? 0 : 2; + env->immu_idx += msr_hv ? 4 : 0; + env->dmmu_idx += msr_hv ? 4 : 0; } } @@ -136,8 +129,13 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value, /* Change the exception prefix on PowerPC 601 */ env->excp_prefix = ((value >> MSR_EP) & 1) * 0xFFF00000; } - /* If PR=1 then EE, IR and DR must be 1 */ - if ((value >> MSR_PR) & 1) { + /* If PR=1 then EE, IR and DR must be 1 + * + * Note: We only enforce this on 64-bit processors. It appears that + * 32-bit implementations supports PR=1 and EE/DR/IR=0 and MacOS + * exploits it. + */ + if ((env->insns_flags & PPC_64B) && ((value >> MSR_PR) & 1)) { value |= (1 << MSR_EE) | (1 << MSR_DR) | (1 << MSR_IR); } #endif From 21bb3093e6accd2d7d60531a472a34e40911acd9 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Tue, 12 Jul 2016 16:54:03 +1000 Subject: [PATCH 09/14] vfio/spapr: Remove stale ioctl() call This ioctl() call to VFIO_IOMMU_SPAPR_TCE_REMOVE was left over from an earlier version of the code and has since been folded into vfio_spapr_remove_window(). It wasn't caught because although the argument structure has been removed, the libc function remove() means this didn't trigger a compile failure. The ioctl() was also almost certain to fail silently and harmlessly with the bogus argument, so this wasn't caught in testing. Suggested-by: Paolo Bonzini Signed-off-by: David Gibson Reviewed-by: Alexey Kardashevskiy --- hw/vfio/spapr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c index 0af342332c..7443d348d9 100644 --- a/hw/vfio/spapr.c +++ b/hw/vfio/spapr.c @@ -177,7 +177,6 @@ int vfio_spapr_create_window(VFIOContainer *container, error_report("Host doesn't support DMA window at %"HWADDR_PRIx", must be %"PRIx64, section->offset_within_address_space, (uint64_t)create.start_addr); - ioctl(container->fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove); return -EINVAL; } trace_vfio_spapr_create_window(create.page_shift, From 5cbc64de25973e9129c5a7897734a06ac64b9aff Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Wed, 13 Jul 2016 12:20:20 +0530 Subject: [PATCH 10/14] spapr: Ensure CPU cores are added contiguously and removed in LIFO order If CPU core addition or removal is allowed in random order leading to holes in the core id range (and hence in the cpu_index range), migration can fail as migration with holes in cpu_index range isn't yet handled correctly. Prevent this situation by enforcing the addition in contiguous order and removal in LIFO order so that we never end up with holes in cpu_index range. Signed-off-by: Bharata B Rao Signed-off-by: David Gibson --- hw/ppc/spapr_cpu_core.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index bc52b3c289..4bfc96bd5a 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -126,12 +126,23 @@ static void spapr_core_release(DeviceState *dev, void *opaque) void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { + sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); CPUCore *cc = CPU_CORE(dev); sPAPRDRConnector *drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, cc->core_id); sPAPRDRConnectorClass *drck; Error *local_err = NULL; + int smt = kvmppc_smt_threads(); + int index = cc->core_id / smt; + int spapr_max_cores = max_cpus / smp_threads; + int i; + for (i = spapr_max_cores - 1; i > index; i--) { + if (spapr->cores[i]) { + error_setg(errp, "core-id %d should be removed first", i * smt); + return; + } + } g_assert(drc); drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); @@ -214,7 +225,7 @@ void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(OBJECT(hotplug_dev)); sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); int spapr_max_cores = max_cpus / smp_threads; - int index; + int index, i; int smt = kvmppc_smt_threads(); Error *local_err = NULL; CPUCore *cc = CPU_CORE(dev); @@ -252,6 +263,14 @@ void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, goto out; } + for (i = 0; i < index; i++) { + if (!spapr->cores[i]) { + error_setg(&local_err, "core-id %d should be added first", + i * smt); + goto out; + } + } + out: g_free(base_core_type); error_propagate(errp, local_err); From c4dfc14b5573d7a2c9731dd3d0cfc20f331f3c87 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 13 Jul 2016 12:00:17 +0200 Subject: [PATCH 11/14] ppc: abort if compat property contains an unknown value It is not possible to set the compat property to an unknown value with powerpc_set_compat(). Something must have gone terribly wrong in QEMU, if we detect an "Internal error" in powerpc_get_compat(). Let's abort then. This patch also drops the "max_compat ? *max_compat : -1" construct. It is useless since max_compat is dereferenced a few lines above. Signed-off-by: Greg Kurz Signed-off-by: David Gibson --- target-ppc/translate_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 7cb784262c..5ecafc7b8b 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -8446,8 +8446,8 @@ static void powerpc_get_compat(Object *obj, Visitor *v, const char *name, case 0: break; default: - error_setg(errp, "Internal error: compat is set to %x", - max_compat ? *max_compat : -1); + error_report("Internal error: compat is set to %x", *max_compat); + abort(); break; } From 28f3331887f9ae1fc19d2b9d7914047483442270 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 14 Jul 2016 10:14:18 +0200 Subject: [PATCH 12/14] ppc/mmu-hash64: Remove duplicated #include statement No need to include error-report.h twice here. Signed-off-by: Thomas Huth Signed-off-by: David Gibson --- target-ppc/mmu-hash64.c | 1 - 1 file changed, 1 deletion(-) diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c index 82c2186bcf..f6ffe35788 100644 --- a/target-ppc/mmu-hash64.c +++ b/target-ppc/mmu-hash64.c @@ -24,7 +24,6 @@ #include "exec/helper-proto.h" #include "qemu/error-report.h" #include "sysemu/kvm.h" -#include "qemu/error-report.h" #include "kvm_ppc.h" #include "mmu-hash64.h" #include "exec/log.h" From b56d417b8d7548e913d928809ce6bb1d6c2563e2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 15 Jul 2016 17:22:10 +0200 Subject: [PATCH 13/14] target-ppc: fix left shift overflow in hpte_page_shift ps->pte_enc is a 32-bit value, which is shifted left and then compared to a 64-bit value. It needs a cast before the shift. Reported by Coverity. Signed-off-by: Paolo Bonzini Signed-off-by: David Gibson --- target-ppc/mmu-hash64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c index f6ffe35788..5de1358d1c 100644 --- a/target-ppc/mmu-hash64.c +++ b/target-ppc/mmu-hash64.c @@ -478,7 +478,7 @@ static unsigned hpte_page_shift(const struct ppc_one_seg_page_size *sps, mask = ((1ULL << ps->page_shift) - 1) & HPTE64_R_RPN; - if ((pte1 & mask) == (ps->pte_enc << HPTE64_R_RPN_SHIFT)) { + if ((pte1 & mask) == ((uint64_t)ps->pte_enc << HPTE64_R_RPN_SHIFT)) { return ps->page_shift; } } From 159d2e39a8602c369542a92573a52acb5f5f58f2 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 15 Jul 2016 10:10:25 +0200 Subject: [PATCH 14/14] ppc: Yet another fix for the huge page support detection mechanism Commit 86b50f2e1bef ("Disable huge page support if it is not available for main RAM") already made sure that huge page support is not announced to the guest if the normal RAM of non-NUMA configurations is not backed by a huge page filesystem. However, there is one more case that can go wrong: NUMA is enabled, but the RAM of the NUMA nodes are not configured with huge page support (and only the memory of a DIMM is configured with it). When QEMU is started with the following command line for example, the Linux guest currently crashes because it is trying to use huge pages on a memory region that does not support huge pages: qemu-system-ppc64 -enable-kvm ... -m 1G,slots=4,maxmem=32G -object \ memory-backend-file,policy=default,mem-path=/hugepages,size=1G,id=mem-mem1 \ -device pc-dimm,id=dimm-mem1,memdev=mem-mem1 -smp 2 \ -numa node,nodeid=0 -numa node,nodeid=1 To fix this issue, we've got to make sure to disable huge page support, too, when there is a NUMA node that is not using a memory backend with huge page support. Fixes: 86b50f2e1befc33407bdfeb6f45f7b0d2439a740 Signed-off-by: Thomas Huth Signed-off-by: David Gibson --- target-ppc/kvm.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 884d564e0f..7a8f5559d9 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -389,12 +389,16 @@ static long getrampagesize(void) object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize); - if (hpsize == LONG_MAX) { + if (hpsize == LONG_MAX || hpsize == getpagesize()) { return getpagesize(); } - if (nb_numa_nodes == 0 && hpsize > getpagesize()) { - /* No NUMA nodes and normal RAM without -mem-path ==> no huge pages! */ + /* If NUMA is disabled or the NUMA nodes are not backed with a + * memory-backend, then there is at least one node using "normal" + * RAM. And since normal RAM has not been configured with "-mem-path" + * (what we've checked earlier here already), we can not use huge pages! + */ + if (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL) { static bool warned; if (!warned) { error_report("Huge page support disabled (n/a for main memory).");