diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 8705612535..228dc54b0b 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -259,8 +259,9 @@ static void smmuv3_init_regs(SMMUv3State *s)
     s->idr[3] = FIELD_DP32(s->idr[3], IDR3, RIL, 1);
     s->idr[3] = FIELD_DP32(s->idr[3], IDR3, HAD, 1);
 
-    /* 4K and 64K granule support */
+    /* 4K, 16K and 64K granule support */
     s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN4K, 1);
+    s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN16K, 1);
     s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN64K, 1);
     s->idr[5] = FIELD_DP32(s->idr[5], IDR5, OAS, SMMU_IDR5_OAS); /* 44 bits */
 
@@ -503,7 +504,8 @@ static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event)
 
         tg = CD_TG(cd, i);
         tt->granule_sz = tg2granule(tg, i);
-        if ((tt->granule_sz != 12 && tt->granule_sz != 16) || CD_ENDI(cd)) {
+        if ((tt->granule_sz != 12 && tt->granule_sz != 14 &&
+             tt->granule_sz != 16) || CD_ENDI(cd)) {
             goto bad_cd;
         }
 
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 9f01d9041b..fee696fb0e 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2757,10 +2757,15 @@ static void machvirt_machine_init(void)
 }
 type_init(machvirt_machine_init);
 
+static void virt_machine_6_1_options(MachineClass *mc)
+{
+}
+DEFINE_VIRT_MACHINE_AS_LATEST(6, 1)
+
 static void virt_machine_6_0_options(MachineClass *mc)
 {
 }
-DEFINE_VIRT_MACHINE_AS_LATEST(6, 0)
+DEFINE_VIRT_MACHINE(6, 0)
 
 static void virt_machine_5_2_options(MachineClass *mc)
 {
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 40def78183..0f5ce43d0c 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -36,6 +36,11 @@
 #include "hw/virtio/virtio.h"
 #include "hw/virtio/virtio-pci.h"
 
+GlobalProperty hw_compat_6_0[] = {
+    { "gpex-pcihost", "allow-unmapped-accesses", "false" },
+};
+const size_t hw_compat_6_0_len = G_N_ELEMENTS(hw_compat_6_0);
+
 GlobalProperty hw_compat_5_2[] = {
     { "ICH9-LPC", "smm-compat", "on"},
     { "PIIX4_PM", "smm-compat", "on"},
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 8a84b25a03..364816efc9 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -96,6 +96,9 @@
 #include "trace.h"
 #include CONFIG_DEVICES
 
+GlobalProperty pc_compat_6_0[] = {};
+const size_t pc_compat_6_0_len = G_N_ELEMENTS(pc_compat_6_0);
+
 GlobalProperty pc_compat_5_2[] = {
     { "ICH9-LPC", "x-smi-cpu-hotunplug", "off" },
 };
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 46cc951073..4e8edffeaf 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -415,7 +415,7 @@ static void pc_i440fx_machine_options(MachineClass *m)
     machine_class_allow_dynamic_sysbus_dev(m, TYPE_VMBUS_BRIDGE);
 }
 
-static void pc_i440fx_6_0_machine_options(MachineClass *m)
+static void pc_i440fx_6_1_machine_options(MachineClass *m)
 {
     PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
     pc_i440fx_machine_options(m);
@@ -424,6 +424,18 @@ static void pc_i440fx_6_0_machine_options(MachineClass *m)
     pcmc->default_cpu_version = 1;
 }
 
+DEFINE_I440FX_MACHINE(v6_1, "pc-i440fx-6.1", NULL,
+                      pc_i440fx_6_1_machine_options);
+
+static void pc_i440fx_6_0_machine_options(MachineClass *m)
+{
+    pc_i440fx_6_1_machine_options(m);
+    m->alias = NULL;
+    m->is_default = false;
+    compat_props_add(m->compat_props, hw_compat_6_0, hw_compat_6_0_len);
+    compat_props_add(m->compat_props, pc_compat_6_0, pc_compat_6_0_len);
+}
+
 DEFINE_I440FX_MACHINE(v6_0, "pc-i440fx-6.0", NULL,
                       pc_i440fx_6_0_machine_options);
 
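The i440fx hunk above shows the versioned-machine-type idiom this series applies everywhere: the 6.0 options function now chains to the 6.1 one and then layers the hw_compat_6_0/pc_compat_6_0 overrides on top, so a pc-i440fx-6.0 guest keeps 6.0 behaviour when run on a 6.1 binary. The same shape repeats for Q35, spapr and s390x below. A minimal standalone sketch of the idiom (plain C with illustrative names, not the QEMU API):

    /*
     * Standalone sketch of the versioned-machine-options idiom: each older
     * version first applies the newer version's options, then pins its own
     * compatibility overrides on top.  Names are illustrative only.
     */
    #include <stdio.h>

    struct machine_opts {
        int default_cpu_version;
        int allow_unmapped_accesses;   /* stands in for a compat property */
    };

    static void machine_6_1_options(struct machine_opts *m)
    {
        m->default_cpu_version = 1;
        m->allow_unmapped_accesses = 1;    /* new default in 6.1 */
    }

    static void machine_6_0_options(struct machine_opts *m)
    {
        machine_6_1_options(m);            /* inherit the newer defaults */
        m->allow_unmapped_accesses = 0;    /* then restore 6.0 behaviour */
    }

    int main(void)
    {
        struct machine_opts m60, m61;
        machine_6_1_options(&m61);
        machine_6_0_options(&m60);
        printf("6.1: unmapped=%d, 6.0: unmapped=%d\n",
               m61.allow_unmapped_accesses, m60.allow_unmapped_accesses);
        return 0;
    }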
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 53450190f5..458ed41c65 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -345,7 +345,7 @@ static void pc_q35_machine_options(MachineClass *m)
     m->max_cpus = 288;
 }
 
-static void pc_q35_6_0_machine_options(MachineClass *m)
+static void pc_q35_6_1_machine_options(MachineClass *m)
 {
     PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
     pc_q35_machine_options(m);
@@ -353,6 +353,17 @@ static void pc_q35_6_0_machine_options(MachineClass *m)
     pcmc->default_cpu_version = 1;
 }
 
+DEFINE_Q35_MACHINE(v6_1, "pc-q35-6.1", NULL,
+                   pc_q35_6_1_machine_options);
+
+static void pc_q35_6_0_machine_options(MachineClass *m)
+{
+    pc_q35_6_1_machine_options(m);
+    m->alias = NULL;
+    compat_props_add(m->compat_props, hw_compat_6_0, hw_compat_6_0_len);
+    compat_props_add(m->compat_props, pc_compat_6_0, pc_compat_6_0_len);
+}
+
 DEFINE_Q35_MACHINE(v6_0, "pc-q35-6.0", NULL,
                    pc_q35_6_0_machine_options);
 
diff --git a/hw/pci-host/gpex.c b/hw/pci-host/gpex.c
index 2bdbe7b456..a6752fac5e 100644
--- a/hw/pci-host/gpex.c
+++ b/hw/pci-host/gpex.c
@@ -83,12 +83,51 @@ static void gpex_host_realize(DeviceState *dev, Error **errp)
     int i;
 
     pcie_host_mmcfg_init(pex, PCIE_MMCFG_SIZE_MAX);
+    sysbus_init_mmio(sbd, &pex->mmio);
+
+    /*
+     * Note that the MemoryRegions io_mmio and io_ioport that we pass
+     * to pci_register_root_bus() are not the same as the
+     * MemoryRegions io_mmio_window and io_ioport_window that we
+     * expose as SysBus MRs. The difference is in the behaviour of
+     * accesses to addresses where no PCI device has been mapped.
+     *
+     * io_mmio and io_ioport are the underlying PCI view of the PCI
+     * address space, and when a PCI device does a bus master access
+     * to a bad address this is reported back to it as a transaction
+     * failure.
+     *
+     * io_mmio_window and io_ioport_window implement "unmapped
+     * addresses read as -1 and ignore writes"; this is traditional
+     * x86 PC behaviour, which is not mandated by the PCI spec proper
+     * but expected by much PCI-using guest software, including Linux.
+     *
+     * In the interests of not being unnecessarily surprising, we
+     * implement it in the gpex PCI host controller, by providing the
+     * _window MRs, which are containers with io ops that implement
+     * the 'background' behaviour and which hold the real PCI MRs as
+     * subregions.
+     */
     memory_region_init(&s->io_mmio, OBJECT(s), "gpex_mmio", UINT64_MAX);
     memory_region_init(&s->io_ioport, OBJECT(s), "gpex_ioport", 64 * 1024);
 
-    sysbus_init_mmio(sbd, &pex->mmio);
-    sysbus_init_mmio(sbd, &s->io_mmio);
-    sysbus_init_mmio(sbd, &s->io_ioport);
+    if (s->allow_unmapped_accesses) {
+        memory_region_init_io(&s->io_mmio_window, OBJECT(s),
+                              &unassigned_io_ops, OBJECT(s),
+                              "gpex_mmio_window", UINT64_MAX);
+        memory_region_init_io(&s->io_ioport_window, OBJECT(s),
+                              &unassigned_io_ops, OBJECT(s),
+                              "gpex_ioport_window", 64 * 1024);
+
+        memory_region_add_subregion(&s->io_mmio_window, 0, &s->io_mmio);
+        memory_region_add_subregion(&s->io_ioport_window, 0, &s->io_ioport);
+        sysbus_init_mmio(sbd, &s->io_mmio_window);
+        sysbus_init_mmio(sbd, &s->io_ioport_window);
+    } else {
+        sysbus_init_mmio(sbd, &s->io_mmio);
+        sysbus_init_mmio(sbd, &s->io_ioport);
+    }
+
     for (i = 0; i < GPEX_NUM_IRQS; i++) {
         sysbus_init_irq(sbd, &s->irq[i]);
         s->irq_num[i] = -1;
@@ -108,6 +147,16 @@ static const char *gpex_host_root_bus_path(PCIHostState *host_bridge,
     return "0000:00";
 }
 
+static Property gpex_host_properties[] = {
+    /*
+     * Permit CPU accesses to unmapped areas of the PIO and MMIO windows
+     * (discarding writes and returning -1 for reads) rather than aborting.
+     */
+    DEFINE_PROP_BOOL("allow-unmapped-accesses", GPEXHost,
+                     allow_unmapped_accesses, true),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void gpex_host_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -117,6 +166,7 @@ static void gpex_host_class_init(ObjectClass *klass, void *data)
     dc->realize = gpex_host_realize;
     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
     dc->fw_name = "pci";
+    device_class_set_props(dc, gpex_host_properties);
 }
 
 static void gpex_host_initfn(Object *obj)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index e4be00b732..529ff056dd 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4605,14 +4605,25 @@ static void spapr_machine_latest_class_options(MachineClass *mc)
     type_init(spapr_machine_register_##suffix)
 
 /*
- * pseries-6.0
+ * pseries-6.1
  */
-static void spapr_machine_6_0_class_options(MachineClass *mc)
+static void spapr_machine_6_1_class_options(MachineClass *mc)
 {
     /* Defaults for the latest behaviour inherited from the base class */
 }
 
-DEFINE_SPAPR_MACHINE(6_0, "6.0", true);
+DEFINE_SPAPR_MACHINE(6_1, "6.1", true);
+
+/*
+ * pseries-6.0
+ */
+static void spapr_machine_6_0_class_options(MachineClass *mc)
+{
+    spapr_machine_6_1_class_options(mc);
+    compat_props_add(mc->compat_props, hw_compat_6_0, hw_compat_6_0_len);
+}
+
+DEFINE_SPAPR_MACHINE(6_0, "6.0", false);
 
 /*
  * pseries-5.2
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 2972b607f3..56b52d2d30 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -795,14 +795,26 @@ bool css_migration_enabled(void)
     }                                                                         \
     type_init(ccw_machine_register_##suffix)
 
+static void ccw_machine_6_1_instance_options(MachineState *machine)
+{
+}
+
+static void ccw_machine_6_1_class_options(MachineClass *mc)
+{
+}
+DEFINE_CCW_MACHINE(6_1, "6.1", true);
+
 static void ccw_machine_6_0_instance_options(MachineState *machine)
 {
+    ccw_machine_6_1_instance_options(machine);
 }
 
 static void ccw_machine_6_0_class_options(MachineClass *mc)
 {
+    ccw_machine_6_1_class_options(mc);
+    compat_props_add(mc->compat_props, hw_compat_6_0, hw_compat_6_0_len);
 }
-DEFINE_CCW_MACHINE(6_0, "6.0", true);
+DEFINE_CCW_MACHINE(6_0, "6.0", false);
 
 static void ccw_machine_5_2_instance_options(MachineState *machine)
 {
diff --git a/include/hw/boards.h b/include/hw/boards.h
index ad6c8fd537..3d55d2bd62 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -353,6 +353,9 @@ struct MachineState {
     } \
     type_init(machine_initfn##_register_types)
 
+extern GlobalProperty hw_compat_6_0[];
+extern const size_t hw_compat_6_0_len;
+
 extern GlobalProperty hw_compat_5_2[];
 extern const size_t hw_compat_5_2_len;
 
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index dcf060b791..1522a3359a 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -197,6 +197,9 @@ bool pc_system_ovmf_table_find(const char *entry, uint8_t **data,
 void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid,
                        const CPUArchIdList *apic_ids, GArray *entry);
 
+extern GlobalProperty pc_compat_6_0[];
+extern const size_t pc_compat_6_0_len;
+
 extern GlobalProperty pc_compat_5_2[];
 extern const size_t pc_compat_5_2_len;
 
diff --git a/include/hw/pci-host/gpex.h b/include/hw/pci-host/gpex.h
index d48a020a95..fcf8b63820 100644
--- a/include/hw/pci-host/gpex.h
+++ b/include/hw/pci-host/gpex.h
@@ -49,8 +49,12 @@ struct GPEXHost {
 
     MemoryRegion io_ioport;
     MemoryRegion io_mmio;
+    MemoryRegion io_ioport_window;
+    MemoryRegion io_mmio_window;
 
     qemu_irq irq[GPEX_NUM_IRQS];
     int irq_num[GPEX_NUM_IRQS];
+
+    bool allow_unmapped_accesses;
 };
 
 struct GPEXConfig {
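The gpex change is why hw_compat_6_0 is not empty: new machine types get allow-unmapped-accesses=true, while 6.0 machines pin it back to false. As a standalone model of what the property buys the guest -- reads of unmapped window space return -1 and writes are discarded, instead of an external abort -- consider this sketch (plain C, not the QEMU memory API):

    /*
     * Standalone model of the window semantics gpex_host_realize() builds:
     * accesses that hit a mapped subregion reach the device; anything else
     * reads as -1 and discards writes instead of raising a bus error.
     */
    #include <stdint.h>
    #include <stdio.h>

    #define BAR_BASE 0x1000u
    #define BAR_SIZE 0x100u

    static uint32_t bar_reg;  /* stands in for one mapped PCI BAR register */

    static int mapped(uint32_t addr)
    {
        return addr >= BAR_BASE && addr < BAR_BASE + BAR_SIZE;
    }

    static uint32_t window_read(uint32_t addr)
    {
        return mapped(addr) ? bar_reg : UINT32_MAX;  /* unmapped reads as -1 */
    }

    static void window_write(uint32_t addr, uint32_t val)
    {
        if (mapped(addr)) {
            bar_reg = val;
        }                        /* unmapped writes are silently ignored */
    }

    int main(void)
    {
        window_write(0x2000, 0x1234);                /* dropped */
        printf("0x%08x\n", window_read(0x2000));     /* 0xffffffff */
        window_write(BAR_BASE, 0xabcd);
        printf("0x%08x\n", window_read(BAR_BASE));   /* 0x0000abcd */
        return 0;
    }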
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 193a49ec7f..616b393253 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -225,6 +225,11 @@ typedef struct ARMPACKey {
 } ARMPACKey;
 #endif
 
+/* See the commentary above the TBFLAG field definitions.  */
+typedef struct CPUARMTBFlags {
+    uint32_t flags;
+    target_ulong flags2;
+} CPUARMTBFlags;
 
 typedef struct CPUARMState {
     /* Regs for current mode.  */
@@ -253,7 +258,7 @@ typedef struct CPUARMState {
     uint32_t aarch64; /* 1 if CPU is in aarch64 state; inverse of PSTATE.nRW */
 
     /* Cached TBFLAGS state.  See below for which bits are included.  */
-    uint32_t hflags;
+    CPUARMTBFlags hflags;
 
     /* Frequently accessed CPSR bits are stored separately for efficiency.
        This contains all the other bits.  Use cpsr_{read,write} to access
@@ -3377,74 +3382,82 @@ typedef ARMCPU ArchCPU;
 #include "exec/cpu-all.h"
 
 /*
- * Bit usage in the TB flags field: bit 31 indicates whether we are
- * in 32 or 64 bit mode. The meaning of the other bits depends on that.
- * We put flags which are shared between 32 and 64 bit mode at the top
- * of the word, and flags which apply to only one mode at the bottom.
+ * We have more than 32-bits worth of state per TB, so we split the data
+ * between tb->flags and tb->cs_base, which is otherwise unused for ARM.
+ * We collect these two parts in CPUARMTBFlags where they are named
+ * flags and flags2 respectively.
  *
- *  31          20    18    14          9              0
- * +--------------+-----+-----+----------+--------------+
- * |              |     |   TBFLAG_A32   |              |
- * |              |     +-----+----------+  TBFLAG_AM32 |
- * |  TBFLAG_ANY  |           |TBFLAG_M32|              |
- * |              +-----------+----------+--------------|
- * |              |            TBFLAG_A64               |
- * +--------------+-------------------------------------+
- *  31          20                                      0
+ * The flags that are shared between all execution modes, TBFLAG_ANY,
+ * are stored in flags.  The flags that are specific to a given mode
+ * are stored in flags2.  Since cs_base is sized on the configured
+ * address size, flags2 always has 64-bits for A64, and a minimum of
+ * 32-bits for A32 and M32.
+ *
+ * The bits for 32-bit A-profile and M-profile partially overlap:
+ *
+ *  31         23         11 10             0
+ * +-------------+----------+----------------+
+ * |             |          |   TBFLAG_A32   |
+ * | TBFLAG_AM32 |          +-----+----------+
+ * |             |                |TBFLAG_M32|
+ * +-------------+----------------+----------+
+ *  31         23                5 4        0
 *
 * Unless otherwise noted, these bits are cached in env->hflags.
 */
-FIELD(TBFLAG_ANY, AARCH64_STATE, 31, 1)
-FIELD(TBFLAG_ANY, SS_ACTIVE, 30, 1)
-FIELD(TBFLAG_ANY, PSTATE_SS, 29, 1)     /* Not cached. */
-FIELD(TBFLAG_ANY, BE_DATA, 28, 1)
-FIELD(TBFLAG_ANY, MMUIDX, 24, 4)
+FIELD(TBFLAG_ANY, AARCH64_STATE, 0, 1)
+FIELD(TBFLAG_ANY, SS_ACTIVE, 1, 1)
+FIELD(TBFLAG_ANY, PSTATE__SS, 2, 1)      /* Not cached. */
+FIELD(TBFLAG_ANY, BE_DATA, 3, 1)
+FIELD(TBFLAG_ANY, MMUIDX, 4, 4)
 /* Target EL if we take a floating-point-disabled exception */
-FIELD(TBFLAG_ANY, FPEXC_EL, 22, 2)
+FIELD(TBFLAG_ANY, FPEXC_EL, 8, 2)
 /* For A-profile only, target EL for debug exceptions.  */
-FIELD(TBFLAG_ANY, DEBUG_TARGET_EL, 20, 2)
+FIELD(TBFLAG_ANY, DEBUG_TARGET_EL, 10, 2)
+/* Memory operations require alignment: SCTLR_ELx.A or CCR.UNALIGN_TRP */
+FIELD(TBFLAG_ANY, ALIGN_MEM, 12, 1)
 
 /*
  * Bit usage when in AArch32 state, both A- and M-profile.
  */
-FIELD(TBFLAG_AM32, CONDEXEC, 0, 8)      /* Not cached. */
-FIELD(TBFLAG_AM32, THUMB, 8, 1)         /* Not cached. */
+FIELD(TBFLAG_AM32, CONDEXEC, 24, 8)     /* Not cached. */
+FIELD(TBFLAG_AM32, THUMB, 23, 1)        /* Not cached. */
 
 /*
  * Bit usage when in AArch32 state, for A-profile only.
  */
-FIELD(TBFLAG_A32, VECLEN, 9, 3)         /* Not cached. */
-FIELD(TBFLAG_A32, VECSTRIDE, 12, 2)     /* Not cached. */
+FIELD(TBFLAG_A32, VECLEN, 0, 3)         /* Not cached. */
+FIELD(TBFLAG_A32, VECSTRIDE, 3, 2)      /* Not cached. */
 /*
  * We store the bottom two bits of the CPAR as TB flags and handle
  * checks on the other bits at runtime. This shares the same bits as
  * VECSTRIDE, which is OK as no XScale CPU has VFP.
  * Not cached, because VECLEN+VECSTRIDE are not cached.
  */
-FIELD(TBFLAG_A32, XSCALE_CPAR, 12, 2)
-FIELD(TBFLAG_A32, VFPEN, 14, 1)         /* Partially cached, minus FPEXC. */
-FIELD(TBFLAG_A32, SCTLR_B, 15, 1)
-FIELD(TBFLAG_A32, HSTR_ACTIVE, 16, 1)
+FIELD(TBFLAG_A32, XSCALE_CPAR, 5, 2)
+FIELD(TBFLAG_A32, VFPEN, 7, 1)          /* Partially cached, minus FPEXC. */
+FIELD(TBFLAG_A32, SCTLR__B, 8, 1)       /* Cannot overlap with SCTLR_B */
+FIELD(TBFLAG_A32, HSTR_ACTIVE, 9, 1)
 /*
  * Indicates whether cp register reads and writes by guest code should access
  * the secure or nonsecure bank of banked registers; note that this is not
 * the same thing as the current security state of the processor!
 */
-FIELD(TBFLAG_A32, NS, 17, 1)
+FIELD(TBFLAG_A32, NS, 10, 1)
 
 /*
  * Bit usage when in AArch32 state, for M-profile only.
 */
 /* Handler (ie not Thread) mode */
-FIELD(TBFLAG_M32, HANDLER, 9, 1)
+FIELD(TBFLAG_M32, HANDLER, 0, 1)
 /* Whether we should generate stack-limit checks */
-FIELD(TBFLAG_M32, STACKCHECK, 10, 1)
+FIELD(TBFLAG_M32, STACKCHECK, 1, 1)
 /* Set if FPCCR.LSPACT is set */
-FIELD(TBFLAG_M32, LSPACT, 11, 1)              /* Not cached. */
+FIELD(TBFLAG_M32, LSPACT, 2, 1)               /* Not cached. */
 /* Set if we must create a new FP context */
-FIELD(TBFLAG_M32, NEW_FP_CTXT_NEEDED, 12, 1)  /* Not cached. */
+FIELD(TBFLAG_M32, NEW_FP_CTXT_NEEDED, 3, 1)   /* Not cached. */
 /* Set if FPCCR.S does not match current security state */
-FIELD(TBFLAG_M32, FPCCR_S_WRONG, 13, 1)       /* Not cached. */
+FIELD(TBFLAG_M32, FPCCR_S_WRONG, 4, 1)        /* Not cached. */
 
 /*
  * Bit usage when in AArch64 state
@@ -3462,6 +3475,26 @@ FIELD(TBFLAG_A64, TCMA, 16, 2)
 FIELD(TBFLAG_A64, MTE_ACTIVE, 18, 1)
 FIELD(TBFLAG_A64, MTE0_ACTIVE, 19, 1)
 
+/*
+ * Helpers for using the above.
+ */
+#define DP_TBFLAG_ANY(DST, WHICH, VAL) \
+    (DST.flags = FIELD_DP32(DST.flags, TBFLAG_ANY, WHICH, VAL))
+#define DP_TBFLAG_A64(DST, WHICH, VAL) \
+    (DST.flags2 = FIELD_DP32(DST.flags2, TBFLAG_A64, WHICH, VAL))
+#define DP_TBFLAG_A32(DST, WHICH, VAL) \
+    (DST.flags2 = FIELD_DP32(DST.flags2, TBFLAG_A32, WHICH, VAL))
+#define DP_TBFLAG_M32(DST, WHICH, VAL) \
+    (DST.flags2 = FIELD_DP32(DST.flags2, TBFLAG_M32, WHICH, VAL))
+#define DP_TBFLAG_AM32(DST, WHICH, VAL) \
+    (DST.flags2 = FIELD_DP32(DST.flags2, TBFLAG_AM32, WHICH, VAL))
+
+#define EX_TBFLAG_ANY(IN, WHICH)   FIELD_EX32(IN.flags, TBFLAG_ANY, WHICH)
+#define EX_TBFLAG_A64(IN, WHICH)   FIELD_EX32(IN.flags2, TBFLAG_A64, WHICH)
+#define EX_TBFLAG_A32(IN, WHICH)   FIELD_EX32(IN.flags2, TBFLAG_A32, WHICH)
+#define EX_TBFLAG_M32(IN, WHICH)   FIELD_EX32(IN.flags2, TBFLAG_M32, WHICH)
+#define EX_TBFLAG_AM32(IN, WHICH)  FIELD_EX32(IN.flags2, TBFLAG_AM32, WHICH)
+
 /**
  * cpu_mmu_index:
  * @env: The cpu environment
@@ -3472,7 +3505,7 @@ FIELD(TBFLAG_A64, MTE0_ACTIVE, 19, 1)
  */
 static inline int cpu_mmu_index(CPUARMState *env, bool ifetch)
 {
-    return FIELD_EX32(env->hflags, TBFLAG_ANY, MMUIDX);
+    return EX_TBFLAG_ANY(env->hflags, MMUIDX);
 }
 
 static inline bool bswap_code(bool sctlr_b)
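With the hflags state split across two words, every FIELD_DP32/FIELD_EX32 call site would otherwise need to know which word a field lives in; the DP_TBFLAG_*/EX_TBFLAG_* wrappers above centralise that choice, which is what the mechanical helper.c conversion below relies on. A self-contained sketch of the packing scheme, with plain shift/mask helpers standing in for the registerfields macros:

    /*
     * Sketch of the two-word flag split that CPUARMTBFlags models: shared
     * bits live in one 32-bit word, mode-specific bits in a second word
     * that rides in tb->cs_base.  Field layout here is illustrative.
     */
    #include <stdint.h>
    #include <stdio.h>

    #define MMUIDX_SHIFT 4
    #define MMUIDX_LEN   4

    typedef struct {
        uint32_t flags;    /* TBFLAG_ANY: shared across modes */
        uint64_t flags2;   /* TBFLAG_A64/A32/M32: mode specific */
    } tbflags;

    static uint32_t dp32(uint32_t w, int sh, int len, uint32_t v)
    {
        uint32_t mask = ((1u << len) - 1) << sh;
        return (w & ~mask) | ((v << sh) & mask);   /* deposit field */
    }

    static uint32_t ex32(uint32_t w, int sh, int len)
    {
        return (w >> sh) & ((1u << len) - 1);      /* extract field */
    }

    int main(void)
    {
        tbflags f = { 0, 0 };
        f.flags = dp32(f.flags, MMUIDX_SHIFT, MMUIDX_LEN, 9);
        printf("mmuidx=%u\n", ex32(f.flags, MMUIDX_SHIFT, MMUIDX_LEN));
        return 0;
    }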
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 061c8ff846..9cc3b066e2 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -1020,7 +1020,7 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
      * the hflags rebuild, since we can pull the composite TBII field
      * from there.
      */
-    tbii = FIELD_EX32(env->hflags, TBFLAG_A64, TBII);
+    tbii = EX_TBFLAG_A64(env->hflags, TBII);
     if ((tbii >> extract64(new_pc, 55, 1)) & 1) {
         /* TBI is enabled. */
         int core_mmu_idx = cpu_mmu_index(env, false);
diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index c139fa81f9..7b706571bb 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -104,8 +104,7 @@ DEF_HELPER_FLAGS_3(autdb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_2(xpaci, TCG_CALL_NO_RWG_SE, i64, env, i64)
 DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64)
 
-DEF_HELPER_FLAGS_3(mte_check1, TCG_CALL_NO_WG, i64, env, i32, i64)
-DEF_HELPER_FLAGS_3(mte_checkN, TCG_CALL_NO_WG, i64, env, i32, i64)
+DEF_HELPER_FLAGS_3(mte_check, TCG_CALL_NO_WG, i64, env, i32, i64)
 DEF_HELPER_FLAGS_3(mte_check_zva, TCG_CALL_NO_WG, i64, env, i32, i64)
 DEF_HELPER_FLAGS_3(irg, TCG_CALL_NO_RWG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_4(addsubg, TCG_CALL_NO_RWG_SE, i64, env, i64, s32, i32)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index d9220be7c5..9b1b98705f 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -12984,42 +12984,49 @@ ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env)
 }
 #endif
 
-static uint32_t rebuild_hflags_common(CPUARMState *env, int fp_el,
-                                      ARMMMUIdx mmu_idx, uint32_t flags)
+static CPUARMTBFlags rebuild_hflags_common(CPUARMState *env, int fp_el,
+                                           ARMMMUIdx mmu_idx,
+                                           CPUARMTBFlags flags)
 {
-    flags = FIELD_DP32(flags, TBFLAG_ANY, FPEXC_EL, fp_el);
-    flags = FIELD_DP32(flags, TBFLAG_ANY, MMUIDX,
-                       arm_to_core_mmu_idx(mmu_idx));
+    DP_TBFLAG_ANY(flags, FPEXC_EL, fp_el);
+    DP_TBFLAG_ANY(flags, MMUIDX, arm_to_core_mmu_idx(mmu_idx));
 
     if (arm_singlestep_active(env)) {
-        flags = FIELD_DP32(flags, TBFLAG_ANY, SS_ACTIVE, 1);
+        DP_TBFLAG_ANY(flags, SS_ACTIVE, 1);
     }
 
     return flags;
 }
 
-static uint32_t rebuild_hflags_common_32(CPUARMState *env, int fp_el,
-                                         ARMMMUIdx mmu_idx, uint32_t flags)
+static CPUARMTBFlags rebuild_hflags_common_32(CPUARMState *env, int fp_el,
+                                              ARMMMUIdx mmu_idx,
+                                              CPUARMTBFlags flags)
 {
     bool sctlr_b = arm_sctlr_b(env);
 
     if (sctlr_b) {
-        flags = FIELD_DP32(flags, TBFLAG_A32, SCTLR_B, 1);
+        DP_TBFLAG_A32(flags, SCTLR__B, 1);
     }
     if (arm_cpu_data_is_big_endian_a32(env, sctlr_b)) {
-        flags = FIELD_DP32(flags, TBFLAG_ANY, BE_DATA, 1);
+        DP_TBFLAG_ANY(flags, BE_DATA, 1);
     }
-    flags = FIELD_DP32(flags, TBFLAG_A32, NS, !access_secure_reg(env));
+    DP_TBFLAG_A32(flags, NS, !access_secure_reg(env));
 
     return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
 }
 
-static uint32_t rebuild_hflags_m32(CPUARMState *env, int fp_el,
-                                   ARMMMUIdx mmu_idx)
+static CPUARMTBFlags rebuild_hflags_m32(CPUARMState *env, int fp_el,
+                                        ARMMMUIdx mmu_idx)
 {
-    uint32_t flags = 0;
+    CPUARMTBFlags flags = {};
+    uint32_t ccr = env->v7m.ccr[env->v7m.secure];
+
+    /* Without HaveMainExt, CCR.UNALIGN_TRP is RES1.  */
+    if (ccr & R_V7M_CCR_UNALIGN_TRP_MASK) {
+        DP_TBFLAG_ANY(flags, ALIGN_MEM, 1);
+    }
 
     if (arm_v7m_is_handler_mode(env)) {
-        flags = FIELD_DP32(flags, TBFLAG_M32, HANDLER, 1);
+        DP_TBFLAG_M32(flags, HANDLER, 1);
     }
 
     /*
@@ -13029,56 +13036,60 @@ static uint32_t rebuild_hflags_m32(CPUARMState *env, int fp_el,
      */
     if (arm_feature(env, ARM_FEATURE_V8) &&
         !((mmu_idx & ARM_MMU_IDX_M_NEGPRI) &&
-          (env->v7m.ccr[env->v7m.secure] & R_V7M_CCR_STKOFHFNMIGN_MASK))) {
-        flags = FIELD_DP32(flags, TBFLAG_M32, STACKCHECK, 1);
+          (ccr & R_V7M_CCR_STKOFHFNMIGN_MASK))) {
+        DP_TBFLAG_M32(flags, STACKCHECK, 1);
     }
 
     return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
 }
 
-static uint32_t rebuild_hflags_aprofile(CPUARMState *env)
+static CPUARMTBFlags rebuild_hflags_aprofile(CPUARMState *env)
 {
-    int flags = 0;
+    CPUARMTBFlags flags = {};
 
-    flags = FIELD_DP32(flags, TBFLAG_ANY, DEBUG_TARGET_EL,
-                       arm_debug_target_el(env));
+    DP_TBFLAG_ANY(flags, DEBUG_TARGET_EL, arm_debug_target_el(env));
     return flags;
 }
 
-static uint32_t rebuild_hflags_a32(CPUARMState *env, int fp_el,
-                                   ARMMMUIdx mmu_idx)
+static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el,
+                                        ARMMMUIdx mmu_idx)
 {
-    uint32_t flags = rebuild_hflags_aprofile(env);
+    CPUARMTBFlags flags = rebuild_hflags_aprofile(env);
+    int el = arm_current_el(env);
 
-    if (arm_el_is_aa64(env, 1)) {
-        flags = FIELD_DP32(flags, TBFLAG_A32, VFPEN, 1);
+    if (arm_sctlr(env, el) & SCTLR_A) {
+        DP_TBFLAG_ANY(flags, ALIGN_MEM, 1);
     }
 
-    if (arm_current_el(env) < 2 && env->cp15.hstr_el2 &&
+    if (arm_el_is_aa64(env, 1)) {
+        DP_TBFLAG_A32(flags, VFPEN, 1);
+    }
+
+    if (el < 2 && env->cp15.hstr_el2 &&
        (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) {
-        flags = FIELD_DP32(flags, TBFLAG_A32, HSTR_ACTIVE, 1);
+        DP_TBFLAG_A32(flags, HSTR_ACTIVE, 1);
     }
 
     return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
 }
 
-static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
-                                   ARMMMUIdx mmu_idx)
+static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
+                                        ARMMMUIdx mmu_idx)
 {
-    uint32_t flags = rebuild_hflags_aprofile(env);
+    CPUARMTBFlags flags = rebuild_hflags_aprofile(env);
     ARMMMUIdx stage1 = stage_1_mmu_idx(mmu_idx);
     uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr;
     uint64_t sctlr;
     int tbii, tbid;
 
-    flags = FIELD_DP32(flags, TBFLAG_ANY, AARCH64_STATE, 1);
+    DP_TBFLAG_ANY(flags, AARCH64_STATE, 1);
 
     /* Get control bits for tagged addresses.  */
     tbid = aa64_va_parameter_tbi(tcr, mmu_idx);
     tbii = tbid & ~aa64_va_parameter_tbid(tcr, mmu_idx);
-    flags = FIELD_DP32(flags, TBFLAG_A64, TBII, tbii);
-    flags = FIELD_DP32(flags, TBFLAG_A64, TBID, tbid);
+    DP_TBFLAG_A64(flags, TBII, tbii);
+    DP_TBFLAG_A64(flags, TBID, tbid);
 
     if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
         int sve_el = sve_exception_el(env, el);
@@ -13093,14 +13104,18 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
         } else {
             zcr_len = sve_zcr_len_for_el(env, el);
         }
-        flags = FIELD_DP32(flags, TBFLAG_A64, SVEEXC_EL, sve_el);
-        flags = FIELD_DP32(flags, TBFLAG_A64, ZCR_LEN, zcr_len);
+        DP_TBFLAG_A64(flags, SVEEXC_EL, sve_el);
+        DP_TBFLAG_A64(flags, ZCR_LEN, zcr_len);
     }
 
     sctlr = regime_sctlr(env, stage1);
 
+    if (sctlr & SCTLR_A) {
+        DP_TBFLAG_ANY(flags, ALIGN_MEM, 1);
+    }
+
     if (arm_cpu_data_is_big_endian_a64(el, sctlr)) {
-        flags = FIELD_DP32(flags, TBFLAG_ANY, BE_DATA, 1);
+        DP_TBFLAG_ANY(flags, BE_DATA, 1);
     }
 
     if (cpu_isar_feature(aa64_pauth, env_archcpu(env))) {
@@ -13111,14 +13126,14 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
          * The decision of which action to take is left to a helper.
          */
         if (sctlr & (SCTLR_EnIA | SCTLR_EnIB | SCTLR_EnDA | SCTLR_EnDB)) {
-            flags = FIELD_DP32(flags, TBFLAG_A64, PAUTH_ACTIVE, 1);
+            DP_TBFLAG_A64(flags, PAUTH_ACTIVE, 1);
         }
     }
 
     if (cpu_isar_feature(aa64_bti, env_archcpu(env))) {
         /* Note that SCTLR_EL[23].BT == SCTLR_BT1.  */
         if (sctlr & (el == 0 ? SCTLR_BT0 : SCTLR_BT1)) {
-            flags = FIELD_DP32(flags, TBFLAG_A64, BT, 1);
+            DP_TBFLAG_A64(flags, BT, 1);
         }
     }
 
@@ -13130,7 +13145,7 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
     case ARMMMUIdx_SE10_1:
     case ARMMMUIdx_SE10_1_PAN:
         /* TODO: ARMv8.3-NV */
-        flags = FIELD_DP32(flags, TBFLAG_A64, UNPRIV, 1);
+        DP_TBFLAG_A64(flags, UNPRIV, 1);
         break;
     case ARMMMUIdx_E20_2:
     case ARMMMUIdx_E20_2_PAN:
@@ -13141,7 +13156,7 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
          * gated by HCR_EL2.<E2H,TGE> == '11', and so is LDTR.
          */
         if (env->cp15.hcr_el2 & HCR_TGE) {
-            flags = FIELD_DP32(flags, TBFLAG_A64, UNPRIV, 1);
+            DP_TBFLAG_A64(flags, UNPRIV, 1);
         }
         break;
     default:
@@ -13159,30 +13174,29 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
      * 4) If no Allocation Tag Access, then all accesses are Unchecked.
      */
     if (allocation_tag_access_enabled(env, el, sctlr)) {
-        flags = FIELD_DP32(flags, TBFLAG_A64, ATA, 1);
+        DP_TBFLAG_A64(flags, ATA, 1);
         if (tbid
             && !(env->pstate & PSTATE_TCO)
             && (sctlr & (el == 0 ? SCTLR_TCF0 : SCTLR_TCF))) {
-            flags = FIELD_DP32(flags, TBFLAG_A64, MTE_ACTIVE, 1);
+            DP_TBFLAG_A64(flags, MTE_ACTIVE, 1);
         }
     }
     /* And again for unprivileged accesses, if required.  */
-    if (FIELD_EX32(flags, TBFLAG_A64, UNPRIV)
+    if (EX_TBFLAG_A64(flags, UNPRIV)
         && tbid
         && !(env->pstate & PSTATE_TCO)
         && (sctlr & SCTLR_TCF0)
         && allocation_tag_access_enabled(env, 0, sctlr)) {
-        flags = FIELD_DP32(flags, TBFLAG_A64, MTE0_ACTIVE, 1);
+        DP_TBFLAG_A64(flags, MTE0_ACTIVE, 1);
     }
     /* Cache TCMA as well as TBI.  */
-    flags = FIELD_DP32(flags, TBFLAG_A64, TCMA,
-                       aa64_va_parameter_tcma(tcr, mmu_idx));
+    DP_TBFLAG_A64(flags, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx));
 
     return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
 }
 
-static uint32_t rebuild_hflags_internal(CPUARMState *env)
+static CPUARMTBFlags rebuild_hflags_internal(CPUARMState *env)
 {
     int el = arm_current_el(env);
     int fp_el = fp_exception_el(env, el);
@@ -13211,6 +13225,7 @@ void HELPER(rebuild_hflags_m32_newel)(CPUARMState *env)
     int el = arm_current_el(env);
     int fp_el = fp_exception_el(env, el);
     ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el);
+
     env->hflags = rebuild_hflags_m32(env, fp_el, mmu_idx);
 }
 
@@ -13253,12 +13268,14 @@ void HELPER(rebuild_hflags_a64)(CPUARMState *env, int el)
 static inline void assert_hflags_rebuild_correctly(CPUARMState *env)
 {
 #ifdef CONFIG_DEBUG_TCG
-    uint32_t env_flags_current = env->hflags;
-    uint32_t env_flags_rebuilt = rebuild_hflags_internal(env);
+    CPUARMTBFlags c = env->hflags;
+    CPUARMTBFlags r = rebuild_hflags_internal(env);
 
-    if (unlikely(env_flags_current != env_flags_rebuilt)) {
-        fprintf(stderr, "TCG hflags mismatch (current:0x%08x rebuilt:0x%08x)\n",
-                env_flags_current, env_flags_rebuilt);
+    if (unlikely(c.flags != r.flags || c.flags2 != r.flags2)) {
+        fprintf(stderr, "TCG hflags mismatch "
+                        "(current:(0x%08x,0x" TARGET_FMT_lx ")"
+                        " rebuilt:(0x%08x,0x" TARGET_FMT_lx ")\n",
+                c.flags, c.flags2, r.flags, r.flags2);
         abort();
     }
 #endif
@@ -13267,15 +13284,15 @@ static inline void assert_hflags_rebuild_correctly(CPUARMState *env)
 void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
                           target_ulong *cs_base, uint32_t *pflags)
 {
-    uint32_t flags = env->hflags;
+    CPUARMTBFlags flags;
 
-    *cs_base = 0;
     assert_hflags_rebuild_correctly(env);
+    flags = env->hflags;
 
-    if (FIELD_EX32(flags, TBFLAG_ANY, AARCH64_STATE)) {
+    if (EX_TBFLAG_ANY(flags, AARCH64_STATE)) {
         *pc = env->pc;
         if (cpu_isar_feature(aa64_bti, env_archcpu(env))) {
-            flags = FIELD_DP32(flags, TBFLAG_A64, BTYPE, env->btype);
+            DP_TBFLAG_A64(flags, BTYPE, env->btype);
         }
     } else {
         *pc = env->regs[15];
@@ -13284,7 +13301,7 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
             if (arm_feature(env, ARM_FEATURE_M_SECURITY) &&
                 FIELD_EX32(env->v7m.fpccr[M_REG_S], V7M_FPCCR, S)
                 != env->v7m.secure) {
-                flags = FIELD_DP32(flags, TBFLAG_M32, FPCCR_S_WRONG, 1);
+                DP_TBFLAG_M32(flags, FPCCR_S_WRONG, 1);
             }
 
             if ((env->v7m.fpccr[env->v7m.secure] & R_V7M_FPCCR_ASPEN_MASK) &&
@@ -13296,12 +13313,12 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
                  * active FP context; we must create a new FP context before
                  * executing any FP insn.
                  */
-                flags = FIELD_DP32(flags, TBFLAG_M32, NEW_FP_CTXT_NEEDED, 1);
+                DP_TBFLAG_M32(flags, NEW_FP_CTXT_NEEDED, 1);
             }
 
             bool is_secure = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK;
             if (env->v7m.fpccr[is_secure] & R_V7M_FPCCR_LSPACT_MASK) {
-                flags = FIELD_DP32(flags, TBFLAG_M32, LSPACT, 1);
+                DP_TBFLAG_M32(flags, LSPACT, 1);
             }
         } else {
             /*
@@ -13309,21 +13326,18 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
              * Note that VECLEN+VECSTRIDE are RES0 for M-profile.
              */
             if (arm_feature(env, ARM_FEATURE_XSCALE)) {
-                flags = FIELD_DP32(flags, TBFLAG_A32,
-                                   XSCALE_CPAR, env->cp15.c15_cpar);
+                DP_TBFLAG_A32(flags, XSCALE_CPAR, env->cp15.c15_cpar);
             } else {
-                flags = FIELD_DP32(flags, TBFLAG_A32, VECLEN,
-                                   env->vfp.vec_len);
-                flags = FIELD_DP32(flags, TBFLAG_A32, VECSTRIDE,
-                                   env->vfp.vec_stride);
+                DP_TBFLAG_A32(flags, VECLEN, env->vfp.vec_len);
+                DP_TBFLAG_A32(flags, VECSTRIDE, env->vfp.vec_stride);
             }
             if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) {
-                flags = FIELD_DP32(flags, TBFLAG_A32, VFPEN, 1);
+                DP_TBFLAG_A32(flags, VFPEN, 1);
             }
         }
 
-        flags = FIELD_DP32(flags, TBFLAG_AM32, THUMB, env->thumb);
-        flags = FIELD_DP32(flags, TBFLAG_AM32, CONDEXEC, env->condexec_bits);
+        DP_TBFLAG_AM32(flags, THUMB, env->thumb);
+        DP_TBFLAG_AM32(flags, CONDEXEC, env->condexec_bits);
     }
 
     /*
@@ -13333,14 +13347,14 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
      *     0            x       Inactive (the TB flag for SS is always 0)
      *     1            0       Active-pending
      *     1            1       Active-not-pending
-     * SS_ACTIVE is set in hflags; PSTATE_SS is computed every TB.
+     * SS_ACTIVE is set in hflags; PSTATE__SS is computed every TB.
      */
-    if (FIELD_EX32(flags, TBFLAG_ANY, SS_ACTIVE) &&
-        (env->pstate & PSTATE_SS)) {
-        flags = FIELD_DP32(flags, TBFLAG_ANY, PSTATE_SS, 1);
+    if (EX_TBFLAG_ANY(flags, SS_ACTIVE) && (env->pstate & PSTATE_SS)) {
+        DP_TBFLAG_ANY(flags, PSTATE__SS, 1);
     }
 
-    *pflags = flags;
+    *pflags = flags.flags;
+    *cs_base = flags.flags2;
 }
 
 #ifdef TARGET_AARCH64
diff --git a/target/arm/internals.h b/target/arm/internals.h
index f11bd32696..886db56b58 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -26,6 +26,7 @@
 #define TARGET_ARM_INTERNALS_H
 
 #include "hw/registerfields.h"
+#include "tcg/tcg-gvec-desc.h"
 #include "syndrome.h"
 
 /* register banks for CPU modes */
@@ -1142,14 +1143,10 @@ FIELD(MTEDESC, MIDX, 0, 4)
 FIELD(MTEDESC, TBI, 4, 2)
 FIELD(MTEDESC, TCMA, 6, 2)
 FIELD(MTEDESC, WRITE, 8, 1)
-FIELD(MTEDESC, ESIZE, 9, 5)
-FIELD(MTEDESC, TSIZE, 14, 10)  /* mte_checkN only */
+FIELD(MTEDESC, SIZEM1, 9, SIMD_DATA_BITS - 9)  /* size - 1 */
 
-bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr);
-uint64_t mte_check1(CPUARMState *env, uint32_t desc,
-                    uint64_t ptr, uintptr_t ra);
-uint64_t mte_checkN(CPUARMState *env, uint32_t desc,
-                    uint64_t ptr, uintptr_t ra);
+bool mte_probe(CPUARMState *env, uint32_t desc, uint64_t ptr);
+uint64_t mte_check(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra);
 
 static inline int allocation_tag_from_addr(uint64_t ptr)
 {
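The MTEDESC repacking above is what lets one helper serve both the old single-access and multi-access paths: instead of a 5-bit element size plus a 10-bit total size, the descriptor carries one size-minus-one field, so an n-bit field describes any access of 1 to 2^n bytes and the last byte accessed is simply ptr + sizem1. A small standalone illustration (the field width here is illustrative, cf. SIMD_DATA_BITS - 9):

    /*
     * Sketch of the SIZEM1 ("size minus one") encoding replacing the old
     * ESIZE/TSIZE pair: storing size-1 makes every bit pattern of the
     * field a valid size, and gives the inclusive end of the access.
     */
    #include <stdint.h>
    #include <stdio.h>

    #define SIZEM1_SHIFT 9
    #define SIZEM1_LEN   13   /* illustrative width only */

    static uint32_t encode_sizem1(uint32_t desc, uint32_t total_bytes)
    {
        uint32_t mask = ((1u << SIZEM1_LEN) - 1) << SIZEM1_SHIFT;
        return (desc & ~mask) | (((total_bytes - 1) << SIZEM1_SHIFT) & mask);
    }

    int main(void)
    {
        uint64_t ptr = 0x1000;
        uint32_t desc = encode_sizem1(0, 16);   /* a 16-byte access */
        uint32_t sizem1 = (desc >> SIZEM1_SHIFT) & ((1u << SIZEM1_LEN) - 1);
        printf("last byte touched: 0x%llx\n",
               (unsigned long long)(ptr + sizem1));   /* 0x100f */
        return 0;
    }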
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index 8be17e1b70..a6fccc6e69 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -121,7 +121,7 @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
      * exception for inaccessible pages, and resolves the virtual address
      * into the softmmu tlb.
      *
-     * When RA == 0, this is for mte_probe1.  The page is expected to be
+     * When RA == 0, this is for mte_probe.  The page is expected to be
      * valid.  Indicate to probe_access_flags no-fault, then assert that
      * we received a valid page.
      */
@@ -617,80 +617,6 @@ static void mte_check_fail(CPUARMState *env, uint32_t desc,
     }
 }
 
-/*
- * Perform an MTE checked access for a single logical or atomic access.
- */
-static bool mte_probe1_int(CPUARMState *env, uint32_t desc, uint64_t ptr,
-                           uintptr_t ra, int bit55)
-{
-    int mem_tag, mmu_idx, ptr_tag, size;
-    MMUAccessType type;
-    uint8_t *mem;
-
-    ptr_tag = allocation_tag_from_addr(ptr);
-
-    if (tcma_check(desc, bit55, ptr_tag)) {
-        return true;
-    }
-
-    mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
-    type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD;
-    size = FIELD_EX32(desc, MTEDESC, ESIZE);
-
-    mem = allocation_tag_mem(env, mmu_idx, ptr, type, size,
-                             MMU_DATA_LOAD, 1, ra);
-    if (!mem) {
-        return true;
-    }
-
-    mem_tag = load_tag1(ptr, mem);
-    return ptr_tag == mem_tag;
-}
-
-/*
- * No-fault version of mte_check1, to be used by SVE for MemSingleNF.
- * Returns false if the access is Checked and the check failed.  This
- * is only intended to probe the tag -- the validity of the page must
- * be checked beforehand.
- */
-bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr)
-{
-    int bit55 = extract64(ptr, 55, 1);
-
-    /* If TBI is disabled, the access is unchecked. */
-    if (unlikely(!tbi_check(desc, bit55))) {
-        return true;
-    }
-
-    return mte_probe1_int(env, desc, ptr, 0, bit55);
-}
-
-uint64_t mte_check1(CPUARMState *env, uint32_t desc,
-                    uint64_t ptr, uintptr_t ra)
-{
-    int bit55 = extract64(ptr, 55, 1);
-
-    /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */
-    if (unlikely(!tbi_check(desc, bit55))) {
-        return ptr;
-    }
-
-    if (unlikely(!mte_probe1_int(env, desc, ptr, ra, bit55))) {
-        mte_check_fail(env, desc, ptr, ra);
-    }
-
-    return useronly_clean_ptr(ptr);
-}
-
-uint64_t HELPER(mte_check1)(CPUARMState *env, uint32_t desc, uint64_t ptr)
-{
-    return mte_check1(env, desc, ptr, GETPC());
-}
-
-/*
- * Perform an MTE checked access for multiple logical accesses.
- */
-
 /**
  * checkN:
  * @tag: tag memory to test
@@ -753,59 +679,70 @@ static int checkN(uint8_t *mem, int odd, int cmp, int count)
     return n;
 }
 
-uint64_t mte_checkN(CPUARMState *env, uint32_t desc,
-                    uint64_t ptr, uintptr_t ra)
+/**
+ * mte_probe_int() - helper for mte_probe and mte_check
+ * @env: CPU environment
+ * @desc: MTEDESC descriptor
+ * @ptr: virtual address of the base of the access
+ * @fault: return virtual address of the first check failure
+ *
+ * Internal routine for both mte_probe and mte_check.
+ * Return zero on failure, filling in *fault.
+ * Return negative on trivial success for tbi disabled.
+ * Return positive on success with tbi enabled.
+ */
+static int mte_probe_int(CPUARMState *env, uint32_t desc, uint64_t ptr,
+                         uintptr_t ra, uint64_t *fault)
 {
     int mmu_idx, ptr_tag, bit55;
-    uint64_t ptr_last, ptr_end, prev_page, next_page;
-    uint64_t tag_first, tag_end;
-    uint64_t tag_byte_first, tag_byte_end;
-    uint32_t esize, total, tag_count, tag_size, n, c;
+    uint64_t ptr_last, prev_page, next_page;
+    uint64_t tag_first, tag_last;
+    uint64_t tag_byte_first, tag_byte_last;
+    uint32_t sizem1, tag_count, tag_size, n, c;
     uint8_t *mem1, *mem2;
     MMUAccessType type;
 
     bit55 = extract64(ptr, 55, 1);
+    *fault = ptr;
 
     /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */
     if (unlikely(!tbi_check(desc, bit55))) {
-        return ptr;
+        return -1;
     }
 
     ptr_tag = allocation_tag_from_addr(ptr);
 
     if (tcma_check(desc, bit55, ptr_tag)) {
-        goto done;
+        return 1;
     }
 
     mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
     type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD;
-    esize = FIELD_EX32(desc, MTEDESC, ESIZE);
-    total = FIELD_EX32(desc, MTEDESC, TSIZE);
+    sizem1 = FIELD_EX32(desc, MTEDESC, SIZEM1);
 
-    /* Find the addr of the end of the access, and of the last element. */
-    ptr_end = ptr + total;
-    ptr_last = ptr_end - esize;
+    /* Find the addr of the end of the access */
+    ptr_last = ptr + sizem1;
 
     /* Round the bounds to the tag granule, and compute the number of tags. */
     tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE);
-    tag_end = QEMU_ALIGN_UP(ptr_last, TAG_GRANULE);
-    tag_count = (tag_end - tag_first) / TAG_GRANULE;
+    tag_last = QEMU_ALIGN_DOWN(ptr_last, TAG_GRANULE);
+    tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1;
 
     /* Round the bounds to twice the tag granule, and compute the bytes. */
     tag_byte_first = QEMU_ALIGN_DOWN(ptr, 2 * TAG_GRANULE);
-    tag_byte_end = QEMU_ALIGN_UP(ptr_last, 2 * TAG_GRANULE);
+    tag_byte_last = QEMU_ALIGN_DOWN(ptr_last, 2 * TAG_GRANULE);
 
     /* Locate the page boundaries. */
     prev_page = ptr & TARGET_PAGE_MASK;
     next_page = prev_page + TARGET_PAGE_SIZE;
 
-    if (likely(tag_end - prev_page <= TARGET_PAGE_SIZE)) {
+    if (likely(tag_last - prev_page <= TARGET_PAGE_SIZE)) {
         /* Memory access stays on one page. */
-        tag_size = (tag_byte_end - tag_byte_first) / (2 * TAG_GRANULE);
-        mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, total,
+        tag_size = ((tag_byte_last - tag_byte_first) / (2 * TAG_GRANULE)) + 1;
+        mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, sizem1 + 1,
                                   MMU_DATA_LOAD, tag_size, ra);
         if (!mem1) {
-            goto done;
+            return 1;
         }
         /* Perform all of the comparisons. */
         n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, tag_count);
@@ -815,9 +752,9 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc,
         mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, next_page - ptr,
                                   MMU_DATA_LOAD, tag_size, ra);
 
-        tag_size = (tag_byte_end - next_page) / (2 * TAG_GRANULE);
+        tag_size = ((tag_byte_last - next_page) / (2 * TAG_GRANULE)) + 1;
         mem2 = allocation_tag_mem(env, mmu_idx, next_page, type,
-                                  ptr_end - next_page,
+                                  ptr_last - next_page + 1,
                                   MMU_DATA_LOAD, tag_size, ra);
 
         /*
@@ -831,31 +768,57 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc,
         }
         if (n == c) {
             if (!mem2) {
-                goto done;
+                return 1;
             }
             n += checkN(mem2, 0, ptr_tag, tag_count - c);
         }
     }
 
-    /*
-     * If we failed, we know which granule.  Compute the element that
-     * is first in that granule, and signal failure on that element.
-     */
-    if (unlikely(n < tag_count)) {
-        uint64_t fail_ofs;
-
-        fail_ofs = tag_first + n * TAG_GRANULE - ptr;
-        fail_ofs = ROUND_UP(fail_ofs, esize);
-        mte_check_fail(env, desc, ptr + fail_ofs, ra);
+    if (likely(n == tag_count)) {
+        return 1;
     }
 
- done:
+    /*
+     * If we failed, we know which granule.  For the first granule, the
+     * failure address is @ptr, the first byte accessed.  Otherwise the
+     * failure address is the first byte of the nth granule.
+     */
+    if (n > 0) {
+        *fault = tag_first + n * TAG_GRANULE;
+    }
+    return 0;
+}
+
+uint64_t mte_check(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra)
+{
+    uint64_t fault;
+    int ret = mte_probe_int(env, desc, ptr, ra, &fault);
+
+    if (unlikely(ret == 0)) {
+        mte_check_fail(env, desc, fault, ra);
+    } else if (ret < 0) {
+        return ptr;
+    }
     return useronly_clean_ptr(ptr);
 }
 
-uint64_t HELPER(mte_checkN)(CPUARMState *env, uint32_t desc, uint64_t ptr)
+uint64_t HELPER(mte_check)(CPUARMState *env, uint32_t desc, uint64_t ptr)
 {
-    return mte_checkN(env, desc, ptr, GETPC());
+    return mte_check(env, desc, ptr, GETPC());
+}
+
+/*
+ * No-fault version of mte_check, to be used by SVE for MemSingleNF.
+ * Returns false if the access is Checked and the check failed.  This
+ * is only intended to probe the tag -- the validity of the page must
+ * be checked beforehand.
+ */
+bool mte_probe(CPUARMState *env, uint32_t desc, uint64_t ptr)
+{
+    uint64_t fault;
+    int ret = mte_probe_int(env, desc, ptr, 0, &fault);
+
+    return ret != 0;
 }
 
 /*
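One detail worth spelling out in mte_probe_int() above: because ptr_last is now an inclusive bound (ptr + sizem1), both ends round down to the tag granule and the counts need a +1, where the old exclusive-end code used QEMU_ALIGN_UP. A standalone check of that arithmetic:

    /*
     * Standalone check of the granule arithmetic in mte_probe_int():
     * inclusive bounds, both ends aligned DOWN, count gets the +1.
     */
    #include <stdint.h>
    #include <stdio.h>

    #define TAG_GRANULE 16u

    static uint64_t align_down(uint64_t x, uint64_t a) { return x & ~(a - 1); }

    int main(void)
    {
        uint64_t ptr = 0x1008, sizem1 = 32 - 1;     /* a 32-byte access */
        uint64_t ptr_last  = ptr + sizem1;          /* inclusive: 0x1027 */
        uint64_t tag_first = align_down(ptr, TAG_GRANULE);
        uint64_t tag_last  = align_down(ptr_last, TAG_GRANULE);
        uint32_t tag_count = (tag_last - tag_first) / TAG_GRANULE + 1;

        /* granules 0x1000, 0x1010, 0x1020 -> 3 */
        printf("granules checked: %u\n", tag_count);
        return 0;
    }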
diff --git a/target/arm/neon-ls.decode b/target/arm/neon-ls.decode
index c17f5019e3..0a2a0e15db 100644
--- a/target/arm/neon-ls.decode
+++ b/target/arm/neon-ls.decode
@@ -46,7 +46,7 @@ VLD_all_lanes   1111 0100 1 . 1 0 rn:4 .... 11 n:2 size:2 t:1 a:1 rm:4 \
 
 VLDST_single    1111 0100 1 . l:1 0 rn:4 .... 00 n:2 reg_idx:3 align:1  rm:4 \
                 vd=%vd_dp size=0 stride=1
-VLDST_single    1111 0100 1 . l:1 0 rn:4 .... 01 n:2 reg_idx:2 align:2  rm:4 \
+VLDST_single    1111 0100 1 . l:1 0 rn:4 .... 01 n:2 reg_idx:2 . align:1 rm:4 \
                 vd=%vd_dp size=1 stride=%imm1_5_p1
-VLDST_single    1111 0100 1 . l:1 0 rn:4 .... 10 n:2 reg_idx:1 align:3  rm:4 \
+VLDST_single    1111 0100 1 . l:1 0 rn:4 .... 10 n:2 reg_idx:1 . align:2 rm:4 \
                 vd=%vd_dp size=2 stride=%imm1_6_p1
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index fd6c58f96a..c068dfa0d5 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -4382,13 +4382,9 @@ static void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env,
 #endif
 }
 
-typedef uint64_t mte_check_fn(CPUARMState *, uint32_t, uint64_t, uintptr_t);
-
-static inline QEMU_ALWAYS_INLINE
-void sve_cont_ldst_mte_check_int(SVEContLdSt *info, CPUARMState *env,
-                                 uint64_t *vg, target_ulong addr, int esize,
-                                 int msize, uint32_t mtedesc, uintptr_t ra,
-                                 mte_check_fn *check)
+static void sve_cont_ldst_mte_check(SVEContLdSt *info, CPUARMState *env,
+                                    uint64_t *vg, target_ulong addr, int esize,
+                                    int msize, uint32_t mtedesc, uintptr_t ra)
 {
     intptr_t mem_off, reg_off, reg_last;
 
@@ -4405,7 +4401,7 @@ void sve_cont_ldst_mte_check_int(SVEContLdSt *info, CPUARMState *env,
         uint64_t pg = vg[reg_off >> 6];
         do {
             if ((pg >> (reg_off & 63)) & 1) {
-                check(env, mtedesc, addr, ra);
+                mte_check(env, mtedesc, addr, ra);
             }
             reg_off += esize;
             mem_off += msize;
@@ -4422,7 +4418,7 @@ void sve_cont_ldst_mte_check_int(SVEContLdSt *info, CPUARMState *env,
             uint64_t pg = vg[reg_off >> 6];
             do {
                 if ((pg >> (reg_off & 63)) & 1) {
-                    check(env, mtedesc, addr, ra);
+                    mte_check(env, mtedesc, addr, ra);
                 }
                 reg_off += esize;
                 mem_off += msize;
@@ -4431,30 +4427,6 @@ void sve_cont_ldst_mte_check_int(SVEContLdSt *info, CPUARMState *env,
     }
 }
 
-typedef void sve_cont_ldst_mte_check_fn(SVEContLdSt *info, CPUARMState *env,
-                                        uint64_t *vg, target_ulong addr,
-                                        int esize, int msize, uint32_t mtedesc,
-                                        uintptr_t ra);
-
-static void sve_cont_ldst_mte_check1(SVEContLdSt *info, CPUARMState *env,
-                                     uint64_t *vg, target_ulong addr,
-                                     int esize, int msize, uint32_t mtedesc,
-                                     uintptr_t ra)
-{
-    sve_cont_ldst_mte_check_int(info, env, vg, addr, esize, msize,
-                                mtedesc, ra, mte_check1);
-}
-
-static void sve_cont_ldst_mte_checkN(SVEContLdSt *info, CPUARMState *env,
-                                     uint64_t *vg, target_ulong addr,
-                                     int esize, int msize, uint32_t mtedesc,
-                                     uintptr_t ra)
-{
-    sve_cont_ldst_mte_check_int(info, env, vg, addr, esize, msize,
-                                mtedesc, ra, mte_checkN);
-}
-
-
 /*
  * Common helper for all contiguous 1,2,3,4-register predicated stores.
  */
@@ -4463,8 +4435,7 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
                uint32_t desc, const uintptr_t retaddr,
                const int esz, const int msz, const int N, uint32_t mtedesc,
                sve_ldst1_host_fn *host_fn,
-               sve_ldst1_tlb_fn *tlb_fn,
-               sve_cont_ldst_mte_check_fn *mte_check_fn)
+               sve_ldst1_tlb_fn *tlb_fn)
 {
     const unsigned rd = simd_data(desc);
     const intptr_t reg_max = simd_oprsz(desc);
@@ -4493,9 +4464,9 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
      * Handle mte checks for all active elements.
      * Since TBI must be set for MTE, !mtedesc => !mte_active.
      */
-    if (mte_check_fn && mtedesc) {
-        mte_check_fn(&info, env, vg, addr, 1 << esz, N << msz,
-                     mtedesc, retaddr);
+    if (mtedesc) {
+        sve_cont_ldst_mte_check(&info, env, vg, addr, 1 << esz, N << msz,
+                                mtedesc, retaddr);
     }
 
     flags = info.page[0].flags | info.page[1].flags;
@@ -4621,8 +4592,7 @@ void sve_ldN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr,
         mtedesc = 0;
     }
 
-    sve_ldN_r(env, vg, addr, desc, ra, esz, msz, N, mtedesc, host_fn, tlb_fn,
-              N == 1 ? sve_cont_ldst_mte_check1 : sve_cont_ldst_mte_checkN);
+    sve_ldN_r(env, vg, addr, desc, ra, esz, msz, N, mtedesc, host_fn, tlb_fn);
 }
 
 #define DO_LD1_1(NAME, ESZ)                                             \
 void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg,                 \
                             target_ulong addr, uint32_t desc)           \
 {                                                                       \
     sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, 0,            \
-              sve_##NAME##_host, sve_##NAME##_tlb, NULL);               \
+              sve_##NAME##_host, sve_##NAME##_tlb);                     \
 }                                                                       \
 void HELPER(sve_##NAME##_r_mte)(CPUARMState *env, void *vg,             \
                                 target_ulong addr, uint32_t desc)       \
@@ -4644,22 +4614,22 @@ void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg,              \
                                target_ulong addr, uint32_t desc)        \
 {                                                                       \
     sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, 0,             \
-              sve_##NAME##_le_host, sve_##NAME##_le_tlb, NULL);         \
+              sve_##NAME##_le_host, sve_##NAME##_le_tlb);               \
 }                                                                       \
 void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg,              \
                                target_ulong addr, uint32_t desc)        \
 {                                                                       \
     sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, 0,             \
-              sve_##NAME##_be_host, sve_##NAME##_be_tlb, NULL);         \
+              sve_##NAME##_be_host, sve_##NAME##_be_tlb);               \
 }                                                                       \
 void HELPER(sve_##NAME##_le_r_mte)(CPUARMState *env, void *vg,          \
-                                 target_ulong addr, uint32_t desc)      \
+                                   target_ulong addr, uint32_t desc)    \
 {                                                                       \
     sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1,            \
                   sve_##NAME##_le_host, sve_##NAME##_le_tlb);           \
 }                                                                       \
 void HELPER(sve_##NAME##_be_r_mte)(CPUARMState *env, void *vg,          \
-                                 target_ulong addr, uint32_t desc)      \
+                                   target_ulong addr, uint32_t desc)    \
 {                                                                       \
     sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1,            \
                   sve_##NAME##_be_host, sve_##NAME##_be_tlb);           \
 }
@@ -4693,7 +4663,7 @@ void HELPER(sve_ld##N##bb_r)(CPUARMState *env, void *vg,                \
                              target_ulong addr, uint32_t desc)          \
 {                                                                       \
     sve_ldN_r(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, 0,           \
-              sve_ld1bb_host, sve_ld1bb_tlb, NULL);                     \
+              sve_ld1bb_host, sve_ld1bb_tlb);                           \
 }                                                                       \
 void HELPER(sve_ld##N##bb_r_mte)(CPUARMState *env, void *vg,            \
                                  target_ulong addr, uint32_t desc)      \
@@ -4707,13 +4677,13 @@ void HELPER(sve_ld##N##SUFF##_le_r)(CPUARMState *env, void *vg,         \
                                     target_ulong addr, uint32_t desc)   \
 {                                                                       \
     sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, 0,             \
-              sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb, NULL);   \
+              sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb);         \
 }                                                                       \
 void HELPER(sve_ld##N##SUFF##_be_r)(CPUARMState *env, void *vg,         \
                                     target_ulong addr, uint32_t desc)   \
 {                                                                       \
     sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, 0,             \
-              sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb, NULL);   \
+              sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb);         \
 }                                                                       \
 void HELPER(sve_ld##N##SUFF##_le_r_mte)(CPUARMState *env, void *vg,     \
                                         target_ulong addr, uint32_t desc) \
@@ -4826,7 +4796,7 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
     if (fault == FAULT_FIRST) {
         /* Trapping mte check for the first-fault element.  */
         if (mtedesc) {
-            mte_check1(env, mtedesc, addr + mem_off, retaddr);
+            mte_check(env, mtedesc, addr + mem_off, retaddr);
         }
 
         /*
@@ -4869,7 +4839,7 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
             /* Watchpoint hit, see below. */
             goto do_fault;
         }
-        if (mtedesc && !mte_probe1(env, mtedesc, addr + mem_off)) {
+        if (mtedesc && !mte_probe(env, mtedesc, addr + mem_off)) {
             goto do_fault;
         }
         /*
@@ -4919,7 +4889,7 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
                 & BP_MEM_READ)) {
             goto do_fault;
         }
-        if (mtedesc && !mte_probe1(env, mtedesc, addr + mem_off)) {
+        if (mtedesc && !mte_probe(env, mtedesc, addr + mem_off)) {
             goto do_fault;
         }
         host_fn(vd, reg_off, host + mem_off);
@@ -5090,8 +5060,7 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
                uint32_t desc, const uintptr_t retaddr,
                const int esz, const int msz, const int N, uint32_t mtedesc,
                sve_ldst1_host_fn *host_fn,
-               sve_ldst1_tlb_fn *tlb_fn,
-               sve_cont_ldst_mte_check_fn *mte_check_fn)
+               sve_ldst1_tlb_fn *tlb_fn)
 {
     const unsigned rd = simd_data(desc);
     const intptr_t reg_max = simd_oprsz(desc);
@@ -5117,9 +5086,9 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
      * Handle mte checks for all active elements.
      * Since TBI must be set for MTE, !mtedesc => !mte_active.
     */
-    if (mte_check_fn && mtedesc) {
-        mte_check_fn(&info, env, vg, addr, 1 << esz, N << msz,
-                     mtedesc, retaddr);
+    if (mtedesc) {
+        sve_cont_ldst_mte_check(&info, env, vg, addr, 1 << esz, N << msz,
+                                mtedesc, retaddr);
     }
 
     flags = info.page[0].flags | info.page[1].flags;
@@ -5233,8 +5202,7 @@ void sve_stN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr,
         mtedesc = 0;
     }
 
-    sve_stN_r(env, vg, addr, desc, ra, esz, msz, N, mtedesc, host_fn, tlb_fn,
-              N == 1 ? sve_cont_ldst_mte_check1 : sve_cont_ldst_mte_checkN);
+    sve_stN_r(env, vg, addr, desc, ra, esz, msz, N, mtedesc, host_fn, tlb_fn);
 }
 
 #define DO_STN_1(N, NAME, ESZ)                                          \
 void HELPER(sve_st##N##NAME##_r)(CPUARMState *env, void *vg,            \
                                  target_ulong addr, uint32_t desc)      \
 {                                                                       \
     sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, 0,            \
-              sve_st1##NAME##_host, sve_st1##NAME##_tlb, NULL);         \
+              sve_st1##NAME##_host, sve_st1##NAME##_tlb);               \
 }                                                                       \
 void HELPER(sve_st##N##NAME##_r_mte)(CPUARMState *env, void *vg,        \
                                      target_ulong addr, uint32_t desc)  \
@@ -5256,13 +5224,13 @@ void HELPER(sve_st##N##NAME##_le_r)(CPUARMState *env, void *vg,         \
                                     target_ulong addr, uint32_t desc)   \
 {                                                                       \
     sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, 0,             \
-              sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb, NULL);   \
+              sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb);         \
 }                                                                       \
 void HELPER(sve_st##N##NAME##_be_r)(CPUARMState *env, void *vg,         \
                                     target_ulong addr, uint32_t desc)   \
 {                                                                       \
     sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, 0,             \
-              sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb, NULL);   \
+              sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb);         \
 }                                                                       \
 void HELPER(sve_st##N##NAME##_le_r_mte)(CPUARMState *env, void *vg,     \
                                         target_ulong addr, uint32_t desc) \
@@ -5373,7 +5341,7 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
                                      info.attrs, BP_MEM_READ, retaddr);
             }
             if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) {
-                mte_check1(env, mtedesc, addr, retaddr);
+                mte_check(env, mtedesc, addr, retaddr);
             }
             host_fn(&scratch, reg_off, info.host);
         } else {
@@ -5386,7 +5354,7 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
                                          BP_MEM_READ, retaddr);
                 }
                 if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) {
-                    mte_check1(env, mtedesc, addr, retaddr);
+                    mte_check(env, mtedesc, addr, retaddr);
                 }
                 tlb_fn(env, &scratch, reg_off, addr, retaddr);
             }
@@ -5552,7 +5520,7 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
          */
         addr = base + (off_fn(vm, reg_off) << scale);
         if (mtedesc) {
-            mte_check1(env, mtedesc, addr, retaddr);
+            mte_check(env, mtedesc, addr, retaddr);
         }
         tlb_fn(env, vd, reg_off, addr, retaddr);
 
@@ -5588,7 +5556,7 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
             }
             if (mtedesc &&
                 arm_tlb_mte_tagged(&info.attrs) &&
-                !mte_probe1(env, mtedesc, addr)) {
+                !mte_probe(env, mtedesc, addr)) {
                 goto fault;
             }
 
@@ -5773,7 +5741,7 @@ void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
             }
 
             if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) {
-                mte_check1(env, mtedesc, addr, retaddr);
+                mte_check(env, mtedesc, addr, retaddr);
             }
         }
         i += 1;
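The translate-a64.c changes below lean on MemOp being a single bag of bits: size, signedness, endianness and (new here, driven by the ALIGN_MEM tb flag) an alignment requirement all combine by OR, so callers can pass e.g. size | MO_SIGN and let finalize_memop() fold in the rest. A sketch with illustrative bit assignments, not QEMU's actual MemOp values:

    /*
     * Sketch of MemOp composition: independent properties of one memory
     * access packed into one integer by OR-ing flag bits together.
     */
    #include <stdio.h>

    enum {
        MO_8 = 0, MO_16 = 1, MO_32 = 2, MO_64 = 3,
        MO_SIZE  = 3,        /* mask for the size bits */
        MO_SIGN  = 1 << 2,
        MO_BE    = 1 << 3,
        MO_ALIGN = 1 << 4,
    };

    /*
     * Models finalize_memop(): fold in target endianness and, when the
     * SCTLR.A / CCR.UNALIGN_TRP state demands it, an alignment check.
     */
    static int finalize(int memop, int big_endian, int align_mem)
    {
        return memop | (big_endian ? MO_BE : 0) | (align_mem ? MO_ALIGN : 0);
    }

    int main(void)
    {
        int op = finalize(MO_32 | MO_SIGN, 0, 1);
        printf("size=%d signed=%d aligned=%d\n",
               op & MO_SIZE, !!(op & MO_SIGN), !!(op & MO_ALIGN));
        return 0;
    }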
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 0b42e53500..95897e63af 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -272,11 +272,11 @@ static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
-        desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << log2_size);
+        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << log2_size) - 1);
         tcg_desc = tcg_const_i32(desc);
 
         ret = new_tmp_a64(s);
-        gen_helper_mte_check1(ret, cpu_env, tcg_desc, addr);
+        gen_helper_mte_check(ret, cpu_env, tcg_desc, addr);
         tcg_temp_free_i32(tcg_desc);
 
         return ret;
@@ -295,9 +295,9 @@ TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
 
 /*
  * For MTE, check multiple logical sequential accesses.
  */
 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
-                        bool tag_checked, int log2_esize, int total_size)
+                        bool tag_checked, int size)
 {
-    if (tag_checked && s->mte_active[0] && total_size != (1 << log2_esize)) {
+    if (tag_checked && s->mte_active[0]) {
         TCGv_i32 tcg_desc;
         TCGv_i64 ret;
         int desc = 0;
@@ -306,17 +306,16 @@ TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
-        desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << log2_esize);
-        desc = FIELD_DP32(desc, MTEDESC, TSIZE, total_size);
+        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, size - 1);
         tcg_desc = tcg_const_i32(desc);
 
         ret = new_tmp_a64(s);
-        gen_helper_mte_checkN(ret, cpu_env, tcg_desc, addr);
+        gen_helper_mte_check(ret, cpu_env, tcg_desc, addr);
         tcg_temp_free_i32(tcg_desc);
 
         return ret;
     }
-    return gen_mte_check1(s, addr, is_write, tag_checked, log2_esize);
+    return clean_data_tbi(s, addr);
 }
 
 typedef struct DisasCompare64 {
@@ -887,19 +886,19 @@ static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
  * Store from GPR register to memory.
  */
 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
-                             TCGv_i64 tcg_addr, int size, int memidx,
+                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                              bool iss_valid,
                             unsigned int iss_srt,
                              bool iss_sf, bool iss_ar)
 {
-    g_assert(size <= 3);
-    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);
+    memop = finalize_memop(s, memop);
+    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);
 
     if (iss_valid) {
         uint32_t syn;
 
         syn = syn_data_abort_with_iss(0,
-                                      size,
+                                      (memop & MO_SIZE),
                                       false,
                                       iss_srt,
                                       iss_sf,
@@ -910,37 +909,28 @@ static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
 }
 
 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
-                      TCGv_i64 tcg_addr, int size,
+                      TCGv_i64 tcg_addr, MemOp memop,
                       bool iss_valid,
                       unsigned int iss_srt,
                       bool iss_sf, bool iss_ar)
 {
-    do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
+    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                      iss_valid, iss_srt, iss_sf, iss_ar);
 }
 
 /*
  * Load from memory to GPR register
 */
-static void do_gpr_ld_memidx(DisasContext *s,
-                             TCGv_i64 dest, TCGv_i64 tcg_addr,
-                             int size, bool is_signed,
-                             bool extend, int memidx,
+static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
+                             MemOp memop, bool extend, int memidx,
                              bool iss_valid, unsigned int iss_srt,
                              bool iss_sf, bool iss_ar)
 {
-    MemOp memop = s->be_data + size;
-
-    g_assert(size <= 3);
-
-    if (is_signed) {
-        memop += MO_SIGN;
-    }
-
+    memop = finalize_memop(s, memop);
     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
 
-    if (extend && is_signed) {
-        g_assert(size < 3);
+    if (extend && (memop & MO_SIGN)) {
+        g_assert((memop & MO_SIZE) <= MO_32);
         tcg_gen_ext32u_i64(dest, dest);
     }
 
@@ -948,8 +938,8 @@ static void do_gpr_ld_memidx(DisasContext *s,
         uint32_t syn;
 
         syn = syn_data_abort_with_iss(0,
-                                      size,
-                                      is_signed,
+                                      (memop & MO_SIZE),
+                                      (memop & MO_SIGN) != 0,
                                       iss_srt,
                                       iss_sf,
                                       iss_ar,
@@ -958,14 +948,12 @@ static void do_gpr_ld_memidx(DisasContext *s,
     }
 }
 
-static void do_gpr_ld(DisasContext *s,
-                      TCGv_i64 dest, TCGv_i64 tcg_addr,
-                      int size, bool is_signed, bool extend,
+static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
+                      MemOp memop, bool extend,
                       bool iss_valid, unsigned int iss_srt,
                       bool iss_sf, bool iss_ar)
 {
-    do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
-                     get_mem_index(s),
+    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                      iss_valid, iss_srt, iss_sf, iss_ar);
 }
 
@@ -975,25 +963,33 @@ static void do_gpr_ld(DisasContext *s,
 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
 {
     /* This writes the bottom N bits of a 128 bit wide vector to memory */
-    TCGv_i64 tmp = tcg_temp_new_i64();
-    tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
+    TCGv_i64 tmplo = tcg_temp_new_i64();
+    MemOp mop;
+
+    tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64));
+
     if (size < 4) {
-        tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
-                            s->be_data + size);
+        mop = finalize_memop(s, size);
+        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
     } else {
         bool be = s->be_data == MO_BE;
         TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
+        TCGv_i64 tmphi = tcg_temp_new_i64();
 
+        tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx));
+
+        mop = s->be_data | MO_Q;
+        tcg_gen_qemu_st_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
+                            mop | (s->align_mem ? MO_ALIGN_16 : 0));
         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
-        tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
-                            s->be_data | MO_Q);
-        tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
-        tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
-                            s->be_data | MO_Q);
+        tcg_gen_qemu_st_i64(be ? tmplo : tmphi, tcg_hiaddr,
+                            get_mem_index(s), mop);
+
         tcg_temp_free_i64(tcg_hiaddr);
+        tcg_temp_free_i64(tmphi);
     }
 
-    tcg_temp_free_i64(tmp);
+    tcg_temp_free_i64(tmplo);
 }
 
 /*
@@ -1004,10 +1000,11 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
     /* This always zero-extends and writes to a full 128 bit wide vector */
     TCGv_i64 tmplo = tcg_temp_new_i64();
     TCGv_i64 tmphi = NULL;
+    MemOp mop;
 
     if (size < 4) {
-        MemOp memop = s->be_data + size;
-        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
+        mop = finalize_memop(s, size);
+        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
     } else {
         bool be = s->be_data == MO_BE;
         TCGv_i64 tcg_hiaddr;
@@ -1015,11 +1012,12 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
         tmphi = tcg_temp_new_i64();
         tcg_hiaddr = tcg_temp_new_i64();
 
+        mop = s->be_data | MO_Q;
+        tcg_gen_qemu_ld_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
+                            mop | (s->align_mem ? MO_ALIGN_16 : 0));
         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
-        tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
-                            s->be_data | MO_Q);
-        tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
-                            s->be_data | MO_Q);
+        tcg_gen_qemu_ld_i64(be ? tmplo : tmphi, tcg_hiaddr,
tmplo : tmphi, tcg_hiaddr, + get_mem_index(s), mop); tcg_temp_free_i64(tcg_hiaddr); } @@ -1148,24 +1146,24 @@ static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src, /* Store from vector register to memory */ static void do_vec_st(DisasContext *s, int srcidx, int element, - TCGv_i64 tcg_addr, int size, MemOp endian) + TCGv_i64 tcg_addr, MemOp mop) { TCGv_i64 tcg_tmp = tcg_temp_new_i64(); - read_vec_element(s, tcg_tmp, srcidx, element, size); - tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size); + read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE); + tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); tcg_temp_free_i64(tcg_tmp); } /* Load from memory to vector register */ static void do_vec_ld(DisasContext *s, int destidx, int element, - TCGv_i64 tcg_addr, int size, MemOp endian) + TCGv_i64 tcg_addr, MemOp mop) { TCGv_i64 tcg_tmp = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size); - write_vec_element(s, tcg_tmp, destidx, element, size); + tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); + write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE); tcg_temp_free_i64(tcg_tmp); } @@ -2701,7 +2699,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size); - do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt, + /* TODO: ARMv8.4-LSE SCTLR.nAA */ + do_gpr_st(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, true, rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); return; @@ -2718,8 +2717,9 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) } clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size); - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, true, rt, - disas_ldst_compute_iss_sf(size, false, 0), is_lasr); + /* TODO: ARMv8.4-LSE SCTLR.nAA */ + do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, false, true, + rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); return; @@ -2831,8 +2831,8 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn) /* Only unsigned 32bit loads target 32bit registers. */ bool iss_sf = opc != 0; - do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, false, - true, rt, iss_sf, false); + do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, + false, true, rt, iss_sf, false); } tcg_temp_free_i64(clean_addr); } @@ -2966,8 +2966,7 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) } clean_addr = gen_mte_checkN(s, dirty_addr, !is_load, - (wback || rn != 31) && !set_tag, - size, 2 << size); + (wback || rn != 31) && !set_tag, 2 << size); if (is_vector) { if (is_load) { @@ -2991,11 +2990,11 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) /* Do not modify tcg_rt before recognizing any exception * from the second load. 
*/ - do_gpr_ld(s, tmp, clean_addr, size, is_signed, false, - false, 0, false, false); + do_gpr_ld(s, tmp, clean_addr, size + is_signed * MO_SIGN, + false, false, 0, false, false); tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size); - do_gpr_ld(s, tcg_rt2, clean_addr, size, is_signed, false, - false, 0, false, false); + do_gpr_ld(s, tcg_rt2, clean_addr, size + is_signed * MO_SIGN, + false, false, 0, false, false); tcg_gen_mov_i64(tcg_rt, tmp); tcg_temp_free_i64(tmp); @@ -3126,8 +3125,8 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx, iss_valid, rt, iss_sf, false); } else { - do_gpr_ld_memidx(s, tcg_rt, clean_addr, size, - is_signed, is_extended, memidx, + do_gpr_ld_memidx(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, + is_extended, memidx, iss_valid, rt, iss_sf, false); } } @@ -3231,9 +3230,8 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, do_gpr_st(s, tcg_rt, clean_addr, size, true, rt, iss_sf, false); } else { - do_gpr_ld(s, tcg_rt, clean_addr, size, - is_signed, is_extended, - true, rt, iss_sf, false); + do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, + is_extended, true, rt, iss_sf, false); } } } @@ -3316,8 +3314,8 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, do_gpr_st(s, tcg_rt, clean_addr, size, true, rt, iss_sf, false); } else { - do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, is_extended, - true, rt, iss_sf, false); + do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, + is_extended, true, rt, iss_sf, false); } } } @@ -3404,7 +3402,7 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, * full load-acquire (we only need "load-acquire processor consistent"), * but we choose to implement them as full LDAQ. 
*/ - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, + do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, true, rt, disas_ldst_compute_iss_sf(size, false, 0), true); tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); return; @@ -3477,7 +3475,7 @@ static void disas_ldst_pac(DisasContext *s, uint32_t insn, is_wback || rn != 31, size); tcg_rt = cpu_reg(s, rt); - do_gpr_ld(s, tcg_rt, clean_addr, size, /* is_signed */ false, + do_gpr_ld(s, tcg_rt, clean_addr, size, /* extend */ false, /* iss_valid */ !is_wback, /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false); @@ -3509,15 +3507,18 @@ static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn) int size = extract32(insn, 30, 2); TCGv_i64 clean_addr, dirty_addr; bool is_store = false; - bool is_signed = false; bool extend = false; bool iss_sf; + MemOp mop; if (!dc_isar_feature(aa64_rcpc_8_4, s)) { unallocated_encoding(s); return; } + /* TODO: ARMv8.4-LSE SCTLR.nAA */ + mop = size | MO_ALIGN; + switch (opc) { case 0: /* STLURB */ is_store = true; @@ -3529,21 +3530,21 @@ static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - is_signed = true; + mop |= MO_SIGN; break; case 3: /* LDAPURS* 32-bit variant */ if (size > 1) { unallocated_encoding(s); return; } - is_signed = true; + mop |= MO_SIGN; extend = true; /* zero-extend 32->64 after signed load */ break; default: g_assert_not_reached(); } - iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); + iss_sf = disas_ldst_compute_iss_sf(size, (mop & MO_SIGN) != 0, opc); if (rn == 31) { gen_check_sp_alignment(s); @@ -3556,14 +3557,14 @@ static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn) if (is_store) { /* Store-Release semantics */ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); - do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt, iss_sf, true); + do_gpr_st(s, cpu_reg(s, rt), clean_addr, mop, true, rt, iss_sf, true); } else { /* * Load-AcquirePC semantics; we implement as the slightly more * restrictive Load-Acquire. */ - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, is_signed, extend, - true, rt, iss_sf, true); + do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop, + extend, true, rt, iss_sf, true); tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); } } @@ -3634,7 +3635,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) bool is_postidx = extract32(insn, 23, 1); bool is_q = extract32(insn, 30, 1); TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; - MemOp endian = s->be_data; + MemOp endian, align, mop; int total; /* total bytes */ int elements; /* elements per vector */ @@ -3702,6 +3703,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) } /* For our purposes, bytes are always little-endian. */ + endian = s->be_data; if (size == 0) { endian = MO_LE; } @@ -3714,17 +3716,23 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) * promote consecutive little-endian elements below. */ clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31, - size, total); + total); /* * Consecutive little-endian elements from a single register * can be promoted to a larger little-endian operation. */ + align = MO_ALIGN; if (selem == 1 && endian == MO_LE) { + align = pow2_align(size); size = 3; } - elements = (is_q ? 16 : 8) >> size; + if (!s->align_mem) { + align = 0; + } + mop = endian | size | align; + elements = (is_q ? 
16 : 8) >> size; tcg_ebytes = tcg_const_i64(1 << size); for (r = 0; r < rpt; r++) { int e; @@ -3733,9 +3741,9 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) for (xs = 0; xs < selem; xs++) { int tt = (rt + r + xs) % 32; if (is_store) { - do_vec_st(s, tt, e, clean_addr, size, endian); + do_vec_st(s, tt, e, clean_addr, mop); } else { - do_vec_ld(s, tt, e, clean_addr, size, endian); + do_vec_ld(s, tt, e, clean_addr, mop); } tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); } @@ -3807,6 +3815,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) int index = is_q << 3 | S << 2 | size; int xs, total; TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; + MemOp mop; if (extract32(insn, 31, 1)) { unallocated_encoding(s); @@ -3867,7 +3876,8 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) tcg_rn = cpu_reg_sp(s, rn); clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31, - scale, total); + total); + mop = finalize_memop(s, scale); tcg_ebytes = tcg_const_i64(1 << scale); for (xs = 0; xs < selem; xs++) { @@ -3875,8 +3885,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) /* Load and replicate to all elements */ TCGv_i64 tcg_tmp = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, - get_mem_index(s), s->be_data + scale); + tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt), (is_q + 1) * 8, vec_full_reg_size(s), tcg_tmp); @@ -3884,9 +3893,9 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) } else { /* Load/store one element per register */ if (is_load) { - do_vec_ld(s, rt, index, clean_addr, scale, s->be_data); + do_vec_ld(s, rt, index, clean_addr, mop); } else { - do_vec_st(s, rt, index, clean_addr, scale, s->be_data); + do_vec_st(s, rt, index, clean_addr, mop); } } tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); @@ -14672,7 +14681,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, DisasContext *dc = container_of(dcbase, DisasContext, base); CPUARMState *env = cpu->env_ptr; ARMCPU *arm_cpu = env_archcpu(env); - uint32_t tb_flags = dc->base.tb->flags; + CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb); int bound, core_mmu_idx; dc->isar = &arm_cpu->isar; @@ -14686,28 +14695,29 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, !arm_el_is_aa64(env, 3); dc->thumb = 0; dc->sctlr_b = 0; - dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE; + dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? 
MO_BE : MO_LE; dc->condexec_mask = 0; dc->condexec_cond = 0; - core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX); + core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX); dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx); - dc->tbii = FIELD_EX32(tb_flags, TBFLAG_A64, TBII); - dc->tbid = FIELD_EX32(tb_flags, TBFLAG_A64, TBID); - dc->tcma = FIELD_EX32(tb_flags, TBFLAG_A64, TCMA); + dc->tbii = EX_TBFLAG_A64(tb_flags, TBII); + dc->tbid = EX_TBFLAG_A64(tb_flags, TBID); + dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA); dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); #if !defined(CONFIG_USER_ONLY) dc->user = (dc->current_el == 0); #endif - dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL); - dc->sve_excp_el = FIELD_EX32(tb_flags, TBFLAG_A64, SVEEXC_EL); - dc->sve_len = (FIELD_EX32(tb_flags, TBFLAG_A64, ZCR_LEN) + 1) * 16; - dc->pauth_active = FIELD_EX32(tb_flags, TBFLAG_A64, PAUTH_ACTIVE); - dc->bt = FIELD_EX32(tb_flags, TBFLAG_A64, BT); - dc->btype = FIELD_EX32(tb_flags, TBFLAG_A64, BTYPE); - dc->unpriv = FIELD_EX32(tb_flags, TBFLAG_A64, UNPRIV); - dc->ata = FIELD_EX32(tb_flags, TBFLAG_A64, ATA); - dc->mte_active[0] = FIELD_EX32(tb_flags, TBFLAG_A64, MTE_ACTIVE); - dc->mte_active[1] = FIELD_EX32(tb_flags, TBFLAG_A64, MTE0_ACTIVE); + dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL); + dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM); + dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL); + dc->sve_len = (EX_TBFLAG_A64(tb_flags, ZCR_LEN) + 1) * 16; + dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE); + dc->bt = EX_TBFLAG_A64(tb_flags, BT); + dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE); + dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV); + dc->ata = EX_TBFLAG_A64(tb_flags, ATA); + dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE); + dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE); dc->vec_len = 0; dc->vec_stride = 0; dc->cp_regs = arm_cpu->cp_regs; @@ -14734,10 +14744,10 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, * emit code to generate a software step exception * end the TB */ - dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE); - dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS); + dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE); + dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS); dc->is_ldex = false; - dc->debug_target_el = FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL); + dc->debug_target_el = EX_TBFLAG_ANY(tb_flags, DEBUG_TARGET_EL); /* Bound the number of insns to execute to those left on the page. 
*/ bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4; diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h index 3668b671dd..868d355048 100644 --- a/target/arm/translate-a64.h +++ b/target/arm/translate-a64.h @@ -44,7 +44,7 @@ TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr); TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, bool tag_checked, int log2_size); TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, - bool tag_checked, int count, int log2_esize); + bool tag_checked, int size); /* We should have at some point before trying to access an FP register * done the necessary access check, so assert that diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc index f6c68e30ab..a02b8369a1 100644 --- a/target/arm/translate-neon.c.inc +++ b/target/arm/translate-neon.c.inc @@ -429,7 +429,7 @@ static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a) { /* Neon load/store multiple structures */ int nregs, interleave, spacing, reg, n; - MemOp endian = s->be_data; + MemOp mop, align, endian; int mmu_idx = get_mem_index(s); int size = a->size; TCGv_i64 tmp64; @@ -473,20 +473,36 @@ static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a) } /* For our purposes, bytes are always little-endian. */ + endian = s->be_data; if (size == 0) { endian = MO_LE; } + + /* Enforce alignment requested by the instruction */ + if (a->align) { + align = pow2_align(a->align + 2); /* 4 ** a->align */ + } else { + align = s->align_mem ? MO_ALIGN : 0; + } + /* * Consecutive little-endian elements from a single register * can be promoted to a larger little-endian operation. */ if (interleave == 1 && endian == MO_LE) { + /* Retain any natural alignment. */ + if (align == MO_ALIGN) { + align = pow2_align(size); + } size = 3; } + tmp64 = tcg_temp_new_i64(); addr = tcg_temp_new_i32(); tmp = tcg_const_i32(1 << size); load_reg_var(s, addr, a->rn); + + mop = endian | size | align; for (reg = 0; reg < nregs; reg++) { for (n = 0; n < 8 >> size; n++) { int xs; @@ -494,13 +510,16 @@ static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a) int tt = a->vd + reg + spacing * xs; if (a->l) { - gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size); + gen_aa32_ld_internal_i64(s, tmp64, addr, mmu_idx, mop); neon_store_element64(tt, n, size, tmp64); } else { neon_load_element64(tmp64, tt, n, size); - gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size); + gen_aa32_st_internal_i64(s, tmp64, addr, mmu_idx, mop); } tcg_gen_add_i32(addr, addr, tmp); + + /* Subsequent memory operations inherit alignment */ + mop &= ~MO_AMASK; } } } @@ -520,6 +539,7 @@ static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) int size = a->size; int nregs = a->n + 1; TCGv_i32 addr, tmp; + MemOp mop, align; if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { return false; @@ -530,18 +550,33 @@ static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) return false; } + align = 0; if (size == 3) { if (nregs != 4 || a->a == 0) { return false; } /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */ - size = 2; - } - if (nregs == 1 && a->a == 1 && size == 0) { - return false; - } - if (nregs == 3 && a->a == 1) { - return false; + size = MO_32; + align = MO_ALIGN_16; + } else if (a->a) { + switch (nregs) { + case 1: + if (size == 0) { + return false; + } + align = MO_ALIGN; + break; + case 2: + align = pow2_align(size + 1); + break; + case 3: + return false; + case 4: + align = pow2_align(size + 2); + 
break; + default: + g_assert_not_reached(); + } } if (!vfp_access_check(s)) { @@ -554,13 +589,12 @@ static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) */ stride = a->t ? 2 : 1; vec_size = nregs == 1 ? stride * 8 : 8; - + mop = size | align; tmp = tcg_temp_new_i32(); addr = tcg_temp_new_i32(); load_reg_var(s, addr, a->rn); for (reg = 0; reg < nregs; reg++) { - gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), - s->be_data | size); + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop); if ((vd & 1) && vec_size == 16) { /* * We cannot write 16 bytes at once because the @@ -576,6 +610,9 @@ static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) } tcg_gen_addi_i32(addr, addr, 1 << size); vd += stride; + + /* Subsequent memory operations inherit alignment */ + mop &= ~MO_AMASK; } tcg_temp_free_i32(tmp); tcg_temp_free_i32(addr); @@ -592,6 +629,7 @@ static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a) int nregs = a->n + 1; int vd = a->vd; TCGv_i32 addr, tmp; + MemOp mop; if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { return false; @@ -606,7 +644,7 @@ static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a) switch (nregs) { case 1: if (((a->align & (1 << a->size)) != 0) || - (a->size == 2 && ((a->align & 3) == 1 || (a->align & 3) == 2))) { + (a->size == 2 && (a->align == 1 || a->align == 2))) { return false; } break; @@ -621,7 +659,7 @@ static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a) } break; case 4: - if ((a->size == 2) && ((a->align & 3) == 3)) { + if (a->size == 2 && a->align == 3) { return false; } break; @@ -641,25 +679,58 @@ static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a) return true; } + /* Pick up SCTLR settings */ + mop = finalize_memop(s, a->size); + + if (a->align) { + MemOp align_op; + + switch (nregs) { + case 1: + /* For VLD1, use natural alignment. */ + align_op = MO_ALIGN; + break; + case 2: + /* For VLD2, use double alignment. */ + align_op = pow2_align(a->size + 1); + break; + case 4: + if (a->size == MO_32) { + /* + * For VLD4.32, align = 1 is double alignment, align = 2 is + * quad alignment; align = 3 is rejected above. + */ + align_op = pow2_align(a->size + a->align); + } else { + /* For VLD4.8 and VLD.16, we want quad alignment. */ + align_op = pow2_align(a->size + 2); + } + break; + default: + /* For VLD3, the alignment field is zero and rejected above. */ + g_assert_not_reached(); + } + + mop = (mop & ~MO_AMASK) | align_op; + } + tmp = tcg_temp_new_i32(); addr = tcg_temp_new_i32(); load_reg_var(s, addr, a->rn); - /* - * TODO: if we implemented alignment exceptions, we should check - * addr against the alignment encoded in a->align here. 
- */ + for (reg = 0; reg < nregs; reg++) { if (a->l) { - gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), - s->be_data | a->size); + gen_aa32_ld_internal_i32(s, tmp, addr, get_mem_index(s), mop); neon_store_element(vd, a->reg_idx, a->size, tmp); } else { /* Store */ neon_load_element(tmp, vd, a->reg_idx, a->size); - gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), - s->be_data | a->size); + gen_aa32_st_internal_i32(s, tmp, addr, get_mem_index(s), mop); } vd += a->stride; tcg_gen_addi_i32(addr, addr, 1 << a->size); + + /* Subsequent memory operations inherit alignment */ + mop &= ~MO_AMASK; } tcg_temp_free_i32(addr); tcg_temp_free_i32(tmp); diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 0eefb61214..864ed669c4 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -4264,7 +4264,7 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) dirty_addr = tcg_temp_new_i64(); tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); - clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); + clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len); tcg_temp_free_i64(dirty_addr); /* @@ -4352,7 +4352,7 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) dirty_addr = tcg_temp_new_i64(); tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); - clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); + clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len); tcg_temp_free_i64(dirty_addr); /* Note that unpredicated load/store of vector/predicate registers @@ -4509,8 +4509,7 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); - desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz); - desc = FIELD_DP32(desc, MTEDESC, TSIZE, mte_n << msz); + desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1); desc <<= SVE_MTEDESC_SHIFT; } else { addr = clean_data_tbi(s, addr); @@ -5002,7 +5001,7 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) clean_addr = gen_mte_check1(s, temp, false, true, msz); tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), - s->be_data | dtype_mop[a->dtype]); + finalize_memop(s, dtype_mop[a->dtype])); /* Broadcast to *all* elements. 
*/ tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), @@ -5189,7 +5188,7 @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); - desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz); + desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1); desc <<= SVE_MTEDESC_SHIFT; } desc = simd_desc(vsz, vsz, desc | scale); diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc index 10766f210c..e20d9c7ba6 100644 --- a/target/arm/translate-vfp.c.inc +++ b/target/arm/translate-vfp.c.inc @@ -1364,11 +1364,11 @@ static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a) addr = add_reg_for_lit(s, a->rn, offset); tmp = tcg_temp_new_i32(); if (a->l) { - gen_aa32_ld16u(s, tmp, addr, get_mem_index(s)); + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN); vfp_store_reg32(tmp, a->vd); } else { vfp_load_reg32(tmp, a->vd); - gen_aa32_st16(s, tmp, addr, get_mem_index(s)); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN); } tcg_temp_free_i32(tmp); tcg_temp_free_i32(addr); @@ -1398,11 +1398,11 @@ static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a) addr = add_reg_for_lit(s, a->rn, offset); tmp = tcg_temp_new_i32(); if (a->l) { - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); vfp_store_reg32(tmp, a->vd); } else { vfp_load_reg32(tmp, a->vd); - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); } tcg_temp_free_i32(tmp); tcg_temp_free_i32(addr); @@ -1439,11 +1439,11 @@ static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a) addr = add_reg_for_lit(s, a->rn, offset); tmp = tcg_temp_new_i64(); if (a->l) { - gen_aa32_ld64(s, tmp, addr, get_mem_index(s)); + gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4); vfp_store_reg64(tmp, a->vd); } else { vfp_load_reg64(tmp, a->vd); - gen_aa32_st64(s, tmp, addr, get_mem_index(s)); + gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4); } tcg_temp_free_i64(tmp); tcg_temp_free_i32(addr); @@ -1503,12 +1503,12 @@ static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a) for (i = 0; i < n; i++) { if (a->l) { /* load */ - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); vfp_store_reg32(tmp, a->vd + i); } else { /* store */ vfp_load_reg32(tmp, a->vd + i); - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); } tcg_gen_addi_i32(addr, addr, offset); } @@ -1586,12 +1586,12 @@ static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a) for (i = 0; i < n; i++) { if (a->l) { /* load */ - gen_aa32_ld64(s, tmp, addr, get_mem_index(s)); + gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4); vfp_store_reg64(tmp, a->vd + i); } else { /* store */ vfp_load_reg64(tmp, a->vd + i); - gen_aa32_st64(s, tmp, addr, get_mem_index(s)); + gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4); } tcg_gen_addi_i32(addr, addr, offset); } diff --git a/target/arm/translate.c b/target/arm/translate.c index 7103da2d7a..43ff0d4b8a 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -908,7 +908,23 @@ static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var) #define IS_USER_ONLY 0 #endif -/* 
Abstractions of "generate code to do a guest load/store for +MemOp pow2_align(unsigned i) +{ + static const MemOp mop_align[] = { + 0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16, + /* + * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such + * that 256-bit alignment (MO_ALIGN_32) cannot be supported: + * see get_alignment_bits(). Enforce only 128-bit alignment for now. + */ + MO_ALIGN_16 + }; + g_assert(i < ARRAY_SIZE(mop_align)); + return mop_align[i]; +} + +/* + * Abstractions of "generate code to do a guest load/store for * AArch32", where a vaddr is always 32 bits (and is zero * extended if we're a 64 bit core) and data is also * 32 bits unless specifically doing a 64 bit access. @@ -916,7 +932,7 @@ static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var) * that the address argument is TCGv_i32 rather than TCGv. */ -static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op) +static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op) { TCGv addr = tcg_temp_new(); tcg_gen_extu_i32_tl(addr, a32); @@ -928,80 +944,47 @@ static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op) return addr; } -static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32, - int index, MemOp opc) +/* + * Internal routines are used for NEON cases where the endianness + * and/or alignment has already been taken into account and manipulated. + */ +static void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val, + TCGv_i32 a32, int index, MemOp opc) { - TCGv addr; - - if (arm_dc_feature(s, ARM_FEATURE_M) && - !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) { - opc |= MO_ALIGN; - } - - addr = gen_aa32_addr(s, a32, opc); + TCGv addr = gen_aa32_addr(s, a32, opc); tcg_gen_qemu_ld_i32(val, addr, index, opc); tcg_temp_free(addr); } -static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32, - int index, MemOp opc) +static void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val, + TCGv_i32 a32, int index, MemOp opc) { - TCGv addr; - - if (arm_dc_feature(s, ARM_FEATURE_M) && - !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) { - opc |= MO_ALIGN; - } - - addr = gen_aa32_addr(s, a32, opc); + TCGv addr = gen_aa32_addr(s, a32, opc); tcg_gen_qemu_st_i32(val, addr, index, opc); tcg_temp_free(addr); } -#define DO_GEN_LD(SUFF, OPC) \ -static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \ - TCGv_i32 a32, int index) \ -{ \ - gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \ -} - -#define DO_GEN_ST(SUFF, OPC) \ -static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \ - TCGv_i32 a32, int index) \ -{ \ - gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \ -} - -static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val) -{ - /* Not needed for user-mode BE32, where we use MO_BE instead. */ - if (!IS_USER_ONLY && s->sctlr_b) { - tcg_gen_rotri_i64(val, val, 32); - } -} - -static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32, - int index, MemOp opc) +static void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val, + TCGv_i32 a32, int index, MemOp opc) { TCGv addr = gen_aa32_addr(s, a32, opc); + tcg_gen_qemu_ld_i64(val, addr, index, opc); - gen_aa32_frob64(s, val); + + /* Not needed for user-mode BE32, where we use MO_BE instead. 
*/ + if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) { + tcg_gen_rotri_i64(val, val, 32); + } tcg_temp_free(addr); } -static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val, - TCGv_i32 a32, int index) -{ - gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data); -} - -static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32, - int index, MemOp opc) +static void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val, + TCGv_i32 a32, int index, MemOp opc) { TCGv addr = gen_aa32_addr(s, a32, opc); /* Not needed for user-mode BE32, where we use MO_BE instead. */ - if (!IS_USER_ONLY && s->sctlr_b) { + if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) { TCGv_i64 tmp = tcg_temp_new_i64(); tcg_gen_rotri_i64(tmp, val, 32); tcg_gen_qemu_st_i64(tmp, addr, index, opc); @@ -1012,10 +995,54 @@ static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32, tcg_temp_free(addr); } +static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32, + int index, MemOp opc) +{ + gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc)); +} + +static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32, + int index, MemOp opc) +{ + gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc)); +} + +static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32, + int index, MemOp opc) +{ + gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc)); +} + +static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32, + int index, MemOp opc) +{ + gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc)); +} + +#define DO_GEN_LD(SUFF, OPC) \ + static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \ + TCGv_i32 a32, int index) \ + { \ + gen_aa32_ld_i32(s, val, a32, index, OPC); \ + } + +#define DO_GEN_ST(SUFF, OPC) \ + static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \ + TCGv_i32 a32, int index) \ + { \ + gen_aa32_st_i32(s, val, a32, index, OPC); \ + } + +static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val, + TCGv_i32 a32, int index) +{ + gen_aa32_ld_i64(s, val, a32, index, MO_Q); +} + static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32, int index) { - gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data); + gen_aa32_st_i64(s, val, a32, index, MO_Q); } DO_GEN_LD(8u, MO_UB) @@ -4984,16 +5011,13 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, TCGv_i32 tmp2 = tcg_temp_new_i32(); TCGv_i64 t64 = tcg_temp_new_i64(); - /* For AArch32, architecturally the 32-bit word at the lowest + /* + * For AArch32, architecturally the 32-bit word at the lowest * address is always Rt and the one at addr+4 is Rt2, even if * the CPU is big-endian. That means we don't want to do a - * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if - * for an architecturally 64-bit access, but instead do a - * 64-bit access using MO_BE if appropriate and then split - * the two halves. - * This only makes a difference for BE32 user-mode, where - * frob64() must not flip the two halves of the 64-bit data - * but this code must treat BE32 user-mode like BE32 system. + * gen_aa32_ld_i64(), which checks SCTLR_B as if for an + * architecturally 64-bit access, but instead do a 64-bit access + * using MO_BE if appropriate and then split the two halves. 
*/ TCGv taddr = gen_aa32_addr(s, addr, opc); @@ -5053,14 +5077,15 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, TCGv_i64 n64 = tcg_temp_new_i64(); t2 = load_reg(s, rt2); - /* For AArch32, architecturally the 32-bit word at the lowest + + /* + * For AArch32, architecturally the 32-bit word at the lowest * address is always Rt and the one at addr+4 is Rt2, even if * the CPU is big-endian. Since we're going to treat this as a * single 64-bit BE store, we need to put the two halves in the * opposite order for BE to LE, so that they end up in the right - * places. - * We don't want gen_aa32_frob64() because that does the wrong - * thing for BE32 usermode. + * places. We don't want gen_aa32_st_i64, because that checks + * SCTLR_B as if for an architectural 64-bit access. */ if (s->be_data == MO_BE) { tcg_gen_concat_i32_i64(n64, t2, t1); @@ -5190,11 +5215,11 @@ static void gen_srs(DisasContext *s, } tcg_gen_addi_i32(addr, addr, offset); tmp = load_reg(s, 14); - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); tcg_temp_free_i32(tmp); tmp = load_cpu_field(spsr); tcg_gen_addi_i32(addr, addr, 4); - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); tcg_temp_free_i32(tmp); if (writeback) { switch (amode) { @@ -6458,7 +6483,7 @@ static bool op_load_rr(DisasContext *s, arg_ldst_rr *a, addr = op_addr_rr_pre(s, a); tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data); + gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop); disas_set_da_iss(s, mop, issinfo); /* @@ -6476,10 +6501,18 @@ static bool op_store_rr(DisasContext *s, arg_ldst_rr *a, ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite; TCGv_i32 addr, tmp; + /* + * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it + * is either UNPREDICTABLE or has defined behaviour + */ + if (s->thumb && a->rn == 15) { + return false; + } + addr = op_addr_rr_pre(s, a); tmp = load_reg(s, a->rt); - gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data); + gen_aa32_st_i32(s, tmp, addr, mem_idx, mop); disas_set_da_iss(s, mop, issinfo); tcg_temp_free_i32(tmp); @@ -6502,13 +6535,13 @@ static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a) addr = op_addr_rr_pre(s, a); tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data); + gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); store_reg(s, a->rt, tmp); tcg_gen_addi_i32(addr, addr, 4); tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data); + gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); store_reg(s, a->rt + 1, tmp); /* LDRD w/ base writeback is undefined if the registers overlap. 
*/ @@ -6531,13 +6564,13 @@ static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a) addr = op_addr_rr_pre(s, a); tmp = load_reg(s, a->rt); - gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data); + gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); tcg_temp_free_i32(tmp); tcg_gen_addi_i32(addr, addr, 4); tmp = load_reg(s, a->rt + 1); - gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data); + gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); tcg_temp_free_i32(tmp); op_addr_rr_post(s, a, addr, -4); @@ -6602,7 +6635,7 @@ static bool op_load_ri(DisasContext *s, arg_ldst_ri *a, addr = op_addr_ri_pre(s, a); tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data); + gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop); disas_set_da_iss(s, mop, issinfo); /* @@ -6620,10 +6653,18 @@ static bool op_store_ri(DisasContext *s, arg_ldst_ri *a, ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite; TCGv_i32 addr, tmp; + /* + * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it + * is either UNPREDICTABLE or has defined behaviour + */ + if (s->thumb && a->rn == 15) { + return false; + } + addr = op_addr_ri_pre(s, a); tmp = load_reg(s, a->rt); - gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data); + gen_aa32_st_i32(s, tmp, addr, mem_idx, mop); disas_set_da_iss(s, mop, issinfo); tcg_temp_free_i32(tmp); @@ -6639,13 +6680,13 @@ static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2) addr = op_addr_ri_pre(s, a); tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data); + gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); store_reg(s, a->rt, tmp); tcg_gen_addi_i32(addr, addr, 4); tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data); + gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); store_reg(s, rt2, tmp); /* LDRD w/ base writeback is undefined if the registers overlap. */ @@ -6678,13 +6719,13 @@ static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2) addr = op_addr_ri_pre(s, a); tmp = load_reg(s, a->rt); - gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data); + gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); tcg_temp_free_i32(tmp); tcg_gen_addi_i32(addr, addr, 4); tmp = load_reg(s, rt2); - gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data); + gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); tcg_temp_free_i32(tmp); op_addr_ri_post(s, a, addr, -4); @@ -6910,7 +6951,7 @@ static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop) addr = load_reg(s, a->rn); tmp = load_reg(s, a->rt); tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); - gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN); disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite); tcg_temp_free_i32(tmp); @@ -7066,7 +7107,7 @@ static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop) addr = load_reg(s, a->rn); tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data); + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN); disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel); tcg_temp_free_i32(addr); @@ -7858,7 +7899,7 @@ static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n) } else { tmp = load_reg(s, i); } - gen_aa32_st32(s, tmp, addr, mem_idx); + gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); tcg_temp_free_i32(tmp); /* No need to add after the last transfer. 
*/ @@ -7933,7 +7974,7 @@ static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n) } tmp = tcg_temp_new_i32(); - gen_aa32_ld32u(s, tmp, addr, mem_idx); + gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); if (user) { tmp2 = tcg_const_i32(i); gen_helper_set_user_reg(cpu_env, tmp2, tmp); @@ -8250,8 +8291,7 @@ static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half) addr = load_reg(s, a->rn); tcg_gen_add_i32(addr, addr, tmp); - gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), - half ? MO_UW | s->be_data : MO_UB); + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB); tcg_temp_free_i32(addr); tcg_gen_add_i32(tmp, tmp, tmp); @@ -8332,10 +8372,10 @@ static bool trans_RFE(DisasContext *s, arg_RFE *a) /* Load PC into tmp and CPSR into tmp2. */ t1 = tcg_temp_new_i32(); - gen_aa32_ld32u(s, t1, addr, get_mem_index(s)); + gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN); tcg_gen_addi_i32(addr, addr, 4); t2 = tcg_temp_new_i32(); - gen_aa32_ld32u(s, t2, addr, get_mem_index(s)); + gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN); if (a->w) { /* Base writeback. */ @@ -8836,7 +8876,7 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) DisasContext *dc = container_of(dcbase, DisasContext, base); CPUARMState *env = cs->env_ptr; ARMCPU *cpu = env_archcpu(env); - uint32_t tb_flags = dc->base.tb->flags; + CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb); uint32_t condexec, core_mmu_idx; dc->isar = &cpu->isar; @@ -8848,46 +8888,43 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) */ dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3); - dc->thumb = FIELD_EX32(tb_flags, TBFLAG_AM32, THUMB); - dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE; - condexec = FIELD_EX32(tb_flags, TBFLAG_AM32, CONDEXEC); + dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB); + dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE; + condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC); dc->condexec_mask = (condexec & 0xf) << 1; dc->condexec_cond = condexec >> 4; - core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX); + core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX); dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx); dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); #if !defined(CONFIG_USER_ONLY) dc->user = (dc->current_el == 0); #endif - dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL); + dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL); + dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM); if (arm_feature(env, ARM_FEATURE_M)) { dc->vfp_enabled = 1; dc->be_data = MO_TE; - dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_M32, HANDLER); + dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER); dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) && regime_is_secure(env, dc->mmu_idx); - dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_M32, STACKCHECK); - dc->v8m_fpccr_s_wrong = - FIELD_EX32(tb_flags, TBFLAG_M32, FPCCR_S_WRONG); + dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK); + dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG); dc->v7m_new_fp_ctxt_needed = - FIELD_EX32(tb_flags, TBFLAG_M32, NEW_FP_CTXT_NEEDED); - dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_M32, LSPACT); + EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED); + dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT); } else { - dc->be_data = - FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? 
MO_BE : MO_LE;
-        dc->debug_target_el =
-            FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
-        dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
-        dc->hstr_active = FIELD_EX32(tb_flags, TBFLAG_A32, HSTR_ACTIVE);
-        dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
-        dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
+        dc->debug_target_el = EX_TBFLAG_ANY(tb_flags, DEBUG_TARGET_EL);
+        dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
+        dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
+        dc->ns = EX_TBFLAG_A32(tb_flags, NS);
+        dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
         if (arm_feature(env, ARM_FEATURE_XSCALE)) {
-            dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
+            dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
         } else {
-            dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
-            dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
+            dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
+            dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
         }
     }
     dc->cp_regs = cpu->cp_regs;
@@ -8908,8 +8945,8 @@
      * emit code to generate a software step exception
      * end the TB
      */
-    dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
-    dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
+    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
+    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
     dc->is_ldex = false;
 
     dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
@@ -9347,12 +9384,13 @@ void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
 {
     DisasContext dc = { };
     const TranslatorOps *ops = &arm_translator_ops;
+    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
 
-    if (FIELD_EX32(tb->flags, TBFLAG_AM32, THUMB)) {
+    if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
         ops = &thumb_translator_ops;
     }
 #ifdef TARGET_AARCH64
-    if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
+    if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
         ops = &aarch64_translator_ops;
     }
 #endif
diff --git a/target/arm/translate.h b/target/arm/translate.h
index 423b0e08df..ccf60c96d8 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -87,6 +87,8 @@ typedef struct DisasContext {
     bool bt;
     /* True if any CP15 access is trapped by HSTR_EL2 */
     bool hstr_active;
+    /* True if memory operations require alignment */
+    bool align_mem;
     /*
      * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI.
      * < 0, set by the current instruction.
      */
@@ -202,6 +204,7 @@ void arm_test_cc(DisasCompare *cmp, int cc);
 void arm_free_cc(DisasCompare *cmp);
 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label);
 void arm_gen_test_cc(int cc, TCGLabel *label);
+MemOp pow2_align(unsigned i);
 
 /* Return state of Alternate Half-precision flag, caller frees result */
 static inline TCGv_i32 get_ahp_flag(void)
@@ -394,6 +397,17 @@ typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
 typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
 typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp);
 
+/**
+ * arm_tbflags_from_tb:
+ * @tb: the TranslationBlock
+ *
+ * Extract the flag values from @tb.
+ */
+static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb)
+{
+    return (CPUARMTBFlags){ tb->flags, tb->cs_base };
+}
+
 /*
  * Enum for argument to fpstatus_ptr().
 */
@@ -446,4 +460,28 @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
     return statusptr;
 }
 
+/**
+ * finalize_memop:
+ * @s: DisasContext
+ * @opc: size+sign+align of the memory operation
+ *
+ * Build the complete MemOp for a memory operation, including alignment
+ * and endianness.
+ *
+ * If (op & MO_AMASK) then the operation already contains the required
+ * alignment, e.g. for AccType_ATOMIC. Otherwise, this is an optionally
+ * unaligned operation, e.g. for AccType_NORMAL.
+ *
+ * In the latter case, there are configuration bits that require alignment,
+ * and this is applied here. Note that there is no way to indicate that
+ * no alignment should ever be enforced; this must be handled manually.
+ */
+static inline MemOp finalize_memop(DisasContext *s, MemOp opc)
+{
+    if (s->align_mem && !(opc & MO_AMASK)) {
+        opc |= MO_ALIGN;
+    }
+    return opc | s->be_data;
+}
+
 #endif /* TARGET_ARM_TRANSLATE_H */
diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target
index 05b2622bfc..928357b10a 100644
--- a/tests/tcg/aarch64/Makefile.target
+++ b/tests/tcg/aarch64/Makefile.target
@@ -37,7 +37,7 @@ AARCH64_TESTS += bti-2
 
 # MTE Tests
 ifneq ($(DOCKER_IMAGE)$(CROSS_CC_HAS_ARMV8_MTE),)
-AARCH64_TESTS += mte-1 mte-2 mte-3 mte-4 mte-6
+AARCH64_TESTS += mte-1 mte-2 mte-3 mte-4 mte-5 mte-6
 mte-%: CFLAGS += -march=armv8.5-a+memtag
 endif
 
diff --git a/tests/tcg/aarch64/mte-5.c b/tests/tcg/aarch64/mte-5.c
new file mode 100644
index 0000000000..6dbd6ab3ea
--- /dev/null
+++ b/tests/tcg/aarch64/mte-5.c
@@ -0,0 +1,44 @@
+/*
+ * Memory tagging, faulting unaligned access.
+ *
+ * Copyright (c) 2021 Linaro Ltd
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "mte.h"
+
+void pass(int sig, siginfo_t *info, void *uc)
+{
+    assert(info->si_code == SEGV_MTESERR);
+    exit(0);
+}
+
+int main(int ac, char **av)
+{
+    struct sigaction sa;
+    void *p0, *p1, *p2;
+    long excl = 1;
+
+    enable_mte(PR_MTE_TCF_SYNC);
+    p0 = alloc_mte_mem(sizeof(*p0));
+
+    /* Create two differently tagged pointers. */
+    asm("irg %0,%1,%2" : "=r"(p1) : "r"(p0), "r"(excl));
+    asm("gmi %0,%1,%0" : "+r"(excl) : "r" (p1));
+    assert(excl != 1);
+    asm("irg %0,%1,%2" : "=r"(p2) : "r"(p0), "r"(excl));
+    assert(p1 != p2);
+
+    memset(&sa, 0, sizeof(sa));
+    sa.sa_sigaction = pass;
+    sa.sa_flags = SA_SIGINFO;
+    sigaction(SIGSEGV, &sa, NULL);
+
+    /* Store two different tags in sequential granules. */
+    asm("stg %0, [%0]" : : "r"(p1));
+    asm("stg %0, [%0]" : : "r"(p2 + 16));
+
+    /* Perform an unaligned load crossing the granules. */
+    asm volatile("ldr %0, [%1]" : "=r"(p0) : "r"(p1 + 12));
+    abort();
+}
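
A few illustrative notes on the mechanics above follow; none of the sketches below are QEMU code.

The gen_mte_checkN() and do_mem_zpa() hunks replace the pair of MTEDESC fields ESIZE/TSIZE with a single SIZEM1 field holding the total access size minus one, so one mte_check helper can serve single- and multi-register accesses alike. A minimal sketch of that encoding trick, with made-up field offsets (the real layout lives elsewhere in the target/arm code):

    #include <assert.h>
    #include <stdint.h>

    /* Hypothetical bit positions; illustrative only. */
    enum { SIZEM1_SHIFT = 12, SIZEM1_BITS = 12 };

    static uint32_t desc_set_sizem1(uint32_t desc, unsigned bytes)
    {
        /* Storing size - 1 lets sizes 1..2^N fit in exactly N bits. */
        assert(bytes >= 1 && bytes <= 1u << SIZEM1_BITS);
        return desc | (uint32_t)(bytes - 1) << SIZEM1_SHIFT;
    }

    static unsigned desc_get_size(uint32_t desc)
    {
        return ((desc >> SIZEM1_SHIFT) & ((1u << SIZEM1_BITS) - 1)) + 1;
    }

The same bias explains the (mte_n << msz) - 1 and (1 << msz) - 1 expressions in the translate-sve.c hunks: both store a byte count minus one.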
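In the translate-neon.c.inc hunks, the instruction's 2-bit align field is mapped through pow2_align(a->align + 2), i.e. non-zero values request 8-, 16- or 32-byte alignment. The same mapping in plain arithmetic (the function name is invented):

    #include <stdio.h>

    /* align field 0 means "no alignment required" */
    static unsigned vldst_align_bytes(unsigned align_field)
    {
        return align_field ? 1u << (align_field + 2) : 1;
    }

    int main(void)
    {
        for (unsigned a = 0; a < 4; a++) {
            printf("align=%u -> %u bytes\n", a, vldst_align_bytes(a));
        }
        return 0;   /* prints 1, 8, 16, 32 */
    }

Note that the 32-byte case is then clamped to 16 bytes inside pow2_align() itself, per its FIXME about TLB_FLAGS_MASK.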
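The translate-vfp.c.inc hunks pair MO_Q with MO_ALIGN_4 rather than natural alignment, reflecting that a 64-bit VLDR/VSTR/VLDM element transfer only needs word alignment; the alignment constraint is deliberately decoupled from the access size. A sketch of such a check (illustrative, not how TCG performs it):

    #include <stdbool.h>
    #include <stdint.h>

    /* An 8-byte FP access that must only be 4-byte aligned. */
    static bool vfp_dword_addr_ok(uint64_t addr)
    {
        return (addr & 3) == 0;   /* MO_ALIGN_4: low two bits clear */
    }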
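pow2_align() in translate.c turns an alignment exponent into a MemOp constant and, per its FIXME, clamps 256-bit alignment down to 128-bit. The same table expressed over byte counts (a sketch; the MO_ALIGN_* encodings themselves are TCG internals):

    #include <assert.h>

    static unsigned pow2_align_bytes(unsigned i)
    {
        /* 2^i bytes, except that i == 5 is clamped to 16 for now. */
        static const unsigned bytes[] = { 1, 2, 4, 8, 16, 16 };
        assert(i < sizeof(bytes) / sizeof(bytes[0]));
        return bytes[i];
    }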
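The gen_aa32_*_internal_i64() routines now apply the SCTLR.B fixup only to genuinely 64-bit accesses: under BE32 a doubleword is two big-endian words in ascending address order, so after one 64-bit big-endian access the two halves must be exchanged, which is exactly a 32-bit rotation. A standalone demonstration of why that rotation is the right fixup:

    #include <assert.h>
    #include <stdint.h>

    static uint64_t rotr64(uint64_t x, unsigned n)
    {
        return (x >> n) | (x << (64 - n));
    }

    int main(void)
    {
        uint64_t v = 0x1122334455667788ull;
        /* Rotating by 32 exchanges the high and low 32-bit words... */
        assert(rotr64(v, 32) == 0x5566778811223344ull);
        /* ...and is self-inverse, matching its use on both load and store. */
        assert(rotr64(rotr64(v, 32), 32) == v);
        return 0;
    }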
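Finally, finalize_memop() (added to translate.h above) composes the complete MemOp from three independent pieces: the caller's size/sign/explicit-alignment bits, the configuration-driven align_mem flag, and the endianness. A stand-alone sketch with invented flag values (TCG's real MO_* encodings in tcg/tcg.h differ):

    #include <stdbool.h>

    /* Stand-in MemOp bits, for illustration only. */
    enum {
        XMO_AMASK = 0x70,   /* any explicit alignment request */
        XMO_ALIGN = 0x10,   /* natural alignment */
        XMO_BE    = 0x80,   /* big-endian data access */
    };

    static int finalize_memop_sketch(int opc, bool align_mem, int be_data)
    {
        if (align_mem && !(opc & XMO_AMASK)) {
            opc |= XMO_ALIGN;   /* configuration requires alignment */
        }
        return opc | be_data;   /* fold in the endianness last */
    }

As in the real helper, an operation that already carries an explicit alignment (for example the MO_ALIGN_16 used for the 16-byte FP accesses above) is left untouched.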