From 1031a1592908ccd3240f4a5731c96c382c932310 Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Sat, 1 Jul 2017 12:03:35 +0530 Subject: [PATCH 1/7] arm64: perf: Allow more than one cycle counter to be used Currently: $ perf stat -e cycles:u -e cycles:k true Performance counter stats for 'true': 2,24,699 cycles:u cycles:k (0.00%) 0.000788087 seconds time elapsed We can not count more than one cycle counter in one instance,because we allow to map cycle counter into PMCCNTR_EL0 only. However, if I did not miss anything then specification do not prohibit to use PMEVCNTR_EL0 for cycle count as well. Modify the code so that it still prefers to use PMCCNTR_EL0 for cycle counter, however allow to use PMEVCNTR_EL0 if PMCCNTR_EL0 is already in use. After this patch: $ perf stat -e cycles:u -e cycles:k true Performance counter stats for 'true': 2,17,310 cycles:u 7,40,009 cycles:k 0.000764149 seconds time elapsed Signed-off-by: Pratyush Anand Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index b5798ba21189..372317667773 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -846,17 +846,14 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, struct hw_perf_event *hwc = &event->hw; unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; - /* Always place a cycle counter into the cycle counter. */ + /* Always prefer to place a cycle counter into the cycle counter. */ if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) { - if (test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV8_IDX_CYCLE_COUNTER; + if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) + return ARMV8_IDX_CYCLE_COUNTER; } /* - * For anything other than a cycle counter, try and use - * the events counters + * Otherwise use events counters */ for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) { if (!test_and_set_bit(idx, cpuc->used_mask)) From c1be2ddb1e25ecf425d93a81dc64a650b9ff3107 Mon Sep 17 00:00:00 2001 From: Tai Nguyen Date: Thu, 13 Jul 2017 11:19:08 -0700 Subject: [PATCH 2/7] perf: xgene: Remove unnecessary managed resources cleanup Managed resources in the driver should be automatically cleaned up on driver detach. It's unnecessary to manually free/unmmap these resources. One of the manual cleanup causes static checkers to complain. The bug is reported by Dan Carpenter in [1] [1] https://www.spinics.net/lists/arm-kernel/msg593012.html This patch gets rid of all the unnecessary manual cleanup and properly unregister all the registered PMU devices by the driver on driver detach. Signed-off-by: Tai Nguyen Signed-off-by: Will Deacon --- drivers/perf/xgene_pmu.c | 74 ++++++++++++---------------------------- 1 file changed, 22 insertions(+), 52 deletions(-) diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index e841282d690c..eb23311bc70c 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -1147,7 +1147,6 @@ xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx) { struct device *dev = xgene_pmu->dev; struct xgene_pmu_dev *pmu; - int rc; pmu = devm_kzalloc(dev, sizeof(*pmu), GFP_KERNEL); if (!pmu) @@ -1159,7 +1158,7 @@ xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx) switch (pmu->inf->type) { case PMU_TYPE_L3C: if (!(xgene_pmu->l3c_active_mask & pmu->inf->enable_mask)) - goto dev_err; + return -ENODEV; if (xgene_pmu->version == PCP_PMU_V3) pmu->attr_groups = l3c_pmu_v3_attr_groups; else @@ -1177,7 +1176,7 @@ xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx) break; case PMU_TYPE_MCB: if (!(xgene_pmu->mcb_active_mask & pmu->inf->enable_mask)) - goto dev_err; + return -ENODEV; if (xgene_pmu->version == PCP_PMU_V3) pmu->attr_groups = mcb_pmu_v3_attr_groups; else @@ -1185,7 +1184,7 @@ xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx) break; case PMU_TYPE_MC: if (!(xgene_pmu->mc_active_mask & pmu->inf->enable_mask)) - goto dev_err; + return -ENODEV; if (xgene_pmu->version == PCP_PMU_V3) pmu->attr_groups = mc_pmu_v3_attr_groups; else @@ -1195,19 +1194,14 @@ xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx) return -EINVAL; } - rc = xgene_init_perf(pmu, ctx->name); - if (rc) { + if (xgene_init_perf(pmu, ctx->name)) { dev_err(dev, "%s PMU: Failed to init perf driver\n", ctx->name); - goto dev_err; + return -ENODEV; } dev_info(dev, "%s PMU registered\n", ctx->name); - return rc; - -dev_err: - devm_kfree(dev, pmu); - return -ENODEV; + return 0; } static void _xgene_pmu_isr(int irq, struct xgene_pmu_dev *pmu_dev) @@ -1515,13 +1509,13 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu, acpi_dev_free_resource_list(&resource_list); if (rc < 0) { dev_err(dev, "PMU type %d: No resource address found\n", type); - goto err; + return NULL; } dev_csr = devm_ioremap_resource(dev, &res); if (IS_ERR(dev_csr)) { dev_err(dev, "PMU type %d: Fail to map resource\n", type); - goto err; + return NULL; } /* A PMU device node without enable-bit-index is always enabled */ @@ -1535,7 +1529,7 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu, ctx->name = xgene_pmu_dev_name(dev, type, enable_bit); if (!ctx->name) { dev_err(dev, "PMU type %d: Fail to get device name\n", type); - goto err; + return NULL; } inf = &ctx->inf; inf->type = type; @@ -1543,9 +1537,6 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu, inf->enable_mask = 1 << enable_bit; return ctx; -err: - devm_kfree(dev, ctx); - return NULL; } static const struct acpi_device_id xgene_pmu_acpi_type_match[] = { @@ -1663,20 +1654,20 @@ xgene_pmu_dev_ctx *fdt_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu, void __iomem *dev_csr; struct resource res; int enable_bit; - int rc; ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); if (!ctx) return NULL; - rc = of_address_to_resource(np, 0, &res); - if (rc < 0) { + + if (of_address_to_resource(np, 0, &res) < 0) { dev_err(dev, "PMU type %d: No resource address found\n", type); - goto err; + return NULL; } + dev_csr = devm_ioremap_resource(dev, &res); if (IS_ERR(dev_csr)) { dev_err(dev, "PMU type %d: Fail to map resource\n", type); - goto err; + return NULL; } /* A PMU device node without enable-bit-index is always enabled */ @@ -1686,17 +1677,15 @@ xgene_pmu_dev_ctx *fdt_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu, ctx->name = xgene_pmu_dev_name(dev, type, enable_bit); if (!ctx->name) { dev_err(dev, "PMU type %d: Fail to get device name\n", type); - goto err; + return NULL; } + inf = &ctx->inf; inf->type = type; inf->csr = dev_csr; inf->enable_mask = 1 << enable_bit; return ctx; -err: - devm_kfree(dev, ctx); - return NULL; } static int fdt_pmu_probe_pmu_dev(struct xgene_pmu *xgene_pmu, @@ -1868,22 +1857,20 @@ static int xgene_pmu_probe(struct platform_device *pdev) xgene_pmu->pcppmu_csr = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(xgene_pmu->pcppmu_csr)) { dev_err(&pdev->dev, "ioremap failed for PCP PMU resource\n"); - rc = PTR_ERR(xgene_pmu->pcppmu_csr); - goto err; + return PTR_ERR(xgene_pmu->pcppmu_csr); } irq = platform_get_irq(pdev, 0); if (irq < 0) { dev_err(&pdev->dev, "No IRQ resource\n"); - rc = -EINVAL; - goto err; + return -EINVAL; } rc = devm_request_irq(&pdev->dev, irq, xgene_pmu_isr, IRQF_NOBALANCING | IRQF_NO_THREAD, dev_name(&pdev->dev), xgene_pmu); if (rc) { dev_err(&pdev->dev, "Could not request IRQ %d\n", irq); - goto err; + return rc; } raw_spin_lock_init(&xgene_pmu->lock); @@ -1903,42 +1890,29 @@ static int xgene_pmu_probe(struct platform_device *pdev) rc = irq_set_affinity(irq, &xgene_pmu->cpu); if (rc) { dev_err(&pdev->dev, "Failed to set interrupt affinity!\n"); - goto err; + return rc; } /* Walk through the tree for all PMU perf devices */ rc = xgene_pmu_probe_pmu_dev(xgene_pmu, pdev); if (rc) { dev_err(&pdev->dev, "No PMU perf devices found!\n"); - goto err; + return rc; } /* Enable interrupt */ xgene_pmu->ops->unmask_int(xgene_pmu); return 0; - -err: - if (xgene_pmu->pcppmu_csr) - devm_iounmap(&pdev->dev, xgene_pmu->pcppmu_csr); - devm_kfree(&pdev->dev, xgene_pmu); - - return rc; } static void xgene_pmu_dev_cleanup(struct xgene_pmu *xgene_pmu, struct list_head *pmus) { struct xgene_pmu_dev_ctx *ctx; - struct device *dev = xgene_pmu->dev; - struct xgene_pmu_dev *pmu_dev; list_for_each_entry(ctx, pmus, next) { - pmu_dev = ctx->pmu_dev; - if (pmu_dev->inf->csr) - devm_iounmap(dev, pmu_dev->inf->csr); - devm_kfree(dev, ctx); - devm_kfree(dev, pmu_dev); + perf_pmu_unregister(&ctx->pmu_dev->pmu); } } @@ -1951,10 +1925,6 @@ static int xgene_pmu_remove(struct platform_device *pdev) xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->mcbpmus); xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->mcpmus); - if (xgene_pmu->pcppmu_csr) - devm_iounmap(&pdev->dev, xgene_pmu->pcppmu_csr); - devm_kfree(&pdev->dev, xgene_pmu); - return 0; } From 6c833bb9247ed51028279ef7b82ebbbe60d789e3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 8 Aug 2017 16:58:33 +0100 Subject: [PATCH 3/7] arm64: perf: Allow standard PMUv3 events to be extended by the CPU type Rather than continue adding CPU-specific event maps, instead look up by default in the PMUv3 event map and only fallback to the CPU-specific maps if either the event isn't described by PMUv3, or it is described but the PMCEID registers say that it is unsupported by the current CPU. Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 46 +++++++++++++++++++--------------- drivers/perf/arm_pmu.c | 6 +++++ 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 372317667773..b83f986e7fbf 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -921,7 +921,13 @@ static void armv8pmu_reset(void *info) ARMV8_PMU_PMCR_LC); } -static int armv8_pmuv3_map_event(struct perf_event *event) +static int __armv8_pmuv3_map_event(struct perf_event *event, + const unsigned (*extra_event_map) + [PERF_COUNT_HW_MAX], + const unsigned (*extra_cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]) { int hw_event_id; struct arm_pmu *armpmu = to_arm_pmu(event->pmu); @@ -929,44 +935,44 @@ static int armv8_pmuv3_map_event(struct perf_event *event) hw_event_id = armpmu_map_event(event, &armv8_pmuv3_perf_map, &armv8_pmuv3_perf_cache_map, ARMV8_PMU_EVTYPE_EVENT); - if (hw_event_id < 0) - return hw_event_id; - /* disable micro/arch events not supported by this PMU */ - if ((hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) && - !test_bit(hw_event_id, armpmu->pmceid_bitmap)) { - return -EOPNOTSUPP; + /* Onl expose micro/arch events supported by this PMU */ + if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) + && test_bit(hw_event_id, armpmu->pmceid_bitmap)) { + return hw_event_id; } - return hw_event_id; + return armpmu_map_event(event, extra_event_map, extra_cache_map, + ARMV8_PMU_EVTYPE_EVENT); +} + +static int armv8_pmuv3_map_event(struct perf_event *event) +{ + return __armv8_pmuv3_map_event(event, NULL, NULL); } static int armv8_a53_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_a53_perf_map, - &armv8_a53_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + return __armv8_pmuv3_map_event(event, &armv8_a53_perf_map, + &armv8_a53_perf_cache_map); } static int armv8_a57_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_a57_perf_map, - &armv8_a57_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + return __armv8_pmuv3_map_event(event, &armv8_a57_perf_map, + &armv8_a57_perf_cache_map); } static int armv8_thunder_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_thunder_perf_map, - &armv8_thunder_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + return __armv8_pmuv3_map_event(event, &armv8_thunder_perf_map, + &armv8_thunder_perf_cache_map); } static int armv8_vulcan_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_vulcan_perf_map, - &armv8_vulcan_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + return __armv8_pmuv3_map_event(event, &armv8_vulcan_perf_map, + &armv8_vulcan_perf_cache_map); } struct armv8pmu_probe_info { diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 1c5e0f333779..d14fc2e67f93 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -47,6 +47,9 @@ armpmu_map_cache_event(const unsigned (*cache_map) if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) return -EINVAL; + if (!cache_map) + return -ENOENT; + ret = (int)(*cache_map)[cache_type][cache_op][cache_result]; if (ret == CACHE_OP_UNSUPPORTED) @@ -63,6 +66,9 @@ armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config) if (config >= PERF_COUNT_HW_MAX) return -EINVAL; + if (!event_map) + return -ENOENT; + mapping = (*event_map)[config]; return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping; } From 5cf7fb26ea841e31488723a32b1613e6b5b876fe Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Tue, 25 Jul 2017 17:27:36 +0100 Subject: [PATCH 4/7] arm64: perf: Connect additional events to pmu counters Last level caches and node events were almost never connected in current supported cores. We connect last level caches to the actual last level within the core and node events are connected to bus accesses. Signed-off-by: Julien Thierry Cc: Will Deacon Cc: Mark Rutland Cc: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index b83f986e7fbf..f336753d5baa 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -302,6 +302,9 @@ static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, + + [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, + [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] @@ -317,6 +320,11 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE, + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL, + [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE, + [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL, + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, @@ -326,6 +334,9 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, + + [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, + [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] From d0d09d4d99e08767050bc30f2b19d6146abe01e2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 8 Aug 2017 17:11:27 +0100 Subject: [PATCH 5/7] arm64: perf: Remove redundant entries from CPU-specific event maps Now that the event mapping code always looks into the PMUv3 events before any extended mappings, the extended mappings can be reduced to only those events that are not discoverable through the PMCEID registers. Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 114 ++------------------------------- 1 file changed, 4 insertions(+), 110 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index f336753d5baa..f7737f6dcc36 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -202,55 +202,6 @@ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND, }; -/* ARM Cortex-A53 HW events mapping. */ -static const unsigned armv8_a53_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, -}; - -/* ARM Cortex-A57 and Cortex-A72 events mapping. */ -static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, -}; - -static const unsigned armv8_thunder_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND, -}; - -/* Broadcom Vulcan events mapping */ -static const unsigned armv8_vulcan_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_BR_RETIRED, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND, -}; - static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { @@ -281,28 +232,8 @@ static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { PERF_CACHE_MAP_ALL_UNSUPPORTED, - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREF_LINEFILL, - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, - - [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE, - [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL, - [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE, - [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL, - - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL, - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; @@ -317,24 +248,9 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR, - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, - - [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE, - [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL, - [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE, - [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL, - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; @@ -351,8 +267,6 @@ static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS, [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS, - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS, [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS, @@ -360,13 +274,6 @@ static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR, [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, }; static const unsigned armv8_vulcan_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] @@ -379,22 +286,11 @@ static const unsigned armv8_vulcan_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR, - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, - [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB, - [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD, [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR, [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; @@ -964,25 +860,23 @@ static int armv8_pmuv3_map_event(struct perf_event *event) static int armv8_a53_map_event(struct perf_event *event) { - return __armv8_pmuv3_map_event(event, &armv8_a53_perf_map, - &armv8_a53_perf_cache_map); + return __armv8_pmuv3_map_event(event, NULL, &armv8_a53_perf_cache_map); } static int armv8_a57_map_event(struct perf_event *event) { - return __armv8_pmuv3_map_event(event, &armv8_a57_perf_map, - &armv8_a57_perf_cache_map); + return __armv8_pmuv3_map_event(event, NULL, &armv8_a57_perf_cache_map); } static int armv8_thunder_map_event(struct perf_event *event) { - return __armv8_pmuv3_map_event(event, &armv8_thunder_perf_map, + return __armv8_pmuv3_map_event(event, NULL, &armv8_thunder_perf_cache_map); } static int armv8_vulcan_map_event(struct perf_event *event) { - return __armv8_pmuv3_map_event(event, &armv8_vulcan_perf_map, + return __armv8_pmuv3_map_event(event, NULL, &armv8_vulcan_perf_cache_map); } From 5561b6c5e9813df16d7453f6ce1a0546221fca97 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Wed, 9 Aug 2017 17:46:38 +0100 Subject: [PATCH 6/7] arm64: perf: add support for Cortex-A73 The Cortex-A73 uses some implementation defined perf events. This patch sets up the necessary mapping for Cortex-A73. Mappings are based on Cortex-A73 TRM r0p2, section 11.9 Events (pages 11-457 to 11-460). Signed-off-by: Julien Thierry Cc: Will Deacon Cc: Mark Rutland Cc: Catalin Marinas Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/arm/pmu.txt | 1 + arch/arm64/kernel/perf_event.c | 37 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt index 61c8b4620415..54c9727c70d8 100644 --- a/Documentation/devicetree/bindings/arm/pmu.txt +++ b/Documentation/devicetree/bindings/arm/pmu.txt @@ -9,6 +9,7 @@ Required properties: - compatible : should be one of "apm,potenza-pmu" "arm,armv8-pmuv3" + "arm,cortex-a73-pmu" "arm,cortex-a72-pmu" "arm,cortex-a57-pmu" "arm,cortex-a53-pmu" diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index f7737f6dcc36..3fc00f61f729 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -255,6 +255,21 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; +static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, + + [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, + [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, + + [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, + [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, +}; + static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { @@ -868,6 +883,11 @@ static int armv8_a57_map_event(struct perf_event *event) return __armv8_pmuv3_map_event(event, NULL, &armv8_a57_perf_cache_map); } +static int armv8_a73_map_event(struct perf_event *event) +{ + return __armv8_pmuv3_map_event(event, NULL, &armv8_a73_perf_cache_map); +} + static int armv8_thunder_map_event(struct perf_event *event) { return __armv8_pmuv3_map_event(event, NULL, @@ -1018,6 +1038,22 @@ static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) return 0; } +static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu) +{ + int ret = armv8_pmu_init(cpu_pmu); + if (ret) + return ret; + + cpu_pmu->name = "armv8_cortex_a73"; + cpu_pmu->map_event = armv8_a73_map_event; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; + + return 0; +} + static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) { int ret = armv8_pmu_init(cpu_pmu); @@ -1055,6 +1091,7 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init}, {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init}, {.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init}, + {.compatible = "arm,cortex-a73-pmu", .data = armv8_a73_pmu_init}, {.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init}, {.compatible = "brcm,vulcan-pmu", .data = armv8_vulcan_pmu_init}, {}, From e884f80cf2a76a86547e2316982e1f200f556ddf Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Wed, 9 Aug 2017 17:46:39 +0100 Subject: [PATCH 7/7] arm64: perf: add support for Cortex-A35 The Cortex-A35 uses some implementation defined perf events. The Cortex-A35 derives from the Cortex-A53 core, using the same event mapings based on Cortex-A35 TRM r0p2, section C2.3 - Performance monitoring events (pages C2-562 to C2-565). Signed-off-by: Julien Thierry Cc: Will Deacon Cc: Mark Rutland Cc: Catalin Marinas Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/arm/pmu.txt | 1 + arch/arm64/kernel/perf_event.c | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt index 54c9727c70d8..13611a8199bb 100644 --- a/Documentation/devicetree/bindings/arm/pmu.txt +++ b/Documentation/devicetree/bindings/arm/pmu.txt @@ -13,6 +13,7 @@ Required properties: "arm,cortex-a72-pmu" "arm,cortex-a57-pmu" "arm,cortex-a53-pmu" + "arm,cortex-a35-pmu" "arm,cortex-a17-pmu" "arm,cortex-a15-pmu" "arm,cortex-a12-pmu" diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 3fc00f61f729..9eaef51f83ff 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -990,6 +990,22 @@ static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) return 0; } +static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu) +{ + int ret = armv8_pmu_init(cpu_pmu); + if (ret) + return ret; + + cpu_pmu->name = "armv8_cortex_a35"; + cpu_pmu->map_event = armv8_a53_map_event; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; + + return 0; +} + static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) { int ret = armv8_pmu_init(cpu_pmu); @@ -1088,6 +1104,7 @@ static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init}, + {.compatible = "arm,cortex-a35-pmu", .data = armv8_a35_pmu_init}, {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init}, {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init}, {.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init},