From 32e7024eab4c703b2f2fb362380c93be8f1949e3 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 17 Jul 2018 20:06:47 -0400 Subject: [PATCH 1/8] tools/power turbostat: Update turbostat(8) RAPL throttling column description Explain that this column may increment for some throttling causes, and may not increment for others. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index d39e4ff7d0bf..a6db83a88e85 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -106,7 +106,7 @@ The system configuration dump (if --quiet is not used) is followed by statistics \fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. \fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters. \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. -\fBPkgTtmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor. +\fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor. \fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms. \fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz. \fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters. @@ -114,7 +114,7 @@ The system configuration dump (if --quiet is not used) is followed by statistics \fBCorWatt\fP Watts consumed by the core part of the package. \fBGFXWatt\fP Watts consumed by the Graphics part of the package -- available only on client processors. \fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors. -\fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. +\fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. Note that the system summary is the sum of the package throttling time, and thus may be higher than 100% on a multi-package system. Note that the meaning of this field is model specific. For example, some hardware increments this counter when RAPL responds to thermal limits, but does not increment this counter when RAPL responds to power limits. Comparing PkgWatt and PkgTmp to system limits is necessary. \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. .fi .SH TOO MUCH INFORMATION EXAMPLE From 9d83601a9cc1884d1b5706ee2acc661d558c6838 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 20 Jul 2018 14:47:03 -0400 Subject: [PATCH 2/8] tools/power turbostat: fix -S on UP systems The -S (system summary) option failed to print any data on a 1-processor system. Reported-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4d14bbbf9b63..81a1df0fb5e2 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1163,9 +1163,7 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ if (!printed || !summary_only) print_header("\t"); - if (topo.num_cpus > 1) - format_counters(&average.threads, &average.cores, - &average.packages); + format_counters(&average.threads, &average.cores, &average.packages); printed = 1; From 4f206a0fabc3e806349add0996b3a999739559d2 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 25 Jul 2018 11:52:06 +0300 Subject: [PATCH 3/8] tools/power turbostat: fix bogus summary values This patch fixes a regression introduced in commit 8cb48b32a5de ("tools/power turbostat: track thread ID in cpu_topology") Turbostat uses incorrect cores number ('topo.num_cores') - its value is count of logical CPUs, instead of count of physical cores. So it is twice as large as it should be on a typical Intel system. For example, on a 6 core Xeon system 'topo.num_cores' is 12, and on a 52 core Xeon system 'topo.num_cores' is 104. And interestingly, on a 68-core Knights Landing Intel system 'topo.num_cores' is 272, because this system has 4 logical CPUs per core. As a result, some of the turbostat calculations are incorrect. For example, on idle 52-core Xeon system when all cores are ~99% in Core C6 (CPU%c6), the summary (very first) line shows ~48% Core C6, while it should be ~99%. This patch fixes the problem by fixing 'topo.num_cores' calculation. Was: 1. Init 'thread_id' for all CPUs to -1 2. Run 'get_thread_siblings()' which sets it to 0 or 1 3. Increment 'topo.num_cores' when thread_id != -1 (bug!) Now: 1. Init 'thread_id' for all CPUs to -1 2. Run 'get_thread_siblings()' which sets it to 0 or 1 3. Increment 'topo.num_cores' when thread_id is not 0 I did not have a chance to test this on an AMD machine, and only tested on a couple of Intel Xeons (6 and 52 cores). Reported-by: Vladislav Govtva Signed-off-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 81a1df0fb5e2..1b53a2489ebb 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4838,7 +4838,7 @@ void topology_probe() siblings = get_thread_siblings(&cpus[i]); if (siblings > max_siblings) max_siblings = siblings; - if (cpus[i].thread_id != -1) + if (cpus[i].thread_id == 0) topo.num_cores++; if (debug > 1) From cfce494db3bfccd2a0774652b95f286639acef36 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 25 Jul 2018 17:25:29 -0400 Subject: [PATCH 4/8] tools/power turbostat: fix x2apic debug message output file A recently added x2apic debug message was hard-coded to stderr. That doesn't work with "-o outfile". Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 1b53a2489ebb..02e71accad16 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1690,7 +1690,7 @@ void get_apic_id(struct thread_data *t) t->x2apic_id = edx; if (debug && (t->apic_id != t->x2apic_id)) - fprintf(stderr, "cpu%d: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id); + fprintf(outf, "cpu%d: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id); } /* From 2ffbb22406079fec2c3a6ad6ee1dc99fede740ac Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Thu, 26 Jul 2018 09:08:54 -0400 Subject: [PATCH 5/8] tools/power turbostat: Fix logical node enumeration to allow for non-sequential physical nodes turbostat fails on some multi-package topologies because the logical node enumeration assumes that the nodes are sequentially numbered, which causes the logical numa nodes to not be enumerated, or enumerated incorrectly. Use a more robust enumeration algorithm which allows for non-seqential physical nodes. Signed-off-by: Prarit Bhargava Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 102 ++++++++++++-------------- 1 file changed, 48 insertions(+), 54 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 02e71accad16..2b0135599f37 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2471,55 +2471,43 @@ int get_core_id(int cpu) void set_node_data(void) { - char path[80]; - FILE *filep; - int pkg, node, cpu; + int pkg, node, lnode, cpu, cpux; + int cpu_count; - struct pkg_node_info { - int count; - int min; - } *pni; + /* initialize logical_node_id */ + for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) + cpus[cpu].logical_node_id = -1; - pni = calloc(topo.num_packages, sizeof(struct pkg_node_info)); - if (!pni) - err(1, "calloc pkg_node_count"); - - for (pkg = 0; pkg < topo.num_packages; pkg++) - pni[pkg].min = topo.num_cpus; - - for (node = 0; node <= topo.max_node_num; node++) { - /* find the "first" cpu in the node */ - sprintf(path, "/sys/bus/node/devices/node%d/cpulist", node); - filep = fopen(path, "r"); - if (!filep) - continue; - fscanf(filep, "%d", &cpu); - fclose(filep); - - pkg = cpus[cpu].physical_package_id; - pni[pkg].count++; - - if (node < pni[pkg].min) - pni[pkg].min = node; + cpu_count = 0; + for (pkg = 0; pkg < topo.num_packages; pkg++) { + lnode = 0; + for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) { + if (cpus[cpu].physical_package_id != pkg) + continue; + /* find a cpu with an unset logical_node_id */ + if (cpus[cpu].logical_node_id != -1) + continue; + cpus[cpu].logical_node_id = lnode; + node = cpus[cpu].physical_node_id; + cpu_count++; + /* + * find all matching cpus on this pkg and set + * the logical_node_id + */ + for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) { + if ((cpus[cpux].physical_package_id == pkg) && + (cpus[cpux].physical_node_id == node)) { + cpus[cpux].logical_node_id = lnode; + cpu_count++; + } + } + lnode++; + if (lnode > topo.nodes_per_pkg) + topo.nodes_per_pkg = lnode; + } + if (cpu_count >= topo.max_cpu_num) + break; } - - for (pkg = 0; pkg < topo.num_packages; pkg++) - if (pni[pkg].count > topo.nodes_per_pkg) - topo.nodes_per_pkg = pni[0].count; - - /* Fake 1 node per pkg for machines that don't - * expose nodes and thus avoid -nan results - */ - if (topo.nodes_per_pkg == 0) - topo.nodes_per_pkg = 1; - - for (cpu = 0; cpu < topo.num_cpus; cpu++) { - pkg = cpus[cpu].physical_package_id; - node = cpus[cpu].physical_node_id; - cpus[cpu].logical_node_id = node - pni[pkg].min; - } - free(pni); - } int get_physical_node_id(struct cpu_topology *thiscpu) @@ -4840,14 +4828,6 @@ void topology_probe() max_siblings = siblings; if (cpus[i].thread_id == 0) topo.num_cores++; - - if (debug > 1) - fprintf(outf, - "cpu %d pkg %d node %d core %d thread %d\n", - i, cpus[i].physical_package_id, - cpus[i].physical_node_id, - cpus[i].physical_core_id, - cpus[i].thread_id); } topo.cores_per_node = max_core_id + 1; @@ -4873,6 +4853,20 @@ void topology_probe() topo.threads_per_core = max_siblings; if (debug > 1) fprintf(outf, "max_siblings %d\n", max_siblings); + + if (debug < 1) + return; + + for (i = 0; i <= topo.max_cpu_num; ++i) { + fprintf(outf, + "cpu %d pkg %d node %d lnode %d core %d thread %d\n", + i, cpus[i].physical_package_id, + cpus[i].physical_node_id, + cpus[i].logical_node_id, + cpus[i].physical_core_id, + cpus[i].thread_id); + } + } void From 5aa3d1a20a233d4a5f1ec3d62da3f19d9afea682 Mon Sep 17 00:00:00 2001 From: Calvin Walton Date: Fri, 27 Jul 2018 07:50:53 -0400 Subject: [PATCH 6/8] tools/power turbostat: Read extended processor family from CPUID This fixes the reported family on modern AMD processors (e.g. Ryzen, which is family 0x17). Previously these processors all showed up as family 0xf. See the document https://support.amd.com/TechDocs/56255_OSRR.pdf section CPUID_Fn00000001_EAX for how to calculate the family from the BaseFamily and ExtFamily values. This matches the code in arch/x86/lib/cpu.c Signed-off-by: Calvin Walton Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 2b0135599f37..6c8effebf7c5 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4457,7 +4457,9 @@ void process_cpuid() family = (fms >> 8) & 0xf; model = (fms >> 4) & 0xf; stepping = fms & 0xf; - if (family == 6 || family == 0xf) + if (family == 0xf) + family += (fms >> 20) & 0xff; + if (family >= 6) model += ((fms >> 16) & 0xf) << 4; if (!quiet) { From 538c48f27ac669cebd6d9abe1ce8b46d55f917ee Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 27 Jul 2018 12:55:08 -0400 Subject: [PATCH 7/8] tools/power turbostat: version 18.07.27 Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 6c8effebf7c5..980bd9d20646 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5096,7 +5096,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 18.06.20" + fprintf(outf, "turbostat version 18.07.27" " - Len Brown \n"); } From 01e61a42a5d345a4c0205889498f0c9a0fb9ee8c Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 30 Jul 2018 15:00:29 -0700 Subject: [PATCH 8/8] cpufreq: intel_pstate: Limit the scope of HWP dynamic boost platforms Dynamic boosting of HWP performance on IO wake showed significant improvement to IO workloads. This series was intended for Skylake Xeon platforms only and feature was enabled by default based on CPU model number. But some Xeon platforms reused the Skylake desktop CPU model number. This caused some undesirable side effects to some graphics workloads. Since they are heavily IO bound, the increase in CPU performance decreased the power available for GPU to do its computing and hence decrease in graphics benchmark performance. For example on a Skylake desktop, GpuTest benchmark showed average FPS reduction from 529 to 506. This change makes sure that HWP boost feature is only enabled for Skylake server platforms by using ACPI FADT preferred PM Profile. If some desktop users wants to get benefit of boost, they can still enable boost from intel_pstate sysfs attribute "hwp_dynamic_boost". Fixes: 41ab43c9c89e (cpufreq: intel_pstate: enable boost for Skylake Xeon) Link: https://bugs.freedesktop.org/show_bug.cgi?id=107410 Reported-by: Eero Tamminen Signed-off-by: Srinivas Pandruvada Reviewed-by: Francisco Jerez Acked-by: Mel Gorman Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 3c3971256130..d4ed0022b0dd 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -311,12 +311,20 @@ static DEFINE_MUTEX(intel_pstate_limits_lock); #ifdef CONFIG_ACPI -static bool intel_pstate_get_ppc_enable_status(void) +static bool intel_pstate_acpi_pm_profile_server(void) { if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER || acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER) return true; + return false; +} + +static bool intel_pstate_get_ppc_enable_status(void) +{ + if (intel_pstate_acpi_pm_profile_server()) + return true; + return acpi_ppc; } @@ -459,6 +467,11 @@ static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *pol static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) { } + +static inline bool intel_pstate_acpi_pm_profile_server(void) +{ + return false; +} #endif static inline void update_turbo_state(void) @@ -1841,7 +1854,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum) intel_pstate_hwp_enable(cpu); id = x86_match_cpu(intel_pstate_hwp_boost_ids); - if (id) + if (id && intel_pstate_acpi_pm_profile_server()) hwp_boost = true; }