- Add logic to correct MBM total and local values fixing errata SKX99 and BDF102
(Fenghua Yu) - Cleanups. -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAl/XhTUACgkQEsHwGGHe VUpTkA/9FNOaJohBa9XO2bv3RjsqE4/2PqZS434HJL6yrbbRDFyscSp2sNCDlfh/ 6zj9R72HpRf6xLW8CTeszrfUQ0z3CFnfwz8EAolhv5DOiJnM3wSS5inmLhtyTMgw mJ8qVXXELdAFm1R8rAQLVmA3FE9aV6u19POstKeXMUykCyaxKDhNLJgn3CiXpegU AvG3QWmrR/1x4DjQguMNwXMQiYVDkRdnfJa5SOzCTwyUj0D0an+kPmSHEMvhaOL6 tUYfjLkZYdB5THG8eUM833EJmgRe7X1VtTQIydcVyt/6tbL50FmVYUpWk+SXuSnJ /uBdzx0IXmfvYKgGSFw+FGOyGH8u02nR4621rECNLNf1h8YknzpN7Ri1hB83GjQe PMiW/gCwxM6gfRsBpJ17xnAbmR5Az2dOz71uj5k13L1GNL8VZD2DZz//a4TDarzE 4R8i3PQ/oUMgmL+ARpVWwycc/dhB8o+1glmAjOwWHO/kM1k/hx9Ou5bbLlhffbL5 zkk6ORrNfKzAMvE1flkjim5XgNHY8n/L4CwBKXJFQ3IYbXBGg0CKxBHWvTpIz1KO O4h52OnYUpM15sY22NSvpdRiHcsgEqNOB4lb6K2dby9iE5b5RHpNgfMNO6+eOtag dgCWopamLK2+MFeNe48+1v/3bDveskhvJon91GUZYmfkFzhZTN8= =9Lsd -----END PGP SIGNATURE----- Merge tag 'x86_cache_for_v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 cache resource control updates from Borislav Petkov: - add logic to correct MBM total and local values fixing errata SKX99 and BDF102 (Fenghua Yu) - cleanups * tag 'x86_cache_for_v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/resctrl: Clean up unused function parameter in rmdir path x86/resctrl: Constify kernfs_ops x86/resctrl: Correct MBM total and local values Documentation/x86: Rename resctrl_ui.rst and add two errata to the file
This commit is contained in:
commit
8ba27ae36b
@ -27,7 +27,7 @@ x86-specific Documentation
|
||||
pti
|
||||
mds
|
||||
microcode
|
||||
resctrl_ui
|
||||
resctrl
|
||||
tsx_async_abort
|
||||
usb-legacy-support
|
||||
i386/index
|
||||
|
@ -1209,3 +1209,96 @@ View the llc occupancy snapshot::
|
||||
|
||||
# cat /sys/fs/resctrl/p1/mon_data/mon_L3_00/llc_occupancy
|
||||
11234000
|
||||
|
||||
Intel RDT Errata
|
||||
================
|
||||
|
||||
Intel MBM Counters May Report System Memory Bandwidth Incorrectly
|
||||
-----------------------------------------------------------------
|
||||
|
||||
Errata SKX99 for Skylake server and BDF102 for Broadwell server.
|
||||
|
||||
Problem: Intel Memory Bandwidth Monitoring (MBM) counters track metrics
|
||||
according to the assigned Resource Monitor ID (RMID) for that logical
|
||||
core. The IA32_QM_CTR register (MSR 0xC8E), used to report these
|
||||
metrics, may report incorrect system bandwidth for certain RMID values.
|
||||
|
||||
Implication: Due to the errata, system memory bandwidth may not match
|
||||
what is reported.
|
||||
|
||||
Workaround: MBM total and local readings are corrected according to the
|
||||
following correction factor table:
|
||||
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|core count |rmid count |rmid threshold |correction factor|
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|1 |8 |0 |1.000000 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|2 |16 |0 |1.000000 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|3 |24 |15 |0.969650 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|4 |32 |0 |1.000000 |
|
||||
+---------------+---------------+---------------+-----------------+
|5 |40 |31 |1.066667 |
+---------------+---------------+---------------+-----------------+
|
||||
|6 |48 |31 |0.969650 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|7 |56 |47 |1.142857 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|8 |64 |0 |1.000000 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|9 |72 |63 |1.185115 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|10 |80 |63 |1.066553 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|11 |88 |79 |1.454545 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|12 |96 |0 |1.000000 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|13 |104 |95 |1.230769 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|14 |112 |95 |1.142857 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|15 |120 |95 |1.066667 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|16 |128 |0 |1.000000 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|17 |136 |127 |1.254863 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|18 |144 |127 |1.185255 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|19 |152 |0 |1.000000 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|20 |160 |127 |1.066667 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|21 |168 |0 |1.000000 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|22 |176 |159 |1.454334 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|23 |184 |0 |1.000000 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|24 |192 |127 |0.969744 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|25 |200 |191 |1.280246 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|26 |208 |191 |1.230921 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|27 |216 |0 |1.000000 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|28 |224 |191 |1.143118 |
|
||||
+---------------+---------------+---------------+-----------------+
|
||||
|
||||
If rmid > rmid threshold, MBM total and local values should be multiplied
|
||||
by the correction factor.
|
||||
|
||||
See:
|
||||
|
||||
1. Erratum SKX99 in Intel Xeon Processor Scalable Family Specification Update:
|
||||
http://web.archive.org/web/20200716124958/https://www.intel.com/content/www/us/en/processors/xeon/scalable/xeon-scalable-spec-update.html
|
||||
|
||||
2. Erratum BDF102 in Intel Xeon E5-2600 v4 Processor Product Family Specification Update:
|
||||
http://web.archive.org/web/20191125200531/https://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/xeon-e5-v4-spec-update.pdf
|
||||
|
||||
3. The errata in Intel Resource Director Technology (Intel RDT) on 2nd Generation Intel Xeon Scalable Processors Reference Manual:
|
||||
https://software.intel.com/content/www/us/en/develop/articles/intel-resource-director-technology-rdt-reference-manual.html
|
||||
|
||||
for further information.
|
@ -895,6 +895,10 @@ static __init void __check_quirks_intel(void)
|
||||
set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
|
||||
else
|
||||
set_rdt_options("!l3cat");
|
||||
fallthrough;
|
||||
case INTEL_FAM6_BROADWELL_X:
|
||||
intel_rdt_mbm_apply_quirk();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -264,7 +264,7 @@ void __exit rdtgroup_exit(void);
|
||||
struct rftype {
|
||||
char *name;
|
||||
umode_t mode;
|
||||
struct kernfs_ops *kf_ops;
|
||||
const struct kernfs_ops *kf_ops;
|
||||
unsigned long flags;
|
||||
unsigned long fflags;
|
||||
|
||||
@ -619,6 +619,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
|
||||
void mbm_setup_overflow_handler(struct rdt_domain *dom,
|
||||
unsigned long delay_ms);
|
||||
void mbm_handle_overflow(struct work_struct *work);
|
||||
void __init intel_rdt_mbm_apply_quirk(void);
|
||||
bool is_mba_sc(struct rdt_resource *r);
|
||||
void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm);
|
||||
u32 delay_bw_map(unsigned long bw, struct rdt_resource *r);
|
||||
|
@ -64,6 +64,69 @@ unsigned int rdt_mon_features;
|
||||
*/
|
||||
unsigned int resctrl_cqm_threshold;
|
||||
|
||||
/*
 * Convert a decimal correction factor into a fixed-point integer scaled by
 * 2^20 (1048576). Adding 0.5 before the truncating cast rounds non-negative
 * factors to the nearest integer.
 */
#define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5))
|
||||
|
||||
/*
|
||||
* The correction factor table is documented in Documentation/x86/resctrl.rst.
|
||||
* If rmid > rmid threshold, MBM total and local values should be multiplied
|
||||
* by the correction factor.
|
||||
*
|
||||
* The original table is modified for better code:
|
||||
*
|
||||
* 1. The threshold 0 is changed to rmid count - 1 so don't do correction
|
||||
* for the case.
|
||||
* 2. MBM total and local correction table indexed by core counter which is
|
||||
* equal to (x86_cache_max_rmid + 1) / 8 - 1 and is from 0 up to 27.
|
||||
* 3. The correction factor is normalized to 2^20 (1048576) so it's faster
|
||||
* to calculate corrected value by shifting:
|
||||
* corrected_value = (original_value * correction_factor) >> 20
|
||||
*/
|
||||
static const struct mbm_correction_factor_table {
|
||||
u32 rmidthreshold;
|
||||
u64 cf;
|
||||
} mbm_cf_table[] __initdata = {
|
||||
{7, CF(1.000000)},
|
||||
{15, CF(1.000000)},
|
||||
{15, CF(0.969650)},
|
||||
{31, CF(1.000000)},
|
||||
{31, CF(1.066667)},
|
||||
{31, CF(0.969650)},
|
||||
{47, CF(1.142857)},
|
||||
{63, CF(1.000000)},
|
||||
{63, CF(1.185115)},
|
||||
{63, CF(1.066553)},
|
||||
{79, CF(1.454545)},
|
||||
{95, CF(1.000000)},
|
||||
{95, CF(1.230769)},
|
||||
{95, CF(1.142857)},
|
||||
{95, CF(1.066667)},
|
||||
{127, CF(1.000000)},
|
||||
{127, CF(1.254863)},
|
||||
{127, CF(1.185255)},
|
||||
{151, CF(1.000000)},
|
||||
{127, CF(1.066667)},
|
||||
{167, CF(1.000000)},
|
||||
{159, CF(1.454334)},
|
||||
{183, CF(1.000000)},
|
||||
{127, CF(0.969744)},
|
||||
{191, CF(1.280246)},
|
||||
{191, CF(1.230921)},
|
||||
{215, CF(1.000000)},
|
||||
{191, CF(1.143118)},
|
||||
};
|
||||
|
||||
/*
 * RMIDs strictly greater than this threshold have their MBM counts
 * corrected. The UINT_MAX default means no u32 RMID is ever corrected until
 * intel_rdt_mbm_apply_quirk() installs a table entry.
 */
static u32 mbm_cf_rmidthreshold __read_mostly = UINT_MAX;
|
||||
/*
 * Active MBM correction factor in 2^20 fixed point; assigned by
 * intel_rdt_mbm_apply_quirk() from the selected mbm_cf_table entry.
 */
static u64 mbm_cf __read_mostly;
|
||||
|
||||
/*
 * Return the MBM count @val for @rmid, scaled by the active correction
 * factor when @rmid lies above the correction threshold; otherwise return
 * @val unchanged.
 */
static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val)
{
	unsigned long corrected;

	/* RMIDs at or below the threshold are reported as read. */
	if (rmid <= mbm_cf_rmidthreshold)
		return val;

	/* mbm_cf is scaled by 2^20, so shift the product back down. */
	corrected = (val * mbm_cf) >> 20;

	return corrected;
}
|
||||
|
||||
static inline struct rmid_entry *__rmid_entry(u32 rmid)
|
||||
{
|
||||
struct rmid_entry *entry;
|
||||
@ -260,7 +323,8 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
|
||||
m->chunks += chunks;
|
||||
m->prev_msr = tval;
|
||||
|
||||
rr->val += m->chunks;
|
||||
rr->val += get_corrected_mbm_count(rmid, m->chunks);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -279,7 +343,7 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
|
||||
return;
|
||||
|
||||
chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width);
|
||||
cur_bw = (chunks * r->mon_scale) >> 20;
|
||||
cur_bw = (get_corrected_mbm_count(rmid, chunks) * r->mon_scale) >> 20;
|
||||
|
||||
if (m->delta_comp)
|
||||
m->delta_bw = abs(cur_bw - m->prev_bw);
|
||||
@ -642,3 +706,17 @@ int rdt_get_mon_l3_config(struct rdt_resource *r)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Select the MBM correction threshold and factor for the boot CPU.
 *
 * The table index is derived from the maximum RMID as
 * (x86_cache_max_rmid + 1) / 8 - 1. When the index falls outside
 * mbm_cf_table, a message is logged and the defaults are left in place.
 */
void __init intel_rdt_mbm_apply_quirk(void)
{
	const struct mbm_correction_factor_table *entry;
	int idx = (boot_cpu_data.x86_cache_max_rmid + 1) / 8 - 1;

	/*
	 * NOTE(review): a negative idx presumably also takes this branch via
	 * the signed-to-unsigned conversion against ARRAY_SIZE() - confirm.
	 */
	if (idx >= ARRAY_SIZE(mbm_cf_table)) {
		pr_info("No MBM correction factor available\n");
		return;
	}

	entry = &mbm_cf_table[idx];
	mbm_cf_rmidthreshold = entry->rmidthreshold;
	mbm_cf = entry->cf;
}
|
||||
|
@ -240,13 +240,13 @@ static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static struct kernfs_ops rdtgroup_kf_single_ops = {
|
||||
static const struct kernfs_ops rdtgroup_kf_single_ops = {
|
||||
.atomic_write_len = PAGE_SIZE,
|
||||
.write = rdtgroup_file_write,
|
||||
.seq_show = rdtgroup_seqfile_show,
|
||||
};
|
||||
|
||||
static struct kernfs_ops kf_mondata_ops = {
|
||||
static const struct kernfs_ops kf_mondata_ops = {
|
||||
.atomic_write_len = PAGE_SIZE,
|
||||
.seq_show = rdtgroup_mondata_show,
|
||||
};
|
||||
@ -3023,8 +3023,7 @@ static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
|
||||
cpumask_var_t tmpmask)
|
||||
static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
|
||||
{
|
||||
struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
|
||||
int cpu;
|
||||
@ -3056,8 +3055,7 @@ static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int rdtgroup_ctrl_remove(struct kernfs_node *kn,
|
||||
struct rdtgroup *rdtgrp)
|
||||
static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp)
|
||||
{
|
||||
rdtgrp->flags = RDT_DELETED;
|
||||
list_del(&rdtgrp->rdtgroup_list);
|
||||
@ -3066,8 +3064,7 @@ static int rdtgroup_ctrl_remove(struct kernfs_node *kn,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
|
||||
cpumask_var_t tmpmask)
|
||||
static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
@ -3094,7 +3091,7 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
|
||||
closid_free(rdtgrp->closid);
|
||||
free_rmid(rdtgrp->mon.rmid);
|
||||
|
||||
rdtgroup_ctrl_remove(kn, rdtgrp);
|
||||
rdtgroup_ctrl_remove(rdtgrp);
|
||||
|
||||
/*
|
||||
* Free all the child monitor group rmids.
|
||||
@ -3131,13 +3128,13 @@ static int rdtgroup_rmdir(struct kernfs_node *kn)
|
||||
rdtgrp != &rdtgroup_default) {
|
||||
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
|
||||
rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
|
||||
ret = rdtgroup_ctrl_remove(kn, rdtgrp);
|
||||
ret = rdtgroup_ctrl_remove(rdtgrp);
|
||||
} else {
|
||||
ret = rdtgroup_rmdir_ctrl(kn, rdtgrp, tmpmask);
|
||||
ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask);
|
||||
}
|
||||
} else if (rdtgrp->type == RDTMON_GROUP &&
|
||||
is_mon_groups(parent_kn, kn->name)) {
|
||||
ret = rdtgroup_rmdir_mon(kn, rdtgrp, tmpmask);
|
||||
ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask);
|
||||
} else {
|
||||
ret = -EPERM;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user