powerpc/papr_scm: Add support for reporting dirty-shutdown-count

Persistent memory devices like NVDIMMs can lose cached writes in case
something prevents a flush on power-fail. Such situations are termed
dirty shutdowns and are exposed to applications as a
last-shutdown-state (LSS) flag and a dirty-shutdown-counter (DSC), as
described at [1]. The latter is useful in conditions where multiple
applications want to detect a dirty shutdown event without racing with
one another.

PAPR-NVDIMMs have so far only exposed LSS style flags to indicate a
dirty-shutdown-state. This patch further adds support for DSC via the
"ibm,persistence-failed-count" device tree property of an NVDIMM. This
property is a monotonically increasing 64-bit counter that indicates
the number of times an NVDIMM has encountered a dirty-shutdown event
causing persistence loss.

Since this value is not expected to change after system boot,
papr_scm reads and caches it during NVDIMM probe and exposes it
as a PAPR sysfs attribute named 'dirty_shutdown', matching the name of
the similarly named NFIT sysfs attribute. This value is also available
to libnvdimm via the PAPR_PDSM_HEALTH payload: 'struct nd_papr_pdsm_health'
has been extended with a new member called 'dimm_dsc', the presence of
which is indicated by the newly introduced PDSM_DIMM_DSC_VALID flag.

References:
[1] https://pmem.io/documents/Dirty_Shutdown_Handling-V1.0.pdf

Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210624080621.252038-1-vaibhav@linux.ibm.com
This commit is contained in:
Vaibhav Jain 2021-06-24 13:36:21 +05:30 committed by Michael Ellerman
parent ed78f56e12
commit de21e1377c
2 changed files with 36 additions and 0 deletions

View File

@ -77,6 +77,9 @@
/* Indicate that the 'dimm_fuel_gauge' field is valid */ /* Indicate that the 'dimm_fuel_gauge' field is valid */
#define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1 #define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1
/* Indicate that the 'dimm_dsc' field is valid */
#define PDSM_DIMM_DSC_VALID 2
/* /*
* Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH
* Various flags indicate the health status of the dimm. * Various flags indicate the health status of the dimm.
@ -105,6 +108,9 @@ struct nd_papr_pdsm_health {
/* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */ /* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */
__u16 dimm_fuel_gauge; __u16 dimm_fuel_gauge;
/* Extension flag PDSM_DIMM_DSC_VALID */
__u64 dimm_dsc;
}; };
__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
}; };

View File

@ -115,6 +115,9 @@ struct papr_scm_priv {
/* Health information for the dimm */ /* Health information for the dimm */
u64 health_bitmap; u64 health_bitmap;
/* Holds the last known dirty shutdown counter value */
u64 dirty_shutdown_counter;
/* length of the stat buffer as expected by phyp */ /* length of the stat buffer as expected by phyp */
size_t stat_buffer_len; size_t stat_buffer_len;
}; };
@ -604,6 +607,16 @@ free_stats:
return rc; return rc;
} }
/*
 * Fill in the dirty-shutdown-counter part of a PDSM health payload.
 *
 * Stores the counter value held in @p into the 'dimm_dsc' field of
 * @payload's health member and sets PDSM_DIMM_DSC_VALID in its
 * extension flags so the consumer knows the field carries a value.
 *
 * Returns the size of struct nd_papr_pdsm_health.
 */
static int papr_pdsm_dsc(struct papr_scm_priv *p,
			 union nd_pdsm_payload *payload)
{
	/* Publish the value first, then mark the field as valid */
	payload->health.dimm_dsc = p->dirty_shutdown_counter;
	payload->health.extension_flags |= PDSM_DIMM_DSC_VALID;

	return sizeof(struct nd_papr_pdsm_health);
}
/* Fetch the DIMM health info and populate it in provided package. */ /* Fetch the DIMM health info and populate it in provided package. */
static int papr_pdsm_health(struct papr_scm_priv *p, static int papr_pdsm_health(struct papr_scm_priv *p,
union nd_pdsm_payload *payload) union nd_pdsm_payload *payload)
@ -647,6 +660,8 @@ static int papr_pdsm_health(struct papr_scm_priv *p,
/* Populate the fuel gauge meter in the payload */ /* Populate the fuel gauge meter in the payload */
papr_pdsm_fuel_gauge(p, payload); papr_pdsm_fuel_gauge(p, payload);
/* Populate the dirty-shutdown-counter field */
papr_pdsm_dsc(p, payload);
rc = sizeof(struct nd_papr_pdsm_health); rc = sizeof(struct nd_papr_pdsm_health);
@ -908,6 +923,16 @@ static ssize_t flags_show(struct device *dev,
} }
DEVICE_ATTR_RO(flags); DEVICE_ATTR_RO(flags);
/*
 * Read handler for the 'dirty_shutdown' device attribute.
 *
 * Looks up the papr_scm private data attached to the nvdimm behind @dev
 * and writes its dirty shutdown counter to @buf as an unsigned decimal
 * number terminated by a newline. @attr is unused.
 *
 * Returns whatever sysfs_emit() returns.
 */
static ssize_t dirty_shutdown_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct papr_scm_priv *priv = nvdimm_provider_data(to_nvdimm(dev));

	return sysfs_emit(buf, "%llu\n", priv->dirty_shutdown_counter);
}
DEVICE_ATTR_RO(dirty_shutdown);
static umode_t papr_nd_attribute_visible(struct kobject *kobj, static umode_t papr_nd_attribute_visible(struct kobject *kobj,
struct attribute *attr, int n) struct attribute *attr, int n)
{ {
@ -926,6 +951,7 @@ static umode_t papr_nd_attribute_visible(struct kobject *kobj,
static struct attribute *papr_nd_attributes[] = { static struct attribute *papr_nd_attributes[] = {
&dev_attr_flags.attr, &dev_attr_flags.attr,
&dev_attr_perf_stats.attr, &dev_attr_perf_stats.attr,
&dev_attr_dirty_shutdown.attr,
NULL, NULL,
}; };
@ -1151,6 +1177,10 @@ static int papr_scm_probe(struct platform_device *pdev)
p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required"); p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required");
p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required"); p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required");
if (of_property_read_u64(dn, "ibm,persistence-failed-count",
&p->dirty_shutdown_counter))
p->dirty_shutdown_counter = 0;
/* We just need to ensure that set cookies are unique across */ /* We just need to ensure that set cookies are unique across */
uuid_parse(uuid_str, &uuid); uuid_parse(uuid_str, &uuid);