diff options
author | Vaibhav Jain <vaibhav@linux.ibm.com> | 2022-01-24 23:22:04 +0300 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2022-02-16 15:10:47 +0300 |
commit | bbbca72352bb9484bc057c91a408332b35ee8f4c (patch) | |
tree | 1362ffad9a0d5cd99f249645c04fae4f5122e05c /arch/powerpc/platforms | |
parent | 76b372814b088aeb76f0f753d968c8aa6d297f2a (diff) | |
download | linux-bbbca72352bb9484bc057c91a408332b35ee8f4c.tar.xz |
powerpc/papr_scm: Implement initial support for injecting smart errors
Presently PAPR doesn't support injecting smart errors on an
NVDIMM. This makes testing the NVDIMM health reporting functionality
difficult as simulating NVDIMM health related events need a hacked up
qemu version.
To solve this problem this patch proposes simulating certain set of
NVDIMM health related events in papr_scm. Specifically 'fatal' health
state and 'dirty' shutdown state. These error can be injected via the
user-space 'ndctl-inject-smart(1)' command. With the proposed patch and
corresponding ndctl patches following command flow is expected:
$ sudo ndctl list -DH -d nmem0
...
"health_state":"ok",
"shutdown_state":"clean",
...
# inject unsafe shutdown and fatal health error
$ sudo ndctl inject-smart nmem0 -Uf
...
"health_state":"fatal",
"shutdown_state":"dirty",
...
# uninject all errors
$ sudo ndctl inject-smart nmem0 -N
...
"health_state":"ok",
"shutdown_state":"clean",
...
The patch adds a new member 'health_bitmap_inject_mask' inside struct
papr_scm_priv which is then bitwise ANDed to the health bitmap fetched from the
hypervisor. The value for 'health_bitmap_inject_mask' is accessible from sysfs
at nmemX/papr/health_bitmap_inject.
A new PDSM named 'SMART_INJECT' is proposed that accepts newly
introduced 'struct nd_papr_pdsm_smart_inject' as payload thats
exchanged between libndctl and papr_scm to indicate the requested
smart-error states.
When the processing the PDSM 'SMART_INJECT', papr_pdsm_smart_inject()
constructs a pair or 'inject_mask' and 'clear_mask' bitmaps from the payload
and bit-blt it to the 'health_bitmap_inject_mask'. This ensures the after being
fetched from the hypervisor, the health_bitmap reflects requested smart-error
states.
Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220124202204.1488346-1-vaibhav@linux.ibm.com
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r-- | arch/powerpc/platforms/pseries/papr_scm.c | 90 |
1 files changed, 87 insertions, 3 deletions
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index f48e87ac89c9..20aafd387840 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -120,6 +120,10 @@ struct papr_scm_priv { /* length of the stat buffer as expected by phyp */ size_t stat_buffer_len; + + /* The bits which needs to be overridden */ + u64 health_bitmap_inject_mask; + }; static int papr_scm_pmem_flush(struct nd_region *nd_region, @@ -347,19 +351,29 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, static int __drc_pmem_query_health(struct papr_scm_priv *p) { unsigned long ret[PLPAR_HCALL_BUFSIZE]; + u64 bitmap = 0; long rc; /* issue the hcall */ rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index); - if (rc != H_SUCCESS) { + if (rc == H_SUCCESS) + bitmap = ret[0] & ret[1]; + else if (rc == H_FUNCTION) + dev_info_once(&p->pdev->dev, + "Hcall H_SCM_HEALTH not implemented, assuming empty health bitmap"); + else { + dev_err(&p->pdev->dev, "Failed to query health information, Err:%ld\n", rc); return -ENXIO; } p->lasthealth_jiffies = jiffies; - p->health_bitmap = ret[0] & ret[1]; - + /* Allow injecting specific health bits via inject mask. */ + if (p->health_bitmap_inject_mask) + bitmap = (bitmap & ~p->health_bitmap_inject_mask) | + p->health_bitmap_inject_mask; + WRITE_ONCE(p->health_bitmap, bitmap); dev_dbg(&p->pdev->dev, "Queried dimm health info. Bitmap:0x%016lx Mask:0x%016lx\n", ret[0], ret[1]); @@ -669,6 +683,56 @@ out: return rc; } +/* Inject a smart error Add the dirty-shutdown-counter value to the pdsm */ +static int papr_pdsm_smart_inject(struct papr_scm_priv *p, + union nd_pdsm_payload *payload) +{ + int rc; + u32 supported_flags = 0; + u64 inject_mask = 0, clear_mask = 0; + u64 mask; + + /* Check for individual smart error flags and update inject/clear masks */ + if (payload->smart_inject.flags & PDSM_SMART_INJECT_HEALTH_FATAL) { + supported_flags |= PDSM_SMART_INJECT_HEALTH_FATAL; + if (payload->smart_inject.fatal_enable) + inject_mask |= PAPR_PMEM_HEALTH_FATAL; + else + clear_mask |= PAPR_PMEM_HEALTH_FATAL; + } + + if (payload->smart_inject.flags & PDSM_SMART_INJECT_BAD_SHUTDOWN) { + supported_flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN; + if (payload->smart_inject.unsafe_shutdown_enable) + inject_mask |= PAPR_PMEM_SHUTDOWN_DIRTY; + else + clear_mask |= PAPR_PMEM_SHUTDOWN_DIRTY; + } + + dev_dbg(&p->pdev->dev, "[Smart-inject] inject_mask=%#llx clear_mask=%#llx\n", + inject_mask, clear_mask); + + /* Prevent concurrent access to dimm health bitmap related members */ + rc = mutex_lock_interruptible(&p->health_mutex); + if (rc) + return rc; + + /* Use inject/clear masks to set health_bitmap_inject_mask */ + mask = READ_ONCE(p->health_bitmap_inject_mask); + mask = (mask & ~clear_mask) | inject_mask; + WRITE_ONCE(p->health_bitmap_inject_mask, mask); + + /* Invalidate cached health bitmap */ + p->lasthealth_jiffies = 0; + + mutex_unlock(&p->health_mutex); + + /* Return the supported flags back to userspace */ + payload->smart_inject.flags = supported_flags; + + return sizeof(struct nd_papr_pdsm_health); +} + /* * 'struct pdsm_cmd_desc' * Identifies supported PDSMs' expected length of in/out payloads @@ -702,6 +766,12 @@ static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = { .size_out = sizeof(struct nd_papr_pdsm_health), .service = papr_pdsm_health, }, + + [PAPR_PDSM_SMART_INJECT] = { + .size_in = sizeof(struct nd_papr_pdsm_smart_inject), + .size_out = sizeof(struct nd_papr_pdsm_smart_inject), + .service = papr_pdsm_smart_inject, + }, /* Empty */ [PAPR_PDSM_MAX] = { .size_in = 0, @@ -838,6 +908,19 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, return 0; } +static ssize_t health_bitmap_inject_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvdimm *dimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(dimm); + + return sprintf(buf, "%#llx\n", + READ_ONCE(p->health_bitmap_inject_mask)); +} + +static DEVICE_ATTR_ADMIN_RO(health_bitmap_inject); + static ssize_t perf_stats_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -952,6 +1035,7 @@ static struct attribute *papr_nd_attributes[] = { &dev_attr_flags.attr, &dev_attr_perf_stats.attr, &dev_attr_dirty_shutdown.attr, + &dev_attr_health_bitmap_inject.attr, NULL, }; |