From dbed963ed62c4c2b8870a02c8b7dcb0c2af3ee0b Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 26 Apr 2022 10:49:56 -0500 Subject: [PATCH] hwmon (occ): Retry for checksum failure Due to the OCC communication design with a shared SRAM area, checkum errors are expected due to corrupted buffer from OCC communications with other system components. Therefore, retry the command twice in the event of a checksum failure. Signed-off-by: Eddie James Acked-by: Guenter Roeck Link: https://lore.kernel.org/r/20220426154956.27205-3-eajames@linux.ibm.com Signed-off-by: Joel Stanley --- drivers/hwmon/occ/p9_sbe.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/occ/p9_sbe.c b/drivers/hwmon/occ/p9_sbe.c index c1e0a1d96cd4..f3791a589b01 100644 --- a/drivers/hwmon/occ/p9_sbe.c +++ b/drivers/hwmon/occ/p9_sbe.c @@ -14,6 +14,8 @@ #include "common.h" +#define OCC_CHECKSUM_RETRIES 3 + struct p9_sbe_occ { struct occ occ; bool sbe_error; @@ -80,18 +82,23 @@ done: static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len, void *resp, size_t resp_len) { + size_t original_resp_len = resp_len; struct p9_sbe_occ *ctx = to_p9_sbe_occ(occ); - int rc; + int rc, i; - rc = fsi_occ_submit(ctx->sbe, cmd, len, resp, &resp_len); - if (rc < 0) { + for (i = 0; i < OCC_CHECKSUM_RETRIES; ++i) { + rc = fsi_occ_submit(ctx->sbe, cmd, len, resp, &resp_len); + if (rc >= 0) + break; if (resp_len) { if (p9_sbe_occ_save_ffdc(ctx, resp, resp_len)) sysfs_notify(&occ->bus_dev->kobj, NULL, bin_attr_ffdc.attr.name); + return rc; } - - return rc; + if (rc != -EBADE) + return rc; + resp_len = original_resp_len; } switch (((struct occ_response *)resp)->return_status) {