ionic: avoid races in ionic_heartbeat_check
Rework the heartbeat checks to be sure that we're getting an atomic operation. Through testing we found occasions where a separate thread could clash with this check and cause erroneous heartbeat check results.

Signed-off-by: Allen Hubbe <allenbh@pensando.io>
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
230efff47a
commit
b2b9a8d7ed
2 changed files with 63 additions and 37 deletions
|
@ -24,6 +24,9 @@ static void ionic_watchdog_cb(struct timer_list *t)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
hb = ionic_heartbeat_check(ionic);
|
hb = ionic_heartbeat_check(ionic);
|
||||||
|
dev_dbg(ionic->dev, "%s: hb %d running %d UP %d\n",
|
||||||
|
__func__, hb, netif_running(lif->netdev),
|
||||||
|
test_bit(IONIC_LIF_F_UP, lif->state));
|
||||||
|
|
||||||
if (hb >= 0 &&
|
if (hb >= 0 &&
|
||||||
!test_bit(IONIC_LIF_F_FW_RESET, lif->state))
|
!test_bit(IONIC_LIF_F_FW_RESET, lif->state))
|
||||||
|
@ -91,9 +94,17 @@ int ionic_dev_setup(struct ionic *ionic)
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
}
|
}
|
||||||
|
|
||||||
idev->last_fw_status = 0xff;
|
|
||||||
timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0);
|
timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0);
|
||||||
ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ;
|
ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ;
|
||||||
|
|
||||||
|
/* set times to ensure the first check will proceed */
|
||||||
|
atomic_long_set(&idev->last_check_time, jiffies - 2 * HZ);
|
||||||
|
idev->last_hb_time = jiffies - 2 * ionic->watchdog_period;
|
||||||
|
/* init as ready, so no transition if the first check succeeds */
|
||||||
|
idev->last_fw_hb = 0;
|
||||||
|
idev->fw_hb_ready = true;
|
||||||
|
idev->fw_status_ready = true;
|
||||||
|
|
||||||
mod_timer(&ionic->watchdog_timer,
|
mod_timer(&ionic->watchdog_timer,
|
||||||
round_jiffies(jiffies + ionic->watchdog_period));
|
round_jiffies(jiffies + ionic->watchdog_period));
|
||||||
|
|
||||||
|
@ -107,29 +118,38 @@ int ionic_dev_setup(struct ionic *ionic)
|
||||||
int ionic_heartbeat_check(struct ionic *ionic)
|
int ionic_heartbeat_check(struct ionic *ionic)
|
||||||
{
|
{
|
||||||
struct ionic_dev *idev = &ionic->idev;
|
struct ionic_dev *idev = &ionic->idev;
|
||||||
unsigned long hb_time;
|
unsigned long check_time, last_check_time;
|
||||||
|
bool fw_status_ready, fw_hb_ready;
|
||||||
u8 fw_status;
|
u8 fw_status;
|
||||||
u32 hb;
|
u32 fw_hb;
|
||||||
|
|
||||||
/* wait a little more than one second before testing again */
|
/* wait at least one second before testing again */
|
||||||
hb_time = jiffies;
|
check_time = jiffies;
|
||||||
if (time_before(hb_time, (idev->last_hb_time + ionic->watchdog_period)))
|
last_check_time = atomic_long_read(&idev->last_check_time);
|
||||||
|
do_check_time:
|
||||||
|
if (time_before(check_time, last_check_time + HZ))
|
||||||
return 0;
|
return 0;
|
||||||
|
if (!atomic_long_try_cmpxchg_relaxed(&idev->last_check_time,
|
||||||
|
&last_check_time, check_time)) {
|
||||||
|
/* if called concurrently, only the first should proceed. */
|
||||||
|
dev_dbg(ionic->dev, "%s: do_check_time again\n", __func__);
|
||||||
|
goto do_check_time;
|
||||||
|
}
|
||||||
|
|
||||||
/* firmware is useful only if the running bit is set and
|
/* firmware is useful only if the running bit is set and
|
||||||
* fw_status != 0xff (bad PCI read)
|
* fw_status != 0xff (bad PCI read)
|
||||||
*/
|
*/
|
||||||
fw_status = ioread8(&idev->dev_info_regs->fw_status);
|
fw_status = ioread8(&idev->dev_info_regs->fw_status);
|
||||||
if (fw_status != 0xff)
|
fw_status_ready = (fw_status != 0xff) && (fw_status & IONIC_FW_STS_F_RUNNING);
|
||||||
fw_status &= IONIC_FW_STS_F_RUNNING; /* use only the run bit */
|
|
||||||
|
|
||||||
/* is this a transition? */
|
/* is this a transition? */
|
||||||
if (fw_status != idev->last_fw_status &&
|
if (fw_status_ready != idev->fw_status_ready) {
|
||||||
idev->last_fw_status != 0xff) {
|
|
||||||
struct ionic_lif *lif = ionic->lif;
|
struct ionic_lif *lif = ionic->lif;
|
||||||
bool trigger = false;
|
bool trigger = false;
|
||||||
|
|
||||||
if (!fw_status || fw_status == 0xff) {
|
idev->fw_status_ready = fw_status_ready;
|
||||||
|
|
||||||
|
if (!fw_status_ready) {
|
||||||
dev_info(ionic->dev, "FW stopped %u\n", fw_status);
|
dev_info(ionic->dev, "FW stopped %u\n", fw_status);
|
||||||
if (lif && !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
|
if (lif && !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
|
||||||
trigger = true;
|
trigger = true;
|
||||||
|
@ -143,44 +163,47 @@ int ionic_heartbeat_check(struct ionic *ionic)
|
||||||
struct ionic_deferred_work *work;
|
struct ionic_deferred_work *work;
|
||||||
|
|
||||||
work = kzalloc(sizeof(*work), GFP_ATOMIC);
|
work = kzalloc(sizeof(*work), GFP_ATOMIC);
|
||||||
if (!work) {
|
if (work) {
|
||||||
dev_err(ionic->dev, "LIF reset trigger dropped\n");
|
|
||||||
} else {
|
|
||||||
work->type = IONIC_DW_TYPE_LIF_RESET;
|
work->type = IONIC_DW_TYPE_LIF_RESET;
|
||||||
if (fw_status & IONIC_FW_STS_F_RUNNING &&
|
work->fw_status = fw_status_ready;
|
||||||
fw_status != 0xff)
|
|
||||||
work->fw_status = 1;
|
|
||||||
ionic_lif_deferred_enqueue(&lif->deferred, work);
|
ionic_lif_deferred_enqueue(&lif->deferred, work);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
idev->last_fw_status = fw_status;
|
|
||||||
|
|
||||||
if (!fw_status || fw_status == 0xff)
|
if (!fw_status_ready)
|
||||||
return -ENXIO;
|
return -ENXIO;
|
||||||
|
|
||||||
/* early FW has no heartbeat, else FW will return non-zero */
|
/* wait at least one watchdog period since the last heartbeat */
|
||||||
hb = ioread32(&idev->dev_info_regs->fw_heartbeat);
|
last_check_time = idev->last_hb_time;
|
||||||
if (!hb)
|
if (time_before(check_time, last_check_time + ionic->watchdog_period))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
/* are we stalled? */
|
fw_hb = ioread32(&idev->dev_info_regs->fw_heartbeat);
|
||||||
if (hb == idev->last_hb) {
|
fw_hb_ready = fw_hb != idev->last_fw_hb;
|
||||||
/* only complain once for each stall seen */
|
|
||||||
if (idev->last_hb_time != 1) {
|
|
||||||
dev_info(ionic->dev, "FW heartbeat stalled at %d\n",
|
|
||||||
idev->last_hb);
|
|
||||||
idev->last_hb_time = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return -ENXIO;
|
/* early FW version had no heartbeat, so fake it */
|
||||||
|
if (!fw_hb_ready && !fw_hb)
|
||||||
|
fw_hb_ready = true;
|
||||||
|
|
||||||
|
dev_dbg(ionic->dev, "%s: fw_hb %u last_fw_hb %u ready %u\n",
|
||||||
|
__func__, fw_hb, idev->last_fw_hb, fw_hb_ready);
|
||||||
|
|
||||||
|
idev->last_fw_hb = fw_hb;
|
||||||
|
|
||||||
|
/* log a transition */
|
||||||
|
if (fw_hb_ready != idev->fw_hb_ready) {
|
||||||
|
idev->fw_hb_ready = fw_hb_ready;
|
||||||
|
if (!fw_hb_ready)
|
||||||
|
dev_info(ionic->dev, "FW heartbeat stalled at %d\n", fw_hb);
|
||||||
|
else
|
||||||
|
dev_info(ionic->dev, "FW heartbeat restored at %d\n", fw_hb);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (idev->last_hb_time == 1)
|
if (!fw_hb_ready)
|
||||||
dev_info(ionic->dev, "FW heartbeat restored at %d\n", hb);
|
return -ENXIO;
|
||||||
|
|
||||||
idev->last_hb = hb;
|
idev->last_hb_time = check_time;
|
||||||
idev->last_hb_time = hb_time;
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
#ifndef _IONIC_DEV_H_
|
#ifndef _IONIC_DEV_H_
|
||||||
#define _IONIC_DEV_H_
|
#define _IONIC_DEV_H_
|
||||||
|
|
||||||
|
#include <linux/atomic.h>
|
||||||
#include <linux/mutex.h>
|
#include <linux/mutex.h>
|
||||||
#include <linux/workqueue.h>
|
#include <linux/workqueue.h>
|
||||||
|
|
||||||
|
@ -135,9 +136,11 @@ struct ionic_dev {
|
||||||
union ionic_dev_info_regs __iomem *dev_info_regs;
|
union ionic_dev_info_regs __iomem *dev_info_regs;
|
||||||
union ionic_dev_cmd_regs __iomem *dev_cmd_regs;
|
union ionic_dev_cmd_regs __iomem *dev_cmd_regs;
|
||||||
|
|
||||||
|
atomic_long_t last_check_time;
|
||||||
unsigned long last_hb_time;
|
unsigned long last_hb_time;
|
||||||
u32 last_hb;
|
u32 last_fw_hb;
|
||||||
u8 last_fw_status;
|
bool fw_hb_ready;
|
||||||
|
bool fw_status_ready;
|
||||||
|
|
||||||
u64 __iomem *db_pages;
|
u64 __iomem *db_pages;
|
||||||
dma_addr_t phy_db_pages;
|
dma_addr_t phy_db_pages;
|
||||||
|
|
Loading…
Add table
Reference in a new issue