linux/drivers/net/ethernet/intel/ice/ice_adapter.c
Michal Schmidt d29a8134c7 ice: avoid the PTP hardware semaphore in gettimex64 path
The PTP hardware semaphore (PFTSYN_SEM) is used to synchronize
operations that program the PTP timers. The operations involve issuing
commands to the sideband queue. The E810 does not have a hardware
sideband queue, so the admin queue is used. The admin queue is slow.
I have observed delays of hundreds of milliseconds waiting for
ice_sq_done.

When phc2sys reads the time from the ice PTP clock and PFTSYN_SEM is
held by a task performing one of the slow operations, ice_ptp_lock can
easily time out. phc2sys gets -EBUSY and the kernel prints:
  ice 0000:XX:YY.0: PTP failed to get time
These messages appear once every few seconds, causing log spam.

The E810 datasheet recommends an algorithm for reading the upper 64 bits
of the GLTSYN_TIME register. It matches what's implemented in
ice_ptp_read_src_clk_reg. It is robust against wrap-around, but not
necessarily against the concurrent setting of the register (with
GLTSYN_CMD_{INIT,ADJ}_TIME commands). Perhaps that's why
ice_ptp_gettimex64 also takes PFTSYN_SEM.

The race with time setters can be prevented without relying on the PTP
hardware semaphore. Using the "ice_adapter" from the previous patch,
we can have a common spinlock for the PFs that share the clock hardware.
It protects both reads of and writes to the GLTSYN_TIME register.
The writing is performed indirectly, by the hardware, as a result of
the driver writing GLTSYN_CMD_SYNC in ice_ptp_exec_tmr_cmd. I wasn't
sure if the ice_flush there is enough to make sure GLTSYN_TIME has been
updated, but it works well in my testing.
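
For illustration, the guarded read could look roughly like this (a
sketch modeled on ice_ptp_read_src_clk_reg, using the driver's register
helpers; it assumes the lock is reachable as
pf->adapter->ptp_gltsyn_time_lock and abbreviates the surrounding
function):

	unsigned long flags;
	u32 hi, lo, hi2;

	spin_lock_irqsave(&pf->adapter->ptp_gltsyn_time_lock, flags);
	hi = rd32(hw, GLTSYN_TIME_H(tmr_idx));
	lo = rd32(hw, GLTSYN_TIME_L(tmr_idx));
	hi2 = rd32(hw, GLTSYN_TIME_H(tmr_idx));
	if (hi != hi2) {
		/* The high word changed mid-read; re-read the low word. */
		lo = rd32(hw, GLTSYN_TIME_L(tmr_idx));
		hi = hi2;
	}
	spin_unlock_irqrestore(&pf->adapter->ptp_gltsyn_time_lock, flags);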

My test code can be seen here:
https://gitlab.com/mschmidt2/linux/-/commits/ice-ptp-host-side-lock-10
It consists of:
 - kernel threads reading the time in a busy loop, looking at the
   deltas between consecutive values, and reporting new maxima
   (sketched below);
 - a shell script that sets the time repeatedly;
 - a bpftrace probe to produce a histogram of the measured deltas.
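
The busy-loop reader can be sketched like this (hypothetical code; the
real test lives in the branch linked above, and ice_ptp_read_src_clk_reg
is a static helper inside ice_ptp.c):

	static int ptp_delta_thread(void *data)
	{
		struct ice_pf *pf = data;
		u64 prev = 0, max_delta = 0;

		while (!kthread_should_stop()) {
			u64 now = ice_ptp_read_src_clk_reg(pf, NULL);

			if (prev && now - prev > max_delta) {
				max_delta = now - prev;
				pr_info("new max delta: %llu ns\n", max_delta);
			}
			prev = now;
		}
		return 0;
	}
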
Without the spinlock ptp_gltsyn_time_lock, tearing is easy to observe:
deltas in the [2G, 4G) range appear in the histograms, i.e. jumps
approaching 2^32 ns, the signature of a torn 64-bit read. With the
spinlock added, there is no tearing and the biggest delta I saw was in
the [1M, 2M) range, i.e. under 2 ms.

Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2024-04-01 08:58:09 -07:00

// SPDX-License-Identifier: GPL-2.0-only
// SPDX-FileCopyrightText: Copyright Red Hat

#include <linux/bitfield.h>
#include <linux/cleanup.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/xarray.h>
#include "ice_adapter.h"

static DEFINE_XARRAY(ice_adapters);

/* PCI bus number is 8 bits. Slot is 5 bits. Domain can have the rest. */
#define INDEX_FIELD_DOMAIN	GENMASK(BITS_PER_LONG - 1, 13)
#define INDEX_FIELD_BUS		GENMASK(12, 5)
#define INDEX_FIELD_SLOT	GENMASK(4, 0)
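
/* The PCI function number is deliberately not part of the index, so
 * that all PFs of one multi-function device map to the same adapter.
 */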
static unsigned long ice_adapter_index(const struct pci_dev *pdev)
{
	unsigned int domain = pci_domain_nr(pdev->bus);

	WARN_ON(domain > FIELD_MAX(INDEX_FIELD_DOMAIN));

	return FIELD_PREP(INDEX_FIELD_DOMAIN, domain) |
	       FIELD_PREP(INDEX_FIELD_BUS, pdev->bus->number) |
	       FIELD_PREP(INDEX_FIELD_SLOT, PCI_SLOT(pdev->devfn));
}

static struct ice_adapter *ice_adapter_new(void)
{
	struct ice_adapter *adapter;

	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
	if (!adapter)
		return NULL;

	spin_lock_init(&adapter->ptp_gltsyn_time_lock);
	refcount_set(&adapter->refcount, 1);
	return adapter;
}

static void ice_adapter_free(struct ice_adapter *adapter)
{
	kfree(adapter);
}
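
/* Scope-based cleanup: a pointer annotated __free(ice_adapter_free) is
 * freed automatically when it goes out of scope, unless ownership is
 * transferred out with no_free_ptr().
 */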
DEFINE_FREE(ice_adapter_free, struct ice_adapter*, if (_T) ice_adapter_free(_T))

/**
 * ice_adapter_get - Get a shared ice_adapter structure.
 * @pdev: Pointer to the pci_dev whose driver is getting the ice_adapter.
 *
 * Gets a pointer to a shared ice_adapter structure. Physical functions (PFs)
 * of the same multi-function PCI device share one ice_adapter structure.
 * The ice_adapter is reference-counted. The PF driver must use ice_adapter_put
 * to release its reference.
 *
 * Context: Process, may sleep.
 * Return: Pointer to ice_adapter on success.
 *         ERR_PTR() on error. -ENOMEM is the only possible error.
 */
struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev)
{
	struct ice_adapter *ret, __free(ice_adapter_free) *adapter = NULL;
	unsigned long index = ice_adapter_index(pdev);

	adapter = ice_adapter_new();
	if (!adapter)
		return ERR_PTR(-ENOMEM);

	xa_lock(&ice_adapters);
	ret = __xa_cmpxchg(&ice_adapters, index, NULL, adapter, GFP_KERNEL);
	if (xa_is_err(ret)) {
		ret = ERR_PTR(xa_err(ret));
		goto unlock;
	}
	if (ret) {
		refcount_inc(&ret->refcount);
		goto unlock;
	}
	ret = no_free_ptr(adapter);
unlock:
	xa_unlock(&ice_adapters);
	return ret;
}
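
/* Illustrative get/put pairing (a sketch only; it assumes the PF driver
 * stores the pointer in a pf->adapter field during probe):
 *
 *	pf->adapter = ice_adapter_get(pdev);
 *	if (IS_ERR(pf->adapter))
 *		return PTR_ERR(pf->adapter);
 *	...
 *	ice_adapter_put(pdev);
 */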
/**
 * ice_adapter_put - Release a reference to the shared ice_adapter structure.
 * @pdev: Pointer to the pci_dev whose driver is releasing the ice_adapter.
 *
 * Releases the reference to ice_adapter previously obtained with
 * ice_adapter_get.
 *
 * Context: Any.
 */
void ice_adapter_put(const struct pci_dev *pdev)
{
	unsigned long index = ice_adapter_index(pdev);
	struct ice_adapter *adapter;

	xa_lock(&ice_adapters);
	adapter = xa_load(&ice_adapters, index);
	if (WARN_ON(!adapter))
		goto unlock;

	if (!refcount_dec_and_test(&adapter->refcount))
		goto unlock;

	WARN_ON(__xa_erase(&ice_adapters, index) != adapter);
	ice_adapter_free(adapter);
unlock:
	xa_unlock(&ice_adapters);
}