Currently, there are two major issues with stmmac driver statistics.
First, the statistics in stmmac_extra_stats, stmmac_rxq_stats and
stmmac_txq_stats are 32-bit variables on 32-bit platforms, so some
stats, e.g. rx_pkt_n and tx_pkt_n, can overflow after several minutes
of high traffic. Second, if the HW supports multiple queues, there is
frequent cacheline ping-pong on some driver statistics variables, e.g.
normal_irq_n and tx_pkt_n. What's more, the cacheline ping-pong on
normal_irq_n happens in the ISR, which makes the situation worse.

To improve the driver, convert those statistics to 64-bit, implement
ndo_get_stats64, and update the .get_ethtool_stats implementation
accordingly. Also use per-queue statistics where necessary to remove
the cacheline ping-pong as much as possible and make multiqueue
operations faster. Statistics that cannot overflow and are not
frequently updated are kept as is.

Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
Link: https://lore.kernel.org/r/20230717160630.1892-3-jszhang@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
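For context, the fix described above hinges on two generic kernel facilities: per-queue counter blocks and the u64_stats_sync seqcount (linux/u64_stats_sync.h) that makes 64-bit counters tear-free on 32-bit platforms. The following is a minimal sketch of that pattern only; the struct txq_stats_sketch, txq_count_irq() and txq_sum_pkts() names and the exact set of counters are hypothetical stand-ins for illustration, not the driver's actual layout (see stmmac's common.h for that).

#include <linux/cache.h>
#include <linux/types.h>
#include <linux/u64_stats_sync.h>

/* Hypothetical per-queue 64-bit counter block. One instance per TX
 * queue; the counter names are illustrative only.
 */
struct txq_stats_sketch {
	u64 tx_pkt_n;		/* packets completed on this queue */
	u64 tx_normal_irq_n;	/* TX interrupts seen on this queue */
	struct u64_stats_sync syncp;	/* init with u64_stats_init() */
} ____cacheline_aligned_in_smp;	/* keep queues on separate cachelines */

/* Writer side, e.g. in the per-queue ISR: only this queue's CPU
 * touches this cacheline, so the ping-pong on a shared counter
 * disappears.
 */
static inline void txq_count_irq(struct txq_stats_sketch *s)
{
	u64_stats_update_begin(&s->syncp);
	s->tx_normal_irq_n++;
	u64_stats_update_end(&s->syncp);
}

/* Reader side, e.g. in ndo_get_stats64: snapshot each queue's counters
 * consistently (the retry loop matters only on 32-bit platforms) and
 * sum them into the global view.
 */
static u64 txq_sum_pkts(struct txq_stats_sketch *stats, unsigned int nqueues)
{
	u64 total = 0;
	unsigned int q;

	for (q = 0; q < nqueues; q++) {
		struct txq_stats_sketch *s = &stats[q];
		unsigned int start;
		u64 pkts;

		do {
			start = u64_stats_fetch_begin(&s->syncp);
			pkts = s->tx_pkt_n;
		} while (u64_stats_fetch_retry(&s->syncp, start));

		total += pkts;
	}

	return total;
}

On 64-bit platforms u64_stats_sync compiles away, so the reader loop costs a plain load per counter; it is the ____cacheline_aligned_in_smp placement that keeps each queue's writers off the other queues' cachelines.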
// SPDX-License-Identifier: GPL-2.0-only
/*******************************************************************************
  This contains the functions to handle the normal descriptors.

  Copyright (C) 2007-2009 STMicroelectronics Ltd

  Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
*******************************************************************************/

#include <linux/stmmac.h>
#include "common.h"
#include "descs_com.h"

static int ndesc_get_tx_status(struct stmmac_extra_stats *x,
			       struct dma_desc *p, void __iomem *ioaddr)
{
	unsigned int tdes0 = le32_to_cpu(p->des0);
	unsigned int tdes1 = le32_to_cpu(p->des1);
	int ret = tx_done;

	/* Get tx owner first */
	if (unlikely(tdes0 & TDES0_OWN))
		return tx_dma_own;

	/* Verify tx error by looking at the last segment. */
	if (likely(!(tdes1 & TDES1_LAST_SEGMENT)))
		return tx_not_ls;

	if (unlikely(tdes0 & TDES0_ERROR_SUMMARY)) {
		if (unlikely(tdes0 & TDES0_UNDERFLOW_ERROR))
			x->tx_underflow++;
		if (unlikely(tdes0 & TDES0_NO_CARRIER))
			x->tx_carrier++;
		if (unlikely(tdes0 & TDES0_LOSS_CARRIER))
			x->tx_losscarrier++;
		if (unlikely((tdes0 & TDES0_EXCESSIVE_DEFERRAL) ||
			     (tdes0 & TDES0_EXCESSIVE_COLLISIONS) ||
			     (tdes0 & TDES0_LATE_COLLISION))) {
			unsigned int collisions;

			collisions = (tdes0 & TDES0_COLLISION_COUNT_MASK) >> 3;
			x->tx_collision += collisions;
		}
		ret = tx_err;
	}

	if (tdes0 & TDES0_VLAN_FRAME)
		x->tx_vlan++;

	if (unlikely(tdes0 & TDES0_DEFERRED))
		x->tx_deferred++;

	return ret;
}

static int ndesc_get_tx_len(struct dma_desc *p)
{
	return (le32_to_cpu(p->des1) & RDES1_BUFFER1_SIZE_MASK);
}

/* This function verifies if each incoming frame has some errors
 * and, if required, updates the multicast statistics.
 * In case of success, it returns good_frame because the GMAC device
 * is supposed to be able to compute the csum in HW. */
static int ndesc_get_rx_status(struct stmmac_extra_stats *x,
			       struct dma_desc *p)
{
	int ret = good_frame;
	unsigned int rdes0 = le32_to_cpu(p->des0);

	if (unlikely(rdes0 & RDES0_OWN))
		return dma_own;

	if (unlikely(!(rdes0 & RDES0_LAST_DESCRIPTOR))) {
		x->rx_length++;
		return discard_frame;
	}

	if (unlikely(rdes0 & RDES0_ERROR_SUMMARY)) {
		if (unlikely(rdes0 & RDES0_DESCRIPTOR_ERROR))
			x->rx_desc++;
		if (unlikely(rdes0 & RDES0_SA_FILTER_FAIL))
			x->sa_filter_fail++;
		if (unlikely(rdes0 & RDES0_OVERFLOW_ERROR))
			x->overflow_error++;
		if (unlikely(rdes0 & RDES0_IPC_CSUM_ERROR))
			x->ipc_csum_error++;
		if (unlikely(rdes0 & RDES0_COLLISION))
			x->rx_collision++;
		if (unlikely(rdes0 & RDES0_CRC_ERROR))
			x->rx_crc_errors++;
		ret = discard_frame;
	}
	if (unlikely(rdes0 & RDES0_DRIBBLING))
		x->dribbling_bit++;

	if (unlikely(rdes0 & RDES0_LENGTH_ERROR)) {
		x->rx_length++;
		ret = discard_frame;
	}
	if (unlikely(rdes0 & RDES0_MII_ERROR)) {
		x->rx_mii++;
		ret = discard_frame;
	}
#ifdef STMMAC_VLAN_TAG_USED
	if (rdes0 & RDES0_VLAN_TAG)
		x->vlan_tag++;
#endif
	return ret;
}

static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode,
			       int end, int bfsize)
{
	int bfsize1;

	p->des0 |= cpu_to_le32(RDES0_OWN);

	bfsize1 = min(bfsize, BUF_SIZE_2KiB - 1);
	p->des1 |= cpu_to_le32(bfsize1 & RDES1_BUFFER1_SIZE_MASK);

	if (mode == STMMAC_CHAIN_MODE)
		ndesc_rx_set_on_chain(p, end);
	else
		ndesc_rx_set_on_ring(p, end, bfsize);

	if (disable_rx_ic)
		p->des1 |= cpu_to_le32(RDES1_DISABLE_IC);
}

static void ndesc_init_tx_desc(struct dma_desc *p, int mode, int end)
{
	p->des0 &= cpu_to_le32(~TDES0_OWN);
	if (mode == STMMAC_CHAIN_MODE)
		ndesc_tx_set_on_chain(p);
	else
		ndesc_end_tx_desc_on_ring(p, end);
}

static int ndesc_get_tx_owner(struct dma_desc *p)
{
	return (le32_to_cpu(p->des0) & TDES0_OWN) >> 31;
}

static void ndesc_set_tx_owner(struct dma_desc *p)
{
	p->des0 |= cpu_to_le32(TDES0_OWN);
}

static void ndesc_set_rx_owner(struct dma_desc *p, int disable_rx_ic)
{
	p->des0 |= cpu_to_le32(RDES0_OWN);
}

static int ndesc_get_tx_ls(struct dma_desc *p)
{
	return (le32_to_cpu(p->des1) & TDES1_LAST_SEGMENT) >> 30;
}

static void ndesc_release_tx_desc(struct dma_desc *p, int mode)
{
	int ter = (le32_to_cpu(p->des1) & TDES1_END_RING) >> 25;

	memset(p, 0, offsetof(struct dma_desc, des2));
	if (mode == STMMAC_CHAIN_MODE)
		ndesc_tx_set_on_chain(p);
	else
		ndesc_end_tx_desc_on_ring(p, ter);
}

static void ndesc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
				  bool csum_flag, int mode, bool tx_own,
				  bool ls, unsigned int tot_pkt_len)
{
	unsigned int tdes1 = le32_to_cpu(p->des1);

	if (is_fs)
		tdes1 |= TDES1_FIRST_SEGMENT;
	else
		tdes1 &= ~TDES1_FIRST_SEGMENT;

	if (likely(csum_flag))
		tdes1 |= (TX_CIC_FULL) << TDES1_CHECKSUM_INSERTION_SHIFT;
	else
		tdes1 &= ~(TX_CIC_FULL << TDES1_CHECKSUM_INSERTION_SHIFT);

	if (ls)
		tdes1 |= TDES1_LAST_SEGMENT;

	p->des1 = cpu_to_le32(tdes1);

	if (mode == STMMAC_CHAIN_MODE)
		norm_set_tx_desc_len_on_chain(p, len);
	else
		norm_set_tx_desc_len_on_ring(p, len);

	if (tx_own)
		p->des0 |= cpu_to_le32(TDES0_OWN);
}

static void ndesc_set_tx_ic(struct dma_desc *p)
{
	p->des1 |= cpu_to_le32(TDES1_INTERRUPT);
}

static int ndesc_get_rx_frame_len(struct dma_desc *p, int rx_coe_type)
{
	unsigned int csum = 0;

	/* The type-1 checksum offload engines append the checksum at
	 * the end of frame and the two bytes of checksum are added in
	 * the length.
	 * Adjust for that in the framelen for type-1 checksum offload
	 * engines
	 */
	if (rx_coe_type == STMMAC_RX_COE_TYPE1)
		csum = 2;

	return (((le32_to_cpu(p->des0) & RDES0_FRAME_LEN_MASK)
		>> RDES0_FRAME_LEN_SHIFT) - csum);
}

static void ndesc_enable_tx_timestamp(struct dma_desc *p)
{
	p->des1 |= cpu_to_le32(TDES1_TIME_STAMP_ENABLE);
}

static int ndesc_get_tx_timestamp_status(struct dma_desc *p)
{
	return (le32_to_cpu(p->des0) & TDES0_TIME_STAMP_STATUS) >> 17;
}

static void ndesc_get_timestamp(void *desc, u32 ats, u64 *ts)
{
	struct dma_desc *p = (struct dma_desc *)desc;
	u64 ns;

	ns = le32_to_cpu(p->des2);
	/* convert high/sec time stamp value to nanosecond */
	ns += le32_to_cpu(p->des3) * 1000000000ULL;

	*ts = ns;
}

static int ndesc_get_rx_timestamp_status(void *desc, void *next_desc, u32 ats)
{
	struct dma_desc *p = (struct dma_desc *)desc;

	if ((le32_to_cpu(p->des2) == 0xffffffff) &&
	    (le32_to_cpu(p->des3) == 0xffffffff))
		/* timestamp is corrupted, hence don't store it */
		return 0;
	else
		return 1;
}

static void ndesc_display_ring(void *head, unsigned int size, bool rx,
			       dma_addr_t dma_rx_phy, unsigned int desc_size)
{
	struct dma_desc *p = (struct dma_desc *)head;
	dma_addr_t dma_addr;
	int i;

	pr_info("%s descriptor ring:\n", rx ? "RX" : "TX");

	for (i = 0; i < size; i++) {
		u64 x;

		dma_addr = dma_rx_phy + i * sizeof(*p);

		x = *(u64 *)p;
		pr_info("%03d [%pad]: 0x%x 0x%x 0x%x 0x%x",
			i, &dma_addr,
			(unsigned int)x, (unsigned int)(x >> 32),
			p->des2, p->des3);
		p++;
	}
	pr_info("\n");
}

static void ndesc_set_addr(struct dma_desc *p, dma_addr_t addr)
{
	p->des2 = cpu_to_le32(addr);
}

static void ndesc_clear(struct dma_desc *p)
{
	p->des2 = 0;
}

const struct stmmac_desc_ops ndesc_ops = {
	.tx_status = ndesc_get_tx_status,
	.rx_status = ndesc_get_rx_status,
	.get_tx_len = ndesc_get_tx_len,
	.init_rx_desc = ndesc_init_rx_desc,
	.init_tx_desc = ndesc_init_tx_desc,
	.get_tx_owner = ndesc_get_tx_owner,
	.release_tx_desc = ndesc_release_tx_desc,
	.prepare_tx_desc = ndesc_prepare_tx_desc,
	.set_tx_ic = ndesc_set_tx_ic,
	.get_tx_ls = ndesc_get_tx_ls,
	.set_tx_owner = ndesc_set_tx_owner,
	.set_rx_owner = ndesc_set_rx_owner,
	.get_rx_frame_len = ndesc_get_rx_frame_len,
	.enable_tx_timestamp = ndesc_enable_tx_timestamp,
	.get_tx_timestamp_status = ndesc_get_tx_timestamp_status,
	.get_timestamp = ndesc_get_timestamp,
	.get_rx_timestamp_status = ndesc_get_rx_timestamp_status,
	.display_ring = ndesc_display_ring,
	.set_addr = ndesc_set_addr,
	.clear = ndesc_clear,
};
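As a usage note, nothing in this file is called directly; the core driver reaches these handlers through the ndesc_ops function-pointer table above (in practice via stmmac's hwif dispatch wrappers). The following is a minimal sketch of a TX-completion walk over that table, assuming the tx_frame_status values (tx_dma_own, tx_not_ls) from the driver's common.h; the clean_tx_ring_sketch helper and its ring/count parameters are hypothetical, and the real driver adds considerably more bookkeeping.

/* Hypothetical TX-completion walk over the ops table above; a sketch,
 * not the driver's actual cleanup path.
 */
static unsigned int clean_tx_ring_sketch(const struct stmmac_desc_ops *ops,
					 struct stmmac_extra_stats *x,
					 struct dma_desc *ring,
					 unsigned int count,
					 void __iomem *ioaddr)
{
	unsigned int i, pkts = 0;

	for (i = 0; i < count; i++) {
		int status = ops->tx_status(x, &ring[i], ioaddr);

		if (status & tx_dma_own)
			break;		/* descriptor still owned by the DMA */

		if (!(status & tx_not_ls))
			pkts++;		/* last segment: a whole frame completed */
	}

	return pkts;
}

Note how this mirrors the structure of ndesc_get_tx_status(): ownership is checked first, non-last segments are skipped without touching the error counters, and only last-segment descriptors update the (now per-queue, 64-bit) statistics.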