vmxnet3: Fix tx queue race condition with XDP
If XDP traffic runs on a CPU whose number is greater than or equal to
the number of Tx queues of the NIC, then vmxnet3_xdp_get_tq()
always picks queue 0 for transmission, because it uses reciprocal_scale()
instead of a simple modulo operation.
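For context, the kernel's reciprocal_scale(val, ep_ro) maps val from the full
u32 range into [0, ep_ro) by computing ((u64)val * ep_ro) >> 32. CPU numbers
are tiny compared to 2^32, so the result is always 0. A minimal userspace
sketch of that arithmetic (the 4-queue setup and the demo loop are
illustrative, not taken from the driver):

	#include <stdint.h>
	#include <stdio.h>

	/* Same formula as the kernel's reciprocal_scale(): scales val from
	 * [0, 2^32) down into [0, ep_ro).  For a small val such as a CPU id,
	 * (u64)val * ep_ro is far below 2^32, so the shift yields 0.
	 */
	static uint32_t reciprocal_scale(uint32_t val, uint32_t ep_ro)
	{
		return (uint32_t)(((uint64_t)val * ep_ro) >> 32);
	}

	int main(void)
	{
		uint32_t tq_number = 4;	/* assumed example: NIC with 4 Tx queues */
		uint32_t cpu;

		for (cpu = 0; cpu < 8; cpu++)
			printf("cpu %u: reciprocal_scale -> %u, modulo -> %u\n",
			       cpu, reciprocal_scale(cpu, tq_number),
			       cpu % tq_number);
		return 0;
	}

Every CPU lands on queue 0 with reciprocal_scale(), while cpu % tq_number
spreads CPUs 0..7 across queues 0..3, which is the mapping the patch
introduces for CPUs numbered at or above tq_number.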
vmxnet3_xdp_xmit() and vmxnet3_xdp_xmit_frame() use the returned
queue without any locking, which can lead to race conditions
when multiple XDP transmits run in parallel on different CPUs.
This patch uses a simple modulo scheme when the current CPU number equals or
exceeds the number of Tx queues on the NIC. It also adds locking in the
vmxnet3_xdp_xmit() and vmxnet3_xdp_xmit_frame() functions.
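The locking pattern added here (take the per-queue lock before touching the
Tx ring, release it on every exit path, including the error paths) can be
modelled in userspace roughly as below. This is an illustrative pthread-based
sketch, not driver code; all names (fake_tx_queue, fake_xdp_xmit_frame,
RING_SIZE) are invented for the example:

	#include <errno.h>
	#include <pthread.h>
	#include <stdio.h>

	#define RING_SIZE 512

	struct fake_tx_queue {
		pthread_spinlock_t tx_lock;
		unsigned int next2fill;	/* producer index, must not be raced */
		unsigned int filled;	/* in-flight descriptors */
	};

	static int fake_xdp_xmit_frame(struct fake_tx_queue *tq)
	{
		pthread_spin_lock(&tq->tx_lock);

		if (tq->filled == RING_SIZE) {	/* ring full: unlock first */
			pthread_spin_unlock(&tq->tx_lock);
			return -ENOSPC;
		}

		tq->next2fill = (tq->next2fill + 1) % RING_SIZE;
		tq->filled++;

		pthread_spin_unlock(&tq->tx_lock);
		return 0;
	}

	static void *worker(void *arg)
	{
		for (int i = 0; i < 100000; i++)
			fake_xdp_xmit_frame(arg);
		return NULL;
	}

	int main(void)
	{
		struct fake_tx_queue tq = { .next2fill = 0, .filled = 0 };
		pthread_t t1, t2;

		pthread_spin_init(&tq.tx_lock, PTHREAD_PROCESS_PRIVATE);
		pthread_create(&t1, NULL, worker, &tq);
		pthread_create(&t2, NULL, worker, &tq);
		pthread_join(t1, NULL);
		pthread_join(t2, NULL);
		printf("in-flight descriptors: %u (capped at %d)\n",
		       tq.filled, RING_SIZE);
		return 0;
	}

With the lock held across the check-and-fill sequence, two concurrent
producers can never both pass the "ring full" test and overrun the ring,
which mirrors what the spin_lock_irq()/__netif_tx_lock() additions below
guarantee for the real Tx ring.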
Fixes: 54f00cce11 ("vmxnet3: Add XDP support.")
Signed-off-by: Sankararaman Jayaraman <sankararaman.jayaraman@broadcom.com>
Signed-off-by: Ronak Doshi <ronak.doshi@broadcom.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250131042340.156547-1-sankararaman.jayaraman@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
commit 3f1baa91a1 (parent a8aa6a6ddc)
1 changed file, 12 additions and 2 deletions: drivers/net/vmxnet3/vmxnet3_xdp.c
@@ -28,7 +28,7 @@ vmxnet3_xdp_get_tq(struct vmxnet3_adapter *adapter)
 	if (likely(cpu < tq_number))
 		tq = &adapter->tx_queue[cpu];
 	else
-		tq = &adapter->tx_queue[reciprocal_scale(cpu, tq_number)];
+		tq = &adapter->tx_queue[cpu % tq_number];
 
 	return tq;
 }
@@ -124,6 +124,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
 	u32 buf_size;
 	u32 dw2;
 
+	spin_lock_irq(&tq->tx_lock);
 	dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
 	dw2 |= xdpf->len;
 	ctx.sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
@@ -134,6 +135,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
 
 	if (vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) == 0) {
 		tq->stats.tx_ring_full++;
+		spin_unlock_irq(&tq->tx_lock);
 		return -ENOSPC;
 	}
 
@@ -142,8 +144,10 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
 		tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
 					       xdpf->data, buf_size,
 					       DMA_TO_DEVICE);
-		if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr))
+		if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) {
+			spin_unlock_irq(&tq->tx_lock);
 			return -EFAULT;
+		}
 		tbi->map_type |= VMXNET3_MAP_SINGLE;
 	} else { /* XDP buffer from page pool */
 		page = virt_to_page(xdpf->data);
@@ -182,6 +186,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
 	dma_wmb();
 	gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
 						  VMXNET3_TXD_GEN);
+	spin_unlock_irq(&tq->tx_lock);
 
 	/* No need to handle the case when tx_num_deferred doesn't reach
 	 * threshold. Backend driver at hypervisor side will poll and reset
@@ -225,6 +230,7 @@ vmxnet3_xdp_xmit(struct net_device *dev,
 {
 	struct vmxnet3_adapter *adapter = netdev_priv(dev);
 	struct vmxnet3_tx_queue *tq;
+	struct netdev_queue *nq;
 	int i;
 
 	if (unlikely(test_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state)))
@@ -236,6 +242,9 @@ vmxnet3_xdp_xmit(struct net_device *dev,
 	if (tq->stopped)
 		return -ENETDOWN;
 
+	nq = netdev_get_tx_queue(adapter->netdev, tq->qid);
+
+	__netif_tx_lock(nq, smp_processor_id());
 	for (i = 0; i < n; i++) {
 		if (vmxnet3_xdp_xmit_frame(adapter, frames[i], tq, true)) {
 			tq->stats.xdp_xmit_err++;
@@ -243,6 +252,7 @@ vmxnet3_xdp_xmit(struct net_device *dev,
 		}
 	}
 	tq->stats.xdp_xmit += i;
+	__netif_tx_unlock(nq);
 
 	return i;
 }