Improve the performance of the AF_XDP zero-copy Tx completion path. When there are no XDP buffers being sent using XDP_TX or XDP_REDIRECT, we do not have to go through the SW ring to clean up any entries, since the AF_XDP path does not use these. In these cases, just fast forward the next-to-clean counter and skip going through the SW ring. The limit on the maximum number of entries to complete is also removed, since the algorithm is now O(1).

To simplify the code path, the maximum number of entries to complete for the XDP path is therefore also increased from 256 to 512 (the default number of Tx HW descriptors). This should be fine since completion in the XDP path is faster than in the SKB path, which has 256 as its maximum.

This patch provides around 4% throughput improvement for the l2fwd application in xdpsock on my machine.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Reviewed-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
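For illustration, a minimal sketch of the idea follows. This is not the driver's actual code: the struct layout and the names used here (tx_ring, xdp_tx_active, sw_ring, release_xdp_entry, clean_xdp_tx) are simplified stand-ins for the concepts described above, i.e. a per-ring count of outstanding XDP_TX/XDP_REDIRECT buffers that lets the pure AF_XDP case skip the per-descriptor walk entirely.

/* Illustrative sketch only -- not the i40e driver's code. All names are
 * simplified stand-ins for the concepts in the commit message.
 */
struct tx_ring {
	unsigned int count;          /* number of descriptors in the ring    */
	unsigned int next_to_clean;  /* first descriptor not yet completed   */
	unsigned int xdp_tx_active;  /* outstanding XDP_TX/XDP_REDIRECT bufs */
	void **sw_ring;              /* per-descriptor driver bookkeeping    */
};

/* Hypothetical helper: return one XDP_TX/XDP_REDIRECT buffer to its owner
 * (a real driver would call something like xdp_return_frame() here).
 */
static void release_xdp_entry(struct tx_ring *ring, unsigned int idx)
{
	ring->sw_ring[idx] = NULL;
	ring->xdp_tx_active--;
}

/* Complete 'done' descriptors that the HW has finished transmitting
 * (assumes done <= ring->count).
 */
void clean_xdp_tx(struct tx_ring *ring, unsigned int done)
{
	unsigned int ntc = ring->next_to_clean;

	if (ring->xdp_tx_active) {
		/* Slow path: some entries came from XDP_TX/XDP_REDIRECT,
		 * so the SW ring must be walked to release them.
		 */
		unsigned int i;

		for (i = 0; i < done; i++) {
			if (ring->sw_ring[ntc])
				release_xdp_entry(ring, ntc);
			if (++ntc == ring->count)
				ntc = 0;
		}
	} else {
		/* Fast path: pure AF_XDP traffic never touches the SW ring,
		 * so just fast forward the counter -- O(1) no matter how
		 * many descriptors completed, which is why the old per-call
		 * limit can be dropped.
		 */
		ntc += done;
		if (ntc >= ring->count)
			ntc -= ring->count;
	}

	ring->next_to_clean = ntc;
}

The key design point is that the count of outstanding XDP_TX/XDP_REDIRECT buffers is maintained incrementally (incremented at transmit time, decremented as each such buffer is released), so the common AF_XDP-only case is detected with a single comparison. The i40e_xsk.h header reproduced below only declares the entry points (i40e_clean_xdp_tx_irq among them); the completion logic itself lives in the corresponding .c file.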
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright(c) 2018 Intel Corporation. */

#ifndef _I40E_XSK_H_
#define _I40E_XSK_H_

struct i40e_vsi;
struct xdp_umem;
struct zero_copy_allocator;

int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair);
int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair);
int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
			u16 qid);
bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count);
int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);

bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring);
int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring);
void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);

#endif /* _I40E_XSK_H_ */