linux/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
Maciej Fijalkowski ad2047cf5d ice: work on pre-XDP prog frag count
Fix an OOM panic in XDP_DRV mode when a XDP program shrinks a
multi-buffer packet by 4k bytes and then redirects it to an AF_XDP
socket.

Since support for handling multi-buffer frames was added to XDP, the
bpf_xdp_adjust_tail() helper called from an XDP program can free the page
that a given fragment occupies and, in turn, decrease the fragment count
within the skb_shared_info that is embedded in the xdp_buff struct. In the
current ice driver codebase this becomes problematic when the page
recycling logic decides not to reuse the page: __page_frag_cache_drain()
is then called with an ice_rx_buf::pagecnt_bias that was not adjusted
after the XDP program changed the page's refcount, so the refcount is
never drained to 0 and the page is never freed.
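
For illustration, a minimal sketch of the kind of program that can trigger
this (hypothetical program and map names; assumes a frags-aware XDP program
attached in native mode):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	struct {
		__uint(type, BPF_MAP_TYPE_XSKMAP);
		__uint(max_entries, 64);
		__type(key, __u32);
		__type(value, __u32);
	} xsks_map SEC(".maps");

	SEC("xdp.frags")
	int shrink_then_redirect(struct xdp_md *ctx)
	{
		/* chop 4k off the tail; this may free the last frag's
		 * page and drop skb_shared_info::nr_frags under the driver
		 */
		if (bpf_xdp_adjust_tail(ctx, -4096))
			return XDP_DROP;

		/* hand the frame off to an AF_XDP socket */
		return bpf_redirect_map(&xsks_map, ctx->rx_queue_index,
					XDP_DROP);
	}

	char _license[] SEC("license") = "GPL";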

To address this, let us store, on the Rx ring struct, the frag count seen
before the XDP program was executed. This is compared with the current
frag count from the skb_shared_info embedded in the xdp_buff; a smaller
value in the latter indicates that the XDP program freed frag(s). Then,
for that delta, decrement pagecnt_bias on the XDP_DROP verdict.
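
Conceptually (a simplified sketch, not the literal driver diff; sinfo
stands for the skb_shared_info attached to the xdp_buff):

	/* while the frame is assembled, remember how many frags the
	 * driver itself put onto the xdp_buff
	 */
	rx_ring->nr_frags = sinfo->nr_frags;

	/* ...XDP program runs, possibly calling bpf_xdp_adjust_tail()... */

	/* afterwards, a lower sinfo->nr_frags means the program freed
	 * that many frag pages, so their pagecnt_bias must drop too
	 */
	delta = rx_ring->nr_frags - sinfo->nr_frags;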

While at it, let us also handle the EOP frag within ice_set_rx_bufs_act()
to make our life easier, so that all of the adjustments needed for freed
frags are performed in a single place.

Fixes: 2fba7dc515 ("ice: Add support for XDP multi-buffer on Rx side")
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/r/20240124191602.566724-5-maciej.fijalkowski@intel.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2024-01-24 16:24:06 -08:00


/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2019, Intel Corporation. */
#ifndef _ICE_TXRX_LIB_H_
#define _ICE_TXRX_LIB_H_
#include "ice.h"

/**
 * ice_set_rx_bufs_act - propagate Rx buffer action to frags
 * @xdp: XDP buffer representing frame (linear and frags part)
 * @rx_ring: Rx ring struct
 * @act: action to store onto Rx buffers related to XDP buffer parts
 *
 * Set action that should be taken before putting Rx buffer from first frag
 * to the last.
 */
static inline void
ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring,
                    const unsigned int act)
{
        u32 sinfo_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
        u32 nr_frags = rx_ring->nr_frags + 1;
        u32 idx = rx_ring->first_desc;
        u32 cnt = rx_ring->count;
        struct ice_rx_buf *buf;

        for (int i = 0; i < nr_frags; i++) {
                buf = &rx_ring->rx_buf[idx];
                buf->act = act;

                if (++idx == cnt)
                        idx = 0;
        }

        /* adjust pagecnt_bias on frags freed by XDP prog */
        if (sinfo_frags < rx_ring->nr_frags && act == ICE_XDP_CONSUMED) {
                u32 delta = rx_ring->nr_frags - sinfo_frags;

                while (delta) {
                        if (idx == 0)
                                idx = cnt - 1;
                        else
                                idx--;
                        buf = &rx_ring->rx_buf[idx];
                        buf->pagecnt_bias--;
                        delta--;
                }
        }
}
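
/* Usage sketch (illustrative, not the upstream driver flow verbatim): after
 * the XDP program returns its verdict for a multi-buffer frame, the Rx clean
 * routine propagates the matching buffer action to every buffer of the
 * frame, e.g. on XDP_DROP:
 *
 *	ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
 */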

/**
 * ice_test_staterr - tests bits in Rx descriptor status and error fields
 * @status_err_n: Rx descriptor status_error0 or status_error1 bits
 * @stat_err_bits: value to mask
 *
 * This function does some fast chicanery in order to return the
 * value of the mask which is really only used for boolean tests.
 * The status_error_len doesn't need to be shifted because it begins
 * at offset zero.
 */
static inline bool
ice_test_staterr(__le16 status_err_n, const u16 stat_err_bits)
{
        return !!(status_err_n & cpu_to_le16(stat_err_bits));
}

/**
 * ice_is_non_eop - process handling of non-EOP buffers
 * @rx_ring: Rx ring being processed
 * @rx_desc: Rx descriptor for current buffer
 *
 * If the buffer is an EOP buffer, this function exits returning false,
 * otherwise return true indicating that this is in fact a non-EOP buffer.
 */
static inline bool
ice_is_non_eop(const struct ice_rx_ring *rx_ring,
               const union ice_32b_rx_flex_desc *rx_desc)
{
        /* if we are the last buffer then there is nothing else to do */
#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
        if (likely(ice_test_staterr(rx_desc->wb.status_error0, ICE_RXD_EOF)))
                return false;

        rx_ring->ring_stats->rx_stats.non_eop_descs++;

        return true;
}
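
/* Usage sketch (hypothetical Rx loop, for illustration): descriptors are
 * collected into a single xdp_buff until the EOP descriptor shows up, e.g.:
 *
 *	if (ice_is_non_eop(rx_ring, rx_desc))
 *		continue;
 *
 * Only once this returns false is the frame complete and ready for XDP or
 * skb construction.
 */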

/**
 * ice_build_ctob - build the cmd/type/offset/buf size/L2 tag descriptor word
 * @td_cmd: Tx descriptor command bits
 * @td_offset: Tx descriptor header offsets
 * @size: size of the Tx buffer in bytes
 * @td_tag: L2 tag (e.g. VLAN) to insert
 *
 * Returns the 64-bit quadword in the little-endian layout expected by HW.
 */
static inline __le64
ice_build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
{
        return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
                           (td_cmd << ICE_TXD_QW1_CMD_S) |
                           (td_offset << ICE_TXD_QW1_OFFSET_S) |
                           ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
                           (td_tag << ICE_TXD_QW1_L2TAG1_S));
}
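
/* Example (illustrative values only): a data descriptor for a 256-byte
 * buffer that ends the frame, with no header offsets and no L2 tag:
 *
 *	tx_desc->cmd_type_offset_bsz =
 *		ice_build_ctob(ICE_TX_DESC_CMD_EOP, 0, 256, 0);
 */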

/**
 * ice_get_vlan_tci - get VLAN TCI from Rx flex descriptor
 * @rx_desc: Rx 32b flex descriptor with RXDID=2
 *
 * The OS and current PF implementation only support stripping a single VLAN tag
 * at a time, so there should only ever be 0 or 1 tags in the l2tag* fields. If
 * one is found return the tag, else return 0 to mean no VLAN tag was found.
 */
static inline u16
ice_get_vlan_tci(const union ice_32b_rx_flex_desc *rx_desc)
{
        u16 stat_err_bits;

        stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
        if (ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits))
                return le16_to_cpu(rx_desc->wb.l2tag1);

        stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS1_L2TAG2P_S);
        if (ice_test_staterr(rx_desc->wb.status_error1, stat_err_bits))
                return le16_to_cpu(rx_desc->wb.l2tag2_2nd);

        return 0;
}

/**
 * ice_xdp_ring_update_tail - Updates the XDP Tx ring tail register
 * @xdp_ring: XDP Tx ring
 *
 * This function updates the XDP Tx ring tail register.
 */
static inline void ice_xdp_ring_update_tail(struct ice_tx_ring *xdp_ring)
{
        /* Force memory writes to complete before letting h/w
         * know there are new descriptors to fetch.
         */
        wmb();
        writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
}

/**
 * ice_set_rs_bit - set RS bit on last produced descriptor (one behind current NTU)
 * @xdp_ring: XDP ring to produce the HW Tx descriptors on
 *
 * returns index of descriptor that had RS bit produced on
 */
static inline u32 ice_set_rs_bit(const struct ice_tx_ring *xdp_ring)
{
        u32 rs_idx = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : xdp_ring->count - 1;
        struct ice_tx_desc *tx_desc;

        tx_desc = ICE_TX_DESC(xdp_ring, rs_idx);
        tx_desc->cmd_type_offset_bsz |=
                cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);

        return rs_idx;
}
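
/* Flow sketch (illustrative; ice_finalize_xdp_rx() below does this, with
 * additional bookkeeping): once all XDP Tx descriptors for a NAPI cycle have
 * been produced, request a completion writeback and then ring the doorbell:
 *
 *	ice_set_rs_bit(xdp_ring);
 *	ice_xdp_ring_update_tail(xdp_ring);
 */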

void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res, u32 first_idx);
int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring);
int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring,
                        bool frame);
void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val);
void
ice_process_skb_fields(struct ice_rx_ring *rx_ring,
                       union ice_32b_rx_flex_desc *rx_desc,
                       struct sk_buff *skb);
void
ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tci);

/**
 * ice_xdp_meta_set_desc - stash the EOP descriptor for later metadata access
 * @xdp: XDP buffer embedded in a struct ice_xdp_buff
 * @eop_desc: Rx descriptor that completed the frame
 */
static inline void
ice_xdp_meta_set_desc(struct xdp_buff *xdp,
                      union ice_32b_rx_flex_desc *eop_desc)
{
        struct ice_xdp_buff *xdp_ext = container_of(xdp, struct ice_xdp_buff,
                                                    xdp_buff);

        xdp_ext->eop_desc = eop_desc;
}
#endif /* !_ICE_TXRX_LIB_H_ */