Reapply "net: skb: introduce and use a single page frag cache"
This reverts commit 011b033590.

Sabrina reports that the revert may trigger warnings due to intervening
changes, especially the ability to raise MAX_SKB_FRAGS. Let's drop it
and revisit once that part is also ironed out.

Fixes: 011b033590 ("Revert "net: skb: introduce and use a single page frag cache"")
Reported-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://lore.kernel.org/6bf54579233038bc0e76056c5ea459872ce362ab.1739375933.git.pabeni@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 0d0b752f24
commit 0892b84031

3 changed files with 99 additions and 22 deletions
include/linux/netdevice.h
@@ -4115,6 +4115,7 @@ void netif_receive_skb_list(struct list_head *head);
 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
 void napi_gro_flush(struct napi_struct *napi, bool flush_old);
 struct sk_buff *napi_get_frags(struct napi_struct *napi);
+void napi_get_frags_check(struct napi_struct *napi);
 gro_result_t napi_gro_frags(struct napi_struct *napi);
 
 static inline void napi_free_frags(struct napi_struct *napi)
net/core/dev.c
@@ -6920,23 +6920,6 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi)
 	list_add_rcu(&napi->dev_list, higher); /* adds after higher */
 }
 
-/* Double check that napi_get_frags() allocates skbs with
- * skb->head being backed by slab, not a page fragment.
- * This is to make sure bug fixed in 3226b158e67c
- * ("net: avoid 32 x truesize under-estimation for tiny skbs")
- * does not accidentally come back.
- */
-static void napi_get_frags_check(struct napi_struct *napi)
-{
-	struct sk_buff *skb;
-
-	local_bh_disable();
-	skb = napi_get_frags(napi);
-	WARN_ON_ONCE(skb && skb->head_frag);
-	napi_free_frags(napi);
-	local_bh_enable();
-}
-
 void netif_napi_add_weight_locked(struct net_device *dev,
 				  struct napi_struct *napi,
 				  int (*poll)(struct napi_struct *, int),
net/core/skbuff.c
@@ -220,9 +220,67 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
 #define NAPI_SKB_CACHE_BULK	16
 #define NAPI_SKB_CACHE_HALF	(NAPI_SKB_CACHE_SIZE / 2)
 
+#if PAGE_SIZE == SZ_4K
+
+#define NAPI_HAS_SMALL_PAGE_FRAG	1
+#define NAPI_SMALL_PAGE_PFMEMALLOC(nc)	((nc).pfmemalloc)
+
+/* specialized page frag allocator using a single order 0 page
+ * and slicing it into 1K sized fragment. Constrained to systems
+ * with a very limited amount of 1K fragments fitting a single
+ * page - to avoid excessive truesize underestimation
+ */
+
+struct page_frag_1k {
+	void *va;
+	u16 offset;
+	bool pfmemalloc;
+};
+
+static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp)
+{
+	struct page *page;
+	int offset;
+
+	offset = nc->offset - SZ_1K;
+	if (likely(offset >= 0))
+		goto use_frag;
+
+	page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
+	if (!page)
+		return NULL;
+
+	nc->va = page_address(page);
+	nc->pfmemalloc = page_is_pfmemalloc(page);
+	offset = PAGE_SIZE - SZ_1K;
+	page_ref_add(page, offset / SZ_1K);
+
+use_frag:
+	nc->offset = offset;
+	return nc->va + offset;
+}
+#else
+
+/* the small page is actually unused in this build; add dummy helpers
+ * to please the compiler and avoid later preprocessor's conditionals
+ */
+#define NAPI_HAS_SMALL_PAGE_FRAG	0
+#define NAPI_SMALL_PAGE_PFMEMALLOC(nc)	false
+
+struct page_frag_1k {
+};
+
+static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask)
+{
+	return NULL;
+}
+
+#endif
+
 struct napi_alloc_cache {
 	local_lock_t bh_lock;
 	struct page_frag_cache page;
+	struct page_frag_1k page_small;
 	unsigned int skb_count;
 	void *skb_cache[NAPI_SKB_CACHE_SIZE];
 };
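For readers skimming the hunk above: page_frag_alloc_1k() carves an order-0 page into 1K slices from the top down, and pre-takes one page reference per extra slice (page_ref_add()) so each fragment can later be released independently. Below is a minimal userspace sketch of that arithmetic; the sim_* names are hypothetical stand-ins, malloc() replaces alloc_pages_node(), and a plain counter replaces the real page refcount.

#include <stdio.h>
#include <stdlib.h>

#define SIM_PAGE_SIZE	4096
#define SIM_SZ_1K	1024

struct sim_frag_1k {
	char *va;	/* backing "page" */
	int offset;	/* next slice offset, counts down to 0 */
	int page_refs;	/* stand-in for the real page refcount */
};

static void *sim_frag_alloc_1k(struct sim_frag_1k *nc)
{
	int offset = nc->offset - SIM_SZ_1K;

	if (offset < 0) {
		/* page exhausted (or first call): grab a fresh one */
		nc->va = malloc(SIM_PAGE_SIZE);
		if (!nc->va)
			return NULL;
		offset = SIM_PAGE_SIZE - SIM_SZ_1K;
		/* mirrors page_ref_add(): one ref per remaining slice,
		 * plus the one the allocation itself holds
		 */
		nc->page_refs = 1 + offset / SIM_SZ_1K;
	}
	nc->offset = offset;
	return nc->va + offset;
}

int main(void)
{
	struct sim_frag_1k nc = { .offset = 0 };

	/* an order-0 (4K) page yields exactly four 1K fragments;
	 * the fifth call rolls over to a fresh page
	 */
	for (int i = 0; i < 5; i++) {
		char *frag = sim_frag_alloc_1k(&nc);

		printf("frag %d at offset %td (refs %d)\n",
		       i, frag - nc.va, nc.page_refs);
	}
	return 0;
}

The sketch leaks the exhausted page on rollover; in the kernel the pre-taken references keep the page alive until every fragment holder drops its ref.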
@@ -232,6 +290,23 @@ static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = {
 	.bh_lock = INIT_LOCAL_LOCK(bh_lock),
 };
 
+/* Double check that napi_get_frags() allocates skbs with
+ * skb->head being backed by slab, not a page fragment.
+ * This is to make sure bug fixed in 3226b158e67c
+ * ("net: avoid 32 x truesize under-estimation for tiny skbs")
+ * does not accidentally come back.
+ */
+void napi_get_frags_check(struct napi_struct *napi)
+{
+	struct sk_buff *skb;
+
+	local_bh_disable();
+	skb = napi_get_frags(napi);
+	WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag);
+	napi_free_frags(napi);
+	local_bh_enable();
+}
+
 void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
 {
 	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
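The "32 x" in the referenced commit title is plain arithmetic, and the same arithmetic shows why the 1K allocator bounds the problem. A back-of-the-envelope sketch, assuming a 4K-page system where the regular frag cache hands out pieces of order-3 pages and a tiny skb only accounts for roughly a 1K slice:

#include <stdio.h>

int main(void)
{
	unsigned int order3_frag = 32 * 1024;	/* regular page frag cache page */
	unsigned int order0_page = 4 * 1024;	/* single page used by the 1K allocator */
	unsigned int tiny_slice  = 1024;	/* what a tiny skb's truesize covers */

	/* one tiny skb can pin the whole backing page while its
	 * truesize only reflects the slice it was handed
	 */
	printf("order-3 frag cache: up to %u x under-estimation\n",
	       order3_frag / tiny_slice);	/* 32 */
	printf("1K slice allocator: up to %u x under-estimation\n",
	       order0_page / tiny_slice);	/* 4 */
	return 0;
}

That bound is also why the WARN_ON_ONCE() above now tolerates skb->head_frag when NAPI_HAS_SMALL_PAGE_FRAG is set: a head frag is acceptable once it comes from the bounded 1K allocator rather than the order-3 cache.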
@@ -738,8 +813,10 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
 
 	/* If requested length is either too small or too big,
 	 * we use kmalloc() for skb->head allocation.
+	 * When the small frag allocator is available, prefer it over kmalloc
+	 * for small fragments
 	 */
-	if (len <= SKB_WITH_OVERHEAD(1024) ||
+	if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) ||
 	    len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
 	    (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
 		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI,
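Putting the rewritten condition into numbers: with the small frag allocator available, requests up to SKB_WITH_OVERHEAD(1024) no longer short-circuit to kmalloc. A rough sketch of the resulting path selection, assuming the aligned skb_shared_info overhead comes out to 320 bytes (the real value depends on arch and config) and omitting the gfp-mask escape hatch:

#include <stdio.h>
#include <stdbool.h>

#define SIM_PAGE_SIZE	4096
#define SHINFO_SZ	320	/* assumed SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */
#define SIM_WITH_OVERHEAD(x)	((x) - SHINFO_SZ)

static const char *pick_path(unsigned int len, bool has_small_frag)
{
	/* mirrors the rewritten napi_alloc_skb() condition */
	if ((!has_small_frag && len <= SIM_WITH_OVERHEAD(1024)) ||
	    len > SIM_WITH_OVERHEAD(SIM_PAGE_SIZE))
		return "kmalloc via __alloc_skb()";
	if (has_small_frag && len <= SIM_WITH_OVERHEAD(1024))
		return "1K slice via page_frag_alloc_1k()";
	return "regular frag via page_frag_alloc()";
}

int main(void)
{
	unsigned int lens[] = { 128, 640, 2048, 8192 };

	for (unsigned int i = 0; i < sizeof(lens) / sizeof(lens[0]); i++)
		printf("len %4u -> %s\n", lens[i], pick_path(lens[i], true));
	return 0;
}

With the assumed overhead, anything up to 704 bytes takes the 1K slice path, anything above 3776 bytes falls back to kmalloc, and the middle range keeps using the regular frag cache.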
@@ -749,16 +826,32 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
 		goto skb_success;
 	}
 
-	len = SKB_HEAD_ALIGN(len);
-
 	if (sk_memalloc_socks())
 		gfp_mask |= __GFP_MEMALLOC;
 
 	local_lock_nested_bh(&napi_alloc_cache.bh_lock);
 	nc = this_cpu_ptr(&napi_alloc_cache);
-	data = page_frag_alloc(&nc->page, len, gfp_mask);
-	pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
+	if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) {
+		/* we are artificially inflating the allocation size, but
+		 * that is not as bad as it may look like, as:
+		 * - 'len' less than GRO_MAX_HEAD makes little sense
+		 * - On most systems, larger 'len' values lead to fragment
+		 *   size above 512 bytes
+		 * - kmalloc would use the kmalloc-1k slab for such values
+		 * - Builds with smaller GRO_MAX_HEAD will very likely do
+		 *   little networking, as that implies no WiFi and no
+		 *   tunnels support, and 32 bits arches.
+		 */
+		len = SZ_1K;
+
+		data = page_frag_alloc_1k(&nc->page_small, gfp_mask);
+		pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small);
+	} else {
+		len = SKB_HEAD_ALIGN(len);
+
+		data = page_frag_alloc(&nc->page, len, gfp_mask);
+		pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
+	}
 	local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
 
 	if (unlikely(!data))
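The len = SZ_1K inflation in the branch above costs less than it looks: as the in-line comment argues, kmalloc() would round such a request into the kmalloc-1k slab anyway. A quick illustration; the 320-byte overhead and the naive power-of-two rounding are simplifying assumptions standing in for the real SKB_DATA_ALIGN() overhead and kmalloc size classes, which vary by config:

#include <stdio.h>

/* naive power-of-two rounding, a stand-in for kmalloc's size classes */
static unsigned int size_class(unsigned int sz)
{
	unsigned int c = 64;

	while (c < sz)
		c <<= 1;
	return c;
}

int main(void)
{
	unsigned int shinfo = 320;	/* assumed shared-info overhead */
	unsigned int len = 640;		/* a typical small GRO head request */

	/* the kmalloc fallback would charge the rounded head + overhead... */
	printf("kmalloc path : %u bytes\n", size_class(len + shinfo)); /* 1024 */
	/* ...while the small frag path charges a flat 1K slice */
	printf("1K frag path : %u bytes\n", 1024);
	return 0;
}

So for the lengths this branch handles, truesize accounting ends up at roughly 1K either way; the frag path simply avoids the slab and keeps the bounded under-estimation discussed earlier.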