maple_tree: remove GFP_ZERO from kmem_cache_alloc() and kmem_cache_alloc_bulk()
Preallocations are common in the VMA code to avoid allocating under certain locking conditions. The preallocations must also cover the worst-case scenario. Removing the GFP_ZERO flag from the kmem_cache_alloc() (and bulk variant) calls will reduce the amount of time spent zeroing memory that may not be used. Only zero out the necessary area to keep track of the allocations in the maple state. Zero the entire node prior to using it in the tree.

This required internal changes to node counting on allocation, so the test code is also updated.

This restores some micro-benchmark performance:
  up to +9% in mmtests mmap1 by my testing
  +10% to +20% in mmap, mmapaddr, mmapmany tests reported by Red Hat

Link: https://bugzilla.redhat.com/show_bug.cgi?id=2149636
Link: https://lkml.kernel.org/r/20230105160427.2988454-1-Liam.Howlett@oracle.com
Signed-off-by: Liam Howlett <Liam.Howlett@oracle.com>
Reported-by: Jirka Hladky <jhladky@redhat.com>
Suggested-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
fc5744881e
commit
541e06b772
2 changed files with 52 additions and 46 deletions
|
@ -149,13 +149,12 @@ struct maple_subtree_state {
|
||||||
/* Functions */
|
/* Functions */
|
||||||
static inline struct maple_node *mt_alloc_one(gfp_t gfp)
|
static inline struct maple_node *mt_alloc_one(gfp_t gfp)
|
||||||
{
|
{
|
||||||
return kmem_cache_alloc(maple_node_cache, gfp | __GFP_ZERO);
|
return kmem_cache_alloc(maple_node_cache, gfp);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
|
static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
|
||||||
{
|
{
|
||||||
return kmem_cache_alloc_bulk(maple_node_cache, gfp | __GFP_ZERO, size,
|
return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes);
|
||||||
nodes);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void mt_free_bulk(size_t size, void __rcu **nodes)
|
static inline void mt_free_bulk(size_t size, void __rcu **nodes)
|
||||||
|
@ -1125,9 +1124,10 @@ static inline struct maple_node *mas_pop_node(struct ma_state *mas)
|
||||||
{
|
{
|
||||||
struct maple_alloc *ret, *node = mas->alloc;
|
struct maple_alloc *ret, *node = mas->alloc;
|
||||||
unsigned long total = mas_allocated(mas);
|
unsigned long total = mas_allocated(mas);
|
||||||
|
unsigned int req = mas_alloc_req(mas);
|
||||||
|
|
||||||
/* nothing or a request pending. */
|
/* nothing or a request pending. */
|
||||||
if (unlikely(!total))
|
if (WARN_ON(!total))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
if (total == 1) {
|
if (total == 1) {
|
||||||
|
@ -1137,27 +1137,25 @@ static inline struct maple_node *mas_pop_node(struct ma_state *mas)
|
||||||
goto single_node;
|
goto single_node;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!node->node_count) {
|
if (node->node_count == 1) {
|
||||||
/* Single allocation in this node. */
|
/* Single allocation in this node. */
|
||||||
mas->alloc = node->slot[0];
|
mas->alloc = node->slot[0];
|
||||||
node->slot[0] = NULL;
|
|
||||||
mas->alloc->total = node->total - 1;
|
mas->alloc->total = node->total - 1;
|
||||||
ret = node;
|
ret = node;
|
||||||
goto new_head;
|
goto new_head;
|
||||||
}
|
}
|
||||||
|
|
||||||
node->total--;
|
node->total--;
|
||||||
ret = node->slot[node->node_count];
|
ret = node->slot[--node->node_count];
|
||||||
node->slot[node->node_count--] = NULL;
|
node->slot[node->node_count] = NULL;
|
||||||
|
|
||||||
single_node:
|
single_node:
|
||||||
new_head:
|
new_head:
|
||||||
ret->total = 0;
|
if (req) {
|
||||||
ret->node_count = 0;
|
req++;
|
||||||
if (ret->request_count) {
|
mas_set_alloc_req(mas, req);
|
||||||
mas_set_alloc_req(mas, ret->request_count + 1);
|
|
||||||
ret->request_count = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
memset(ret, 0, sizeof(*ret));
|
||||||
return (struct maple_node *)ret;
|
return (struct maple_node *)ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1176,21 +1174,20 @@ static inline void mas_push_node(struct ma_state *mas, struct maple_node *used)
|
||||||
unsigned long count;
|
unsigned long count;
|
||||||
unsigned int requested = mas_alloc_req(mas);
|
unsigned int requested = mas_alloc_req(mas);
|
||||||
|
|
||||||
memset(reuse, 0, sizeof(*reuse));
|
|
||||||
count = mas_allocated(mas);
|
count = mas_allocated(mas);
|
||||||
|
|
||||||
if (count && (head->node_count < MAPLE_ALLOC_SLOTS - 1)) {
|
reuse->request_count = 0;
|
||||||
if (head->slot[0])
|
reuse->node_count = 0;
|
||||||
head->node_count++;
|
if (count && (head->node_count < MAPLE_ALLOC_SLOTS)) {
|
||||||
head->slot[head->node_count] = reuse;
|
head->slot[head->node_count++] = reuse;
|
||||||
head->total++;
|
head->total++;
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
|
|
||||||
reuse->total = 1;
|
reuse->total = 1;
|
||||||
if ((head) && !((unsigned long)head & 0x1)) {
|
if ((head) && !((unsigned long)head & 0x1)) {
|
||||||
head->request_count = 0;
|
|
||||||
reuse->slot[0] = head;
|
reuse->slot[0] = head;
|
||||||
|
reuse->node_count = 1;
|
||||||
reuse->total += head->total;
|
reuse->total += head->total;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1209,7 +1206,6 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
|
||||||
{
|
{
|
||||||
struct maple_alloc *node;
|
struct maple_alloc *node;
|
||||||
unsigned long allocated = mas_allocated(mas);
|
unsigned long allocated = mas_allocated(mas);
|
||||||
unsigned long success = allocated;
|
|
||||||
unsigned int requested = mas_alloc_req(mas);
|
unsigned int requested = mas_alloc_req(mas);
|
||||||
unsigned int count;
|
unsigned int count;
|
||||||
void **slots = NULL;
|
void **slots = NULL;
|
||||||
|
@ -1225,24 +1221,29 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
|
||||||
WARN_ON(!allocated);
|
WARN_ON(!allocated);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS - 1) {
|
if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) {
|
||||||
node = (struct maple_alloc *)mt_alloc_one(gfp);
|
node = (struct maple_alloc *)mt_alloc_one(gfp);
|
||||||
if (!node)
|
if (!node)
|
||||||
goto nomem_one;
|
goto nomem_one;
|
||||||
|
|
||||||
if (allocated)
|
if (allocated) {
|
||||||
node->slot[0] = mas->alloc;
|
node->slot[0] = mas->alloc;
|
||||||
|
node->node_count = 1;
|
||||||
|
} else {
|
||||||
|
node->node_count = 0;
|
||||||
|
}
|
||||||
|
|
||||||
success++;
|
|
||||||
mas->alloc = node;
|
mas->alloc = node;
|
||||||
|
node->total = ++allocated;
|
||||||
requested--;
|
requested--;
|
||||||
}
|
}
|
||||||
|
|
||||||
node = mas->alloc;
|
node = mas->alloc;
|
||||||
|
node->request_count = 0;
|
||||||
while (requested) {
|
while (requested) {
|
||||||
max_req = MAPLE_ALLOC_SLOTS;
|
max_req = MAPLE_ALLOC_SLOTS;
|
||||||
if (node->slot[0]) {
|
if (node->node_count) {
|
||||||
unsigned int offset = node->node_count + 1;
|
unsigned int offset = node->node_count;
|
||||||
|
|
||||||
slots = (void **)&node->slot[offset];
|
slots = (void **)&node->slot[offset];
|
||||||
max_req -= offset;
|
max_req -= offset;
|
||||||
|
@ -1256,15 +1257,13 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
|
||||||
goto nomem_bulk;
|
goto nomem_bulk;
|
||||||
|
|
||||||
node->node_count += count;
|
node->node_count += count;
|
||||||
/* zero indexed. */
|
allocated += count;
|
||||||
if (slots == (void **)&node->slot)
|
|
||||||
node->node_count--;
|
|
||||||
|
|
||||||
success += count;
|
|
||||||
node = node->slot[0];
|
node = node->slot[0];
|
||||||
|
node->node_count = 0;
|
||||||
|
node->request_count = 0;
|
||||||
requested -= count;
|
requested -= count;
|
||||||
}
|
}
|
||||||
mas->alloc->total = success;
|
mas->alloc->total = allocated;
|
||||||
return;
|
return;
|
||||||
|
|
||||||
nomem_bulk:
|
nomem_bulk:
|
||||||
|
@ -1273,7 +1272,7 @@ nomem_bulk:
|
||||||
nomem_one:
|
nomem_one:
|
||||||
mas_set_alloc_req(mas, requested);
|
mas_set_alloc_req(mas, requested);
|
||||||
if (mas->alloc && !(((unsigned long)mas->alloc & 0x1)))
|
if (mas->alloc && !(((unsigned long)mas->alloc & 0x1)))
|
||||||
mas->alloc->total = success;
|
mas->alloc->total = allocated;
|
||||||
mas_set_err(mas, -ENOMEM);
|
mas_set_err(mas, -ENOMEM);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5734,6 +5733,7 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
|
||||||
void mas_destroy(struct ma_state *mas)
|
void mas_destroy(struct ma_state *mas)
|
||||||
{
|
{
|
||||||
struct maple_alloc *node;
|
struct maple_alloc *node;
|
||||||
|
unsigned long total;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When using mas_for_each() to insert an expected number of elements,
|
* When using mas_for_each() to insert an expected number of elements,
|
||||||
|
@ -5756,14 +5756,20 @@ void mas_destroy(struct ma_state *mas)
|
||||||
}
|
}
|
||||||
mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC);
|
mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC);
|
||||||
|
|
||||||
while (mas->alloc && !((unsigned long)mas->alloc & 0x1)) {
|
total = mas_allocated(mas);
|
||||||
|
while (total) {
|
||||||
node = mas->alloc;
|
node = mas->alloc;
|
||||||
mas->alloc = node->slot[0];
|
mas->alloc = node->slot[0];
|
||||||
if (node->node_count > 0)
|
if (node->node_count > 1) {
|
||||||
mt_free_bulk(node->node_count,
|
size_t count = node->node_count - 1;
|
||||||
(void __rcu **)&node->slot[1]);
|
|
||||||
kmem_cache_free(maple_node_cache, node);
|
mt_free_bulk(count, (void __rcu **)&node->slot[1]);
|
||||||
|
total -= count;
|
||||||
}
|
}
|
||||||
|
kmem_cache_free(maple_node_cache, node);
|
||||||
|
total--;
|
||||||
|
}
|
||||||
|
|
||||||
mas->alloc = NULL;
|
mas->alloc = NULL;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(mas_destroy);
|
EXPORT_SYMBOL_GPL(mas_destroy);
|
||||||
|
|
|
@ -173,11 +173,11 @@ static noinline void check_new_node(struct maple_tree *mt)
|
||||||
|
|
||||||
if (!MAPLE_32BIT) {
|
if (!MAPLE_32BIT) {
|
||||||
if (i >= 35)
|
if (i >= 35)
|
||||||
e = i - 35;
|
e = i - 34;
|
||||||
else if (i >= 5)
|
else if (i >= 5)
|
||||||
e = i - 5;
|
e = i - 4;
|
||||||
else if (i >= 2)
|
else if (i >= 2)
|
||||||
e = i - 2;
|
e = i - 1;
|
||||||
} else {
|
} else {
|
||||||
if (i >= 4)
|
if (i >= 4)
|
||||||
e = i - 4;
|
e = i - 4;
|
||||||
|
@ -305,17 +305,17 @@ static noinline void check_new_node(struct maple_tree *mt)
|
||||||
MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM));
|
MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM));
|
||||||
MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
|
MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
|
||||||
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
|
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
|
||||||
MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);
|
MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);
|
||||||
|
|
||||||
mn = mas_pop_node(&mas); /* get the next node. */
|
mn = mas_pop_node(&mas); /* get the next node. */
|
||||||
MT_BUG_ON(mt, mn == NULL);
|
MT_BUG_ON(mt, mn == NULL);
|
||||||
MT_BUG_ON(mt, not_empty(mn));
|
MT_BUG_ON(mt, not_empty(mn));
|
||||||
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS);
|
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS);
|
||||||
MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 2);
|
MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);
|
||||||
|
|
||||||
mas_push_node(&mas, mn);
|
mas_push_node(&mas, mn);
|
||||||
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
|
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
|
||||||
MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);
|
MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);
|
||||||
|
|
||||||
/* Check the limit of pop/push/pop */
|
/* Check the limit of pop/push/pop */
|
||||||
mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 2); /* Request */
|
mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 2); /* Request */
|
||||||
|
@ -323,14 +323,14 @@ static noinline void check_new_node(struct maple_tree *mt)
|
||||||
MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM));
|
MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM));
|
||||||
MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
|
MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
|
||||||
MT_BUG_ON(mt, mas_alloc_req(&mas));
|
MT_BUG_ON(mt, mas_alloc_req(&mas));
|
||||||
MT_BUG_ON(mt, mas.alloc->node_count);
|
MT_BUG_ON(mt, mas.alloc->node_count != 1);
|
||||||
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2);
|
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2);
|
||||||
mn = mas_pop_node(&mas);
|
mn = mas_pop_node(&mas);
|
||||||
MT_BUG_ON(mt, not_empty(mn));
|
MT_BUG_ON(mt, not_empty(mn));
|
||||||
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
|
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
|
||||||
MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);
|
MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);
|
||||||
mas_push_node(&mas, mn);
|
mas_push_node(&mas, mn);
|
||||||
MT_BUG_ON(mt, mas.alloc->node_count);
|
MT_BUG_ON(mt, mas.alloc->node_count != 1);
|
||||||
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2);
|
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2);
|
||||||
mn = mas_pop_node(&mas);
|
mn = mas_pop_node(&mas);
|
||||||
MT_BUG_ON(mt, not_empty(mn));
|
MT_BUG_ON(mt, not_empty(mn));
|
||||||
|
|
Loading…
Add table
Reference in a new issue