Add a new benchmark which measures hashmap lookup operations speed. A user can control the following parameters of the benchmark: * key_size (max 1024): the key size to use * max_entries: the hashmap max entries * nr_entries: the number of entries to insert/lookup * nr_loops: the number of loops for the benchmark * map_flags The hashmap flags passed to BPF_MAP_CREATE The BPF program performing the benchmarks calls two nested bpf_loop: bpf_loop(nr_loops/nr_entries) bpf_loop(nr_entries) bpf_map_lookup() So the nr_loops determines the number of actual map lookups. All lookups are successful. Example (the output is generated on a AMD Ryzen 9 3950X machine): for nr_entries in `seq 4096 4096 65536`; do echo -n "$((nr_entries*100/65536))% full: "; sudo ./bench -d2 -a bpf-hashmap-lookup --key_size=4 --nr_entries=$nr_entries --max_entries=65536 --nr_loops=1000000 --map_flags=0x40 | grep cpu; done 6% full: cpu01: lookup 50.739M ± 0.018M events/sec (approximated from 32 samples of ~19ms) 12% full: cpu01: lookup 47.751M ± 0.015M events/sec (approximated from 32 samples of ~20ms) 18% full: cpu01: lookup 45.153M ± 0.013M events/sec (approximated from 32 samples of ~22ms) 25% full: cpu01: lookup 43.826M ± 0.014M events/sec (approximated from 32 samples of ~22ms) 31% full: cpu01: lookup 41.971M ± 0.012M events/sec (approximated from 32 samples of ~23ms) 37% full: cpu01: lookup 41.034M ± 0.015M events/sec (approximated from 32 samples of ~24ms) 43% full: cpu01: lookup 39.946M ± 0.012M events/sec (approximated from 32 samples of ~25ms) 50% full: cpu01: lookup 38.256M ± 0.014M events/sec (approximated from 32 samples of ~26ms) 56% full: cpu01: lookup 36.580M ± 0.018M events/sec (approximated from 32 samples of ~27ms) 62% full: cpu01: lookup 36.252M ± 0.012M events/sec (approximated from 32 samples of ~27ms) 68% full: cpu01: lookup 35.200M ± 0.012M events/sec (approximated from 32 samples of ~28ms) 75% full: cpu01: lookup 34.061M ± 0.009M events/sec (approximated from 32 samples of 
~29ms) 81% full: cpu01: lookup 34.374M ± 0.010M events/sec (approximated from 32 samples of ~29ms) 87% full: cpu01: lookup 33.244M ± 0.011M events/sec (approximated from 32 samples of ~30ms) 93% full: cpu01: lookup 32.182M ± 0.013M events/sec (approximated from 32 samples of ~31ms) 100% full: cpu01: lookup 31.497M ± 0.016M events/sec (approximated from 32 samples of ~31ms) Signed-off-by: Anton Protopopov <aspsk@isovalent.com> Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Link: https://lore.kernel.org/bpf/20230213091519.1202813-8-aspsk@isovalent.com
63 lines
1.5 KiB
C
63 lines
1.5 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/* Copyright (c) 2023 Isovalent */
|
|
|
|
#include "vmlinux.h"
|
|
|
|
#include <bpf/bpf_helpers.h>
|
|
#include "bpf_misc.h"
|
|
|
|
char _license[] SEC("license") = "GPL";
|
|
|
|
/*
 * The hashmap under benchmark. Only the map type is fixed here; key_size,
 * value_size, max_entries and map_flags are left unset and are presumably
 * configured by the userspace side of the benchmark before load (the commit
 * description lists them as user-controlled parameters).
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
} hash_map_bench SEC(".maps");
|
|
|
|
/* The number of slots to store times */
#define NR_SLOTS 32
/* Upper bound on CPUs we track; must stay a power of two for CPU_MASK */
#define NR_CPUS 256
/* Mask used to clamp bpf_get_smp_processor_id() into the arrays below */
#define CPU_MASK (NR_CPUS-1)

/* Configured by userspace */
u64 nr_entries;		/* number of elements to look up per inner loop */
u64 nr_loops;		/* number of outer-loop iterations */
/*
 * Lookup key buffer: NR_CPUS * sizeof(u32) = 1024 bytes, matching the
 * documented max key_size. Only key[0] is patched per lookup (patch_key());
 * the remaining bytes are random data filled in by userspace.
 */
u32 __attribute__((__aligned__(8))) key[NR_CPUS];

/* Filled by us */
/* Per-CPU write index into percpu_times; 256-byte alignment presumably
 * keeps each CPU's counters on separate cache lines — NOTE(review): confirm.
 */
u64 __attribute__((__aligned__(256))) percpu_times_index[NR_CPUS];
/* Per-CPU ring of NR_SLOTS elapsed-time samples in nanoseconds */
u64 __attribute__((__aligned__(256))) percpu_times[NR_CPUS][NR_SLOTS];
|
|
|
|
/*
 * Write the loop counter (1-based, little-endian byte order) into the first
 * word of the lookup key. Bytes past key[0] are random and are filled in by
 * userspace, so successive indices produce distinct, valid keys.
 */
static inline void patch_key(u32 i)
{
	u32 word = i + 1;

#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
	/* on big-endian hosts, swap so the stored bytes match LE layout */
	word = __builtin_bswap32(word);
#endif
	key[0] = word;
}
|
|
|
|
/*
 * bpf_loop() body for the inner loop: look up the key for this iteration.
 * Returns 0 to continue iterating, 1 to stop early on a failed lookup
 * (all lookups are expected to succeed).
 */
static int lookup_callback(__u32 index, u32 *unused)
{
	patch_key(index);
	if (bpf_map_lookup_elem(&hash_map_bench, key))
		return 0;
	return 1;
}
|
|
|
|
/*
 * bpf_loop() body for the outer loop: run one full pass of nr_entries
 * lookups. Returns 0 (continue) when the inner bpf_loop made progress,
 * 1 (stop) otherwise.
 */
static int loop_lookup_callback(__u32 index, u32 *unused)
{
	long iterated = bpf_loop(nr_entries, lookup_callback, NULL, 0);

	return iterated ? 0 : 1;
}
|
|
|
|
/*
 * Benchmark entry point, attached to sys_getpgid so userspace can trigger a
 * measurement run with a single syscall. Times nr_loops outer iterations
 * (each doing nr_entries lookups) and records the elapsed nanoseconds into
 * this CPU's slot ring in percpu_times.
 */
SEC("fentry/" SYS_PREFIX "sys_getpgid")
int benchmark(void *ctx)
{
	u32 cpu_idx = bpf_get_smp_processor_id() & CPU_MASK;
	u32 slot = percpu_times_index[cpu_idx] % NR_SLOTS;
	u64 t_start = bpf_ktime_get_ns();

	bpf_loop(nr_loops, loop_lookup_callback, NULL, 0);

	percpu_times[cpu_idx][slot] = bpf_ktime_get_ns() - t_start;
	percpu_times_index[cpu_idx] += 1;
	return 0;
}
|