linux/tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c
Anton Protopopov f371f2dc53 selftest/bpf/benchs: Add benchmark for hashmap lookups
Add a new benchmark which measures the speed of hashmap lookup operations.
A user can control the following parameters of the benchmark:

    * key_size (max 1024): the key size to use
    * max_entries: the hashmap max entries
    * nr_entries: the number of entries to insert/lookup
    * nr_loops: the number of loops for the benchmark
    * map_flags: the hashmap flags passed to BPF_MAP_CREATE
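
A minimal sketch of how userspace might apply these knobs through a libbpf
skeleton, after open and before load (the skeleton and field names here are
assumptions for illustration, not the actual bench code):

    bpf_map__set_key_size(skel->maps.hash_map_bench, 4);
    bpf_map__set_value_size(skel->maps.hash_map_bench, 8); /* assumed */
    bpf_map__set_max_entries(skel->maps.hash_map_bench, 65536);
    bpf_map__set_map_flags(skel->maps.hash_map_bench, 0x40);
    skel->bss->nr_entries = 4096;
    /* the BPF-side nr_loops global holds the outer loop count, see below */
    skel->bss->nr_loops = 1000000 / 4096;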

The BPF program performing the benchmark makes two nested bpf_loop calls:

    bpf_loop(nr_loops/nr_entries)
            bpf_loop(nr_entries)
                     bpf_map_lookup()

So nr_loops determines the number of actual map lookups: the outer loop runs
nr_loops/nr_entries times, and each outer iteration performs nr_entries inner
lookups. All lookups are successful, since only previously inserted keys are
looked up.
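
Spelled out in plain C, the scheme above amounts to the following (an
illustrative sketch, not the BPF code itself; key_for() stands in for the
patch_key() helper in the program below):

    /* ~nr_loops successful lookups in total */
    for (i = 0; i < nr_loops / nr_entries; i++)
            for (j = 0; j < nr_entries; j++)
                    bpf_map_lookup_elem(&hash_map_bench, key_for(j));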

Example (the output is generated on an AMD Ryzen 9 3950X machine):

    for nr_entries in `seq 4096 4096 65536`; do echo -n "$((nr_entries*100/65536))% full: "; sudo ./bench -d2 -a bpf-hashmap-lookup --key_size=4 --nr_entries=$nr_entries --max_entries=65536 --nr_loops=1000000 --map_flags=0x40 | grep cpu; done
    6% full: cpu01: lookup 50.739M ± 0.018M events/sec (approximated from 32 samples of ~19ms)
    12% full: cpu01: lookup 47.751M ± 0.015M events/sec (approximated from 32 samples of ~20ms)
    18% full: cpu01: lookup 45.153M ± 0.013M events/sec (approximated from 32 samples of ~22ms)
    25% full: cpu01: lookup 43.826M ± 0.014M events/sec (approximated from 32 samples of ~22ms)
    31% full: cpu01: lookup 41.971M ± 0.012M events/sec (approximated from 32 samples of ~23ms)
    37% full: cpu01: lookup 41.034M ± 0.015M events/sec (approximated from 32 samples of ~24ms)
    43% full: cpu01: lookup 39.946M ± 0.012M events/sec (approximated from 32 samples of ~25ms)
    50% full: cpu01: lookup 38.256M ± 0.014M events/sec (approximated from 32 samples of ~26ms)
    56% full: cpu01: lookup 36.580M ± 0.018M events/sec (approximated from 32 samples of ~27ms)
    62% full: cpu01: lookup 36.252M ± 0.012M events/sec (approximated from 32 samples of ~27ms)
    68% full: cpu01: lookup 35.200M ± 0.012M events/sec (approximated from 32 samples of ~28ms)
    75% full: cpu01: lookup 34.061M ± 0.009M events/sec (approximated from 32 samples of ~29ms)
    81% full: cpu01: lookup 34.374M ± 0.010M events/sec (approximated from 32 samples of ~29ms)
    87% full: cpu01: lookup 33.244M ± 0.011M events/sec (approximated from 32 samples of ~30ms)
    93% full: cpu01: lookup 32.182M ± 0.013M events/sec (approximated from 32 samples of ~31ms)
    100% full: cpu01: lookup 31.497M ± 0.016M events/sec (approximated from 32 samples of ~31ms)

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230213091519.1202813-8-aspsk@isovalent.com
2023-02-15 16:29:31 -08:00

// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Isovalent */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_HASH);
} hash_map_bench SEC(".maps");

/* The number of slots to store times */
#define NR_SLOTS 32
#define NR_CPUS 256
#define CPU_MASK (NR_CPUS-1)

/* Configured by userspace */
u64 nr_entries;
u64 nr_loops;
u32 __attribute__((__aligned__(8))) key[NR_CPUS];

/* Filled by us */
u64 __attribute__((__aligned__(256))) percpu_times_index[NR_CPUS];
u64 __attribute__((__aligned__(256))) percpu_times[NR_CPUS][NR_SLOTS];

static inline void patch_key(u32 i)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	key[0] = i + 1;
#else
	key[0] = __builtin_bswap32(i + 1);
#endif
	/* the rest of key is random and is configured by userspace */
}

static int lookup_callback(__u32 index, u32 *unused)
{
	patch_key(index);
	/* return 0 to continue the bpf_loop, 1 to stop on a failed lookup */
	return bpf_map_lookup_elem(&hash_map_bench, key) ? 0 : 1;
}

static int loop_lookup_callback(__u32 index, u32 *unused)
{
	/* one outer iteration = nr_entries inner lookups */
	return bpf_loop(nr_entries, lookup_callback, NULL, 0) ? 0 : 1;
}
SEC("fentry/" SYS_PREFIX "sys_getpgid")
int benchmark(void *ctx)
{
u32 cpu = bpf_get_smp_processor_id();
u32 times_index;
u64 start_time;
times_index = percpu_times_index[cpu & CPU_MASK] % NR_SLOTS;
start_time = bpf_ktime_get_ns();
bpf_loop(nr_loops, loop_lookup_callback, NULL, 0);
percpu_times[cpu & CPU_MASK][times_index] = bpf_ktime_get_ns() - start_time;
percpu_times_index[cpu & CPU_MASK] += 1;
return 0;
}
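
For completeness, a hypothetical userspace driver, once the skeleton is loaded
and the fentry program attached, could trigger one measurement round and read
the result back roughly like this (the skeleton header and type name, and the
assumption that the thread does not migrate during the round, are illustrative
guesses, not the actual bench harness):

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include "bpf_hashmap_lookup.skel.h" /* assumed skeleton header */

    static void run_once(struct bpf_hashmap_lookup *skel)
    {
            int cpu = sched_getcpu(); /* assumes we stay on this CPU */
            unsigned long long slot, ns;

            syscall(SYS_getpgid); /* fires the fentry program above */
            slot = (skel->bss->percpu_times_index[cpu] - 1) % 32 /* NR_SLOTS */;
            ns = skel->bss->percpu_times[cpu][slot];
            printf("round took %llu ns\n", ns);
    }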