
Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Pull BPF fixes from Daniel Borkmann:

 - Fix a soft-lockup in BPF arena_map_free on 64k page size kernels
   (Alan Maguire)

 - Fix a missing allocation failure check in BPF verifier's
   acquire_lock_state (Kumar Kartikeya Dwivedi)

 - Fix a NULL-pointer dereference in trace_kfree_skb by adding kfree_skb
   to the raw_tp_null_args set (Kuniyuki Iwashima)

 - Fix a deadlock when freeing BPF cgroup storage (Abel Wu)

 - Fix a syzbot-reported deadlock when holding BPF map's freeze_mutex
   (Andrii Nakryiko)

 - Fix a use-after-free issue in bpf_test_init when eth_skb_pkt_type is
   accessing skb data not containing an Ethernet header (Shigeru
   Yoshida)

 - Fix skipping non-existing keys in generic_map_lookup_batch (Yan Zhai)

 - Several BPF sockmap fixes to address incorrect TCP copied_seq
   calculations, which prevented correct data reads from recv(2) in user
   space (Jiayuan Chen)

 - Two fixes for BPF map lookup nullness elision (Daniel Xu)

 - Fix a NULL-pointer dereference from vmlinux BTF lookup in
   bpf_sk_storage_tracing_allowed (Jared Kangas)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  selftests: bpf: test batch lookup on array of maps with holes
  bpf: skip non exist keys in generic_map_lookup_batch
  bpf: Handle allocation failure in acquire_lock_state
  bpf: verifier: Disambiguate get_constant_map_key() errors
  bpf: selftests: Test constant key extraction on irrelevant maps
  bpf: verifier: Do not extract constant map keys for irrelevant maps
  bpf: Fix softlockup in arena_map_free on 64k page kernel
  net: Add rx_skb of kfree_skb to raw_tp_null_args[].
  bpf: Fix deadlock when freeing cgroup storage
  selftests/bpf: Add strparser test for bpf
  selftests/bpf: Fix invalid flag of recv()
  bpf: Disable non stream socket for strparser
  bpf: Fix wrong copied_seq calculation
  strparser: Add read_sock callback
  bpf: avoid holding freeze_mutex during mmap operation
  bpf: unify VM_WRITE vs VM_MAYWRITE use in BPF map mmaping logic
  selftests/bpf: Adjust data size to have ETH_HLEN
  bpf, test_run: Fix use-after-free issue in eth_skb_pkt_type()
  bpf: Remove unnecessary BTF lookups in bpf_sk_storage_tracing_allowed
Commit 319fc77f8f by Linus Torvalds, 2025-02-20 15:37:17 -08:00
24 changed files with 726 additions and 140 deletions

diff --git a/Documentation/networking/strparser.rst b/Documentation/networking/strparser.rst

@@ -112,7 +112,7 @@
Callbacks
=========
-There are six callbacks:
+There are seven callbacks:
::
@@ -182,6 +182,13 @@
the length of the message. skb->len - offset may be greater
than full_len since strparser does not trim the skb.
+::
+
+ int (*read_sock)(struct strparser *strp, read_descriptor_t *desc,
+                  sk_read_actor_t recv_actor);
+
+The read_sock callback is used by strparser instead of
+sock->ops->read_sock, if provided.
::
int (*read_sock_done)(struct strparser *strp, int err);
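
A minimal sketch of how a strparser user would wire up the new callback. The callback names here (my_parse_msg and friends) are hypothetical; strp_init() and the strp_callbacks fields are the real API (see the include/net/strparser.h hunk below):

/* --- illustrative sketch, not part of this patch --- */
#include <net/strparser.h>

/* treat every skb as one complete message */
static int my_parse_msg(struct strparser *strp, struct sk_buff *skb)
{
	return skb->len;
}

static void my_rcv_msg(struct strparser *strp, struct sk_buff *skb)
{
	kfree_skb(skb);		/* consume the framed message */
}

/* called by strparser instead of sock->ops->read_sock */
static int my_read_sock(struct strparser *strp, read_descriptor_t *desc,
			sk_read_actor_t recv_actor)
{
	/* pull queued data and feed it to recv_actor here */
	return 0;
}

static int my_attach(struct strparser *strp, struct sock *sk)
{
	struct strp_callbacks cb = {
		.parse_msg = my_parse_msg,
		.rcv_msg   = my_rcv_msg,
		.read_sock = my_read_sock,	/* optional; NULL keeps the old path */
	};

	return strp_init(strp, sk, &cb);
}
/* --- end sketch --- */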

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h

@@ -91,6 +91,8 @@
struct sk_psock_progs progs;
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
struct strparser strp;
+u32 copied_seq;
+u32 ingress_bytes;
#endif
struct sk_buff_head ingress_skb;
struct list_head ingress_msg;

diff --git a/include/net/strparser.h b/include/net/strparser.h

@@ -43,6 +43,8 @@
struct strp_callbacks {
int (*parse_msg)(struct strparser *strp, struct sk_buff *skb);
void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
+int (*read_sock)(struct strparser *strp, read_descriptor_t *desc,
+                 sk_read_actor_t recv_actor);
int (*read_sock_done)(struct strparser *strp, int err);
void (*abort_parser)(struct strparser *strp, int err);
void (*lock)(struct strparser *strp);

diff --git a/include/net/tcp.h b/include/net/tcp.h

@@ -743,6 +743,9 @@
/* Read 'sendfile()'-style from a TCP socket */
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
sk_read_actor_t recv_actor);
+int tcp_read_sock_noack(struct sock *sk, read_descriptor_t *desc,
+                        sk_read_actor_t recv_actor, bool noack,
+                        u32 *copied_seq);
int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off);
void tcp_read_done(struct sock *sk, size_t len);
@@ -2613,6 +2616,11 @@
#ifdef CONFIG_BPF_SYSCALL
int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
+#ifdef CONFIG_BPF_STREAM_PARSER
+struct strparser;
+int tcp_bpf_strp_read_sock(struct strparser *strp, read_descriptor_t *desc,
+                           sk_read_actor_t recv_actor);
+#endif /* CONFIG_BPF_STREAM_PARSER */
#endif /* CONFIG_BPF_SYSCALL */
#ifdef CONFIG_INET

diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c

@@ -39,7 +39,7 @@
*/
/* number of bytes addressable by LDX/STX insn with 16-bit 'off' field */
-#define GUARD_SZ (1ull << sizeof_field(struct bpf_insn, off) * 8)
+#define GUARD_SZ round_up(1ull << sizeof_field(struct bpf_insn, off) * 8, PAGE_SIZE << 1)
#define KERN_VM_SZ (SZ_4G + GUARD_SZ)
struct bpf_arena {
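
The fix rounds the guard region up to an even multiple of pages: the 16-bit 'off' field spans 64KiB, which is 16 pages on a 4k-page kernel but only a single page on a 64k-page kernel. A quick user-space check of the before/after arithmetic (my sketch, not part of the patch):

/* --- illustrative sketch, not part of this patch --- */
#include <stdio.h>

#define ROUND_UP(x, a) (((x) + (a) - 1) / (a) * (a))

int main(void)
{
	unsigned long long span = 1ULL << 16;	/* 16-bit 'off' field */
	unsigned long long pages[] = { 4096, 65536 };

	for (int i = 0; i < 2; i++) {
		unsigned long long guard = ROUND_UP(span, pages[i] << 1);
		printf("PAGE_SIZE=%lluK: old GUARD_SZ=%lluK, new GUARD_SZ=%lluK\n",
		       pages[i] >> 10, span >> 10, guard >> 10);
	}
	/* 4K pages: guard stays 64K; 64K pages: guard grows to 128K (two pages) */
	return 0;
}
/* --- end sketch --- */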

diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c

@@ -153,7 +153,7 @@
static void cgroup_storage_map_free(struct bpf_map *map)
{
-bpf_local_storage_map_free(map, &cgroup_cache, NULL);
+bpf_local_storage_map_free(map, &cgroup_cache, &bpf_cgrp_storage_busy);
}
/* *gfp_flags* is a hidden argument provided by the verifier */

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c

@@ -6507,6 +6507,8 @@
/* rxrpc */
{ "rxrpc_recvdata", 0x1 },
{ "rxrpc_resend", 0x10 },
+/* skb */
+{ "kfree_skb", 0x1000 },
/* sunrpc */
{ "xs_stream_read_data", 0x1 },
/* ... from xprt_cong_event event class */
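
In raw_tp_null_args each hex nibble flags one tracepoint argument as possibly NULL, so 0x1000 marks the fourth kfree_skb argument (the receive socket). A hedged sketch of a tp_btf program that the verifier now forces to NULL-check it; the argument layout is my reading of the current trace_kfree_skb prototype:

/* --- illustrative sketch, not part of this patch --- */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

SEC("tp_btf/kfree_skb")
int BPF_PROG(report_drop, struct sk_buff *skb, void *location,
	     enum skb_drop_reason reason, struct sock *rx_sk)
{
	/* rx_sk is marked PTR_MAYBE_NULL by the table entry above,
	 * so a deref without this check is rejected at load time
	 */
	if (rx_sk)
		bpf_printk("drop reason=%d family=%d",
			   reason, rx_sk->__sk_common.skc_family);
	return 0;
}

char _license[] SEC("license") = "GPL";
/* --- end sketch --- */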

diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c

@@ -268,8 +268,6 @@
/* allow writable mapping for the consumer_pos only */
if (vma->vm_pgoff != 0 || vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EPERM;
-} else {
-vm_flags_clear(vma, VM_MAYWRITE);
}
/* remap_vmalloc_range() checks size and offset constraints */
return remap_vmalloc_range(vma, rb_map->rb,
@@ -289,8 +287,6 @@
* position, and the ring buffer data itself.
*/
return -EPERM;
-} else {
-vm_flags_clear(vma, VM_MAYWRITE);
}
/* remap_vmalloc_range() checks size and offset constraints */
return remap_vmalloc_range(vma, rb_map->rb, vma->vm_pgoff + RINGBUF_PGOFF);

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c

@@ -1035,7 +1035,7 @@
static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct bpf_map *map = filp->private_data;
-int err;
+int err = 0;
if (!map->ops->map_mmap || !IS_ERR_OR_NULL(map->record))
return -ENOTSUPP;
@@ -1059,24 +1059,33 @@
err = -EACCES;
goto out;
}
+bpf_map_write_active_inc(map);
}
+out:
+mutex_unlock(&map->freeze_mutex);
+if (err)
+return err;
/* set default open/close callbacks */
vma->vm_ops = &bpf_map_default_vmops;
vma->vm_private_data = map;
vm_flags_clear(vma, VM_MAYEXEC);
+/* If mapping is read-only, then disallow potentially re-mapping with
+ * PROT_WRITE by dropping VM_MAYWRITE flag. This VM_MAYWRITE clearing
+ * means that as far as BPF map's memory-mapped VMAs are concerned,
+ * VM_WRITE and VM_MAYWRITE are equivalent; if one of them is set,
+ * both should be set, so we can forget about VM_MAYWRITE and always
+ * check just VM_WRITE
+ */
if (!(vma->vm_flags & VM_WRITE))
-/* disallow re-mapping with PROT_WRITE */
vm_flags_clear(vma, VM_MAYWRITE);
err = map->ops->map_mmap(map, vma);
-if (err)
-goto out;
+if (err) {
+if (vma->vm_flags & VM_WRITE)
+bpf_map_write_active_dec(map);
+}
-if (vma->vm_flags & VM_MAYWRITE)
-bpf_map_write_active_inc(map);
-out:
-mutex_unlock(&map->freeze_mutex);
return err;
}
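
Seen from user space, the semantics the comment above spells out look like this; a hedged sketch assuming map_fd refers to a BPF_F_MMAPABLE map (error handling elided):

/* --- illustrative sketch, not part of this patch --- */
#include <sys/mman.h>
#include <bpf/bpf.h>

static void *map_readonly(int map_fd, size_t len)
{
	/* a PROT_READ mapping drops VM_MAYWRITE at mmap time, so ... */
	void *p = mmap(NULL, len, PROT_READ, MAP_SHARED, map_fd, 0);

	/* ... upgrading later would fail with EACCES:
	 * mprotect(p, len, PROT_READ | PROT_WRITE);
	 * and freezing still succeeds, since no writable mapping exists:
	 */
	bpf_map_freeze(map_fd);
	return p;
}
/* --- end sketch --- */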
@@ -1968,8 +1977,6 @@
return err;
}
-#define MAP_LOOKUP_RETRIES 3
int generic_map_lookup_batch(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
@@ -1979,8 +1986,8 @@
void __user *values = u64_to_user_ptr(attr->batch.values);
void __user *keys = u64_to_user_ptr(attr->batch.keys);
void *buf, *buf_prevkey, *prev_key, *key, *value;
-int err, retry = MAP_LOOKUP_RETRIES;
u32 value_size, cp, max_count;
+int err;
if (attr->batch.elem_flags & ~BPF_F_LOCK)
return -EINVAL;
@@ -2026,14 +2033,8 @@
err = bpf_map_copy_value(map, key, value,
attr->batch.elem_flags);
-if (err == -ENOENT) {
-if (retry) {
-retry--;
-continue;
-}
-err = -EINTR;
-break;
-}
+if (err == -ENOENT)
+goto next_key;
if (err)
goto free_buf;
@@ -2048,12 +2049,12 @@
goto free_buf;
}
-cp++;
+next_key:
if (!prev_key)
prev_key = buf_prevkey;
swap(prev_key, key);
-retry = MAP_LOOKUP_RETRIES;
+cp++;
cond_resched();
}
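
For user space the visible effect is that a batch lookup over a map-in-map with holes now skips the missing keys instead of failing with -EINTR once the (removed) retries are exhausted. A hedged sketch of the usual iteration pattern with libbpf's bpf_map_lookup_batch(); fd and buffer sizes are placeholders:

/* --- illustrative sketch, not part of this patch --- */
#include <errno.h>
#include <bpf/bpf.h>

static int dump_all(int map_fd, __u32 *keys, __u32 *vals, __u32 max)
{
	__u32 in_batch, out_batch, count;
	void *in = NULL;	/* NULL starts from the first key */
	int err;

	do {
		count = max;
		err = bpf_map_lookup_batch(map_fd, in, &out_batch,
					   keys, vals, &count, NULL);
		if (err && errno != ENOENT)
			return -errno;	/* real failure */
		/* consume 'count' entries; holes were skipped rather
		 * than surfaced as -EINTR as before this fix
		 */
		in = &out_batch;
	} while (!err);	/* -ENOENT means iteration is complete */
	return 0;
}
/* --- end sketch --- */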

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c

@@ -1501,6 +1501,8 @@
struct bpf_reference_state *s;
s = acquire_reference_state(env, insn_idx);
+if (!s)
+return -ENOMEM;
s->type = type;
s->id = id;
s->ptr = ptr;
@@ -9149,10 +9151,11 @@
return 0;
}
-/* Returns constant key value if possible, else negative error */
-static s64 get_constant_map_key(struct bpf_verifier_env *env,
+/* Returns constant key value in `value` if possible, else negative error */
+static int get_constant_map_key(struct bpf_verifier_env *env,
struct bpf_reg_state *key,
-u32 key_size)
+u32 key_size,
+s64 *value)
{
struct bpf_func_state *state = func(env, key);
struct bpf_reg_state *reg;
@@ -9179,8 +9182,10 @@
/* First handle precisely tracked STACK_ZERO */
for (i = off; i >= 0 && stype[i] == STACK_ZERO; i--)
zero_size++;
-if (zero_size >= key_size)
+if (zero_size >= key_size) {
+*value = 0;
return 0;
+}
/* Check that stack contains a scalar spill of expected size */
if (!is_spilled_scalar_reg(&state->stack[spi]))
@@ -9203,9 +9208,12 @@
if (err < 0)
return err;
-return reg->var_off.value;
+*value = reg->var_off.value;
+return 0;
}
+static bool can_elide_value_nullness(enum bpf_map_type type);
static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
struct bpf_call_arg_meta *meta,
const struct bpf_func_proto *fn,
@@ -9354,9 +9362,16 @@
err = check_helper_mem_access(env, regno, key_size, BPF_READ, false, NULL);
if (err)
return err;
-meta->const_map_key = get_constant_map_key(env, reg, key_size);
-if (meta->const_map_key < 0 && meta->const_map_key != -EOPNOTSUPP)
-return meta->const_map_key;
+if (can_elide_value_nullness(meta->map_ptr->map_type)) {
+err = get_constant_map_key(env, reg, key_size, &meta->const_map_key);
+if (err < 0) {
+meta->const_map_key = -1;
+if (err == -EOPNOTSUPP)
+err = 0;
+else
+return err;
+}
+}
break;
case ARG_PTR_TO_MAP_VALUE:
if (type_may_be_null(arg_type) && register_is_null(reg))
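
For context, the elision these two fixes harden applies to array maps, where a constant in-bounds key proves the lookup non-NULL; the bug was that constant-key extraction also ran (and could error out) for map types where no elision ever happens. A hedged BPF-side sketch with an assumed one-entry array:

/* --- illustrative sketch, not part of this patch --- */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u32);
} arr SEC(".maps");

SEC("socket")
int const_key_no_null_check(void *ctx)
{
	__u32 key = 0;	/* constant and in-bounds */
	__u32 *val = bpf_map_lookup_elem(&arr, &key);

	/* nullness elision: for an array map with a constant key the
	 * verifier accepts this direct deref; for hash maps (the
	 * "irrelevant" case fixed above) no such deduction is made
	 */
	return *val;
}

char _license[] SEC("license") = "GPL";
/* --- end sketch --- */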

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c

@@ -660,12 +660,9 @@
void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
void *data;
-if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom)
+if (user_size < ETH_HLEN || user_size > PAGE_SIZE - headroom - tailroom)
return ERR_PTR(-EINVAL);
-if (user_size > size)
-return ERR_PTR(-EMSGSIZE);
size = SKB_DATA_ALIGN(size);
data = kzalloc(size + headroom + tailroom, GFP_USER);
if (!data)
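
The knock-on effect for BPF_PROG_TEST_RUN users is that inputs shorter than an Ethernet header are now rejected outright, which is what the selftest updates near the end of this series adjust for. A hedged user-space sketch (prog_fd is a placeholder):

/* --- illustrative sketch, not part of this patch --- */
#include <linux/if_ether.h>
#include <bpf/bpf.h>

static int run_once(int prog_fd)
{
	char data[ETH_HLEN] = {};	/* a 10-byte buffer now gets -EINVAL */
	LIBBPF_OPTS(bpf_test_run_opts, opts,
		.data_in = data,
		.data_size_in = sizeof(data),
		.repeat = 1,
	);

	return bpf_prog_test_run_opts(prog_fd, &opts);
}
/* --- end sketch --- */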

diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c

@@ -355,11 +355,6 @@
static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
{
-const struct btf *btf_vmlinux;
-const struct btf_type *t;
-const char *tname;
-u32 btf_id;
if (prog->aux->dst_prog)
return false;
@@ -374,13 +369,7 @@
return true;
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
-btf_vmlinux = bpf_get_btf_vmlinux();
-if (IS_ERR_OR_NULL(btf_vmlinux))
-return false;
-btf_id = prog->aux->attach_btf_id;
-t = btf_type_by_id(btf_vmlinux, btf_id);
-tname = btf_name_by_offset(btf_vmlinux, t->name_off);
-return !!strncmp(tname, "bpf_sk_storage",
+return !!strncmp(prog->aux->attach_func_name, "bpf_sk_storage",
strlen("bpf_sk_storage"));
default:
return false;

diff --git a/net/core/skmsg.c b/net/core/skmsg.c

@@ -549,6 +549,9 @@
return num_sge;
}
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+psock->ingress_bytes += len;
+#endif
copied = len;
msg->sg.start = 0;
msg->sg.size = copied;
@@ -1144,6 +1147,10 @@
if (!ret)
sk_psock_set_state(psock, SK_PSOCK_RX_STRP_ENABLED);
+if (sk_is_tcp(sk)) {
+psock->strp.cb.read_sock = tcp_bpf_strp_read_sock;
+psock->copied_seq = tcp_sk(sk)->copied_seq;
+}
return ret;
}

diff --git a/net/core/sock_map.c b/net/core/sock_map.c

@@ -303,7 +303,10 @@
write_lock_bh(&sk->sk_callback_lock);
if (stream_parser && stream_verdict && !psock->saved_data_ready) {
-ret = sk_psock_init_strp(sk, psock);
+if (sk_is_tcp(sk))
+ret = sk_psock_init_strp(sk, psock);
+else
+ret = -EOPNOTSUPP;
if (ret) {
write_unlock_bh(&sk->sk_callback_lock);
sk_psock_put(sk, psock);

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c

@@ -1565,12 +1565,13 @@
* or for 'peeking' the socket using this routine
* (although both would be easy to implement).
*/
-int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
-                  sk_read_actor_t recv_actor)
+static int __tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
+                           sk_read_actor_t recv_actor, bool noack,
+                           u32 *copied_seq)
{
struct sk_buff *skb;
struct tcp_sock *tp = tcp_sk(sk);
-u32 seq = tp->copied_seq;
+u32 seq = *copied_seq;
u32 offset;
int copied = 0;
@@ -1624,9 +1625,12 @@
tcp_eat_recv_skb(sk, skb);
if (!desc->count)
break;
-WRITE_ONCE(tp->copied_seq, seq);
+WRITE_ONCE(*copied_seq, seq);
}
-WRITE_ONCE(tp->copied_seq, seq);
+WRITE_ONCE(*copied_seq, seq);
+if (noack)
+goto out;
tcp_rcv_space_adjust(sk);
@@ -1635,10 +1639,25 @@
tcp_recv_skb(sk, seq, &offset);
tcp_cleanup_rbuf(sk, copied);
}
+out:
return copied;
}
+int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
+                  sk_read_actor_t recv_actor)
+{
+return __tcp_read_sock(sk, desc, recv_actor, false,
+                       &tcp_sk(sk)->copied_seq);
+}
EXPORT_SYMBOL(tcp_read_sock);
+int tcp_read_sock_noack(struct sock *sk, read_descriptor_t *desc,
+                        sk_read_actor_t recv_actor, bool noack,
+                        u32 *copied_seq)
+{
+return __tcp_read_sock(sk, desc, recv_actor, noack, copied_seq);
+}
int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
struct sk_buff *skb;

diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c

@@ -646,6 +646,42 @@
ops->sendmsg == tcp_sendmsg ? 0 : -ENOTSUPP;
}
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+int tcp_bpf_strp_read_sock(struct strparser *strp, read_descriptor_t *desc,
+                           sk_read_actor_t recv_actor)
+{
+struct sock *sk = strp->sk;
+struct sk_psock *psock;
+struct tcp_sock *tp;
+int copied = 0;
+tp = tcp_sk(sk);
+rcu_read_lock();
+psock = sk_psock(sk);
+if (WARN_ON_ONCE(!psock)) {
+desc->error = -EINVAL;
+goto out;
+}
+psock->ingress_bytes = 0;
+copied = tcp_read_sock_noack(sk, desc, recv_actor, true,
+                             &psock->copied_seq);
+if (copied < 0)
+goto out;
+/* recv_actor may redirect skb to another socket (SK_REDIRECT) or
+ * just put skb into ingress queue of current socket (SK_PASS).
+ * For SK_REDIRECT, we need to ack the frame immediately but for
+ * SK_PASS, we want to delay the ack until tcp_bpf_recvmsg_parser().
+ */
+tp->copied_seq = psock->copied_seq - psock->ingress_bytes;
+tcp_rcv_space_adjust(sk);
+__tcp_cleanup_rbuf(sk, copied - psock->ingress_bytes);
+out:
+rcu_read_unlock();
+return copied;
+}
+#endif /* CONFIG_BPF_STREAM_PARSER */
int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
{
int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;

diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c

@@ -347,7 +347,10 @@
struct socket *sock = strp->sk->sk_socket;
read_descriptor_t desc;
-if (unlikely(!sock || !sock->ops || !sock->ops->read_sock))
+if (unlikely(!sock || !sock->ops))
+return -EBUSY;
+
+if (unlikely(!strp->cb.read_sock && !sock->ops->read_sock))
return -EBUSY;
desc.arg.data = strp;
@@ -355,7 +358,10 @@
desc.count = 1; /* give more than one skb per call */
/* sk should be locked here, so okay to do read_sock */
-sock->ops->read_sock(strp->sk, &desc, strp_recv);
+if (strp->cb.read_sock)
+strp->cb.read_sock(strp, &desc, strp_recv);
+else
+sock->ops->read_sock(strp->sk, &desc, strp_recv);
desc.error = strp->cb.read_sock_done(strp, desc.error);
@@ -468,6 +474,7 @@
strp->cb.unlock = cb->unlock ? : strp_sock_unlock;
strp->cb.rcv_msg = cb->rcv_msg;
strp->cb.parse_msg = cb->parse_msg;
+strp->cb.read_sock = cb->read_sock;
strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done;
strp->cb.abort_parser = cb->abort_parser ? : strp_abort_strp;

diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c

@@ -120,11 +120,12 @@
static void fetch_and_validate(int outer_map_fd,
struct bpf_map_batch_opts *opts,
-__u32 batch_size, bool delete_entries)
+__u32 batch_size, bool delete_entries,
+bool has_holes)
{
-__u32 *fetched_keys, *fetched_values, total_fetched = 0;
-__u32 batch_key = 0, fetch_count, step_size;
-int err, max_entries = OUTER_MAP_ENTRIES;
+int err, max_entries = OUTER_MAP_ENTRIES - !!has_holes;
+__u32 *fetched_keys, *fetched_values, total_fetched = 0, i;
+__u32 batch_key = 0, fetch_count, step_size = batch_size;
__u32 value_size = sizeof(__u32);
/* Total entries needs to be fetched */
@@ -134,9 +135,8 @@
"Memory allocation failed for fetched_keys or fetched_values",
"error=%s\n", strerror(errno));
-for (step_size = batch_size;
-step_size <= max_entries;
-step_size += batch_size) {
+/* hash map may not always return full batch */
+for (i = 0; i < OUTER_MAP_ENTRIES; i++) {
fetch_count = step_size;
err = delete_entries
? bpf_map_lookup_and_delete_batch(outer_map_fd,
@@ -155,6 +155,7 @@
if (err && errno == ENOSPC) {
/* Fetch again with higher batch size */
total_fetched = 0;
+step_size += batch_size;
continue;
}
@@ -184,18 +185,19 @@
}
static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
-enum bpf_map_type inner_map_type)
+enum bpf_map_type inner_map_type,
+bool has_holes)
{
+__u32 max_entries = OUTER_MAP_ENTRIES - !!has_holes;
__u32 *outer_map_keys, *inner_map_fds;
-__u32 max_entries = OUTER_MAP_ENTRIES;
LIBBPF_OPTS(bpf_map_batch_opts, opts);
__u32 value_size = sizeof(__u32);
int batch_size[2] = {5, 10};
__u32 map_index, op_index;
int outer_map_fd, ret;
-outer_map_keys = calloc(max_entries, value_size);
-inner_map_fds = calloc(max_entries, value_size);
+outer_map_keys = calloc(OUTER_MAP_ENTRIES, value_size);
+inner_map_fds = calloc(OUTER_MAP_ENTRIES, value_size);
CHECK((!outer_map_keys || !inner_map_fds),
"Memory allocation failed for outer_map_keys or inner_map_fds",
"error=%s\n", strerror(errno));
@@ -209,6 +211,24 @@
((outer_map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
? 9 : 1000) - map_index;
+/* This condition is only meaningful for array of maps.
+ *
+ * max_entries == OUTER_MAP_ENTRIES - 1 if it is true. Say
+ * max_entries is short for n, then outer_map_keys looks like:
+ *
+ *   [n, n-1, ... 2, 1]
+ *
+ * We change it to
+ *
+ *   [n, n-1, ... 2, 0]
+ *
+ * So it will leave key 1 as a hole. It will serve to test the
+ * correctness when batch on an array: a "non-exist" key might be
+ * actually allocated and returned from key iteration.
+ */
+if (has_holes)
+outer_map_keys[max_entries - 1]--;
/* batch operation - map_update */
ret = bpf_map_update_batch(outer_map_fd, outer_map_keys,
inner_map_fds, &max_entries, &opts);
@@ -219,15 +239,17 @@
/* batch operation - map_lookup */
for (op_index = 0; op_index < 2; ++op_index)
fetch_and_validate(outer_map_fd, &opts,
-batch_size[op_index], false);
+batch_size[op_index], false,
+has_holes);
/* batch operation - map_lookup_delete */
if (outer_map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
fetch_and_validate(outer_map_fd, &opts,
-max_entries, true /*delete*/);
+max_entries, true /*delete*/,
+has_holes);
/* close all map fds */
-for (map_index = 0; map_index < max_entries; map_index++)
+for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++)
close(inner_map_fds[map_index]);
close(outer_map_fd);
@@ -237,16 +259,20 @@
void test_map_in_map_batch_ops_array(void)
{
-_map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY);
+_map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY, false);
printf("%s:PASS with inner ARRAY map\n", __func__);
-_map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH);
+_map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH, false);
printf("%s:PASS with inner HASH map\n", __func__);
+_map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY, true);
+printf("%s:PASS with inner ARRAY map with holes\n", __func__);
+_map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH, true);
+printf("%s:PASS with inner HASH map with holes\n", __func__);
}
void test_map_in_map_batch_ops_hash(void)
{
-_map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY);
+_map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY, false);
printf("%s:PASS with inner ARRAY map\n", __func__);
-_map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH);
+_map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH, false);
printf("%s:PASS with inner HASH map\n", __func__);
}

diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c

@@ -526,8 +526,8 @@
if (!ASSERT_EQ(err, 1, "epoll_wait(fd)"))
goto out_close;
-n = recv(c1, &b, 1, SOCK_NONBLOCK);
-ASSERT_EQ(n, 0, "recv_timeout(fin)");
+n = recv(c1, &b, 1, MSG_DONTWAIT);
+ASSERT_EQ(n, 0, "recv(fin)");
out_close:
close(c1);
close(p1);
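
The underlying mix-up in the lines being fixed: SOCK_NONBLOCK belongs to the socket(2)/accept4(2) flag namespace, not to recv(2)'s MSG_* flags, so passing it to recv() silently requests something else. For contrast (a sketch, not part of the patch):

/* --- illustrative sketch, not part of this patch --- */
#include <sys/socket.h>

static ssize_t recv_nonblocking(int fd, char *buf, size_t len)
{
	/* per-socket: int s = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0); */
	/* per-call: MSG_* is the namespace recv() actually understands */
	return recv(fd, buf, len, MSG_DONTWAIT);
}
/* --- end sketch --- */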
@@ -535,57 +535,6 @@ out:
test_sockmap_pass_prog__destroy(skel);
}
-static void test_sockmap_stream_pass(void)
-{
-int zero = 0, sent, recvd;
-int verdict, parser;
-int err, map;
-int c = -1, p = -1;
-struct test_sockmap_pass_prog *pass = NULL;
-char snd[256] = "0123456789";
-char rcv[256] = "0";
-pass = test_sockmap_pass_prog__open_and_load();
-verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
-parser = bpf_program__fd(pass->progs.prog_skb_parser);
-map = bpf_map__fd(pass->maps.sock_map_rx);
-err = bpf_prog_attach(parser, map, BPF_SK_SKB_STREAM_PARSER, 0);
-if (!ASSERT_OK(err, "bpf_prog_attach stream parser"))
-goto out;
-err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
-if (!ASSERT_OK(err, "bpf_prog_attach stream verdict"))
-goto out;
-err = create_pair(AF_INET, SOCK_STREAM, &c, &p);
-if (err)
-goto out;
-/* sk_data_ready of 'p' will be replaced by strparser handler */
-err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
-if (!ASSERT_OK(err, "bpf_map_update_elem(p)"))
-goto out_close;
-/*
-* as 'prog_skb_parser' return the original skb len and
-* 'prog_skb_verdict' return SK_PASS, the kernel will just
-* pass it through to original socket 'p'
-*/
-sent = xsend(c, snd, sizeof(snd), 0);
-ASSERT_EQ(sent, sizeof(snd), "xsend(c)");
-recvd = recv_timeout(p, rcv, sizeof(rcv), SOCK_NONBLOCK,
-IO_TIMEOUT_SEC);
-ASSERT_EQ(recvd, sizeof(rcv), "recv_timeout(p)");
-out_close:
-close(c);
-close(p);
-out:
-test_sockmap_pass_prog__destroy(pass);
-}
static void test_sockmap_skb_verdict_fionread(bool pass_prog)
{
@@ -632,7 +581,7 @@
ASSERT_EQ(avail, expected, "ioctl(FIONREAD)");
/* On DROP test there will be no data to read */
if (pass_prog) {
-recvd = recv_timeout(c1, &buf, sizeof(buf), SOCK_NONBLOCK, IO_TIMEOUT_SEC);
+recvd = recv_timeout(c1, &buf, sizeof(buf), MSG_DONTWAIT, IO_TIMEOUT_SEC);
ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(c0)");
}
@@ -1133,8 +1082,6 @@
test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
if (test__start_subtest("sockmap skb_verdict shutdown"))
test_sockmap_skb_verdict_shutdown();
if (test__start_subtest("sockmap stream parser and verdict pass"))
test_sockmap_stream_pass();
if (test__start_subtest("sockmap skb_verdict fionread"))
test_sockmap_skb_verdict_fionread(true);
if (test__start_subtest("sockmap skb_verdict fionread on drop"))

diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c
new file mode 100644

@@ -0,0 +1,454 @@
// SPDX-License-Identifier: GPL-2.0
#include <error.h>
#include <netinet/tcp.h>
#include <test_progs.h>
#include "sockmap_helpers.h"
#include "test_skmsg_load_helpers.skel.h"
#include "test_sockmap_strp.skel.h"
#define STRP_PKT_HEAD_LEN 4
#define STRP_PKT_BODY_LEN 6
#define STRP_PKT_FULL_LEN (STRP_PKT_HEAD_LEN + STRP_PKT_BODY_LEN)
static const char packet[STRP_PKT_FULL_LEN] = "head+body\0";
static const int test_packet_num = 100;
/* Current implementation of tcp_bpf_recvmsg_parser() invokes data_ready
* with sk held if an skb exists in sk_receive_queue. Then for the
* data_ready implementation of strparser, it will delay the read
* operation if sk is held and EAGAIN is returned.
*/
static int sockmap_strp_consume_pre_data(int p)
{
int recvd;
bool retried = false;
char rcv[10];
retry:
errno = 0;
recvd = recv_timeout(p, rcv, sizeof(rcv), 0, 1);
if (recvd < 0 && errno == EAGAIN && retried == false) {
/* On the first call, EAGAIN will certainly be returned.
* A 1-second wait is enough for the workqueue to finish.
*/
sleep(1);
retried = true;
goto retry;
}
if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "recv error or truncated data") ||
!ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
"data mismatch"))
return -1;
return 0;
}
static struct test_sockmap_strp *sockmap_strp_init(int *out_map, bool pass,
bool need_parser)
{
struct test_sockmap_strp *strp = NULL;
int verdict, parser;
int err;
strp = test_sockmap_strp__open_and_load();
*out_map = bpf_map__fd(strp->maps.sock_map);
if (need_parser)
parser = bpf_program__fd(strp->progs.prog_skb_parser_partial);
else
parser = bpf_program__fd(strp->progs.prog_skb_parser);
if (pass)
verdict = bpf_program__fd(strp->progs.prog_skb_verdict_pass);
else
verdict = bpf_program__fd(strp->progs.prog_skb_verdict);
err = bpf_prog_attach(parser, *out_map, BPF_SK_SKB_STREAM_PARSER, 0);
if (!ASSERT_OK(err, "bpf_prog_attach stream parser"))
goto err;
err = bpf_prog_attach(verdict, *out_map, BPF_SK_SKB_STREAM_VERDICT, 0);
if (!ASSERT_OK(err, "bpf_prog_attach stream verdict"))
goto err;
return strp;
err:
test_sockmap_strp__destroy(strp);
return NULL;
}
/* Dispatch packets to different socket by packet size:
*
* ------ ------
* | pkt4 || pkt1 |... > remote socket
* ------ ------ / ------ ------
* | pkt8 | pkt7 |...
* ------ ------ \ ------ ------
* | pkt3 || pkt2 |... > local socket
* ------ ------
*/
static void test_sockmap_strp_dispatch_pkt(int family, int sotype)
{
int i, j, zero = 0, one = 1, recvd;
int err, map;
int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
struct test_sockmap_strp *strp = NULL;
int test_cnt = 6;
char rcv[10];
struct {
char data[7];
int data_len;
int send_cnt;
int *receiver;
} send_dir[2] = {
/* data expected to deliver to local */
{"llllll", 6, 0, &p0},
/* data expected to deliver to remote */
{"rrrrr", 5, 0, &c1}
};
strp = sockmap_strp_init(&map, false, false);
if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
return;
err = create_socket_pairs(family, sotype, &c0, &c1, &p0, &p1);
if (!ASSERT_OK(err, "create_socket_pairs()"))
goto out;
err = bpf_map_update_elem(map, &zero, &p0, BPF_NOEXIST);
if (!ASSERT_OK(err, "bpf_map_update_elem(p0)"))
goto out_close;
err = bpf_map_update_elem(map, &one, &p1, BPF_NOEXIST);
if (!ASSERT_OK(err, "bpf_map_update_elem(p1)"))
goto out_close;
err = setsockopt(c1, IPPROTO_TCP, TCP_NODELAY, &zero, sizeof(zero));
if (!ASSERT_OK(err, "setsockopt(TCP_NODELAY)"))
goto out_close;
/* deliver data with data size greater than 5 to local */
strp->data->verdict_max_size = 5;
for (i = 0; i < test_cnt; i++) {
int d = i % 2;
xsend(c0, send_dir[d].data, send_dir[d].data_len, 0);
send_dir[d].send_cnt++;
}
for (i = 0; i < 2; i++) {
for (j = 0; j < send_dir[i].send_cnt; j++) {
int expected = send_dir[i].data_len;
recvd = recv_timeout(*send_dir[i].receiver, rcv,
expected, MSG_DONTWAIT,
IO_TIMEOUT_SEC);
if (!ASSERT_EQ(recvd, expected, "recv_timeout()"))
goto out_close;
if (!ASSERT_OK(memcmp(send_dir[i].data, rcv, recvd),
"data mismatch"))
goto out_close;
}
}
out_close:
close(c0);
close(c1);
close(p0);
close(p1);
out:
test_sockmap_strp__destroy(strp);
}
/* We have multiple packets in one skb
* ------------ ------------ ------------
* | packet1 | packet2 | ...
* ------------ ------------ ------------
*/
static void test_sockmap_strp_multiple_pkt(int family, int sotype)
{
int i, zero = 0;
int sent, recvd, total;
int err, map;
int c = -1, p = -1;
struct test_sockmap_strp *strp = NULL;
char *snd = NULL, *rcv = NULL;
strp = sockmap_strp_init(&map, true, true);
if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
return;
err = create_pair(family, sotype, &c, &p);
if (err)
goto out;
err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
if (!ASSERT_OK(err, "bpf_map_update_elem(zero, p)"))
goto out_close;
/* construct multiple packets in one buffer */
total = test_packet_num * STRP_PKT_FULL_LEN;
snd = malloc(total);
rcv = malloc(total + 1);
if (!ASSERT_TRUE(snd, "malloc(snd)") ||
!ASSERT_TRUE(rcv, "malloc(rcv)"))
goto out_close;
for (i = 0; i < test_packet_num; i++) {
memcpy(snd + i * STRP_PKT_FULL_LEN,
packet, STRP_PKT_FULL_LEN);
}
sent = xsend(c, snd, total, 0);
if (!ASSERT_EQ(sent, total, "xsend(c)"))
goto out_close;
/* try to recv one more byte to avoid truncation check */
recvd = recv_timeout(p, rcv, total + 1, MSG_DONTWAIT, IO_TIMEOUT_SEC);
if (!ASSERT_EQ(recvd, total, "recv(rcv)"))
goto out_close;
/* we sent TCP segment with multiple encapsulation
* then check whether packets are handled correctly
*/
if (!ASSERT_OK(memcmp(snd, rcv, total), "data mismatch"))
goto out_close;
out_close:
close(c);
close(p);
if (snd)
free(snd);
if (rcv)
free(rcv);
out:
test_sockmap_strp__destroy(strp);
}
/* Test strparser with partial read */
static void test_sockmap_strp_partial_read(int family, int sotype)
{
int zero = 0, recvd, off;
int err, map;
int c = -1, p = -1;
struct test_sockmap_strp *strp = NULL;
char rcv[STRP_PKT_FULL_LEN + 1] = "0";
strp = sockmap_strp_init(&map, true, true);
if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
return;
err = create_pair(family, sotype, &c, &p);
if (err)
goto out;
/* sk_data_ready of 'p' will be replaced by strparser handler */
err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
if (!ASSERT_OK(err, "bpf_map_update_elem(zero, p)"))
goto out_close;
/* 1.1 send partial head, 1 byte header left */
off = STRP_PKT_HEAD_LEN - 1;
xsend(c, packet, off, 0);
recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
if (!ASSERT_EQ(-1, recvd, "partial head sent, expected no data"))
goto out_close;
/* 1.2 send remaining head and body */
xsend(c, packet + off, STRP_PKT_FULL_LEN - off, 0);
recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, IO_TIMEOUT_SEC);
if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "expected full data"))
goto out_close;
/* 2.1 send partial head, 1 byte header left */
off = STRP_PKT_HEAD_LEN - 1;
xsend(c, packet, off, 0);
/* 2.2 send remaining head and partial body, 1 byte body left */
xsend(c, packet + off, STRP_PKT_FULL_LEN - off - 1, 0);
off = STRP_PKT_FULL_LEN - 1;
recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
if (!ASSERT_EQ(-1, recvd, "partial body sent, expected no data"))
goto out_close;
/* 2.3 send remaining body */
xsend(c, packet + off, STRP_PKT_FULL_LEN - off, 0);
recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, IO_TIMEOUT_SEC);
if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "expected full data"))
goto out_close;
out_close:
close(c);
close(p);
out:
test_sockmap_strp__destroy(strp);
}
/* Test simple socket read/write with strparser + FIONREAD */
static void test_sockmap_strp_pass(int family, int sotype, bool fionread)
{
int zero = 0, pkt_size = STRP_PKT_FULL_LEN, sent, recvd, avail;
int err, map;
int c = -1, p = -1;
int test_cnt = 10, i;
struct test_sockmap_strp *strp = NULL;
char rcv[STRP_PKT_FULL_LEN + 1] = "0";
strp = sockmap_strp_init(&map, true, true);
if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
return;
err = create_pair(family, sotype, &c, &p);
if (err)
goto out;
/* inject some data before bpf process, it should be read
* correctly because we check sk_receive_queue in
* tcp_bpf_recvmsg_parser().
*/
sent = xsend(c, packet, pkt_size, 0);
if (!ASSERT_EQ(sent, pkt_size, "xsend(pre-data)"))
goto out_close;
/* sk_data_ready of 'p' will be replaced by strparser handler */
err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
if (!ASSERT_OK(err, "bpf_map_update_elem(p)"))
goto out_close;
/* consume previous data we injected */
if (sockmap_strp_consume_pre_data(p))
goto out_close;
/* Previously, we encountered issues such as deadlocks and
* sequence errors that resulted in the inability to read
* continuously. Therefore, we perform multiple iterations
* of testing here.
*/
for (i = 0; i < test_cnt; i++) {
sent = xsend(c, packet, pkt_size, 0);
if (!ASSERT_EQ(sent, pkt_size, "xsend(c)"))
goto out_close;
recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT,
IO_TIMEOUT_SEC);
if (!ASSERT_EQ(recvd, pkt_size, "recv_timeout(p)") ||
!ASSERT_OK(memcmp(packet, rcv, pkt_size),
"memcmp, data mismatch"))
goto out_close;
}
if (fionread) {
sent = xsend(c, packet, pkt_size, 0);
if (!ASSERT_EQ(sent, pkt_size, "second xsend(c)"))
goto out_close;
err = ioctl(p, FIONREAD, &avail);
if (!ASSERT_OK(err, "ioctl(FIONREAD) error") ||
!ASSERT_EQ(avail, pkt_size, "ioctl(FIONREAD)"))
goto out_close;
recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT,
IO_TIMEOUT_SEC);
if (!ASSERT_EQ(recvd, pkt_size, "second recv_timeout(p)") ||
!ASSERT_OK(memcmp(packet, rcv, pkt_size),
"second memcmp, data mismatch"))
goto out_close;
}
out_close:
close(c);
close(p);
out:
test_sockmap_strp__destroy(strp);
}
/* Test strparser with verdict mode */
static void test_sockmap_strp_verdict(int family, int sotype)
{
int zero = 0, one = 1, sent, recvd, off;
int err, map;
int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
struct test_sockmap_strp *strp = NULL;
char rcv[STRP_PKT_FULL_LEN + 1] = "0";
strp = sockmap_strp_init(&map, false, true);
if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
return;
/* We simulate a reverse proxy server.
* When p0 receives data from c0, we forward it to c1.
* From c1's perspective, it will consider this data
* as being sent by p1.
*/
err = create_socket_pairs(family, sotype, &c0, &c1, &p0, &p1);
if (!ASSERT_OK(err, "create_socket_pairs()"))
goto out;
err = bpf_map_update_elem(map, &zero, &p0, BPF_NOEXIST);
if (!ASSERT_OK(err, "bpf_map_update_elem(p0)"))
goto out_close;
err = bpf_map_update_elem(map, &one, &p1, BPF_NOEXIST);
if (!ASSERT_OK(err, "bpf_map_update_elem(p1)"))
goto out_close;
sent = xsend(c0, packet, STRP_PKT_FULL_LEN, 0);
if (!ASSERT_EQ(sent, STRP_PKT_FULL_LEN, "xsend(c0)"))
goto out_close;
recvd = recv_timeout(c1, rcv, sizeof(rcv), MSG_DONTWAIT,
IO_TIMEOUT_SEC);
if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "recv_timeout(c1)") ||
!ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
"received data does not match the sent data"))
goto out_close;
/* send again to ensure the stream is functioning correctly. */
sent = xsend(c0, packet, STRP_PKT_FULL_LEN, 0);
if (!ASSERT_EQ(sent, STRP_PKT_FULL_LEN, "second xsend(c0)"))
goto out_close;
/* partial read */
off = STRP_PKT_FULL_LEN / 2;
recvd = recv_timeout(c1, rcv, off, MSG_DONTWAIT,
IO_TIMEOUT_SEC);
recvd += recv_timeout(c1, rcv + off, sizeof(rcv) - off, MSG_DONTWAIT,
IO_TIMEOUT_SEC);
if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "partial recv_timeout(c1)") ||
!ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
"partial received data does not match the sent data"))
goto out_close;
out_close:
close(c0);
close(c1);
close(p0);
close(p1);
out:
test_sockmap_strp__destroy(strp);
}
void test_sockmap_strp(void)
{
if (test__start_subtest("sockmap strp tcp pass"))
test_sockmap_strp_pass(AF_INET, SOCK_STREAM, false);
if (test__start_subtest("sockmap strp tcp v6 pass"))
test_sockmap_strp_pass(AF_INET6, SOCK_STREAM, false);
if (test__start_subtest("sockmap strp tcp pass fionread"))
test_sockmap_strp_pass(AF_INET, SOCK_STREAM, true);
if (test__start_subtest("sockmap strp tcp v6 pass fionread"))
test_sockmap_strp_pass(AF_INET6, SOCK_STREAM, true);
if (test__start_subtest("sockmap strp tcp verdict"))
test_sockmap_strp_verdict(AF_INET, SOCK_STREAM);
if (test__start_subtest("sockmap strp tcp v6 verdict"))
test_sockmap_strp_verdict(AF_INET6, SOCK_STREAM);
if (test__start_subtest("sockmap strp tcp partial read"))
test_sockmap_strp_partial_read(AF_INET, SOCK_STREAM);
if (test__start_subtest("sockmap strp tcp multiple packets"))
test_sockmap_strp_multiple_pkt(AF_INET, SOCK_STREAM);
if (test__start_subtest("sockmap strp tcp dispatch"))
test_sockmap_strp_dispatch_pkt(AF_INET, SOCK_STREAM);
}

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c

@@ -52,10 +52,10 @@
ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id");
/* send a packet to trigger any potential bugs in there */
-char data[10] = {};
+char data[ETH_HLEN] = {};
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
-.data_size_in = 10,
+.data_size_in = sizeof(data),
.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
.repeat = 1,
);

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c

@@ -23,7 +23,7 @@
__u32 len = sizeof(info);
int err, dm_fd, dm_fd_redir, map_fd;
struct nstoken *nstoken = NULL;
-char data[10] = {};
+char data[ETH_HLEN] = {};
__u32 idx = 0;
SYS(out_close, "ip netns add %s", TEST_NS);
@@ -58,7 +58,7 @@
/* send a packet to trigger any potential bugs in there */
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
-.data_size_in = 10,
+.data_size_in = sizeof(data),
.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
.repeat = 1,
);
@@ -158,7 +158,7 @@
struct nstoken *nstoken = NULL;
__u32 len = sizeof(info);
int err, dm_fd, dm_fd_redir, map_fd, ifindex_dst;
-char data[10] = {};
+char data[ETH_HLEN] = {};
__u32 idx = 0;
SYS(out_close, "ip netns add %s", TEST_NS);
@@ -208,7 +208,7 @@
/* send a packet to trigger any potential bugs in there */
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
-.data_size_in = 10,
+.data_size_in = sizeof(data),
.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
.repeat = 1,
);

diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_strp.c b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c
new file mode 100644

@@ -0,0 +1,53 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
int verdict_max_size = 10000;
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 20);
__type(key, int);
__type(value, int);
} sock_map SEC(".maps");
SEC("sk_skb/stream_verdict")
int prog_skb_verdict(struct __sk_buff *skb)
{
__u32 one = 1;
if (skb->len > verdict_max_size)
return SK_PASS;
return bpf_sk_redirect_map(skb, &sock_map, one, 0);
}
SEC("sk_skb/stream_verdict")
int prog_skb_verdict_pass(struct __sk_buff *skb)
{
return SK_PASS;
}
SEC("sk_skb/stream_parser")
int prog_skb_parser(struct __sk_buff *skb)
{
return skb->len;
}
SEC("sk_skb/stream_parser")
int prog_skb_parser_partial(struct __sk_buff *skb)
{
/* agreement with the test program on a 4-byte size header
* and 6-byte body.
*/
if (skb->len < 4) {
/* need more header to determine full length */
return 0;
}
/* return full length decoded from header.
* the return value may be larger than skb->len which
* means the framework must wait for the body to arrive.
*/
return 10;
}
char _license[] SEC("license") = "GPL";

diff --git a/tools/testing/selftests/bpf/progs/verifier_array_access.c b/tools/testing/selftests/bpf/progs/verifier_array_access.c

@@ -713,4 +713,19 @@
return val->index;
}
SEC("socket")
__description("doesn't reject UINT64_MAX as s64 for irrelevant maps")
__success __retval(42)
unsigned int doesnt_reject_irrelevant_maps(void)
{
__u64 key = 0xFFFFFFFFFFFFFFFF;
struct test_val *val;
val = bpf_map_lookup_elem(&map_hash_48b, &key);
if (val)
return val->index;
return 42;
}
char _license[] SEC("license") = "GPL";