Merge branch 'bpf-fix-wrong-copied_seq-calculation-and-add-tests'
Jiayuan Chen says: ==================== A previous commit described in this topic http://lore.kernel.org/bpf/20230523025618.113937-9-john.fastabend@gmail.com directly updated 'sk->copied_seq' in the tcp_eat_skb() function when the action of a BPF program was SK_REDIRECT. For other actions, like SK_PASS, the update logic for 'sk->copied_seq' was moved to tcp_bpf_recvmsg_parser() to ensure the accuracy of the 'fionread' feature. That commit works for a single stream_verdict scenario, as it also modified 'sk_data_ready->sk_psock_verdict_data_ready->tcp_read_skb' to remove updating 'sk->copied_seq'. However, for programs where both stream_parser and stream_verdict are active (strparser purpose), tcp_read_sock() was used instead of tcp_read_skb() (sk_data_ready->strp_data_ready->tcp_read_sock). tcp_read_sock() now still updates 'sk->copied_seq', leading to duplicated updates. In summary, for strparser + SK_PASS, copied_seq is redundantly calculated in both tcp_read_sock() and tcp_bpf_recvmsg_parser(). The issue causes incorrect copied_seq calculations, which prevent correct data reads from the recv() interface in user-land. Also we added test cases for bpf + strparser and separated them from sockmap_basic, as strparser has more encapsulation and parsing capabilities compared to sockmap. ==================== Link: https://patch.msgid.link/20250122100917.49845-1-mrpre@163.com Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
This commit is contained in:
commit
9bf412d4d5
12 changed files with 610 additions and 65 deletions
|
@ -112,7 +112,7 @@ Functions
|
|||
Callbacks
|
||||
=========
|
||||
|
||||
There are six callbacks:
|
||||
There are seven callbacks:
|
||||
|
||||
::
|
||||
|
||||
|
@ -182,6 +182,13 @@ There are six callbacks:
|
|||
the length of the message. skb->len - offset may be greater
|
||||
then full_len since strparser does not trim the skb.
|
||||
|
||||
::
|
||||
|
||||
int (*read_sock)(struct strparser *strp, read_descriptor_t *desc,
|
||||
sk_read_actor_t recv_actor);
|
||||
|
||||
The read_sock callback is used by strparser instead of
|
||||
sock->ops->read_sock, if provided.
|
||||
::
|
||||
|
||||
int (*read_sock_done)(struct strparser *strp, int err);
|
||||
|
|
|
@ -91,6 +91,8 @@ struct sk_psock {
|
|||
struct sk_psock_progs progs;
|
||||
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
|
||||
struct strparser strp;
|
||||
u32 copied_seq;
|
||||
u32 ingress_bytes;
|
||||
#endif
|
||||
struct sk_buff_head ingress_skb;
|
||||
struct list_head ingress_msg;
|
||||
|
|
|
@ -43,6 +43,8 @@ struct strparser;
|
|||
struct strp_callbacks {
|
||||
int (*parse_msg)(struct strparser *strp, struct sk_buff *skb);
|
||||
void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
|
||||
int (*read_sock)(struct strparser *strp, read_descriptor_t *desc,
|
||||
sk_read_actor_t recv_actor);
|
||||
int (*read_sock_done)(struct strparser *strp, int err);
|
||||
void (*abort_parser)(struct strparser *strp, int err);
|
||||
void (*lock)(struct strparser *strp);
|
||||
|
|
|
@ -729,6 +729,9 @@ void tcp_get_info(struct sock *, struct tcp_info *);
|
|||
/* Read 'sendfile()'-style from a TCP socket */
|
||||
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
|
||||
sk_read_actor_t recv_actor);
|
||||
int tcp_read_sock_noack(struct sock *sk, read_descriptor_t *desc,
|
||||
sk_read_actor_t recv_actor, bool noack,
|
||||
u32 *copied_seq);
|
||||
int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
|
||||
struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off);
|
||||
void tcp_read_done(struct sock *sk, size_t len);
|
||||
|
@ -2599,6 +2602,11 @@ struct sk_psock;
|
|||
#ifdef CONFIG_BPF_SYSCALL
|
||||
int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
|
||||
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
|
||||
#ifdef CONFIG_BPF_STREAM_PARSER
|
||||
struct strparser;
|
||||
int tcp_bpf_strp_read_sock(struct strparser *strp, read_descriptor_t *desc,
|
||||
sk_read_actor_t recv_actor);
|
||||
#endif /* CONFIG_BPF_STREAM_PARSER */
|
||||
#endif /* CONFIG_BPF_SYSCALL */
|
||||
|
||||
#ifdef CONFIG_INET
|
||||
|
|
|
@ -549,6 +549,9 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
|
|||
return num_sge;
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
|
||||
psock->ingress_bytes += len;
|
||||
#endif
|
||||
copied = len;
|
||||
msg->sg.start = 0;
|
||||
msg->sg.size = copied;
|
||||
|
@ -1144,6 +1147,10 @@ int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
|
|||
if (!ret)
|
||||
sk_psock_set_state(psock, SK_PSOCK_RX_STRP_ENABLED);
|
||||
|
||||
if (sk_is_tcp(sk)) {
|
||||
psock->strp.cb.read_sock = tcp_bpf_strp_read_sock;
|
||||
psock->copied_seq = tcp_sk(sk)->copied_seq;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -303,7 +303,10 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
|
|||
|
||||
write_lock_bh(&sk->sk_callback_lock);
|
||||
if (stream_parser && stream_verdict && !psock->saved_data_ready) {
|
||||
ret = sk_psock_init_strp(sk, psock);
|
||||
if (sk_is_tcp(sk))
|
||||
ret = sk_psock_init_strp(sk, psock);
|
||||
else
|
||||
ret = -EOPNOTSUPP;
|
||||
if (ret) {
|
||||
write_unlock_bh(&sk->sk_callback_lock);
|
||||
sk_psock_put(sk, psock);
|
||||
|
|
|
@ -1565,12 +1565,13 @@ EXPORT_SYMBOL(tcp_recv_skb);
|
|||
* or for 'peeking' the socket using this routine
|
||||
* (although both would be easy to implement).
|
||||
*/
|
||||
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
|
||||
sk_read_actor_t recv_actor)
|
||||
static int __tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
|
||||
sk_read_actor_t recv_actor, bool noack,
|
||||
u32 *copied_seq)
|
||||
{
|
||||
struct sk_buff *skb;
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
u32 seq = tp->copied_seq;
|
||||
u32 seq = *copied_seq;
|
||||
u32 offset;
|
||||
int copied = 0;
|
||||
|
||||
|
@ -1624,9 +1625,12 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
|
|||
tcp_eat_recv_skb(sk, skb);
|
||||
if (!desc->count)
|
||||
break;
|
||||
WRITE_ONCE(tp->copied_seq, seq);
|
||||
WRITE_ONCE(*copied_seq, seq);
|
||||
}
|
||||
WRITE_ONCE(tp->copied_seq, seq);
|
||||
WRITE_ONCE(*copied_seq, seq);
|
||||
|
||||
if (noack)
|
||||
goto out;
|
||||
|
||||
tcp_rcv_space_adjust(sk);
|
||||
|
||||
|
@ -1635,10 +1639,25 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
|
|||
tcp_recv_skb(sk, seq, &offset);
|
||||
tcp_cleanup_rbuf(sk, copied);
|
||||
}
|
||||
out:
|
||||
return copied;
|
||||
}
|
||||
|
||||
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
|
||||
sk_read_actor_t recv_actor)
|
||||
{
|
||||
return __tcp_read_sock(sk, desc, recv_actor, false,
|
||||
&tcp_sk(sk)->copied_seq);
|
||||
}
|
||||
EXPORT_SYMBOL(tcp_read_sock);
|
||||
|
||||
int tcp_read_sock_noack(struct sock *sk, read_descriptor_t *desc,
|
||||
sk_read_actor_t recv_actor, bool noack,
|
||||
u32 *copied_seq)
|
||||
{
|
||||
return __tcp_read_sock(sk, desc, recv_actor, noack, copied_seq);
|
||||
}
|
||||
|
||||
int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
|
||||
{
|
||||
struct sk_buff *skb;
|
||||
|
|
|
@ -646,6 +646,42 @@ static int tcp_bpf_assert_proto_ops(struct proto *ops)
|
|||
ops->sendmsg == tcp_sendmsg ? 0 : -ENOTSUPP;
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
|
||||
int tcp_bpf_strp_read_sock(struct strparser *strp, read_descriptor_t *desc,
|
||||
sk_read_actor_t recv_actor)
|
||||
{
|
||||
struct sock *sk = strp->sk;
|
||||
struct sk_psock *psock;
|
||||
struct tcp_sock *tp;
|
||||
int copied = 0;
|
||||
|
||||
tp = tcp_sk(sk);
|
||||
rcu_read_lock();
|
||||
psock = sk_psock(sk);
|
||||
if (WARN_ON_ONCE(!psock)) {
|
||||
desc->error = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
psock->ingress_bytes = 0;
|
||||
copied = tcp_read_sock_noack(sk, desc, recv_actor, true,
|
||||
&psock->copied_seq);
|
||||
if (copied < 0)
|
||||
goto out;
|
||||
/* recv_actor may redirect skb to another socket (SK_REDIRECT) or
|
||||
* just put skb into ingress queue of current socket (SK_PASS).
|
||||
* For SK_REDIRECT, we need to ack the frame immediately but for
|
||||
* SK_PASS, we want to delay the ack until tcp_bpf_recvmsg_parser().
|
||||
*/
|
||||
tp->copied_seq = psock->copied_seq - psock->ingress_bytes;
|
||||
tcp_rcv_space_adjust(sk);
|
||||
__tcp_cleanup_rbuf(sk, copied - psock->ingress_bytes);
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
return copied;
|
||||
}
|
||||
#endif /* CONFIG_BPF_STREAM_PARSER */
|
||||
|
||||
int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
|
||||
{
|
||||
int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
|
||||
|
|
|
@ -347,7 +347,10 @@ static int strp_read_sock(struct strparser *strp)
|
|||
struct socket *sock = strp->sk->sk_socket;
|
||||
read_descriptor_t desc;
|
||||
|
||||
if (unlikely(!sock || !sock->ops || !sock->ops->read_sock))
|
||||
if (unlikely(!sock || !sock->ops))
|
||||
return -EBUSY;
|
||||
|
||||
if (unlikely(!strp->cb.read_sock && !sock->ops->read_sock))
|
||||
return -EBUSY;
|
||||
|
||||
desc.arg.data = strp;
|
||||
|
@ -355,7 +358,10 @@ static int strp_read_sock(struct strparser *strp)
|
|||
desc.count = 1; /* give more than one skb per call */
|
||||
|
||||
/* sk should be locked here, so okay to do read_sock */
|
||||
sock->ops->read_sock(strp->sk, &desc, strp_recv);
|
||||
if (strp->cb.read_sock)
|
||||
strp->cb.read_sock(strp, &desc, strp_recv);
|
||||
else
|
||||
sock->ops->read_sock(strp->sk, &desc, strp_recv);
|
||||
|
||||
desc.error = strp->cb.read_sock_done(strp, desc.error);
|
||||
|
||||
|
@ -468,6 +474,7 @@ int strp_init(struct strparser *strp, struct sock *sk,
|
|||
strp->cb.unlock = cb->unlock ? : strp_sock_unlock;
|
||||
strp->cb.rcv_msg = cb->rcv_msg;
|
||||
strp->cb.parse_msg = cb->parse_msg;
|
||||
strp->cb.read_sock = cb->read_sock;
|
||||
strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done;
|
||||
strp->cb.abort_parser = cb->abort_parser ? : strp_abort_strp;
|
||||
|
||||
|
|
|
@ -522,8 +522,8 @@ static void test_sockmap_skb_verdict_shutdown(void)
|
|||
if (!ASSERT_EQ(err, 1, "epoll_wait(fd)"))
|
||||
goto out_close;
|
||||
|
||||
n = recv(c1, &b, 1, SOCK_NONBLOCK);
|
||||
ASSERT_EQ(n, 0, "recv_timeout(fin)");
|
||||
n = recv(c1, &b, 1, MSG_DONTWAIT);
|
||||
ASSERT_EQ(n, 0, "recv(fin)");
|
||||
out_close:
|
||||
close(c1);
|
||||
close(p1);
|
||||
|
@ -531,57 +531,6 @@ out:
|
|||
test_sockmap_pass_prog__destroy(skel);
|
||||
}
|
||||
|
||||
static void test_sockmap_stream_pass(void)
|
||||
{
|
||||
int zero = 0, sent, recvd;
|
||||
int verdict, parser;
|
||||
int err, map;
|
||||
int c = -1, p = -1;
|
||||
struct test_sockmap_pass_prog *pass = NULL;
|
||||
char snd[256] = "0123456789";
|
||||
char rcv[256] = "0";
|
||||
|
||||
pass = test_sockmap_pass_prog__open_and_load();
|
||||
verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
|
||||
parser = bpf_program__fd(pass->progs.prog_skb_parser);
|
||||
map = bpf_map__fd(pass->maps.sock_map_rx);
|
||||
|
||||
err = bpf_prog_attach(parser, map, BPF_SK_SKB_STREAM_PARSER, 0);
|
||||
if (!ASSERT_OK(err, "bpf_prog_attach stream parser"))
|
||||
goto out;
|
||||
|
||||
err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
|
||||
if (!ASSERT_OK(err, "bpf_prog_attach stream verdict"))
|
||||
goto out;
|
||||
|
||||
err = create_pair(AF_INET, SOCK_STREAM, &c, &p);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
/* sk_data_ready of 'p' will be replaced by strparser handler */
|
||||
err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
|
||||
if (!ASSERT_OK(err, "bpf_map_update_elem(p)"))
|
||||
goto out_close;
|
||||
|
||||
/*
|
||||
* as 'prog_skb_parser' return the original skb len and
|
||||
* 'prog_skb_verdict' return SK_PASS, the kernel will just
|
||||
* pass it through to original socket 'p'
|
||||
*/
|
||||
sent = xsend(c, snd, sizeof(snd), 0);
|
||||
ASSERT_EQ(sent, sizeof(snd), "xsend(c)");
|
||||
|
||||
recvd = recv_timeout(p, rcv, sizeof(rcv), SOCK_NONBLOCK,
|
||||
IO_TIMEOUT_SEC);
|
||||
ASSERT_EQ(recvd, sizeof(rcv), "recv_timeout(p)");
|
||||
|
||||
out_close:
|
||||
close(c);
|
||||
close(p);
|
||||
|
||||
out:
|
||||
test_sockmap_pass_prog__destroy(pass);
|
||||
}
|
||||
|
||||
static void test_sockmap_skb_verdict_fionread(bool pass_prog)
|
||||
{
|
||||
|
@ -628,7 +577,7 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog)
|
|||
ASSERT_EQ(avail, expected, "ioctl(FIONREAD)");
|
||||
/* On DROP test there will be no data to read */
|
||||
if (pass_prog) {
|
||||
recvd = recv_timeout(c1, &buf, sizeof(buf), SOCK_NONBLOCK, IO_TIMEOUT_SEC);
|
||||
recvd = recv_timeout(c1, &buf, sizeof(buf), MSG_DONTWAIT, IO_TIMEOUT_SEC);
|
||||
ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(c0)");
|
||||
}
|
||||
|
||||
|
@ -1101,8 +1050,6 @@ void test_sockmap_basic(void)
|
|||
test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
|
||||
if (test__start_subtest("sockmap skb_verdict shutdown"))
|
||||
test_sockmap_skb_verdict_shutdown();
|
||||
if (test__start_subtest("sockmap stream parser and verdict pass"))
|
||||
test_sockmap_stream_pass();
|
||||
if (test__start_subtest("sockmap skb_verdict fionread"))
|
||||
test_sockmap_skb_verdict_fionread(true);
|
||||
if (test__start_subtest("sockmap skb_verdict fionread on drop"))
|
||||
|
|
454
tools/testing/selftests/bpf/prog_tests/sockmap_strp.c
Normal file
454
tools/testing/selftests/bpf/prog_tests/sockmap_strp.c
Normal file
|
@ -0,0 +1,454 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <error.h>
|
||||
#include <netinet/tcp.h>
|
||||
#include <test_progs.h>
|
||||
#include "sockmap_helpers.h"
|
||||
#include "test_skmsg_load_helpers.skel.h"
|
||||
#include "test_sockmap_strp.skel.h"
|
||||
|
||||
#define STRP_PKT_HEAD_LEN 4
|
||||
#define STRP_PKT_BODY_LEN 6
|
||||
#define STRP_PKT_FULL_LEN (STRP_PKT_HEAD_LEN + STRP_PKT_BODY_LEN)
|
||||
|
||||
static const char packet[STRP_PKT_FULL_LEN] = "head+body\0";
|
||||
static const int test_packet_num = 100;
|
||||
|
||||
/* Current implementation of tcp_bpf_recvmsg_parser() invokes data_ready
|
||||
* with sk held if an skb exists in sk_receive_queue. Then for the
|
||||
* data_ready implementation of strparser, it will delay the read
|
||||
* operation if sk is held and EAGAIN is returned.
|
||||
*/
|
||||
static int sockmap_strp_consume_pre_data(int p)
|
||||
{
|
||||
int recvd;
|
||||
bool retried = false;
|
||||
char rcv[10];
|
||||
|
||||
retry:
|
||||
errno = 0;
|
||||
recvd = recv_timeout(p, rcv, sizeof(rcv), 0, 1);
|
||||
if (recvd < 0 && errno == EAGAIN && retried == false) {
|
||||
/* On the first call, EAGAIN will certainly be returned.
|
||||
* A 1-second wait is enough for the workqueue to finish.
|
||||
*/
|
||||
sleep(1);
|
||||
retried = true;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "recv error or truncated data") ||
|
||||
!ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
|
||||
"data mismatch"))
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct test_sockmap_strp *sockmap_strp_init(int *out_map, bool pass,
|
||||
bool need_parser)
|
||||
{
|
||||
struct test_sockmap_strp *strp = NULL;
|
||||
int verdict, parser;
|
||||
int err;
|
||||
|
||||
strp = test_sockmap_strp__open_and_load();
|
||||
*out_map = bpf_map__fd(strp->maps.sock_map);
|
||||
|
||||
if (need_parser)
|
||||
parser = bpf_program__fd(strp->progs.prog_skb_parser_partial);
|
||||
else
|
||||
parser = bpf_program__fd(strp->progs.prog_skb_parser);
|
||||
|
||||
if (pass)
|
||||
verdict = bpf_program__fd(strp->progs.prog_skb_verdict_pass);
|
||||
else
|
||||
verdict = bpf_program__fd(strp->progs.prog_skb_verdict);
|
||||
|
||||
err = bpf_prog_attach(parser, *out_map, BPF_SK_SKB_STREAM_PARSER, 0);
|
||||
if (!ASSERT_OK(err, "bpf_prog_attach stream parser"))
|
||||
goto err;
|
||||
|
||||
err = bpf_prog_attach(verdict, *out_map, BPF_SK_SKB_STREAM_VERDICT, 0);
|
||||
if (!ASSERT_OK(err, "bpf_prog_attach stream verdict"))
|
||||
goto err;
|
||||
|
||||
return strp;
|
||||
err:
|
||||
test_sockmap_strp__destroy(strp);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Dispatch packets to different socket by packet size:
|
||||
*
|
||||
* ------ ------
|
||||
* | pkt4 || pkt1 |... > remote socket
|
||||
* ------ ------ / ------ ------
|
||||
* | pkt8 | pkt7 |...
|
||||
* ------ ------ \ ------ ------
|
||||
* | pkt3 || pkt2 |... > local socket
|
||||
* ------ ------
|
||||
*/
|
||||
static void test_sockmap_strp_dispatch_pkt(int family, int sotype)
|
||||
{
|
||||
int i, j, zero = 0, one = 1, recvd;
|
||||
int err, map;
|
||||
int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
|
||||
struct test_sockmap_strp *strp = NULL;
|
||||
int test_cnt = 6;
|
||||
char rcv[10];
|
||||
struct {
|
||||
char data[7];
|
||||
int data_len;
|
||||
int send_cnt;
|
||||
int *receiver;
|
||||
} send_dir[2] = {
|
||||
/* data expected to deliver to local */
|
||||
{"llllll", 6, 0, &p0},
|
||||
/* data expected to deliver to remote */
|
||||
{"rrrrr", 5, 0, &c1}
|
||||
};
|
||||
|
||||
strp = sockmap_strp_init(&map, false, false);
|
||||
if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
|
||||
return;
|
||||
|
||||
err = create_socket_pairs(family, sotype, &c0, &c1, &p0, &p1);
|
||||
if (!ASSERT_OK(err, "create_socket_pairs()"))
|
||||
goto out;
|
||||
|
||||
err = bpf_map_update_elem(map, &zero, &p0, BPF_NOEXIST);
|
||||
if (!ASSERT_OK(err, "bpf_map_update_elem(p0)"))
|
||||
goto out_close;
|
||||
|
||||
err = bpf_map_update_elem(map, &one, &p1, BPF_NOEXIST);
|
||||
if (!ASSERT_OK(err, "bpf_map_update_elem(p1)"))
|
||||
goto out_close;
|
||||
|
||||
err = setsockopt(c1, IPPROTO_TCP, TCP_NODELAY, &zero, sizeof(zero));
|
||||
if (!ASSERT_OK(err, "setsockopt(TCP_NODELAY)"))
|
||||
goto out_close;
|
||||
|
||||
/* deliver data with data size greater than 5 to local */
|
||||
strp->data->verdict_max_size = 5;
|
||||
|
||||
for (i = 0; i < test_cnt; i++) {
|
||||
int d = i % 2;
|
||||
|
||||
xsend(c0, send_dir[d].data, send_dir[d].data_len, 0);
|
||||
send_dir[d].send_cnt++;
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
for (j = 0; j < send_dir[i].send_cnt; j++) {
|
||||
int expected = send_dir[i].data_len;
|
||||
|
||||
recvd = recv_timeout(*send_dir[i].receiver, rcv,
|
||||
expected, MSG_DONTWAIT,
|
||||
IO_TIMEOUT_SEC);
|
||||
if (!ASSERT_EQ(recvd, expected, "recv_timeout()"))
|
||||
goto out_close;
|
||||
if (!ASSERT_OK(memcmp(send_dir[i].data, rcv, recvd),
|
||||
"data mismatch"))
|
||||
goto out_close;
|
||||
}
|
||||
}
|
||||
out_close:
|
||||
close(c0);
|
||||
close(c1);
|
||||
close(p0);
|
||||
close(p1);
|
||||
out:
|
||||
test_sockmap_strp__destroy(strp);
|
||||
}
|
||||
|
||||
/* We have multiple packets in one skb
|
||||
* ------------ ------------ ------------
|
||||
* | packet1 | packet2 | ...
|
||||
* ------------ ------------ ------------
|
||||
*/
|
||||
static void test_sockmap_strp_multiple_pkt(int family, int sotype)
|
||||
{
|
||||
int i, zero = 0;
|
||||
int sent, recvd, total;
|
||||
int err, map;
|
||||
int c = -1, p = -1;
|
||||
struct test_sockmap_strp *strp = NULL;
|
||||
char *snd = NULL, *rcv = NULL;
|
||||
|
||||
strp = sockmap_strp_init(&map, true, true);
|
||||
if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
|
||||
return;
|
||||
|
||||
err = create_pair(family, sotype, &c, &p);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
|
||||
if (!ASSERT_OK(err, "bpf_map_update_elem(zero, p)"))
|
||||
goto out_close;
|
||||
|
||||
/* construct multiple packets in one buffer */
|
||||
total = test_packet_num * STRP_PKT_FULL_LEN;
|
||||
snd = malloc(total);
|
||||
rcv = malloc(total + 1);
|
||||
if (!ASSERT_TRUE(snd, "malloc(snd)") ||
|
||||
!ASSERT_TRUE(rcv, "malloc(rcv)"))
|
||||
goto out_close;
|
||||
|
||||
for (i = 0; i < test_packet_num; i++) {
|
||||
memcpy(snd + i * STRP_PKT_FULL_LEN,
|
||||
packet, STRP_PKT_FULL_LEN);
|
||||
}
|
||||
|
||||
sent = xsend(c, snd, total, 0);
|
||||
if (!ASSERT_EQ(sent, total, "xsend(c)"))
|
||||
goto out_close;
|
||||
|
||||
/* try to recv one more byte to avoid truncation check */
|
||||
recvd = recv_timeout(p, rcv, total + 1, MSG_DONTWAIT, IO_TIMEOUT_SEC);
|
||||
if (!ASSERT_EQ(recvd, total, "recv(rcv)"))
|
||||
goto out_close;
|
||||
|
||||
/* we sent TCP segment with multiple encapsulation
|
||||
* then check whether packets are handled correctly
|
||||
*/
|
||||
if (!ASSERT_OK(memcmp(snd, rcv, total), "data mismatch"))
|
||||
goto out_close;
|
||||
|
||||
out_close:
|
||||
close(c);
|
||||
close(p);
|
||||
if (snd)
|
||||
free(snd);
|
||||
if (rcv)
|
||||
free(rcv);
|
||||
out:
|
||||
test_sockmap_strp__destroy(strp);
|
||||
}
|
||||
|
||||
/* Test strparser with partial read */
|
||||
static void test_sockmap_strp_partial_read(int family, int sotype)
|
||||
{
|
||||
int zero = 0, recvd, off;
|
||||
int err, map;
|
||||
int c = -1, p = -1;
|
||||
struct test_sockmap_strp *strp = NULL;
|
||||
char rcv[STRP_PKT_FULL_LEN + 1] = "0";
|
||||
|
||||
strp = sockmap_strp_init(&map, true, true);
|
||||
if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
|
||||
return;
|
||||
|
||||
err = create_pair(family, sotype, &c, &p);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
/* sk_data_ready of 'p' will be replaced by strparser handler */
|
||||
err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
|
||||
if (!ASSERT_OK(err, "bpf_map_update_elem(zero, p)"))
|
||||
goto out_close;
|
||||
|
||||
/* 1.1 send partial head, 1 byte header left */
|
||||
off = STRP_PKT_HEAD_LEN - 1;
|
||||
xsend(c, packet, off, 0);
|
||||
recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
|
||||
if (!ASSERT_EQ(-1, recvd, "partial head sent, expected no data"))
|
||||
goto out_close;
|
||||
|
||||
/* 1.2 send remaining head and body */
|
||||
xsend(c, packet + off, STRP_PKT_FULL_LEN - off, 0);
|
||||
recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, IO_TIMEOUT_SEC);
|
||||
if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "expected full data"))
|
||||
goto out_close;
|
||||
|
||||
/* 2.1 send partial head, 1 byte header left */
|
||||
off = STRP_PKT_HEAD_LEN - 1;
|
||||
xsend(c, packet, off, 0);
|
||||
|
||||
/* 2.2 send remaining head and partial body, 1 byte body left */
|
||||
xsend(c, packet + off, STRP_PKT_FULL_LEN - off - 1, 0);
|
||||
off = STRP_PKT_FULL_LEN - 1;
|
||||
recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
|
||||
if (!ASSERT_EQ(-1, recvd, "partial body sent, expected no data"))
|
||||
goto out_close;
|
||||
|
||||
/* 2.3 send remaining body */
|
||||
xsend(c, packet + off, STRP_PKT_FULL_LEN - off, 0);
|
||||
recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, IO_TIMEOUT_SEC);
|
||||
if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "expected full data"))
|
||||
goto out_close;
|
||||
|
||||
out_close:
|
||||
close(c);
|
||||
close(p);
|
||||
|
||||
out:
|
||||
test_sockmap_strp__destroy(strp);
|
||||
}
|
||||
|
||||
/* Test simple socket read/write with strparser + FIONREAD */
|
||||
static void test_sockmap_strp_pass(int family, int sotype, bool fionread)
|
||||
{
|
||||
int zero = 0, pkt_size = STRP_PKT_FULL_LEN, sent, recvd, avail;
|
||||
int err, map;
|
||||
int c = -1, p = -1;
|
||||
int test_cnt = 10, i;
|
||||
struct test_sockmap_strp *strp = NULL;
|
||||
char rcv[STRP_PKT_FULL_LEN + 1] = "0";
|
||||
|
||||
strp = sockmap_strp_init(&map, true, true);
|
||||
if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
|
||||
return;
|
||||
|
||||
err = create_pair(family, sotype, &c, &p);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
/* inject some data before bpf process, it should be read
|
||||
* correctly because we check sk_receive_queue in
|
||||
* tcp_bpf_recvmsg_parser().
|
||||
*/
|
||||
sent = xsend(c, packet, pkt_size, 0);
|
||||
if (!ASSERT_EQ(sent, pkt_size, "xsend(pre-data)"))
|
||||
goto out_close;
|
||||
|
||||
/* sk_data_ready of 'p' will be replaced by strparser handler */
|
||||
err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
|
||||
if (!ASSERT_OK(err, "bpf_map_update_elem(p)"))
|
||||
goto out_close;
|
||||
|
||||
/* consume previous data we injected */
|
||||
if (sockmap_strp_consume_pre_data(p))
|
||||
goto out_close;
|
||||
|
||||
/* Previously, we encountered issues such as deadlocks and
|
||||
* sequence errors that resulted in the inability to read
|
||||
* continuously. Therefore, we perform multiple iterations
|
||||
* of testing here.
|
||||
*/
|
||||
for (i = 0; i < test_cnt; i++) {
|
||||
sent = xsend(c, packet, pkt_size, 0);
|
||||
if (!ASSERT_EQ(sent, pkt_size, "xsend(c)"))
|
||||
goto out_close;
|
||||
|
||||
recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT,
|
||||
IO_TIMEOUT_SEC);
|
||||
if (!ASSERT_EQ(recvd, pkt_size, "recv_timeout(p)") ||
|
||||
!ASSERT_OK(memcmp(packet, rcv, pkt_size),
|
||||
"memcmp, data mismatch"))
|
||||
goto out_close;
|
||||
}
|
||||
|
||||
if (fionread) {
|
||||
sent = xsend(c, packet, pkt_size, 0);
|
||||
if (!ASSERT_EQ(sent, pkt_size, "second xsend(c)"))
|
||||
goto out_close;
|
||||
|
||||
err = ioctl(p, FIONREAD, &avail);
|
||||
if (!ASSERT_OK(err, "ioctl(FIONREAD) error") ||
|
||||
!ASSERT_EQ(avail, pkt_size, "ioctl(FIONREAD)"))
|
||||
goto out_close;
|
||||
|
||||
recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT,
|
||||
IO_TIMEOUT_SEC);
|
||||
if (!ASSERT_EQ(recvd, pkt_size, "second recv_timeout(p)") ||
|
||||
!ASSERT_OK(memcmp(packet, rcv, pkt_size),
|
||||
"second memcmp, data mismatch"))
|
||||
goto out_close;
|
||||
}
|
||||
|
||||
out_close:
|
||||
close(c);
|
||||
close(p);
|
||||
|
||||
out:
|
||||
test_sockmap_strp__destroy(strp);
|
||||
}
|
||||
|
||||
/* Test strparser with verdict mode */
|
||||
static void test_sockmap_strp_verdict(int family, int sotype)
|
||||
{
|
||||
int zero = 0, one = 1, sent, recvd, off;
|
||||
int err, map;
|
||||
int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
|
||||
struct test_sockmap_strp *strp = NULL;
|
||||
char rcv[STRP_PKT_FULL_LEN + 1] = "0";
|
||||
|
||||
strp = sockmap_strp_init(&map, false, true);
|
||||
if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
|
||||
return;
|
||||
|
||||
/* We simulate a reverse proxy server.
|
||||
* When p0 receives data from c0, we forward it to c1.
|
||||
* From c1's perspective, it will consider this data
|
||||
* as being sent by p1.
|
||||
*/
|
||||
err = create_socket_pairs(family, sotype, &c0, &c1, &p0, &p1);
|
||||
if (!ASSERT_OK(err, "create_socket_pairs()"))
|
||||
goto out;
|
||||
|
||||
err = bpf_map_update_elem(map, &zero, &p0, BPF_NOEXIST);
|
||||
if (!ASSERT_OK(err, "bpf_map_update_elem(p0)"))
|
||||
goto out_close;
|
||||
|
||||
err = bpf_map_update_elem(map, &one, &p1, BPF_NOEXIST);
|
||||
if (!ASSERT_OK(err, "bpf_map_update_elem(p1)"))
|
||||
goto out_close;
|
||||
|
||||
sent = xsend(c0, packet, STRP_PKT_FULL_LEN, 0);
|
||||
if (!ASSERT_EQ(sent, STRP_PKT_FULL_LEN, "xsend(c0)"))
|
||||
goto out_close;
|
||||
|
||||
recvd = recv_timeout(c1, rcv, sizeof(rcv), MSG_DONTWAIT,
|
||||
IO_TIMEOUT_SEC);
|
||||
if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "recv_timeout(c1)") ||
|
||||
!ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
|
||||
"received data does not match the sent data"))
|
||||
goto out_close;
|
||||
|
||||
/* send again to ensure the stream is functioning correctly. */
|
||||
sent = xsend(c0, packet, STRP_PKT_FULL_LEN, 0);
|
||||
if (!ASSERT_EQ(sent, STRP_PKT_FULL_LEN, "second xsend(c0)"))
|
||||
goto out_close;
|
||||
|
||||
/* partial read */
|
||||
off = STRP_PKT_FULL_LEN / 2;
|
||||
recvd = recv_timeout(c1, rcv, off, MSG_DONTWAIT,
|
||||
IO_TIMEOUT_SEC);
|
||||
recvd += recv_timeout(c1, rcv + off, sizeof(rcv) - off, MSG_DONTWAIT,
|
||||
IO_TIMEOUT_SEC);
|
||||
|
||||
if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "partial recv_timeout(c1)") ||
|
||||
!ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
|
||||
"partial received data does not match the sent data"))
|
||||
goto out_close;
|
||||
|
||||
out_close:
|
||||
close(c0);
|
||||
close(c1);
|
||||
close(p0);
|
||||
close(p1);
|
||||
out:
|
||||
test_sockmap_strp__destroy(strp);
|
||||
}
|
||||
|
||||
void test_sockmap_strp(void)
|
||||
{
|
||||
if (test__start_subtest("sockmap strp tcp pass"))
|
||||
test_sockmap_strp_pass(AF_INET, SOCK_STREAM, false);
|
||||
if (test__start_subtest("sockmap strp tcp v6 pass"))
|
||||
test_sockmap_strp_pass(AF_INET6, SOCK_STREAM, false);
|
||||
if (test__start_subtest("sockmap strp tcp pass fionread"))
|
||||
test_sockmap_strp_pass(AF_INET, SOCK_STREAM, true);
|
||||
if (test__start_subtest("sockmap strp tcp v6 pass fionread"))
|
||||
test_sockmap_strp_pass(AF_INET6, SOCK_STREAM, true);
|
||||
if (test__start_subtest("sockmap strp tcp verdict"))
|
||||
test_sockmap_strp_verdict(AF_INET, SOCK_STREAM);
|
||||
if (test__start_subtest("sockmap strp tcp v6 verdict"))
|
||||
test_sockmap_strp_verdict(AF_INET6, SOCK_STREAM);
|
||||
if (test__start_subtest("sockmap strp tcp partial read"))
|
||||
test_sockmap_strp_partial_read(AF_INET, SOCK_STREAM);
|
||||
if (test__start_subtest("sockmap strp tcp multiple packets"))
|
||||
test_sockmap_strp_multiple_pkt(AF_INET, SOCK_STREAM);
|
||||
if (test__start_subtest("sockmap strp tcp dispatch"))
|
||||
test_sockmap_strp_dispatch_pkt(AF_INET, SOCK_STREAM);
|
||||
}
|
53
tools/testing/selftests/bpf/progs/test_sockmap_strp.c
Normal file
53
tools/testing/selftests/bpf/progs/test_sockmap_strp.c
Normal file
|
@ -0,0 +1,53 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/bpf.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_endian.h>
|
||||
int verdict_max_size = 10000;
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_SOCKMAP);
|
||||
__uint(max_entries, 20);
|
||||
__type(key, int);
|
||||
__type(value, int);
|
||||
} sock_map SEC(".maps");
|
||||
|
||||
SEC("sk_skb/stream_verdict")
|
||||
int prog_skb_verdict(struct __sk_buff *skb)
|
||||
{
|
||||
__u32 one = 1;
|
||||
|
||||
if (skb->len > verdict_max_size)
|
||||
return SK_PASS;
|
||||
|
||||
return bpf_sk_redirect_map(skb, &sock_map, one, 0);
|
||||
}
|
||||
|
||||
SEC("sk_skb/stream_verdict")
|
||||
int prog_skb_verdict_pass(struct __sk_buff *skb)
|
||||
{
|
||||
return SK_PASS;
|
||||
}
|
||||
|
||||
SEC("sk_skb/stream_parser")
|
||||
int prog_skb_parser(struct __sk_buff *skb)
|
||||
{
|
||||
return skb->len;
|
||||
}
|
||||
|
||||
SEC("sk_skb/stream_parser")
|
||||
int prog_skb_parser_partial(struct __sk_buff *skb)
|
||||
{
|
||||
/* agreement with the test program on a 4-byte size header
|
||||
* and 6-byte body.
|
||||
*/
|
||||
if (skb->len < 4) {
|
||||
/* need more header to determine full length */
|
||||
return 0;
|
||||
}
|
||||
/* return full length decoded from header.
|
||||
* the return value may be larger than skb->len which
|
||||
* means framework must wait body coming.
|
||||
*/
|
||||
return 10;
|
||||
}
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
Loading…
Add table
Reference in a new issue