tcp: Migrate TCP_ESTABLISHED/TCP_SYN_RECV sockets in accept queues.
When we call close() or shutdown() for listening sockets, each child socket in the accept queue is freed at inet_csk_listen_stop(). If we can get a new listener by reuseport_migrate_sock() and clone the request by inet_reqsk_clone(), we try to add it into the new listener's accept queue by inet_csk_reqsk_queue_add(). If that fails, we have to call __reqsk_free() to call sock_put() for the new listener and free the cloned request.

After putting the full socket into ehash, tcp_v[46]_syn_recv_sock() sets ireq_opt/pktopts in struct inet_request_sock to NULL, but ipv6_opt can be non-NULL. So, we have to set ipv6_opt of the old request to NULL to avoid a double free.

Note that we do not update req->rsk_listener and instead clone the req to migrate it, because another path may reference the original request. If we protected it by RCU, we would need to add rcu_read_lock() in many places.

Suggested-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/netdev/20201209030903.hhow5r53l6fmozjn@kafai-mbp.dhcp.thefacebook.com/
Link: https://lore.kernel.org/bpf/20210612123224.12525-6-kuniyu@amazon.co.jp
This commit is contained in:
parent
1cd62c2157
commit
54b92e8419
1 changed files with 69 additions and 1 deletions
|
@ -695,6 +695,52 @@ int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(inet_rtx_syn_ack);
|
EXPORT_SYMBOL(inet_rtx_syn_ack);
|
||||||
|
|
||||||
|
/* inet_reqsk_clone - duplicate a request sock so it can be attached to @sk.
 *
 * @req: request sock to copy from.
 * @sk:  listener that becomes the clone's rsk_listener. The caller passes in
 *       a reference on @sk; it is dropped here with sock_put() if the
 *       allocation fails.
 *
 * Allocates a new request sock from req->rsk_ops->slab (atomic, no
 * allocation-failure warning), copies @req into it except for the span
 * between sk_dontcopy_begin and sk_dontcopy_end, re-initialises the hash
 * node and copies a few fields individually. For a TCP listener with
 * tfo_listener set, the child socket's fastopen_rsk is repointed at the
 * clone.
 *
 * rsk_refcnt is not initialised here; inet_csk_listen_stop() sets it right
 * after a successful return.
 *
 * Returns the new request sock, or NULL if the allocation failed.
 */
static struct request_sock *inet_reqsk_clone(struct request_sock *req,
|
||||||
|
struct sock *sk)
|
||||||
|
{
|
||||||
|
struct sock *req_sk, *nreq_sk;
|
||||||
|
struct request_sock *nreq;
|
||||||
|
|
||||||
|
nreq = kmem_cache_alloc(req->rsk_ops->slab, GFP_ATOMIC | __GFP_NOWARN);
|
||||||
|
if (!nreq) {
|
||||||
|
/* paired with refcount_inc_not_zero() in reuseport_migrate_sock() */
|
||||||
|
sock_put(sk);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
req_sk = req_to_sk(req);
|
||||||
|
nreq_sk = req_to_sk(nreq);
|
||||||
|
|
||||||
|
memcpy(nreq_sk, req_sk,
|
||||||
|
offsetof(struct sock, sk_dontcopy_begin));
|
||||||
|
memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end,
|
||||||
|
req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end));
|
||||||
|
|
||||||
|
/* NOTE(review): the fields set below are presumably inside the region skipped by the two memcpy() calls above - confirm against struct sock. */
sk_node_init(&nreq_sk->sk_node);
|
||||||
|
nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping;
|
||||||
|
#ifdef CONFIG_XPS
|
||||||
|
nreq_sk->sk_rx_queue_mapping = req_sk->sk_rx_queue_mapping;
|
||||||
|
#endif
|
||||||
|
nreq_sk->sk_incoming_cpu = req_sk->sk_incoming_cpu;
|
||||||
|
|
||||||
|
nreq->rsk_listener = sk;
|
||||||
|
|
||||||
|
/* We need not acquire fastopenq->lock
|
||||||
|
* because the child socket is locked in inet_csk_listen_stop().
|
||||||
|
*/
|
||||||
|
if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(nreq)->tfo_listener)
|
||||||
|
rcu_assign_pointer(tcp_sk(nreq->sk)->fastopen_rsk, nreq);
|
||||||
|
|
||||||
|
return nreq;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* reqsk_migrate_reset - clear the ipv6_opt pointer of @req.
 *
 * After inet_reqsk_clone() the original request and its clone carry the same
 * ipv6_opt pointer. inet_csk_listen_stop() calls this on whichever of the two
 * is about to be released (the original when the migration succeeded, the
 * clone when it failed) so that ipv6_opt is not freed twice.
 *
 * Does nothing when CONFIG_IPV6 is not enabled.
 */
static void reqsk_migrate_reset(struct request_sock *req)
|
||||||
|
{
|
||||||
|
#if IS_ENABLED(CONFIG_IPV6)
|
||||||
|
inet_rsk(req)->ipv6_opt = NULL;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
/* return true if req was found in the ehash table */
|
/* return true if req was found in the ehash table */
|
||||||
static bool reqsk_queue_unlink(struct request_sock *req)
|
static bool reqsk_queue_unlink(struct request_sock *req)
|
||||||
{
|
{
|
||||||
|
@ -1036,14 +1082,36 @@ void inet_csk_listen_stop(struct sock *sk)
|
||||||
* of the variants now. --ANK
|
* of the variants now. --ANK
|
||||||
*/
|
*/
|
||||||
while ((req = reqsk_queue_remove(queue, sk)) != NULL) {
|
while ((req = reqsk_queue_remove(queue, sk)) != NULL) {
|
||||||
struct sock *child = req->sk;
|
struct sock *child = req->sk, *nsk;
|
||||||
|
struct request_sock *nreq;
|
||||||
|
|
||||||
local_bh_disable();
|
local_bh_disable();
|
||||||
bh_lock_sock(child);
|
bh_lock_sock(child);
|
||||||
WARN_ON(sock_owned_by_user(child));
|
WARN_ON(sock_owned_by_user(child));
|
||||||
sock_hold(child);
|
sock_hold(child);
|
||||||
|
|
||||||
|
nsk = reuseport_migrate_sock(sk, child, NULL);
|
||||||
|
if (nsk) {
|
||||||
|
nreq = inet_reqsk_clone(req, nsk);
|
||||||
|
if (nreq) {
|
||||||
|
refcount_set(&nreq->rsk_refcnt, 1);
|
||||||
|
|
||||||
|
if (inet_csk_reqsk_queue_add(nsk, nreq, child)) {
|
||||||
|
reqsk_migrate_reset(req);
|
||||||
|
} else {
|
||||||
|
reqsk_migrate_reset(nreq);
|
||||||
|
__reqsk_free(nreq);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* inet_csk_reqsk_queue_add() has already
|
||||||
|
* called inet_child_forget() on failure case.
|
||||||
|
*/
|
||||||
|
goto skip_child_forget;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
inet_child_forget(sk, req, child);
|
inet_child_forget(sk, req, child);
|
||||||
|
skip_child_forget:
|
||||||
reqsk_put(req);
|
reqsk_put(req);
|
||||||
bh_unlock_sock(child);
|
bh_unlock_sock(child);
|
||||||
local_bh_enable();
|
local_bh_enable();
|
||||||
|
|
Loading…
Add table
Reference in a new issue