1
0
Fork 0
mirror of synced 2025-03-06 20:59:54 +01:00

net: better track kernel sockets lifetime

While kernel sockets are dismantled during pernet_operations->exit(),
their freeing can be delayed by any tx packets still held in qdisc
or device queues, due to prior skb_set_owner_w() calls.

This then triggers the following warning from ref_tracker_dir_exit() [1].

To fix this, make sure that kernel sockets own a reference on net->passive.

Add sk_net_refcnt_upgrade() helper, used whenever a kernel socket
is converted to a refcounted one.

[1]

[  136.263918][   T35] ref_tracker: net notrefcnt@ffff8880638f01e0 has 1/2 users at
[  136.263918][   T35]      sk_alloc+0x2b3/0x370
[  136.263918][   T35]      inet6_create+0x6ce/0x10f0
[  136.263918][   T35]      __sock_create+0x4c0/0xa30
[  136.263918][   T35]      inet_ctl_sock_create+0xc2/0x250
[  136.263918][   T35]      igmp6_net_init+0x39/0x390
[  136.263918][   T35]      ops_init+0x31e/0x590
[  136.263918][   T35]      setup_net+0x287/0x9e0
[  136.263918][   T35]      copy_net_ns+0x33f/0x570
[  136.263918][   T35]      create_new_namespaces+0x425/0x7b0
[  136.263918][   T35]      unshare_nsproxy_namespaces+0x124/0x180
[  136.263918][   T35]      ksys_unshare+0x57d/0xa70
[  136.263918][   T35]      __x64_sys_unshare+0x38/0x40
[  136.263918][   T35]      do_syscall_64+0xf3/0x230
[  136.263918][   T35]      entry_SYSCALL_64_after_hwframe+0x77/0x7f
[  136.263918][   T35]
[  136.343488][   T35] ref_tracker: net notrefcnt@ffff8880638f01e0 has 1/2 users at
[  136.343488][   T35]      sk_alloc+0x2b3/0x370
[  136.343488][   T35]      inet6_create+0x6ce/0x10f0
[  136.343488][   T35]      __sock_create+0x4c0/0xa30
[  136.343488][   T35]      inet_ctl_sock_create+0xc2/0x250
[  136.343488][   T35]      ndisc_net_init+0xa7/0x2b0
[  136.343488][   T35]      ops_init+0x31e/0x590
[  136.343488][   T35]      setup_net+0x287/0x9e0
[  136.343488][   T35]      copy_net_ns+0x33f/0x570
[  136.343488][   T35]      create_new_namespaces+0x425/0x7b0
[  136.343488][   T35]      unshare_nsproxy_namespaces+0x124/0x180
[  136.343488][   T35]      ksys_unshare+0x57d/0xa70
[  136.343488][   T35]      __x64_sys_unshare+0x38/0x40
[  136.343488][   T35]      do_syscall_64+0xf3/0x230
[  136.343488][   T35]      entry_SYSCALL_64_after_hwframe+0x77/0x7f

Fixes: 0cafd77dcd ("net: add a refcount tracker for kernel sockets")
Reported-by: syzbot+30a19e01a97420719891@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/67b72aeb.050a0220.14d86d.0283.GAE@google.com/T/#u
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20250220131854.4048077-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Eric Dumazet 2025-02-20 13:18:54 +00:00 committed by Jakub Kicinski
parent fde9836c40
commit 5c70eb5c59
8 changed files with 30 additions and 39 deletions

View file

@ -1751,6 +1751,7 @@ static inline bool sock_allow_reclassification(const struct sock *csk)
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
struct proto *prot, int kern);
void sk_free(struct sock *sk);
void sk_net_refcnt_upgrade(struct sock *sk);
void sk_destruct(struct sock *sk);
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority);
void sk_free_unlock_clone(struct sock *sk);

View file

@ -2246,6 +2246,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
get_net_track(net, &sk->ns_tracker, priority);
sock_inuse_add(net, 1);
} else {
net_passive_inc(net);
__netns_tracker_alloc(net, &sk->ns_tracker,
false, priority);
}
@ -2270,6 +2271,7 @@ EXPORT_SYMBOL(sk_alloc);
static void __sk_destruct(struct rcu_head *head)
{
struct sock *sk = container_of(head, struct sock, sk_rcu);
struct net *net = sock_net(sk);
struct sk_filter *filter;
if (sk->sk_destruct)
@ -2301,14 +2303,28 @@ static void __sk_destruct(struct rcu_head *head)
put_cred(sk->sk_peer_cred);
put_pid(sk->sk_peer_pid);
if (likely(sk->sk_net_refcnt))
put_net_track(sock_net(sk), &sk->ns_tracker);
else
__netns_tracker_free(sock_net(sk), &sk->ns_tracker, false);
if (likely(sk->sk_net_refcnt)) {
put_net_track(net, &sk->ns_tracker);
} else {
__netns_tracker_free(net, &sk->ns_tracker, false);
net_passive_dec(net);
}
sk_prot_free(sk->sk_prot_creator, sk);
}
/**
 * sk_net_refcnt_upgrade - convert a kernel socket into a refcounted one
 * @sk: socket to convert; expected to have sk_net_refcnt == 0 on entry
 *
 * Swaps the netns bookkeeping that sk_alloc() sets up for a
 * non-refcounted (kernel) socket for the bookkeeping it sets up for a
 * refcounted socket, so that __sk_destruct() later takes the
 * put_net_track() path for this socket.
 *
 * NOTE(review): the tracker is allocated with GFP_KERNEL, so callers are
 * presumably expected to be in a context that may sleep - confirm.
 * NOTE(review): the passive reference is dropped before the new netns
 * reference is taken; this presumably relies on the caller otherwise
 * keeping @sk's netns alive across the call - confirm against callers.
 */
void sk_net_refcnt_upgrade(struct sock *sk)
{
struct net *net = sock_net(sk);
/* Warn (execution continues) if the socket is already refcounted. */
WARN_ON_ONCE(sk->sk_net_refcnt);
/*
 * Undo the kernel-socket side of sk_alloc(): release the tracker that
 * was allocated with refcounted == false and drop the net->passive
 * reference taken via net_passive_inc().
 */
__netns_tracker_free(net, &sk->ns_tracker, false);
net_passive_dec(net);
/*
 * Redo the refcounted side of sk_alloc(): mark the socket refcounted,
 * take the tracked netns reference and bump the per-netns socket
 * accounting, exactly as sk_alloc() does for refcounted sockets.
 */
sk->sk_net_refcnt = 1;
get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
sock_inuse_add(net, 1);
}
EXPORT_SYMBOL_GPL(sk_net_refcnt_upgrade);
void sk_destruct(struct sock *sk)
{
bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
@ -2405,6 +2421,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
* is not properly dismantling its kernel sockets at netns
* destroy time.
*/
net_passive_inc(sock_net(newsk));
__netns_tracker_alloc(sock_net(newsk), &newsk->ns_tracker,
false, priority);
}

View file

@ -1772,10 +1772,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
* needs it.
* Update ns_tracker to current stack trace and refcounted tracker.
*/
__netns_tracker_free(net, &sf->sk->ns_tracker, false);
sf->sk->sk_net_refcnt = 1;
get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL);
sock_inuse_add(net, 1);
sk_net_refcnt_upgrade(sf->sk);
err = tcp_set_ulp(sf->sk, "mptcp");
if (err)
goto err_free;

View file

@ -795,16 +795,6 @@ static int netlink_release(struct socket *sock)
sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
/* Because struct net might disappear soon, do not keep a pointer. */
if (!sk->sk_net_refcnt && sock_net(sk) != &init_net) {
__netns_tracker_free(sock_net(sk), &sk->ns_tracker, false);
/* Because of deferred_put_nlk_sk and use of work queue,
* it is possible netns will be freed before this socket.
*/
sock_net_set(sk, &init_net);
__netns_tracker_alloc(&init_net, &sk->ns_tracker,
false, GFP_KERNEL);
}
call_rcu(&nlk->rcu, deferred_put_nlk_sk);
return 0;
}

View file

@ -504,12 +504,8 @@ bool rds_tcp_tune(struct socket *sock)
release_sock(sk);
return false;
}
/* Update ns_tracker to current stack trace and refcounted tracker */
__netns_tracker_free(net, &sk->ns_tracker, false);
sk->sk_net_refcnt = 1;
netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL);
sock_inuse_add(net, 1);
sk_net_refcnt_upgrade(sk);
put_net(net);
}
rtn = net_generic(net, rds_tcp_netid);
if (rtn->sndbuf_size > 0) {

View file

@ -3337,10 +3337,7 @@ int smc_create_clcsk(struct net *net, struct sock *sk, int family)
* which need net ref.
*/
sk = smc->clcsock->sk;
__netns_tracker_free(net, &sk->ns_tracker, false);
sk->sk_net_refcnt = 1;
get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
sock_inuse_add(net, 1);
sk_net_refcnt_upgrade(sk);
return 0;
}

View file

@ -1541,10 +1541,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
newlen = error;
if (protocol == IPPROTO_TCP) {
__netns_tracker_free(net, &sock->sk->ns_tracker, false);
sock->sk->sk_net_refcnt = 1;
get_net_track(net, &sock->sk->ns_tracker, GFP_KERNEL);
sock_inuse_add(net, 1);
sk_net_refcnt_upgrade(sock->sk);
if ((error = kernel_listen(sock, 64)) < 0)
goto bummer;
}

View file

@ -1941,12 +1941,8 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
goto out;
}
if (protocol == IPPROTO_TCP) {
__netns_tracker_free(xprt->xprt_net, &sock->sk->ns_tracker, false);
sock->sk->sk_net_refcnt = 1;
get_net_track(xprt->xprt_net, &sock->sk->ns_tracker, GFP_KERNEL);
sock_inuse_add(xprt->xprt_net, 1);
}
if (protocol == IPPROTO_TCP)
sk_net_refcnt_upgrade(sock->sk);
filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
if (IS_ERR(filp))