1
0
Fork 0
mirror of synced 2025-03-06 20:59:54 +01:00
linux/drivers/net/pfcp.c
Kuniyuki Iwashima ffc90e9ca6 pfcp: Destroy device along with udp socket's netns dismantle.
pfcp_newlink() links the device to a list in dev_net(dev) instead
of net, where a udp tunnel socket is created.

Even when net is removed, the device stays alive on dev_net(dev).
Then, removing net triggers the splat below. [0]

In this example, pfcp0 is created in ns2, but the udp socket is
created in ns1.

  ip netns add ns1
  ip netns add ns2
  ip -n ns1 link add netns ns2 name pfcp0 type pfcp
  ip netns del ns1

Let's link the device to the socket's netns instead.

Now, pfcp_net_exit() needs another netdev iteration to remove
all pfcp devices in the netns.

pfcp_dev_list is not used under RCU, so the list API is converted
to the non-RCU variant.

pfcp_net_exit() can be converted to .exit_batch_rtnl() in net-next.

[0]:
ref_tracker: net notrefcnt@00000000128b34dc has 1/1 users at
     sk_alloc (./include/net/net_namespace.h:345 net/core/sock.c:2236)
     inet_create (net/ipv4/af_inet.c:326 net/ipv4/af_inet.c:252)
     __sock_create (net/socket.c:1558)
     udp_sock_create4 (net/ipv4/udp_tunnel_core.c:18)
     pfcp_create_sock (drivers/net/pfcp.c:168)
     pfcp_newlink (drivers/net/pfcp.c:182 drivers/net/pfcp.c:197)
     rtnl_newlink (net/core/rtnetlink.c:3786 net/core/rtnetlink.c:3897 net/core/rtnetlink.c:4012)
     rtnetlink_rcv_msg (net/core/rtnetlink.c:6922)
     netlink_rcv_skb (net/netlink/af_netlink.c:2542)
     netlink_unicast (net/netlink/af_netlink.c:1321 net/netlink/af_netlink.c:1347)
     netlink_sendmsg (net/netlink/af_netlink.c:1891)
     ____sys_sendmsg (net/socket.c:711 net/socket.c:726 net/socket.c:2583)
     ___sys_sendmsg (net/socket.c:2639)
     __sys_sendmsg (net/socket.c:2669)
     do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83)
     entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)

WARNING: CPU: 1 PID: 11 at lib/ref_tracker.c:179 ref_tracker_dir_exit (lib/ref_tracker.c:179)
Modules linked in:
CPU: 1 UID: 0 PID: 11 Comm: kworker/u16:0 Not tainted 6.13.0-rc5-00147-g4c1224501e9d #5
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
Workqueue: netns cleanup_net
RIP: 0010:ref_tracker_dir_exit (lib/ref_tracker.c:179)
Code: 00 00 00 fc ff df 4d 8b 26 49 bd 00 01 00 00 00 00 ad de 4c 39 f5 0f 85 df 00 00 00 48 8b 74 24 08 48 89 df e8 a5 cc 12 02 90 <0f> 0b 90 48 8d 6b 44 be 04 00 00 00 48 89 ef e8 80 de 67 ff 48 89
RSP: 0018:ff11000007f3fb60 EFLAGS: 00010286
RAX: 00000000000020ef RBX: ff1100000d6481e0 RCX: 1ffffffff0e40d82
RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff8423ee3c
RBP: ff1100000d648230 R08: 0000000000000001 R09: fffffbfff0e395af
R10: 0000000000000001 R11: 0000000000000000 R12: ff1100000d648230
R13: dead000000000100 R14: ff1100000d648230 R15: dffffc0000000000
FS:  0000000000000000(0000) GS:ff1100006ce80000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00005620e1363990 CR3: 000000000eeb2002 CR4: 0000000000771ef0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400
PKRU: 55555554
Call Trace:
 <TASK>
 ? __warn (kernel/panic.c:748)
 ? ref_tracker_dir_exit (lib/ref_tracker.c:179)
 ? report_bug (lib/bug.c:201 lib/bug.c:219)
 ? handle_bug (arch/x86/kernel/traps.c:285)
 ? exc_invalid_op (arch/x86/kernel/traps.c:309 (discriminator 1))
 ? asm_exc_invalid_op (./arch/x86/include/asm/idtentry.h:621)
 ? _raw_spin_unlock_irqrestore (./arch/x86/include/asm/irqflags.h:42 ./arch/x86/include/asm/irqflags.h:97 ./arch/x86/include/asm/irqflags.h:155 ./include/linux/spinlock_api_smp.h:151 kernel/locking/spinlock.c:194)
 ? ref_tracker_dir_exit (lib/ref_tracker.c:179)
 ? __pfx_ref_tracker_dir_exit (lib/ref_tracker.c:158)
 ? kfree (mm/slub.c:4613 mm/slub.c:4761)
 net_free (net/core/net_namespace.c:476 net/core/net_namespace.c:467)
 cleanup_net (net/core/net_namespace.c:664 (discriminator 3))
 process_one_work (kernel/workqueue.c:3229)
 worker_thread (kernel/workqueue.c:3304 kernel/workqueue.c:3391)
 kthread (kernel/kthread.c:389)
 ret_from_fork (arch/x86/kernel/process.c:147)
 ret_from_fork_asm (arch/x86/entry/entry_64.S:257)
  </TASK>

Fixes: 76c8764ef3 ("pfcp: add PFCP module")
Reported-by: Xiao Liang <shaw.leon@gmail.com>
Closes: https://lore.kernel.org/netdev/20250104125732.17335-1-shaw.leon@gmail.com/
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2025-01-14 11:20:04 +01:00

306 lines
6.4 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* PFCP according to 3GPP TS 29.244
*
* Copyright (C) 2022, Intel Corporation.
*/
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/rculist.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
#include <net/pfcp.h>
/**
 * struct pfcp_dev - private data of one PFCP net device
 * @list: entry in a per-netns &pfcp_net.pfcp_dev_list (added in pfcp_newlink())
 * @sock: UDP tunnel socket created by pfcp_add_sock()
 * @dev: back-pointer to the owning net_device, set in pfcp_dev_init()
 * @net: network namespace handed to pfcp_newlink(); @sock is created in it
 * @gro_cells: GRO cells that decapsulated packets are queued to
 */
struct pfcp_dev {
	struct list_head	list;

	struct socket		*sock;
	struct net_device	*dev;
	struct net		*net;

	struct gro_cells	gro_cells;
};
/*
 * Key passed to net_generic() to look up the per-netns struct pfcp_net;
 * it is handed to the pernet core through pfcp_net_ops.id.
 */
static unsigned int pfcp_net_id __read_mostly;

/**
 * struct pfcp_net - per network namespace PFCP state
 * @pfcp_dev_list: PFCP devices linked to this namespace by pfcp_newlink()
 */
struct pfcp_net {
	struct list_head	pfcp_dev_list;
};
/*
 * Fill the tunnel metadata for a session-related PFCP message: copy the SEID
 * from the session header and mark the metadata as PFCP_TYPE_SESSION.
 *
 * @pfcp is currently unused.
 *
 * NOTE(review): the caller only ensures PFCP_HLEN bytes are pulled before
 * calling this; confirm that PFCP_HLEN covers the session header read here.
 */
static void pfcp_session_recv(struct pfcp_dev *pfcp, struct sk_buff *skb,
			      struct pfcp_metadata *md)
{
	struct pfcphdr_session *sess_hdr;

	sess_hdr = pfcp_hdr_session(skb);

	md->type = PFCP_TYPE_SESSION;
	md->seid = sess_hdr->seid;
}
/*
 * Fill the tunnel metadata for a node-related PFCP message. The caller picks
 * this path when PFCP_SEID_FLAG is not set in the header, so only the type is
 * recorded. @pfcp and @skb are currently unused.
 */
static void pfcp_node_recv(struct pfcp_dev *pfcp, struct sk_buff *skb,
			   struct pfcp_metadata *md)
{
	md->type = PFCP_TYPE_NODE;
}
static int pfcp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
IP_TUNNEL_DECLARE_FLAGS(flags) = { };
struct metadata_dst *tun_dst;
struct pfcp_metadata *md;
struct pfcphdr *unparsed;
struct pfcp_dev *pfcp;
if (unlikely(!pskb_may_pull(skb, PFCP_HLEN)))
goto drop;
pfcp = rcu_dereference_sk_user_data(sk);
if (unlikely(!pfcp))
goto drop;
unparsed = pfcp_hdr(skb);
ip_tunnel_flags_zero(flags);
tun_dst = udp_tun_rx_dst(skb, sk->sk_family, flags, 0,
sizeof(*md));
if (unlikely(!tun_dst))
goto drop;
md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
if (unlikely(!md))
goto drop;
if (unparsed->flags & PFCP_SEID_FLAG)
pfcp_session_recv(pfcp, skb, md);
else
pfcp_node_recv(pfcp, skb, md);
__set_bit(IP_TUNNEL_PFCP_OPT_BIT, tun_dst->u.tun_info.key.tun_flags);
tun_dst->u.tun_info.options_len = sizeof(*md);
if (unlikely(iptunnel_pull_header(skb, PFCP_HLEN, skb->protocol,
!net_eq(sock_net(sk),
dev_net(pfcp->dev)))))
goto drop;
skb_dst_set(skb, (struct dst_entry *)tun_dst);
skb_reset_network_header(skb);
skb_reset_mac_header(skb);
skb->dev = pfcp->dev;
gro_cells_receive(&pfcp->gro_cells, skb);
return 0;
drop:
kfree_skb(skb);
return 0;
}
/*
 * Release the device's UDP tunnel socket, if it has one.
 *
 * This can be reached with no live socket: pfcp_newlink() calls it when
 * register_netdevice() fails, yet a failure after ndo_init succeeded has
 * already run pfcp_dev_uninit(), which released the socket and cleared the
 * pointer. pfcp_add_sock() may also have stored an ERR_PTR. Both cases must
 * be a no-op rather than a bad dereference in udp_tunnel_sock_release().
 */
static void pfcp_del_sock(struct pfcp_dev *pfcp)
{
	if (IS_ERR_OR_NULL(pfcp->sock))
		return;

	udp_tunnel_sock_release(pfcp->sock);
	pfcp->sock = NULL;
}
/*
 * ndo_uninit callback: tear down the GRO cells first, then release the
 * device's tunnel socket.
 */
static void pfcp_dev_uninit(struct net_device *dev)
{
	struct pfcp_dev *priv = netdev_priv(dev);
	struct gro_cells *cells = &priv->gro_cells;

	gro_cells_destroy(cells);
	pfcp_del_sock(priv);
}
/*
 * ndo_init callback: record the back-pointer to the net_device in the private
 * area and set up the GRO cells.
 *
 * Returns whatever gro_cells_init() returns.
 */
static int pfcp_dev_init(struct net_device *dev)
{
	struct pfcp_dev *priv = netdev_priv(dev);
	int ret;

	priv->dev = dev;
	ret = gro_cells_init(&priv->gro_cells, dev);

	return ret;
}
/* Net device callbacks; installed on each device by pfcp_link_setup(). */
static const struct net_device_ops pfcp_netdev_ops = {
	.ndo_init		= pfcp_dev_init,
	.ndo_uninit		= pfcp_dev_uninit,
	.ndo_get_stats64	= dev_get_tstats64,
};
/* Device type attached to every PFCP netdev via SET_NETDEV_DEVTYPE(). */
static const struct device_type pfcp_type = {
	.name	= "pfcp",
};
/*
 * rtnl_link_ops.setup callback: install the netdev ops and device type and
 * configure the device as a header-less, address-less (ARPHRD_NONE),
 * point-to-point interface without a queue.
 */
static void pfcp_link_setup(struct net_device *dev)
{
	/* Callbacks, lifetime and sysfs device type. */
	dev->netdev_ops = &pfcp_netdev_ops;
	dev->needs_free_netdev = true;
	SET_NETDEV_DEVTYPE(dev, &pfcp_type);

	/* No link-layer header and no hardware address. */
	dev->hard_header_len = 0;
	dev->addr_len = 0;
	dev->type = ARPHRD_NONE;

	dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
	dev->priv_flags |= IFF_NO_QUEUE;

	netif_keep_dst(dev);
}
static struct socket *pfcp_create_sock(struct pfcp_dev *pfcp)
{
struct udp_tunnel_sock_cfg tuncfg = {};
struct udp_port_cfg udp_conf = {
.local_ip.s_addr = htonl(INADDR_ANY),
.family = AF_INET,
};
struct net *net = pfcp->net;
struct socket *sock;
int err;
udp_conf.local_udp_port = htons(PFCP_PORT);
err = udp_sock_create(net, &udp_conf, &sock);
if (err)
return ERR_PTR(err);
tuncfg.sk_user_data = pfcp;
tuncfg.encap_rcv = pfcp_encap_recv;
tuncfg.encap_type = 1;
setup_udp_tunnel_sock(net, sock, &tuncfg);
return sock;
}
/*
 * Create the tunnel socket and store it in pfcp->sock.
 *
 * Returns 0 on success or a negative errno. On failure pfcp->sock is left
 * holding the ERR_PTR() returned by pfcp_create_sock().
 */
static int pfcp_add_sock(struct pfcp_dev *pfcp)
{
	struct socket *sock = pfcp_create_sock(pfcp);

	pfcp->sock = sock;

	return PTR_ERR_OR_ZERO(sock);
}
/*
 * rtnl_link_ops.newlink callback.
 * @net: namespace in which the tunnel socket is created and on whose
 *       pfcp_dev_list the device is linked
 * @dev: device being created
 * @tb, @data, @extack: unused
 *
 * Creates the UDP tunnel socket, registers the net device and links it to the
 * per-netns list of @net.
 *
 * Returns 0 on success or a negative errno; on failure pfcp->net is reset to
 * NULL.
 */
static int pfcp_newlink(struct net *net, struct net_device *dev,
			struct nlattr *tb[], struct nlattr *data[],
			struct netlink_ext_ack *extack)
{
	struct pfcp_dev *priv = netdev_priv(dev);
	struct pfcp_net *pn;
	int ret;

	priv->net = net;

	ret = pfcp_add_sock(priv);
	if (ret) {
		netdev_dbg(dev, "failed to add pfcp socket %d\n", ret);
		priv->net = NULL;
		return ret;
	}

	ret = register_netdevice(dev);
	if (ret) {
		netdev_dbg(dev, "failed to register pfcp netdev %d\n", ret);
		/*
		 * NOTE(review): if register_netdevice() fails after ndo_init
		 * succeeded it presumably runs ndo_uninit, which has already
		 * released the socket - confirm pfcp_del_sock() tolerates
		 * being called again here.
		 */
		pfcp_del_sock(priv);
		priv->net = NULL;
		return ret;
	}

	pn = net_generic(net, pfcp_net_id);
	list_add(&priv->list, &pn->pfcp_dev_list);

	netdev_dbg(dev, "registered new PFCP interface\n");

	return 0;
}
/*
 * rtnl_link_ops.dellink callback: unlink the device from the per-netns list
 * it was added to in pfcp_newlink() and queue it on @head for unregistration.
 */
static void pfcp_dellink(struct net_device *dev, struct list_head *head)
{
	struct pfcp_dev *priv = netdev_priv(dev);

	list_del(&priv->list);
	unregister_netdevice_queue(dev, head);
}
/* rtnetlink link operations for devices of kind "pfcp". */
static struct rtnl_link_ops pfcp_link_ops __read_mostly = {
	.kind		= "pfcp",
	.priv_size	= sizeof(struct pfcp_dev),
	.setup		= pfcp_link_setup,
	.newlink	= pfcp_newlink,
	.dellink	= pfcp_dellink,
};
/*
 * Per-netns init: start with an empty list of PFCP devices.
 * Always returns 0.
 */
static int __net_init pfcp_net_init(struct net *net)
{
	struct pfcp_net *pn;

	pn = net_generic(net, pfcp_net_id);
	INIT_LIST_HEAD(&pn->pfcp_dev_list);

	return 0;
}
/*
 * Per-netns exit: destroy every PFCP device tied to @net, under RTNL.
 *
 * Two passes are needed because pfcp_newlink() links a device to the list of
 * the namespace its socket is created in, not to the namespace the device
 * itself lives in:
 *   1. PFCP devices that live in @net, found by their rtnl_link_ops;
 *   2. devices still on @net's pfcp_dev_list.
 * pfcp_dellink() unlinks each device from its list, so a device handled in
 * the first pass is not seen again in the second.
 */
static void __net_exit pfcp_net_exit(struct net *net)
{
	struct pfcp_net *pn = net_generic(net, pfcp_net_id);
	struct pfcp_dev *cur, *tmp;
	struct net_device *ndev;
	LIST_HEAD(kill_list);

	rtnl_lock();

	for_each_netdev(net, ndev) {
		if (ndev->rtnl_link_ops == &pfcp_link_ops)
			pfcp_dellink(ndev, &kill_list);
	}

	list_for_each_entry_safe(cur, tmp, &pn->pfcp_dev_list, list) {
		pfcp_dellink(cur->dev, &kill_list);
	}

	unregister_netdevice_many(&kill_list);

	rtnl_unlock();
}
/* Per-netns hooks and the size/id of the per-netns struct pfcp_net area. */
static struct pernet_operations pfcp_net_ops = {
	.init	= pfcp_net_init,
	.exit	= pfcp_net_exit,
	.id	= &pfcp_net_id,
	.size	= sizeof(struct pfcp_net),
};
/*
 * Module init: register the pernet operations, then the rtnl link ops.
 * If the link ops fail to register, the pernet operations are unregistered
 * again. Any failure is logged.
 *
 * Returns 0 on success or the error of the step that failed.
 */
static int __init pfcp_init(void)
{
	int err;

	err = register_pernet_subsys(&pfcp_net_ops);
	if (!err) {
		err = rtnl_link_register(&pfcp_link_ops);
		if (!err)
			return 0;

		unregister_pernet_subsys(&pfcp_net_ops);
	}

	pr_err("loading PFCP module failed: err %d\n", err);
	return err;
}
late_initcall(pfcp_init);
/*
 * Module exit: undo pfcp_init() in reverse order - link ops first, then the
 * pernet operations.
 */
static void __exit pfcp_exit(void)
{
	rtnl_link_unregister(&pfcp_link_ops);
	unregister_pernet_subsys(&pfcp_net_ops);

	pr_info("PFCP module unloaded\n");
}
module_exit(pfcp_exit);
/* Module metadata; the rtnl link alias matches pfcp_link_ops.kind ("pfcp"). */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Wojciech Drewek <wojciech.drewek@intel.com>");
MODULE_DESCRIPTION("Interface driver for PFCP encapsulated traffic");
MODULE_ALIAS_RTNL_LINK("pfcp");