pfcp_newlink() links the device to a list in dev_net(dev) instead
of net, where a udp tunnel socket is created.
Even when net is removed, the device stays alive on dev_net(dev).
Then, removing net triggers the splat below. [0]
In this example, pfcp0 is created in ns2, but the udp socket is
created in ns1.
ip netns add ns1
ip netns add ns2
ip -n ns1 link add netns ns2 name pfcp0 type pfcp
ip netns del ns1
Let's link the device to the socket's netns instead.
Now, pfcp_net_exit() needs another netdev iteration to remove
all pfcp devices in the netns.
pfcp_dev_list is not used under RCU, so the list API is converted
to the non-RCU variant.
pfcp_net_exit() can be converted to .exit_batch_rtnl() in net-next.
[0]:
ref_tracker: net notrefcnt@00000000128b34dc has 1/1 users at
sk_alloc (./include/net/net_namespace.h:345 net/core/sock.c:2236)
inet_create (net/ipv4/af_inet.c:326 net/ipv4/af_inet.c:252)
__sock_create (net/socket.c:1558)
udp_sock_create4 (net/ipv4/udp_tunnel_core.c:18)
pfcp_create_sock (drivers/net/pfcp.c:168)
pfcp_newlink (drivers/net/pfcp.c:182 drivers/net/pfcp.c:197)
rtnl_newlink (net/core/rtnetlink.c:3786 net/core/rtnetlink.c:3897 net/core/rtnetlink.c:4012)
rtnetlink_rcv_msg (net/core/rtnetlink.c:6922)
netlink_rcv_skb (net/netlink/af_netlink.c:2542)
netlink_unicast (net/netlink/af_netlink.c:1321 net/netlink/af_netlink.c:1347)
netlink_sendmsg (net/netlink/af_netlink.c:1891)
____sys_sendmsg (net/socket.c:711 net/socket.c:726 net/socket.c:2583)
___sys_sendmsg (net/socket.c:2639)
__sys_sendmsg (net/socket.c:2669)
do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83)
entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
WARNING: CPU: 1 PID: 11 at lib/ref_tracker.c:179 ref_tracker_dir_exit (lib/ref_tracker.c:179)
Modules linked in:
CPU: 1 UID: 0 PID: 11 Comm: kworker/u16:0 Not tainted 6.13.0-rc5-00147-g4c1224501e9d #5
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
Workqueue: netns cleanup_net
RIP: 0010:ref_tracker_dir_exit (lib/ref_tracker.c:179)
Code: 00 00 00 fc ff df 4d 8b 26 49 bd 00 01 00 00 00 00 ad de 4c 39 f5 0f 85 df 00 00 00 48 8b 74 24 08 48 89 df e8 a5 cc 12 02 90 <0f> 0b 90 48 8d 6b 44 be 04 00 00 00 48 89 ef e8 80 de 67 ff 48 89
RSP: 0018:ff11000007f3fb60 EFLAGS: 00010286
RAX: 00000000000020ef RBX: ff1100000d6481e0 RCX: 1ffffffff0e40d82
RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff8423ee3c
RBP: ff1100000d648230 R08: 0000000000000001 R09: fffffbfff0e395af
R10: 0000000000000001 R11: 0000000000000000 R12: ff1100000d648230
R13: dead000000000100 R14: ff1100000d648230 R15: dffffc0000000000
FS: 0000000000000000(0000) GS:ff1100006ce80000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00005620e1363990 CR3: 000000000eeb2002 CR4: 0000000000771ef0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400
PKRU: 55555554
Call Trace:
<TASK>
? __warn (kernel/panic.c:748)
? ref_tracker_dir_exit (lib/ref_tracker.c:179)
? report_bug (lib/bug.c:201 lib/bug.c:219)
? handle_bug (arch/x86/kernel/traps.c:285)
? exc_invalid_op (arch/x86/kernel/traps.c:309 (discriminator 1))
? asm_exc_invalid_op (./arch/x86/include/asm/idtentry.h:621)
? _raw_spin_unlock_irqrestore (./arch/x86/include/asm/irqflags.h:42 ./arch/x86/include/asm/irqflags.h:97 ./arch/x86/include/asm/irqflags.h:155 ./include/linux/spinlock_api_smp.h:151 kernel/locking/spinlock.c:194)
? ref_tracker_dir_exit (lib/ref_tracker.c:179)
? __pfx_ref_tracker_dir_exit (lib/ref_tracker.c:158)
? kfree (mm/slub.c:4613 mm/slub.c:4761)
net_free (net/core/net_namespace.c:476 net/core/net_namespace.c:467)
cleanup_net (net/core/net_namespace.c:664 (discriminator 3))
process_one_work (kernel/workqueue.c:3229)
worker_thread (kernel/workqueue.c:3304 kernel/workqueue.c:3391)
kthread (kernel/kthread.c:389)
ret_from_fork (arch/x86/kernel/process.c:147)
ret_from_fork_asm (arch/x86/entry/entry_64.S:257)
</TASK>
Fixes: 76c8764ef3 ("pfcp: add PFCP module")
Reported-by: Xiao Liang <shaw.leon@gmail.com>
Closes: https://lore.kernel.org/netdev/20250104125732.17335-1-shaw.leon@gmail.com/
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
306 lines
6.4 KiB
C
306 lines
6.4 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* PFCP according to 3GPP TS 29.244
|
|
*
|
|
* Copyright (C) 2022, Intel Corporation.
|
|
*/
|
|
|
|
#include <linux/module.h>
|
|
#include <linux/netdevice.h>
|
|
#include <linux/rculist.h>
|
|
#include <linux/skbuff.h>
|
|
#include <linux/types.h>
|
|
|
|
#include <net/udp.h>
|
|
#include <net/udp_tunnel.h>
|
|
#include <net/pfcp.h>
|
|
|
|
struct pfcp_dev {
|
|
struct list_head list;
|
|
|
|
struct socket *sock;
|
|
struct net_device *dev;
|
|
struct net *net;
|
|
|
|
struct gro_cells gro_cells;
|
|
};
|
|
|
|
/* Per-netns id published through pfcp_net_ops.id; key for net_generic() lookups. */
static unsigned int pfcp_net_id __read_mostly;
|
|
|
|
struct pfcp_net {
|
|
struct list_head pfcp_dev_list;
|
|
};
|
|
|
|
static void
|
|
pfcp_session_recv(struct pfcp_dev *pfcp, struct sk_buff *skb,
|
|
struct pfcp_metadata *md)
|
|
{
|
|
struct pfcphdr_session *unparsed = pfcp_hdr_session(skb);
|
|
|
|
md->seid = unparsed->seid;
|
|
md->type = PFCP_TYPE_SESSION;
|
|
}
|
|
|
|
static void
|
|
pfcp_node_recv(struct pfcp_dev *pfcp, struct sk_buff *skb,
|
|
struct pfcp_metadata *md)
|
|
{
|
|
md->type = PFCP_TYPE_NODE;
|
|
}
|
|
|
|
static int pfcp_encap_recv(struct sock *sk, struct sk_buff *skb)
|
|
{
|
|
IP_TUNNEL_DECLARE_FLAGS(flags) = { };
|
|
struct metadata_dst *tun_dst;
|
|
struct pfcp_metadata *md;
|
|
struct pfcphdr *unparsed;
|
|
struct pfcp_dev *pfcp;
|
|
|
|
if (unlikely(!pskb_may_pull(skb, PFCP_HLEN)))
|
|
goto drop;
|
|
|
|
pfcp = rcu_dereference_sk_user_data(sk);
|
|
if (unlikely(!pfcp))
|
|
goto drop;
|
|
|
|
unparsed = pfcp_hdr(skb);
|
|
|
|
ip_tunnel_flags_zero(flags);
|
|
tun_dst = udp_tun_rx_dst(skb, sk->sk_family, flags, 0,
|
|
sizeof(*md));
|
|
if (unlikely(!tun_dst))
|
|
goto drop;
|
|
|
|
md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
|
|
if (unlikely(!md))
|
|
goto drop;
|
|
|
|
if (unparsed->flags & PFCP_SEID_FLAG)
|
|
pfcp_session_recv(pfcp, skb, md);
|
|
else
|
|
pfcp_node_recv(pfcp, skb, md);
|
|
|
|
__set_bit(IP_TUNNEL_PFCP_OPT_BIT, tun_dst->u.tun_info.key.tun_flags);
|
|
tun_dst->u.tun_info.options_len = sizeof(*md);
|
|
|
|
if (unlikely(iptunnel_pull_header(skb, PFCP_HLEN, skb->protocol,
|
|
!net_eq(sock_net(sk),
|
|
dev_net(pfcp->dev)))))
|
|
goto drop;
|
|
|
|
skb_dst_set(skb, (struct dst_entry *)tun_dst);
|
|
|
|
skb_reset_network_header(skb);
|
|
skb_reset_mac_header(skb);
|
|
skb->dev = pfcp->dev;
|
|
|
|
gro_cells_receive(&pfcp->gro_cells, skb);
|
|
|
|
return 0;
|
|
drop:
|
|
kfree_skb(skb);
|
|
return 0;
|
|
}
|
|
|
|
static void pfcp_del_sock(struct pfcp_dev *pfcp)
|
|
{
|
|
udp_tunnel_sock_release(pfcp->sock);
|
|
pfcp->sock = NULL;
|
|
}
|
|
|
|
static void pfcp_dev_uninit(struct net_device *dev)
|
|
{
|
|
struct pfcp_dev *pfcp = netdev_priv(dev);
|
|
|
|
gro_cells_destroy(&pfcp->gro_cells);
|
|
pfcp_del_sock(pfcp);
|
|
}
|
|
|
|
static int pfcp_dev_init(struct net_device *dev)
|
|
{
|
|
struct pfcp_dev *pfcp = netdev_priv(dev);
|
|
|
|
pfcp->dev = dev;
|
|
|
|
return gro_cells_init(&pfcp->gro_cells, dev);
|
|
}
|
|
|
|
static const struct net_device_ops pfcp_netdev_ops = {
|
|
.ndo_init = pfcp_dev_init,
|
|
.ndo_uninit = pfcp_dev_uninit,
|
|
.ndo_get_stats64 = dev_get_tstats64,
|
|
};
|
|
|
|
static const struct device_type pfcp_type = {
|
|
.name = "pfcp",
|
|
};
|
|
|
|
static void pfcp_link_setup(struct net_device *dev)
|
|
{
|
|
dev->netdev_ops = &pfcp_netdev_ops;
|
|
dev->needs_free_netdev = true;
|
|
SET_NETDEV_DEVTYPE(dev, &pfcp_type);
|
|
|
|
dev->hard_header_len = 0;
|
|
dev->addr_len = 0;
|
|
|
|
dev->type = ARPHRD_NONE;
|
|
dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
|
|
dev->priv_flags |= IFF_NO_QUEUE;
|
|
|
|
netif_keep_dst(dev);
|
|
}
|
|
|
|
static struct socket *pfcp_create_sock(struct pfcp_dev *pfcp)
|
|
{
|
|
struct udp_tunnel_sock_cfg tuncfg = {};
|
|
struct udp_port_cfg udp_conf = {
|
|
.local_ip.s_addr = htonl(INADDR_ANY),
|
|
.family = AF_INET,
|
|
};
|
|
struct net *net = pfcp->net;
|
|
struct socket *sock;
|
|
int err;
|
|
|
|
udp_conf.local_udp_port = htons(PFCP_PORT);
|
|
|
|
err = udp_sock_create(net, &udp_conf, &sock);
|
|
if (err)
|
|
return ERR_PTR(err);
|
|
|
|
tuncfg.sk_user_data = pfcp;
|
|
tuncfg.encap_rcv = pfcp_encap_recv;
|
|
tuncfg.encap_type = 1;
|
|
|
|
setup_udp_tunnel_sock(net, sock, &tuncfg);
|
|
|
|
return sock;
|
|
}
|
|
|
|
static int pfcp_add_sock(struct pfcp_dev *pfcp)
|
|
{
|
|
pfcp->sock = pfcp_create_sock(pfcp);
|
|
|
|
return PTR_ERR_OR_ZERO(pfcp->sock);
|
|
}
|
|
|
|
static int pfcp_newlink(struct net *net, struct net_device *dev,
|
|
struct nlattr *tb[], struct nlattr *data[],
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
struct pfcp_dev *pfcp = netdev_priv(dev);
|
|
struct pfcp_net *pn;
|
|
int err;
|
|
|
|
pfcp->net = net;
|
|
|
|
err = pfcp_add_sock(pfcp);
|
|
if (err) {
|
|
netdev_dbg(dev, "failed to add pfcp socket %d\n", err);
|
|
goto exit_err;
|
|
}
|
|
|
|
err = register_netdevice(dev);
|
|
if (err) {
|
|
netdev_dbg(dev, "failed to register pfcp netdev %d\n", err);
|
|
goto exit_del_pfcp_sock;
|
|
}
|
|
|
|
pn = net_generic(net, pfcp_net_id);
|
|
list_add(&pfcp->list, &pn->pfcp_dev_list);
|
|
|
|
netdev_dbg(dev, "registered new PFCP interface\n");
|
|
|
|
return 0;
|
|
|
|
exit_del_pfcp_sock:
|
|
pfcp_del_sock(pfcp);
|
|
exit_err:
|
|
pfcp->net = NULL;
|
|
return err;
|
|
}
|
|
|
|
static void pfcp_dellink(struct net_device *dev, struct list_head *head)
|
|
{
|
|
struct pfcp_dev *pfcp = netdev_priv(dev);
|
|
|
|
list_del(&pfcp->list);
|
|
unregister_netdevice_queue(dev, head);
|
|
}
|
|
|
|
/* rtnetlink glue for link kind "pfcp". */
static struct rtnl_link_ops pfcp_link_ops __read_mostly = {
	.kind		= "pfcp",
	.priv_size	= sizeof(struct pfcp_dev),
	.setup		= pfcp_link_setup,
	.newlink	= pfcp_newlink,
	.dellink	= pfcp_dellink,
};
|
|
|
|
/* pernet .init: start the namespace with an empty pfcp device list. */
static int __net_init pfcp_net_init(struct net *net)
{
	struct pfcp_net *pernet = net_generic(net, pfcp_net_id);

	INIT_LIST_HEAD(&pernet->pfcp_dev_list);

	return 0;
}
|
|
|
|
/*
 * pernet .exit: remove every pfcp device tied to @net, under RTNL.
 *
 * Two passes are needed: the first collects pfcp devices that live in @net,
 * the second collects devices still on @net's pfcp list, i.e. those whose
 * UDP socket was created in @net.  pfcp_dellink() unlinks each device from
 * its list, so a device handled in the first pass is not seen again.
 */
static void __net_exit pfcp_net_exit(struct net *net)
{
	struct pfcp_net *pernet = net_generic(net, pfcp_net_id);
	struct pfcp_dev *priv, *tmp;
	struct net_device *dev;
	LIST_HEAD(kill_list);

	rtnl_lock();

	for_each_netdev(net, dev) {
		if (dev->rtnl_link_ops != &pfcp_link_ops)
			continue;

		pfcp_dellink(dev, &kill_list);
	}

	list_for_each_entry_safe(priv, tmp, &pernet->pfcp_dev_list, list) {
		pfcp_dellink(priv->dev, &kill_list);
	}

	unregister_netdevice_many(&kill_list);

	rtnl_unlock();
}
|
|
|
|
static struct pernet_operations pfcp_net_ops = {
|
|
.init = pfcp_net_init,
|
|
.exit = pfcp_net_exit,
|
|
.id = &pfcp_net_id,
|
|
.size = sizeof(struct pfcp_net),
|
|
};
|
|
|
|
/*
 * Module init: register the pernet operations first, then the "pfcp" link
 * kind.  If the second step fails the first is rolled back.  Any failure is
 * logged and its errno returned.
 */
static int __init pfcp_init(void)
{
	int rc;

	rc = register_pernet_subsys(&pfcp_net_ops);
	if (!rc) {
		rc = rtnl_link_register(&pfcp_link_ops);
		if (!rc)
			return 0;

		unregister_pernet_subsys(&pfcp_net_ops);
	}

	pr_err("loading PFCP module failed: err %d\n", rc);
	return rc;
}
late_initcall(pfcp_init);
|
|
|
|
/* Module exit: undo pfcp_init() in reverse order of registration. */
static void __exit pfcp_exit(void)
{
	rtnl_link_unregister(&pfcp_link_ops);
	unregister_pernet_subsys(&pfcp_net_ops);

	pr_info("PFCP module unloaded\n");
}
module_exit(pfcp_exit);
|
|
|
|
/* Module metadata; the rtnl-link alias names the "pfcp" link kind. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Wojciech Drewek <wojciech.drewek@intel.com>");
MODULE_DESCRIPTION("Interface driver for PFCP encapsulated traffic");
MODULE_ALIAS_RTNL_LINK("pfcp");
|