net: Fix dev_net(dev) race in unregister_netdevice_notifier_dev_net().
After the cited commit, dev_net(dev) is fetched before holding RTNL
and passed to __unregister_netdevice_notifier_net().
However, dev_net(dev) might be different after holding RTNL.
In the reported case [0], while removing a VF device, its netns was
being dismantled and the VF was moved to init_net.
So the following sequence is basically illegal when dev was fetched
without lookup:

    net = dev_net(dev);
    rtnl_net_lock(net);

Let's use a new helper, rtnl_net_dev_lock(), to fix the race.
It fetches dev_net_rcu(dev), bumps its net->passive, and checks whether
dev_net_rcu(dev) has changed after rtnl_net_lock().
[0]:
BUG: KASAN: slab-use-after-free in notifier_call_chain (kernel/notifier.c:75 (discriminator 2))
Read of size 8 at addr ffff88810cefb4c8 by task test-bridge-lag/21127
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
Call Trace:
<TASK>
dump_stack_lvl (lib/dump_stack.c:123)
print_report (mm/kasan/report.c:379 mm/kasan/report.c:489)
kasan_report (mm/kasan/report.c:604)
notifier_call_chain (kernel/notifier.c:75 (discriminator 2))
call_netdevice_notifiers_info (net/core/dev.c:2011)
unregister_netdevice_many_notify (net/core/dev.c:11551)
unregister_netdevice_queue (net/core/dev.c:11487)
unregister_netdev (net/core/dev.c:11635)
mlx5e_remove (drivers/net/ethernet/mellanox/mlx5/core/en_main.c:6552 drivers/net/ethernet/mellanox/mlx5/core/en_main.c:6579) mlx5_core
auxiliary_bus_remove (drivers/base/auxiliary.c:230)
device_release_driver_internal (drivers/base/dd.c:1275 drivers/base/dd.c:1296)
bus_remove_device (./include/linux/kobject.h:193 drivers/base/base.h:73 drivers/base/bus.c:583)
device_del (drivers/base/power/power.h:142 drivers/base/core.c:3855)
mlx5_rescan_drivers_locked (./include/linux/auxiliary_bus.h:241 drivers/net/ethernet/mellanox/mlx5/core/dev.c:333 drivers/net/ethernet/mellanox/mlx5/core/dev.c:535 drivers/net/ethernet/mellanox/mlx5/core/dev.c:549) mlx5_core
mlx5_unregister_device (drivers/net/ethernet/mellanox/mlx5/core/dev.c:468) mlx5_core
mlx5_uninit_one (./include/linux/instrumented.h:68 ./include/asm-generic/bitops/instrumented-non-atomic.h:141 drivers/net/ethernet/mellanox/mlx5/core/main.c:1563) mlx5_core
remove_one (drivers/net/ethernet/mellanox/mlx5/core/main.c:965 drivers/net/ethernet/mellanox/mlx5/core/main.c:2019) mlx5_core
pci_device_remove (./include/linux/pm_runtime.h:129 drivers/pci/pci-driver.c:475)
device_release_driver_internal (drivers/base/dd.c:1275 drivers/base/dd.c:1296)
unbind_store (drivers/base/bus.c:245)
kernfs_fop_write_iter (fs/kernfs/file.c:338)
vfs_write (fs/read_write.c:587 (discriminator 1) fs/read_write.c:679 (discriminator 1))
ksys_write (fs/read_write.c:732)
do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1))
entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
RIP: 0033:0x7f6a4d5018b7
Fixes: 7fb1073300 ("net: Hold rtnl_net_lock() in (un)?register_netdevice_notifier_dev_net().")
Reported-by: Yael Chemla <ychemla@nvidia.com>
Closes: https://lore.kernel.org/netdev/146eabfe-123c-4970-901e-e961b4c09bc3@nvidia.com/
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20250217191129.19967-3-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
e57a632021
commit
65161fb544
1 changed files with 44 additions and 4 deletions
|
@ -2070,6 +2070,42 @@ static void __move_netdevice_notifier_net(struct net *src_net,
|
|||
__register_netdevice_notifier_net(dst_net, nb, true);
|
||||
}
|
||||
|
||||
/*
 * Take rtnl_net_lock() on the netns that @dev currently belongs to.
 *
 * The netns is sampled under rcu_read_lock() and its net->passive count
 * is bumped before the lock is taken.  Once the lock is held, the sampled
 * netns is compared with dev's netns again; if they differ, the lock and
 * the passive count are dropped and the whole sequence is retried.
 *
 * On return, rtnl_net_lock() is held on the sampled netns and its passive
 * count is still elevated; rtnl_net_dev_unlock() undoes both.
 */
static void rtnl_net_dev_lock(struct net_device *dev)
{
	for (;;) {
		struct net *net;

		/* netns might be being dismantled. */
		rcu_read_lock();
		net = dev_net_rcu(dev);
		net_passive_inc(net);
		rcu_read_unlock();

		rtnl_net_lock(net);

#ifdef CONFIG_NET_NS
		/* dev might have been moved to another netns. */
		if (!net_eq(net, rcu_access_pointer(dev->nd_net.net))) {
			/* Locked a stale netns: undo and sample again. */
			rtnl_net_unlock(net);
			net_passive_dec(net);
			continue;
		}
#endif
		break;
	}
}
|
||||
|
||||
/*
 * Counterpart of rtnl_net_dev_lock(): release rtnl_net_lock() on the
 * netns returned by dev_net(@dev) and drop the net->passive count on
 * that same netns.
 */
static void rtnl_net_dev_unlock(struct net_device *dev)
{
	struct net *locked_net;

	locked_net = dev_net(dev);

	rtnl_net_unlock(locked_net);
	net_passive_dec(locked_net);
}
|
||||
|
||||
int register_netdevice_notifier_dev_net(struct net_device *dev,
|
||||
struct notifier_block *nb,
|
||||
struct netdev_net_notifier *nn)
|
||||
|
@ -2077,6 +2113,11 @@ int register_netdevice_notifier_dev_net(struct net_device *dev,
|
|||
struct net *net = dev_net(dev);
|
||||
int err;
|
||||
|
||||
/* rtnl_net_lock() assumes dev is not yet published by
|
||||
* register_netdevice().
|
||||
*/
|
||||
DEBUG_NET_WARN_ON_ONCE(!list_empty(&dev->dev_list));
|
||||
|
||||
rtnl_net_lock(net);
|
||||
err = __register_netdevice_notifier_net(net, nb, false);
|
||||
if (!err) {
|
||||
|
@ -2093,13 +2134,12 @@ int unregister_netdevice_notifier_dev_net(struct net_device *dev,
|
|||
struct notifier_block *nb,
|
||||
struct netdev_net_notifier *nn)
|
||||
{
|
||||
struct net *net = dev_net(dev);
|
||||
int err;
|
||||
|
||||
rtnl_net_lock(net);
|
||||
rtnl_net_dev_lock(dev);
|
||||
list_del(&nn->list);
|
||||
err = __unregister_netdevice_notifier_net(net, nb);
|
||||
rtnl_net_unlock(net);
|
||||
err = __unregister_netdevice_notifier_net(dev_net(dev), nb);
|
||||
rtnl_net_dev_unlock(dev);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue