From a45835a0bb6ef7d5ddbc0714dd760de979cb6ece Mon Sep 17 00:00:00 2001 From: Tony Battersby Date: Tue, 14 May 2024 15:57:29 -0400 Subject: [PATCH] bonding: fix oops during rmmod "rmmod bonding" causes an oops ever since commit cc317ea3d927 ("bonding: remove redundant NULL check in debugfs function"). Here are the relevant functions being called: bonding_exit() bond_destroy_debugfs() debugfs_remove_recursive(bonding_debug_root); bonding_debug_root = NULL; <--------- SET TO NULL HERE bond_netlink_fini() rtnl_link_unregister() __rtnl_link_unregister() unregister_netdevice_many_notify() bond_uninit() bond_debug_unregister() (commit removed check for bonding_debug_root == NULL) debugfs_remove() simple_recursive_removal() down_write() -> OOPS However, reverting the bad commit does not solve the problem completely because the original code contains a race that could cause the same oops, although it was much less likely to be triggered unintentionally: CPU1 rmmod bonding bonding_exit() bond_destroy_debugfs() debugfs_remove_recursive(bonding_debug_root); CPU2 echo -bond0 > /sys/class/net/bonding_masters bond_uninit() bond_debug_unregister() if (!bonding_debug_root) CPU1 bonding_debug_root = NULL; So do NOT revert the bad commit (since the removed checks were racy anyway), and instead change the order of actions taken during module removal. The same oops can also happen if there is an error during module init, so apply the same fix there. Fixes: cc317ea3d927 ("bonding: remove redundant NULL check in debugfs function") Cc: stable@vger.kernel.org Signed-off-by: Tony Battersby Reviewed-by: Simon Horman Acked-by: Jay Vosburgh Link: https://lore.kernel.org/r/641f914f-3216-4eeb-87dd-91b78aa97773@cybernetics.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 3b0c7758ea4f..3c3fcce4acd4 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -6477,16 +6477,16 @@ static int __init bonding_init(void) if (res) goto out; + bond_create_debugfs(); + res = register_pernet_subsys(&bond_net_ops); if (res) - goto out; + goto err_net_ops; res = bond_netlink_init(); if (res) goto err_link; - bond_create_debugfs(); - for (i = 0; i < max_bonds; i++) { res = bond_create(&init_net, NULL); if (res) @@ -6501,10 +6501,11 @@ static int __init bonding_init(void) out: return res; err: - bond_destroy_debugfs(); bond_netlink_fini(); err_link: unregister_pernet_subsys(&bond_net_ops); +err_net_ops: + bond_destroy_debugfs(); goto out; } @@ -6513,11 +6514,11 @@ static void __exit bonding_exit(void) { unregister_netdevice_notifier(&bond_netdev_notifier); - bond_destroy_debugfs(); - bond_netlink_fini(); unregister_pernet_subsys(&bond_net_ops); + bond_destroy_debugfs(); + #ifdef CONFIG_NET_POLL_CONTROLLER /* Make sure we don't have an imbalance on our netpoll blocking */ WARN_ON(atomic_read(&netpoll_block_tx));