net/mlx5: Serialize module cleanup with reload and remove

Currently, the remove and reload flows can run in parallel with module
cleanup. This design is error-prone. For example, aux_drivers callbacks are
called from both the cleanup and remove flows under different locking,
which can cause a deadlock [1].
Hence, serialize module cleanup with reload and remove.

[1]
       cleanup                        remove
       -------                        ------
   auxiliary_driver_unregister();
                                     devl_lock()
                                      auxiliary_device_delete(mlx5e_aux)
    device_lock(mlx5e_aux)
     devl_lock()
                                       device_lock(mlx5e_aux)

Fixes: 912cebf420 ("net/mlx5e: Connect ethernet part to auxiliary bus")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
This commit is contained in:
Shay Drory 2022-12-14 22:16:23 +02:00 committed by Saeed Mahameed
parent 184e1e4474
commit 8f0d1451ec

View File

@@ -2110,7 +2110,7 @@ static int __init mlx5_init(void)
 	mlx5_core_verify_params();
 	mlx5_register_debugfs();
-	err = pci_register_driver(&mlx5_core_driver);
+	err = mlx5e_init();
 	if (err)
 		goto err_debug;
@@ -2118,16 +2118,16 @@ static int __init mlx5_init(void)
 	if (err)
 		goto err_sf;
-	err = mlx5e_init();
+	err = pci_register_driver(&mlx5_core_driver);
 	if (err)
-		goto err_en;
+		goto err_pci;
 	return 0;
-err_en:
+err_pci:
 	mlx5_sf_driver_unregister();
 err_sf:
-	pci_unregister_driver(&mlx5_core_driver);
+	mlx5e_cleanup();
 err_debug:
 	mlx5_unregister_debugfs();
 	return err;
@@ -2135,9 +2135,9 @@ err_debug:
 static void __exit mlx5_cleanup(void)
 {
-	mlx5e_cleanup();
-	mlx5_sf_driver_unregister();
 	pci_unregister_driver(&mlx5_core_driver);
+	mlx5_sf_driver_unregister();
+	mlx5e_cleanup();
 	mlx5_unregister_debugfs();
 }