linux/drivers/scsi/fcoe/fcoe.c

2754 lines
72 KiB
C
Raw Normal View History

/*
* Copyright(c) 2007 - 2009 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
* Maintained at www.Open-FCoE.org
*/
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/crc32.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/ctype.h>
#include <linux/workqueue.h>
#include <net/dcbnl.h>
#include <net/dcbevent.h>
#include <scsi/scsi_tcq.h>
#include <scsi/scsicam.h>
#include <scsi/scsi_transport.h>
#include <scsi/scsi_transport_fc.h>
#include <net/rtnetlink.h>
#include <scsi/fc/fc_encaps.h>
#include <scsi/fc/fc_fip.h>
#include <scsi/libfc.h>
#include <scsi/fc_frame.h>
#include <scsi/libfcoe.h>
#include "fcoe.h"
MODULE_AUTHOR("Open-FCoE.org");
MODULE_DESCRIPTION("FCoE");
MODULE_LICENSE("GPL v2");
/* Performance tuning parameters for fcoe */
static unsigned int fcoe_ddp_min = 4096;
module_param_named(ddp_min, fcoe_ddp_min, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(ddp_min, "Minimum I/O size in bytes for " \
"Direct Data Placement (DDP).");
unsigned int fcoe_debug_logging;
module_param_named(debug_logging, fcoe_debug_logging, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(debug_logging, "a bit mask of logging levels");
static DEFINE_MUTEX(fcoe_config_mutex);
static struct workqueue_struct *fcoe_wq;
/* fcoe_percpu_clean completion. Waiter protected by fcoe_create_mutex */
static DECLARE_COMPLETION(fcoe_flush_completion);
/* fcoe host list */
/* must only by accessed under the RTNL mutex */
static LIST_HEAD(fcoe_hostlist);
static DEFINE_PER_CPU(struct fcoe_percpu_s, fcoe_percpu);
/* Function Prototypes */
static int fcoe_reset(struct Scsi_Host *);
static int fcoe_xmit(struct fc_lport *, struct fc_frame *);
static int fcoe_rcv(struct sk_buff *, struct net_device *,
struct packet_type *, struct net_device *);
static int fcoe_percpu_receive_thread(void *);
static void fcoe_percpu_clean(struct fc_lport *);
static int fcoe_link_speed_update(struct fc_lport *);
static int fcoe_link_ok(struct fc_lport *);
static struct fc_lport *fcoe_hostlist_lookup(const struct net_device *);
static int fcoe_hostlist_add(const struct fc_lport *);
static int fcoe_device_notification(struct notifier_block *, ulong, void *);
static void fcoe_dev_setup(void);
static void fcoe_dev_cleanup(void);
static struct fcoe_interface
*fcoe_hostlist_lookup_port(const struct net_device *);
static int fcoe_fip_recv(struct sk_buff *, struct net_device *,
struct packet_type *, struct net_device *);
static void fcoe_fip_send(struct fcoe_ctlr *, struct sk_buff *);
static void fcoe_update_src_mac(struct fc_lport *, u8 *);
static u8 *fcoe_get_src_mac(struct fc_lport *);
static void fcoe_destroy_work(struct work_struct *);
static int fcoe_ddp_setup(struct fc_lport *, u16, struct scatterlist *,
unsigned int);
static int fcoe_ddp_done(struct fc_lport *, u16);
static int fcoe_ddp_target(struct fc_lport *, u16, struct scatterlist *,
unsigned int);
static int fcoe_cpu_callback(struct notifier_block *, unsigned long, void *);
static int fcoe_dcb_app_notification(struct notifier_block *notifier,
ulong event, void *ptr);
static bool fcoe_match(struct net_device *netdev);
static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode);
static int fcoe_destroy(struct net_device *netdev);
static int fcoe_enable(struct net_device *netdev);
static int fcoe_disable(struct net_device *netdev);
static struct fc_seq *fcoe_elsct_send(struct fc_lport *,
u32 did, struct fc_frame *,
unsigned int op,
void (*resp)(struct fc_seq *,
struct fc_frame *,
void *),
void *, u32 timeout);
static void fcoe_recv_frame(struct sk_buff *skb);
static void fcoe_get_lesb(struct fc_lport *, struct fc_els_lesb *);
/* notification function for packets from net device */
static struct notifier_block fcoe_notifier = {
.notifier_call = fcoe_device_notification,
};
/* notification function for CPU hotplug events */
static struct notifier_block fcoe_cpu_notifier = {
.notifier_call = fcoe_cpu_callback,
};
/* notification function for DCB events */
static struct notifier_block dcb_notifier = {
.notifier_call = fcoe_dcb_app_notification,
};
static struct scsi_transport_template *fcoe_nport_scsi_transport;
static struct scsi_transport_template *fcoe_vport_scsi_transport;
static int fcoe_vport_destroy(struct fc_vport *);
static int fcoe_vport_create(struct fc_vport *, bool disabled);
static int fcoe_vport_disable(struct fc_vport *, bool disable);
static void fcoe_set_vport_symbolic_name(struct fc_vport *);
static void fcoe_set_port_id(struct fc_lport *, u32, struct fc_frame *);
static struct libfc_function_template fcoe_libfc_fcn_templ = {
.frame_send = fcoe_xmit,
.ddp_setup = fcoe_ddp_setup,
.ddp_done = fcoe_ddp_done,
.ddp_target = fcoe_ddp_target,
.elsct_send = fcoe_elsct_send,
.get_lesb = fcoe_get_lesb,
.lport_set_port_id = fcoe_set_port_id,
};
static struct fc_function_template fcoe_nport_fc_functions = {
.show_host_node_name = 1,
.show_host_port_name = 1,
.show_host_supported_classes = 1,
.show_host_supported_fc4s = 1,
.show_host_active_fc4s = 1,
.show_host_maxframe_size = 1,
.show_host_serial_number = 1,
.show_host_manufacturer = 1,
.show_host_model = 1,
.show_host_model_description = 1,
.show_host_hardware_version = 1,
.show_host_driver_version = 1,
.show_host_firmware_version = 1,
.show_host_optionrom_version = 1,
.show_host_port_id = 1,
.show_host_supported_speeds = 1,
.get_host_speed = fc_get_host_speed,
.show_host_speed = 1,
.show_host_port_type = 1,
.get_host_port_state = fc_get_host_port_state,
.show_host_port_state = 1,
.show_host_symbolic_name = 1,
.dd_fcrport_size = sizeof(struct fc_rport_libfc_priv),
.show_rport_maxframe_size = 1,
.show_rport_supported_classes = 1,
.show_host_fabric_name = 1,
.show_starget_node_name = 1,
.show_starget_port_name = 1,
.show_starget_port_id = 1,
.set_rport_dev_loss_tmo = fc_set_rport_loss_tmo,
.show_rport_dev_loss_tmo = 1,
.get_fc_host_stats = fc_get_host_stats,
.issue_fc_host_lip = fcoe_reset,
.terminate_rport_io = fc_rport_terminate_io,
.vport_create = fcoe_vport_create,
.vport_delete = fcoe_vport_destroy,
.vport_disable = fcoe_vport_disable,
.set_vport_symbolic_name = fcoe_set_vport_symbolic_name,
.bsg_request = fc_lport_bsg_request,
};
static struct fc_function_template fcoe_vport_fc_functions = {
.show_host_node_name = 1,
.show_host_port_name = 1,
.show_host_supported_classes = 1,
.show_host_supported_fc4s = 1,
.show_host_active_fc4s = 1,
.show_host_maxframe_size = 1,
.show_host_serial_number = 1,
.show_host_manufacturer = 1,
.show_host_model = 1,
.show_host_model_description = 1,
.show_host_hardware_version = 1,
.show_host_driver_version = 1,
.show_host_firmware_version = 1,
.show_host_optionrom_version = 1,
.show_host_port_id = 1,
.show_host_supported_speeds = 1,
.get_host_speed = fc_get_host_speed,
.show_host_speed = 1,
.show_host_port_type = 1,
.get_host_port_state = fc_get_host_port_state,
.show_host_port_state = 1,
.show_host_symbolic_name = 1,
.dd_fcrport_size = sizeof(struct fc_rport_libfc_priv),
.show_rport_maxframe_size = 1,
.show_rport_supported_classes = 1,
.show_host_fabric_name = 1,
.show_starget_node_name = 1,
.show_starget_port_name = 1,
.show_starget_port_id = 1,
.set_rport_dev_loss_tmo = fc_set_rport_loss_tmo,
.show_rport_dev_loss_tmo = 1,
.get_fc_host_stats = fc_get_host_stats,
.issue_fc_host_lip = fcoe_reset,
.terminate_rport_io = fc_rport_terminate_io,
.bsg_request = fc_lport_bsg_request,
};
static struct scsi_host_template fcoe_shost_template = {
.module = THIS_MODULE,
.name = "FCoE Driver",
.proc_name = FCOE_NAME,
.queuecommand = fc_queuecommand,
.eh_abort_handler = fc_eh_abort,
.eh_device_reset_handler = fc_eh_device_reset,
.eh_host_reset_handler = fc_eh_host_reset,
.slave_alloc = fc_slave_alloc,
.change_queue_depth = fc_change_queue_depth,
.change_queue_type = fc_change_queue_type,
.this_id = -1,
.cmd_per_lun = 3,
.can_queue = FCOE_MAX_OUTSTANDING_COMMANDS,
.use_clustering = ENABLE_CLUSTERING,
.sg_tablesize = SG_ALL,
.max_sectors = 0xffff,
};
/**
* fcoe_interface_setup() - Setup a FCoE interface
* @fcoe: The new FCoE interface
* @netdev: The net device that the fcoe interface is on
*
* Returns : 0 for success
* Locking: must be called with the RTNL mutex held
*/
static int fcoe_interface_setup(struct fcoe_interface *fcoe,
struct net_device *netdev)
{
struct fcoe_ctlr *fip = &fcoe->ctlr;
struct netdev_hw_addr *ha;
struct net_device *real_dev;
u8 flogi_maddr[ETH_ALEN];
const struct net_device_ops *ops;
fcoe->netdev = netdev;
/* Let LLD initialize for FCoE */
ops = netdev->netdev_ops;
if (ops->ndo_fcoe_enable) {
if (ops->ndo_fcoe_enable(netdev))
FCOE_NETDEV_DBG(netdev, "Failed to enable FCoE"
" specific feature for LLD.\n");
}
/* Do not support for bonding device */
if (netdev->priv_flags & IFF_BONDING && netdev->flags & IFF_MASTER) {
FCOE_NETDEV_DBG(netdev, "Bonded interfaces not supported\n");
return -EOPNOTSUPP;
}
/* look for SAN MAC address, if multiple SAN MACs exist, only
* use the first one for SPMA */
real_dev = (netdev->priv_flags & IFF_802_1Q_VLAN) ?
vlan_dev_real_dev(netdev) : netdev;
fcoe->realdev = real_dev;
rcu_read_lock();
for_each_dev_addr(real_dev, ha) {
if ((ha->type == NETDEV_HW_ADDR_T_SAN) &&
(is_valid_ether_addr(ha->addr))) {
memcpy(fip->ctl_src_addr, ha->addr, ETH_ALEN);
fip->spma = 1;
break;
}
}
rcu_read_unlock();
/* setup Source Mac Address */
if (!fip->spma)
memcpy(fip->ctl_src_addr, netdev->dev_addr, netdev->addr_len);
/*
* Add FCoE MAC address as second unicast MAC address
* or enter promiscuous mode if not capable of listening
* for multiple unicast MACs.
*/
memcpy(flogi_maddr, (u8[6]) FC_FCOE_FLOGI_MAC, ETH_ALEN);
dev_uc_add(netdev, flogi_maddr);
if (fip->spma)
dev_uc_add(netdev, fip->ctl_src_addr);
[SCSI] libfcoe: fcoe: fnic: add FIP VN2VN point-to-multipoint support The FC-BB-6 committee is proposing a new FIP usage model called VN_port to VN_port mode. It allows VN_ports to discover each other over a loss-free L2 Ethernet without any FCF or Fibre-channel fabric services. This is point-to-multipoint. There is also a variant of this called point-to-point which provides for making sure there is just one pair of ports operating over the Ethernet fabric. We add these new states: VNMP_START, _PROBE1, _PROBE2, _CLAIM, and _UP. These usually go quickly in that sequence. After waiting a random amount of time up to 100 ms in START, we select a pseudo-random proposed locally-unique port ID and send out probes in states PROBE1 and PROBE2, 100 ms apart. If no probe responses are heard, we proceed to CLAIM state 400 ms later and send a claim notification. We wait another 400 ms to receive claim responses, which give us a list of the other nodes on the network, including their FC-4 capabilities. After another 400 ms we go to VNMP_UP state and should start interoperating with any of the nodes for whic we receivec claim responses. More details are in the spec.j Add the new mode as FIP_MODE_VN2VN. The driver must specify explicitly that it wants to operate in this mode. There is no automatic detection between point-to-multipoint and fabric mode, and the local port initialization is affected, so it isn't anticipated that there will ever be any such automatic switchover. It may eventually be possible to have both fabric and VN2VN modes on the same L2 network, which may be done by two separate local VN_ports (lports). When in VN2VN mode, FIP replaces libfc's fabric-oriented discovery module with its own simple code that adds remote ports as they are discovered from incoming claim notifications and responses. These hooks are placed by fcoe_disc_init(). A linear list of discovered vn_ports is maintained under the fcoe_ctlr struct. It is expected to be short for now, and accessed infrequently. It is kept under RCU for lock-ordering reasons. The lport and/or rport mutexes may be held when we need to lookup a fcoe_vnport during an ELS send. Change fcoe_ctlr_encaps() to lookup the destination vn_port in the list of peers for the destination MAC address of the FIP-encapsulated frame. Add a new function fcoe_disc_init() to initialize just the discovery portion of libfcoe for VN2VN mode. Signed-off-by: Joe Eykholt <jeykholt@cisco.com> Signed-off-by: Robert Love <robert.w.love@intel.com> Signed-off-by: James Bottomley <James.Bottomley@suse.de>
2010-07-21 06:20:30 +08:00
if (fip->mode == FIP_MODE_VN2VN) {
dev_mc_add(netdev, FIP_ALL_VN2VN_MACS);
dev_mc_add(netdev, FIP_ALL_P2P_MACS);
} else
dev_mc_add(netdev, FIP_ALL_ENODE_MACS);
/*
* setup the receive function from ethernet driver
* on the ethertype for the given device
*/
fcoe->fcoe_packet_type.func = fcoe_rcv;
fcoe->fcoe_packet_type.type = __constant_htons(ETH_P_FCOE);
fcoe->fcoe_packet_type.dev = netdev;
dev_add_pack(&fcoe->fcoe_packet_type);
fcoe->fip_packet_type.func = fcoe_fip_recv;
fcoe->fip_packet_type.type = htons(ETH_P_FIP);
fcoe->fip_packet_type.dev = netdev;
dev_add_pack(&fcoe->fip_packet_type);
return 0;
}
/**
* fcoe_interface_create() - Create a FCoE interface on a net device
* @netdev: The net device to create the FCoE interface on
* @fip_mode: The mode to use for FIP
*
* Returns: pointer to a struct fcoe_interface or NULL on error
*/
static struct fcoe_interface *fcoe_interface_create(struct net_device *netdev,
enum fip_state fip_mode)
{
struct fcoe_interface *fcoe;
int err;
if (!try_module_get(THIS_MODULE)) {
FCOE_NETDEV_DBG(netdev,
"Could not get a reference to the module\n");
fcoe = ERR_PTR(-EBUSY);
goto out;
}
fcoe = kzalloc(sizeof(*fcoe), GFP_KERNEL);
if (!fcoe) {
FCOE_NETDEV_DBG(netdev, "Could not allocate fcoe structure\n");
fcoe = ERR_PTR(-ENOMEM);
goto out_putmod;
}
dev_hold(netdev);
/*
* Initialize FIP.
*/
fcoe_ctlr_init(&fcoe->ctlr, fip_mode);
fcoe->ctlr.send = fcoe_fip_send;
fcoe->ctlr.update_mac = fcoe_update_src_mac;
fcoe->ctlr.get_src_addr = fcoe_get_src_mac;
err = fcoe_interface_setup(fcoe, netdev);
if (err) {
fcoe_ctlr_destroy(&fcoe->ctlr);
kfree(fcoe);
dev_put(netdev);
fcoe = ERR_PTR(err);
goto out_putmod;
}
goto out;
out_putmod:
module_put(THIS_MODULE);
out:
return fcoe;
}
/**
* fcoe_interface_remove() - remove FCoE interface from netdev
* @fcoe: The FCoE interface to be cleaned up
*
* Caller must be holding the RTNL mutex
*/
static void fcoe_interface_remove(struct fcoe_interface *fcoe)
{
struct net_device *netdev = fcoe->netdev;
struct fcoe_ctlr *fip = &fcoe->ctlr;
u8 flogi_maddr[ETH_ALEN];
const struct net_device_ops *ops;
/*
* Don't listen for Ethernet packets anymore.
* synchronize_net() ensures that the packet handlers are not running
* on another CPU. dev_remove_pack() would do that, this calls the
* unsyncronized version __dev_remove_pack() to avoid multiple delays.
*/
__dev_remove_pack(&fcoe->fcoe_packet_type);
__dev_remove_pack(&fcoe->fip_packet_type);
synchronize_net();
/* Delete secondary MAC addresses */
memcpy(flogi_maddr, (u8[6]) FC_FCOE_FLOGI_MAC, ETH_ALEN);
dev_uc_del(netdev, flogi_maddr);
if (fip->spma)
dev_uc_del(netdev, fip->ctl_src_addr);
[SCSI] libfcoe: fcoe: fnic: add FIP VN2VN point-to-multipoint support The FC-BB-6 committee is proposing a new FIP usage model called VN_port to VN_port mode. It allows VN_ports to discover each other over a loss-free L2 Ethernet without any FCF or Fibre-channel fabric services. This is point-to-multipoint. There is also a variant of this called point-to-point which provides for making sure there is just one pair of ports operating over the Ethernet fabric. We add these new states: VNMP_START, _PROBE1, _PROBE2, _CLAIM, and _UP. These usually go quickly in that sequence. After waiting a random amount of time up to 100 ms in START, we select a pseudo-random proposed locally-unique port ID and send out probes in states PROBE1 and PROBE2, 100 ms apart. If no probe responses are heard, we proceed to CLAIM state 400 ms later and send a claim notification. We wait another 400 ms to receive claim responses, which give us a list of the other nodes on the network, including their FC-4 capabilities. After another 400 ms we go to VNMP_UP state and should start interoperating with any of the nodes for whic we receivec claim responses. More details are in the spec.j Add the new mode as FIP_MODE_VN2VN. The driver must specify explicitly that it wants to operate in this mode. There is no automatic detection between point-to-multipoint and fabric mode, and the local port initialization is affected, so it isn't anticipated that there will ever be any such automatic switchover. It may eventually be possible to have both fabric and VN2VN modes on the same L2 network, which may be done by two separate local VN_ports (lports). When in VN2VN mode, FIP replaces libfc's fabric-oriented discovery module with its own simple code that adds remote ports as they are discovered from incoming claim notifications and responses. These hooks are placed by fcoe_disc_init(). A linear list of discovered vn_ports is maintained under the fcoe_ctlr struct. It is expected to be short for now, and accessed infrequently. It is kept under RCU for lock-ordering reasons. The lport and/or rport mutexes may be held when we need to lookup a fcoe_vnport during an ELS send. Change fcoe_ctlr_encaps() to lookup the destination vn_port in the list of peers for the destination MAC address of the FIP-encapsulated frame. Add a new function fcoe_disc_init() to initialize just the discovery portion of libfcoe for VN2VN mode. Signed-off-by: Joe Eykholt <jeykholt@cisco.com> Signed-off-by: Robert Love <robert.w.love@intel.com> Signed-off-by: James Bottomley <James.Bottomley@suse.de>
2010-07-21 06:20:30 +08:00
if (fip->mode == FIP_MODE_VN2VN) {
dev_mc_del(netdev, FIP_ALL_VN2VN_MACS);
dev_mc_del(netdev, FIP_ALL_P2P_MACS);
} else
dev_mc_del(netdev, FIP_ALL_ENODE_MACS);
/* Tell the LLD we are done w/ FCoE */
ops = netdev->netdev_ops;
if (ops->ndo_fcoe_disable) {
if (ops->ndo_fcoe_disable(netdev))
FCOE_NETDEV_DBG(netdev, "Failed to disable FCoE"
" specific feature for LLD.\n");
}
fcoe->removed = 1;
}
/**
* fcoe_interface_cleanup() - Clean up a FCoE interface
* @fcoe: The FCoE interface to be cleaned up
*/
static void fcoe_interface_cleanup(struct fcoe_interface *fcoe)
{
struct net_device *netdev = fcoe->netdev;
struct fcoe_ctlr *fip = &fcoe->ctlr;
rtnl_lock();
if (!fcoe->removed)
fcoe_interface_remove(fcoe);
[SCSI] fcoe: Fix deadlock between fip's recv_work and rtnl The rtnl cannot be held durrng the fcoe_interface_put. If it is the last reference on the fcoe_interface the fcoe_ctlr_destroy will be called as a part of the cleanup, ultimately calling cancel_work_sync(&fip->recv_work); If we are processing a flogi response we will be in the recv_work context and we will lock the rtnl to add a new unicast MAC address. This is how the deadlock can occur. The fix is simply to move the rtnl_lock/unlock into fcoe_interface_cleanup so that it can be unlocked before fcoe_interface_put is called. Here is the lockdep report: Jul 21 11:26:35 bubba [ 223.870702] ul 21 11:26:35 bubba [ 223.870704] ======================================================= Jul 21 11:26:35 bubba [ 223.871255] [ INFO: possible circular locking dependency detected ] Jul 21 11:26:35 bubba [ 223.871530] 3.0.0-rc7+ #1 Jul 21 11:26:35 bubba [ 223.871797] ------------------------------------------------------- Jul 21 11:26:35 bubba [ 223.872072] lockdeptest.sh/3464 is trying to acquire lock: Jul 21 11:26:35 bubba [ 223.872345] ((&fip->recv_work) Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff810531f1>] wait_on_work+0x0/0xbd Jul 21 11:26:35 bubba [ 223.873022] Jul 21 11:26:35 bubba [ 223.873023] but task is already holding lock: Jul 21 11:26:35 bubba [ 223.873555] (rtnl_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff813e8233>] rtnl_lock+0x12/0x14 Jul 21 11:26:35 bubba [ 223.874229] Jul 21 11:26:35 bubba [ 223.874230] which lock already depends on the new lock. Jul 21 11:26:35 bubba [ 223.874231] Jul 21 11:26:35 bubba [ 223.875032] Jul 21 11:26:35 bubba [ 223.875033] the existing dependency chain (in reverse order) is: Jul 21 11:26:35 bubba [ 223.875573] Jul 21 11:26:35 bubba [ 223.875573] -> #1 Jul 21 11:26:35 bubba (rtnl_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba : Jul 21 11:26:35 bubba [ 223.876301] Jul 21 11:26:35 bubba [<ffffffff8106c14a>] lock_acquire+0xd2/0xf7 Jul 21 11:26:35 bubba [ 223.876645] Jul 21 11:26:35 bubba [<ffffffff8151d975>] __mutex_lock_common+0x47/0x30d Jul 21 11:26:35 bubba [ 223.876991] Jul 21 11:26:35 bubba [<ffffffff8151dd36>] mutex_lock_nested+0x3b/0x40 Jul 21 11:26:35 bubba [ 223.877334] Jul 21 11:26:35 bubba [<ffffffff813e8233>] rtnl_lock+0x12/0x14 Jul 21 11:26:35 bubba [ 223.877675] Jul 21 11:26:35 bubba [<ffffffffa003d5a0>] fcoe_update_src_mac+0x2b/0x80 [fcoe] Jul 21 11:26:35 bubba [ 223.878022] Jul 21 11:26:35 bubba [<ffffffffa003d698>] fcoe_flogi_resp+0x5e/0x79 [fcoe] Jul 21 11:26:35 bubba [ 223.878366] Jul 21 11:26:35 bubba [<ffffffffa001566f>] fc_exch_recv+0x7f5/0x9da [libfc] Jul 21 11:26:35 bubba [ 223.878713] Jul 21 11:26:35 bubba [<ffffffffa00327d8>] fcoe_ctlr_recv_work+0x71f/0x10dc [libfcoe] Jul 21 11:26:35 bubba [ 223.879258] Jul 21 11:26:35 bubba [<ffffffff81053761>] process_one_work+0x1d7/0x347 Jul 21 11:26:35 bubba [ 223.879601] Jul 21 11:26:35 bubba [<ffffffff81054ade>] worker_thread+0xf8/0x17c Jul 21 11:26:35 bubba [ 223.879944] Jul 21 11:26:35 bubba [<ffffffff81058184>] kthread+0x7d/0x85 Jul 21 11:26:35 bubba [ 223.880287] Jul 21 11:26:35 bubba [<ffffffff81526414>] kernel_thread_helper+0x4/0x10 Jul 21 11:26:35 bubba [ 223.880634] Jul 21 11:26:35 bubba [ 223.880635] -> #0 Jul 21 11:26:35 bubba ((&fip->recv_work) Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba : Jul 21 11:26:35 bubba [ 223.881357] Jul 21 11:26:35 bubba [<ffffffff8106b93e>] __lock_acquire+0xb1d/0xe2c Jul 21 11:26:35 bubba [ 223.881695] Jul 21 11:26:35 bubba [<ffffffff8106c14a>] lock_acquire+0xd2/0xf7 Jul 21 11:26:35 bubba [ 223.882033] Jul 21 11:26:35 bubba [<ffffffff81053241>] wait_on_work+0x50/0xbd Jul 21 11:26:35 bubba [ 223.882378] Jul 21 11:26:35 bubba [<ffffffff81053b32>] __cancel_work_timer+0xb6/0xf4 Jul 21 11:26:35 bubba [ 223.882718] Jul 21 11:26:35 bubba [<ffffffff81053b8a>] cancel_work_sync+0xb/0xd Jul 21 11:26:35 bubba [ 223.883057] Jul 21 11:26:35 bubba [<ffffffffa00317e6>] fcoe_ctlr_destroy+0x1d/0x67 [libfcoe] Jul 21 11:26:35 bubba [ 223.883399] Jul 21 11:26:35 bubba [<ffffffffa003e51e>] fcoe_interface_release+0x21/0x45 [fcoe] Jul 21 11:26:35 bubba [ 223.883940] Jul 21 11:26:35 bubba [<ffffffff811fbbe6>] kref_put+0x43/0x4d Jul 21 11:26:35 bubba [ 223.884280] Jul 21 11:26:35 bubba [<ffffffffa003ebba>] fcoe_interface_put+0x17/0x19 [fcoe] Jul 21 11:26:35 bubba [ 223.884624] Jul 21 11:26:35 bubba [<ffffffffa003f2a6>] fcoe_interface_cleanup+0x188/0x193 [fcoe] Jul 21 11:26:35 bubba [ 223.885163] Jul 21 11:26:35 bubba [<ffffffffa003f303>] fcoe_destroy+0x52/0x72 [fcoe] Jul 21 11:26:35 bubba [ 223.885502] Jul 21 11:26:35 bubba [<ffffffffa00340a4>] fcoe_transport_destroy+0xab/0x110 [libfcoe] Jul 21 11:26:35 bubba [ 223.886045] Jul 21 11:26:35 bubba [<ffffffff81056153>] param_attr_store+0x43/0x62 Jul 21 11:26:35 bubba [ 223.886385] Jul 21 11:26:35 bubba [<ffffffff8105602d>] module_attr_store+0x21/0x25 Jul 21 11:26:35 bubba [ 223.886728] Jul 21 11:26:35 bubba [<ffffffff8114c23d>] sysfs_write_file+0x103/0x13f Jul 21 11:26:35 bubba [ 223.887068] Jul 21 11:26:35 bubba [<ffffffff810f3e7b>] vfs_write+0xa7/0xfa Jul 21 11:26:35 bubba [ 223.887406] Jul 21 11:26:35 bubba [<ffffffff810f4073>] sys_write+0x45/0x69 Jul 21 11:26:35 bubba [ 223.887742] Jul 21 11:26:35 bubba [<ffffffff815252bb>] system_call_fastpath+0x16/0x1b Jul 21 11:26:35 bubba [ 223.888083] Jul 21 11:26:35 bubba [ 223.888084] other info that might help us debug this: Jul 21 11:26:35 bubba [ 223.888085] Jul 21 11:26:35 bubba [ 223.888879] Possible unsafe locking scenario: Jul 21 11:26:35 bubba [ 223.888881] Jul 21 11:26:35 bubba [ 223.889411] CPU0 CPU1 Jul 21 11:26:35 bubba [ 223.889683] ---- ---- Jul 21 11:26:35 bubba [ 223.889955] lock( Jul 21 11:26:35 bubba rtnl_mutex Jul 21 11:26:35 bubba ); Jul 21 11:26:35 bubba [ 223.890349] lock( Jul 21 11:26:35 bubba (&fip->recv_work) Jul 21 11:26:35 bubba ); Jul 21 11:26:35 bubba [ 223.890751] lock( Jul 21 11:26:35 bubba rtnl_mutex Jul 21 11:26:35 bubba ); Jul 21 11:26:35 bubba [ 223.891154] lock( Jul 21 11:26:35 bubba (&fip->recv_work) Jul 21 11:26:35 bubba ); Jul 21 11:26:35 bubba [ 223.891549] Jul 21 11:26:35 bubba [ 223.891550] *** DEADLOCK *** Jul 21 11:26:35 bubba [ 223.891551] Jul 21 11:26:35 bubba [ 223.892347] 6 locks held by lockdeptest.sh/3464: Jul 21 11:26:35 bubba [ 223.892621] #0: Jul 21 11:26:35 bubba (&buffer->mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff8114c171>] sysfs_write_file+0x37/0x13f Jul 21 11:26:35 bubba [ 223.893359] #1: Jul 21 11:26:35 bubba (s_active Jul 21 11:26:35 bubba ){++++.+} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff8114c21c>] sysfs_write_file+0xe2/0x13f Jul 21 11:26:35 bubba [ 223.894094] #2: Jul 21 11:26:35 bubba (param_lock Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff81056146>] param_attr_store+0x36/0x62 Jul 21 11:26:35 bubba [ 223.894835] #3: Jul 21 11:26:35 bubba (ft_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffffa0034017>] fcoe_transport_destroy+0x1e/0x110 [libfcoe] Jul 21 11:26:35 bubba [ 223.895574] #4: Jul 21 11:26:35 bubba (fcoe_config_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffffa003f2c9>] fcoe_destroy+0x18/0x72 [fcoe] Jul 21 11:26:35 bubba [ 223.896314] #5: Jul 21 11:26:35 bubba (rtnl_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff813e8233>] rtnl_lock+0x12/0x14 Jul 21 11:26:35 bubba [ 223.897047] Jul 21 11:26:35 bubba [ 223.897048] stack backtrace: Jul 21 11:26:35 bubba [ 223.897578] Pid: 3464, comm: lockdeptest.sh Not tainted 3.0.0-rc7+ #1 Jul 21 11:26:35 bubba [ 223.897853] Call Trace: Jul 21 11:26:35 bubba [ 223.898128] [<ffffffff81068e16>] print_circular_bug+0x1f8/0x209 Jul 21 11:26:35 bubba [ 223.898416] [<ffffffff8106b93e>] __lock_acquire+0xb1d/0xe2c Jul 21 11:26:35 bubba [ 223.898699] [<ffffffff810531f1>] ? wait_on_cpu_work+0xe6/0xe6 Jul 21 11:26:35 bubba [ 223.898982] [<ffffffff8106c14a>] lock_acquire+0xd2/0xf7 Jul 21 11:26:35 bubba [ 223.899263] [<ffffffff810531f1>] ? wait_on_cpu_work+0xe6/0xe6 Jul 21 11:26:35 bubba [ 223.899547] [<ffffffff8104a097>] ? mod_timer+0x8f/0x98 Jul 21 11:26:35 bubba [ 223.899827] [<ffffffff81053241>] wait_on_work+0x50/0xbd Jul 21 11:26:35 bubba [ 223.900108] [<ffffffff810531f1>] ? wait_on_cpu_work+0xe6/0xe6 Jul 21 11:26:35 bubba [ 223.900390] [<ffffffff81053b32>] __cancel_work_timer+0xb6/0xf4 Jul 21 11:26:35 bubba [ 223.900671] [<ffffffff81053b8a>] cancel_work_sync+0xb/0xd Jul 21 11:26:35 bubba [ 223.900953] [<ffffffffa00317e6>] fcoe_ctlr_destroy+0x1d/0x67 [libfcoe] Jul 21 11:26:35 bubba [ 223.901237] [<ffffffffa003e51e>] fcoe_interface_release+0x21/0x45 [fcoe] Jul 21 11:26:35 bubba [ 223.901522] [<ffffffffa003e4fd>] ? fcoe_enable+0x6b/0x6b [fcoe] Jul 21 11:26:35 bubba [ 223.901803] [<ffffffff811fbbe6>] kref_put+0x43/0x4d Jul 21 11:26:35 bubba [ 223.902083] [<ffffffffa003ebba>] fcoe_interface_put+0x17/0x19 [fcoe] Jul 21 11:26:35 bubba [ 223.902367] [<ffffffffa003f2a6>] fcoe_interface_cleanup+0x188/0x193 [fcoe] Jul 21 11:26:35 bubba [ 223.902653] [<ffffffff8151dd36>] ? mutex_lock_nested+0x3b/0x40 Jul 21 11:26:35 bubba [ 223.902939] [<ffffffffa003f303>] fcoe_destroy+0x52/0x72 [fcoe] Jul 21 11:26:35 bubba [ 223.903223] [<ffffffffa00340a4>] fcoe_transport_destroy+0xab/0x110 [libfcoe] Jul 21 11:26:35 bubba [ 223.903508] [<ffffffff81056153>] param_attr_store+0x43/0x62 Jul 21 11:26:35 bubba [ 223.903792] [<ffffffff8105602d>] module_attr_store+0x21/0x25 Jul 21 11:26:35 bubba [ 223.904075] [<ffffffff8114c23d>] sysfs_write_file+0x103/0x13f Jul 21 11:26:35 bubba [ 223.904357] [<ffffffff810f3e7b>] vfs_write+0xa7/0xfa Jul 21 11:26:35 bubba [ 223.904642] [<ffffffff810f51d6>] ? fget_light+0x35/0x96 Jul 21 11:26:35 bubba [ 223.904923] [<ffffffff810f4073>] sys_write+0x45/0x69 Jul 21 11:26:35 bubba [ 223.905204] [<ffffffff815252bb>] system_call_fastpath+0x16/0x1b Jul 21 11:26:36 bubba [ 223.964438] ixgbe 0000:05:00.0: eth3: detected SFP+: 5 Jul 21 11:26:37 bubba [ 225.196702] ixgbe 0000:05:00.0: eth3: NIC Link is Up 10 Gbps, Flow Control: None Signed-off-by: Robert Love <robert.w.love@intel.com> Tested-by: Ross Brattain <ross.b.brattain@intel.com> Reviewed-by: Yi Zou <yi.zou@intel.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2011-08-26 03:40:47 +08:00
rtnl_unlock();
/* Release the self-reference taken during fcoe_interface_create() */
/* tear-down the FCoE controller */
fcoe_ctlr_destroy(fip);
scsi_host_put(fcoe->ctlr.lp->host);
kfree(fcoe);
dev_put(netdev);
module_put(THIS_MODULE);
}
/**
* fcoe_fip_recv() - Handler for received FIP frames
* @skb: The receive skb
* @netdev: The associated net device
* @ptype: The packet_type structure which was used to register this handler
* @orig_dev: The original net_device the the skb was received on.
* (in case dev is a bond)
*
* Returns: 0 for success
*/
static int fcoe_fip_recv(struct sk_buff *skb, struct net_device *netdev,
struct packet_type *ptype,
struct net_device *orig_dev)
{
struct fcoe_interface *fcoe;
fcoe = container_of(ptype, struct fcoe_interface, fip_packet_type);
fcoe_ctlr_recv(&fcoe->ctlr, skb);
return 0;
}
/**
* fcoe_port_send() - Send an Ethernet-encapsulated FIP/FCoE frame
* @port: The FCoE port
* @skb: The FIP/FCoE packet to be sent
*/
static void fcoe_port_send(struct fcoe_port *port, struct sk_buff *skb)
{
if (port->fcoe_pending_queue.qlen)
fcoe_check_wait_queue(port->lport, skb);
else if (fcoe_start_io(skb))
fcoe_check_wait_queue(port->lport, skb);
}
/**
* fcoe_fip_send() - Send an Ethernet-encapsulated FIP frame
* @fip: The FCoE controller
* @skb: The FIP packet to be sent
*/
static void fcoe_fip_send(struct fcoe_ctlr *fip, struct sk_buff *skb)
{
skb->dev = fcoe_from_ctlr(fip)->netdev;
fcoe_port_send(lport_priv(fip->lp), skb);
}
/**
* fcoe_update_src_mac() - Update the Ethernet MAC filters
* @lport: The local port to update the source MAC on
* @addr: Unicast MAC address to add
*
* Remove any previously-set unicast MAC filter.
* Add secondary FCoE MAC address filter for our OUI.
*/
static void fcoe_update_src_mac(struct fc_lport *lport, u8 *addr)
{
struct fcoe_port *port = lport_priv(lport);
struct fcoe_interface *fcoe = port->priv;
if (!is_zero_ether_addr(port->data_src_addr))
dev_uc_del(fcoe->netdev, port->data_src_addr);
if (!is_zero_ether_addr(addr))
dev_uc_add(fcoe->netdev, addr);
memcpy(port->data_src_addr, addr, ETH_ALEN);
}
/**
* fcoe_get_src_mac() - return the Ethernet source address for an lport
* @lport: libfc lport
*/
static u8 *fcoe_get_src_mac(struct fc_lport *lport)
{
struct fcoe_port *port = lport_priv(lport);
return port->data_src_addr;
}
/**
* fcoe_lport_config() - Set up a local port
* @lport: The local port to be setup
*
* Returns: 0 for success
*/
static int fcoe_lport_config(struct fc_lport *lport)
{
lport->link_up = 0;
lport->qfull = 0;
lport->max_retry_count = 3;
lport->max_rport_retry_count = 3;
lport->e_d_tov = 2 * 1000; /* FC-FS default */
lport->r_a_tov = 2 * 2 * 1000;
lport->service_params = (FCP_SPPF_INIT_FCN | FCP_SPPF_RD_XRDY_DIS |
FCP_SPPF_RETRY | FCP_SPPF_CONF_COMPL);
lport->does_npiv = 1;
fc_lport_init_stats(lport);
/* lport fc_lport related configuration */
fc_lport_config(lport);
/* offload related configuration */
lport->crc_offload = 0;
lport->seq_offload = 0;
lport->lro_enabled = 0;
lport->lro_xid = 0;
lport->lso_max = 0;
return 0;
}
/**
* fcoe_netdev_features_change - Updates the lport's offload flags based
* on the LLD netdev's FCoE feature flags
*/
static void fcoe_netdev_features_change(struct fc_lport *lport,
struct net_device *netdev)
{
mutex_lock(&lport->lp_mutex);
if (netdev->features & NETIF_F_SG)
lport->sg_supp = 1;
else
lport->sg_supp = 0;
if (netdev->features & NETIF_F_FCOE_CRC) {
lport->crc_offload = 1;
FCOE_NETDEV_DBG(netdev, "Supports FCCRC offload\n");
} else {
lport->crc_offload = 0;
}
if (netdev->features & NETIF_F_FSO) {
lport->seq_offload = 1;
lport->lso_max = netdev->gso_max_size;
FCOE_NETDEV_DBG(netdev, "Supports LSO for max len 0x%x\n",
lport->lso_max);
} else {
lport->seq_offload = 0;
lport->lso_max = 0;
}
if (netdev->fcoe_ddp_xid) {
lport->lro_enabled = 1;
lport->lro_xid = netdev->fcoe_ddp_xid;
FCOE_NETDEV_DBG(netdev, "Supports LRO for max xid 0x%x\n",
lport->lro_xid);
} else {
lport->lro_enabled = 0;
lport->lro_xid = 0;
}
mutex_unlock(&lport->lp_mutex);
}
/**
* fcoe_netdev_config() - Set up net devive for SW FCoE
* @lport: The local port that is associated with the net device
* @netdev: The associated net device
*
* Must be called after fcoe_lport_config() as it will use local port mutex
*
* Returns: 0 for success
*/
static int fcoe_netdev_config(struct fc_lport *lport, struct net_device *netdev)
{
u32 mfs;
u64 wwnn, wwpn;
struct fcoe_interface *fcoe;
struct fcoe_port *port;
/* Setup lport private data to point to fcoe softc */
port = lport_priv(lport);
fcoe = port->priv;
/*
* Determine max frame size based on underlying device and optional
* user-configured limit. If the MFS is too low, fcoe_link_ok()
* will return 0, so do this first.
*/
mfs = netdev->mtu;
if (netdev->features & NETIF_F_FCOE_MTU) {
mfs = FCOE_MTU;
FCOE_NETDEV_DBG(netdev, "Supports FCOE_MTU of %d bytes\n", mfs);
}
mfs -= (sizeof(struct fcoe_hdr) + sizeof(struct fcoe_crc_eof));
if (fc_set_mfs(lport, mfs))
return -EINVAL;
/* offload features support */
fcoe_netdev_features_change(lport, netdev);
skb_queue_head_init(&port->fcoe_pending_queue);
port->fcoe_pending_queue_active = 0;
setup_timer(&port->timer, fcoe_queue_timer, (unsigned long)lport);
fcoe_link_speed_update(lport);
if (!lport->vport) {
if (fcoe_get_wwn(netdev, &wwnn, NETDEV_FCOE_WWNN))
wwnn = fcoe_wwn_from_mac(fcoe->ctlr.ctl_src_addr, 1, 0);
fc_set_wwnn(lport, wwnn);
if (fcoe_get_wwn(netdev, &wwpn, NETDEV_FCOE_WWPN))
wwpn = fcoe_wwn_from_mac(fcoe->ctlr.ctl_src_addr,
2, 0);
fc_set_wwpn(lport, wwpn);
}
return 0;
}
/**
* fcoe_shost_config() - Set up the SCSI host associated with a local port
* @lport: The local port
* @dev: The device associated with the SCSI host
*
* Must be called after fcoe_lport_config() and fcoe_netdev_config()
*
* Returns: 0 for success
*/
static int fcoe_shost_config(struct fc_lport *lport, struct device *dev)
{
int rc = 0;
/* lport scsi host config */
lport->host->max_lun = FCOE_MAX_LUN;
lport->host->max_id = FCOE_MAX_FCP_TARGET;
lport->host->max_channel = 0;
lport->host->max_cmd_len = FCOE_MAX_CMD_LEN;
if (lport->vport)
lport->host->transportt = fcoe_vport_scsi_transport;
else
lport->host->transportt = fcoe_nport_scsi_transport;
/* add the new host to the SCSI-ml */
rc = scsi_add_host(lport->host, dev);
if (rc) {
FCOE_NETDEV_DBG(fcoe_netdev(lport), "fcoe_shost_config: "
"error on scsi_add_host\n");
return rc;
}
if (!lport->vport)
fc_host_max_npiv_vports(lport->host) = USHRT_MAX;
snprintf(fc_host_symbolic_name(lport->host), FC_SYMBOLIC_NAME_SIZE,
"%s v%s over %s", FCOE_NAME, FCOE_VERSION,
fcoe_netdev(lport)->name);
return 0;
}
/**
* fcoe_fdmi_info() - Get FDMI related info from net devive for SW FCoE
* @lport: The local port that is associated with the net device
* @netdev: The associated net device
*
* Must be called after fcoe_shost_config() as it will use local port mutex
*
*/
static void fcoe_fdmi_info(struct fc_lport *lport, struct net_device *netdev)
{
struct fcoe_interface *fcoe;
struct fcoe_port *port;
struct net_device *realdev;
int rc;
struct netdev_fcoe_hbainfo fdmi;
port = lport_priv(lport);
fcoe = port->priv;
realdev = fcoe->realdev;
if (!realdev)
return;
/* No FDMI state m/c for NPIV ports */
if (lport->vport)
return;
if (realdev->netdev_ops->ndo_fcoe_get_hbainfo) {
memset(&fdmi, 0, sizeof(fdmi));
rc = realdev->netdev_ops->ndo_fcoe_get_hbainfo(realdev,
&fdmi);
if (rc) {
printk(KERN_INFO "fcoe: Failed to retrieve FDMI "
"information from netdev.\n");
return;
}
snprintf(fc_host_serial_number(lport->host),
FC_SERIAL_NUMBER_SIZE,
"%s",
fdmi.serial_number);
snprintf(fc_host_manufacturer(lport->host),
FC_SERIAL_NUMBER_SIZE,
"%s",
fdmi.manufacturer);
snprintf(fc_host_model(lport->host),
FC_SYMBOLIC_NAME_SIZE,
"%s",
fdmi.model);
snprintf(fc_host_model_description(lport->host),
FC_SYMBOLIC_NAME_SIZE,
"%s",
fdmi.model_description);
snprintf(fc_host_hardware_version(lport->host),
FC_VERSION_STRING_SIZE,
"%s",
fdmi.hardware_version);
snprintf(fc_host_driver_version(lport->host),
FC_VERSION_STRING_SIZE,
"%s",
fdmi.driver_version);
snprintf(fc_host_optionrom_version(lport->host),
FC_VERSION_STRING_SIZE,
"%s",
fdmi.optionrom_version);
snprintf(fc_host_firmware_version(lport->host),
FC_VERSION_STRING_SIZE,
"%s",
fdmi.firmware_version);
/* Enable FDMI lport states */
lport->fdmi_enabled = 1;
} else {
lport->fdmi_enabled = 0;
printk(KERN_INFO "fcoe: No FDMI support.\n");
}
}
/**
* fcoe_oem_match() - The match routine for the offloaded exchange manager
* @fp: The I/O frame
*
* This routine will be associated with an exchange manager (EM). When
* the libfc exchange handling code is looking for an EM to use it will
* call this routine and pass it the frame that it wishes to send. This
* routine will return True if the associated EM is to be used and False
* if the echange code should continue looking for an EM.
*
* The offload EM that this routine is associated with will handle any
* packets that are for SCSI read requests.
*
* This has been enhanced to work when FCoE stack is operating in target
* mode.
*
* Returns: True for read types I/O, otherwise returns false.
*/
static bool fcoe_oem_match(struct fc_frame *fp)
{
struct fc_frame_header *fh = fc_frame_header_get(fp);
struct fcp_cmnd *fcp;
if (fc_fcp_is_read(fr_fsp(fp)) &&
(fr_fsp(fp)->data_len > fcoe_ddp_min))
return true;
else if ((fr_fsp(fp) == NULL) &&
(fh->fh_r_ctl == FC_RCTL_DD_UNSOL_CMD) &&
(ntohs(fh->fh_rx_id) == FC_XID_UNKNOWN)) {
fcp = fc_frame_payload_get(fp, sizeof(*fcp));
if ((fcp->fc_flags & FCP_CFL_WRDATA) &&
(ntohl(fcp->fc_dl) > fcoe_ddp_min))
return true;
}
return false;
}
/**
* fcoe_em_config() - Allocate and configure an exchange manager
* @lport: The local port that the new EM will be associated with
*
* Returns: 0 on success
*/
static inline int fcoe_em_config(struct fc_lport *lport)
{
struct fcoe_port *port = lport_priv(lport);
struct fcoe_interface *fcoe = port->priv;
struct fcoe_interface *oldfcoe = NULL;
struct net_device *old_real_dev, *cur_real_dev;
u16 min_xid = FCOE_MIN_XID;
u16 max_xid = FCOE_MAX_XID;
/*
* Check if need to allocate an em instance for
* offload exchange ids to be shared across all VN_PORTs/lport.
*/
if (!lport->lro_enabled || !lport->lro_xid ||
(lport->lro_xid >= max_xid)) {
lport->lro_xid = 0;
goto skip_oem;
}
/*
* Reuse existing offload em instance in case
* it is already allocated on real eth device
*/
if (fcoe->netdev->priv_flags & IFF_802_1Q_VLAN)
cur_real_dev = vlan_dev_real_dev(fcoe->netdev);
else
cur_real_dev = fcoe->netdev;
list_for_each_entry(oldfcoe, &fcoe_hostlist, list) {
if (oldfcoe->netdev->priv_flags & IFF_802_1Q_VLAN)
old_real_dev = vlan_dev_real_dev(oldfcoe->netdev);
else
old_real_dev = oldfcoe->netdev;
if (cur_real_dev == old_real_dev) {
fcoe->oem = oldfcoe->oem;
break;
}
}
if (fcoe->oem) {
if (!fc_exch_mgr_add(lport, fcoe->oem, fcoe_oem_match)) {
printk(KERN_ERR "fcoe_em_config: failed to add "
"offload em:%p on interface:%s\n",
fcoe->oem, fcoe->netdev->name);
return -ENOMEM;
}
} else {
fcoe->oem = fc_exch_mgr_alloc(lport, FC_CLASS_3,
FCOE_MIN_XID, lport->lro_xid,
fcoe_oem_match);
if (!fcoe->oem) {
printk(KERN_ERR "fcoe_em_config: failed to allocate "
"em for offload exches on interface:%s\n",
fcoe->netdev->name);
return -ENOMEM;
}
}
/*
* Exclude offload EM xid range from next EM xid range.
*/
min_xid += lport->lro_xid + 1;
skip_oem:
if (!fc_exch_mgr_alloc(lport, FC_CLASS_3, min_xid, max_xid, NULL)) {
printk(KERN_ERR "fcoe_em_config: failed to "
"allocate em on interface %s\n", fcoe->netdev->name);
return -ENOMEM;
}
return 0;
}
/**
* fcoe_if_destroy() - Tear down a SW FCoE instance
* @lport: The local port to be destroyed
[SCSI] fcoe: fix a circular locking issue with rtnl and sysfs mutex Currently rtnl mutex is grabbed during fcoe create, destroy, enable and disable operations while sysfs s_active read mutex is already held, but simultaneously other networking events could try grabbing write s_active mutex while rtnl is already held and that is causing circular lock warning, its detailed log pasted at end. In this log, the rtnl was held before write s_active during device renaming but there are more such cases as Joe reported another instance with tg3 open at:- http://www.open-fcoe.org/pipermail/devel/2010-February/008263.html This patch fixes this issue by not waiting for rtnl mutex during fcoe ops, that means if rtnl mutex is not immediately available then restart_syscall() to allow others waiting in line to grab s_active along with rtnl mutex to finish their work first under these mutex. Currently rtnl mutex was grabbed twice during fcoe_destroy call flow, second grab was from fcoe_if_destroy called from fcoe_destroy after dropping rtnl mutex before calling fcoe_if_destroy, so instead made fcoe_if_destroy always called with rtnl mutex held to have this mutex grabbed only once in this code path. However left matching rtnl_unlock as-is in its original place as it was dropped there for good reason since very next call causes synchronous fip worker flush and if rtnl mutex is still held before flush then that would cause new circular warning between fip->recv_work and rtnl mutex, I've added detailed comment for this on fcoe_if_destroy calling and rtnl muxtes unlocking. ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.33.1linux-stable-2.6.33 #1 ------------------------------------------------------- fcoemon/18823 is trying to acquire lock: (fcoe_config_mutex){+.+.+.}, at: [<ffffffffa02ba5fc>] fcoe_create+0x27/0x4f7 [fcoe] but task is already holding lock: (s_active){++++.+}, at: [<ffffffff8115ef93>] sysfs_get_active_two+0x31/0x48 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (s_active){++++.+}: [<ffffffff81077bdb>] __lock_acquire+0xb73/0xd2b [<ffffffff81077e60>] lock_acquire+0xcd/0xf1 [<ffffffff8115e5df>] sysfs_deactivate+0x8b/0xe0 [<ffffffff8115edfb>] sysfs_addrm_finish+0x36/0x55 [<ffffffff8115d0cc>] sysfs_hash_and_remove+0x53/0x6a [<ffffffff8115f353>] sysfs_remove_link+0x21/0x23 [<ffffffff812b6c93>] device_rename+0x99/0xcb [<ffffffff8138dbf0>] dev_change_name+0xd5/0x1d2 [<ffffffff8138deee>] dev_ifsioc+0x201/0x2ac [<ffffffff8138e4ba>] dev_ioctl+0x521/0x632 [<ffffffff81379e43>] sock_do_ioctl+0x3d/0x47 [<ffffffff8137a254>] sock_ioctl+0x213/0x222 [<ffffffff81114614>] vfs_ioctl+0x32/0xa6 [<ffffffff81114b94>] do_vfs_ioctl+0x490/0x4d6 [<ffffffff81114c30>] sys_ioctl+0x56/0x79 [<ffffffff81009b42>] system_call_fastpath+0x16/0x1b -> #1 (rtnl_mutex){+.+.+.}: [<ffffffff81077bdb>] __lock_acquire+0xb73/0xd2b [<ffffffff81077e60>] lock_acquire+0xcd/0xf1 [<ffffffff8142f343>] __mutex_lock_common+0x4b/0x383 [<ffffffff8142f73f>] mutex_lock_nested+0x3e/0x43 [<ffffffff813959f9>] rtnl_lock+0x17/0x19 [<ffffffff8138ccae>] register_netdevice_notifier+0x1e/0x19b [<ffffffffa02580c1>] 0xffffffffa02580c1 [<ffffffff81002069>] do_one_initcall+0x5e/0x15e [<ffffffff81084094>] sys_init_module+0xd8/0x23a [<ffffffff81009b42>] system_call_fastpath+0x16/0x1b -> #0 (fcoe_config_mutex){+.+.+.}: [<ffffffff81077a85>] __lock_acquire+0xa1d/0xd2b [<ffffffff81077e60>] lock_acquire+0xcd/0xf1 [<ffffffff8142f343>] __mutex_lock_common+0x4b/0x383 [<ffffffff8142f73f>] mutex_lock_nested+0x3e/0x43 [<ffffffffa02ba5fc>] fcoe_create+0x27/0x4f7 [fcoe] [<ffffffff810635b1>] param_attr_store+0x27/0x35 [<ffffffff81063619>] module_attr_store+0x26/0x2a [<ffffffff8115dae3>] sysfs_write_file+0x108/0x144 [<ffffffff81107bd1>] vfs_write+0xae/0x10b [<ffffffff81107cee>] sys_write+0x4a/0x6e [<ffffffff81009b42>] system_call_fastpath+0x16/0x1b other info that might help us debug this: 3 locks held by fcoemon/18823: #0: (&buffer->mutex){+.+.+.}, at: [<ffffffff8115da17>] sysfs_write_file+0x3c/0x144 #1: (s_active){++++.+}, at: [<ffffffff8115ef86>] sysfs_get_active_two+0x24/0x48 #2: (s_active){++++.+}, at: [<ffffffff8115ef93>] sysfs_get_active_two+0x31/0x48 stack backtrace: Pid: 18823, comm: fcoemon Tainted: G W 2.6.33.1linux-stable-2.6.33 #1 Call Trace: [<ffffffff81076c38>] print_circular_bug+0xa8/0xb6 [<ffffffff81077a85>] __lock_acquire+0xa1d/0xd2b [<ffffffffa02ba5fc>] ? fcoe_create+0x27/0x4f7 [fcoe] [<ffffffff81077e60>] lock_acquire+0xcd/0xf1 [<ffffffffa02ba5fc>] ? fcoe_create+0x27/0x4f7 [fcoe] [<ffffffffa02ba5fc>] ? fcoe_create+0x27/0x4f7 [fcoe] [<ffffffff8142f343>] __mutex_lock_common+0x4b/0x383 [<ffffffffa02ba5fc>] ? fcoe_create+0x27/0x4f7 [fcoe] [<ffffffff8106ac70>] ? cpu_clock+0x43/0x5e [<ffffffff81074e12>] ? lockstat_clock+0x11/0x13 [<ffffffff81074e40>] ? lock_release_holdtime+0x2c/0x127 [<ffffffff8115ef93>] ? sysfs_get_active_two+0x31/0x48 [<ffffffff8142f73f>] mutex_lock_nested+0x3e/0x43 [<ffffffffa02ba5fc>] fcoe_create+0x27/0x4f7 [fcoe] [<ffffffff810635b1>] param_attr_store+0x27/0x35 [<ffffffff81063619>] module_attr_store+0x26/0x2a [<ffffffff8115dae3>] sysfs_write_file+0x108/0x144 [<ffffffff81107bd1>] vfs_write+0xae/0x10b [<ffffffff81076596>] ? trace_hardirqs_on_caller+0x125/0x150 [<ffffffff81107cee>] sys_write+0x4a/0x6e [<ffffffff81009b42>] system_call_fastpath+0x16/0x1b Signed-off-by: Vasu Dev <vasu.dev@intel.com> Signed-off-by: Robert Love <robert.w.love@intel.com> Signed-off-by: James Bottomley <James.Bottomley@suse.de>
2010-05-08 06:18:46 +08:00
*
*/
static void fcoe_if_destroy(struct fc_lport *lport)
{
struct fcoe_port *port = lport_priv(lport);
struct fcoe_interface *fcoe = port->priv;
struct net_device *netdev = fcoe->netdev;
FCOE_NETDEV_DBG(netdev, "Destroying interface\n");
/* Logout of the fabric */
fc_fabric_logoff(lport);
/* Cleanup the fc_lport */
fc_lport_destroy(lport);
/* Stop the transmit retry timer */
del_timer_sync(&port->timer);
/* Free existing transmit skbs */
fcoe_clean_pending_queue(lport);
rtnl_lock();
if (!is_zero_ether_addr(port->data_src_addr))
dev_uc_del(netdev, port->data_src_addr);
if (lport->vport)
synchronize_net();
else
fcoe_interface_remove(fcoe);
rtnl_unlock();
/* Free queued packets for the per-CPU receive threads */
fcoe_percpu_clean(lport);
/* Detach from the scsi-ml */
fc_remove_host(lport->host);
scsi_remove_host(lport->host);
[SCSI] libfc: fix NULL pointer dereference bug in fc_fcp_pkt_release This happens when then tearing down the fcoe interface with active I/O. The back trace shows dead000000200200 in RAX, i.e., LIST_POISON2, indicating that the fsp is already being dequeued, which is probably why no complaining was seen in fc_fcp_destroy() about outstanding fsp not freed, since we dequeue it in the end of fc_io_compl() before releasing it. The bug is due to the fact that we have already destroyed lport's scsi_pkt_pool while on-going i/o is still accessing it through fc_fcp_pkt_release(), like this trace or the similar code path from scsi-ml to fc_eh_abort, etc. This is fixed by moving the fc_fcp_destroy() after lport is detached from scsi-ml since fc_fcp_destroy is supposed to called only once where no lport lock is taken, otherwise the fc_fcp_pkt_release() would have to grab the lport lock. BUG: unable to handle kernel NULL pointer dereference at (null) ....... RIP: 0010:[<0000000000000000>] [<(null)>] (null) RSP: 0018:ffff8803270f7b88 EFLAGS: 00010282 RAX: dead000000200200 RBX: ffff880197d2fbc0 RCX: 0000000000005908 RDX: ffff880195ea6d08 RSI: 0000000000000282 RDI: ffff880180f4fec0 RBP: ffff8803270f7bc0 R08: ffff880197d2fbe0 R09: 0000000000000000 R10: ffff88032867f090 R11: 0000000000000000 R12: ffff880195ea6d08 R13: 0000000000000282 R14: ffff880180f4fec0 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8801b5820000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 0000000000000000 CR3: 00000001a6eae000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process fc_rport_eq (pid: 5278, threadinfo ffff8803270f6000, task ffff880326254ab0) Stack: ffffffffa02c39ca ffff8803270f7ba0 ffff88019331cbc0 ffff880197d2fbc0 0000000000000000 ffff8801a8c895e0 ffff8801a8c895e0 ffff8803270f7c10 ffffffffa02c4962 ffff8803270f7be0 ffffffff814c94ab ffff8803270f7c10 Call Trace: [<ffffffffa02c39ca>] ? fc_io_compl+0x10a/0x530 [libfc] [<ffffffffa02c4962>] fc_fcp_complete_locked+0x72/0x150 [libfc] [<ffffffff814c94ab>] ? _spin_unlock_bh+0x1b/0x20 [<ffffffffa02b98ff>] ? fc_exch_done+0x3f/0x60 [libfc] [<ffffffffa02c4a8f>] fc_fcp_retry_cmd+0x4f/0x60 [libfc] [<ffffffffa02c6150>] fc_fcp_recv+0x9b0/0xc30 [libfc] [<ffffffff8106ba7a>] ? _call_console_drivers+0x4a/0x80 [<ffffffff8107d5ec>] ? lock_timer_base+0x3c/0x70 [<ffffffff8107e06b>] ? try_to_del_timer_sync+0x7b/0xe0 [<ffffffffa02b9dcf>] fc_exch_mgr_reset+0x1df/0x250 [libfc] [<ffffffffa02c57a0>] ? fc_fcp_recv+0x0/0xc30 [libfc] [<ffffffffa02c1042>] fc_rport_work+0xf2/0x4e0 [libfc] [<ffffffff8109203e>] ? prepare_to_wait+0x4e/0x80 [<ffffffffa02c0f50>] ? fc_rport_work+0x0/0x4e0 [libfc] [<ffffffff8108c6c0>] worker_thread+0x170/0x2a0 [<ffffffff81091d50>] ? autoremove_wake_function+0x0/0x40 [<ffffffff8108c550>] ? worker_thread+0x0/0x2a0 [<ffffffff810919e6>] kthread+0x96/0xa0 [<ffffffff810141ca>] child_rip+0xa/0x20 [<ffffffff81091950>] ? kthread+0x0/0xa0 [<ffffffff810141c0>] ? child_rip+0x0/0x20 Code: Bad RIP value. RIP [<(null)>] (null) RSP <ffff8803270f7b88> CR2: 0000000000000000 Signed-off-by: Yi Zou <yi.zou@intel.com> Signed-off-by: Robert Love <robert.w.love@intel.com> Signed-off-by: James Bottomley <James.Bottomley@suse.de>
2010-12-01 08:18:07 +08:00
/* Destroy lport scsi_priv */
fc_fcp_destroy(lport);
/* There are no more rports or I/O, free the EM */
fc_exch_mgr_free(lport);
/* Free memory used by statistical counters */
fc_lport_free_stats(lport);
/*
* Release the Scsi_Host for vport but hold on to
* master lport until it fcoe interface fully cleaned-up.
*/
if (lport->vport)
scsi_host_put(lport->host);
}
/**
* fcoe_ddp_setup() - Call a LLD's ddp_setup through the net device
* @lport: The local port to setup DDP for
* @xid: The exchange ID for this DDP transfer
* @sgl: The scatterlist describing this transfer
* @sgc: The number of sg items
*
* Returns: 0 if the DDP context was not configured
*/
static int fcoe_ddp_setup(struct fc_lport *lport, u16 xid,
struct scatterlist *sgl, unsigned int sgc)
{
struct net_device *netdev = fcoe_netdev(lport);
if (netdev->netdev_ops->ndo_fcoe_ddp_setup)
return netdev->netdev_ops->ndo_fcoe_ddp_setup(netdev,
xid, sgl,
sgc);
return 0;
}
/**
* fcoe_ddp_target() - Call a LLD's ddp_target through the net device
* @lport: The local port to setup DDP for
* @xid: The exchange ID for this DDP transfer
* @sgl: The scatterlist describing this transfer
* @sgc: The number of sg items
*
* Returns: 0 if the DDP context was not configured
*/
static int fcoe_ddp_target(struct fc_lport *lport, u16 xid,
struct scatterlist *sgl, unsigned int sgc)
{
struct net_device *netdev = fcoe_netdev(lport);
if (netdev->netdev_ops->ndo_fcoe_ddp_target)
return netdev->netdev_ops->ndo_fcoe_ddp_target(netdev, xid,
sgl, sgc);
return 0;
}
/**
* fcoe_ddp_done() - Call a LLD's ddp_done through the net device
* @lport: The local port to complete DDP on
* @xid: The exchange ID for this DDP transfer
*
* Returns: the length of data that have been completed by DDP
*/
static int fcoe_ddp_done(struct fc_lport *lport, u16 xid)
{
struct net_device *netdev = fcoe_netdev(lport);
if (netdev->netdev_ops->ndo_fcoe_ddp_done)
return netdev->netdev_ops->ndo_fcoe_ddp_done(netdev, xid);
return 0;
}
/**
* fcoe_if_create() - Create a FCoE instance on an interface
* @fcoe: The FCoE interface to create a local port on
* @parent: The device pointer to be the parent in sysfs for the SCSI host
* @npiv: Indicates if the port is a vport or not
*
* Creates a fc_lport instance and a Scsi_Host instance and configure them.
*
* Returns: The allocated fc_lport or an error pointer
*/
static struct fc_lport *fcoe_if_create(struct fcoe_interface *fcoe,
struct device *parent, int npiv)
{
struct net_device *netdev = fcoe->netdev;
struct fc_lport *lport, *n_port;
struct fcoe_port *port;
struct Scsi_Host *shost;
int rc;
/*
* parent is only a vport if npiv is 1,
* but we'll only use vport in that case so go ahead and set it
*/
struct fc_vport *vport = dev_to_vport(parent);
FCOE_NETDEV_DBG(netdev, "Create Interface\n");
if (!npiv)
lport = libfc_host_alloc(&fcoe_shost_template, sizeof(*port));
else
lport = libfc_vport_create(vport, sizeof(*port));
if (!lport) {
FCOE_NETDEV_DBG(netdev, "Could not allocate host structure\n");
rc = -ENOMEM;
goto out;
}
port = lport_priv(lport);
port->lport = lport;
port->priv = fcoe;
port->max_queue_depth = FCOE_MAX_QUEUE_DEPTH;
port->min_queue_depth = FCOE_MIN_QUEUE_DEPTH;
INIT_WORK(&port->destroy_work, fcoe_destroy_work);
/* configure a fc_lport including the exchange manager */
rc = fcoe_lport_config(lport);
if (rc) {
FCOE_NETDEV_DBG(netdev, "Could not configure lport for the "
"interface\n");
goto out_host_put;
}
if (npiv) {
FCOE_NETDEV_DBG(netdev, "Setting vport names, "
"%16.16llx %16.16llx\n",
vport->node_name, vport->port_name);
fc_set_wwnn(lport, vport->node_name);
fc_set_wwpn(lport, vport->port_name);
}
/* configure lport network properties */
rc = fcoe_netdev_config(lport, netdev);
if (rc) {
FCOE_NETDEV_DBG(netdev, "Could not configure netdev for the "
"interface\n");
goto out_lp_destroy;
}
/* configure lport scsi host properties */
rc = fcoe_shost_config(lport, parent);
if (rc) {
FCOE_NETDEV_DBG(netdev, "Could not configure shost for the "
"interface\n");
goto out_lp_destroy;
}
/* Initialize the library */
[SCSI] libfcoe: fcoe: fnic: add FIP VN2VN point-to-multipoint support The FC-BB-6 committee is proposing a new FIP usage model called VN_port to VN_port mode. It allows VN_ports to discover each other over a loss-free L2 Ethernet without any FCF or Fibre-channel fabric services. This is point-to-multipoint. There is also a variant of this called point-to-point which provides for making sure there is just one pair of ports operating over the Ethernet fabric. We add these new states: VNMP_START, _PROBE1, _PROBE2, _CLAIM, and _UP. These usually go quickly in that sequence. After waiting a random amount of time up to 100 ms in START, we select a pseudo-random proposed locally-unique port ID and send out probes in states PROBE1 and PROBE2, 100 ms apart. If no probe responses are heard, we proceed to CLAIM state 400 ms later and send a claim notification. We wait another 400 ms to receive claim responses, which give us a list of the other nodes on the network, including their FC-4 capabilities. After another 400 ms we go to VNMP_UP state and should start interoperating with any of the nodes for whic we receivec claim responses. More details are in the spec.j Add the new mode as FIP_MODE_VN2VN. The driver must specify explicitly that it wants to operate in this mode. There is no automatic detection between point-to-multipoint and fabric mode, and the local port initialization is affected, so it isn't anticipated that there will ever be any such automatic switchover. It may eventually be possible to have both fabric and VN2VN modes on the same L2 network, which may be done by two separate local VN_ports (lports). When in VN2VN mode, FIP replaces libfc's fabric-oriented discovery module with its own simple code that adds remote ports as they are discovered from incoming claim notifications and responses. These hooks are placed by fcoe_disc_init(). A linear list of discovered vn_ports is maintained under the fcoe_ctlr struct. It is expected to be short for now, and accessed infrequently. It is kept under RCU for lock-ordering reasons. The lport and/or rport mutexes may be held when we need to lookup a fcoe_vnport during an ELS send. Change fcoe_ctlr_encaps() to lookup the destination vn_port in the list of peers for the destination MAC address of the FIP-encapsulated frame. Add a new function fcoe_disc_init() to initialize just the discovery portion of libfcoe for VN2VN mode. Signed-off-by: Joe Eykholt <jeykholt@cisco.com> Signed-off-by: Robert Love <robert.w.love@intel.com> Signed-off-by: James Bottomley <James.Bottomley@suse.de>
2010-07-21 06:20:30 +08:00
rc = fcoe_libfc_config(lport, &fcoe->ctlr, &fcoe_libfc_fcn_templ, 1);
if (rc) {
FCOE_NETDEV_DBG(netdev, "Could not configure libfc for the "
"interface\n");
goto out_lp_destroy;
}
/* Initialized FDMI information */
fcoe_fdmi_info(lport, netdev);
/*
* fcoe_em_alloc() and fcoe_hostlist_add() both
* need to be atomic with respect to other changes to the
* hostlist since fcoe_em_alloc() looks for an existing EM
* instance on host list updated by fcoe_hostlist_add().
*
* This is currently handled through the fcoe_config_mutex
* begin held.
*/
if (!npiv)
/* lport exch manager allocation */
rc = fcoe_em_config(lport);
else {
shost = vport_to_shost(vport);
n_port = shost_priv(shost);
rc = fc_exch_mgr_list_clone(n_port, lport);
}
if (rc) {
FCOE_NETDEV_DBG(netdev, "Could not configure the EM\n");
goto out_lp_destroy;
}
return lport;
out_lp_destroy:
fc_exch_mgr_free(lport);
out_host_put:
scsi_host_put(lport->host);
out:
return ERR_PTR(rc);
}
/**
* fcoe_if_init() - Initialization routine for fcoe.ko
*
* Attaches the SW FCoE transport to the FC transport
*
* Returns: 0 on success
*/
static int __init fcoe_if_init(void)
{
/* attach to scsi transport */
fcoe_nport_scsi_transport =
fc_attach_transport(&fcoe_nport_fc_functions);
fcoe_vport_scsi_transport =
fc_attach_transport(&fcoe_vport_fc_functions);
if (!fcoe_nport_scsi_transport) {
printk(KERN_ERR "fcoe: Failed to attach to the FC transport\n");
return -ENODEV;
}
return 0;
}
/**
* fcoe_if_exit() - Tear down fcoe.ko
*
* Detaches the SW FCoE transport from the FC transport
*
* Returns: 0 on success
*/
static int __exit fcoe_if_exit(void)
{
fc_release_transport(fcoe_nport_scsi_transport);
fc_release_transport(fcoe_vport_scsi_transport);
fcoe_nport_scsi_transport = NULL;
fcoe_vport_scsi_transport = NULL;
return 0;
}
/**
* fcoe_percpu_thread_create() - Create a receive thread for an online CPU
* @cpu: The CPU index of the CPU to create a receive thread for
*/
static void fcoe_percpu_thread_create(unsigned int cpu)
{
struct fcoe_percpu_s *p;
struct task_struct *thread;
p = &per_cpu(fcoe_percpu, cpu);
thread = kthread_create_on_node(fcoe_percpu_receive_thread,
(void *)p, cpu_to_node(cpu),
"fcoethread/%d", cpu);
if (likely(!IS_ERR(thread))) {
kthread_bind(thread, cpu);
wake_up_process(thread);
spin_lock_bh(&p->fcoe_rx_list.lock);
p->thread = thread;
spin_unlock_bh(&p->fcoe_rx_list.lock);
}
}
/**
* fcoe_percpu_thread_destroy() - Remove the receive thread of a CPU
* @cpu: The CPU index of the CPU whose receive thread is to be destroyed
*
* Destroys a per-CPU Rx thread. Any pending skbs are moved to the
* current CPU's Rx thread. If the thread being destroyed is bound to
* the CPU processing this context the skbs will be freed.
*/
static void fcoe_percpu_thread_destroy(unsigned int cpu)
{
struct fcoe_percpu_s *p;
struct task_struct *thread;
struct page *crc_eof;
struct sk_buff *skb;
#ifdef CONFIG_SMP
struct fcoe_percpu_s *p0;
unsigned targ_cpu = get_cpu();
#endif /* CONFIG_SMP */
FCOE_DBG("Destroying receive thread for CPU %d\n", cpu);
/* Prevent any new skbs from being queued for this CPU. */
p = &per_cpu(fcoe_percpu, cpu);
spin_lock_bh(&p->fcoe_rx_list.lock);
thread = p->thread;
p->thread = NULL;
crc_eof = p->crc_eof_page;
p->crc_eof_page = NULL;
p->crc_eof_offset = 0;
spin_unlock_bh(&p->fcoe_rx_list.lock);
#ifdef CONFIG_SMP
/*
* Don't bother moving the skb's if this context is running
* on the same CPU that is having its thread destroyed. This
* can easily happen when the module is removed.
*/
if (cpu != targ_cpu) {
p0 = &per_cpu(fcoe_percpu, targ_cpu);
spin_lock_bh(&p0->fcoe_rx_list.lock);
if (p0->thread) {
FCOE_DBG("Moving frames from CPU %d to CPU %d\n",
cpu, targ_cpu);
while ((skb = __skb_dequeue(&p->fcoe_rx_list)) != NULL)
__skb_queue_tail(&p0->fcoe_rx_list, skb);
spin_unlock_bh(&p0->fcoe_rx_list.lock);
} else {
/*
* The targeted CPU is not initialized and cannot accept
* new skbs. Unlock the targeted CPU and drop the skbs
* on the CPU that is going offline.
*/
while ((skb = __skb_dequeue(&p->fcoe_rx_list)) != NULL)
kfree_skb(skb);
spin_unlock_bh(&p0->fcoe_rx_list.lock);
}
} else {
/*
* This scenario occurs when the module is being removed
* and all threads are being destroyed. skbs will continue
* to be shifted from the CPU thread that is being removed
* to the CPU thread associated with the CPU that is processing
* the module removal. Once there is only one CPU Rx thread it
* will reach this case and we will drop all skbs and later
* stop the thread.
*/
spin_lock_bh(&p->fcoe_rx_list.lock);
while ((skb = __skb_dequeue(&p->fcoe_rx_list)) != NULL)
kfree_skb(skb);
spin_unlock_bh(&p->fcoe_rx_list.lock);
}
put_cpu();
#else
/*
* This a non-SMP scenario where the singular Rx thread is
* being removed. Free all skbs and stop the thread.
*/
spin_lock_bh(&p->fcoe_rx_list.lock);
while ((skb = __skb_dequeue(&p->fcoe_rx_list)) != NULL)
kfree_skb(skb);
spin_unlock_bh(&p->fcoe_rx_list.lock);
#endif
if (thread)
kthread_stop(thread);
if (crc_eof)
put_page(crc_eof);
}
/**
* fcoe_cpu_callback() - Handler for CPU hotplug events
* @nfb: The callback data block
* @action: The event triggering the callback
* @hcpu: The index of the CPU that the event is for
*
* This creates or destroys per-CPU data for fcoe
*
* Returns NOTIFY_OK always.
*/
static int fcoe_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
unsigned cpu = (unsigned long)hcpu;
switch (action) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
FCOE_DBG("CPU %x online: Create Rx thread\n", cpu);
fcoe_percpu_thread_create(cpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
FCOE_DBG("CPU %x offline: Remove Rx thread\n", cpu);
fcoe_percpu_thread_destroy(cpu);
break;
default:
break;
}
return NOTIFY_OK;
}
/**
* fcoe_select_cpu() - Selects CPU to handle post-processing of incoming
* command.
*
* This routine selects next CPU based on cpumask to distribute
* incoming requests in round robin.
*
* Returns: int CPU number
*/
static inline unsigned int fcoe_select_cpu(void)
{
static unsigned int selected_cpu;
selected_cpu = cpumask_next(selected_cpu, cpu_online_mask);
if (selected_cpu >= nr_cpu_ids)
selected_cpu = cpumask_first(cpu_online_mask);
return selected_cpu;
}
/**
* fcoe_rcv() - Receive packets from a net device
* @skb: The received packet
* @netdev: The net device that the packet was received on
* @ptype: The packet type context
* @olddev: The last device net device
*
* This routine is called by NET_RX_SOFTIRQ. It receives a packet, builds a
* FC frame and passes the frame to libfc.
*
* Returns: 0 for success
*/
static int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev,
struct packet_type *ptype, struct net_device *olddev)
{
struct fc_lport *lport;
struct fcoe_rcv_info *fr;
struct fcoe_interface *fcoe;
struct fc_frame_header *fh;
struct fcoe_percpu_s *fps;
struct ethhdr *eh;
[SCSI] fcoe, libfc: fully makes use of per cpu exch pool and then removes em_lock 1. Updates fcoe_rcv() to queue incoming frames to the fcoe per cpu thread on which this frame's exch was originated and simply use current cpu for request exch not originated by initiator. It is redundant to add this code under CONFIG_SMP, so removes CONFIG_SMP uses around this code. 2. Updates fc_exch_em_alloc, fc_exch_delete, fc_exch_find to use per cpu exch pools, here fc_exch_delete is rename of older fc_exch_mgr_delete_ep since ep/exch are now deleted in pools of EM and so brief new name is sufficient and better name. Updates these functions to map exch id to their index into exch pool using fc_cpu_mask, fc_cpu_order and EM min_xid. This mapping is as per detailed explanation about this in last patch and basically this is just as lower fc_cpu_mask bits of exch id as cpu number and upper bit sum of EM min_xid and exch index in pool. Uses pool next_index to keep track of exch allocation from pool along with pool_max_index as upper bound of exches array in pool. 3. Adds exch pool ptr to fc_exch to free exch to its pool in fc_exch_delete. 4. Updates fc_exch_mgr_reset to reset all exch pools of an EM, this required adding fc_exch_pool_reset func to reset exches in pool and then have fc_exch_mgr_reset call fc_exch_pool_reset for each pool within each EM for a lport. 5. Removes no longer needed exches array, em_lock, next_xid, and total_exches from struct fc_exch_mgr, these are not needed after use of per cpu exch pool, also removes not used max_read, last_read from struct fc_exch_mgr. 6. Updates locking notes for exch pool lock with fc_exch lock and uses pool lock in exch allocation, lookup and reset. Signed-off-by: Vasu Dev <vasu.dev@intel.com> Signed-off-by: Robert Love <robert.w.love@intel.com> Signed-off-by: James Bottomley <James.Bottomley@suse.de>
2009-08-26 04:58:53 +08:00
unsigned int cpu;
fcoe = container_of(ptype, struct fcoe_interface, fcoe_packet_type);
lport = fcoe->ctlr.lp;
if (unlikely(!lport)) {
FCOE_NETDEV_DBG(netdev, "Cannot find hba structure");
goto err2;
}
if (!lport->link_up)
goto err2;
FCOE_NETDEV_DBG(netdev, "skb_info: len:%d data_len:%d head:%p "
"data:%p tail:%p end:%p sum:%d dev:%s",
skb->len, skb->data_len, skb->head, skb->data,
skb_tail_pointer(skb), skb_end_pointer(skb),
skb->csum, skb->dev ? skb->dev->name : "<NULL>");
eh = eth_hdr(skb);
if (is_fip_mode(&fcoe->ctlr) &&
compare_ether_addr(eh->h_source, fcoe->ctlr.dest_addr)) {
FCOE_NETDEV_DBG(netdev, "wrong source mac address:%pM\n",
eh->h_source);
goto err;
}
/*
* Check for minimum frame length, and make sure required FCoE
* and FC headers are pulled into the linear data area.
*/
if (unlikely((skb->len < FCOE_MIN_FRAME) ||
!pskb_may_pull(skb, FCOE_HEADER_LEN)))
goto err;
skb_set_transport_header(skb, sizeof(struct fcoe_hdr));
fh = (struct fc_frame_header *) skb_transport_header(skb);
[SCSI] fcoe: Fix broken NPIV with correction to MAC validation A previous patch attempted to validate the destination MAC address of a FCoE frame by checking that MAC address against the received port's MAC address. The implementation seems fine on the surface, but any VN_Ports added using the NPIV feature will have their own MAC addresses and these MACs were not being checked, which prevented any NPIV VN_Ports from receiving frames. In other words, the following patch has broken NPIV. 519e5135e2537c9dbc1cbcc0891b0a936ff5dcd2 [SCSI] fcoe: adds src and dest mac address checking for fcoe frames Part of the offending patch is correct, but the part that broke NPIV was attempting to satisfy FC-BB-5 section D.5, 2.1- (discard frames that) "contain a destination MAC address/destination N_Port_ID pair that was not assigned by an FCF to one of the VN_Ports on the ENode" The language does _not_ say to compare the destination FC-MAP/destination N_Port_ID, but instead to compare the destination MAC address/destination N_Port_ID. >From the FC-BB-5 specification, "A properly formed FPMA is one in which the 24 most significant bits equal the Fabric’s FC-MAP value and the least significant 24 bits equal the N_Port_ID assigned to the VN_Port by the FCF." This means that we need to compare the FC Frame's destination FCID against the embedded FCID in the destination MAC address. This patch checks the lower 24 bits of the destination MAC address against destination FCID in the Fibre Channel frame. For MAC validation the first line of defense is the hardware MAC filtering. Each VN_Port will have a unicast MAC addresses added to the hardware's filtering table. The Ethernet driver should drop any MACs not destined for a programmed MAC. This patch adds a second line of defense that very specfically compares an element in the FC frame against an element in the Ethernet header, which is appropriate for the FCoE layer. Many alternative approaches were considered, including a LLD callback from libfc. The second most reasonable approach seemed to be walking the list of NPIV ports and check each of their MAC addresses against the destination MAC address of the received frame. The problem with this approach was that it is likely that performance would suffer with the more NPIV ports added to the system since every received frame would need to walk this list, comparing each entry's MAC. Signed-off-by: Robert Love <robert.w.love@intel.com> Signed-off-by: James Bottomley <James.Bottomley@suse.de>
2010-10-09 08:12:46 +08:00
if (ntoh24(&eh->h_dest[3]) != ntoh24(fh->fh_d_id)) {
FCOE_NETDEV_DBG(netdev, "FC frame d_id mismatch with MAC:%pM\n",
eh->h_dest);
goto err;
}
fr = fcoe_dev_from_skb(skb);
fr->fr_dev = lport;
/*
[SCSI] fcoe, libfc: fully makes use of per cpu exch pool and then removes em_lock 1. Updates fcoe_rcv() to queue incoming frames to the fcoe per cpu thread on which this frame's exch was originated and simply use current cpu for request exch not originated by initiator. It is redundant to add this code under CONFIG_SMP, so removes CONFIG_SMP uses around this code. 2. Updates fc_exch_em_alloc, fc_exch_delete, fc_exch_find to use per cpu exch pools, here fc_exch_delete is rename of older fc_exch_mgr_delete_ep since ep/exch are now deleted in pools of EM and so brief new name is sufficient and better name. Updates these functions to map exch id to their index into exch pool using fc_cpu_mask, fc_cpu_order and EM min_xid. This mapping is as per detailed explanation about this in last patch and basically this is just as lower fc_cpu_mask bits of exch id as cpu number and upper bit sum of EM min_xid and exch index in pool. Uses pool next_index to keep track of exch allocation from pool along with pool_max_index as upper bound of exches array in pool. 3. Adds exch pool ptr to fc_exch to free exch to its pool in fc_exch_delete. 4. Updates fc_exch_mgr_reset to reset all exch pools of an EM, this required adding fc_exch_pool_reset func to reset exches in pool and then have fc_exch_mgr_reset call fc_exch_pool_reset for each pool within each EM for a lport. 5. Removes no longer needed exches array, em_lock, next_xid, and total_exches from struct fc_exch_mgr, these are not needed after use of per cpu exch pool, also removes not used max_read, last_read from struct fc_exch_mgr. 6. Updates locking notes for exch pool lock with fc_exch lock and uses pool lock in exch allocation, lookup and reset. Signed-off-by: Vasu Dev <vasu.dev@intel.com> Signed-off-by: Robert Love <robert.w.love@intel.com> Signed-off-by: James Bottomley <James.Bottomley@suse.de>
2009-08-26 04:58:53 +08:00
* In case the incoming frame's exchange is originated from
* the initiator, then received frame's exchange id is ANDed
* with fc_cpu_mask bits to get the same cpu on which exchange
* was originated, otherwise select cpu using rx exchange id
* or fcoe_select_cpu().
*/
[SCSI] fcoe, libfc: fully makes use of per cpu exch pool and then removes em_lock 1. Updates fcoe_rcv() to queue incoming frames to the fcoe per cpu thread on which this frame's exch was originated and simply use current cpu for request exch not originated by initiator. It is redundant to add this code under CONFIG_SMP, so removes CONFIG_SMP uses around this code. 2. Updates fc_exch_em_alloc, fc_exch_delete, fc_exch_find to use per cpu exch pools, here fc_exch_delete is rename of older fc_exch_mgr_delete_ep since ep/exch are now deleted in pools of EM and so brief new name is sufficient and better name. Updates these functions to map exch id to their index into exch pool using fc_cpu_mask, fc_cpu_order and EM min_xid. This mapping is as per detailed explanation about this in last patch and basically this is just as lower fc_cpu_mask bits of exch id as cpu number and upper bit sum of EM min_xid and exch index in pool. Uses pool next_index to keep track of exch allocation from pool along with pool_max_index as upper bound of exches array in pool. 3. Adds exch pool ptr to fc_exch to free exch to its pool in fc_exch_delete. 4. Updates fc_exch_mgr_reset to reset all exch pools of an EM, this required adding fc_exch_pool_reset func to reset exches in pool and then have fc_exch_mgr_reset call fc_exch_pool_reset for each pool within each EM for a lport. 5. Removes no longer needed exches array, em_lock, next_xid, and total_exches from struct fc_exch_mgr, these are not needed after use of per cpu exch pool, also removes not used max_read, last_read from struct fc_exch_mgr. 6. Updates locking notes for exch pool lock with fc_exch lock and uses pool lock in exch allocation, lookup and reset. Signed-off-by: Vasu Dev <vasu.dev@intel.com> Signed-off-by: Robert Love <robert.w.love@intel.com> Signed-off-by: James Bottomley <James.Bottomley@suse.de>
2009-08-26 04:58:53 +08:00
if (ntoh24(fh->fh_f_ctl) & FC_FC_EX_CTX)
cpu = ntohs(fh->fh_ox_id) & fc_cpu_mask;
else {
if (ntohs(fh->fh_rx_id) == FC_XID_UNKNOWN)
cpu = fcoe_select_cpu();
else
cpu = ntohs(fh->fh_rx_id) & fc_cpu_mask;
}
if (cpu >= nr_cpu_ids)
goto err;
fps = &per_cpu(fcoe_percpu, cpu);
spin_lock(&fps->fcoe_rx_list.lock);
if (unlikely(!fps->thread)) {
/*
* The targeted CPU is not ready, let's target
* the first CPU now. For non-SMP systems this
* will check the same CPU twice.
*/
FCOE_NETDEV_DBG(netdev, "CPU is online, but no receive thread "
"ready for incoming skb- using first online "
"CPU.\n");
spin_unlock(&fps->fcoe_rx_list.lock);
cpu = cpumask_first(cpu_online_mask);
fps = &per_cpu(fcoe_percpu, cpu);
spin_lock(&fps->fcoe_rx_list.lock);
if (!fps->thread) {
spin_unlock(&fps->fcoe_rx_list.lock);
goto err;
}
}
/*
* We now have a valid CPU that we're targeting for
* this skb. We also have this receive thread locked,
* so we're free to queue skbs into it's queue.
*/
[SCSI] fcoe: Ensure fcoe_recv_frame is always called in process context commit 859b7b649ab58ee5cbfb761491317d5b315c1b0f introduced the ability to call fcoe_recv_frame in softirq context. While this is beneficial to performance, its not safe to do, as it breaks the serialization of access to the lport structure (i.e. when an fcoe interface is being torn down, theres no way to serialize the teardown effort with the completion of receieve operations occuring in softirq context. As a result, lport (and other) data structures can be read and modified in parallel leading to corruption. Most notable is the vport list, which is protected by a mutex, that will cause a panic if a softirq receive while said mutex is locked. Additionaly, the ema_list, discussed here: http://lists.open-fcoe.org/pipermail/devel/2012-February/011947.html Can be corrupted if a list traversal occurs in softirq context at the same time as a list delete in process context. And generally the lport state variables will not be stable, and may lead to unpredictable results. The most direct fix is to remove the bits from the above commit that allowed fcoe_recv_frame to be called in softirq context. We just force all frames to be handled by the per-cpu rx threads. This will allow the fcoe_if_destroy's use of fcoe_percpu_clean to function properly, ensuring that no frames are being received while the lport is being torn down. Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Reviewed-by: Vasu Dev <vasu.dev@intel.com> Signed-off-by: Robert Love <robert.w.love@intel.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2012-03-10 06:49:48 +08:00
/*
* Note: We used to have a set of conditions under which we would
* call fcoe_recv_frame directly, rather than queuing to the rx list
* as it could save a few cycles, but doing so is prohibited, as
* fcoe_recv_frame has several paths that may sleep, which is forbidden
* in softirq context.
*/
[SCSI] fcoe: Ensure fcoe_recv_frame is always called in process context commit 859b7b649ab58ee5cbfb761491317d5b315c1b0f introduced the ability to call fcoe_recv_frame in softirq context. While this is beneficial to performance, its not safe to do, as it breaks the serialization of access to the lport structure (i.e. when an fcoe interface is being torn down, theres no way to serialize the teardown effort with the completion of receieve operations occuring in softirq context. As a result, lport (and other) data structures can be read and modified in parallel leading to corruption. Most notable is the vport list, which is protected by a mutex, that will cause a panic if a softirq receive while said mutex is locked. Additionaly, the ema_list, discussed here: http://lists.open-fcoe.org/pipermail/devel/2012-February/011947.html Can be corrupted if a list traversal occurs in softirq context at the same time as a list delete in process context. And generally the lport state variables will not be stable, and may lead to unpredictable results. The most direct fix is to remove the bits from the above commit that allowed fcoe_recv_frame to be called in softirq context. We just force all frames to be handled by the per-cpu rx threads. This will allow the fcoe_if_destroy's use of fcoe_percpu_clean to function properly, ensuring that no frames are being received while the lport is being torn down. Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Reviewed-by: Vasu Dev <vasu.dev@intel.com> Signed-off-by: Robert Love <robert.w.love@intel.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2012-03-10 06:49:48 +08:00
__skb_queue_tail(&fps->fcoe_rx_list, skb);
if (fps->thread->state == TASK_INTERRUPTIBLE)
[SCSI] fcoe: Ensure fcoe_recv_frame is always called in process context commit 859b7b649ab58ee5cbfb761491317d5b315c1b0f introduced the ability to call fcoe_recv_frame in softirq context. While this is beneficial to performance, its not safe to do, as it breaks the serialization of access to the lport structure (i.e. when an fcoe interface is being torn down, theres no way to serialize the teardown effort with the completion of receieve operations occuring in softirq context. As a result, lport (and other) data structures can be read and modified in parallel leading to corruption. Most notable is the vport list, which is protected by a mutex, that will cause a panic if a softirq receive while said mutex is locked. Additionaly, the ema_list, discussed here: http://lists.open-fcoe.org/pipermail/devel/2012-February/011947.html Can be corrupted if a list traversal occurs in softirq context at the same time as a list delete in process context. And generally the lport state variables will not be stable, and may lead to unpredictable results. The most direct fix is to remove the bits from the above commit that allowed fcoe_recv_frame to be called in softirq context. We just force all frames to be handled by the per-cpu rx threads. This will allow the fcoe_if_destroy's use of fcoe_percpu_clean to function properly, ensuring that no frames are being received while the lport is being torn down. Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Reviewed-by: Vasu Dev <vasu.dev@intel.com> Signed-off-by: Robert Love <robert.w.love@intel.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2012-03-10 06:49:48 +08:00
wake_up_process(fps->thread);
spin_unlock(&fps->fcoe_rx_list.lock);
return 0;
err:
per_cpu_ptr(lport->dev_stats, get_cpu())->ErrorFrames++;
put_cpu();
err2:
kfree_skb(skb);
return -1;
}
/**
* fcoe_alloc_paged_crc_eof() - Allocate a page to be used for the trailer CRC
* @skb: The packet to be transmitted
* @tlen: The total length of the trailer
*
* Returns: 0 for success
*/
static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen)
{
struct fcoe_percpu_s *fps;
int rc;
fps = &get_cpu_var(fcoe_percpu);
rc = fcoe_get_paged_crc_eof(skb, tlen, fps);
put_cpu_var(fcoe_percpu);
return rc;
}
/**
* fcoe_xmit() - Transmit a FCoE frame
* @lport: The local port that the frame is to be transmitted for
* @fp: The frame to be transmitted
*
* Return: 0 for success
*/
static int fcoe_xmit(struct fc_lport *lport, struct fc_frame *fp)
{
int wlen;
u32 crc;
struct ethhdr *eh;
struct fcoe_crc_eof *cp;
struct sk_buff *skb;
struct fcoe_dev_stats *stats;
struct fc_frame_header *fh;
unsigned int hlen; /* header length implies the version */
unsigned int tlen; /* trailer length */
unsigned int elen; /* eth header, may include vlan */
struct fcoe_port *port = lport_priv(lport);
struct fcoe_interface *fcoe = port->priv;
u8 sof, eof;
struct fcoe_hdr *hp;
WARN_ON((fr_len(fp) % sizeof(u32)) != 0);
fh = fc_frame_header_get(fp);
skb = fp_skb(fp);
wlen = skb->len / FCOE_WORD_TO_BYTE;
if (!lport->link_up) {
kfree_skb(skb);
return 0;
}
if (unlikely(fh->fh_type == FC_TYPE_ELS) &&
fcoe_ctlr_els_send(&fcoe->ctlr, lport, skb))
return 0;
sof = fr_sof(fp);
eof = fr_eof(fp);
elen = sizeof(struct ethhdr);
hlen = sizeof(struct fcoe_hdr);
tlen = sizeof(struct fcoe_crc_eof);
wlen = (skb->len - tlen + sizeof(crc)) / FCOE_WORD_TO_BYTE;
/* crc offload */
if (likely(lport->crc_offload)) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->csum_start = skb_headroom(skb);
skb->csum_offset = skb->len;
crc = 0;
} else {
skb->ip_summed = CHECKSUM_NONE;
crc = fcoe_fc_crc(fp);
}
/* copy port crc and eof to the skb buff */
if (skb_is_nonlinear(skb)) {
skb_frag_t *frag;
if (fcoe_alloc_paged_crc_eof(skb, tlen)) {
kfree_skb(skb);
return -ENOMEM;
}
frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags - 1];
cp = kmap_atomic(skb_frag_page(frag))
+ frag->page_offset;
} else {
cp = (struct fcoe_crc_eof *)skb_put(skb, tlen);
}
memset(cp, 0, sizeof(*cp));
cp->fcoe_eof = eof;
cp->fcoe_crc32 = cpu_to_le32(~crc);
if (skb_is_nonlinear(skb)) {
kunmap_atomic(cp);
cp = NULL;
}
/* adjust skb network/transport offsets to match mac/fcoe/port */
skb_push(skb, elen + hlen);
skb_reset_mac_header(skb);
skb_reset_network_header(skb);
skb->mac_len = elen;
skb->protocol = htons(ETH_P_FCOE);
skb->priority = port->priority;
if (fcoe->netdev->priv_flags & IFF_802_1Q_VLAN &&
fcoe->realdev->features & NETIF_F_HW_VLAN_TX) {
skb->vlan_tci = VLAN_TAG_PRESENT |
vlan_dev_vlan_id(fcoe->netdev);
skb->dev = fcoe->realdev;
} else
skb->dev = fcoe->netdev;
/* fill up mac and fcoe headers */
eh = eth_hdr(skb);
eh->h_proto = htons(ETH_P_FCOE);
memcpy(eh->h_dest, fcoe->ctlr.dest_addr, ETH_ALEN);
if (fcoe->ctlr.map_dest)
memcpy(eh->h_dest + 3, fh->fh_d_id, 3);
if (unlikely(fcoe->ctlr.flogi_oxid != FC_XID_UNKNOWN))
memcpy(eh->h_source, fcoe->ctlr.ctl_src_addr, ETH_ALEN);
else
memcpy(eh->h_source, port->data_src_addr, ETH_ALEN);
hp = (struct fcoe_hdr *)(eh + 1);
memset(hp, 0, sizeof(*hp));
if (FC_FCOE_VER)
FC_FCOE_ENCAPS_VER(hp, FC_FCOE_VER);
hp->fcoe_sof = sof;
/* fcoe lso, mss is in max_payload which is non-zero for FCP data */
if (lport->seq_offload && fr_max_payload(fp)) {
skb_shinfo(skb)->gso_type = SKB_GSO_FCOE;
skb_shinfo(skb)->gso_size = fr_max_payload(fp);
} else {
skb_shinfo(skb)->gso_type = 0;
skb_shinfo(skb)->gso_size = 0;
}
/* update tx stats: regardless if LLD fails */
stats = per_cpu_ptr(lport->dev_stats, get_cpu());
stats->TxFrames++;
stats->TxWords += wlen;
put_cpu();
/* send down to lld */
fr_dev(fp) = lport;
fcoe_port_send(port, skb);
return 0;
}
/**
* fcoe_percpu_flush_done() - Indicate per-CPU queue flush completion
* @skb: The completed skb (argument required by destructor)
*/
static void fcoe_percpu_flush_done(struct sk_buff *skb)
{
complete(&fcoe_flush_completion);
}
/**
* fcoe_filter_frames() - filter out bad fcoe frames, i.e. bad CRC
* @lport: The local port the frame was received on
* @fp: The received frame
*
* Return: 0 on passing filtering checks
*/
static inline int fcoe_filter_frames(struct fc_lport *lport,
struct fc_frame *fp)
{
struct fcoe_interface *fcoe;
struct fc_frame_header *fh;
struct sk_buff *skb = (struct sk_buff *)fp;
struct fcoe_dev_stats *stats;
/*
* We only check CRC if no offload is available and if it is
* it's solicited data, in which case, the FCP layer would
* check it during the copy.
*/
if (lport->crc_offload && skb->ip_summed == CHECKSUM_UNNECESSARY)
fr_flags(fp) &= ~FCPHF_CRC_UNCHECKED;
else
fr_flags(fp) |= FCPHF_CRC_UNCHECKED;
fh = (struct fc_frame_header *) skb_transport_header(skb);
fh = fc_frame_header_get(fp);
if (fh->fh_r_ctl == FC_RCTL_DD_SOL_DATA && fh->fh_type == FC_TYPE_FCP)
return 0;
fcoe = ((struct fcoe_port *)lport_priv(lport))->priv;
if (is_fip_mode(&fcoe->ctlr) && fc_frame_payload_op(fp) == ELS_LOGO &&
ntoh24(fh->fh_s_id) == FC_FID_FLOGI) {
FCOE_DBG("fcoe: dropping FCoE lport LOGO in fip mode\n");
return -EINVAL;
}
if (!(fr_flags(fp) & FCPHF_CRC_UNCHECKED) ||
le32_to_cpu(fr_crc(fp)) == ~crc32(~0, skb->data, skb->len)) {
fr_flags(fp) &= ~FCPHF_CRC_UNCHECKED;
return 0;
}
stats = per_cpu_ptr(lport->dev_stats, get_cpu());
stats->InvalidCRCCount++;
if (stats->InvalidCRCCount < 5)
printk(KERN_WARNING "fcoe: dropping frame with CRC error\n");
put_cpu();
return -EINVAL;
}
/**
* fcoe_recv_frame() - process a single received frame
* @skb: frame to process
*/
static void fcoe_recv_frame(struct sk_buff *skb)
{
u32 fr_len;
struct fc_lport *lport;
struct fcoe_rcv_info *fr;
struct fcoe_dev_stats *stats;
struct fcoe_crc_eof crc_eof;
struct fc_frame *fp;
struct fcoe_port *port;
struct fcoe_hdr *hp;
fr = fcoe_dev_from_skb(skb);
lport = fr->fr_dev;
if (unlikely(!lport)) {
if (skb->destructor != fcoe_percpu_flush_done)
FCOE_NETDEV_DBG(skb->dev, "NULL lport in skb");
kfree_skb(skb);
return;
}
FCOE_NETDEV_DBG(skb->dev, "skb_info: len:%d data_len:%d "
"head:%p data:%p tail:%p end:%p sum:%d dev:%s",
skb->len, skb->data_len,
skb->head, skb->data, skb_tail_pointer(skb),
skb_end_pointer(skb), skb->csum,
skb->dev ? skb->dev->name : "<NULL>");
port = lport_priv(lport);
skb_linearize(skb); /* check for skb_is_nonlinear is within skb_linearize */
/*
* Frame length checks and setting up the header pointers
* was done in fcoe_rcv already.
*/
hp = (struct fcoe_hdr *) skb_network_header(skb);
stats = per_cpu_ptr(lport->dev_stats, get_cpu());
if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) {
if (stats->ErrorFrames < 5)
printk(KERN_WARNING "fcoe: FCoE version "
"mismatch: The frame has "
"version %x, but the "
"initiator supports version "
"%x\n", FC_FCOE_DECAPS_VER(hp),
FC_FCOE_VER);
goto drop;
}
skb_pull(skb, sizeof(struct fcoe_hdr));
fr_len = skb->len - sizeof(struct fcoe_crc_eof);
stats->RxFrames++;
stats->RxWords += fr_len / FCOE_WORD_TO_BYTE;
fp = (struct fc_frame *)skb;
fc_frame_init(fp);
fr_dev(fp) = lport;
fr_sof(fp) = hp->fcoe_sof;
/* Copy out the CRC and EOF trailer for access */
if (skb_copy_bits(skb, fr_len, &crc_eof, sizeof(crc_eof)))
goto drop;
fr_eof(fp) = crc_eof.fcoe_eof;
fr_crc(fp) = crc_eof.fcoe_crc32;
if (pskb_trim(skb, fr_len))
goto drop;
if (!fcoe_filter_frames(lport, fp)) {
put_cpu();
fc_exch_recv(lport, fp);
return;
}
drop:
stats->ErrorFrames++;
put_cpu();
kfree_skb(skb);
}
/**
* fcoe_percpu_receive_thread() - The per-CPU packet receive thread
* @arg: The per-CPU context
*
* Return: 0 for success
*/
static int fcoe_percpu_receive_thread(void *arg)
{
struct fcoe_percpu_s *p = arg;
struct sk_buff *skb;
struct sk_buff_head tmp;
skb_queue_head_init(&tmp);
set_user_nice(current, -20);
while (!kthread_should_stop()) {
spin_lock_bh(&p->fcoe_rx_list.lock);
skb_queue_splice_init(&p->fcoe_rx_list, &tmp);
spin_unlock_bh(&p->fcoe_rx_list.lock);
while ((skb = __skb_dequeue(&tmp)) != NULL)
fcoe_recv_frame(skb);
spin_lock_bh(&p->fcoe_rx_list.lock);
if (!skb_queue_len(&p->fcoe_rx_list)) {
set_current_state(TASK_INTERRUPTIBLE);
spin_unlock_bh(&p->fcoe_rx_list.lock);
schedule();
set_current_state(TASK_RUNNING);
} else
spin_unlock_bh(&p->fcoe_rx_list.lock);
}
return 0;
}
/**
* fcoe_dev_setup() - Setup the link change notification interface
*/
static void fcoe_dev_setup(void)
{
register_dcbevent_notifier(&dcb_notifier);
register_netdevice_notifier(&fcoe_notifier);
}
/**
* fcoe_dev_cleanup() - Cleanup the link change notification interface
*/
static void fcoe_dev_cleanup(void)
{
unregister_dcbevent_notifier(&dcb_notifier);
unregister_netdevice_notifier(&fcoe_notifier);
}
static struct fcoe_interface *
fcoe_hostlist_lookup_realdev_port(struct net_device *netdev)
{
struct fcoe_interface *fcoe;
struct net_device *real_dev;
list_for_each_entry(fcoe, &fcoe_hostlist, list) {
if (fcoe->netdev->priv_flags & IFF_802_1Q_VLAN)
real_dev = vlan_dev_real_dev(fcoe->netdev);
else
real_dev = fcoe->netdev;
if (netdev == real_dev)
return fcoe;
}
return NULL;
}
static int fcoe_dcb_app_notification(struct notifier_block *notifier,
ulong event, void *ptr)
{
struct dcb_app_type *entry = ptr;
struct fcoe_interface *fcoe;
struct net_device *netdev;
struct fcoe_port *port;
int prio;
if (entry->app.selector != DCB_APP_IDTYPE_ETHTYPE)
return NOTIFY_OK;
netdev = dev_get_by_index(&init_net, entry->ifindex);
if (!netdev)
return NOTIFY_OK;
fcoe = fcoe_hostlist_lookup_realdev_port(netdev);
dev_put(netdev);
if (!fcoe)
return NOTIFY_OK;
if (entry->dcbx & DCB_CAP_DCBX_VER_CEE)
prio = ffs(entry->app.priority) - 1;
else
prio = entry->app.priority;
if (prio < 0)
return NOTIFY_OK;
if (entry->app.protocol == ETH_P_FIP ||
entry->app.protocol == ETH_P_FCOE)
fcoe->ctlr.priority = prio;
if (entry->app.protocol == ETH_P_FCOE) {
port = lport_priv(fcoe->ctlr.lp);
port->priority = prio;
}
return NOTIFY_OK;
}
/**
* fcoe_device_notification() - Handler for net device events
* @notifier: The context of the notification
* @event: The type of event
* @ptr: The net device that the event was on
*
* This function is called by the Ethernet driver in case of link change event.
*
* Returns: 0 for success
*/
static int fcoe_device_notification(struct notifier_block *notifier,
ulong event, void *ptr)
{
struct fc_lport *lport = NULL;
struct net_device *netdev = ptr;
struct fcoe_interface *fcoe;
struct fcoe_port *port;
struct fcoe_dev_stats *stats;
u32 link_possible = 1;
u32 mfs;
int rc = NOTIFY_OK;
list_for_each_entry(fcoe, &fcoe_hostlist, list) {
if (fcoe->netdev == netdev) {
lport = fcoe->ctlr.lp;
break;
}
}
if (!lport) {
rc = NOTIFY_DONE;
goto out;
}
switch (event) {
case NETDEV_DOWN:
case NETDEV_GOING_DOWN:
link_possible = 0;
break;
case NETDEV_UP:
case NETDEV_CHANGE:
break;
case NETDEV_CHANGEMTU:
if (netdev->features & NETIF_F_FCOE_MTU)
break;
mfs = netdev->mtu - (sizeof(struct fcoe_hdr) +
sizeof(struct fcoe_crc_eof));
if (mfs >= FC_MIN_MAX_FRAME)
fc_set_mfs(lport, mfs);
break;
case NETDEV_REGISTER:
break;
case NETDEV_UNREGISTER:
list_del(&fcoe->list);
port = lport_priv(fcoe->ctlr.lp);
queue_work(fcoe_wq, &port->destroy_work);
goto out;
break;
case NETDEV_FEAT_CHANGE:
fcoe_netdev_features_change(lport, netdev);
break;
default:
FCOE_NETDEV_DBG(netdev, "Unknown event %ld "
"from netdev netlink\n", event);
}
fcoe_link_speed_update(lport);
if (link_possible && !fcoe_link_ok(lport))
fcoe_ctlr_link_up(&fcoe->ctlr);
else if (fcoe_ctlr_link_down(&fcoe->ctlr)) {
stats = per_cpu_ptr(lport->dev_stats, get_cpu());
stats->LinkFailureCount++;
put_cpu();
fcoe_clean_pending_queue(lport);
}
out:
return rc;
}
/**
* fcoe_disable() - Disables a FCoE interface
* @netdev : The net_device object the Ethernet interface to create on
*
* Called from fcoe transport.
*
* Returns: 0 for success
*/
static int fcoe_disable(struct net_device *netdev)
{
struct fcoe_interface *fcoe;
int rc = 0;
mutex_lock(&fcoe_config_mutex);
rtnl_lock();
fcoe = fcoe_hostlist_lookup_port(netdev);
rtnl_unlock();
if (fcoe) {
fcoe_ctlr_link_down(&fcoe->ctlr);
fcoe_clean_pending_queue(fcoe->ctlr.lp);
} else
rc = -ENODEV;
mutex_unlock(&fcoe_config_mutex);
return rc;
}
/**
* fcoe_enable() - Enables a FCoE interface
* @netdev : The net_device object the Ethernet interface to create on
*
* Called from fcoe transport.
*
* Returns: 0 for success
*/
static int fcoe_enable(struct net_device *netdev)
{
struct fcoe_interface *fcoe;
int rc = 0;
mutex_lock(&fcoe_config_mutex);
rtnl_lock();
fcoe = fcoe_hostlist_lookup_port(netdev);
rtnl_unlock();
if (!fcoe)
rc = -ENODEV;
else if (!fcoe_link_ok(fcoe->ctlr.lp))
fcoe_ctlr_link_up(&fcoe->ctlr);
mutex_unlock(&fcoe_config_mutex);
return rc;
}
/**
* fcoe_destroy() - Destroy a FCoE interface
* @netdev : The net_device object the Ethernet interface to create on
*
* Called from fcoe transport
*
* Returns: 0 for success
*/
static int fcoe_destroy(struct net_device *netdev)
{
struct fcoe_interface *fcoe;
struct fc_lport *lport;
struct fcoe_port *port;
int rc = 0;
mutex_lock(&fcoe_config_mutex);
rtnl_lock();
fcoe = fcoe_hostlist_lookup_port(netdev);
if (!fcoe) {
rc = -ENODEV;
goto out_nodev;
}
lport = fcoe->ctlr.lp;
port = lport_priv(lport);
list_del(&fcoe->list);
queue_work(fcoe_wq, &port->destroy_work);
out_nodev:
rtnl_unlock();
mutex_unlock(&fcoe_config_mutex);
return rc;
}
/**
* fcoe_destroy_work() - Destroy a FCoE port in a deferred work context
* @work: Handle to the FCoE port to be destroyed
*/
static void fcoe_destroy_work(struct work_struct *work)
{
struct fcoe_port *port;
struct fcoe_interface *fcoe;
port = container_of(work, struct fcoe_port, destroy_work);
mutex_lock(&fcoe_config_mutex);
fcoe = port->priv;
fcoe_if_destroy(port->lport);
fcoe_interface_cleanup(fcoe);
mutex_unlock(&fcoe_config_mutex);
}
/**
* fcoe_match() - Check if the FCoE is supported on the given netdevice
* @netdev : The net_device object the Ethernet interface to create on
*
* Called from fcoe transport.
*
* Returns: always returns true as this is the default FCoE transport,
* i.e., support all netdevs.
*/
static bool fcoe_match(struct net_device *netdev)
{
return true;
}
/**
* fcoe_dcb_create() - Initialize DCB attributes and hooks
* @netdev: The net_device object of the L2 link that should be queried
* @port: The fcoe_port to bind FCoE APP priority with
* @
*/
static void fcoe_dcb_create(struct fcoe_interface *fcoe)
{
#ifdef CONFIG_DCB
int dcbx;
u8 fup, up;
struct net_device *netdev = fcoe->realdev;
struct fcoe_port *port = lport_priv(fcoe->ctlr.lp);
struct dcb_app app = {
.priority = 0,
.protocol = ETH_P_FCOE
};
/* setup DCB priority attributes. */
if (netdev && netdev->dcbnl_ops && netdev->dcbnl_ops->getdcbx) {
dcbx = netdev->dcbnl_ops->getdcbx(netdev);
if (dcbx & DCB_CAP_DCBX_VER_IEEE) {
app.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE;
up = dcb_ieee_getapp_mask(netdev, &app);
app.protocol = ETH_P_FIP;
fup = dcb_ieee_getapp_mask(netdev, &app);
} else {
app.selector = DCB_APP_IDTYPE_ETHTYPE;
up = dcb_getapp(netdev, &app);
app.protocol = ETH_P_FIP;
fup = dcb_getapp(netdev, &app);
}
port->priority = ffs(up) ? ffs(up) - 1 : 0;
fcoe->ctlr.priority = ffs(fup) ? ffs(fup) - 1 : port->priority;
}
#endif
}
/**
* fcoe_create() - Create a fcoe interface
* @netdev : The net_device object the Ethernet interface to create on
* @fip_mode: The FIP mode for this creation
*
* Called from fcoe transport
*
* Returns: 0 for success
*/
static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode)
{
int rc = 0;
struct fcoe_interface *fcoe;
struct fc_lport *lport;
mutex_lock(&fcoe_config_mutex);
rtnl_lock();
[SCSI] fcoe: fix a circular locking issue with rtnl and sysfs mutex Currently rtnl mutex is grabbed during fcoe create, destroy, enable and disable operations while sysfs s_active read mutex is already held, but simultaneously other networking events could try grabbing write s_active mutex while rtnl is already held and that is causing circular lock warning, its detailed log pasted at end. In this log, the rtnl was held before write s_active during device renaming but there are more such cases as Joe reported another instance with tg3 open at:- http://www.open-fcoe.org/pipermail/devel/2010-February/008263.html This patch fixes this issue by not waiting for rtnl mutex during fcoe ops, that means if rtnl mutex is not immediately available then restart_syscall() to allow others waiting in line to grab s_active along with rtnl mutex to finish their work first under these mutex. Currently rtnl mutex was grabbed twice during fcoe_destroy call flow, second grab was from fcoe_if_destroy called from fcoe_destroy after dropping rtnl mutex before calling fcoe_if_destroy, so instead made fcoe_if_destroy always called with rtnl mutex held to have this mutex grabbed only once in this code path. However left matching rtnl_unlock as-is in its original place as it was dropped there for good reason since very next call causes synchronous fip worker flush and if rtnl mutex is still held before flush then that would cause new circular warning between fip->recv_work and rtnl mutex, I've added detailed comment for this on fcoe_if_destroy calling and rtnl muxtes unlocking. ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.33.1linux-stable-2.6.33 #1 ------------------------------------------------------- fcoemon/18823 is trying to acquire lock: (fcoe_config_mutex){+.+.+.}, at: [<ffffffffa02ba5fc>] fcoe_create+0x27/0x4f7 [fcoe] but task is already holding lock: (s_active){++++.+}, at: [<ffffffff8115ef93>] sysfs_get_active_two+0x31/0x48 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (s_active){++++.+}: [<ffffffff81077bdb>] __lock_acquire+0xb73/0xd2b [<ffffffff81077e60>] lock_acquire+0xcd/0xf1 [<ffffffff8115e5df>] sysfs_deactivate+0x8b/0xe0 [<ffffffff8115edfb>] sysfs_addrm_finish+0x36/0x55 [<ffffffff8115d0cc>] sysfs_hash_and_remove+0x53/0x6a [<ffffffff8115f353>] sysfs_remove_link+0x21/0x23 [<ffffffff812b6c93>] device_rename+0x99/0xcb [<ffffffff8138dbf0>] dev_change_name+0xd5/0x1d2 [<ffffffff8138deee>] dev_ifsioc+0x201/0x2ac [<ffffffff8138e4ba>] dev_ioctl+0x521/0x632 [<ffffffff81379e43>] sock_do_ioctl+0x3d/0x47 [<ffffffff8137a254>] sock_ioctl+0x213/0x222 [<ffffffff81114614>] vfs_ioctl+0x32/0xa6 [<ffffffff81114b94>] do_vfs_ioctl+0x490/0x4d6 [<ffffffff81114c30>] sys_ioctl+0x56/0x79 [<ffffffff81009b42>] system_call_fastpath+0x16/0x1b -> #1 (rtnl_mutex){+.+.+.}: [<ffffffff81077bdb>] __lock_acquire+0xb73/0xd2b [<ffffffff81077e60>] lock_acquire+0xcd/0xf1 [<ffffffff8142f343>] __mutex_lock_common+0x4b/0x383 [<ffffffff8142f73f>] mutex_lock_nested+0x3e/0x43 [<ffffffff813959f9>] rtnl_lock+0x17/0x19 [<ffffffff8138ccae>] register_netdevice_notifier+0x1e/0x19b [<ffffffffa02580c1>] 0xffffffffa02580c1 [<ffffffff81002069>] do_one_initcall+0x5e/0x15e [<ffffffff81084094>] sys_init_module+0xd8/0x23a [<ffffffff81009b42>] system_call_fastpath+0x16/0x1b -> #0 (fcoe_config_mutex){+.+.+.}: [<ffffffff81077a85>] __lock_acquire+0xa1d/0xd2b [<ffffffff81077e60>] lock_acquire+0xcd/0xf1 [<ffffffff8142f343>] __mutex_lock_common+0x4b/0x383 [<ffffffff8142f73f>] mutex_lock_nested+0x3e/0x43 [<ffffffffa02ba5fc>] fcoe_create+0x27/0x4f7 [fcoe] [<ffffffff810635b1>] param_attr_store+0x27/0x35 [<ffffffff81063619>] module_attr_store+0x26/0x2a [<ffffffff8115dae3>] sysfs_write_file+0x108/0x144 [<ffffffff81107bd1>] vfs_write+0xae/0x10b [<ffffffff81107cee>] sys_write+0x4a/0x6e [<ffffffff81009b42>] system_call_fastpath+0x16/0x1b other info that might help us debug this: 3 locks held by fcoemon/18823: #0: (&buffer->mutex){+.+.+.}, at: [<ffffffff8115da17>] sysfs_write_file+0x3c/0x144 #1: (s_active){++++.+}, at: [<ffffffff8115ef86>] sysfs_get_active_two+0x24/0x48 #2: (s_active){++++.+}, at: [<ffffffff8115ef93>] sysfs_get_active_two+0x31/0x48 stack backtrace: Pid: 18823, comm: fcoemon Tainted: G W 2.6.33.1linux-stable-2.6.33 #1 Call Trace: [<ffffffff81076c38>] print_circular_bug+0xa8/0xb6 [<ffffffff81077a85>] __lock_acquire+0xa1d/0xd2b [<ffffffffa02ba5fc>] ? fcoe_create+0x27/0x4f7 [fcoe] [<ffffffff81077e60>] lock_acquire+0xcd/0xf1 [<ffffffffa02ba5fc>] ? fcoe_create+0x27/0x4f7 [fcoe] [<ffffffffa02ba5fc>] ? fcoe_create+0x27/0x4f7 [fcoe] [<ffffffff8142f343>] __mutex_lock_common+0x4b/0x383 [<ffffffffa02ba5fc>] ? fcoe_create+0x27/0x4f7 [fcoe] [<ffffffff8106ac70>] ? cpu_clock+0x43/0x5e [<ffffffff81074e12>] ? lockstat_clock+0x11/0x13 [<ffffffff81074e40>] ? lock_release_holdtime+0x2c/0x127 [<ffffffff8115ef93>] ? sysfs_get_active_two+0x31/0x48 [<ffffffff8142f73f>] mutex_lock_nested+0x3e/0x43 [<ffffffffa02ba5fc>] fcoe_create+0x27/0x4f7 [fcoe] [<ffffffff810635b1>] param_attr_store+0x27/0x35 [<ffffffff81063619>] module_attr_store+0x26/0x2a [<ffffffff8115dae3>] sysfs_write_file+0x108/0x144 [<ffffffff81107bd1>] vfs_write+0xae/0x10b [<ffffffff81076596>] ? trace_hardirqs_on_caller+0x125/0x150 [<ffffffff81107cee>] sys_write+0x4a/0x6e [<ffffffff81009b42>] system_call_fastpath+0x16/0x1b Signed-off-by: Vasu Dev <vasu.dev@intel.com> Signed-off-by: Robert Love <robert.w.love@intel.com> Signed-off-by: James Bottomley <James.Bottomley@suse.de>
2010-05-08 06:18:46 +08:00
/* look for existing lport */
if (fcoe_hostlist_lookup(netdev)) {
rc = -EEXIST;
goto out_nodev;
}
fcoe = fcoe_interface_create(netdev, fip_mode);
if (IS_ERR(fcoe)) {
rc = PTR_ERR(fcoe);
goto out_nodev;
}
lport = fcoe_if_create(fcoe, &netdev->dev, 0);
if (IS_ERR(lport)) {
printk(KERN_ERR "fcoe: Failed to create interface (%s)\n",
netdev->name);
rc = -EIO;
[SCSI] fcoe: Fix deadlock between fip's recv_work and rtnl The rtnl cannot be held durrng the fcoe_interface_put. If it is the last reference on the fcoe_interface the fcoe_ctlr_destroy will be called as a part of the cleanup, ultimately calling cancel_work_sync(&fip->recv_work); If we are processing a flogi response we will be in the recv_work context and we will lock the rtnl to add a new unicast MAC address. This is how the deadlock can occur. The fix is simply to move the rtnl_lock/unlock into fcoe_interface_cleanup so that it can be unlocked before fcoe_interface_put is called. Here is the lockdep report: Jul 21 11:26:35 bubba [ 223.870702] ul 21 11:26:35 bubba [ 223.870704] ======================================================= Jul 21 11:26:35 bubba [ 223.871255] [ INFO: possible circular locking dependency detected ] Jul 21 11:26:35 bubba [ 223.871530] 3.0.0-rc7+ #1 Jul 21 11:26:35 bubba [ 223.871797] ------------------------------------------------------- Jul 21 11:26:35 bubba [ 223.872072] lockdeptest.sh/3464 is trying to acquire lock: Jul 21 11:26:35 bubba [ 223.872345] ((&fip->recv_work) Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff810531f1>] wait_on_work+0x0/0xbd Jul 21 11:26:35 bubba [ 223.873022] Jul 21 11:26:35 bubba [ 223.873023] but task is already holding lock: Jul 21 11:26:35 bubba [ 223.873555] (rtnl_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff813e8233>] rtnl_lock+0x12/0x14 Jul 21 11:26:35 bubba [ 223.874229] Jul 21 11:26:35 bubba [ 223.874230] which lock already depends on the new lock. Jul 21 11:26:35 bubba [ 223.874231] Jul 21 11:26:35 bubba [ 223.875032] Jul 21 11:26:35 bubba [ 223.875033] the existing dependency chain (in reverse order) is: Jul 21 11:26:35 bubba [ 223.875573] Jul 21 11:26:35 bubba [ 223.875573] -> #1 Jul 21 11:26:35 bubba (rtnl_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba : Jul 21 11:26:35 bubba [ 223.876301] Jul 21 11:26:35 bubba [<ffffffff8106c14a>] lock_acquire+0xd2/0xf7 Jul 21 11:26:35 bubba [ 223.876645] Jul 21 11:26:35 bubba [<ffffffff8151d975>] __mutex_lock_common+0x47/0x30d Jul 21 11:26:35 bubba [ 223.876991] Jul 21 11:26:35 bubba [<ffffffff8151dd36>] mutex_lock_nested+0x3b/0x40 Jul 21 11:26:35 bubba [ 223.877334] Jul 21 11:26:35 bubba [<ffffffff813e8233>] rtnl_lock+0x12/0x14 Jul 21 11:26:35 bubba [ 223.877675] Jul 21 11:26:35 bubba [<ffffffffa003d5a0>] fcoe_update_src_mac+0x2b/0x80 [fcoe] Jul 21 11:26:35 bubba [ 223.878022] Jul 21 11:26:35 bubba [<ffffffffa003d698>] fcoe_flogi_resp+0x5e/0x79 [fcoe] Jul 21 11:26:35 bubba [ 223.878366] Jul 21 11:26:35 bubba [<ffffffffa001566f>] fc_exch_recv+0x7f5/0x9da [libfc] Jul 21 11:26:35 bubba [ 223.878713] Jul 21 11:26:35 bubba [<ffffffffa00327d8>] fcoe_ctlr_recv_work+0x71f/0x10dc [libfcoe] Jul 21 11:26:35 bubba [ 223.879258] Jul 21 11:26:35 bubba [<ffffffff81053761>] process_one_work+0x1d7/0x347 Jul 21 11:26:35 bubba [ 223.879601] Jul 21 11:26:35 bubba [<ffffffff81054ade>] worker_thread+0xf8/0x17c Jul 21 11:26:35 bubba [ 223.879944] Jul 21 11:26:35 bubba [<ffffffff81058184>] kthread+0x7d/0x85 Jul 21 11:26:35 bubba [ 223.880287] Jul 21 11:26:35 bubba [<ffffffff81526414>] kernel_thread_helper+0x4/0x10 Jul 21 11:26:35 bubba [ 223.880634] Jul 21 11:26:35 bubba [ 223.880635] -> #0 Jul 21 11:26:35 bubba ((&fip->recv_work) Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba : Jul 21 11:26:35 bubba [ 223.881357] Jul 21 11:26:35 bubba [<ffffffff8106b93e>] __lock_acquire+0xb1d/0xe2c Jul 21 11:26:35 bubba [ 223.881695] Jul 21 11:26:35 bubba [<ffffffff8106c14a>] lock_acquire+0xd2/0xf7 Jul 21 11:26:35 bubba [ 223.882033] Jul 21 11:26:35 bubba [<ffffffff81053241>] wait_on_work+0x50/0xbd Jul 21 11:26:35 bubba [ 223.882378] Jul 21 11:26:35 bubba [<ffffffff81053b32>] __cancel_work_timer+0xb6/0xf4 Jul 21 11:26:35 bubba [ 223.882718] Jul 21 11:26:35 bubba [<ffffffff81053b8a>] cancel_work_sync+0xb/0xd Jul 21 11:26:35 bubba [ 223.883057] Jul 21 11:26:35 bubba [<ffffffffa00317e6>] fcoe_ctlr_destroy+0x1d/0x67 [libfcoe] Jul 21 11:26:35 bubba [ 223.883399] Jul 21 11:26:35 bubba [<ffffffffa003e51e>] fcoe_interface_release+0x21/0x45 [fcoe] Jul 21 11:26:35 bubba [ 223.883940] Jul 21 11:26:35 bubba [<ffffffff811fbbe6>] kref_put+0x43/0x4d Jul 21 11:26:35 bubba [ 223.884280] Jul 21 11:26:35 bubba [<ffffffffa003ebba>] fcoe_interface_put+0x17/0x19 [fcoe] Jul 21 11:26:35 bubba [ 223.884624] Jul 21 11:26:35 bubba [<ffffffffa003f2a6>] fcoe_interface_cleanup+0x188/0x193 [fcoe] Jul 21 11:26:35 bubba [ 223.885163] Jul 21 11:26:35 bubba [<ffffffffa003f303>] fcoe_destroy+0x52/0x72 [fcoe] Jul 21 11:26:35 bubba [ 223.885502] Jul 21 11:26:35 bubba [<ffffffffa00340a4>] fcoe_transport_destroy+0xab/0x110 [libfcoe] Jul 21 11:26:35 bubba [ 223.886045] Jul 21 11:26:35 bubba [<ffffffff81056153>] param_attr_store+0x43/0x62 Jul 21 11:26:35 bubba [ 223.886385] Jul 21 11:26:35 bubba [<ffffffff8105602d>] module_attr_store+0x21/0x25 Jul 21 11:26:35 bubba [ 223.886728] Jul 21 11:26:35 bubba [<ffffffff8114c23d>] sysfs_write_file+0x103/0x13f Jul 21 11:26:35 bubba [ 223.887068] Jul 21 11:26:35 bubba [<ffffffff810f3e7b>] vfs_write+0xa7/0xfa Jul 21 11:26:35 bubba [ 223.887406] Jul 21 11:26:35 bubba [<ffffffff810f4073>] sys_write+0x45/0x69 Jul 21 11:26:35 bubba [ 223.887742] Jul 21 11:26:35 bubba [<ffffffff815252bb>] system_call_fastpath+0x16/0x1b Jul 21 11:26:35 bubba [ 223.888083] Jul 21 11:26:35 bubba [ 223.888084] other info that might help us debug this: Jul 21 11:26:35 bubba [ 223.888085] Jul 21 11:26:35 bubba [ 223.888879] Possible unsafe locking scenario: Jul 21 11:26:35 bubba [ 223.888881] Jul 21 11:26:35 bubba [ 223.889411] CPU0 CPU1 Jul 21 11:26:35 bubba [ 223.889683] ---- ---- Jul 21 11:26:35 bubba [ 223.889955] lock( Jul 21 11:26:35 bubba rtnl_mutex Jul 21 11:26:35 bubba ); Jul 21 11:26:35 bubba [ 223.890349] lock( Jul 21 11:26:35 bubba (&fip->recv_work) Jul 21 11:26:35 bubba ); Jul 21 11:26:35 bubba [ 223.890751] lock( Jul 21 11:26:35 bubba rtnl_mutex Jul 21 11:26:35 bubba ); Jul 21 11:26:35 bubba [ 223.891154] lock( Jul 21 11:26:35 bubba (&fip->recv_work) Jul 21 11:26:35 bubba ); Jul 21 11:26:35 bubba [ 223.891549] Jul 21 11:26:35 bubba [ 223.891550] *** DEADLOCK *** Jul 21 11:26:35 bubba [ 223.891551] Jul 21 11:26:35 bubba [ 223.892347] 6 locks held by lockdeptest.sh/3464: Jul 21 11:26:35 bubba [ 223.892621] #0: Jul 21 11:26:35 bubba (&buffer->mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff8114c171>] sysfs_write_file+0x37/0x13f Jul 21 11:26:35 bubba [ 223.893359] #1: Jul 21 11:26:35 bubba (s_active Jul 21 11:26:35 bubba ){++++.+} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff8114c21c>] sysfs_write_file+0xe2/0x13f Jul 21 11:26:35 bubba [ 223.894094] #2: Jul 21 11:26:35 bubba (param_lock Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff81056146>] param_attr_store+0x36/0x62 Jul 21 11:26:35 bubba [ 223.894835] #3: Jul 21 11:26:35 bubba (ft_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffffa0034017>] fcoe_transport_destroy+0x1e/0x110 [libfcoe] Jul 21 11:26:35 bubba [ 223.895574] #4: Jul 21 11:26:35 bubba (fcoe_config_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffffa003f2c9>] fcoe_destroy+0x18/0x72 [fcoe] Jul 21 11:26:35 bubba [ 223.896314] #5: Jul 21 11:26:35 bubba (rtnl_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff813e8233>] rtnl_lock+0x12/0x14 Jul 21 11:26:35 bubba [ 223.897047] Jul 21 11:26:35 bubba [ 223.897048] stack backtrace: Jul 21 11:26:35 bubba [ 223.897578] Pid: 3464, comm: lockdeptest.sh Not tainted 3.0.0-rc7+ #1 Jul 21 11:26:35 bubba [ 223.897853] Call Trace: Jul 21 11:26:35 bubba [ 223.898128] [<ffffffff81068e16>] print_circular_bug+0x1f8/0x209 Jul 21 11:26:35 bubba [ 223.898416] [<ffffffff8106b93e>] __lock_acquire+0xb1d/0xe2c Jul 21 11:26:35 bubba [ 223.898699] [<ffffffff810531f1>] ? wait_on_cpu_work+0xe6/0xe6 Jul 21 11:26:35 bubba [ 223.898982] [<ffffffff8106c14a>] lock_acquire+0xd2/0xf7 Jul 21 11:26:35 bubba [ 223.899263] [<ffffffff810531f1>] ? wait_on_cpu_work+0xe6/0xe6 Jul 21 11:26:35 bubba [ 223.899547] [<ffffffff8104a097>] ? mod_timer+0x8f/0x98 Jul 21 11:26:35 bubba [ 223.899827] [<ffffffff81053241>] wait_on_work+0x50/0xbd Jul 21 11:26:35 bubba [ 223.900108] [<ffffffff810531f1>] ? wait_on_cpu_work+0xe6/0xe6 Jul 21 11:26:35 bubba [ 223.900390] [<ffffffff81053b32>] __cancel_work_timer+0xb6/0xf4 Jul 21 11:26:35 bubba [ 223.900671] [<ffffffff81053b8a>] cancel_work_sync+0xb/0xd Jul 21 11:26:35 bubba [ 223.900953] [<ffffffffa00317e6>] fcoe_ctlr_destroy+0x1d/0x67 [libfcoe] Jul 21 11:26:35 bubba [ 223.901237] [<ffffffffa003e51e>] fcoe_interface_release+0x21/0x45 [fcoe] Jul 21 11:26:35 bubba [ 223.901522] [<ffffffffa003e4fd>] ? fcoe_enable+0x6b/0x6b [fcoe] Jul 21 11:26:35 bubba [ 223.901803] [<ffffffff811fbbe6>] kref_put+0x43/0x4d Jul 21 11:26:35 bubba [ 223.902083] [<ffffffffa003ebba>] fcoe_interface_put+0x17/0x19 [fcoe] Jul 21 11:26:35 bubba [ 223.902367] [<ffffffffa003f2a6>] fcoe_interface_cleanup+0x188/0x193 [fcoe] Jul 21 11:26:35 bubba [ 223.902653] [<ffffffff8151dd36>] ? mutex_lock_nested+0x3b/0x40 Jul 21 11:26:35 bubba [ 223.902939] [<ffffffffa003f303>] fcoe_destroy+0x52/0x72 [fcoe] Jul 21 11:26:35 bubba [ 223.903223] [<ffffffffa00340a4>] fcoe_transport_destroy+0xab/0x110 [libfcoe] Jul 21 11:26:35 bubba [ 223.903508] [<ffffffff81056153>] param_attr_store+0x43/0x62 Jul 21 11:26:35 bubba [ 223.903792] [<ffffffff8105602d>] module_attr_store+0x21/0x25 Jul 21 11:26:35 bubba [ 223.904075] [<ffffffff8114c23d>] sysfs_write_file+0x103/0x13f Jul 21 11:26:35 bubba [ 223.904357] [<ffffffff810f3e7b>] vfs_write+0xa7/0xfa Jul 21 11:26:35 bubba [ 223.904642] [<ffffffff810f51d6>] ? fget_light+0x35/0x96 Jul 21 11:26:35 bubba [ 223.904923] [<ffffffff810f4073>] sys_write+0x45/0x69 Jul 21 11:26:35 bubba [ 223.905204] [<ffffffff815252bb>] system_call_fastpath+0x16/0x1b Jul 21 11:26:36 bubba [ 223.964438] ixgbe 0000:05:00.0: eth3: detected SFP+: 5 Jul 21 11:26:37 bubba [ 225.196702] ixgbe 0000:05:00.0: eth3: NIC Link is Up 10 Gbps, Flow Control: None Signed-off-by: Robert Love <robert.w.love@intel.com> Tested-by: Ross Brattain <ross.b.brattain@intel.com> Reviewed-by: Yi Zou <yi.zou@intel.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2011-08-26 03:40:47 +08:00
rtnl_unlock();
fcoe_interface_cleanup(fcoe);
[SCSI] fcoe: Fix deadlock between fip's recv_work and rtnl The rtnl cannot be held durrng the fcoe_interface_put. If it is the last reference on the fcoe_interface the fcoe_ctlr_destroy will be called as a part of the cleanup, ultimately calling cancel_work_sync(&fip->recv_work); If we are processing a flogi response we will be in the recv_work context and we will lock the rtnl to add a new unicast MAC address. This is how the deadlock can occur. The fix is simply to move the rtnl_lock/unlock into fcoe_interface_cleanup so that it can be unlocked before fcoe_interface_put is called. Here is the lockdep report: Jul 21 11:26:35 bubba [ 223.870702] ul 21 11:26:35 bubba [ 223.870704] ======================================================= Jul 21 11:26:35 bubba [ 223.871255] [ INFO: possible circular locking dependency detected ] Jul 21 11:26:35 bubba [ 223.871530] 3.0.0-rc7+ #1 Jul 21 11:26:35 bubba [ 223.871797] ------------------------------------------------------- Jul 21 11:26:35 bubba [ 223.872072] lockdeptest.sh/3464 is trying to acquire lock: Jul 21 11:26:35 bubba [ 223.872345] ((&fip->recv_work) Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff810531f1>] wait_on_work+0x0/0xbd Jul 21 11:26:35 bubba [ 223.873022] Jul 21 11:26:35 bubba [ 223.873023] but task is already holding lock: Jul 21 11:26:35 bubba [ 223.873555] (rtnl_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff813e8233>] rtnl_lock+0x12/0x14 Jul 21 11:26:35 bubba [ 223.874229] Jul 21 11:26:35 bubba [ 223.874230] which lock already depends on the new lock. Jul 21 11:26:35 bubba [ 223.874231] Jul 21 11:26:35 bubba [ 223.875032] Jul 21 11:26:35 bubba [ 223.875033] the existing dependency chain (in reverse order) is: Jul 21 11:26:35 bubba [ 223.875573] Jul 21 11:26:35 bubba [ 223.875573] -> #1 Jul 21 11:26:35 bubba (rtnl_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba : Jul 21 11:26:35 bubba [ 223.876301] Jul 21 11:26:35 bubba [<ffffffff8106c14a>] lock_acquire+0xd2/0xf7 Jul 21 11:26:35 bubba [ 223.876645] Jul 21 11:26:35 bubba [<ffffffff8151d975>] __mutex_lock_common+0x47/0x30d Jul 21 11:26:35 bubba [ 223.876991] Jul 21 11:26:35 bubba [<ffffffff8151dd36>] mutex_lock_nested+0x3b/0x40 Jul 21 11:26:35 bubba [ 223.877334] Jul 21 11:26:35 bubba [<ffffffff813e8233>] rtnl_lock+0x12/0x14 Jul 21 11:26:35 bubba [ 223.877675] Jul 21 11:26:35 bubba [<ffffffffa003d5a0>] fcoe_update_src_mac+0x2b/0x80 [fcoe] Jul 21 11:26:35 bubba [ 223.878022] Jul 21 11:26:35 bubba [<ffffffffa003d698>] fcoe_flogi_resp+0x5e/0x79 [fcoe] Jul 21 11:26:35 bubba [ 223.878366] Jul 21 11:26:35 bubba [<ffffffffa001566f>] fc_exch_recv+0x7f5/0x9da [libfc] Jul 21 11:26:35 bubba [ 223.878713] Jul 21 11:26:35 bubba [<ffffffffa00327d8>] fcoe_ctlr_recv_work+0x71f/0x10dc [libfcoe] Jul 21 11:26:35 bubba [ 223.879258] Jul 21 11:26:35 bubba [<ffffffff81053761>] process_one_work+0x1d7/0x347 Jul 21 11:26:35 bubba [ 223.879601] Jul 21 11:26:35 bubba [<ffffffff81054ade>] worker_thread+0xf8/0x17c Jul 21 11:26:35 bubba [ 223.879944] Jul 21 11:26:35 bubba [<ffffffff81058184>] kthread+0x7d/0x85 Jul 21 11:26:35 bubba [ 223.880287] Jul 21 11:26:35 bubba [<ffffffff81526414>] kernel_thread_helper+0x4/0x10 Jul 21 11:26:35 bubba [ 223.880634] Jul 21 11:26:35 bubba [ 223.880635] -> #0 Jul 21 11:26:35 bubba ((&fip->recv_work) Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba : Jul 21 11:26:35 bubba [ 223.881357] Jul 21 11:26:35 bubba [<ffffffff8106b93e>] __lock_acquire+0xb1d/0xe2c Jul 21 11:26:35 bubba [ 223.881695] Jul 21 11:26:35 bubba [<ffffffff8106c14a>] lock_acquire+0xd2/0xf7 Jul 21 11:26:35 bubba [ 223.882033] Jul 21 11:26:35 bubba [<ffffffff81053241>] wait_on_work+0x50/0xbd Jul 21 11:26:35 bubba [ 223.882378] Jul 21 11:26:35 bubba [<ffffffff81053b32>] __cancel_work_timer+0xb6/0xf4 Jul 21 11:26:35 bubba [ 223.882718] Jul 21 11:26:35 bubba [<ffffffff81053b8a>] cancel_work_sync+0xb/0xd Jul 21 11:26:35 bubba [ 223.883057] Jul 21 11:26:35 bubba [<ffffffffa00317e6>] fcoe_ctlr_destroy+0x1d/0x67 [libfcoe] Jul 21 11:26:35 bubba [ 223.883399] Jul 21 11:26:35 bubba [<ffffffffa003e51e>] fcoe_interface_release+0x21/0x45 [fcoe] Jul 21 11:26:35 bubba [ 223.883940] Jul 21 11:26:35 bubba [<ffffffff811fbbe6>] kref_put+0x43/0x4d Jul 21 11:26:35 bubba [ 223.884280] Jul 21 11:26:35 bubba [<ffffffffa003ebba>] fcoe_interface_put+0x17/0x19 [fcoe] Jul 21 11:26:35 bubba [ 223.884624] Jul 21 11:26:35 bubba [<ffffffffa003f2a6>] fcoe_interface_cleanup+0x188/0x193 [fcoe] Jul 21 11:26:35 bubba [ 223.885163] Jul 21 11:26:35 bubba [<ffffffffa003f303>] fcoe_destroy+0x52/0x72 [fcoe] Jul 21 11:26:35 bubba [ 223.885502] Jul 21 11:26:35 bubba [<ffffffffa00340a4>] fcoe_transport_destroy+0xab/0x110 [libfcoe] Jul 21 11:26:35 bubba [ 223.886045] Jul 21 11:26:35 bubba [<ffffffff81056153>] param_attr_store+0x43/0x62 Jul 21 11:26:35 bubba [ 223.886385] Jul 21 11:26:35 bubba [<ffffffff8105602d>] module_attr_store+0x21/0x25 Jul 21 11:26:35 bubba [ 223.886728] Jul 21 11:26:35 bubba [<ffffffff8114c23d>] sysfs_write_file+0x103/0x13f Jul 21 11:26:35 bubba [ 223.887068] Jul 21 11:26:35 bubba [<ffffffff810f3e7b>] vfs_write+0xa7/0xfa Jul 21 11:26:35 bubba [ 223.887406] Jul 21 11:26:35 bubba [<ffffffff810f4073>] sys_write+0x45/0x69 Jul 21 11:26:35 bubba [ 223.887742] Jul 21 11:26:35 bubba [<ffffffff815252bb>] system_call_fastpath+0x16/0x1b Jul 21 11:26:35 bubba [ 223.888083] Jul 21 11:26:35 bubba [ 223.888084] other info that might help us debug this: Jul 21 11:26:35 bubba [ 223.888085] Jul 21 11:26:35 bubba [ 223.888879] Possible unsafe locking scenario: Jul 21 11:26:35 bubba [ 223.888881] Jul 21 11:26:35 bubba [ 223.889411] CPU0 CPU1 Jul 21 11:26:35 bubba [ 223.889683] ---- ---- Jul 21 11:26:35 bubba [ 223.889955] lock( Jul 21 11:26:35 bubba rtnl_mutex Jul 21 11:26:35 bubba ); Jul 21 11:26:35 bubba [ 223.890349] lock( Jul 21 11:26:35 bubba (&fip->recv_work) Jul 21 11:26:35 bubba ); Jul 21 11:26:35 bubba [ 223.890751] lock( Jul 21 11:26:35 bubba rtnl_mutex Jul 21 11:26:35 bubba ); Jul 21 11:26:35 bubba [ 223.891154] lock( Jul 21 11:26:35 bubba (&fip->recv_work) Jul 21 11:26:35 bubba ); Jul 21 11:26:35 bubba [ 223.891549] Jul 21 11:26:35 bubba [ 223.891550] *** DEADLOCK *** Jul 21 11:26:35 bubba [ 223.891551] Jul 21 11:26:35 bubba [ 223.892347] 6 locks held by lockdeptest.sh/3464: Jul 21 11:26:35 bubba [ 223.892621] #0: Jul 21 11:26:35 bubba (&buffer->mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff8114c171>] sysfs_write_file+0x37/0x13f Jul 21 11:26:35 bubba [ 223.893359] #1: Jul 21 11:26:35 bubba (s_active Jul 21 11:26:35 bubba ){++++.+} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff8114c21c>] sysfs_write_file+0xe2/0x13f Jul 21 11:26:35 bubba [ 223.894094] #2: Jul 21 11:26:35 bubba (param_lock Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff81056146>] param_attr_store+0x36/0x62 Jul 21 11:26:35 bubba [ 223.894835] #3: Jul 21 11:26:35 bubba (ft_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffffa0034017>] fcoe_transport_destroy+0x1e/0x110 [libfcoe] Jul 21 11:26:35 bubba [ 223.895574] #4: Jul 21 11:26:35 bubba (fcoe_config_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffffa003f2c9>] fcoe_destroy+0x18/0x72 [fcoe] Jul 21 11:26:35 bubba [ 223.896314] #5: Jul 21 11:26:35 bubba (rtnl_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff813e8233>] rtnl_lock+0x12/0x14 Jul 21 11:26:35 bubba [ 223.897047] Jul 21 11:26:35 bubba [ 223.897048] stack backtrace: Jul 21 11:26:35 bubba [ 223.897578] Pid: 3464, comm: lockdeptest.sh Not tainted 3.0.0-rc7+ #1 Jul 21 11:26:35 bubba [ 223.897853] Call Trace: Jul 21 11:26:35 bubba [ 223.898128] [<ffffffff81068e16>] print_circular_bug+0x1f8/0x209 Jul 21 11:26:35 bubba [ 223.898416] [<ffffffff8106b93e>] __lock_acquire+0xb1d/0xe2c Jul 21 11:26:35 bubba [ 223.898699] [<ffffffff810531f1>] ? wait_on_cpu_work+0xe6/0xe6 Jul 21 11:26:35 bubba [ 223.898982] [<ffffffff8106c14a>] lock_acquire+0xd2/0xf7 Jul 21 11:26:35 bubba [ 223.899263] [<ffffffff810531f1>] ? wait_on_cpu_work+0xe6/0xe6 Jul 21 11:26:35 bubba [ 223.899547] [<ffffffff8104a097>] ? mod_timer+0x8f/0x98 Jul 21 11:26:35 bubba [ 223.899827] [<ffffffff81053241>] wait_on_work+0x50/0xbd Jul 21 11:26:35 bubba [ 223.900108] [<ffffffff810531f1>] ? wait_on_cpu_work+0xe6/0xe6 Jul 21 11:26:35 bubba [ 223.900390] [<ffffffff81053b32>] __cancel_work_timer+0xb6/0xf4 Jul 21 11:26:35 bubba [ 223.900671] [<ffffffff81053b8a>] cancel_work_sync+0xb/0xd Jul 21 11:26:35 bubba [ 223.900953] [<ffffffffa00317e6>] fcoe_ctlr_destroy+0x1d/0x67 [libfcoe] Jul 21 11:26:35 bubba [ 223.901237] [<ffffffffa003e51e>] fcoe_interface_release+0x21/0x45 [fcoe] Jul 21 11:26:35 bubba [ 223.901522] [<ffffffffa003e4fd>] ? fcoe_enable+0x6b/0x6b [fcoe] Jul 21 11:26:35 bubba [ 223.901803] [<ffffffff811fbbe6>] kref_put+0x43/0x4d Jul 21 11:26:35 bubba [ 223.902083] [<ffffffffa003ebba>] fcoe_interface_put+0x17/0x19 [fcoe] Jul 21 11:26:35 bubba [ 223.902367] [<ffffffffa003f2a6>] fcoe_interface_cleanup+0x188/0x193 [fcoe] Jul 21 11:26:35 bubba [ 223.902653] [<ffffffff8151dd36>] ? mutex_lock_nested+0x3b/0x40 Jul 21 11:26:35 bubba [ 223.902939] [<ffffffffa003f303>] fcoe_destroy+0x52/0x72 [fcoe] Jul 21 11:26:35 bubba [ 223.903223] [<ffffffffa00340a4>] fcoe_transport_destroy+0xab/0x110 [libfcoe] Jul 21 11:26:35 bubba [ 223.903508] [<ffffffff81056153>] param_attr_store+0x43/0x62 Jul 21 11:26:35 bubba [ 223.903792] [<ffffffff8105602d>] module_attr_store+0x21/0x25 Jul 21 11:26:35 bubba [ 223.904075] [<ffffffff8114c23d>] sysfs_write_file+0x103/0x13f Jul 21 11:26:35 bubba [ 223.904357] [<ffffffff810f3e7b>] vfs_write+0xa7/0xfa Jul 21 11:26:35 bubba [ 223.904642] [<ffffffff810f51d6>] ? fget_light+0x35/0x96 Jul 21 11:26:35 bubba [ 223.904923] [<ffffffff810f4073>] sys_write+0x45/0x69 Jul 21 11:26:35 bubba [ 223.905204] [<ffffffff815252bb>] system_call_fastpath+0x16/0x1b Jul 21 11:26:36 bubba [ 223.964438] ixgbe 0000:05:00.0: eth3: detected SFP+: 5 Jul 21 11:26:37 bubba [ 225.196702] ixgbe 0000:05:00.0: eth3: NIC Link is Up 10 Gbps, Flow Control: None Signed-off-by: Robert Love <robert.w.love@intel.com> Tested-by: Ross Brattain <ross.b.brattain@intel.com> Reviewed-by: Yi Zou <yi.zou@intel.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2011-08-26 03:40:47 +08:00
goto out_nortnl;
}
/* Make this the "master" N_Port */
fcoe->ctlr.lp = lport;
/* setup DCB priority attributes. */
fcoe_dcb_create(fcoe);
/* add to lports list */
fcoe_hostlist_add(lport);
/* start FIP Discovery and FLOGI */
lport->boot_time = jiffies;
fc_fabric_login(lport);
[SCSI] fcoe: Drop the rtnl_mutex before calling fcoe_ctlr_link_up The rtnl_lock is primarily used to serialize networking driver changes as well as to ensure that a networking driver is not removed when making changes to it. fcoe also uses the rtnl_lock to protect the fcoe hostlist. fcoe_create holds the rtnl_lock over the entirity of the routine including a the call to fcoe_ctlr_link_up. This causes the below deadlock because fcoe_ctlr_link_up acquires the fcoe_ctlr ctlr_mutex and this deadlocks with a libfcoe thread that acquires the fcoe_ctlr ctlr_mutex and then the rtnl_lock (to update a MAC address). This patch drops the rtnl_lock before calling fcoe_ctlr_link_up and therefore the deadlock is prevented. https://bugzilla.kernel.org/show_bug.cgi?id=42918 the existing dependency chain (in reverse order) is: -> #1 (&fip->ctlr_mutex){+.+...}: [<c1091f70>] lock_acquire+0x80/0x1b0 [<c147655d>] mutex_lock_nested+0x6d/0x340 [<f8970c32>] fcoe_ctlr_link_up+0x22/0x180 [libfcoe] [<f894620e>] fcoe_create+0x47e/0x6e0 [fcoe] [<f8973dd3>] fcoe_transport_create+0x143/0x250 [libfcoe] [<c10527e0>] param_attr_store+0x30/0x60 [<c1052696>] module_attr_store+0x26/0x40 [<c11a201e>] sysfs_write_file+0xae/0x100 [<c11449df>] vfs_write+0x8f/0x160 [<c1144cbd>] sys_write+0x3d/0x70 [<c147a0c4>] syscall_call+0x7/0xb -> #0 (rtnl_mutex){+.+.+.}: [<c109164b>] __lock_acquire+0x140b/0x1720 [<c1091f70>] lock_acquire+0x80/0x1b0 [<c147655d>] mutex_lock_nested+0x6d/0x340 [<c13a10c4>] rtnl_lock+0x14/0x20 [<f89445ac>] fcoe_update_src_mac+0x2c/0xb0 [fcoe] [<f8971712>] fcoe_ctlr_timer_work+0x712/0xb60 [libfcoe] [<c104fb69>] process_one_work+0x179/0x5d0 [<c10502f1>] worker_thread+0x121/0x2d0 [<c10550ed>] kthread+0x7d/0x90 [<c1481a82>] kernel_thread_helper+0x6/0x10 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&fip->ctlr_mutex); lock(rtnl_mutex); lock(&fip->ctlr_mutex); lock(rtnl_mutex); *** DEADLOCK *** Signed-off-by: Robert Love <robert.w.love@intel.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2012-03-14 09:22:12 +08:00
if (!fcoe_link_ok(lport)) {
rtnl_unlock();
fcoe_ctlr_link_up(&fcoe->ctlr);
[SCSI] fcoe: Drop the rtnl_mutex before calling fcoe_ctlr_link_up The rtnl_lock is primarily used to serialize networking driver changes as well as to ensure that a networking driver is not removed when making changes to it. fcoe also uses the rtnl_lock to protect the fcoe hostlist. fcoe_create holds the rtnl_lock over the entirity of the routine including a the call to fcoe_ctlr_link_up. This causes the below deadlock because fcoe_ctlr_link_up acquires the fcoe_ctlr ctlr_mutex and this deadlocks with a libfcoe thread that acquires the fcoe_ctlr ctlr_mutex and then the rtnl_lock (to update a MAC address). This patch drops the rtnl_lock before calling fcoe_ctlr_link_up and therefore the deadlock is prevented. https://bugzilla.kernel.org/show_bug.cgi?id=42918 the existing dependency chain (in reverse order) is: -> #1 (&fip->ctlr_mutex){+.+...}: [<c1091f70>] lock_acquire+0x80/0x1b0 [<c147655d>] mutex_lock_nested+0x6d/0x340 [<f8970c32>] fcoe_ctlr_link_up+0x22/0x180 [libfcoe] [<f894620e>] fcoe_create+0x47e/0x6e0 [fcoe] [<f8973dd3>] fcoe_transport_create+0x143/0x250 [libfcoe] [<c10527e0>] param_attr_store+0x30/0x60 [<c1052696>] module_attr_store+0x26/0x40 [<c11a201e>] sysfs_write_file+0xae/0x100 [<c11449df>] vfs_write+0x8f/0x160 [<c1144cbd>] sys_write+0x3d/0x70 [<c147a0c4>] syscall_call+0x7/0xb -> #0 (rtnl_mutex){+.+.+.}: [<c109164b>] __lock_acquire+0x140b/0x1720 [<c1091f70>] lock_acquire+0x80/0x1b0 [<c147655d>] mutex_lock_nested+0x6d/0x340 [<c13a10c4>] rtnl_lock+0x14/0x20 [<f89445ac>] fcoe_update_src_mac+0x2c/0xb0 [fcoe] [<f8971712>] fcoe_ctlr_timer_work+0x712/0xb60 [libfcoe] [<c104fb69>] process_one_work+0x179/0x5d0 [<c10502f1>] worker_thread+0x121/0x2d0 [<c10550ed>] kthread+0x7d/0x90 [<c1481a82>] kernel_thread_helper+0x6/0x10 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&fip->ctlr_mutex); lock(rtnl_mutex); lock(&fip->ctlr_mutex); lock(rtnl_mutex); *** DEADLOCK *** Signed-off-by: Robert Love <robert.w.love@intel.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2012-03-14 09:22:12 +08:00
mutex_unlock(&fcoe_config_mutex);
return rc;
}
out_nodev:
[SCSI] fcoe: fix a circular locking issue with rtnl and sysfs mutex Currently rtnl mutex is grabbed during fcoe create, destroy, enable and disable operations while sysfs s_active read mutex is already held, but simultaneously other networking events could try grabbing write s_active mutex while rtnl is already held and that is causing circular lock warning, its detailed log pasted at end. In this log, the rtnl was held before write s_active during device renaming but there are more such cases as Joe reported another instance with tg3 open at:- http://www.open-fcoe.org/pipermail/devel/2010-February/008263.html This patch fixes this issue by not waiting for rtnl mutex during fcoe ops, that means if rtnl mutex is not immediately available then restart_syscall() to allow others waiting in line to grab s_active along with rtnl mutex to finish their work first under these mutex. Currently rtnl mutex was grabbed twice during fcoe_destroy call flow, second grab was from fcoe_if_destroy called from fcoe_destroy after dropping rtnl mutex before calling fcoe_if_destroy, so instead made fcoe_if_destroy always called with rtnl mutex held to have this mutex grabbed only once in this code path. However left matching rtnl_unlock as-is in its original place as it was dropped there for good reason since very next call causes synchronous fip worker flush and if rtnl mutex is still held before flush then that would cause new circular warning between fip->recv_work and rtnl mutex, I've added detailed comment for this on fcoe_if_destroy calling and rtnl muxtes unlocking. ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.33.1linux-stable-2.6.33 #1 ------------------------------------------------------- fcoemon/18823 is trying to acquire lock: (fcoe_config_mutex){+.+.+.}, at: [<ffffffffa02ba5fc>] fcoe_create+0x27/0x4f7 [fcoe] but task is already holding lock: (s_active){++++.+}, at: [<ffffffff8115ef93>] sysfs_get_active_two+0x31/0x48 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (s_active){++++.+}: [<ffffffff81077bdb>] __lock_acquire+0xb73/0xd2b [<ffffffff81077e60>] lock_acquire+0xcd/0xf1 [<ffffffff8115e5df>] sysfs_deactivate+0x8b/0xe0 [<ffffffff8115edfb>] sysfs_addrm_finish+0x36/0x55 [<ffffffff8115d0cc>] sysfs_hash_and_remove+0x53/0x6a [<ffffffff8115f353>] sysfs_remove_link+0x21/0x23 [<ffffffff812b6c93>] device_rename+0x99/0xcb [<ffffffff8138dbf0>] dev_change_name+0xd5/0x1d2 [<ffffffff8138deee>] dev_ifsioc+0x201/0x2ac [<ffffffff8138e4ba>] dev_ioctl+0x521/0x632 [<ffffffff81379e43>] sock_do_ioctl+0x3d/0x47 [<ffffffff8137a254>] sock_ioctl+0x213/0x222 [<ffffffff81114614>] vfs_ioctl+0x32/0xa6 [<ffffffff81114b94>] do_vfs_ioctl+0x490/0x4d6 [<ffffffff81114c30>] sys_ioctl+0x56/0x79 [<ffffffff81009b42>] system_call_fastpath+0x16/0x1b -> #1 (rtnl_mutex){+.+.+.}: [<ffffffff81077bdb>] __lock_acquire+0xb73/0xd2b [<ffffffff81077e60>] lock_acquire+0xcd/0xf1 [<ffffffff8142f343>] __mutex_lock_common+0x4b/0x383 [<ffffffff8142f73f>] mutex_lock_nested+0x3e/0x43 [<ffffffff813959f9>] rtnl_lock+0x17/0x19 [<ffffffff8138ccae>] register_netdevice_notifier+0x1e/0x19b [<ffffffffa02580c1>] 0xffffffffa02580c1 [<ffffffff81002069>] do_one_initcall+0x5e/0x15e [<ffffffff81084094>] sys_init_module+0xd8/0x23a [<ffffffff81009b42>] system_call_fastpath+0x16/0x1b -> #0 (fcoe_config_mutex){+.+.+.}: [<ffffffff81077a85>] __lock_acquire+0xa1d/0xd2b [<ffffffff81077e60>] lock_acquire+0xcd/0xf1 [<ffffffff8142f343>] __mutex_lock_common+0x4b/0x383 [<ffffffff8142f73f>] mutex_lock_nested+0x3e/0x43 [<ffffffffa02ba5fc>] fcoe_create+0x27/0x4f7 [fcoe] [<ffffffff810635b1>] param_attr_store+0x27/0x35 [<ffffffff81063619>] module_attr_store+0x26/0x2a [<ffffffff8115dae3>] sysfs_write_file+0x108/0x144 [<ffffffff81107bd1>] vfs_write+0xae/0x10b [<ffffffff81107cee>] sys_write+0x4a/0x6e [<ffffffff81009b42>] system_call_fastpath+0x16/0x1b other info that might help us debug this: 3 locks held by fcoemon/18823: #0: (&buffer->mutex){+.+.+.}, at: [<ffffffff8115da17>] sysfs_write_file+0x3c/0x144 #1: (s_active){++++.+}, at: [<ffffffff8115ef86>] sysfs_get_active_two+0x24/0x48 #2: (s_active){++++.+}, at: [<ffffffff8115ef93>] sysfs_get_active_two+0x31/0x48 stack backtrace: Pid: 18823, comm: fcoemon Tainted: G W 2.6.33.1linux-stable-2.6.33 #1 Call Trace: [<ffffffff81076c38>] print_circular_bug+0xa8/0xb6 [<ffffffff81077a85>] __lock_acquire+0xa1d/0xd2b [<ffffffffa02ba5fc>] ? fcoe_create+0x27/0x4f7 [fcoe] [<ffffffff81077e60>] lock_acquire+0xcd/0xf1 [<ffffffffa02ba5fc>] ? fcoe_create+0x27/0x4f7 [fcoe] [<ffffffffa02ba5fc>] ? fcoe_create+0x27/0x4f7 [fcoe] [<ffffffff8142f343>] __mutex_lock_common+0x4b/0x383 [<ffffffffa02ba5fc>] ? fcoe_create+0x27/0x4f7 [fcoe] [<ffffffff8106ac70>] ? cpu_clock+0x43/0x5e [<ffffffff81074e12>] ? lockstat_clock+0x11/0x13 [<ffffffff81074e40>] ? lock_release_holdtime+0x2c/0x127 [<ffffffff8115ef93>] ? sysfs_get_active_two+0x31/0x48 [<ffffffff8142f73f>] mutex_lock_nested+0x3e/0x43 [<ffffffffa02ba5fc>] fcoe_create+0x27/0x4f7 [fcoe] [<ffffffff810635b1>] param_attr_store+0x27/0x35 [<ffffffff81063619>] module_attr_store+0x26/0x2a [<ffffffff8115dae3>] sysfs_write_file+0x108/0x144 [<ffffffff81107bd1>] vfs_write+0xae/0x10b [<ffffffff81076596>] ? trace_hardirqs_on_caller+0x125/0x150 [<ffffffff81107cee>] sys_write+0x4a/0x6e [<ffffffff81009b42>] system_call_fastpath+0x16/0x1b Signed-off-by: Vasu Dev <vasu.dev@intel.com> Signed-off-by: Robert Love <robert.w.love@intel.com> Signed-off-by: James Bottomley <James.Bottomley@suse.de>
2010-05-08 06:18:46 +08:00
rtnl_unlock();
[SCSI] fcoe: Fix deadlock between fip's recv_work and rtnl The rtnl cannot be held durrng the fcoe_interface_put. If it is the last reference on the fcoe_interface the fcoe_ctlr_destroy will be called as a part of the cleanup, ultimately calling cancel_work_sync(&fip->recv_work); If we are processing a flogi response we will be in the recv_work context and we will lock the rtnl to add a new unicast MAC address. This is how the deadlock can occur. The fix is simply to move the rtnl_lock/unlock into fcoe_interface_cleanup so that it can be unlocked before fcoe_interface_put is called. Here is the lockdep report: Jul 21 11:26:35 bubba [ 223.870702] ul 21 11:26:35 bubba [ 223.870704] ======================================================= Jul 21 11:26:35 bubba [ 223.871255] [ INFO: possible circular locking dependency detected ] Jul 21 11:26:35 bubba [ 223.871530] 3.0.0-rc7+ #1 Jul 21 11:26:35 bubba [ 223.871797] ------------------------------------------------------- Jul 21 11:26:35 bubba [ 223.872072] lockdeptest.sh/3464 is trying to acquire lock: Jul 21 11:26:35 bubba [ 223.872345] ((&fip->recv_work) Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff810531f1>] wait_on_work+0x0/0xbd Jul 21 11:26:35 bubba [ 223.873022] Jul 21 11:26:35 bubba [ 223.873023] but task is already holding lock: Jul 21 11:26:35 bubba [ 223.873555] (rtnl_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff813e8233>] rtnl_lock+0x12/0x14 Jul 21 11:26:35 bubba [ 223.874229] Jul 21 11:26:35 bubba [ 223.874230] which lock already depends on the new lock. Jul 21 11:26:35 bubba [ 223.874231] Jul 21 11:26:35 bubba [ 223.875032] Jul 21 11:26:35 bubba [ 223.875033] the existing dependency chain (in reverse order) is: Jul 21 11:26:35 bubba [ 223.875573] Jul 21 11:26:35 bubba [ 223.875573] -> #1 Jul 21 11:26:35 bubba (rtnl_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba : Jul 21 11:26:35 bubba [ 223.876301] Jul 21 11:26:35 bubba [<ffffffff8106c14a>] lock_acquire+0xd2/0xf7 Jul 21 11:26:35 bubba [ 223.876645] Jul 21 11:26:35 bubba [<ffffffff8151d975>] __mutex_lock_common+0x47/0x30d Jul 21 11:26:35 bubba [ 223.876991] Jul 21 11:26:35 bubba [<ffffffff8151dd36>] mutex_lock_nested+0x3b/0x40 Jul 21 11:26:35 bubba [ 223.877334] Jul 21 11:26:35 bubba [<ffffffff813e8233>] rtnl_lock+0x12/0x14 Jul 21 11:26:35 bubba [ 223.877675] Jul 21 11:26:35 bubba [<ffffffffa003d5a0>] fcoe_update_src_mac+0x2b/0x80 [fcoe] Jul 21 11:26:35 bubba [ 223.878022] Jul 21 11:26:35 bubba [<ffffffffa003d698>] fcoe_flogi_resp+0x5e/0x79 [fcoe] Jul 21 11:26:35 bubba [ 223.878366] Jul 21 11:26:35 bubba [<ffffffffa001566f>] fc_exch_recv+0x7f5/0x9da [libfc] Jul 21 11:26:35 bubba [ 223.878713] Jul 21 11:26:35 bubba [<ffffffffa00327d8>] fcoe_ctlr_recv_work+0x71f/0x10dc [libfcoe] Jul 21 11:26:35 bubba [ 223.879258] Jul 21 11:26:35 bubba [<ffffffff81053761>] process_one_work+0x1d7/0x347 Jul 21 11:26:35 bubba [ 223.879601] Jul 21 11:26:35 bubba [<ffffffff81054ade>] worker_thread+0xf8/0x17c Jul 21 11:26:35 bubba [ 223.879944] Jul 21 11:26:35 bubba [<ffffffff81058184>] kthread+0x7d/0x85 Jul 21 11:26:35 bubba [ 223.880287] Jul 21 11:26:35 bubba [<ffffffff81526414>] kernel_thread_helper+0x4/0x10 Jul 21 11:26:35 bubba [ 223.880634] Jul 21 11:26:35 bubba [ 223.880635] -> #0 Jul 21 11:26:35 bubba ((&fip->recv_work) Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba : Jul 21 11:26:35 bubba [ 223.881357] Jul 21 11:26:35 bubba [<ffffffff8106b93e>] __lock_acquire+0xb1d/0xe2c Jul 21 11:26:35 bubba [ 223.881695] Jul 21 11:26:35 bubba [<ffffffff8106c14a>] lock_acquire+0xd2/0xf7 Jul 21 11:26:35 bubba [ 223.882033] Jul 21 11:26:35 bubba [<ffffffff81053241>] wait_on_work+0x50/0xbd Jul 21 11:26:35 bubba [ 223.882378] Jul 21 11:26:35 bubba [<ffffffff81053b32>] __cancel_work_timer+0xb6/0xf4 Jul 21 11:26:35 bubba [ 223.882718] Jul 21 11:26:35 bubba [<ffffffff81053b8a>] cancel_work_sync+0xb/0xd Jul 21 11:26:35 bubba [ 223.883057] Jul 21 11:26:35 bubba [<ffffffffa00317e6>] fcoe_ctlr_destroy+0x1d/0x67 [libfcoe] Jul 21 11:26:35 bubba [ 223.883399] Jul 21 11:26:35 bubba [<ffffffffa003e51e>] fcoe_interface_release+0x21/0x45 [fcoe] Jul 21 11:26:35 bubba [ 223.883940] Jul 21 11:26:35 bubba [<ffffffff811fbbe6>] kref_put+0x43/0x4d Jul 21 11:26:35 bubba [ 223.884280] Jul 21 11:26:35 bubba [<ffffffffa003ebba>] fcoe_interface_put+0x17/0x19 [fcoe] Jul 21 11:26:35 bubba [ 223.884624] Jul 21 11:26:35 bubba [<ffffffffa003f2a6>] fcoe_interface_cleanup+0x188/0x193 [fcoe] Jul 21 11:26:35 bubba [ 223.885163] Jul 21 11:26:35 bubba [<ffffffffa003f303>] fcoe_destroy+0x52/0x72 [fcoe] Jul 21 11:26:35 bubba [ 223.885502] Jul 21 11:26:35 bubba [<ffffffffa00340a4>] fcoe_transport_destroy+0xab/0x110 [libfcoe] Jul 21 11:26:35 bubba [ 223.886045] Jul 21 11:26:35 bubba [<ffffffff81056153>] param_attr_store+0x43/0x62 Jul 21 11:26:35 bubba [ 223.886385] Jul 21 11:26:35 bubba [<ffffffff8105602d>] module_attr_store+0x21/0x25 Jul 21 11:26:35 bubba [ 223.886728] Jul 21 11:26:35 bubba [<ffffffff8114c23d>] sysfs_write_file+0x103/0x13f Jul 21 11:26:35 bubba [ 223.887068] Jul 21 11:26:35 bubba [<ffffffff810f3e7b>] vfs_write+0xa7/0xfa Jul 21 11:26:35 bubba [ 223.887406] Jul 21 11:26:35 bubba [<ffffffff810f4073>] sys_write+0x45/0x69 Jul 21 11:26:35 bubba [ 223.887742] Jul 21 11:26:35 bubba [<ffffffff815252bb>] system_call_fastpath+0x16/0x1b Jul 21 11:26:35 bubba [ 223.888083] Jul 21 11:26:35 bubba [ 223.888084] other info that might help us debug this: Jul 21 11:26:35 bubba [ 223.888085] Jul 21 11:26:35 bubba [ 223.888879] Possible unsafe locking scenario: Jul 21 11:26:35 bubba [ 223.888881] Jul 21 11:26:35 bubba [ 223.889411] CPU0 CPU1 Jul 21 11:26:35 bubba [ 223.889683] ---- ---- Jul 21 11:26:35 bubba [ 223.889955] lock( Jul 21 11:26:35 bubba rtnl_mutex Jul 21 11:26:35 bubba ); Jul 21 11:26:35 bubba [ 223.890349] lock( Jul 21 11:26:35 bubba (&fip->recv_work) Jul 21 11:26:35 bubba ); Jul 21 11:26:35 bubba [ 223.890751] lock( Jul 21 11:26:35 bubba rtnl_mutex Jul 21 11:26:35 bubba ); Jul 21 11:26:35 bubba [ 223.891154] lock( Jul 21 11:26:35 bubba (&fip->recv_work) Jul 21 11:26:35 bubba ); Jul 21 11:26:35 bubba [ 223.891549] Jul 21 11:26:35 bubba [ 223.891550] *** DEADLOCK *** Jul 21 11:26:35 bubba [ 223.891551] Jul 21 11:26:35 bubba [ 223.892347] 6 locks held by lockdeptest.sh/3464: Jul 21 11:26:35 bubba [ 223.892621] #0: Jul 21 11:26:35 bubba (&buffer->mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff8114c171>] sysfs_write_file+0x37/0x13f Jul 21 11:26:35 bubba [ 223.893359] #1: Jul 21 11:26:35 bubba (s_active Jul 21 11:26:35 bubba ){++++.+} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff8114c21c>] sysfs_write_file+0xe2/0x13f Jul 21 11:26:35 bubba [ 223.894094] #2: Jul 21 11:26:35 bubba (param_lock Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff81056146>] param_attr_store+0x36/0x62 Jul 21 11:26:35 bubba [ 223.894835] #3: Jul 21 11:26:35 bubba (ft_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffffa0034017>] fcoe_transport_destroy+0x1e/0x110 [libfcoe] Jul 21 11:26:35 bubba [ 223.895574] #4: Jul 21 11:26:35 bubba (fcoe_config_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffffa003f2c9>] fcoe_destroy+0x18/0x72 [fcoe] Jul 21 11:26:35 bubba [ 223.896314] #5: Jul 21 11:26:35 bubba (rtnl_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [<ffffffff813e8233>] rtnl_lock+0x12/0x14 Jul 21 11:26:35 bubba [ 223.897047] Jul 21 11:26:35 bubba [ 223.897048] stack backtrace: Jul 21 11:26:35 bubba [ 223.897578] Pid: 3464, comm: lockdeptest.sh Not tainted 3.0.0-rc7+ #1 Jul 21 11:26:35 bubba [ 223.897853] Call Trace: Jul 21 11:26:35 bubba [ 223.898128] [<ffffffff81068e16>] print_circular_bug+0x1f8/0x209 Jul 21 11:26:35 bubba [ 223.898416] [<ffffffff8106b93e>] __lock_acquire+0xb1d/0xe2c Jul 21 11:26:35 bubba [ 223.898699] [<ffffffff810531f1>] ? wait_on_cpu_work+0xe6/0xe6 Jul 21 11:26:35 bubba [ 223.898982] [<ffffffff8106c14a>] lock_acquire+0xd2/0xf7 Jul 21 11:26:35 bubba [ 223.899263] [<ffffffff810531f1>] ? wait_on_cpu_work+0xe6/0xe6 Jul 21 11:26:35 bubba [ 223.899547] [<ffffffff8104a097>] ? mod_timer+0x8f/0x98 Jul 21 11:26:35 bubba [ 223.899827] [<ffffffff81053241>] wait_on_work+0x50/0xbd Jul 21 11:26:35 bubba [ 223.900108] [<ffffffff810531f1>] ? wait_on_cpu_work+0xe6/0xe6 Jul 21 11:26:35 bubba [ 223.900390] [<ffffffff81053b32>] __cancel_work_timer+0xb6/0xf4 Jul 21 11:26:35 bubba [ 223.900671] [<ffffffff81053b8a>] cancel_work_sync+0xb/0xd Jul 21 11:26:35 bubba [ 223.900953] [<ffffffffa00317e6>] fcoe_ctlr_destroy+0x1d/0x67 [libfcoe] Jul 21 11:26:35 bubba [ 223.901237] [<ffffffffa003e51e>] fcoe_interface_release+0x21/0x45 [fcoe] Jul 21 11:26:35 bubba [ 223.901522] [<ffffffffa003e4fd>] ? fcoe_enable+0x6b/0x6b [fcoe] Jul 21 11:26:35 bubba [ 223.901803] [<ffffffff811fbbe6>] kref_put+0x43/0x4d Jul 21 11:26:35 bubba [ 223.902083] [<ffffffffa003ebba>] fcoe_interface_put+0x17/0x19 [fcoe] Jul 21 11:26:35 bubba [ 223.902367] [<ffffffffa003f2a6>] fcoe_interface_cleanup+0x188/0x193 [fcoe] Jul 21 11:26:35 bubba [ 223.902653] [<ffffffff8151dd36>] ? mutex_lock_nested+0x3b/0x40 Jul 21 11:26:35 bubba [ 223.902939] [<ffffffffa003f303>] fcoe_destroy+0x52/0x72 [fcoe] Jul 21 11:26:35 bubba [ 223.903223] [<ffffffffa00340a4>] fcoe_transport_destroy+0xab/0x110 [libfcoe] Jul 21 11:26:35 bubba [ 223.903508] [<ffffffff81056153>] param_attr_store+0x43/0x62 Jul 21 11:26:35 bubba [ 223.903792] [<ffffffff8105602d>] module_attr_store+0x21/0x25 Jul 21 11:26:35 bubba [ 223.904075] [<ffffffff8114c23d>] sysfs_write_file+0x103/0x13f Jul 21 11:26:35 bubba [ 223.904357] [<ffffffff810f3e7b>] vfs_write+0xa7/0xfa Jul 21 11:26:35 bubba [ 223.904642] [<ffffffff810f51d6>] ? fget_light+0x35/0x96 Jul 21 11:26:35 bubba [ 223.904923] [<ffffffff810f4073>] sys_write+0x45/0x69 Jul 21 11:26:35 bubba [ 223.905204] [<ffffffff815252bb>] system_call_fastpath+0x16/0x1b Jul 21 11:26:36 bubba [ 223.964438] ixgbe 0000:05:00.0: eth3: detected SFP+: 5 Jul 21 11:26:37 bubba [ 225.196702] ixgbe 0000:05:00.0: eth3: NIC Link is Up 10 Gbps, Flow Control: None Signed-off-by: Robert Love <robert.w.love@intel.com> Tested-by: Ross Brattain <ross.b.brattain@intel.com> Reviewed-by: Yi Zou <yi.zou@intel.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2011-08-26 03:40:47 +08:00
out_nortnl:
mutex_unlock(&fcoe_config_mutex);
return rc;
}
/**
* fcoe_link_speed_update() - Update the supported and actual link speeds
* @lport: The local port to update speeds for
*
* Returns: 0 if the ethtool query was successful
* -1 if the ethtool query failed
*/
static int fcoe_link_speed_update(struct fc_lport *lport)
{
struct net_device *netdev = fcoe_netdev(lport);
struct ethtool_cmd ecmd;
if (!__ethtool_get_settings(netdev, &ecmd)) {
lport->link_supported_speeds &=
~(FC_PORTSPEED_1GBIT | FC_PORTSPEED_10GBIT);
if (ecmd.supported & (SUPPORTED_1000baseT_Half |
SUPPORTED_1000baseT_Full))
lport->link_supported_speeds |= FC_PORTSPEED_1GBIT;
if (ecmd.supported & SUPPORTED_10000baseT_Full)
lport->link_supported_speeds |=
FC_PORTSPEED_10GBIT;
switch (ethtool_cmd_speed(&ecmd)) {
case SPEED_1000:
lport->link_speed = FC_PORTSPEED_1GBIT;
break;
case SPEED_10000:
lport->link_speed = FC_PORTSPEED_10GBIT;
break;
}
return 0;
}
return -1;
}
/**
* fcoe_link_ok() - Check if the link is OK for a local port
* @lport: The local port to check link on
*
* Returns: 0 if link is UP and OK, -1 if not
*
*/
static int fcoe_link_ok(struct fc_lport *lport)
{
struct net_device *netdev = fcoe_netdev(lport);
if (netif_oper_up(netdev))
return 0;
return -1;
}
/**
* fcoe_percpu_clean() - Clear all pending skbs for an local port
* @lport: The local port whose skbs are to be cleared
*
* Must be called with fcoe_create_mutex held to single-thread completion.
*
* This flushes the pending skbs by adding a new skb to each queue and
* waiting until they are all freed. This assures us that not only are
* there no packets that will be handled by the lport, but also that any
* threads already handling packet have returned.
*/
static void fcoe_percpu_clean(struct fc_lport *lport)
{
struct fcoe_percpu_s *pp;
struct sk_buff *skb;
unsigned int cpu;
for_each_possible_cpu(cpu) {
pp = &per_cpu(fcoe_percpu, cpu);
if (!pp->thread || !cpu_online(cpu))
continue;
skb = dev_alloc_skb(0);
if (!skb)
continue;
skb->destructor = fcoe_percpu_flush_done;
spin_lock_bh(&pp->fcoe_rx_list.lock);
__skb_queue_tail(&pp->fcoe_rx_list, skb);
if (pp->fcoe_rx_list.qlen == 1)
wake_up_process(pp->thread);
spin_unlock_bh(&pp->fcoe_rx_list.lock);
wait_for_completion(&fcoe_flush_completion);
}
}
/**
* fcoe_reset() - Reset a local port
* @shost: The SCSI host associated with the local port to be reset
*
* Returns: Always 0 (return value required by FC transport template)
*/
static int fcoe_reset(struct Scsi_Host *shost)
{
struct fc_lport *lport = shost_priv(shost);
struct fcoe_port *port = lport_priv(lport);
struct fcoe_interface *fcoe = port->priv;
fcoe_ctlr_link_down(&fcoe->ctlr);
fcoe_clean_pending_queue(fcoe->ctlr.lp);
if (!fcoe_link_ok(fcoe->ctlr.lp))
fcoe_ctlr_link_up(&fcoe->ctlr);
return 0;
}
/**
* fcoe_hostlist_lookup_port() - Find the FCoE interface associated with a net device
* @netdev: The net device used as a key
*
* Locking: Must be called with the RNL mutex held.
*
* Returns: NULL or the FCoE interface
*/
static struct fcoe_interface *
fcoe_hostlist_lookup_port(const struct net_device *netdev)
{
struct fcoe_interface *fcoe;
list_for_each_entry(fcoe, &fcoe_hostlist, list) {
if (fcoe->netdev == netdev)
return fcoe;
}
return NULL;
}
/**
* fcoe_hostlist_lookup() - Find the local port associated with a
* given net device
* @netdev: The netdevice used as a key
*
* Locking: Must be called with the RTNL mutex held
*
* Returns: NULL or the local port
*/
static struct fc_lport *fcoe_hostlist_lookup(const struct net_device *netdev)
{
struct fcoe_interface *fcoe;
fcoe = fcoe_hostlist_lookup_port(netdev);
return (fcoe) ? fcoe->ctlr.lp : NULL;
}
/**
* fcoe_hostlist_add() - Add the FCoE interface identified by a local
* port to the hostlist
* @lport: The local port that identifies the FCoE interface to be added
*
* Locking: must be called with the RTNL mutex held
*
* Returns: 0 for success
*/
static int fcoe_hostlist_add(const struct fc_lport *lport)
{
struct fcoe_interface *fcoe;
struct fcoe_port *port;
fcoe = fcoe_hostlist_lookup_port(fcoe_netdev(lport));
if (!fcoe) {
port = lport_priv(lport);
fcoe = port->priv;
list_add_tail(&fcoe->list, &fcoe_hostlist);
}
return 0;
}
static struct fcoe_transport fcoe_sw_transport = {
.name = {FCOE_TRANSPORT_DEFAULT},
.attached = false,
.list = LIST_HEAD_INIT(fcoe_sw_transport.list),
.match = fcoe_match,
.create = fcoe_create,
.destroy = fcoe_destroy,
.enable = fcoe_enable,
.disable = fcoe_disable,
};
/**
* fcoe_init() - Initialize fcoe.ko
*
* Returns: 0 on success, or a negative value on failure
*/
static int __init fcoe_init(void)
{
struct fcoe_percpu_s *p;
unsigned int cpu;
int rc = 0;
fcoe_wq = alloc_workqueue("fcoe", 0, 0);
if (!fcoe_wq)
return -ENOMEM;
/* register as a fcoe transport */
rc = fcoe_transport_attach(&fcoe_sw_transport);
if (rc) {
printk(KERN_ERR "failed to register an fcoe transport, check "
"if libfcoe is loaded\n");
return rc;
}
mutex_lock(&fcoe_config_mutex);
for_each_possible_cpu(cpu) {
p = &per_cpu(fcoe_percpu, cpu);
skb_queue_head_init(&p->fcoe_rx_list);
}
for_each_online_cpu(cpu)
fcoe_percpu_thread_create(cpu);
/* Initialize per CPU interrupt thread */
rc = register_hotcpu_notifier(&fcoe_cpu_notifier);
if (rc)
goto out_free;
/* Setup link change notification */
fcoe_dev_setup();
rc = fcoe_if_init();
if (rc)
goto out_free;
mutex_unlock(&fcoe_config_mutex);
return 0;
out_free:
for_each_online_cpu(cpu) {
fcoe_percpu_thread_destroy(cpu);
}
mutex_unlock(&fcoe_config_mutex);
destroy_workqueue(fcoe_wq);
return rc;
}
module_init(fcoe_init);
/**
* fcoe_exit() - Clean up fcoe.ko
*
* Returns: 0 on success or a negative value on failure
*/
static void __exit fcoe_exit(void)
{
struct fcoe_interface *fcoe, *tmp;
struct fcoe_port *port;
unsigned int cpu;
mutex_lock(&fcoe_config_mutex);
fcoe_dev_cleanup();
/* releases the associated fcoe hosts */
rtnl_lock();
list_for_each_entry_safe(fcoe, tmp, &fcoe_hostlist, list) {
list_del(&fcoe->list);
port = lport_priv(fcoe->ctlr.lp);
queue_work(fcoe_wq, &port->destroy_work);
}
rtnl_unlock();
unregister_hotcpu_notifier(&fcoe_cpu_notifier);
for_each_online_cpu(cpu)
fcoe_percpu_thread_destroy(cpu);
mutex_unlock(&fcoe_config_mutex);
/*
* destroy_work's may be chained but destroy_workqueue()
* can take care of them. Just kill the fcoe_wq.
*/
destroy_workqueue(fcoe_wq);
/*
* Detaching from the scsi transport must happen after all
* destroys are done on the fcoe_wq. destroy_workqueue will
* enusre the fcoe_wq is flushed.
*/
fcoe_if_exit();
/* detach from fcoe transport */
fcoe_transport_detach(&fcoe_sw_transport);
}
module_exit(fcoe_exit);
/**
* fcoe_flogi_resp() - FCoE specific FLOGI and FDISC response handler
* @seq: active sequence in the FLOGI or FDISC exchange
* @fp: response frame, or error encoded in a pointer (timeout)
* @arg: pointer the the fcoe_ctlr structure
*
* This handles MAC address management for FCoE, then passes control on to
* the libfc FLOGI response handler.
*/
static void fcoe_flogi_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg)
{
struct fcoe_ctlr *fip = arg;
struct fc_exch *exch = fc_seq_exch(seq);
struct fc_lport *lport = exch->lp;
u8 *mac;
if (IS_ERR(fp))
goto done;
mac = fr_cb(fp)->granted_mac;
/* pre-FIP */
if (is_zero_ether_addr(mac))
fcoe_ctlr_recv_flogi(fip, lport, fp);
if (!is_zero_ether_addr(mac))
fcoe_update_src_mac(lport, mac);
done:
fc_lport_flogi_resp(seq, fp, lport);
}
/**
* fcoe_logo_resp() - FCoE specific LOGO response handler
* @seq: active sequence in the LOGO exchange
* @fp: response frame, or error encoded in a pointer (timeout)
* @arg: pointer the the fcoe_ctlr structure
*
* This handles MAC address management for FCoE, then passes control on to
* the libfc LOGO response handler.
*/
static void fcoe_logo_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg)
{
struct fc_lport *lport = arg;
static u8 zero_mac[ETH_ALEN] = { 0 };
if (!IS_ERR(fp))
fcoe_update_src_mac(lport, zero_mac);
fc_lport_logo_resp(seq, fp, lport);
}
/**
* fcoe_elsct_send - FCoE specific ELS handler
*
* This does special case handling of FIP encapsualted ELS exchanges for FCoE,
* using FCoE specific response handlers and passing the FIP controller as
* the argument (the lport is still available from the exchange).
*
* Most of the work here is just handed off to the libfc routine.
*/
static struct fc_seq *fcoe_elsct_send(struct fc_lport *lport, u32 did,
struct fc_frame *fp, unsigned int op,
void (*resp)(struct fc_seq *,
struct fc_frame *,
void *),
void *arg, u32 timeout)
{
struct fcoe_port *port = lport_priv(lport);
struct fcoe_interface *fcoe = port->priv;
struct fcoe_ctlr *fip = &fcoe->ctlr;
struct fc_frame_header *fh = fc_frame_header_get(fp);
switch (op) {
case ELS_FLOGI:
case ELS_FDISC:
[SCSI] libfcoe: fcoe: fnic: add FIP VN2VN point-to-multipoint support The FC-BB-6 committee is proposing a new FIP usage model called VN_port to VN_port mode. It allows VN_ports to discover each other over a loss-free L2 Ethernet without any FCF or Fibre-channel fabric services. This is point-to-multipoint. There is also a variant of this called point-to-point which provides for making sure there is just one pair of ports operating over the Ethernet fabric. We add these new states: VNMP_START, _PROBE1, _PROBE2, _CLAIM, and _UP. These usually go quickly in that sequence. After waiting a random amount of time up to 100 ms in START, we select a pseudo-random proposed locally-unique port ID and send out probes in states PROBE1 and PROBE2, 100 ms apart. If no probe responses are heard, we proceed to CLAIM state 400 ms later and send a claim notification. We wait another 400 ms to receive claim responses, which give us a list of the other nodes on the network, including their FC-4 capabilities. After another 400 ms we go to VNMP_UP state and should start interoperating with any of the nodes for whic we receivec claim responses. More details are in the spec.j Add the new mode as FIP_MODE_VN2VN. The driver must specify explicitly that it wants to operate in this mode. There is no automatic detection between point-to-multipoint and fabric mode, and the local port initialization is affected, so it isn't anticipated that there will ever be any such automatic switchover. It may eventually be possible to have both fabric and VN2VN modes on the same L2 network, which may be done by two separate local VN_ports (lports). When in VN2VN mode, FIP replaces libfc's fabric-oriented discovery module with its own simple code that adds remote ports as they are discovered from incoming claim notifications and responses. These hooks are placed by fcoe_disc_init(). A linear list of discovered vn_ports is maintained under the fcoe_ctlr struct. It is expected to be short for now, and accessed infrequently. It is kept under RCU for lock-ordering reasons. The lport and/or rport mutexes may be held when we need to lookup a fcoe_vnport during an ELS send. Change fcoe_ctlr_encaps() to lookup the destination vn_port in the list of peers for the destination MAC address of the FIP-encapsulated frame. Add a new function fcoe_disc_init() to initialize just the discovery portion of libfcoe for VN2VN mode. Signed-off-by: Joe Eykholt <jeykholt@cisco.com> Signed-off-by: Robert Love <robert.w.love@intel.com> Signed-off-by: James Bottomley <James.Bottomley@suse.de>
2010-07-21 06:20:30 +08:00
if (lport->point_to_multipoint)
break;
return fc_elsct_send(lport, did, fp, op, fcoe_flogi_resp,
fip, timeout);
case ELS_LOGO:
/* only hook onto fabric logouts, not port logouts */
if (ntoh24(fh->fh_d_id) != FC_FID_FLOGI)
break;
return fc_elsct_send(lport, did, fp, op, fcoe_logo_resp,
lport, timeout);
}
return fc_elsct_send(lport, did, fp, op, resp, arg, timeout);
}
/**
* fcoe_vport_create() - create an fc_host/scsi_host for a vport
* @vport: fc_vport object to create a new fc_host for
* @disabled: start the new fc_host in a disabled state by default?
*
* Returns: 0 for success
*/
static int fcoe_vport_create(struct fc_vport *vport, bool disabled)
{
struct Scsi_Host *shost = vport_to_shost(vport);
struct fc_lport *n_port = shost_priv(shost);
struct fcoe_port *port = lport_priv(n_port);
struct fcoe_interface *fcoe = port->priv;
struct net_device *netdev = fcoe->netdev;
struct fc_lport *vn_port;
int rc;
char buf[32];
rc = fcoe_validate_vport_create(vport);
if (rc) {
fcoe_wwn_to_str(vport->port_name, buf, sizeof(buf));
printk(KERN_ERR "fcoe: Failed to create vport, "
"WWPN (0x%s) already exists\n",
buf);
return rc;
}
mutex_lock(&fcoe_config_mutex);
rtnl_lock();
vn_port = fcoe_if_create(fcoe, &vport->dev, 1);
rtnl_unlock();
mutex_unlock(&fcoe_config_mutex);
if (IS_ERR(vn_port)) {
printk(KERN_ERR "fcoe: fcoe_vport_create(%s) failed\n",
netdev->name);
return -EIO;
}
if (disabled) {
fc_vport_set_state(vport, FC_VPORT_DISABLED);
} else {
vn_port->boot_time = jiffies;
fc_fabric_login(vn_port);
fc_vport_setlink(vn_port);
}
return 0;
}
/**
* fcoe_vport_destroy() - destroy the fc_host/scsi_host for a vport
* @vport: fc_vport object that is being destroyed
*
* Returns: 0 for success
*/
static int fcoe_vport_destroy(struct fc_vport *vport)
{
struct Scsi_Host *shost = vport_to_shost(vport);
struct fc_lport *n_port = shost_priv(shost);
struct fc_lport *vn_port = vport->dd_data;
mutex_lock(&n_port->lp_mutex);
list_del(&vn_port->list);
mutex_unlock(&n_port->lp_mutex);
mutex_lock(&fcoe_config_mutex);
fcoe_if_destroy(vn_port);
mutex_unlock(&fcoe_config_mutex);
return 0;
}
/**
* fcoe_vport_disable() - change vport state
* @vport: vport to bring online/offline
* @disable: should the vport be disabled?
*/
static int fcoe_vport_disable(struct fc_vport *vport, bool disable)
{
struct fc_lport *lport = vport->dd_data;
if (disable) {
fc_vport_set_state(vport, FC_VPORT_DISABLED);
fc_fabric_logoff(lport);
} else {
lport->boot_time = jiffies;
fc_fabric_login(lport);
fc_vport_setlink(lport);
}
return 0;
}
/**
* fcoe_vport_set_symbolic_name() - append vport string to symbolic name
* @vport: fc_vport with a new symbolic name string
*
* After generating a new symbolic name string, a new RSPN_ID request is
* sent to the name server. There is no response handler, so if it fails
* for some reason it will not be retried.
*/
static void fcoe_set_vport_symbolic_name(struct fc_vport *vport)
{
struct fc_lport *lport = vport->dd_data;
struct fc_frame *fp;
size_t len;
snprintf(fc_host_symbolic_name(lport->host), FC_SYMBOLIC_NAME_SIZE,
"%s v%s over %s : %s", FCOE_NAME, FCOE_VERSION,
fcoe_netdev(lport)->name, vport->symbolic_name);
if (lport->state != LPORT_ST_READY)
return;
len = strnlen(fc_host_symbolic_name(lport->host), 255);
fp = fc_frame_alloc(lport,
sizeof(struct fc_ct_hdr) +
sizeof(struct fc_ns_rspn) + len);
if (!fp)
return;
lport->tt.elsct_send(lport, FC_FID_DIR_SERV, fp, FC_NS_RSPN_ID,
NULL, NULL, 3 * lport->r_a_tov);
}
/**
* fcoe_get_lesb() - Fill the FCoE Link Error Status Block
* @lport: the local port
* @fc_lesb: the link error status block
*/
static void fcoe_get_lesb(struct fc_lport *lport,
struct fc_els_lesb *fc_lesb)
{
struct net_device *netdev = fcoe_netdev(lport);
__fcoe_get_lesb(lport, fc_lesb, netdev);
}
/**
* fcoe_set_port_id() - Callback from libfc when Port_ID is set.
* @lport: the local port
* @port_id: the port ID
* @fp: the received frame, if any, that caused the port_id to be set.
*
* This routine handles the case where we received a FLOGI and are
* entering point-to-point mode. We need to call fcoe_ctlr_recv_flogi()
* so it can set the non-mapped mode and gateway address.
*
* The FLOGI LS_ACC is handled by fcoe_flogi_resp().
*/
static void fcoe_set_port_id(struct fc_lport *lport,
u32 port_id, struct fc_frame *fp)
{
struct fcoe_port *port = lport_priv(lport);
struct fcoe_interface *fcoe = port->priv;
if (fp && fc_frame_payload_op(fp) == ELS_FLOGI)
fcoe_ctlr_recv_flogi(&fcoe->ctlr, lport, fp);
}