From 0e2e27990e2dcd415f7974e8460a2f05accdddfb Mon Sep 17 00:00:00 2001 From: Jeff Skirvin Date: Thu, 27 Oct 2011 15:04:50 -0700 Subject: [PATCH] [SCSI] isci: Lookup device references through requests in completions. The LLDD needs to obtain a reference to the device through the request itself and not through the domain_device, because the domain_device.lldd_dev is set to NULL early in the lldd_dev_gone call. This relies on the fact that the isci_remote_device object is keeping a separate reference count of outstanding requests. TODO: unify the request count tracking with the isci_remote_device kref. The failure signature of this condition looks like the following log, where the important bits are the call to lldd_dev_gone followed by a crash in isci_terminate_request_core: [ 229.151541] isci 0000:0b:00.0: isci_remote_device_gone: domain_device = ffff8801492d4800, isci_device = ffff880143c657d0, isci_port = ffff880143c63658 [ 229.166007] isci 0000:0b:00.0: isci_remote_device_stop: isci_device = ffff880143c657d0 [ 229.175317] isci 0000:0b:00.0: isci_terminate_pending_requests: idev=ffff880143c657d0 request=ffff88014741f000; task=ffff8801470f46c0 old_state=2 [ 229.189702] isci 0000:0b:00.0: isci_terminate_request_core: device = ffff880143c657d0; request = ffff88014741f000 [ 229.201339] isci 0000:0b:00.0: isci_terminate_request_core: before completion wait (ffff88014741f000/ffff880149715ad0) [ 229.213414] isci 0000:0b:00.0: sci_controller_process_completions: completion queue entry:0x8000a0e9 [ 229.214401] BUG: unable to handle kernel NULL pointer dereference at 0000000000000228 [ 229.214401] IP:jdskirvi-testlbo [] sci_request_completed_state_enter+0x50/0xafb [isci] [ 229.214401] PGD 13d19e067 PUD 13d104067 PMD 0 [ 229.214401] Oops: 0000 [#1] SMP [ 229.214401] CPU 0 x kernel: [ 226 [ 229.214401] Modules linked in: ipv6 dm_multipath uinput nouveau snd_hda_codec_realtek snd_hda_intel ttm drm_kms_helper drm snd_hda_codec snd_hwdep snd_pcm snd_timer i2c_algo_bit isci snd 
libsas ioatdma mxm_wmi iTCO_wdt soundcore snd_page_alloc scsi_transport_sas iTCO_vendor_support wmi dca video i2c_i801 i2c_core [last unloaded: speedstep_lib] [ 229.214401] [ 229.214401] Pid: 5, comm: kworker/u:0 Not tainted 3.0.0-isci-11.7.29+ #30.353196] Buffer Intel Corporation Stoakley/Pearlcity Workstation [ 229.214401] RIP: 0010:[] I/O error on dev [] sci_request_completed_state_enter+0x50/0xafb [isci] [ 229.214401] RSP: 0018:ffff88014fc03d20 EFLAGS: 00010046 [ 229.214401] RAX: 0000000000000000 RBX: ffff88014741f000 RCX: 0000000000000000 [ 229.214401] RDX: ffffffffa00b2c90 RSI: 0000000000000017 RDI: ffff88014741f0a0 [ 229.214401] RBP: ffff88014fc03d90 R08: 0000000000000018 R09: 0000000000000000 [ 229.214401] R10: 0000000000000000 R11: ffffffff81a17d98 R12: 000000000000001d [ 229.214401] R13: ffff8801470f46c0 R14: 0000000000000000 R15: 0000000000008000 [ 229.214401] FS: 0000000000000000(0000) GS:ffff88014fc00000(0000) knlGS:0000000000000000 [ 229.214401] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 229.214401] CR2: 0000000000000228 CR3: 000000013ceaa000 CR4: 00000000000406f0 [ 229.214401] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 229.214401] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 229.214401] Process kworker/u:0 (pid: 5, threadinfo ffff880149714000, task ffff880149718000) [ 229.214401] Call Trace: [ 229.214401] [ 229.214401] [] sci_change_state+0x4a/0x4f [isci] [ 229.214401] [] sci_io_request_tc_completion+0x79c/0x7a0 [isci] [ 229.214401] [] sci_controller_process_completions+0x14f/0x396 [isci] [ 229.214401] [] ? spin_lock_irq+0xe/0x10 [isci] [ 229.214401] [] isci_host_completion_routine+0x71/0x2be [isci] [ 229.214401] [] ? mark_held_locks+0x52/0x70 [ 229.214401] [] tasklet_action+0x90/0xf1 [ 229.214401] [] __do_softirq+0xe5/0x1bf [ 229.214401] [] ? 
hrtimer_interrupt+0x129/0x1bb [ 229.214401] [] call_softirq+0x1c/0x30 [ 229.214401] [] do_softirq+0x4b/0xa3 [ 229.214401] [] irq_exit+0x53/0xb4 [ 229.214401] [] smp_apic_timer_interrupt+0x83/0x91 [ 229.214401] [] apic_timer_interrupt+0x13/0x20 [ 229.214401] [ 229.214401] [] ? retint_restore_args+0x13/0x13 [ 229.214401] [] ? trace_hardirqs_off+0xd/0xf [ 229.214401] [] ? vprintk+0x40b/0x452 [ 229.214401] [] printk+0x41/0x47 [ 229.214401] [] __dev_printk+0x78/0x7a [ 229.214401] [] dev_printk+0x45/0x47 [ 229.214401] [] isci_terminate_request_core+0x15d/0x317 [isci] [ 229.214401] [] isci_terminate_pending_requests+0x1a4/0x204 [isci] [ 229.214401] [] ? sas_phye_oob_error+0xc3/0xc3 [libsas] [ 229.214401] [] isci_remote_device_nuke_requests+0xa6/0xff [isci] [ 229.214401] [] isci_remote_device_stop+0x7c/0x166 [isci] [ 229.214401] [] ? sas_phye_oob_error+0xc3/0xc3 [libsas] [ 229.214401] [] isci_remote_device_gone+0x76/0x7e [isci] [ 229.214401] [] sas_notify_lldd_dev_gone+0x34/0x36 [libsas] [ 229.214401] [] sas_unregister_dev+0x57/0x9c [libsas] [ 229.214401] [] sas_unregister_domain_devices+0x36/0x65 [libsas] [ 229.214401] [] sas_deform_port+0x72/0x1ac [libsas] [ 229.214401] [] ? 
sas_phye_oob_error+0xc3/0xc3 [libsas] [ 229.214401] [] sas_phye_loss_of_signal+0x3e/0x42 [libsas] Signed-off-by: Jeff Skirvin Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/isci/request.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c index 565a9f0a9bc2..bfc7379727b1 100644 --- a/drivers/scsi/isci/request.c +++ b/drivers/scsi/isci/request.c @@ -2728,9 +2728,9 @@ static void isci_request_io_request_complete(struct isci_host *ihost, struct sas_task *task = isci_request_access_task(request); struct ssp_response_iu *resp_iu; unsigned long task_flags; - struct isci_remote_device *idev = isci_lookup_device(task->dev); - enum service_response response = SAS_TASK_UNDELIVERED; - enum exec_status status = SAS_ABORTED_TASK; + struct isci_remote_device *idev = request->target_device; + enum service_response response = SAS_TASK_UNDELIVERED; + enum exec_status status = SAS_ABORTED_TASK; enum isci_request_status request_status; enum isci_completion_selection complete_to_host = isci_perform_normal_io_completion; @@ -3061,7 +3061,6 @@ static void isci_request_io_request_complete(struct isci_host *ihost, /* complete the io request to the core. */ sci_controller_complete_io(ihost, request->target_device, request); - isci_put_device(idev); /* set terminated handle so it cannot be completed or * terminated again, and to cause any calls into abort