From 29927dfb7f69bcf2ae7fd1cda10997e646a5189c Mon Sep 17 00:00:00 2001 From: Sridhar Pitchai Date: Tue, 1 May 2018 17:56:32 +0000 Subject: [PATCH 1/5] PCI: hv: Make sure the bus domain is really unique When Linux runs as a guest VM in Hyper-V and Hyper-V adds the virtual PCI bus to the guest, Hyper-V always provides unique PCI domain. commit 4a9b0933bdfc ("PCI: hv: Use device serial number as PCI domain") overrode unique domain with the serial number of the first device added to the virtual PCI bus. The reason for that patch was to have a consistent and short name for the device, but Hyper-V doesn't provide unique serial numbers. Using non-unique serial numbers as domain IDs leads to duplicate device addresses, which causes PCI bus registration to fail. commit 0c195567a8f6 ("netvsc: transparent VF management") avoids the need for commit 4a9b0933bdfc ("PCI: hv: Use device serial number as PCI domain"). When scripts were used to configure VF devices, the name of the VF needed to be consistent and short, but with commit 0c195567a8f6 ("netvsc: transparent VF management") all the setup is done in the kernel, and we do not need to maintain consistent name. Revert commit 4a9b0933bdfc ("PCI: hv: Use device serial number as PCI domain") so we can reliably support multiple devices being assigned to a guest. Tag the patch for stable kernels containing commit 0c195567a8f6 ("netvsc: transparent VF management"). Fixes: 4a9b0933bdfc ("PCI: hv: Use device serial number as PCI domain") Signed-off-by: Sridhar Pitchai [lorenzo.pieralisi@arm.com: trimmed commit log] Signed-off-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org # v4.14+ Reviewed-by: Bjorn Helgaas --- drivers/pci/host/pci-hyperv.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index 50cdefe3f6d3..b68c6dd2b311 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -1575,17 +1575,6 @@ static struct hv_pci_dev *new_pcichild_device(struct hv_pcibus_device *hbus, get_pcichild(hpdev, hv_pcidev_ref_childlist); spin_lock_irqsave(&hbus->device_list_lock, flags); - /* - * When a device is being added to the bus, we set the PCI domain - * number to be the device serial number, which is non-zero and - * unique on the same VM. The serial numbers start with 1, and - * increase by 1 for each device. So device names including this - * can have shorter names than based on the bus instance UUID. - * Only the first device serial number is used for domain, so the - * domain number will not change after the first device is added. - */ - if (list_empty(&hbus->children)) - hbus->sysdata.domain = desc->ser; list_add_tail(&hpdev->list_entry, &hbus->children); spin_unlock_irqrestore(&hbus->device_list_lock, flags); return hpdev; From 8c99e120ffcabc429d74e1e4d10db893c6888ef4 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 23 May 2018 10:11:12 -0700 Subject: [PATCH 2/5] PCI: hv: Remove unused reason for refcount handler The get/put functions were taking a reason code. This appears to be a debug infrastructure that is no longer used. Move the functions to start of file to eliminate need for forward declaration. Forward declarations are discouraged on Linux. Signed-off-by: Stephen Hemminger [lorenzo.pieralisi@arm.com: updated commit subject] Signed-off-by: Lorenzo Pieralisi --- drivers/pci/host/pci-hyperv.c | 70 +++++++++++++---------------------- 1 file changed, 26 insertions(+), 44 deletions(-) diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index b68c6dd2b311..522fd7687f07 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -488,17 +488,6 @@ enum hv_pcichild_state { hv_pcichild_maximum }; -enum hv_pcidev_ref_reason { - hv_pcidev_ref_invalid = 0, - hv_pcidev_ref_initial, - hv_pcidev_ref_by_slot, - hv_pcidev_ref_packet, - hv_pcidev_ref_pnp, - hv_pcidev_ref_childlist, - hv_pcidev_irqdata, - hv_pcidev_ref_max -}; - struct hv_pci_dev { /* List protected by pci_rescan_remove_lock */ struct list_head list_entry; @@ -548,10 +537,17 @@ static void hv_pci_generic_compl(void *context, struct pci_response *resp, static struct hv_pci_dev *get_pcichild_wslot(struct hv_pcibus_device *hbus, u32 wslot); -static void get_pcichild(struct hv_pci_dev *hv_pcidev, - enum hv_pcidev_ref_reason reason); -static void put_pcichild(struct hv_pci_dev *hv_pcidev, - enum hv_pcidev_ref_reason reason); + +static void get_pcichild(struct hv_pci_dev *hpdev) +{ + refcount_inc(&hpdev->refs); +} + +static void put_pcichild(struct hv_pci_dev *hpdev) +{ + if (refcount_dec_and_test(&hpdev->refs)) + kfree(hpdev); +} static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus); static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus); @@ -762,7 +758,7 @@ static int hv_pcifront_read_config(struct pci_bus *bus, unsigned int devfn, _hv_pcifront_read_config(hpdev, where, size, val); - put_pcichild(hpdev, hv_pcidev_ref_by_slot); + put_pcichild(hpdev); return PCIBIOS_SUCCESSFUL; } @@ -790,7 +786,7 @@ static int hv_pcifront_write_config(struct pci_bus *bus, unsigned int devfn, _hv_pcifront_write_config(hpdev, where, size, val); - put_pcichild(hpdev, hv_pcidev_ref_by_slot); + put_pcichild(hpdev); return PCIBIOS_SUCCESSFUL; } @@ -856,7 +852,7 @@ static void hv_msi_free(struct irq_domain *domain, struct msi_domain_info *info, } hv_int_desc_free(hpdev, int_desc); - put_pcichild(hpdev, hv_pcidev_ref_by_slot); + put_pcichild(hpdev); } static int hv_set_affinity(struct irq_data *data, const struct cpumask *dest, @@ -1186,13 +1182,13 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) msg->address_lo = comp.int_desc.address & 0xffffffff; msg->data = comp.int_desc.data; - put_pcichild(hpdev, hv_pcidev_ref_by_slot); + put_pcichild(hpdev); return; free_int_desc: kfree(int_desc); drop_reference: - put_pcichild(hpdev, hv_pcidev_ref_by_slot); + put_pcichild(hpdev); return_null_message: msg->address_hi = 0; msg->address_lo = 0; @@ -1508,19 +1504,6 @@ static void q_resource_requirements(void *context, struct pci_response *resp, complete(&completion->host_event); } -static void get_pcichild(struct hv_pci_dev *hpdev, - enum hv_pcidev_ref_reason reason) -{ - refcount_inc(&hpdev->refs); -} - -static void put_pcichild(struct hv_pci_dev *hpdev, - enum hv_pcidev_ref_reason reason) -{ - if (refcount_dec_and_test(&hpdev->refs)) - kfree(hpdev); -} - /** * new_pcichild_device() - Create a new child device * @hbus: The internal struct tracking this root PCI bus. @@ -1572,7 +1555,7 @@ static struct hv_pci_dev *new_pcichild_device(struct hv_pcibus_device *hbus, hpdev->desc = *desc; refcount_set(&hpdev->refs, 1); - get_pcichild(hpdev, hv_pcidev_ref_childlist); + get_pcichild(hpdev); spin_lock_irqsave(&hbus->device_list_lock, flags); list_add_tail(&hpdev->list_entry, &hbus->children); @@ -1607,7 +1590,7 @@ static struct hv_pci_dev *get_pcichild_wslot(struct hv_pcibus_device *hbus, list_for_each_entry(iter, &hbus->children, list_entry) { if (iter->desc.win_slot.slot == wslot) { hpdev = iter; - get_pcichild(hpdev, hv_pcidev_ref_by_slot); + get_pcichild(hpdev); break; } } @@ -1724,7 +1707,7 @@ static void pci_devices_present_work(struct work_struct *work) list_entry); if (hpdev->reported_missing) { found = true; - put_pcichild(hpdev, hv_pcidev_ref_childlist); + put_pcichild(hpdev); list_move_tail(&hpdev->list_entry, &removed); break; } @@ -1737,7 +1720,7 @@ static void pci_devices_present_work(struct work_struct *work) hpdev = list_first_entry(&removed, struct hv_pci_dev, list_entry); list_del(&hpdev->list_entry); - put_pcichild(hpdev, hv_pcidev_ref_initial); + put_pcichild(hpdev); } switch (hbus->state) { @@ -1872,8 +1855,8 @@ static void hv_eject_device_work(struct work_struct *work) sizeof(*ejct_pkt), (unsigned long)&ctxt.pkt, VM_PKT_DATA_INBAND, 0); - put_pcichild(hpdev, hv_pcidev_ref_childlist); - put_pcichild(hpdev, hv_pcidev_ref_pnp); + put_pcichild(hpdev); + put_pcichild(hpdev); put_hvpcibus(hpdev->hbus); } @@ -1888,7 +1871,7 @@ static void hv_eject_device_work(struct work_struct *work) static void hv_pci_eject_device(struct hv_pci_dev *hpdev) { hpdev->state = hv_pcichild_ejecting; - get_pcichild(hpdev, hv_pcidev_ref_pnp); + get_pcichild(hpdev); INIT_WORK(&hpdev->wrk, hv_eject_device_work); get_hvpcibus(hpdev->hbus); queue_work(hpdev->hbus->wq, &hpdev->wrk); @@ -1988,8 +1971,7 @@ static void hv_pci_onchannelcallback(void *context) dev_message->wslot.slot); if (hpdev) { hv_pci_eject_device(hpdev); - put_pcichild(hpdev, - hv_pcidev_ref_by_slot); + put_pcichild(hpdev); } break; @@ -2387,7 +2369,7 @@ static int hv_send_resources_allocated(struct hv_device *hdev) PCI_RESOURCES_ASSIGNED2; res_assigned2->wslot.slot = hpdev->desc.win_slot.slot; } - put_pcichild(hpdev, hv_pcidev_ref_by_slot); + put_pcichild(hpdev); ret = vmbus_sendpacket(hdev->channel, &pkt->message, size_res, (unsigned long)pkt, @@ -2435,7 +2417,7 @@ static int hv_send_resources_released(struct hv_device *hdev) pkt.message_type.type = PCI_RESOURCES_RELEASED; pkt.wslot.slot = hpdev->desc.win_slot.slot; - put_pcichild(hpdev, hv_pcidev_ref_by_slot); + put_pcichild(hpdev); ret = vmbus_sendpacket(hdev->channel, &pkt, sizeof(pkt), 0, VM_PKT_DATA_INBAND, 0); From 6708be936715fd673273dede40367b2ea0cba768 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 23 May 2018 10:11:13 -0700 Subject: [PATCH 3/5] PCI: hv: Convert remove_lock to refcount Use refcount instead of atomic for the reference counting on bus. Refcount is safer because it handles overflow correctly. Signed-off-by: Stephen Hemminger [lorenzo.pieralisi@arm.com: updated commit subject] Signed-off-by: Lorenzo Pieralisi --- drivers/pci/host/pci-hyperv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index 522fd7687f07..b03a81b4581f 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -433,7 +433,7 @@ enum hv_pcibus_state { struct hv_pcibus_device { struct pci_sysdata sysdata; enum hv_pcibus_state state; - atomic_t remove_lock; + refcount_t remove_lock; struct hv_device *hdev; resource_size_t low_mmio_space; resource_size_t high_mmio_space; @@ -2430,12 +2430,12 @@ static int hv_send_resources_released(struct hv_device *hdev) static void get_hvpcibus(struct hv_pcibus_device *hbus) { - atomic_inc(&hbus->remove_lock); + refcount_inc(&hbus->remove_lock); } static void put_hvpcibus(struct hv_pcibus_device *hbus) { - if (atomic_dec_and_test(&hbus->remove_lock)) + if (refcount_dec_and_test(&hbus->remove_lock)) complete(&hbus->remove_event); } @@ -2479,7 +2479,7 @@ static int hv_pci_probe(struct hv_device *hdev, hdev->dev_instance.b[8] << 8; hbus->hdev = hdev; - atomic_inc(&hbus->remove_lock); + refcount_set(&hbus->remove_lock, 1); INIT_LIST_HEAD(&hbus->children); INIT_LIST_HEAD(&hbus->dr_list); INIT_LIST_HEAD(&hbus->resources_for_children); From 5b8db8f66e08fa0c4f536ba96288c1451e66be7e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 23 May 2018 10:11:14 -0700 Subject: [PATCH 4/5] PCI: hv: Use list_for_each_entry() There are several places where list_for_each_entry() could be used to simplify the code. Signed-off-by: Stephen Hemminger [lorenzo.pieralisi@arm.com: updated commit log] Signed-off-by: Lorenzo Pieralisi --- drivers/pci/host/pci-hyperv.c | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index b03a81b4581f..a6712bc669bd 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -1279,7 +1279,6 @@ static u64 get_bar_size(u64 bar_val) */ static void survey_child_resources(struct hv_pcibus_device *hbus) { - struct list_head *iter; struct hv_pci_dev *hpdev; resource_size_t bar_size = 0; unsigned long flags; @@ -1305,8 +1304,7 @@ static void survey_child_resources(struct hv_pcibus_device *hbus) * for a child device are a power of 2 in size and aligned in memory, * so it's sufficient to just add them up without tracking alignment. */ - list_for_each(iter, &hbus->children) { - hpdev = container_of(iter, struct hv_pci_dev, list_entry); + list_for_each_entry(hpdev, &hbus->children, list_entry) { for (i = 0; i < 6; i++) { if (hpdev->probed_bar[i] & PCI_BASE_ADDRESS_SPACE_IO) dev_err(&hbus->hdev->device, @@ -1359,7 +1357,6 @@ static void prepopulate_bars(struct hv_pcibus_device *hbus) resource_size_t low_base = 0; resource_size_t bar_size; struct hv_pci_dev *hpdev; - struct list_head *iter; unsigned long flags; u64 bar_val; u32 command; @@ -1381,9 +1378,7 @@ static void prepopulate_bars(struct hv_pcibus_device *hbus) /* Pick addresses for the BARs. */ do { - list_for_each(iter, &hbus->children) { - hpdev = container_of(iter, struct hv_pci_dev, - list_entry); + list_for_each_entry(hpdev, &hbus->children, list_entry) { for (i = 0; i < 6; i++) { bar_val = hpdev->probed_bar[i]; if (bar_val == 0) @@ -1626,7 +1621,6 @@ static void pci_devices_present_work(struct work_struct *work) { u32 child_no; bool found; - struct list_head *iter; struct pci_function_description *new_desc; struct hv_pci_dev *hpdev; struct hv_pcibus_device *hbus; @@ -1663,10 +1657,8 @@ static void pci_devices_present_work(struct work_struct *work) /* First, mark all existing children as reported missing. */ spin_lock_irqsave(&hbus->device_list_lock, flags); - list_for_each(iter, &hbus->children) { - hpdev = container_of(iter, struct hv_pci_dev, - list_entry); - hpdev->reported_missing = true; + list_for_each_entry(hpdev, &hbus->children, list_entry) { + hpdev->reported_missing = true; } spin_unlock_irqrestore(&hbus->device_list_lock, flags); @@ -1676,11 +1668,8 @@ static void pci_devices_present_work(struct work_struct *work) new_desc = &dr->func[child_no]; spin_lock_irqsave(&hbus->device_list_lock, flags); - list_for_each(iter, &hbus->children) { - hpdev = container_of(iter, struct hv_pci_dev, - list_entry); - if ((hpdev->desc.win_slot.slot == - new_desc->win_slot.slot) && + list_for_each_entry(hpdev, &hbus->children, list_entry) { + if ((hpdev->desc.win_slot.slot == new_desc->win_slot.slot) && (hpdev->desc.v_id == new_desc->v_id) && (hpdev->desc.d_id == new_desc->d_id) && (hpdev->desc.ser == new_desc->ser)) { @@ -1702,9 +1691,7 @@ static void pci_devices_present_work(struct work_struct *work) spin_lock_irqsave(&hbus->device_list_lock, flags); do { found = false; - list_for_each(iter, &hbus->children) { - hpdev = container_of(iter, struct hv_pci_dev, - list_entry); + list_for_each_entry(hpdev, &hbus->children, list_entry) { if (hpdev->reported_missing) { found = true; put_pcichild(hpdev); From c3635da2a336441253c33298b87b3042db100725 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 23 May 2018 21:12:01 +0000 Subject: [PATCH 5/5] PCI: hv: Do not wait forever on a device that has disappeared Before the guest finishes the device initialization, the device can be removed anytime by the host, and after that the host won't respond to the guest's request, so the guest should be prepared to handle this case. Add a polling mechanism to detect device presence. Signed-off-by: Dexuan Cui [lorenzo.pieralisi@arm.com: edited commit log] Signed-off-by: Lorenzo Pieralisi Reviewed-by: Haiyang Zhang Cc: Stephen Hemminger Cc: K. Y. Srinivasan --- drivers/pci/host/pci-hyperv.c | 46 ++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index a6712bc669bd..6cc5036ac83c 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -552,6 +552,26 @@ static void put_pcichild(struct hv_pci_dev *hpdev) static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus); static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus); +/* + * There is no good way to get notified from vmbus_onoffer_rescind(), + * so let's use polling here, since this is not a hot path. + */ +static int wait_for_response(struct hv_device *hdev, + struct completion *comp) +{ + while (true) { + if (hdev->channel->rescind) { + dev_warn_once(&hdev->device, "The device is gone.\n"); + return -ENODEV; + } + + if (wait_for_completion_timeout(comp, HZ / 10)) + break; + } + + return 0; +} + /** * devfn_to_wslot() - Convert from Linux PCI slot to Windows * @devfn: The Linux representation of PCI slot @@ -1546,7 +1566,8 @@ static struct hv_pci_dev *new_pcichild_device(struct hv_pcibus_device *hbus, if (ret) goto error; - wait_for_completion(&comp_pkt.host_event); + if (wait_for_response(hbus->hdev, &comp_pkt.host_event)) + goto error; hpdev->desc = *desc; refcount_set(&hpdev->refs, 1); @@ -2027,15 +2048,16 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev) sizeof(struct pci_version_request), (unsigned long)pkt, VM_PKT_DATA_INBAND, VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + if (!ret) + ret = wait_for_response(hdev, &comp_pkt.host_event); + if (ret) { dev_err(&hdev->device, - "PCI Pass-through VSP failed sending version reqquest: %#x", + "PCI Pass-through VSP failed to request version: %d", ret); goto exit; } - wait_for_completion(&comp_pkt.host_event); - if (comp_pkt.completion_status >= 0) { pci_protocol_version = pci_protocol_versions[i]; dev_info(&hdev->device, @@ -2244,11 +2266,12 @@ static int hv_pci_enter_d0(struct hv_device *hdev) ret = vmbus_sendpacket(hdev->channel, d0_entry, sizeof(*d0_entry), (unsigned long)pkt, VM_PKT_DATA_INBAND, VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + if (!ret) + ret = wait_for_response(hdev, &comp_pkt.host_event); + if (ret) goto exit; - wait_for_completion(&comp_pkt.host_event); - if (comp_pkt.completion_status < 0) { dev_err(&hdev->device, "PCI Pass-through VSP failed D0 Entry with status %x\n", @@ -2288,11 +2311,10 @@ static int hv_pci_query_relations(struct hv_device *hdev) ret = vmbus_sendpacket(hdev->channel, &message, sizeof(message), 0, VM_PKT_DATA_INBAND, 0); - if (ret) - return ret; + if (!ret) + ret = wait_for_response(hdev, &comp); - wait_for_completion(&comp); - return 0; + return ret; } /** @@ -2362,11 +2384,11 @@ static int hv_send_resources_allocated(struct hv_device *hdev) size_res, (unsigned long)pkt, VM_PKT_DATA_INBAND, VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + if (!ret) + ret = wait_for_response(hdev, &comp_pkt.host_event); if (ret) break; - wait_for_completion(&comp_pkt.host_event); - if (comp_pkt.completion_status < 0) { ret = -EPROTO; dev_err(&hdev->device,