Mirror of https://mirrors.bfsu.edu.cn/git/linux.git (synced 2024-11-30 07:34:12 +08:00)
hyperv-fixes for 6.4-rc8

-----BEGIN PGP SIGNATURE-----

iQFHBAABCgAxFiEEIbPD0id6easf0xsudhRwX5BBoF4FAmSQ3ioTHHdlaS5saXVA
a2VybmVsLm9yZwAKCRB2FHBfkEGgXpREB/9nMJ5PbgsxpqKiV3ckodXZp7wLkFAv
VK12KBZcjAr8kbZON0CHXWssC/QLBV9+UYDjvA7ciEjkzBZoIY8GMAjFZ4NNveTm
ssZPaxg0DHX7SzVO6qDrZBwjyGmjPh8vH5TDsb6QPYk8WMuwYy+QZMWTEcxr7QU4
o3GRbt+JShS05s5Q1B3pSeztyxDxJh1potyoTfaY1sbih0c+r6mtewlpRW3KgoSc
ukssybTmNyRRpDos/PlT2e0gRpIzlYQnzE+sj4mGOOQFh4wGOR8wGcNPr4yirrcI
gy/4nvIwxp0uLW0C30FBlqzNt9dirOSRflXq/Pp4MdQcSM3hpeONbDxx
=0aJ5
-----END PGP SIGNATURE-----

Merge tag 'hyperv-fixes-signed-20230619' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux

Pull hyperv fixes from Wei Liu:

 - Fix races in Hyper-V PCI controller (Dexuan Cui)

 - Fix handling of hyperv_pcpu_input_arg (Michael Kelley)

 - Fix vmbus_wait_for_unload to scan present CPUs (Michael Kelley)

 - Call hv_synic_free in the failure path of hv_synic_alloc (Dexuan Cui)

 - Add noop for real mode handlers for virtual trust level code (Saurabh Sengar)

* tag 'hyperv-fixes-signed-20230619' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux:
  PCI: hv: Add a per-bus mutex state_lock
  Revert "PCI: hv: Fix a timing issue which causes kdump to fail occasionally"
  PCI: hv: Remove the useless hv_pcichild_state from struct hv_pci_dev
  PCI: hv: Fix a race condition in hv_irq_unmask() that can cause panic
  PCI: hv: Fix a race condition bug in hv_pci_query_relations()
  arm64/hyperv: Use CPUHP_AP_HYPERV_ONLINE state to fix CPU online sequencing
  x86/hyperv: Fix hyperv_pcpu_input_arg handling when CPUs go online/offline
  Drivers: hv: vmbus: Fix vmbus_wait_for_unload() to scan present CPUs
  Drivers: hv: vmbus: Call hv_synic_free() if hv_synic_alloc() fails
  x86/hyperv/vtl: Add noop for realmode pointers
commit 692b7dc87c
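Background note (not part of the original commit): the arm64 and x86 hunks below move the Hyper-V per-CPU callbacks from the dynamically allocated CPUHP_AP_ONLINE_DYN range to the dedicated CPUHP_AP_HYPERV_ONLINE state added to enum cpuhp_state, so the online callback runs at a fixed point in the CPU bring-up order (right after CPUHP_AP_ONLINE_IDLE and before CPUHP_AP_KVM_ONLINE). A minimal sketch of that registration pattern follows; only the cpuhp_setup_state() call shape mirrors the diff, while the callback bodies, the example_* names and the early_initcall() hookup are illustrative assumptions, not code from this merge.

/* Illustrative sketch only -- not part of this commit. */
#include <linux/cpuhotplug.h>
#include <linux/init.h>

static int example_cpu_online(unsigned int cpu)
{
	/* per-CPU setup, e.g. allocating hypervisor input/output pages */
	return 0;
}

static int example_cpu_offline(unsigned int cpu)
{
	/* per-CPU teardown; return 0 on success */
	return 0;
}

static int __init example_init(void)
{
	int ret;

	/*
	 * Registering on the fixed CPUHP_AP_HYPERV_ONLINE state instead of
	 * CPUHP_AP_ONLINE_DYN guarantees the online callback runs before
	 * later states such as CPUHP_AP_KVM_ONLINE on every CPU that comes
	 * online.
	 */
	ret = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "example:online",
				example_cpu_online, example_cpu_offline);
	return ret < 0 ? ret : 0;
}
early_initcall(example_init);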
diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c
@@ -67,7 +67,7 @@ static int __init hyperv_init(void)
 	if (ret)
 		return ret;
 
-	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "arm64/hyperv_init:online",
+	ret = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "arm64/hyperv_init:online",
 				hv_common_cpu_init, hv_common_cpu_die);
 	if (ret < 0) {
 		hv_common_free();
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
@@ -416,7 +416,7 @@ void __init hyperv_init(void)
 			goto free_vp_assist_page;
 	}
 
-	cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online",
+	cpuhp = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "x86/hyperv_init:online",
				  hv_cpu_init, hv_cpu_die);
 	if (cpuhp < 0)
 		goto free_ghcb_page;
diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c
@@ -20,6 +20,8 @@ void __init hv_vtl_init_platform(void)
 {
 	pr_info("Linux runs in Hyper-V Virtual Trust Level\n");
 
+	x86_platform.realmode_reserve = x86_init_noop;
+	x86_platform.realmode_init = x86_init_noop;
 	x86_init.irqs.pre_vector_init = x86_init_noop;
 	x86_init.timers.timer_init = x86_init_noop;
 
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
@@ -829,11 +829,22 @@ static void vmbus_wait_for_unload(void)
 	if (completion_done(&vmbus_connection.unload_event))
 		goto completed;
 
-	for_each_online_cpu(cpu) {
+	for_each_present_cpu(cpu) {
 		struct hv_per_cpu_context *hv_cpu
 			= per_cpu_ptr(hv_context.cpu_context, cpu);
 
+		/*
+		 * In a CoCo VM the synic_message_page is not allocated
+		 * in hv_synic_alloc(). Instead it is set/cleared in
+		 * hv_synic_enable_regs() and hv_synic_disable_regs()
+		 * such that it is set only when the CPU is online. If
+		 * not all present CPUs are online, the message page
+		 * might be NULL, so skip such CPUs.
+		 */
 		page_addr = hv_cpu->synic_message_page;
+		if (!page_addr)
+			continue;
+
 		msg = (struct hv_message *)page_addr
 			+ VMBUS_MESSAGE_SINT;
 
@@ -867,11 +878,14 @@ completed:
 	 * maybe-pending messages on all CPUs to be able to receive new
 	 * messages after we reconnect.
 	 */
-	for_each_online_cpu(cpu) {
+	for_each_present_cpu(cpu) {
 		struct hv_per_cpu_context *hv_cpu
 			= per_cpu_ptr(hv_context.cpu_context, cpu);
 
 		page_addr = hv_cpu->synic_message_page;
+		if (!page_addr)
+			continue;
+
 		msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
 		msg->header.message_type = HVMSG_NONE;
 	}
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
@@ -364,13 +364,20 @@ int hv_common_cpu_init(unsigned int cpu)
 	flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL;
 
 	inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
-	*inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
-	if (!(*inputarg))
-		return -ENOMEM;
 
-	if (hv_root_partition) {
-		outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
-		*outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE;
+	/*
+	 * hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory is already
+	 * allocated if this CPU was previously online and then taken offline
+	 */
+	if (!*inputarg) {
+		*inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
+		if (!(*inputarg))
+			return -ENOMEM;
+
+		if (hv_root_partition) {
+			outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
+			*outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE;
+		}
 	}
 
 	msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX);
@@ -385,24 +392,17 @@ int hv_common_cpu_init(unsigned int cpu)
 
 int hv_common_cpu_die(unsigned int cpu)
 {
-	unsigned long flags;
-	void **inputarg, **outputarg;
-	void *mem;
-
-	local_irq_save(flags);
-
-	inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
-	mem = *inputarg;
-	*inputarg = NULL;
-
-	if (hv_root_partition) {
-		outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
-		*outputarg = NULL;
-	}
-
-	local_irq_restore(flags);
-
-	kfree(mem);
+	/*
+	 * The hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory
+	 * is not freed when the CPU goes offline as the hyperv_pcpu_input_arg
+	 * may be used by the Hyper-V vPCI driver in reassigning interrupts
+	 * as part of the offlining process. The interrupt reassignment
+	 * happens *after* the CPUHP_AP_HYPERV_ONLINE state has run and
+	 * called this function.
+	 *
+	 * If a previously offlined CPU is brought back online again, the
+	 * originally allocated memory is reused in hv_common_cpu_init().
+	 */
 
 	return 0;
 }
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
@@ -1372,7 +1372,7 @@ static int vmbus_bus_init(void)
 	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
 				hv_synic_init, hv_synic_cleanup);
 	if (ret < 0)
-		goto err_cpuhp;
+		goto err_alloc;
 	hyperv_cpuhp_online = ret;
 
 	ret = vmbus_connect();
@@ -1392,9 +1392,8 @@ static int vmbus_bus_init(void)
 
 err_connect:
 	cpuhp_remove_state(hyperv_cpuhp_online);
-err_cpuhp:
-	hv_synic_free();
 err_alloc:
+	hv_synic_free();
 	if (vmbus_irq == -1) {
 		hv_remove_vmbus_handler();
 	} else {
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
@@ -489,7 +489,10 @@ struct hv_pcibus_device {
 	struct fwnode_handle *fwnode;
 	/* Protocol version negotiated with the host */
 	enum pci_protocol_version_t protocol_version;
+
+	struct mutex state_lock;
 	enum hv_pcibus_state state;
+
 	struct hv_device *hdev;
 	resource_size_t low_mmio_space;
 	resource_size_t high_mmio_space;
@@ -545,19 +548,10 @@ struct hv_dr_state {
 	struct hv_pcidev_description func[];
 };
 
-enum hv_pcichild_state {
-	hv_pcichild_init = 0,
-	hv_pcichild_requirements,
-	hv_pcichild_resourced,
-	hv_pcichild_ejecting,
-	hv_pcichild_maximum
-};
-
 struct hv_pci_dev {
 	/* List protected by pci_rescan_remove_lock */
 	struct list_head list_entry;
 	refcount_t refs;
-	enum hv_pcichild_state state;
 	struct pci_slot *pci_slot;
 	struct hv_pcidev_description desc;
 	bool reported_missing;
@@ -635,6 +629,11 @@ static void hv_arch_irq_unmask(struct irq_data *data)
 	pbus = pdev->bus;
 	hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
 	int_desc = data->chip_data;
+	if (!int_desc) {
+		dev_warn(&hbus->hdev->device, "%s() can not unmask irq %u\n",
+			 __func__, data->irq);
+		return;
+	}
 
 	local_irq_save(flags);
 
@@ -2004,12 +2003,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 		hv_pci_onchannelcallback(hbus);
 		spin_unlock_irqrestore(&channel->sched_lock, flags);
 
-		if (hpdev->state == hv_pcichild_ejecting) {
-			dev_err_once(&hbus->hdev->device,
-				     "the device is being ejected\n");
-			goto enable_tasklet;
-		}
-
 		udelay(100);
 	}
 
@@ -2615,6 +2608,8 @@ static void pci_devices_present_work(struct work_struct *work)
 	if (!dr)
 		return;
 
+	mutex_lock(&hbus->state_lock);
+
 	/* First, mark all existing children as reported missing. */
 	spin_lock_irqsave(&hbus->device_list_lock, flags);
 	list_for_each_entry(hpdev, &hbus->children, list_entry) {
@@ -2696,6 +2691,8 @@ static void pci_devices_present_work(struct work_struct *work)
 		break;
 	}
 
+	mutex_unlock(&hbus->state_lock);
+
 	kfree(dr);
 }
 
@@ -2844,7 +2841,7 @@ static void hv_eject_device_work(struct work_struct *work)
 	hpdev = container_of(work, struct hv_pci_dev, wrk);
 	hbus = hpdev->hbus;
 
-	WARN_ON(hpdev->state != hv_pcichild_ejecting);
+	mutex_lock(&hbus->state_lock);
 
 	/*
 	 * Ejection can come before or after the PCI bus has been set up, so
@@ -2882,6 +2879,8 @@ static void hv_eject_device_work(struct work_struct *work)
 	put_pcichild(hpdev);
 	put_pcichild(hpdev);
 	/* hpdev has been freed. Do not use it any more. */
+
+	mutex_unlock(&hbus->state_lock);
 }
 
 /**
@@ -2902,7 +2901,6 @@ static void hv_pci_eject_device(struct hv_pci_dev *hpdev)
 		return;
 	}
 
-	hpdev->state = hv_pcichild_ejecting;
 	get_pcichild(hpdev);
 	INIT_WORK(&hpdev->wrk, hv_eject_device_work);
 	queue_work(hbus->wq, &hpdev->wrk);
@@ -3331,8 +3329,10 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
 	struct pci_bus_d0_entry *d0_entry;
 	struct hv_pci_compl comp_pkt;
 	struct pci_packet *pkt;
+	bool retry = true;
 	int ret;
 
+enter_d0_retry:
 	/*
 	 * Tell the host that the bus is ready to use, and moved into the
 	 * powered-on state. This includes telling the host which region
@@ -3359,6 +3359,38 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
 	if (ret)
 		goto exit;
 
+	/*
+	 * In certain case (Kdump) the pci device of interest was
+	 * not cleanly shut down and resource is still held on host
+	 * side, the host could return invalid device status.
+	 * We need to explicitly request host to release the resource
+	 * and try to enter D0 again.
+	 */
+	if (comp_pkt.completion_status < 0 && retry) {
+		retry = false;
+
+		dev_err(&hdev->device, "Retrying D0 Entry\n");
+
+		/*
+		 * Hv_pci_bus_exit() calls hv_send_resource_released()
+		 * to free up resources of its child devices.
+		 * In the kdump kernel we need to set the
+		 * wslot_res_allocated to 255 so it scans all child
+		 * devices to release resources allocated in the
+		 * normal kernel before panic happened.
+		 */
+		hbus->wslot_res_allocated = 255;
+
+		ret = hv_pci_bus_exit(hdev, true);
+
+		if (ret == 0) {
+			kfree(pkt);
+			goto enter_d0_retry;
+		}
+		dev_err(&hdev->device,
+			"Retrying D0 failed with ret %d\n", ret);
+	}
+
 	if (comp_pkt.completion_status < 0) {
 		dev_err(&hdev->device,
 			"PCI Pass-through VSP failed D0 Entry with status %x\n",
@@ -3401,6 +3433,24 @@ static int hv_pci_query_relations(struct hv_device *hdev)
 	if (!ret)
 		ret = wait_for_response(hdev, &comp);
 
+	/*
+	 * In the case of fast device addition/removal, it's possible that
+	 * vmbus_sendpacket() or wait_for_response() returns -ENODEV but we
+	 * already got a PCI_BUS_RELATIONS* message from the host and the
+	 * channel callback already scheduled a work to hbus->wq, which can be
+	 * running pci_devices_present_work() -> survey_child_resources() ->
+	 * complete(&hbus->survey_event), even after hv_pci_query_relations()
+	 * exits and the stack variable 'comp' is no longer valid; as a result,
+	 * a hang or a page fault may happen when the complete() calls
+	 * raw_spin_lock_irqsave(). Flush hbus->wq before we exit from
+	 * hv_pci_query_relations() to avoid the issues. Note: if 'ret' is
+	 * -ENODEV, there can't be any more work item scheduled to hbus->wq
+	 * after the flush_workqueue(): see vmbus_onoffer_rescind() ->
+	 * vmbus_reset_channel_cb(), vmbus_rescind_cleanup() ->
+	 * channel->rescind = true.
+	 */
+	flush_workqueue(hbus->wq);
+
 	return ret;
 }
 
@@ -3586,7 +3636,6 @@ static int hv_pci_probe(struct hv_device *hdev,
 	struct hv_pcibus_device *hbus;
 	u16 dom_req, dom;
 	char *name;
-	bool enter_d0_retry = true;
 	int ret;
 
 	bridge = devm_pci_alloc_host_bridge(&hdev->device, 0);
@@ -3598,6 +3647,7 @@ static int hv_pci_probe(struct hv_device *hdev,
 		return -ENOMEM;
 
 	hbus->bridge = bridge;
+	mutex_init(&hbus->state_lock);
 	hbus->state = hv_pcibus_init;
 	hbus->wslot_res_allocated = -1;
 
@@ -3703,49 +3753,15 @@ static int hv_pci_probe(struct hv_device *hdev,
 	if (ret)
 		goto free_fwnode;
 
-retry:
 	ret = hv_pci_query_relations(hdev);
 	if (ret)
 		goto free_irq_domain;
 
+	mutex_lock(&hbus->state_lock);
+
 	ret = hv_pci_enter_d0(hdev);
-	/*
-	 * In certain case (Kdump) the pci device of interest was
-	 * not cleanly shut down and resource is still held on host
-	 * side, the host could return invalid device status.
-	 * We need to explicitly request host to release the resource
-	 * and try to enter D0 again.
-	 * Since the hv_pci_bus_exit() call releases structures
-	 * of all its child devices, we need to start the retry from
-	 * hv_pci_query_relations() call, requesting host to send
-	 * the synchronous child device relations message before this
-	 * information is needed in hv_send_resources_allocated()
-	 * call later.
-	 */
-	if (ret == -EPROTO && enter_d0_retry) {
-		enter_d0_retry = false;
-
-		dev_err(&hdev->device, "Retrying D0 Entry\n");
-
-		/*
-		 * Hv_pci_bus_exit() calls hv_send_resources_released()
-		 * to free up resources of its child devices.
-		 * In the kdump kernel we need to set the
-		 * wslot_res_allocated to 255 so it scans all child
-		 * devices to release resources allocated in the
-		 * normal kernel before panic happened.
-		 */
-		hbus->wslot_res_allocated = 255;
-		ret = hv_pci_bus_exit(hdev, true);
-
-		if (ret == 0)
-			goto retry;
-
-		dev_err(&hdev->device,
-			"Retrying D0 failed with ret %d\n", ret);
-	}
 	if (ret)
-		goto free_irq_domain;
+		goto release_state_lock;
 
 	ret = hv_pci_allocate_bridge_windows(hbus);
 	if (ret)
@@ -3763,12 +3779,15 @@ retry:
 	if (ret)
 		goto free_windows;
 
+	mutex_unlock(&hbus->state_lock);
 	return 0;
 
 free_windows:
 	hv_pci_free_bridge_windows(hbus);
 exit_d0:
 	(void) hv_pci_bus_exit(hdev, true);
+release_state_lock:
+	mutex_unlock(&hbus->state_lock);
 free_irq_domain:
 	irq_domain_remove(hbus->irq_domain);
 free_fwnode:
@@ -4018,20 +4037,26 @@ static int hv_pci_resume(struct hv_device *hdev)
 	if (ret)
 		goto out;
 
+	mutex_lock(&hbus->state_lock);
+
 	ret = hv_pci_enter_d0(hdev);
 	if (ret)
-		goto out;
+		goto release_state_lock;
 
 	ret = hv_send_resources_allocated(hdev);
 	if (ret)
-		goto out;
+		goto release_state_lock;
 
 	prepopulate_bars(hbus);
 
 	hv_pci_restore_msi_state(hbus);
 
 	hbus->state = hv_pcibus_installed;
+	mutex_unlock(&hbus->state_lock);
 	return 0;
+
+release_state_lock:
+	mutex_unlock(&hbus->state_lock);
 out:
 	vmbus_close(hdev->channel);
 	return ret;
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
@@ -200,6 +200,7 @@ enum cpuhp_state {
 
 	/* Online section invoked on the hotplugged CPU from the hotplug thread */
 	CPUHP_AP_ONLINE_IDLE,
+	CPUHP_AP_HYPERV_ONLINE,
 	CPUHP_AP_KVM_ONLINE,
 	CPUHP_AP_SCHED_WAIT_EMPTY,
 	CPUHP_AP_SMPBOOT_THREADS,