From c9f19b67f04c817ab83e80012fd2435fc516a44d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 12 Feb 2019 14:37:20 -0600 Subject: [PATCH 01/11] xen: mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. This patch fixes the following warning: drivers/xen/xen-pciback/xenbus.c: In function ‘xen_pcibk_frontend_changed’: drivers/xen/xen-pciback/xenbus.c:545:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (xenbus_dev_is_online(xdev)) ^ drivers/xen/xen-pciback/xenbus.c:548:2: note: here case XenbusStateUnknown: ^~~~ Warning level 3 was used: -Wimplicit-fallthrough=3 Notice that, in this particular case, the code comment is modified in accordance with what GCC is expecting to find. This patch is part of the ongoing efforts to enable -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Juergen Gross --- drivers/xen/xen-pciback/xenbus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 581c4e1a8b82..23f7f6ec7d1f 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -544,7 +544,7 @@ static void xen_pcibk_frontend_changed(struct xenbus_device *xdev, xenbus_switch_state(xdev, XenbusStateClosed); if (xenbus_dev_is_online(xdev)) break; - /* fall through if not online */ + /* fall through - if not online */ case XenbusStateUnknown: dev_dbg(&xdev->dev, "frontend is gone! unregister device\n"); device_unregister(&xdev->dev); From efac6c75dc4b4aac56c4a40e7f4d2e54fcd87834 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 12 Feb 2019 14:40:35 -0600 Subject: [PATCH 02/11] xen-scsiback: mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. This patch fixes the following warning: drivers/xen/xen-scsiback.c: In function ‘scsiback_frontend_changed’: drivers/xen/xen-scsiback.c:1185:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (xenbus_dev_is_online(dev)) ^ drivers/xen/xen-scsiback.c:1188:2: note: here case XenbusStateUnknown: ^~~~ Warning level 3 was used: -Wimplicit-fallthrough=3 Notice that, in this particular case, the code comment is modified in accordance with what GCC is expecting to find. This patch is part of the ongoing efforts to enable -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Juergen Gross --- drivers/xen/xen-scsiback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index c9e23a126218..cf0cb0898ae3 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -1184,7 +1184,7 @@ static void scsiback_frontend_changed(struct xenbus_device *dev, xenbus_switch_state(dev, XenbusStateClosed); if (xenbus_dev_is_online(dev)) break; - /* fall through if not online */ + /* fall through - if not online */ case XenbusStateUnknown: device_unregister(&dev->dev); break; From 7681f31ec9cdacab4fd10570be924f2cef6669ba Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 13 Feb 2019 18:21:31 -0500 Subject: [PATCH 03/11] xen/pciback: Don't disable PCI_COMMAND on PCI device reset. There is no need for this at all. Worst it means that if the guest tries to write to BARs it could lead (on certain platforms) to PCI SERR errors. Please note that with af6fc858a35b90e89ea7a7ee58e66628c55c776b "xen-pciback: limit guest control of command register" a guest is still allowed to enable those control bits (safely), but is not allowed to disable them and that therefore a well behaved frontend which enables things before using them will still function correctly. This is done via an write to the configuration register 0x4 which triggers on the backend side: command_write \- pci_enable_device \- pci_enable_device_flags \- do_pci_enable_device \- pcibios_enable_device \-pci_enable_resourcess [which enables the PCI_COMMAND_MEMORY|PCI_COMMAND_IO] However guests (and drivers) which don't do this could cause problems, including the security issues which XSA-120 sought to address. Reported-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Prarit Bhargava Signed-off-by: Juergen Gross --- drivers/xen/xen-pciback/pciback_ops.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index ea4a08b83fa0..787966f44589 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -127,8 +127,6 @@ void xen_pcibk_reset_device(struct pci_dev *dev) if (pci_is_enabled(dev)) pci_disable_device(dev); - pci_write_config_word(dev, PCI_COMMAND, 0); - dev->is_busmaster = 0; } else { pci_read_config_word(dev, PCI_COMMAND, &cmd); From fa13e665e02874c0a5f4d06d6967ae34a6cb3d6a Mon Sep 17 00:00:00 2001 From: Oleksandr Andrushchenko Date: Thu, 14 Feb 2019 16:23:20 +0200 Subject: [PATCH 04/11] xen/gntdev: Do not destroy context while dma-bufs are in use If there are exported DMA buffers which are still in use and grant device is closed by either normal user-space close or by a signal this leads to the grant device context to be destroyed, thus making it not possible to correctly destroy those exported buffers when they are returned back to gntdev and makes the module crash: [ 339.617540] [] dmabuf_exp_ops_release+0x40/0xa8 [ 339.617560] [] dma_buf_release+0x60/0x190 [ 339.617577] [] __fput+0x88/0x1d0 [ 339.617589] [] ____fput+0xc/0x18 [ 339.617607] [] task_work_run+0x9c/0xc0 [ 339.617622] [] do_notify_resume+0xfc/0x108 Fix this by referencing gntdev on each DMA buffer export and unreferencing on buffer release. Signed-off-by: Oleksandr Andrushchenko Reviewed-by: Boris Ostrovsky@oracle.com> Signed-off-by: Juergen Gross --- drivers/xen/gntdev-dmabuf.c | 12 +++++++++++- drivers/xen/gntdev-dmabuf.h | 2 +- drivers/xen/gntdev.c | 2 +- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/xen/gntdev-dmabuf.c b/drivers/xen/gntdev-dmabuf.c index cba6b586bfbd..d97fcfc5e558 100644 --- a/drivers/xen/gntdev-dmabuf.c +++ b/drivers/xen/gntdev-dmabuf.c @@ -80,6 +80,12 @@ struct gntdev_dmabuf_priv { struct list_head imp_list; /* This is the lock which protects dma_buf_xxx lists. */ struct mutex lock; + /* + * We reference this file while exporting dma-bufs, so + * the grant device context is not destroyed while there are + * external users alive. + */ + struct file *filp; }; /* DMA buffer export support. */ @@ -311,6 +317,7 @@ static void dmabuf_exp_release(struct kref *kref) dmabuf_exp_wait_obj_signal(gntdev_dmabuf->priv, gntdev_dmabuf); list_del(&gntdev_dmabuf->next); + fput(gntdev_dmabuf->priv->filp); kfree(gntdev_dmabuf); } @@ -423,6 +430,7 @@ static int dmabuf_exp_from_pages(struct gntdev_dmabuf_export_args *args) mutex_lock(&args->dmabuf_priv->lock); list_add(&gntdev_dmabuf->next, &args->dmabuf_priv->exp_list); mutex_unlock(&args->dmabuf_priv->lock); + get_file(gntdev_dmabuf->priv->filp); return 0; fail: @@ -834,7 +842,7 @@ long gntdev_ioctl_dmabuf_imp_release(struct gntdev_priv *priv, return dmabuf_imp_release(priv->dmabuf_priv, op.fd); } -struct gntdev_dmabuf_priv *gntdev_dmabuf_init(void) +struct gntdev_dmabuf_priv *gntdev_dmabuf_init(struct file *filp) { struct gntdev_dmabuf_priv *priv; @@ -847,6 +855,8 @@ struct gntdev_dmabuf_priv *gntdev_dmabuf_init(void) INIT_LIST_HEAD(&priv->exp_wait_list); INIT_LIST_HEAD(&priv->imp_list); + priv->filp = filp; + return priv; } diff --git a/drivers/xen/gntdev-dmabuf.h b/drivers/xen/gntdev-dmabuf.h index 7220a53d0fc5..3d9b9cf9d5a1 100644 --- a/drivers/xen/gntdev-dmabuf.h +++ b/drivers/xen/gntdev-dmabuf.h @@ -14,7 +14,7 @@ struct gntdev_dmabuf_priv; struct gntdev_priv; -struct gntdev_dmabuf_priv *gntdev_dmabuf_init(void); +struct gntdev_dmabuf_priv *gntdev_dmabuf_init(struct file *filp); void gntdev_dmabuf_fini(struct gntdev_dmabuf_priv *priv); diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 5efc5eee9544..7cf9c51318aa 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -600,7 +600,7 @@ static int gntdev_open(struct inode *inode, struct file *flip) mutex_init(&priv->lock); #ifdef CONFIG_XEN_GNTDEV_DMABUF - priv->dmabuf_priv = gntdev_dmabuf_init(); + priv->dmabuf_priv = gntdev_dmabuf_init(flip); if (IS_ERR(priv->dmabuf_priv)) { ret = PTR_ERR(priv->dmabuf_priv); kfree(priv); From 068e79f4a9d613f4327cb4062a1e49c0eaca7149 Mon Sep 17 00:00:00 2001 From: Oleksandr Andrushchenko Date: Thu, 14 Feb 2019 16:23:21 +0200 Subject: [PATCH 05/11] xen/gntdev: Check and release imported dma-bufs on close Check if there are any imported dma-bufs left not released by user-space when grant device's release callback is called and free those if this is the case. This can happen if user-space leaks the buffers because of a bug or application has been terminated for any reason. Signed-off-by: Oleksandr Andrushchenko Reviewed-by: Boris Ostrovsky@oracle.com> Signed-off-by: Juergen Gross --- drivers/xen/gntdev-dmabuf.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/xen/gntdev-dmabuf.c b/drivers/xen/gntdev-dmabuf.c index d97fcfc5e558..2c4f324f8626 100644 --- a/drivers/xen/gntdev-dmabuf.c +++ b/drivers/xen/gntdev-dmabuf.c @@ -745,6 +745,14 @@ static int dmabuf_imp_release(struct gntdev_dmabuf_priv *priv, u32 fd) return 0; } +static void dmabuf_imp_release_all(struct gntdev_dmabuf_priv *priv) +{ + struct gntdev_dmabuf *q, *gntdev_dmabuf; + + list_for_each_entry_safe(gntdev_dmabuf, q, &priv->imp_list, next) + dmabuf_imp_release(priv, gntdev_dmabuf->fd); +} + /* DMA buffer IOCTL support. */ long gntdev_ioctl_dmabuf_exp_from_refs(struct gntdev_priv *priv, int use_ptemod, @@ -862,5 +870,6 @@ struct gntdev_dmabuf_priv *gntdev_dmabuf_init(struct file *filp) void gntdev_dmabuf_fini(struct gntdev_dmabuf_priv *priv) { + dmabuf_imp_release_all(priv); kfree(priv); } From 357b4da50a62e2fd70eacee21cdbd22d4c7a7b60 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 14 Feb 2019 11:42:39 +0100 Subject: [PATCH 06/11] x86: respect memory size limiting via mem= parameter When limiting memory size via kernel parameter "mem=" this should be respected even in case of memory made accessible via a PCI card. Today this kind of memory won't be made usable in initial memory setup as the memory won't be visible in E820 map, but it might be added when adding PCI devices due to corresponding ACPI table entries. Not respecting "mem=" can be corrected by adding a global max_mem_size variable set by parse_memopt() which will result in rejecting adding memory areas resulting in a memory size above the allowed limit. Signed-off-by: Juergen Gross Acked-by: Ingo Molnar Reviewed-by: William Kucharski Signed-off-by: Juergen Gross --- arch/x86/kernel/e820.c | 5 +++++ include/linux/memory_hotplug.h | 2 ++ mm/memory_hotplug.c | 6 ++++++ 3 files changed, 13 insertions(+) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 50895c2f937d..e67513e2cbbb 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -881,6 +882,10 @@ static int __init parse_memopt(char *p) e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1); +#ifdef CONFIG_MEMORY_HOTPLUG + max_mem_size = mem_size; +#endif + return 0; } early_param("mem", parse_memopt); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 368267c1b71b..cfd12078172a 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -100,6 +100,8 @@ extern void __online_page_free(struct page *page); extern int try_online_node(int nid); +extern u64 max_mem_size; + extern bool memhp_auto_online; /* If movable_node boot option specified */ extern bool movable_node_enabled; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 124e794867c5..519f9db063ff 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -96,10 +96,16 @@ void mem_hotplug_done(void) cpus_read_unlock(); } +u64 max_mem_size = U64_MAX; + /* add this memory to iomem resource */ static struct resource *register_memory_resource(u64 start, u64 size) { struct resource *res, *conflict; + + if (start + size > max_mem_size) + return ERR_PTR(-E2BIG); + res = kzalloc(sizeof(struct resource), GFP_KERNEL); if (!res) return ERR_PTR(-ENOMEM); From 1d988ed46543ca36c010634c97ac32114362ddb1 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 14 Feb 2019 11:42:40 +0100 Subject: [PATCH 07/11] x86/xen: dont add memory above max allowed allocation Don't allow memory to be added above the allowed maximum allocation limit set by Xen. Trying to do so would result in cases like the following: [ 584.559652] ------------[ cut here ]------------ [ 584.564897] WARNING: CPU: 2 PID: 1 at ../arch/x86/xen/multicalls.c:129 xen_alloc_pte+0x1c7/0x390() [ 584.575151] Modules linked in: [ 584.578643] Supported: Yes [ 584.581750] CPU: 2 PID: 1 Comm: swapper/0 Not tainted 4.4.120-92.70-default #1 [ 584.590000] Hardware name: Cisco Systems Inc UCSC-C460-M4/UCSC-C460-M4, BIOS C460M4.4.0.1b.0.0629181419 06/29/2018 [ 584.601862] 0000000000000000 ffffffff813175a0 0000000000000000 ffffffff8184777c [ 584.610200] ffffffff8107f4e1 ffff880487eb7000 ffff8801862b79c0 ffff88048608d290 [ 584.618537] 0000000000487eb7 ffffea0000000201 ffffffff81009de7 ffffffff81068561 [ 584.626876] Call Trace: [ 584.629699] [] dump_trace+0x59/0x340 [ 584.635645] [] show_stack_log_lvl+0xea/0x170 [ 584.642391] [] show_stack+0x21/0x40 [ 584.648238] [] dump_stack+0x5c/0x7c [ 584.654085] [] warn_slowpath_common+0x81/0xb0 [ 584.660932] [] xen_alloc_pte+0x1c7/0x390 [ 584.667289] [] pmd_populate_kernel.constprop.6+0x40/0x80 [ 584.675241] [] phys_pmd_init+0x210/0x255 [ 584.681587] [] phys_pud_init+0x1da/0x247 [ 584.687931] [] kernel_physical_mapping_init+0xf5/0x1d4 [ 584.695682] [] init_memory_mapping+0x18d/0x380 [ 584.702631] [] arch_add_memory+0x59/0xf0 Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/xen/setup.c | 13 +++++++++++++ drivers/xen/xen-balloon.c | 11 +++++++++++ include/xen/xen.h | 4 ++++ 3 files changed, 28 insertions(+) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index d5f303c0e656..548d1e0a5ba1 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -589,6 +590,14 @@ static void __init xen_align_and_add_e820_region(phys_addr_t start, if (type == E820_TYPE_RAM) { start = PAGE_ALIGN(start); end &= ~((phys_addr_t)PAGE_SIZE - 1); +#ifdef CONFIG_MEMORY_HOTPLUG + /* + * Don't allow adding memory not in E820 map while booting the + * system. Once the balloon driver is up it will remove that + * restriction again. + */ + max_mem_size = end; +#endif } e820__range_add(start, end - start, type); @@ -748,6 +757,10 @@ char * __init xen_memory_setup(void) memmap.nr_entries = ARRAY_SIZE(xen_e820_table.entries); set_xen_guest_handle(memmap.buffer, xen_e820_table.entries); +#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_XEN_BALLOON) + xen_saved_max_mem_size = max_mem_size; +#endif + op = xen_initial_domain() ? XENMEM_machine_memory_map : XENMEM_memory_map; diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index 2acbfe104e46..a67236b02452 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -50,6 +51,10 @@ #define BALLOON_CLASS_NAME "xen_memory" +#ifdef CONFIG_MEMORY_HOTPLUG +u64 xen_saved_max_mem_size = 0; +#endif + static struct device balloon_dev; static int register_balloon(struct device *dev); @@ -63,6 +68,12 @@ static void watch_target(struct xenbus_watch *watch, static bool watch_fired; static long target_diff; +#ifdef CONFIG_MEMORY_HOTPLUG + /* The balloon driver will take care of adding memory now. */ + if (xen_saved_max_mem_size) + max_mem_size = xen_saved_max_mem_size; +#endif + err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target); if (err != 1) { /* This is ok (for domain0 at least) - so just return */ diff --git a/include/xen/xen.h b/include/xen/xen.h index 0e2156786ad2..19d032373de5 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -46,4 +46,8 @@ struct bio_vec; bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, const struct bio_vec *vec2); +#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_XEN_BALLOON) +extern u64 xen_saved_max_mem_size; +#endif + #endif /* _XEN_XEN_H */ From 85eb278c1899f78d1429b45ffa84039d9011cb55 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 4 Mar 2019 11:31:27 +0200 Subject: [PATCH 08/11] xen/ACPI: Switch to bitmap_zalloc() Switch to bitmap_zalloc() to show clearly what we are allocating. Besides that it returns pointer of bitmap type instead of opaque void *. Signed-off-by: Andy Shevchenko Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- drivers/xen/xen-acpi-processor.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index fbb9137c7d02..98e35644fda7 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -410,21 +410,21 @@ static int check_acpi_ids(struct acpi_processor *pr_backup) /* All online CPUs have been processed at this stage. Now verify * whether in fact "online CPUs" == physical CPUs. */ - acpi_id_present = kcalloc(BITS_TO_LONGS(nr_acpi_bits), sizeof(unsigned long), GFP_KERNEL); + acpi_id_present = bitmap_zalloc(nr_acpi_bits, GFP_KERNEL); if (!acpi_id_present) return -ENOMEM; - acpi_id_cst_present = kcalloc(BITS_TO_LONGS(nr_acpi_bits), sizeof(unsigned long), GFP_KERNEL); + acpi_id_cst_present = bitmap_zalloc(nr_acpi_bits, GFP_KERNEL); if (!acpi_id_cst_present) { - kfree(acpi_id_present); + bitmap_free(acpi_id_present); return -ENOMEM; } acpi_psd = kcalloc(nr_acpi_bits, sizeof(struct acpi_psd_package), GFP_KERNEL); if (!acpi_psd) { - kfree(acpi_id_present); - kfree(acpi_id_cst_present); + bitmap_free(acpi_id_present); + bitmap_free(acpi_id_cst_present); return -ENOMEM; } @@ -533,14 +533,14 @@ static int __init xen_acpi_processor_init(void) return -ENODEV; nr_acpi_bits = get_max_acpi_id() + 1; - acpi_ids_done = kcalloc(BITS_TO_LONGS(nr_acpi_bits), sizeof(unsigned long), GFP_KERNEL); + acpi_ids_done = bitmap_zalloc(nr_acpi_bits, GFP_KERNEL); if (!acpi_ids_done) return -ENOMEM; acpi_perf_data = alloc_percpu(struct acpi_processor_performance); if (!acpi_perf_data) { pr_debug("Memory allocation error for acpi_perf_data\n"); - kfree(acpi_ids_done); + bitmap_free(acpi_ids_done); return -ENOMEM; } for_each_possible_cpu(i) { @@ -584,7 +584,7 @@ err_unregister: err_out: /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */ free_acpi_perf_data(); - kfree(acpi_ids_done); + bitmap_free(acpi_ids_done); return rc; } static void __exit xen_acpi_processor_exit(void) @@ -592,9 +592,9 @@ static void __exit xen_acpi_processor_exit(void) int i; unregister_syscore_ops(&xap_syscore_ops); - kfree(acpi_ids_done); - kfree(acpi_id_present); - kfree(acpi_id_cst_present); + bitmap_free(acpi_ids_done); + bitmap_free(acpi_id_present); + bitmap_free(acpi_id_cst_present); kfree(acpi_psd); for_each_possible_cpu(i) acpi_processor_unregister_performance(i); From b1ddd406cd1e9bb51fa90d03ee562c832e38eb52 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Mar 2019 21:52:39 +0100 Subject: [PATCH 09/11] xen: remove pre-xen3 fallback handlers The legacy hypercall handlers were originally added with a comment explaining that "copying the argument structures in HYPERVISOR_event_channel_op() and HYPERVISOR_physdev_op() into the local variable is sufficiently safe" and only made sure to not write past the end of the argument structure, the checks in linux/string.h disagree with that, when link-time optimizations are used: In function 'memcpy', inlined from 'pirq_query_unmask' at drivers/xen/fallback.c:53:2, inlined from '__startup_pirq' at drivers/xen/events/events_base.c:529:2, inlined from 'restore_pirqs' at drivers/xen/events/events_base.c:1439:3, inlined from 'xen_irq_resume' at drivers/xen/events/events_base.c:1581:2: include/linux/string.h:350:3: error: call to '__read_overflow2' declared with attribute error: detected read beyond size of object passed as 2nd parameter __read_overflow2(); ^ Further research turned out that only Xen 3.0.2 or earlier required the fallback at all, while all versions in use today don't need it. As far as I can tell, it is not even possible to run a mainline kernel on those old Xen releases, at the time when they were in use, only a patched kernel was supported anyway. Fixes: cf47a83fb06e ("xen/hypercall: fix hypercall fallback code for very old hypervisors") Reviewed-by: Boris Ostrovsky Cc: Jan Beulich Signed-off-by: Arnd Bergmann Signed-off-by: Juergen Gross --- arch/x86/include/asm/xen/hypercall.h | 13 +---- drivers/xen/Makefile | 1 - drivers/xen/fallback.c | 81 ---------------------------- 3 files changed, 2 insertions(+), 93 deletions(-) delete mode 100644 drivers/xen/fallback.c diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index ef05bea7010d..de6f0d59a24f 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -332,15 +332,11 @@ HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val, return _hypercall4(int, update_va_mapping, va, new_val.pte, new_val.pte >> 32, flags); } -extern int __must_check xen_event_channel_op_compat(int, void *); static inline int HYPERVISOR_event_channel_op(int cmd, void *arg) { - int rc = _hypercall2(int, event_channel_op, cmd, arg); - if (unlikely(rc == -ENOSYS)) - rc = xen_event_channel_op_compat(cmd, arg); - return rc; + return _hypercall2(int, event_channel_op, cmd, arg); } static inline int @@ -355,15 +351,10 @@ HYPERVISOR_console_io(int cmd, int count, char *str) return _hypercall3(int, console_io, cmd, count, str); } -extern int __must_check xen_physdev_op_compat(int, void *); - static inline int HYPERVISOR_physdev_op(int cmd, void *arg) { - int rc = _hypercall2(int, physdev_op, cmd, arg); - if (unlikely(rc == -ENOSYS)) - rc = xen_physdev_op_compat(cmd, arg); - return rc; + return _hypercall2(int, physdev_op, cmd, arg); } static inline int diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index c48927a58e10..ad3844d9f876 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o -obj-$(CONFIG_X86) += fallback.o obj-y += grant-table.o features.o balloon.o manage.o preempt.o time.o obj-y += mem-reservation.o obj-y += events/ diff --git a/drivers/xen/fallback.c b/drivers/xen/fallback.c deleted file mode 100644 index b04fb64c5a91..000000000000 --- a/drivers/xen/fallback.c +++ /dev/null @@ -1,81 +0,0 @@ -#include -#include -#include -#include -#include -#include - -int xen_event_channel_op_compat(int cmd, void *arg) -{ - struct evtchn_op op; - int rc; - - op.cmd = cmd; - memcpy(&op.u, arg, sizeof(op.u)); - rc = _hypercall1(int, event_channel_op_compat, &op); - - switch (cmd) { - case EVTCHNOP_close: - case EVTCHNOP_send: - case EVTCHNOP_bind_vcpu: - case EVTCHNOP_unmask: - /* no output */ - break; - -#define COPY_BACK(eop) \ - case EVTCHNOP_##eop: \ - memcpy(arg, &op.u.eop, sizeof(op.u.eop)); \ - break - - COPY_BACK(bind_interdomain); - COPY_BACK(bind_virq); - COPY_BACK(bind_pirq); - COPY_BACK(status); - COPY_BACK(alloc_unbound); - COPY_BACK(bind_ipi); -#undef COPY_BACK - - default: - WARN_ON(rc != -ENOSYS); - break; - } - - return rc; -} -EXPORT_SYMBOL_GPL(xen_event_channel_op_compat); - -int xen_physdev_op_compat(int cmd, void *arg) -{ - struct physdev_op op; - int rc; - - op.cmd = cmd; - memcpy(&op.u, arg, sizeof(op.u)); - rc = _hypercall1(int, physdev_op_compat, &op); - - switch (cmd) { - case PHYSDEVOP_IRQ_UNMASK_NOTIFY: - case PHYSDEVOP_set_iopl: - case PHYSDEVOP_set_iobitmap: - case PHYSDEVOP_apic_write: - /* no output */ - break; - -#define COPY_BACK(pop, fld) \ - case PHYSDEVOP_##pop: \ - memcpy(arg, &op.u.fld, sizeof(op.u.fld)); \ - break - - COPY_BACK(irq_status_query, irq_status_query); - COPY_BACK(apic_read, apic_op); - COPY_BACK(ASSIGN_VECTOR, irq_op); -#undef COPY_BACK - - default: - WARN_ON(rc != -ENOSYS); - break; - } - - return rc; -} -EXPORT_SYMBOL_GPL(xen_physdev_op_compat); From 201676095dda7e5b31a5e1d116d10fc22985075e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 7 Mar 2019 08:41:22 +0300 Subject: [PATCH 10/11] xen, cpu_hotplug: Prevent an out of bounds access The "cpu" variable comes from the sscanf() so Smatch marks it as untrusted data. We can't pass a higher value than "nr_cpu_ids" to cpu_possible() or it results in an out of bounds access. Fixes: d68d82afd4c8 ("xen: implement CPU hotplugging") Signed-off-by: Dan Carpenter Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- drivers/xen/cpu_hotplug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c index b1357aa4bc55..f192b6f42da9 100644 --- a/drivers/xen/cpu_hotplug.c +++ b/drivers/xen/cpu_hotplug.c @@ -54,7 +54,7 @@ static int vcpu_online(unsigned int cpu) } static void vcpu_hotplug(unsigned int cpu) { - if (!cpu_possible(cpu)) + if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) return; switch (vcpu_online(cpu)) { From 01bd2ac2f55a1916d81dace12fa8d7ae1c79b5ea Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 7 Mar 2019 10:11:19 +0100 Subject: [PATCH 11/11] xen: fix dom0 boot on huge systems Commit f7c90c2aa40048 ("x86/xen: don't write ptes directly in 32-bit PV guests") introduced a regression for booting dom0 on huge systems with lots of RAM (in the TB range). Reason is that on those hosts the p2m list needs to be moved early in the boot process and this requires temporary page tables to be created. Said commit modified xen_set_pte_init() to use a hypercall for writing a PTE, but this requires the page table being in the direct mapped area, which is not the case for the temporary page tables used in xen_relocate_p2m(). As the page tables are completely written before being linked to the actual address space instead of set_pte() a plain write to memory can be used in xen_relocate_p2m(). Fixes: f7c90c2aa40048 ("x86/xen: don't write ptes directly in 32-bit PV guests") Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross --- arch/x86/xen/mmu_pv.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 0f4fe206dcc2..20701977e6c0 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2114,10 +2114,10 @@ void __init xen_relocate_p2m(void) pt = early_memremap(pt_phys, PAGE_SIZE); clear_page(pt); for (idx_pte = 0; - idx_pte < min(n_pte, PTRS_PER_PTE); - idx_pte++) { - set_pte(pt + idx_pte, - pfn_pte(p2m_pfn, PAGE_KERNEL)); + idx_pte < min(n_pte, PTRS_PER_PTE); + idx_pte++) { + pt[idx_pte] = pfn_pte(p2m_pfn, + PAGE_KERNEL); p2m_pfn++; } n_pte -= PTRS_PER_PTE; @@ -2125,8 +2125,7 @@ void __init xen_relocate_p2m(void) make_lowmem_page_readonly(__va(pt_phys)); pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, PFN_DOWN(pt_phys)); - set_pmd(pmd + idx_pt, - __pmd(_PAGE_TABLE | pt_phys)); + pmd[idx_pt] = __pmd(_PAGE_TABLE | pt_phys); pt_phys += PAGE_SIZE; } n_pt -= PTRS_PER_PMD; @@ -2134,7 +2133,7 @@ void __init xen_relocate_p2m(void) make_lowmem_page_readonly(__va(pmd_phys)); pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE, PFN_DOWN(pmd_phys)); - set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys)); + pud[idx_pmd] = __pud(_PAGE_TABLE | pmd_phys); pmd_phys += PAGE_SIZE; } n_pmd -= PTRS_PER_PUD;